From 5d087830d2a10eb18534ea3eee487b3991bc1ba1 Mon Sep 17 00:00:00 2001 From: Johan Fylling Date: Wed, 28 Aug 2024 11:50:31 +0200 Subject: [PATCH] topdown: Adding unification scope to virtual-cache key Fixing issue where ref-head rules could put evaluation result scoped by call-site ref unification into global virtual-cache, which would later erroneously be read by ref to same rule/virtual document but with different "unification scope". Fixes: #6926 Signed-off-by: Johan Fylling --- .../v1/refheads/test-regressions.yaml | 18 ++ topdown/eval.go | 159 +++++++++++++++++- topdown/eval_test.go | 102 +++++++++++ 3 files changed, 273 insertions(+), 6 deletions(-) diff --git a/test/cases/testdata/v1/refheads/test-regressions.yaml b/test/cases/testdata/v1/refheads/test-regressions.yaml index 1fc427f66e..a0c1731fd1 100644 --- a/test/cases/testdata/v1/refheads/test-regressions.yaml +++ b/test/cases/testdata/v1/refheads/test-regressions.yaml @@ -160,3 +160,21 @@ cases: q if p[true] == false want_result: - x: true + - note: regression/virtual-cache key scope + query: data.test.main = x + modules: + - | + package test + + obj.sub[x][x] contains x if some x in ["one", "two"] + + obj[x][x] contains x if x := "whatever" + + main contains x if { + [1 | obj.sub[_].one[_]] + x := obj.sub[_][_][_] + } + want_result: + - x: + - "one" + - "two" diff --git a/topdown/eval.go b/topdown/eval.go index 6263efba64..52b913fc83 100644 --- a/topdown/eval.go +++ b/topdown/eval.go @@ -2407,6 +2407,15 @@ type evalVirtualPartialCacheHint struct { full bool } +func (h *evalVirtualPartialCacheHint) keyWithoutScope() ast.Ref { + if h.key != nil { + if _, ok := h.key[len(h.key)-1].Value.(vcKeyScope); ok { + return h.key[:len(h.key)-1] + } + } + return h.key +} + func (e evalVirtualPartial) eval(iter unifyIterator) error { unknown := e.e.unknown(e.ref[:e.pos+1], e.bindings) @@ -2485,7 +2494,7 @@ func (e evalVirtualPartial) evalEachRule(iter unifyIterator, unknown bool) error } if hint.key != nil { - if v, err := result.Value.Find(hint.key[e.pos+1:]); err == nil && v != nil { + if v, err := result.Value.Find(hint.keyWithoutScope()[e.pos+1:]); err == nil && v != nil { e.e.virtualCache.Put(hint.key, ast.NewTerm(v)) } } @@ -2832,6 +2841,8 @@ func (e evalVirtualPartial) evalCache(iter unifyIterator) (evalVirtualPartialCac plugged := e.bindings.Plug(e.ref[e.pos+1]) if _, ok := plugged.Value.(ast.Var); ok { + // Note: we might have additional opportunity to optimize here, if we consider that ground values + // right of e.pos could create a smaller eval "scope" through ref bi-unification before evaluating rules. hint.full = true hint.key = e.plugged[:e.pos+1] e.e.instr.counterIncr(evalOpVirtualCacheMiss) @@ -2840,19 +2851,76 @@ func (e evalVirtualPartial) evalCache(iter unifyIterator) (evalVirtualPartialCac m := maxRefLength(e.ir.Rules, len(e.ref)) + // Creating the hint key by walking the ref and plugging vars until we hit a non-ground term. + // Any ground term right of this point will affect the scope of evaluation by ref unification, + // so we create a virtual-cache scope key to qualify the result stored in the cache. + // + // E.g. given the following rule: + // + // package example + // + // a[x][y][z] := x + y + z if { + // some x in [1, 2] + // some y in [3, 4] + // some z in [5, 6] + // } + // + // and the following ref (1): + // + // data.example.a[1][_][5] + // + // then the hint key will be: + // + // data.example.a[1][<_,5>] + // + // where <_,5> is the scope of the pre-eval unification. + // This part does not contribute to the "location" of the cached data. + // + // The following ref (2): + // + // data.example.a[1][_][6] + // + // will produce the same hint key "location" 'data.example.a[1]', but a different scope component + // '<_,6>', which will create a different entry in the cache. + scoping := false + hintKeyEnd := 0 for i := e.pos + 1; i < m; i++ { plugged = e.bindings.Plug(e.ref[i]) - if !plugged.IsGround() { - break + if plugged.IsGround() && !scoping { + hintKeyEnd = i + hint.key = append(e.plugged[:i], plugged) + } else { + scoping = true + hl := len(hint.key) + if hl == 0 { + break + } + if scope, ok := hint.key[hl-1].Value.(vcKeyScope); ok { + scope.Ref = append(scope.Ref, plugged) + hint.key[len(hint.key)-1] = ast.NewTerm(scope) + } else { + scope = vcKeyScope{} + scope.Ref = append(scope.Ref, plugged) + hint.key = append(hint.key, ast.NewTerm(scope)) + } } - hint.key = append(e.plugged[:i], plugged) - if cached, _ := e.e.virtualCache.Get(hint.key); cached != nil { e.e.instr.counterIncr(evalOpVirtualCacheHit) hint.hit = true - return hint, e.evalTerm(iter, i+1, cached, e.bindings) + return hint, e.evalTerm(iter, hintKeyEnd+1, cached, e.bindings) + } + } + + if hl := len(hint.key); hl > 0 { + if scope, ok := hint.key[hl-1].Value.(vcKeyScope); ok { + scope = scope.reduce() + if scope.empty() { + hint.key = hint.key[:hl-1] + } else { + hint.key[hl-1].Value = scope + } } } @@ -2861,6 +2929,85 @@ func (e evalVirtualPartial) evalCache(iter unifyIterator) (evalVirtualPartialCac return hint, nil } +// vcKeyScope represents the scoping that pre-rule-eval ref unification imposes on a virtual cache entry. +type vcKeyScope struct { + ast.Ref +} + +func (q vcKeyScope) Compare(other ast.Value) int { + if q2, ok := other.(vcKeyScope); ok { + r1 := q.Ref + r2 := q2.Ref + if len(r1) != len(r2) { + return -1 + } + + for i := range r1 { + _, v1IsVar := r1[i].Value.(ast.Var) + _, v2IsVar := r2[i].Value.(ast.Var) + if v1IsVar && v2IsVar { + continue + } + if r1[i].Value.Compare(r2[i].Value) != 0 { + return -1 + } + } + + return 0 + } + return 1 +} + +func (vcKeyScope) Find(ast.Ref) (ast.Value, error) { + return nil, nil +} + +func (q vcKeyScope) Hash() int { + var hash int + for _, v := range q.Ref { + if _, ok := v.Value.(ast.Var); ok { + // all vars are equal + hash++ + } else { + hash += v.Value.Hash() + } + } + return hash +} + +func (q vcKeyScope) IsGround() bool { + return false +} + +func (q vcKeyScope) String() string { + buf := make([]string, 0, len(q.Ref)) + for _, t := range q.Ref { + if _, ok := t.Value.(ast.Var); ok { + buf = append(buf, "_") + } else { + buf = append(buf, t.String()) + } + } + return fmt.Sprintf("<%s>", strings.Join(buf, ",")) +} + +// reduce removes vars from the tail of the ref. +func (q vcKeyScope) reduce() vcKeyScope { + ref := q.Ref.Copy() + var i int + for i = len(q.Ref) - 1; i >= 0; i-- { + if _, ok := q.Ref[i].Value.(ast.Var); !ok { + break + } + } + ref = ref[:i+1] + return vcKeyScope{ref} +} + +func (q vcKeyScope) empty() bool { + return len(q.Ref) == 0 +} + func getNestedObject(ref ast.Ref, rootObj *ast.Object, b *bindings, l *ast.Location) (*ast.Object, error) { current := rootObj for _, term := range ref { diff --git a/topdown/eval_test.go b/topdown/eval_test.go index 9b4e565ff9..acbb06c806 100644 --- a/topdown/eval_test.go +++ b/topdown/eval_test.go @@ -561,6 +561,108 @@ func TestTopdownVirtualCache(t *testing.T) { miss: 3, // 'data.test.p = true' + 'data.test.q[[y, 1]] = z' + 'data.test.q = x' exp: 1, }, + { + note: "partial object, ref-head, ref with unification scope", + module: `package test + import rego.v1 + + a[x][y][z] := x + y + z if { + some x in [1, 2] + some y in [3, 4] + some z in [5, 6] + } + + p if { + x := a[1][_][5] # miss, cache key: data.test.a[1][<_,5>] + some foo + y := a[1][foo][5] # hit, cache key: data.test.a[1][<_,5>] + x == y + }`, + query: `data.test.p = x`, + hit: 1, // data.test.a[1][_][5] + miss: 2, // data.test.p + data.test.a[1][_][5] + }, + { + note: "partial object, ref-head, ref with unification scope, component order", + module: `package test + import rego.v1 + + a[x][y][a][b] := i if { + some x in [1, 2] + some y in [3, 4] + some a in ["foo", "bar"] + some i, b in ["foo", "bar"] + } + + p if { + x := a[1][_]["foo"]["bar"] # miss, cache key: data.test.a[1][<_,foo,bar>] + y := a[1][_]["bar"]["foo"] # miss, cache key: data.test.a[1][<_,bar,foo>] + x != y + }`, + query: `data.test.p = x`, + hit: 0, + miss: 3, // data.test.p + data.test.a[1][<_,foo,bar>] + data.test.a[1][<_,bar,foo>] + }, + { + note: "partial object, ref-head, ref with unification scope, diverging key scope", + module: `package test + import rego.v1 + + a[x][y][z] := x + y + z if { + some x in [1, 2] + some y in [3, 4] + some z in [5, 6] + } + + p if { + x := a[1][_][5] # miss, cache key: data.test.a[1][<_,5>] + y := a[1][_][6] # miss, cache key: data.test.a[1][<_,6>] + z := a[1][_][5] # hit, cache key: data.test.a[1][<_,5>] + x != y + x == z + }`, + query: `data.test.p = x`, + hit: 1, // data.test.a[1][_][5] + miss: 3, // data.test.p + data.test.a[1][_][5] + data.test.a[1][_][6] + }, + { + note: "partial object, ref-head, ref with unification scope, trailing vars don't contribute to key scope", + module: `package test + import rego.v1 + + a[x][y][z][x] := x + y + z if { + some x in [1, 2] + some y in [3, 4] + some z in [5, 6] + } + + p if { + x := a[1][_][5][_] # miss, cache key: data.test.a[1][<_,5>] + y := a[1][_][5] # hit, cache key: data.test.a[1][<_,5>] + x == y[_] + }`, + query: `data.test.p = x`, + hit: 1, // data.test.a[1][_][5] + miss: 2, // data.test.p + data.test.a[1][_][5] + }, + { + // Regression test for https://github.com/open-policy-agent/opa/issues/6926 + note: "partial object, ref-head, leaf set, ref with unification scope", + module: `package p + import rego.v1 + + obj.sub[x][x] contains x if some x in ["one", "two"] + + obj[x][x] contains x if x := "whatever" + + main contains x if { + [1 | obj.sub[_].one[_]] # miss, cache key: data.p.obj.sub[<_,one>] + x := obj.sub[_][_][_] # miss, cache key: data.p.obj.sub + }`, + query: `data.p.main = x`, + hit: 0, + miss: 3, // data.p.main + data.p.obj.sub[<_,one>] + data.p.obj.sub + }, } for _, tc := range tests {