Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix/virtual-cache ref unification scope #6957

18 changes: 18 additions & 0 deletions test/cases/testdata/v1/refheads/test-regressions.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -160,3 +160,21 @@ cases:
q if p[true] == false
want_result:
- x: true
- note: regression/virtual-cache key scope
query: data.test.main = x
modules:
- |
package test

obj.sub[x][x] contains x if some x in ["one", "two"]

obj[x][x] contains x if x := "whatever"

main contains x if {
[1 | obj.sub[_].one[_]]
x := obj.sub[_][_][_]
}
want_result:
- x:
- "one"
- "two"
159 changes: 153 additions & 6 deletions topdown/eval.go
Original file line number Diff line number Diff line change
Expand Up @@ -2407,6 +2407,15 @@ type evalVirtualPartialCacheHint struct {
full bool
}

func (h *evalVirtualPartialCacheHint) keyWithoutScope() ast.Ref {
if h.key != nil {
if _, ok := h.key[len(h.key)-1].Value.(vcKeyScope); ok {
return h.key[:len(h.key)-1]
}
}
return h.key
}

func (e evalVirtualPartial) eval(iter unifyIterator) error {

unknown := e.e.unknown(e.ref[:e.pos+1], e.bindings)
Expand Down Expand Up @@ -2485,7 +2494,7 @@ func (e evalVirtualPartial) evalEachRule(iter unifyIterator, unknown bool) error
}

if hint.key != nil {
if v, err := result.Value.Find(hint.key[e.pos+1:]); err == nil && v != nil {
if v, err := result.Value.Find(hint.keyWithoutScope()[e.pos+1:]); err == nil && v != nil {
e.e.virtualCache.Put(hint.key, ast.NewTerm(v))
}
}
Expand Down Expand Up @@ -2832,6 +2841,8 @@ func (e evalVirtualPartial) evalCache(iter unifyIterator) (evalVirtualPartialCac
plugged := e.bindings.Plug(e.ref[e.pos+1])

if _, ok := plugged.Value.(ast.Var); ok {
// Note: we might have additional opportunity to optimize here, if we consider that ground values
// right of e.pos could create a smaller eval "scope" through ref bi-unification before evaluating rules.
hint.full = true
hint.key = e.plugged[:e.pos+1]
e.e.instr.counterIncr(evalOpVirtualCacheMiss)
Expand All @@ -2840,19 +2851,76 @@ func (e evalVirtualPartial) evalCache(iter unifyIterator) (evalVirtualPartialCac

m := maxRefLength(e.ir.Rules, len(e.ref))

// Creating the hint key by walking the ref and plugging vars until we hit a non-ground term.
// Any ground term right of this point will affect the scope of evaluation by ref unification,
// so we create a virtual-cache scope key to qualify the result stored in the cache.
//
// E.g. given the following rule:
//
// package example
//
// a[x][y][z] := x + y + z if {
// some x in [1, 2]
// some y in [3, 4]
// some z in [5, 6]
// }
//
// and the following ref (1):
//
// data.example.a[1][_][5]
//
// then the hint key will be:
//
// data.example.a[1][<_,5>]
//
// where <_,5> is the scope of the pre-eval unification.
// This part does not contribute to the "location" of the cached data.
//
// The following ref (2):
//
// data.example.a[1][_][6]
//
// will produce the same hint key "location" 'data.example.a[1]', but a different scope component
// '<_,6>', which will create a different entry in the cache.
scoping := false
hintKeyEnd := 0
for i := e.pos + 1; i < m; i++ {
plugged = e.bindings.Plug(e.ref[i])

if !plugged.IsGround() {
break
if plugged.IsGround() && !scoping {
hintKeyEnd = i
hint.key = append(e.plugged[:i], plugged)
} else {
scoping = true
hl := len(hint.key)
if hl == 0 {
break
}
if scope, ok := hint.key[hl-1].Value.(vcKeyScope); ok {
scope.Ref = append(scope.Ref, plugged)
hint.key[len(hint.key)-1] = ast.NewTerm(scope)
} else {
scope = vcKeyScope{}
scope.Ref = append(scope.Ref, plugged)
hint.key = append(hint.key, ast.NewTerm(scope))
}
}
Comment on lines +2890 to 2907
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It would be helpful if we were to add a comment here explaining the code in the loop with an example if feasible. It would probably help a future reviewer.


hint.key = append(e.plugged[:i], plugged)

if cached, _ := e.e.virtualCache.Get(hint.key); cached != nil {
e.e.instr.counterIncr(evalOpVirtualCacheHit)
hint.hit = true
return hint, e.evalTerm(iter, i+1, cached, e.bindings)
return hint, e.evalTerm(iter, hintKeyEnd+1, cached, e.bindings)
}
}

if hl := len(hint.key); hl > 0 {
if scope, ok := hint.key[hl-1].Value.(vcKeyScope); ok {
scope = scope.reduce()
if scope.empty() {
hint.key = hint.key[:hl-1]
} else {
hint.key[hl-1].Value = scope
}
}
}

Expand All @@ -2861,6 +2929,85 @@ func (e evalVirtualPartial) evalCache(iter unifyIterator) (evalVirtualPartialCac
return hint, nil
}

// vcKeyScope represents the scoping that pre-rule-eval ref unification imposes on a virtual cache entry.
type vcKeyScope struct {
ast.Ref
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would we safe memory if we kept the index next to the ast.Ref?

type vcKeyScope struct {
  ref ast.Ref
  idx int
}

so that we can avoid copying the ast.Ref? I guess it doesn't make a huge difference 🤔

}

func (q vcKeyScope) Compare(other ast.Value) int {
if q2, ok := other.(vcKeyScope); ok {
r1 := q.Ref
r2 := q2.Ref
if len(r1) != len(r2) {
return -1
}

for i := range r1 {
_, v1IsVar := r1[i].Value.(ast.Var)
_, v2IsVar := r2[i].Value.(ast.Var)
if v1IsVar && v2IsVar {
continue
}
if r1[i].Value.Compare(r2[i].Value) != 0 {
return -1
}
}

return 0
}
return 1
}

func (vcKeyScope) Find(ast.Ref) (ast.Value, error) {
return nil, nil
}

func (q vcKeyScope) Hash() int {
var hash int
for _, v := range q.Ref {
if _, ok := v.Value.(ast.Var); ok {
// all vars are equal
hash++
} else {
hash += v.Value.Hash()
}
}
return hash
}

func (q vcKeyScope) IsGround() bool {
return false
}

func (q vcKeyScope) String() string {
buf := make([]string, 0, len(q.Ref))
for _, t := range q.Ref {
if _, ok := t.Value.(ast.Var); ok {
buf = append(buf, "_")
} else {
buf = append(buf, t.String())
}
}
return fmt.Sprintf("<%s>", strings.Join(buf, ","))
}

// reduce removes vars from the tail of the ref.
func (q vcKeyScope) reduce() vcKeyScope {
ref := q.Ref.Copy()
var i int
for i = len(q.Ref) - 1; i >= 0; i-- {
if _, ok := q.Ref[i].Value.(ast.Var); !ok {
break
}
}
ref = ref[:i+1]
return vcKeyScope{ref}
}

func (q vcKeyScope) empty() bool {
return len(q.Ref) == 0
}

func getNestedObject(ref ast.Ref, rootObj *ast.Object, b *bindings, l *ast.Location) (*ast.Object, error) {
current := rootObj
for _, term := range ref {
Expand Down
102 changes: 102 additions & 0 deletions topdown/eval_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -561,6 +561,108 @@ func TestTopdownVirtualCache(t *testing.T) {
miss: 3, // 'data.test.p = true' + 'data.test.q[[y, 1]] = z' + 'data.test.q = x'
exp: 1,
},
{
note: "partial object, ref-head, ref with unification scope",
module: `package test
import rego.v1

a[x][y][z] := x + y + z if {
some x in [1, 2]
some y in [3, 4]
some z in [5, 6]
}

p if {
x := a[1][_][5] # miss, cache key: data.test.a[1][<_,5>]
some foo
y := a[1][foo][5] # hit, cache key: data.test.a[1][<_,5>]
x == y
}`,
query: `data.test.p = x`,
hit: 1, // data.test.a[1][_][5]
miss: 2, // data.test.p + data.test.a[1][_][5]
},
{
note: "partial object, ref-head, ref with unification scope, component order",
module: `package test
import rego.v1

a[x][y][a][b] := i if {
some x in [1, 2]
some y in [3, 4]
some a in ["foo", "bar"]
some i, b in ["foo", "bar"]
}

p if {
x := a[1][_]["foo"]["bar"] # miss, cache key: data.test.a[1][<_,foo,bar>]
y := a[1][_]["bar"]["foo"] # miss, cache key: data.test.a[1][<_,bar,foo>]
x != y
}`,
query: `data.test.p = x`,
hit: 0,
miss: 3, // data.test.p + data.test.a[1][<_,foo,bar>] + data.test.a[1][<_,bar,foo>]
},
{
note: "partial object, ref-head, ref with unification scope, diverging key scope",
module: `package test
import rego.v1

a[x][y][z] := x + y + z if {
some x in [1, 2]
some y in [3, 4]
some z in [5, 6]
}

p if {
x := a[1][_][5] # miss, cache key: data.test.a[1][<_,5>]
y := a[1][_][6] # miss, cache key: data.test.a[1][<_,6>]
z := a[1][_][5] # hit, cache key: data.test.a[1][<_,5>]
x != y
x == z
}`,
query: `data.test.p = x`,
hit: 1, // data.test.a[1][_][5]
miss: 3, // data.test.p + data.test.a[1][_][5] + data.test.a[1][_][6]
},
{
note: "partial object, ref-head, ref with unification scope, trailing vars don't contribute to key scope",
module: `package test
import rego.v1

a[x][y][z][x] := x + y + z if {
some x in [1, 2]
some y in [3, 4]
some z in [5, 6]
}

p if {
x := a[1][_][5][_] # miss, cache key: data.test.a[1][<_,5>]
y := a[1][_][5] # hit, cache key: data.test.a[1][<_,5>]
x == y[_]
}`,
query: `data.test.p = x`,
hit: 1, // data.test.a[1][_][5]
miss: 2, // data.test.p + data.test.a[1][_][5]
},
{
// Regression test for https://github.com/open-policy-agent/opa/issues/6926
note: "partial object, ref-head, leaf set, ref with unification scope",
module: `package p
import rego.v1

obj.sub[x][x] contains x if some x in ["one", "two"]

obj[x][x] contains x if x := "whatever"

main contains x if {
[1 | obj.sub[_].one[_]] # miss, cache key: data.p.obj.sub[<_,one>]
x := obj.sub[_][_][_] # miss, cache key: data.p.obj.sub
}`,
query: `data.p.main = x`,
hit: 0,
miss: 3, // data.p.main + data.p.obj.sub[<_,one>] + data.p.obj.sub
},
}

for _, tc := range tests {
Expand Down