open-policy-agent · johanfylling · Aug 28, 2024 · Aug 23, 2024 · Aug 23, 2024 · Aug 23, 2024
diff --git a/test/cases/testdata/v1/refheads/test-regressions.yaml b/test/cases/testdata/v1/refheads/test-regressions.yaml
@@ -160,3 +160,21 @@ cases:
         q if p[true] == false
     want_result:
       - x: true
+  - note: regression/virtual-cache key scope
+    query: data.test.main = x
+    modules:
+      - |
+        package test
+
+        obj.sub[x][x] contains x if some x in ["one", "two"]
+
+        obj[x][x] contains x if x := "whatever"
+
+        main contains x if {
+          [1 | obj.sub[_].one[_]]
+          x := obj.sub[_][_][_]
+        }
+    want_result:
+      - x:
+          - "one"
+          - "two"
diff --git a/topdown/eval.go b/topdown/eval.go
@@ -2407,6 +2407,15 @@ type evalVirtualPartialCacheHint struct {
 	full bool
 }
 
+func (h *evalVirtualPartialCacheHint) keyWithoutScope() ast.Ref {
+	if h.key != nil {
+		if _, ok := h.key[len(h.key)-1].Value.(vcKeyScope); ok {
+			return h.key[:len(h.key)-1]
+		}
+	}
+	return h.key
+}
+
 func (e evalVirtualPartial) eval(iter unifyIterator) error {
 
 	unknown := e.e.unknown(e.ref[:e.pos+1], e.bindings)
@@ -2485,7 +2494,7 @@ func (e evalVirtualPartial) evalEachRule(iter unifyIterator, unknown bool) error
 	}
 
 	if hint.key != nil {
-		if v, err := result.Value.Find(hint.key[e.pos+1:]); err == nil && v != nil {
+		if v, err := result.Value.Find(hint.keyWithoutScope()[e.pos+1:]); err == nil && v != nil {
 			e.e.virtualCache.Put(hint.key, ast.NewTerm(v))
 		}
 	}
@@ -2832,6 +2841,8 @@ func (e evalVirtualPartial) evalCache(iter unifyIterator) (evalVirtualPartialCac
 	plugged := e.bindings.Plug(e.ref[e.pos+1])
 
 	if _, ok := plugged.Value.(ast.Var); ok {
+		// Note: we might have additional opportunity to optimize here, if we consider that ground values
+		// right of e.pos could create a smaller eval "scope" through ref bi-unification before evaluating rules.
 		hint.full = true
 		hint.key = e.plugged[:e.pos+1]
 		e.e.instr.counterIncr(evalOpVirtualCacheMiss)
@@ -2840,19 +2851,76 @@ func (e evalVirtualPartial) evalCache(iter unifyIterator) (evalVirtualPartialCac
 
 	m := maxRefLength(e.ir.Rules, len(e.ref))
 
+	// Creating the hint key by walking the ref and plugging vars until we hit a non-ground term.
+	// Any ground term right of this point will affect the scope of evaluation by ref unification,
+	// so we create a virtual-cache scope key to qualify the result stored in the cache.
+	//
+	// E.g. given the following rule:
+	//
+	//   package example
+	//
+	//   a[x][y][z] := x + y + z if {
+	//     some x in [1, 2]
+	//     some y in [3, 4]
+	//     some z in [5, 6]
+	//   }
+	//
+	// and the following ref (1):
+	//
+	//   data.example.a[1][_][5]
+	//
+	// then the hint key will be:
+	//
+	//   data.example.a[1][<_,5>]
+	//
+	// where <_,5> is the scope of the pre-eval unification.
+	// This part does not contribute to the "location" of the cached data.
+	//
+	// The following ref (2):
+	//
+	//   data.example.a[1][_][6]
+	//
+	// will produce the same hint key "location" 'data.example.a[1]', but a different scope component
+	// '<_,6>', which will create a different entry in the cache.
+	scoping := false
+	hintKeyEnd := 0
 	for i := e.pos + 1; i < m; i++ {
 		plugged = e.bindings.Plug(e.ref[i])
 
-		if !plugged.IsGround() {
-			break
+		if plugged.IsGround() && !scoping {
+			hintKeyEnd = i
+			hint.key = append(e.plugged[:i], plugged)
+		} else {
+			scoping = true
+			hl := len(hint.key)
+			if hl == 0 {
+				break
+			}
+			if scope, ok := hint.key[hl-1].Value.(vcKeyScope); ok {
+				scope.Ref = append(scope.Ref, plugged)
+				hint.key[len(hint.key)-1] = ast.NewTerm(scope)
+			} else {
+				scope = vcKeyScope{}
+				scope.Ref = append(scope.Ref, plugged)
+				hint.key = append(hint.key, ast.NewTerm(scope))
+			}
 		}
 
-		hint.key = append(e.plugged[:i], plugged)
-
 		if cached, _ := e.e.virtualCache.Get(hint.key); cached != nil {
 			e.e.instr.counterIncr(evalOpVirtualCacheHit)
 			hint.hit = true
-			return hint, e.evalTerm(iter, i+1, cached, e.bindings)
+			return hint, e.evalTerm(iter, hintKeyEnd+1, cached, e.bindings)
+		}
+	}
+
+	if hl := len(hint.key); hl > 0 {
+		if scope, ok := hint.key[hl-1].Value.(vcKeyScope); ok {
+			scope = scope.reduce()
+			if scope.empty() {
+				hint.key = hint.key[:hl-1]
+			} else {
+				hint.key[hl-1].Value = scope
+			}
 		}
 	}
 
@@ -2861,6 +2929,85 @@ func (e evalVirtualPartial) evalCache(iter unifyIterator) (evalVirtualPartialCac
 	return hint, nil
 }
 
+// vcKeyScope represents the scoping that pre-rule-eval ref unification imposes on a virtual cache entry.
+type vcKeyScope struct {
+	ast.Ref
+}
+
+func (q vcKeyScope) Compare(other ast.Value) int {
+	if q2, ok := other.(vcKeyScope); ok {
+		r1 := q.Ref
+		r2 := q2.Ref
+		if len(r1) != len(r2) {
+			return -1
+		}
+
+		for i := range r1 {
+			_, v1IsVar := r1[i].Value.(ast.Var)
+			_, v2IsVar := r2[i].Value.(ast.Var)
+			if v1IsVar && v2IsVar {
+				continue
+			}
+			if r1[i].Value.Compare(r2[i].Value) != 0 {
+				return -1
+			}
+		}
+
+		return 0
+	}
+	return 1
+}
+
+func (vcKeyScope) Find(ast.Ref) (ast.Value, error) {
+	return nil, nil
+}
+
+func (q vcKeyScope) Hash() int {
+	var hash int
+	for _, v := range q.Ref {
+		if _, ok := v.Value.(ast.Var); ok {
+			// all vars are equal
+			hash++
+		} else {
+			hash += v.Value.Hash()
+		}
+	}
+	return hash
+}
+
+func (q vcKeyScope) IsGround() bool {
+	return false
+}
+
+func (q vcKeyScope) String() string {
+	buf := make([]string, 0, len(q.Ref))
+	for _, t := range q.Ref {
+		if _, ok := t.Value.(ast.Var); ok {
+			buf = append(buf, "_")
+		} else {
+			buf = append(buf, t.String())
+		}
+	}
+	return fmt.Sprintf("<%s>", strings.Join(buf, ","))
+}
+
+// reduce removes vars from the tail of the ref.
+func (q vcKeyScope) reduce() vcKeyScope {
+	ref := q.Ref.Copy()
+	var i int
+	for i = len(q.Ref) - 1; i >= 0; i-- {
+		if _, ok := q.Ref[i].Value.(ast.Var); !ok {
+			break
+		}
+	}
+	ref = ref[:i+1]
+	return vcKeyScope{ref}
+}
+
+func (q vcKeyScope) empty() bool {
+	return len(q.Ref) == 0
+}
+
 func getNestedObject(ref ast.Ref, rootObj *ast.Object, b *bindings, l *ast.Location) (*ast.Object, error) {
 	current := rootObj
 	for _, term := range ref {

diff --git a/topdown/eval_test.go b/topdown/eval_test.go
@@ -561,6 +561,108 @@ func TestTopdownVirtualCache(t *testing.T) {
 			miss:  3, // 'data.test.p = true' + 'data.test.q[[y, 1]] = z' + 'data.test.q = x'
 			exp:   1,
 		},
+		{
+			note: "partial object, ref-head, ref with unification scope",
+			module: `package test
+			import rego.v1
+
+			a[x][y][z] := x + y + z if {
+				some x in [1, 2]
+				some y in [3, 4]
+				some z in [5, 6]
+			}
+
+			p if {
+				x := a[1][_][5]   # miss, cache key: data.test.a[1][<_,5>]
+				some foo
+				y := a[1][foo][5] # hit, cache key: data.test.a[1][<_,5>]
+				x == y
+			}`,
+			query: `data.test.p = x`,
+			hit:   1, // data.test.a[1][_][5]
+			miss:  2, // data.test.p + data.test.a[1][_][5]
+		},
+		{
+			note: "partial object, ref-head, ref with unification scope, component order",
+			module: `package test
+			import rego.v1
+
+			a[x][y][a][b] := i if {
+				some x in [1, 2]
+				some y in [3, 4]
+				some a in ["foo", "bar"]
+				some i, b in ["foo", "bar"]
+			}
+
+			p if {
+				x := a[1][_]["foo"]["bar"] # miss, cache key: data.test.a[1][<_,foo,bar>]
+				y := a[1][_]["bar"]["foo"] # miss, cache key: data.test.a[1][<_,bar,foo>]
+				x != y
+			}`,
+			query: `data.test.p = x`,
+			hit:   0,
+			miss:  3, // data.test.p + data.test.a[1][<_,foo,bar>] + data.test.a[1][<_,bar,foo>]
+		},
+		{
+			note: "partial object, ref-head, ref with unification scope, diverging key scope",
+			module: `package test
+			import rego.v1
+
+			a[x][y][z] := x + y + z if {
+				some x in [1, 2]
+				some y in [3, 4]
+				some z in [5, 6]
+			}
+
+			p if {
+				x := a[1][_][5] # miss, cache key: data.test.a[1][<_,5>]
+				y := a[1][_][6] # miss, cache key: data.test.a[1][<_,6>]
+				z := a[1][_][5] # hit, cache key: data.test.a[1][<_,5>]
+				x != y
+				x == z
+			}`,
+			query: `data.test.p = x`,
+			hit:   1, // data.test.a[1][_][5]
+			miss:  3, // data.test.p + data.test.a[1][_][5] + data.test.a[1][_][6]
+		},
+		{
+			note: "partial object, ref-head, ref with unification scope, trailing vars don't contribute to key scope",
+			module: `package test
+				import rego.v1
+
+				a[x][y][z][x] := x + y + z if {
+					some x in [1, 2]
+					some y in [3, 4]
+					some z in [5, 6]
+				}
+
+				p if {
+					x := a[1][_][5][_] # miss, cache key: data.test.a[1][<_,5>]
+					y := a[1][_][5]    # hit, cache key: data.test.a[1][<_,5>]
+					x == y[_]
+				}`,
+			query: `data.test.p = x`,
+			hit:   1, // data.test.a[1][_][5]
+			miss:  2, // data.test.p + data.test.a[1][_][5]
+		},
+		{
+			// Regression test for https://github.com/open-policy-agent/opa/issues/6926
+			note: "partial object, ref-head, leaf set, ref with unification scope",
+			module: `package p
+				import rego.v1
+
+				obj.sub[x][x] contains x if some x in ["one", "two"]
+
+				obj[x][x] contains x if x := "whatever"
+
+				main contains x if {
+					[1 | obj.sub[_].one[_]] # miss, cache key: data.p.obj.sub[<_,one>]
+					x := obj.sub[_][_][_]   # miss, cache key: data.p.obj.sub
+				}`,
+			query: `data.p.main = x`,
+			hit:   0,
+			miss:  3, // data.p.main + data.p.obj.sub[<_,one>] + data.p.obj.sub
+		},
 	}
 
 	for _, tc := range tests {