grafana · mdisibio · Oct 11, 2024 · Oct 7, 2024 · Oct 8, 2024 · Oct 8, 2024
@@ -402,6 +402,8 @@ type BinaryOperation struct {
 	RHS FieldExpression
 
 	compiledExpression *regexp.Regexp
+
+	b branchOptimizer
 }
 
 func newBinaryOperation(op Operator, lhs, rhs FieldExpression) FieldExpression {
@@ -417,6 +419,10 @@ func newBinaryOperation(op Operator, lhs, rhs FieldExpression) FieldExpression {
 		}
 	}
 
+	if (op == OpAnd || op == OpOr) && binop.referencesSpan() {
+		binop.b = newBranchPredictor(2, 1000)
+	}
+
 	return binop
 }
 

@@ -325,30 +325,50 @@ func (a Aggregate) evaluate(input []*Spanset) (output []*Spanset, err error) {
 }
 
 func (o *BinaryOperation) execute(span Span) (Static, error) {
+	recording := o.b.Recording
+	if recording {
+		o.b.Start()
+	}
+
 	lhs, err := o.LHS.execute(span)
 	if err != nil {
 		return NewStaticNil(), err
 	}
 
+	if recording {
+		o.b.Finish(0)
+	}
+
 	// Look for cases where we don't even need to evalulate the RHS
-	if lhsB, ok := lhs.Bool(); ok {
-		if o.Op == OpAnd && !lhsB {
-			// x && y
-			// x is false so we don't need to evalulate y
-			return StaticFalse, nil
-		}
-		if o.Op == OpOr && lhsB {
-			// x || y
-			// x is true so we don't need to evalulate y
-			return StaticTrue, nil
+	// But wait until we have enough samples so we can optimize
+	if !recording {
+		if lhsB, ok := lhs.Bool(); ok {
+			if o.Op == OpAnd && !lhsB {
+				// x && y
+				// x is false so we don't need to evaluate y
+				return StaticFalse, nil
+			}
+			if o.Op == OpOr && lhsB {
+				// x || y
+				// x is true so we don't need to evaluate y
+				return StaticTrue, nil
+			}
 		}
 	}
 
+	if recording {
+		o.b.Start()
+	}
+
 	rhs, err := o.RHS.execute(span)
 	if err != nil {
 		return NewStaticNil(), err
 	}
 
+	if recording {
+		o.b.Finish(1)
+	}
+
 	// Ensure the resolved types are still valid
 	lhsT := lhs.Type
 	rhsT := rhs.Type
@@ -428,10 +448,40 @@ func (o *BinaryOperation) execute(span Span) (Static, error) {
 		lhsB, _ := lhs.Bool()
 		rhsB, _ := rhs.Bool()
 
+		if recording {
+			if done := o.b.Sampled(); done {
+				if o.b.OptimalBranch() == 1 {
+					// RHS is the optimal starting branch,
+					// so swap the elements now.
+					o.LHS, o.RHS = o.RHS, o.LHS
+				}
+			}
+		}
+
 		switch o.Op {
 		case OpAnd:
+			if recording {
+				if !lhsB {
+					// Record cost of wasted rhs execution
+					o.b.Penalize(1)
+				}
+				if !rhsB {
+					// Record cost of wasted lhs execution
+					o.b.Penalize(0)
+				}
+			}
 			return NewStaticBool(lhsB && rhsB), nil
 		case OpOr:
+			if recording {
+				if rhsB {
+					// Record cost of wasted lhs execution
+					o.b.Penalize(0)
+				}
+				if lhsB {
+					// Record cost of wasted rhs execution
+					o.b.Penalize(1)
+				}
+			}
 			return NewStaticBool(lhsB || rhsB), nil
 		}
 	}

@@ -1,6 +1,8 @@
 package traceql
 
 import (
+	"time"
+
 	"github.com/grafana/tempo/pkg/tempopb"
 	"go.opentelemetry.io/otel"
 )
@@ -81,3 +83,56 @@ func (b *bucketSet) addAndTest(i int) bool {
 	b.buckets[b.sz]++
 	return false
 }
+
+type branchOptimizer struct { // per-branch timing and penalty bookkeeping used to choose which branch to run first
+	start            time.Time       // set by Start; Finish measures elapsed time from it
+	last             []time.Duration // most recent duration stored for each branch by Finish
+	totals           []time.Duration // accumulated penalty per branch, added to by Penalize
+	Recording        bool            // true while samples are still being collected; updated by Sampled
+	samplesRemaining int             // decremented by Sampled; Recording becomes false once it is no longer positive
+}
+
+func newBranchPredictor(numBranches int, numSamples int) branchOptimizer { // returns an optimizer that starts in recording mode
+	return branchOptimizer{
+		totals:           make([]time.Duration, numBranches), // one zeroed penalty total per branch
+		last:             make([]time.Duration, numBranches), // one zeroed last-duration slot per branch
+		samplesRemaining: numSamples,                         // Sampled stops recording once this count is used up
+		Recording:        true,                               // begin by collecting samples
+	}
+}
+
+// Start records the current time as the beginning of a branch execution. Call it immediately before running the branch.
+func (b *branchOptimizer) Start() {
+	b.start = time.Now() // overwrites any previous start time; only one measurement is in flight at once
+}
+
+// Finish stores the time elapsed since the last Start as the latest cost of the given branch; totals only change in Penalize.
+func (b *branchOptimizer) Finish(branch int) {
+	b.last[branch] = time.Since(b.start) // replaces the previously stored duration for this branch
+}
+
+// Penalize adds the given branch's most recently recorded cost (see Finish) to its running total. This is called after
+// executing all branches and then knowing in retrospect which ones were not needed.
+func (b *branchOptimizer) Penalize(branch int) {
+	b.totals[branch] += b.last[branch]
+}
+
+// Sampled counts one completed sample, updates Recording, and reports done=true once no samples remain.
+func (b *branchOptimizer) Sampled() (done bool) {
+	b.samplesRemaining--
+	b.Recording = b.samplesRemaining > 0 // keep recording only while samples are still outstanding
+	return !b.Recording
+}
+
+// OptimalBranch returns the index of the branch with the least penalized cost over time, i.e. the optimal one to start with. Ties go to the lowest index.
+func (b *branchOptimizer) OptimalBranch() int {
+	mini := 0
+	min := b.totals[0] // requires at least one branch; the local name min shadows the Go 1.21+ builtin inside this method
+	for i := 1; i < len(b.totals); i++ {
+		if b.totals[i] < min { // strict comparison, so an equal total never displaces an earlier branch
+			mini = i
+			min = b.totals[i]
+		}
+	}
+	return mini
+}