Add eval.SemanticMatchScorer (#32)

`eval.SemanticMatchScorer` scores semantic similarity by comparing the embedding vectors of the expected and output strings with a pluggable similarity function; cosine similarity (`eval.CosineSimilarity`) is currently the only one provided. See #31.
maragudk · Jan 17, 2025 · 90978ca · 90978ca
1 parent 406287f
commit 90978ca
Show file tree

Hide file tree

Showing 4 changed files with 114 additions and 10 deletions.
diff --git a/eval/eval.go b/eval/eval.go
@@ -4,6 +4,7 @@ package eval
 
 import (
 	"fmt"
+	"math"
 
 	"github.com/agnivade/levenshtein"
 )
@@ -39,8 +40,9 @@ type Result struct {
 type Scorer = func(s Sample) Result
 
 // LevenshteinDistanceScorer returns a [Scorer] that uses the Levenshtein distance to compare strings.
-// It does this by computing the distance between the expected and output strings, and then normalizing
-// it to a [Score] between 0 and 1 using the max length of the two strings.
+// This is a common lexical similarity metric which is useful if you have a reference text.
+// The scorer computes the distance between the expected (reference) and output strings of the [Sample],
+// and then normalizes it to a [Score] between 0 and 1 using the max length of the two strings.
 func LevenshteinDistanceScorer() Scorer {
 	return func(sample Sample) Result {
 		score := levenshteinDistanceScore(sample.Expected, sample.Output)
@@ -67,3 +69,70 @@ func ExactMatchScorer() Scorer {
 		return Result{Score: 0, Type: "ExactMatch"}
 	}
 }
+
+// VectorComponent is a single component of a vector. It is a type constraint satisfied by any type whose underlying type is float32 or float64.
+type VectorComponent interface {
+	~float32 | ~float64 // the ~ also admits named types defined on these floats
+}
+
+type embeddingGetter[T VectorComponent] interface { // embeddingGetter is what SemanticMatchScorer uses to turn a string into an embedding vector with components of type T.
+	GetEmbedding(v string) ([]T, error) // returns the embedding vector for v, or an error
+}
+
+// SemanticMatchScorer returns a [Scorer] which compares the expected and output strings of a [Sample]
+// through their embedding vectors. The embeddings are fetched from eg (expected first, then output)
+// and handed to similarityFunc, whose return value becomes the [Result] score.
+// You can choose which vector similarity function to use. If in doubt, use [CosineSimilarity].
+//
+// The returned [Scorer] panics if eg fails to produce an embedding, because a [Scorer] has no
+// error return to report the failure through.
+func SemanticMatchScorer[T VectorComponent](eg embeddingGetter[T], similarityFunc func(a, b []T) Score) Scorer {
+	// mustEmbed fetches the embedding for text and panics with failurePrefix plus the error on failure.
+	mustEmbed := func(text, failurePrefix string) []T {
+		vec, err := eg.GetEmbedding(text)
+		if err != nil {
+			panic(failurePrefix + err.Error())
+		}
+		return vec
+	}
+
+	return func(sample Sample) Result {
+		expectedVec := mustEmbed(sample.Expected, "could not get embedding for expected string: ")
+		outputVec := mustEmbed(sample.Output, "could not get embedding for output string: ")
+
+		return Result{
+			Score: similarityFunc(expectedVec, outputVec),
+			Type:  "SemanticMatch",
+		}
+	}
+}
+
+// CosineSimilarity between two embedding vectors a and b, normalized to a [Score].
+//
+// The raw cosine similarity lies in [-1, 1] and is mapped linearly to [0, 1], so that
+// opposite vectors score 0, orthogonal vectors score 0.5, and parallel vectors score 1.
+//
+// All intermediate arithmetic is done in float64 regardless of T. For float32 inputs this
+// keeps the running sums precise and prevents the squared components from overflowing to
+// +Inf or underflowing to zero, which would otherwise produce a NaN score or a spurious
+// zero-norm panic.
+//
+// CosineSimilarity panics if a and b have different lengths, if they are empty, or if
+// either vector has a zero norm, in which case cosine similarity is undefined.
+// NaN components are not rejected; they propagate to the returned score.
+func CosineSimilarity[T VectorComponent](a, b []T) Score {
+	if len(a) != len(b) {
+		panic(fmt.Sprintf("vectors must have equal length, but are lengths %v and %v", len(a), len(b)))
+	}
+
+	if len(a) == 0 {
+		panic("vectors cannot be empty")
+	}
+
+	// Compute dot product and squared Euclidean norms (L2 norms), accumulating in float64.
+	var dotProduct, sumSquaresA, sumSquaresB float64
+	for i := range a {
+		x, y := float64(a[i]), float64(b[i])
+		dotProduct += x * y
+		sumSquaresA += x * x
+		sumSquaresB += y * y
+	}
+	normA := math.Sqrt(sumSquaresA)
+	normB := math.Sqrt(sumSquaresB)
+
+	if normA == 0 || normB == 0 {
+		panic("norm of a or b is zero and cosine similarity is undefined")
+	}
+
+	similarity := dotProduct / (normA * normB)
+
+	// Normalize from [-1, 1] to [0, 1] range
+	normalizedSimilarity := (similarity + 1) / 2
+
+	// Clamp to [0, 1] range, may be necessary because of floating point rounding errors
+	if normalizedSimilarity < 0 {
+		return 0
+	}
+	if normalizedSimilarity > 1 {
+		return 1
+	}
+
+	return Score(normalizedSimilarity)
+}
diff --git a/eval/eval_test.go b/eval/eval_test.go
@@ -11,8 +11,8 @@ import (
 
 func TestLevenshteinDistanceScorer(t *testing.T) {
 	tests := []struct {
-		s1, s2 string
-		score  eval.Score
+		expected, output string
+		score            eval.Score
 	}{
 		{"", "", 1},
 		{"a", "", 0},
@@ -25,29 +25,64 @@ func TestLevenshteinDistanceScorer(t *testing.T) {
 		{"aaa", "a", 1.0 / 3},
 	}
 	for _, test := range tests {
-		t.Run(test.s1+" "+test.s2, func(t *testing.T) {
+		t.Run(test.expected+" "+test.output, func(t *testing.T) {
 			scorer := eval.LevenshteinDistanceScorer()
-			result := scorer(eval.Sample{Expected: test.s1, Output: test.s2})
+			result := scorer(eval.Sample{Expected: test.expected, Output: test.output})
 			is.True(t, math.Abs(float64(test.score-result.Score)) < 0.01)
 		})
 	}
 }
 
 func TestExactMatchScorer(t *testing.T) {
 	tests := []struct {
-		s1, s2 string
-		score  eval.Score
+		expected, output string
+		score            eval.Score
 	}{
 		{"", "", 1},
 		{"a", "", 0},
 		{"", "a", 0},
 		{"a", "a", 1},
 	}
 	for _, test := range tests {
-		t.Run(test.s1+" "+test.s2, func(t *testing.T) {
+		t.Run(test.expected+" "+test.output, func(t *testing.T) {
 			scorer := eval.ExactMatchScorer()
-			result := scorer(eval.Sample{Expected: test.s1, Output: test.s2})
+			result := scorer(eval.Sample{Expected: test.expected, Output: test.output})
 			is.Equal(t, test.score, result.Score)
 		})
 	}
 }
+
+// TestSemanticMatchScorer runs SemanticMatchScorer with CosineSimilarity against canned
+// embeddings served by mockEmbeddingGetter and checks the score within a 0.01 tolerance.
+func TestSemanticMatchScorer(t *testing.T) {
+	cases := []struct {
+		expected, output                   string
+		expectedEmbedding, outputEmbedding []float64
+		score                              eval.Score
+	}{
+		// exact
+		{
+			expected:          "a",
+			output:            "a",
+			expectedEmbedding: []float64{1, 2, 3},
+			outputEmbedding:   []float64{1, 2, 3},
+			score:             1,
+		},
+		// opposite
+		{
+			expected:          "a",
+			output:            "b",
+			expectedEmbedding: []float64{1, 2, 3},
+			outputEmbedding:   []float64{-1, -2, -3},
+			score:             0,
+		},
+		// orthogonal
+		{
+			expected:          "x",
+			output:            "y",
+			expectedEmbedding: []float64{1, 0, 0},
+			outputEmbedding:   []float64{0, 1, 0},
+			score:             0.5,
+		},
+	}
+
+	for _, tc := range cases {
+		name := tc.expected + " " + tc.output
+		t.Run(name, func(t *testing.T) {
+			// Expected is registered first; when both strings are equal the output
+			// embedding overwrites it.
+			canned := make(map[string][]float64, 2)
+			canned[tc.expected] = tc.expectedEmbedding
+			canned[tc.output] = tc.outputEmbedding
+			getter := &mockEmbeddingGetter{embeddings: canned}
+
+			scorer := eval.SemanticMatchScorer(getter, eval.CosineSimilarity)
+			got := scorer(eval.Sample{Expected: tc.expected, Output: tc.output})
+
+			delta := math.Abs(float64(tc.score - got.Score))
+			is.True(t, delta < 0.01)
+		})
+	}
+}
+
+type mockEmbeddingGetter struct { // mockEmbeddingGetter is a test double that serves canned embeddings from a map.
+	embeddings map[string][]float64 // input string -> embedding returned for it by GetEmbedding
+}
+
+// GetEmbedding returns the canned embedding registered for v.
+// A string with no registered embedding yields a nil slice; the error is always nil.
+func (m *mockEmbeddingGetter) GetEmbedding(v string) ([]float64, error) {
+	vec := m.embeddings[v]
+	return vec, nil
+}
diff --git a/internal/examples/llm_test.go → internal/examples/hi_test.go b/internal/examples/llm_test.go → internal/examples/hi_test.go
diff --git a/internal/examples/prompt_test.go → internal/examples/mock_test.go b/internal/examples/prompt_test.go → internal/examples/mock_test.go