Add LexicalSimilarityScorer

Use this as a top-level scorer instead of `eval.LevenshteinDistanceScorer` and `eval.ExactMatchScorer`, and take the two similarity metric functions as parameters instead.
maragudk · Jan 17, 2025 · bddbfd5 · bddbfd5
1 parent 90978ca
commit bddbfd5
Show file tree

Hide file tree

Showing 5 changed files with 70 additions and 67 deletions.
diff --git a/README.md b/README.md
@@ -51,7 +51,7 @@ func TestEvalPrompt(t *testing.T) {
 
 		// Score the sample using the Levenshtein distance scorer.
 		// The scorer is created inline, but for scorers that need more setup, this can be done elsewhere.
-		result := e.Score(sample, eval.LevenshteinDistanceScorer())
+		result := e.Score(sample, eval.LexicalSimilarityScorer(eval.LevenshteinDistance))
 
 		// Log the sample, result, and timing information.
 		e.Log(sample, result)

diff --git a/eval/eval.go b/eval/eval.go
@@ -39,35 +39,36 @@ type Result struct {
 // Scorer produces a [Result] (including a [Score]) for the given [Sample].
 type Scorer = func(s Sample) Result
 
-// LevenshteinDistanceScorer returns a [Scorer] that uses the Levenshtein distance to compare strings.
-// This is a common lexical similarity metric which is useful if you have a reference text.
-// The scorer computes the distance between the expected (reference) and output strings of the [Sample],
-// and then normalizes it to a [Score] between 0 and 1 using the max length of the two strings.
-func LevenshteinDistanceScorer() Scorer {
+// LexicalSimilarityScorer returns a [Scorer] which uses a lexical similarity metric to compare
+// expected and output strings from a [Sample].
+// This is a common way to score texts if you have a reference text.
+// You can choose which similarity function to use, such as [LevenshteinDistance] or [ExactMatch].
+func LexicalSimilarityScorer(similarityFunc func(a, b string) Score) Scorer {
 	return func(sample Sample) Result {
-		score := levenshteinDistanceScore(sample.Expected, sample.Output)
-		return Result{Score: score, Type: "LevenshteinDistance"}
+		score := similarityFunc(sample.Expected, sample.Output)
+		return Result{Score: score, Type: "LexicalSimilarity"}
 	}
 }
 
-// levenshteinDistanceScore computes a [Score] between two strings using the levenshtein distance.
+// LevenshteinDistance computes a [Score] between two strings using the levenshtein distance,
+// and is useful as a lexical similarity metric together with [LexicalSimilarityScorer].
 // A score of 1 means the strings are equal, and 0 means they are completely different.
-// Uses https://github.com/agnivade/levenshtein
-func levenshteinDistanceScore(s1, s2 string) Score {
-	if s1 == s2 {
+// The score is normalized to the length of the longest string, counted in runes like the distance itself.
+// Uses https://github.com/agnivade/levenshtein internally.
+func LevenshteinDistance(a, b string) Score {
+	if a == b {
 		return 1
 	}
-	return Score(1 - float64(levenshtein.ComputeDistance(s1, s2))/float64(max(len(s1), len(s2))))
+	return Score(1 - float64(levenshtein.ComputeDistance(a, b))/float64(max(len([]rune(a)), len([]rune(b)))))
 }
 
-// ExactMatchScorer returns a [Scorer] that scores 1 if the expected and output strings are equal, and 0 otherwise.
-func ExactMatchScorer() Scorer {
-	return func(sample Sample) Result {
-		if sample.Expected == sample.Output {
-			return Result{Score: 1, Type: "ExactMatch"}
-		}
-		return Result{Score: 0, Type: "ExactMatch"}
+// ExactMatch computes a [Score] between two strings, returning 1 if they are equal and 0 otherwise.
+// Useful as a simple similarity function for exact string matching together with [LexicalSimilarityScorer].
+func ExactMatch(a, b string) Score {
+	if a == b {
+		return 1
 	}
+	return 0
 }
 
 // VectorComponent is a single component of a vector.
@@ -79,9 +80,9 @@ type embeddingGetter[T VectorComponent] interface {
 	GetEmbedding(v string) ([]T, error)
 }
 
-// SemanticMatchScorer returns a [Scorer] which uses embedding vectors to compare expected and output strings from a [Sample].
+// SemanticSimilarityScorer returns a [Scorer] which uses embedding vectors to compare expected and output strings from a [Sample].
 // You can choose which vector similarity function to use. If in doubt, use [CosineSimilarity].
-func SemanticMatchScorer[T VectorComponent](eg embeddingGetter[T], similarityFunc func(a, b []T) Score) Scorer {
+func SemanticSimilarityScorer[T VectorComponent](eg embeddingGetter[T], similarityFunc func(a, b []T) Score) Scorer {
 	return func(sample Sample) Result {
 		expected, err := eg.GetEmbedding(sample.Expected)
 		if err != nil {
@@ -93,7 +94,7 @@ func SemanticMatchScorer[T VectorComponent](eg embeddingGetter[T], similarityFun
 		}
 
 		score := similarityFunc(expected, output)
-		return Result{Score: score, Type: "SemanticMatch"}
+		return Result{Score: score, Type: "SemanticSimilarity"}
 	}
 }
 

diff --git a/eval/eval_test.go b/eval/eval_test.go
@@ -9,50 +9,52 @@ import (
 	"maragu.dev/llm/eval"
 )
 
-func TestLevenshteinDistanceScorer(t *testing.T) {
-	tests := []struct {
-		expected, output string
-		score            eval.Score
-	}{
-		{"", "", 1},
-		{"a", "", 0},
-		{"", "a", 0},
-		{"a", "a", 1},
-		{"a", "b", 0},
-		{"a", "aa", 0.5},
-		{"aa", "a", 0.5},
-		{"a", "aaa", 1.0 / 3},
-		{"aaa", "a", 1.0 / 3},
-	}
-	for _, test := range tests {
-		t.Run(test.expected+" "+test.output, func(t *testing.T) {
-			scorer := eval.LevenshteinDistanceScorer()
-			result := scorer(eval.Sample{Expected: test.expected, Output: test.output})
-			is.True(t, math.Abs(float64(test.score-result.Score)) < 0.01)
-		})
-	}
-}
+func TestLexicalSimilarityScorer(t *testing.T) {
+	t.Run("with LevenshteinDistance", func(t *testing.T) {
+		tests := []struct {
+			expected, output string
+			score            eval.Score
+		}{
+			{"", "", 1},
+			{"a", "", 0},
+			{"", "a", 0},
+			{"a", "a", 1},
+			{"a", "b", 0},
+			{"a", "aa", 0.5},
+			{"aa", "a", 0.5},
+			{"a", "aaa", 1.0 / 3},
+			{"aaa", "a", 1.0 / 3},
+		}
+		for _, test := range tests {
+			t.Run(test.expected+" "+test.output, func(t *testing.T) {
+				scorer := eval.LexicalSimilarityScorer(eval.LevenshteinDistance)
+				result := scorer(eval.Sample{Expected: test.expected, Output: test.output})
+				is.True(t, math.Abs(float64(test.score-result.Score)) < 0.01)
+			})
+		}
+	})
 
-func TestExactMatchScorer(t *testing.T) {
-	tests := []struct {
-		expected, output string
-		score            eval.Score
-	}{
-		{"", "", 1},
-		{"a", "", 0},
-		{"", "a", 0},
-		{"a", "a", 1},
-	}
-	for _, test := range tests {
-		t.Run(test.expected+" "+test.output, func(t *testing.T) {
-			scorer := eval.ExactMatchScorer()
-			result := scorer(eval.Sample{Expected: test.expected, Output: test.output})
-			is.Equal(t, test.score, result.Score)
-		})
-	}
+	t.Run("with ExactMatch", func(t *testing.T) {
+		tests := []struct {
+			expected, output string
+			score            eval.Score
+		}{
+			{"", "", 1},
+			{"a", "", 0},
+			{"", "a", 0},
+			{"a", "a", 1},
+		}
+		for _, test := range tests {
+			t.Run(test.expected+" "+test.output, func(t *testing.T) {
+				scorer := eval.LexicalSimilarityScorer(eval.ExactMatch)
+				result := scorer(eval.Sample{Expected: test.expected, Output: test.output})
+				is.Equal(t, test.score, result.Score)
+			})
+		}
+	})
 }
 
-func TestSemanticMatchScorer(t *testing.T) {
+func TestSemanticSimilarityScorer(t *testing.T) {
 	tests := []struct {
 		expected, output                   string
 		expectedEmbedding, outputEmbedding []float64
@@ -72,7 +74,7 @@ func TestSemanticMatchScorer(t *testing.T) {
 				},
 			}
 
-			scorer := eval.SemanticMatchScorer(eg, eval.CosineSimilarity)
+			scorer := eval.SemanticSimilarityScorer(eg, eval.CosineSimilarity)
 			result := scorer(eval.Sample{Expected: test.expected, Output: test.output})
 			is.True(t, math.Abs(float64(test.score-result.Score)) < 0.01)
 		})

diff --git a/internal/examples/hi_test.go b/internal/examples/hi_test.go
@@ -52,7 +52,7 @@ func TestEvalLLMs(t *testing.T) {
 				Expected: test.expected,
 			}
 
-			result := e.Score(sample, eval.LevenshteinDistanceScorer())
+			result := e.Score(sample, eval.LexicalSimilarityScorer(eval.LevenshteinDistance))
 
 			e.Log(sample, result)
 		})

diff --git a/internal/examples/mock_test.go b/internal/examples/mock_test.go
@@ -27,7 +27,7 @@ func TestEvalPrompt(t *testing.T) {
 
 		// Score the sample using the Levenshtein distance scorer.
 		// The scorer is created inline, but for scorers that need more setup, this can be done elsewhere.
-		result := e.Score(sample, eval.LevenshteinDistanceScorer())
+		result := e.Score(sample, eval.LexicalSimilarityScorer(eval.LevenshteinDistance))
 
 		// Log the sample, result, and timing information.
 		e.Log(sample, result)