Skip to content

Commit

Permalink
Add LexicalSimilarityScorer
Browse files Browse the repository at this point in the history
Use this as the top-level scorer instead of `eval.LevenshteinDistanceScorer` and `eval.ExactMatchScorer`,
and pass the similarity metric function (`eval.LevenshteinDistance` or `eval.ExactMatch`) to it as a parameter instead.
  • Loading branch information
markuswustenberg committed Jan 17, 2025
1 parent 90978ca commit bddbfd5
Show file tree
Hide file tree
Showing 5 changed files with 70 additions and 67 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ func TestEvalPrompt(t *testing.T) {

// Score the sample using the Levenshtein distance scorer.
// The scorer is created inline, but for scorers that need more setup, this can be done elsewhere.
result := e.Score(sample, eval.LevenshteinDistanceScorer())
result := e.Score(sample, eval.LexicalSimilarityScorer(eval.LevenshteinDistance))

// Log the sample, result, and timing information.
e.Log(sample, result)
Expand Down
45 changes: 23 additions & 22 deletions eval/eval.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,35 +39,36 @@ type Result struct {
// Scorer produces a [Result] (including a [Score]) for the given [Sample].
type Scorer = func(s Sample) Result

// LevenshteinDistanceScorer returns a [Scorer] that uses the Levenshtein distance to compare strings.
// This is a common lexical similarity metric which is useful if you have a reference text.
// The scorer computes the distance between the expected (reference) and output strings of the [Sample],
// and then normalizes it to a [Score] between 0 and 1 using the max length of the two strings.
func LevenshteinDistanceScorer() Scorer {
// LexicalSimilarityScorer returns a [Scorer] which uses a lexical similarity metric to compare
// expected and output strings from a [Sample].
// This is a common way to score texts if you have a reference text.
// You can choose which similarity function to use, such as [LevenshteinDistance] or [ExactMatch].
// The similarity function is called with the expected string first and the output string second,
// and its return value is used unchanged as the [Score] of the [Result].
// The [Result] type is always "LexicalSimilarity", regardless of the similarity function used.
func LexicalSimilarityScorer(similarityFunc func(a, b string) Score) Scorer {
	return func(sample Sample) Result {
		// Delegate to the caller-supplied metric instead of a fixed one,
		// so that the choice of similarity function actually affects the score.
		score := similarityFunc(sample.Expected, sample.Output)
		return Result{Score: score, Type: "LexicalSimilarity"}
	}
}

// levenshteinDistanceScore computes a [Score] between two strings using the levenshtein distance.
// LevenshteinDistance computes a [Score] between two strings using the levenshtein distance,
// and is useful as a lexical similarity metric together with [LexicalSimilarityScorer].
// A score of 1 means the strings are equal, and 0 means they are completely different.
// Uses https://github.com/agnivade/levenshtein
func levenshteinDistanceScore(s1, s2 string) Score {
if s1 == s2 {
// The score is normalized to the length of the longest string.
// Uses https://github.com/agnivade/levenshtein internally.
func LevenshteinDistance(a, b string) Score {
if a == b {
return 1
}
return Score(1 - float64(levenshtein.ComputeDistance(s1, s2))/float64(max(len(s1), len(s2))))
return Score(1 - float64(levenshtein.ComputeDistance(a, b))/float64(max(len(a), len(b))))
}

// ExactMatchScorer returns a [Scorer] that scores 1 if the expected and output strings are equal, and 0 otherwise.
func ExactMatchScorer() Scorer {
return func(sample Sample) Result {
if sample.Expected == sample.Output {
return Result{Score: 1, Type: "ExactMatch"}
}
return Result{Score: 0, Type: "ExactMatch"}
// ExactMatch reports whether two strings are identical, expressed as a [Score]:
// 1 when a and b are equal, and 0 when they are not.
// It is a simple similarity function for exact string matching,
// meant to be passed to [LexicalSimilarityScorer].
func ExactMatch(a, b string) Score {
	// The zero value of Score is the "no match" result.
	var score Score
	if a == b {
		score = 1
	}
	return score
}

// VectorComponent is a single component of a vector.
Expand All @@ -79,9 +80,9 @@ type embeddingGetter[T VectorComponent] interface {
GetEmbedding(v string) ([]T, error)
}

// SemanticMatchScorer returns a [Scorer] which uses embedding vectors to compare expected and output strings from a [Sample].
// SemanticSimilarityScorer returns a [Scorer] which uses embedding vectors to compare expected and output strings from a [Sample].
// You can choose which vector similarity function to use. If in doubt, use [CosineSimilarity].
func SemanticMatchScorer[T VectorComponent](eg embeddingGetter[T], similarityFunc func(a, b []T) Score) Scorer {
func SemanticSimilarityScorer[T VectorComponent](eg embeddingGetter[T], similarityFunc func(a, b []T) Score) Scorer {
return func(sample Sample) Result {
expected, err := eg.GetEmbedding(sample.Expected)
if err != nil {
Expand All @@ -93,7 +94,7 @@ func SemanticMatchScorer[T VectorComponent](eg embeddingGetter[T], similarityFun
}

score := similarityFunc(expected, output)
return Result{Score: score, Type: "SemanticMatch"}
return Result{Score: score, Type: "SemanticSimilarity"}
}
}

Expand Down
86 changes: 44 additions & 42 deletions eval/eval_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,50 +9,52 @@ import (
"maragu.dev/llm/eval"
)

func TestLevenshteinDistanceScorer(t *testing.T) {
tests := []struct {
expected, output string
score eval.Score
}{
{"", "", 1},
{"a", "", 0},
{"", "a", 0},
{"a", "a", 1},
{"a", "b", 0},
{"a", "aa", 0.5},
{"aa", "a", 0.5},
{"a", "aaa", 1.0 / 3},
{"aaa", "a", 1.0 / 3},
}
for _, test := range tests {
t.Run(test.expected+" "+test.output, func(t *testing.T) {
scorer := eval.LevenshteinDistanceScorer()
result := scorer(eval.Sample{Expected: test.expected, Output: test.output})
is.True(t, math.Abs(float64(test.score-result.Score)) < 0.01)
})
}
}
func TestLexicalSimilarityScorer(t *testing.T) {
t.Run("with LevenshteinDistance", func(t *testing.T) {
tests := []struct {
expected, output string
score eval.Score
}{
{"", "", 1},
{"a", "", 0},
{"", "a", 0},
{"a", "a", 1},
{"a", "b", 0},
{"a", "aa", 0.5},
{"aa", "a", 0.5},
{"a", "aaa", 1.0 / 3},
{"aaa", "a", 1.0 / 3},
}
for _, test := range tests {
t.Run(test.expected+" "+test.output, func(t *testing.T) {
scorer := eval.LexicalSimilarityScorer(eval.LevenshteinDistance)
result := scorer(eval.Sample{Expected: test.expected, Output: test.output})
is.True(t, math.Abs(float64(test.score-result.Score)) < 0.01)
})
}
})

func TestExactMatchScorer(t *testing.T) {
tests := []struct {
expected, output string
score eval.Score
}{
{"", "", 1},
{"a", "", 0},
{"", "a", 0},
{"a", "a", 1},
}
for _, test := range tests {
t.Run(test.expected+" "+test.output, func(t *testing.T) {
scorer := eval.ExactMatchScorer()
result := scorer(eval.Sample{Expected: test.expected, Output: test.output})
is.Equal(t, test.score, result.Score)
})
}
// Subtest: builds the scorer from eval.ExactMatch and requires the score for each
// expected/output pair to equal the table value exactly.
// NOTE(review): every case below also scores the same under eval.LevenshteinDistance
// (1, 0, 0, 1), so this subtest cannot tell whether LexicalSimilarityScorer really calls
// the function it was given. A case such as {"a", "aa", 0} would distinguish the two metrics.
t.Run("with ExactMatch", func(t *testing.T) {
tests := []struct {
expected, output string
score eval.Score
}{
// Identical strings (including two empty strings) score 1; any difference scores 0.
{"", "", 1},
{"a", "", 0},
{"", "a", 0},
{"a", "a", 1},
}
for _, test := range tests {
// Each pair runs as its own subtest named "<expected> <output>".
t.Run(test.expected+" "+test.output, func(t *testing.T) {
scorer := eval.LexicalSimilarityScorer(eval.ExactMatch)
result := scorer(eval.Sample{Expected: test.expected, Output: test.output})
is.Equal(t, test.score, result.Score)
})
}
})
}

func TestSemanticMatchScorer(t *testing.T) {
func TestSemanticSimilarityScorer(t *testing.T) {
tests := []struct {
expected, output string
expectedEmbedding, outputEmbedding []float64
Expand All @@ -72,7 +74,7 @@ func TestSemanticMatchScorer(t *testing.T) {
},
}

scorer := eval.SemanticMatchScorer(eg, eval.CosineSimilarity)
scorer := eval.SemanticSimilarityScorer(eg, eval.CosineSimilarity)
result := scorer(eval.Sample{Expected: test.expected, Output: test.output})
is.True(t, math.Abs(float64(test.score-result.Score)) < 0.01)
})
Expand Down
2 changes: 1 addition & 1 deletion internal/examples/hi_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ func TestEvalLLMs(t *testing.T) {
Expected: test.expected,
}

result := e.Score(sample, eval.LevenshteinDistanceScorer())
result := e.Score(sample, eval.LexicalSimilarityScorer(eval.LevenshteinDistance))

e.Log(sample, result)
})
Expand Down
2 changes: 1 addition & 1 deletion internal/examples/mock_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ func TestEvalPrompt(t *testing.T) {

// Score the sample using the Levenshtein distance scorer.
// The scorer is created inline, but for scorers that need more setup, this can be done elsewhere.
result := e.Score(sample, eval.LevenshteinDistanceScorer())
result := e.Score(sample, eval.LexicalSimilarityScorer(eval.LevenshteinDistance))

// Log the sample, result, and timing information.
e.Log(sample, result)
Expand Down

0 comments on commit bddbfd5

Please sign in to comment.