Skip to content

Commit 1ba8e20

Browse files
committed
fix(vector): fixed euclidean spelling and a bug in parameters (#9165)
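Fixed the spelling of "euclidean" (previously misspelled "euclidian") across code, tests, and schemas. Fixed a bug in the HNSW option defaults where the efSearch default was written to the efConstruction option, and raised the default efConstruction and efSearch values.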
1 parent 93a3a62 commit 1ba8e20

17 files changed

+54
-54
lines changed

graphql/resolve/query_rewriter.go

+4-4
Original file line numberDiff line numberDiff line change
@@ -659,7 +659,7 @@ func rewriteAsSimilarByIdQuery(
659659
topK := query.ArgValue(schema.SimilarTopKArgName)
660660
similarByField := typ.Field(similarBy)
661661
metric := similarByField.EmbeddingSearchMetric()
662-
distanceFormula := "math(sqrt((v2 - v1) dot (v2 - v1)))" // default - euclidian
662+
distanceFormula := "math(sqrt((v2 - v1) dot (v2 - v1)))" // default - euclidean
663663

664664
if metric == schema.SimilarSearchMetricDotProduct {
665665
distanceFormula = "math((1.0 - (v1 dot v2)) /2.0)"
@@ -755,7 +755,7 @@ func rewriteAsSimilarByIdQuery(
755755
result = append(result, distance)
756756
}
757757

758-
// order the result by euclidian distance, For example,
758+
// order the result by euclidean distance, For example,
759759
// querySimilarProductById(func: uid(distance), orderasc: val(distance)) {
760760
// Product.id : Product.id
761761
// Product.description : Product.description
@@ -819,7 +819,7 @@ func rewriteAsSimilarByEmbeddingQuery(
819819

820820
similarByField := typ.Field(similarBy)
821821
metric := similarByField.EmbeddingSearchMetric()
822-
distanceFormula := "math(sqrt((v2 - $search_vector) dot (v2 - $search_vector)))" // default = euclidian
822+
distanceFormula := "math(sqrt((v2 - $search_vector) dot (v2 - $search_vector)))" // default = euclidean
823823

824824
if metric == schema.SimilarSearchMetricDotProduct {
825825
distanceFormula = "math(( 1.0 - (($search_vector) dot v2)) /2.0)"
@@ -860,7 +860,7 @@ func rewriteAsSimilarByEmbeddingQuery(
860860
},
861861
}
862862

863-
// Compute the euclidian distance between the neighbor
863+
// Compute the euclidean distance between the neighbor
864864
// and the search vector
865865
dgQuery[0].Children = []*dql.GraphQuery{
866866
{

graphql/schema/dgraph_schemagen_test.yml

+2-2
Original file line numberDiff line numberDiff line change
@@ -148,7 +148,7 @@ schemas:
148148
dt4: DateTime @search(by: [day])
149149
dt5: DateTime @search(by: [hour])
150150
vf1: [Float!] @embedding @search(by: ["hnsw"])
151-
vf2: [Float!] @embedding @search(by: ["hnsw(exponent: 4, metric: euclidian)"])
151+
vf2: [Float!] @embedding @search(by: ["hnsw(exponent: 4, metric: euclidean)"])
152152
vf3: [Float!] @embedding @search(by: ["hnsw(metric: cosine)"])
153153
vf4: [Float!] @embedding @search(by: ["hnsw(metric: dotproduct, exponent: 4)"])
154154
e: E @search
@@ -219,7 +219,7 @@ schemas:
219219
X.dt4: dateTime @index(day) .
220220
X.dt5: dateTime @index(hour) .
221221
X.vf1: float32vector @index(hnsw) .
222-
X.vf2: float32vector @index(hnsw(exponent: "4", metric: "euclidian")) .
222+
X.vf2: float32vector @index(hnsw(exponent: "4", metric: "euclidean")) .
223223
X.vf3: float32vector @index(hnsw(metric: "cosine")) .
224224
X.vf4: float32vector @index(hnsw(exponent: "4", metric: "dotproduct")) .
225225
X.e: string @index(hash) .

graphql/schema/rules.go

+2-2
Original file line numberDiff line numberDiff line change
@@ -1154,7 +1154,7 @@ func searchValidation(
11541154
//
11551155
// <searchArg> := <searchType> [ <openParen> <searchOptions> <closeParen> ]
11561156
//
1157-
// hnsw(metric: euclidian, exponent: 6)
1157+
// hnsw(metric: euclidean, exponent: 6)
11581158
// hnsw
11591159
// hnsw(exponent: 3)
11601160
func parseSearchType(searchArg string) string {
@@ -1175,7 +1175,7 @@ func parseSearchType(searchArg string) string {
11751175
// <searchOption> := <OptionName><COLON><SPACE><OptionValue>
11761176
// Examples:
11771177
//
1178-
// hnsw(metric: euclidian, exponent: 6)
1178+
// hnsw(metric: euclidean, exponent: 6)
11791179
// hnsw
11801180
// hnsw(exponent: 3)
11811181
func parseSearchOptions(searchArg string) (map[string]string, bool) {

graphql/schema/testdata/schemagen/input/embedding-directive-with-similar-queries.graphql

+1-1
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ type Product {
55
description: String
66
title: String
77
imageUrl: String
8-
product_vector: [Float!] @embedding @search(by: ["hnsw(metric: euclidian, exponent: 4)"])
8+
product_vector: [Float!] @embedding @search(by: ["hnsw(metric: euclidean, exponent: 4)"])
99
}
1010

1111
type Purchase @lambdaOnMutate(add: true){

graphql/schema/testdata/schemagen/output/embedding-directive-with-similar-queries.graphql

+1-1
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ type Product {
77
description: String
88
title: String
99
imageUrl: String
10-
product_vector: [Float!] @embedding @search(by: ["hnsw(metric: euclidian, exponent: 4)"])
10+
product_vector: [Float!] @embedding @search(by: ["hnsw(metric: euclidean, exponent: 4)"])
1111
vector_distance: Float
1212
}
1313

graphql/schema/wrappers.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -116,7 +116,7 @@ const (
116116
SimilarByEmbeddingQuerySuffix = "ByEmbedding"
117117
SimilarQueryResultTypeSuffix = "WithDistance"
118118
SimilarQueryDistanceFieldName = "vector_distance"
119-
SimilarSearchMetricEuclidian = "euclidian"
119+
SimilarSearchMetricEuclidean = "euclidean"
120120
SimilarSearchMetricDotProduct = "dotproduct"
121121
SimilarSearchMetricCosine = "cosine"
122122
)

query/common_test.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -380,7 +380,7 @@ func populateCluster(dc dgraphapi.Cluster) {
380380
// alive
381381
// user_profile
382382
// }
383-
// user_profile : float32vector @index(hnsw(metric:"euclidian")) .`
383+
// user_profile : float32vector @index(hnsw(metric:"euclidean")) .`
384384
// } else {
385385
// ts = testSchema + `type User {
386386
// name

query/vector/vector_graphql_test.go

+4-4
Original file line numberDiff line numberDiff line change
@@ -170,7 +170,7 @@ func TestVectorGraphQLAddVectorPredicate(t *testing.T) {
170170
require.NoError(t, err)
171171
hc.LoginIntoNamespace("groot", "password", 0)
172172
// add schema
173-
require.NoError(t, hc.UpdateGQLSchema(fmt.Sprintf(graphQLVectorSchema, "euclidian")))
173+
require.NoError(t, hc.UpdateGQLSchema(fmt.Sprintf(graphQLVectorSchema, "euclidean")))
174174
}
175175

176176
func TestVectorSchema(t *testing.T) {
@@ -188,16 +188,16 @@ func TestVectorSchema(t *testing.T) {
188188

189189
// add schema
190190
require.NoError(t, hc.UpdateGQLSchema(schema))
191-
require.Error(t, hc.UpdateGQLSchema(fmt.Sprintf(graphQLVectorSchema, "euclidian")))
191+
require.Error(t, hc.UpdateGQLSchema(fmt.Sprintf(graphQLVectorSchema, "euclidean")))
192192
}
193193

194-
func TestVectorGraphQlEuclidianIndexMutationAndQuery(t *testing.T) {
194+
func TestVectorGraphQlEuclideanIndexMutationAndQuery(t *testing.T) {
195195
require.NoError(t, client.DropAll())
196196
hc, err := dc.HTTPClient()
197197
require.NoError(t, err)
198198
hc.LoginIntoNamespace("groot", "password", 0)
199199

200-
schema := fmt.Sprintf(graphQLVectorSchema, "euclidian")
200+
schema := fmt.Sprintf(graphQLVectorSchema, "euclidean")
201201
// add schema
202202
require.NoError(t, hc.UpdateGQLSchema(schema))
203203
testVectorGraphQlMutationAndQuery(t, hc)

query/vector/vector_test.go

+13-13
Original file line numberDiff line numberDiff line change
@@ -412,7 +412,7 @@ func TestInvalidVectorIndex(t *testing.T) {
412412

413413
func TestVectorIndexRebuildWhenChange(t *testing.T) {
414414
dropPredicate("vtest")
415-
setSchema(fmt.Sprintf(vectorSchemaWithIndex, "vtest", "4", "euclidian"))
415+
setSchema(fmt.Sprintf(vectorSchemaWithIndex, "vtest", "4", "euclidean"))
416416

417417
numVectors := 9000
418418
vectorSize := 100
@@ -421,7 +421,7 @@ func TestVectorIndexRebuildWhenChange(t *testing.T) {
421421
require.NoError(t, addTriplesToCluster(randomVectors))
422422

423423
startTime := time.Now()
424-
setSchema(fmt.Sprintf(vectorSchemaWithIndex, "vtest", "6", "euclidian"))
424+
setSchema(fmt.Sprintf(vectorSchemaWithIndex, "vtest", "6", "euclidean"))
425425

426426
dur := time.Since(startTime)
427427
// Easy way to check that the index was actually rebuilt
@@ -430,7 +430,7 @@ func TestVectorIndexRebuildWhenChange(t *testing.T) {
430430

431431
func TestVectorInQueryArgument(t *testing.T) {
432432
dropPredicate("vtest")
433-
setSchema(fmt.Sprintf(vectorSchemaWithIndex, "vtest", "4", "euclidian"))
433+
setSchema(fmt.Sprintf(vectorSchemaWithIndex, "vtest", "4", "euclidean"))
434434

435435
numVectors := 100
436436
vectorSize := 4
@@ -456,7 +456,7 @@ func TestVectorInQueryArgument(t *testing.T) {
456456
func TestVectorsMutateFixedLengthWithDiffrentIndexes(t *testing.T) {
457457
dropPredicate("vtest")
458458

459-
setSchema(fmt.Sprintf(vectorSchemaWithIndex, "vtest", "4", "euclidian"))
459+
setSchema(fmt.Sprintf(vectorSchemaWithIndex, "vtest", "4", "euclidean"))
460460
testVectorMutationSameLength(t)
461461
dropPredicate("vtest")
462462

@@ -489,7 +489,7 @@ func TestVectorDeadlockwithTimeout(t *testing.T) {
489489
DropAttr: pred,
490490
})
491491
dropPredicate(pred)
492-
setSchema(fmt.Sprintf(vectorSchemaWithIndex, pred, "4", "euclidian"))
492+
setSchema(fmt.Sprintf(vectorSchemaWithIndex, pred, "4", "euclidean"))
493493
numVectors := 10000
494494
vectorSize := 1000
495495

@@ -514,8 +514,8 @@ func TestVectorDeadlockwithTimeout(t *testing.T) {
514514
func TestVectorMutateDiffrentLengthWithDiffrentIndexes(t *testing.T) {
515515
dropPredicate("vtest")
516516

517-
setSchema(fmt.Sprintf(vectorSchemaWithIndex, "vtest", "4", "euclidian"))
518-
testVectorMutationDiffrentLength(t, "can not compute euclidian distance on vectors of different lengths")
517+
setSchema(fmt.Sprintf(vectorSchemaWithIndex, "vtest", "4", "euclidean"))
518+
testVectorMutationDiffrentLength(t, "can not compute euclidean distance on vectors of different lengths")
519519
dropPredicate("vtest")
520520

521521
setSchema(fmt.Sprintf(vectorSchemaWithIndex, "vtest", "4", "cosine"))
@@ -532,7 +532,7 @@ func TestVectorReindex(t *testing.T) {
532532

533533
pred := "vtest"
534534

535-
setSchema(fmt.Sprintf(vectorSchemaWithIndex, pred, "4", "euclidian"))
535+
setSchema(fmt.Sprintf(vectorSchemaWithIndex, pred, "4", "euclidean"))
536536

537537
numVectors := 100
538538
vectorSize := 4
@@ -555,7 +555,7 @@ func TestVectorReindex(t *testing.T) {
555555
_, err := querySingleVectorError(t, strings.Split(triple, `"`)[1], "vtest", false)
556556
require.NotNil(t, err)
557557

558-
setSchema(fmt.Sprintf(vectorSchemaWithIndex, pred, "4", "euclidian"))
558+
setSchema(fmt.Sprintf(vectorSchemaWithIndex, pred, "4", "euclidean"))
559559
vector, err := querySingleVector(t, strings.Split(triple, `"`)[1], "vtest")
560560
require.NoError(t, err)
561561
require.Contains(t, allVectors, vector)
@@ -605,7 +605,7 @@ func TestVectorDelete(t *testing.T) {
605605
pred := "vtest"
606606
dropPredicate(pred)
607607

608-
setSchema(fmt.Sprintf(vectorSchemaWithIndex, pred, "4", "euclidian"))
608+
setSchema(fmt.Sprintf(vectorSchemaWithIndex, pred, "4", "euclidean"))
609609

610610
numVectors := 1000
611611
rdf, vectors := generateRandomVectors(numVectors, 10, "vtest")
@@ -661,7 +661,7 @@ func TestVectorUpdate(t *testing.T) {
661661
pred := "vtest"
662662
dropPredicate(pred)
663663

664-
setSchema(fmt.Sprintf(vectorSchemaWithIndex, pred, "4", "euclidian"))
664+
setSchema(fmt.Sprintf(vectorSchemaWithIndex, pred, "4", "euclidean"))
665665

666666
numVectors := 1000
667667
rdf, vectors := generateRandomVectors(1000, 10, "vtest")
@@ -694,7 +694,7 @@ func TestVectorUpdate(t *testing.T) {
694694
func TestVectorWithoutQuote(t *testing.T) {
695695
pred := "test-ve"
696696
dropPredicate(pred)
697-
setSchema(fmt.Sprintf(vectorSchemaWithIndex, pred, "4", "euclidian"))
697+
setSchema(fmt.Sprintf(vectorSchemaWithIndex, pred, "4", "euclidean"))
698698

699699
setJson := `
700700
{
@@ -738,7 +738,7 @@ func TestVectorTwoTxnWithoutCommit(t *testing.T) {
738738
pred := "vtest"
739739
dropPredicate(pred)
740740

741-
setSchema(fmt.Sprintf(vectorSchemaWithIndex, pred, "4", "euclidian"))
741+
setSchema(fmt.Sprintf(vectorSchemaWithIndex, pred, "4", "euclidean"))
742742

743743
rdf, vectors := generateRandomVectors(5, 5, "vtest")
744744
txn1 := client.NewTxn()

schema/parse_test.go

+2-2
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ name: string .
5353
address: string .
5454
<http://scalar.com/helloworld/> : string .
5555
coordinates: float32vector .
56-
indexvector: float32vector @index(hnsw(metric:"euclidian")) .
56+
indexvector: float32vector @index(hnsw(metric:"euclidean")) .
5757
`
5858

5959
func TestSchema(t *testing.T) {
@@ -90,7 +90,7 @@ func TestSchema(t *testing.T) {
9090
Options: []*pb.OptionPair{
9191
{
9292
Key: "metric",
93-
Value: "euclidian",
93+
Value: "euclidean",
9494
},
9595
},
9696
},

systest/vector/vector_test.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ import (
3434

3535
const (
3636
testSchema = `
37-
project_discription_v: float32vector @index(hnsw(exponent: "5", metric: "euclidian")) .`
37+
project_discription_v: float32vector @index(hnsw(exponent: "5", metric: "euclidean")) .`
3838

3939
testSchemaWithoutIndex = `project_discription_v: float32vector .`
4040
)

tok/hnsw/helper.go

+6-6
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ import (
4040
)
4141

4242
const (
43-
Euclidian = "euclidian"
43+
Euclidean = "euclidean"
4444
Cosine = "cosine"
4545
DotProd = "dotproduct"
4646
plError = "\nerror fetching posting list for data key: "
@@ -121,8 +121,8 @@ func cosineSimilarity[T c.Float](a, b []T, floatBits int) (T, error) {
121121
// This needs to implement signature of SimilarityType[T].distanceScore
122122
// function, hence it takes in a floatBits parameter,
123123
// but doesn't actually use it.
124-
func euclidianDistanceSq[T c.Float](a, b []T, floatBits int) (T, error) {
125-
return applyDistanceFunction(a, b, floatBits, "euclidian distance", vek32.Distance, vek.Distance)
124+
func euclideanDistanceSq[T c.Float](a, b []T, floatBits int) (T, error) {
125+
return applyDistanceFunction(a, b, floatBits, "euclidean distance", vek32.Distance, vek.Distance)
126126
}
127127

128128
// Used for distance, since shorter distance is better
@@ -225,8 +225,8 @@ type SimilarityType[T c.Float] struct {
225225

226226
func GetSimType[T c.Float](indexType string, floatBits int) SimilarityType[T] {
227227
switch {
228-
case indexType == Euclidian:
229-
return SimilarityType[T]{indexType: Euclidian, distanceScore: euclidianDistanceSq[T],
228+
case indexType == Euclidean:
229+
return SimilarityType[T]{indexType: Euclidean, distanceScore: euclideanDistanceSq[T],
230230
insortHeap: insortPersistentHeapAscending[T], isBetterScore: isBetterScoreForDistance[T]}
231231
case indexType == Cosine:
232232
return SimilarityType[T]{indexType: Cosine, distanceScore: cosineSimilarity[T],
@@ -235,7 +235,7 @@ func GetSimType[T c.Float](indexType string, floatBits int) SimilarityType[T] {
235235
return SimilarityType[T]{indexType: DotProd, distanceScore: dotProduct[T],
236236
insortHeap: insortPersistentHeapDescending[T], isBetterScore: isBetterScoreForSimilarity[T]}
237237
default:
238-
return SimilarityType[T]{indexType: Euclidian, distanceScore: euclidianDistanceSq[T],
238+
return SimilarityType[T]{indexType: Euclidean, distanceScore: euclideanDistanceSq[T],
239239
insortHeap: insortPersistentHeapAscending[T], isBetterScore: isBetterScoreForDistance[T]}
240240
}
241241
}

tok/hnsw/persistent_factory.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,7 @@ func (hf *persistentIndexFactory[T]) AllowedOptions() opt.AllowedOptions {
8181
AddIntOption(EfConstructionOpt).
8282
AddIntOption(EfSearchOpt)
8383
getSimFunc := func(optValue string) (any, error) {
84-
if optValue != Euclidian && optValue != Cosine && optValue != DotProd {
84+
if optValue != Euclidean && optValue != Cosine && optValue != DotProd {
8585
return nil, errors.New(fmt.Sprintf("Can't create a vector index for %s", optValue))
8686
}
8787
return GetSimType[T](optValue, hf.floatBits), nil

tok/hnsw/persistent_hnsw.go

+5-5
Original file line numberDiff line numberDiff line change
@@ -91,11 +91,11 @@ func (ph *persistentHNSW[T]) applyOptions(o opt.Options) error {
9191
}
9292

9393
if !o.Specifies(EfConstructionOpt) {
94-
o.SetOpt(EfConstructionOpt, 6*exponent)
94+
o.SetOpt(EfConstructionOpt, 50*exponent)
9595
}
9696

9797
if !o.Specifies(EfSearchOpt) {
98-
o.SetOpt(EfConstructionOpt, 9*exponent)
98+
o.SetOpt(EfSearchOpt, 30*exponent)
9999
}
100100
}
101101

@@ -104,11 +104,11 @@ func (ph *persistentHNSW[T]) applyOptions(o opt.Options) error {
104104
if err != nil {
105105
return err
106106
}
107-
ph.efConstruction, _, err = opt.GetOpt(o, EfConstructionOpt, 18)
107+
ph.efConstruction, _, err = opt.GetOpt(o, EfConstructionOpt, 150)
108108
if err != nil {
109109
return err
110110
}
111-
ph.efSearch, _, err = opt.GetOpt(o, EfSearchOpt, 27)
111+
ph.efSearch, _, err = opt.GetOpt(o, EfSearchOpt, 90)
112112
if err != nil {
113113
return err
114114
}
@@ -120,7 +120,7 @@ func (ph *persistentHNSW[T]) applyOptions(o opt.Options) error {
120120
}
121121
ph.simType = okSimType
122122
} else {
123-
ph.simType = SimilarityType[T]{indexType: Euclidian, distanceScore: euclidianDistanceSq[T],
123+
ph.simType = SimilarityType[T]{indexType: Euclidean, distanceScore: euclideanDistanceSq[T],
124124
insortHeap: insortPersistentHeapAscending[T], isBetterScore: isBetterScoreForDistance[T]}
125125
}
126126
return nil

tok/hnsw/persistent_hnsw_test.go

+4-4
Original file line numberDiff line numberDiff line change
@@ -47,16 +47,16 @@ var createpersistentHNSWTests = []createpersistentHNSWTest[float64]{
4747
efConstruction: 1,
4848
pred: "a",
4949
indexType: "b",
50-
expectedIndexType: Euclidian,
50+
expectedIndexType: Euclidean,
5151
floatBits: 64,
5252
},
5353
{
5454
maxLevels: 1,
5555
efSearch: 1,
5656
efConstruction: 1,
5757
pred: "a",
58-
indexType: Euclidian,
59-
expectedIndexType: Euclidian,
58+
indexType: Euclidean,
59+
expectedIndexType: Euclidean,
6060
floatBits: 64,
6161
},
6262
{
@@ -275,7 +275,7 @@ var flatPhs = []*persistentHNSW[float64]{
275275
vecKey: ConcatStrings("0-a", VecKeyword),
276276
vecDead: ConcatStrings("0-a", VecDead),
277277
floatBits: 64,
278-
simType: GetSimType[float64](Euclidian, 64),
278+
simType: GetSimType[float64](Euclidean, 64),
279279
nodeAllEdges: make(map[uint64][][]uint64),
280280
},
281281
{

0 commit comments

Comments
 (0)