From df9e61b02276360e019b1d116ac507522fa3bcd5 Mon Sep 17 00:00:00 2001 From: Likith B Date: Tue, 8 Aug 2023 14:06:29 +0530 Subject: [PATCH 1/7] MB-54131: Geoshape query decode optimization - Added a buffer pool to be shared by all documents --- search/searcher/search_geoshape.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/search/searcher/search_geoshape.go b/search/searcher/search_geoshape.go index 1107c9438..426dc6567 100644 --- a/search/searcher/search_geoshape.go +++ b/search/searcher/search_geoshape.go @@ -70,6 +70,7 @@ func buildRelationFilterOnShapes(ctx context.Context, dvReader index.DocValueRea var dvShapeValue []byte var startReading, finishReading bool var reader *bytes.Reader + var bufPool [][]byte return func(d *search.DocumentMatch) bool { var found bool @@ -104,7 +105,7 @@ func buildRelationFilterOnShapes(ctx context.Context, dvReader index.DocValueRea // apply the filter once the entire docvalue is finished reading. if finishReading { v, err := geojson.FilterGeoShapesOnRelation(shape, - dvShapeValue, relation, &reader) + dvShapeValue, relation, &reader, &bufPool) if err == nil && v { found = true } From c3a474b029f96c1fb9eea5557985faa9730e29cd Mon Sep 17 00:00:00 2001 From: Likith B Date: Tue, 8 Aug 2023 15:42:40 +0530 Subject: [PATCH 2/7] MB-54131 initialize buffers at per query level --- search/searcher/search_geoshape.go | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/search/searcher/search_geoshape.go b/search/searcher/search_geoshape.go index 426dc6567..334734b1e 100644 --- a/search/searcher/search_geoshape.go +++ b/search/searcher/search_geoshape.go @@ -70,7 +70,20 @@ func buildRelationFilterOnShapes(ctx context.Context, dvReader index.DocValueRea var dvShapeValue []byte var startReading, finishReading bool var reader *bytes.Reader - var bufPool [][]byte + + bufPool := make([][]byte, 11) + bufPool[0] = make([]byte, 8192*3) + bufPool[1] = make([]byte, 4096*3) + bufPool[2] = make([]byte, 
2048*3) + bufPool[3] = make([]byte, 1024*3) + bufPool[4] = make([]byte, 512*3) + bufPool[5] = make([]byte, 256*3) + bufPool[6] = make([]byte, 128*3) + bufPool[7] = make([]byte, 64*3) + bufPool[8] = make([]byte, 32*3) + bufPool[9] = make([]byte, 16*3) + bufPool[10] = make([]byte, 8*3) + return func(d *search.DocumentMatch) bool { var found bool From 547ee17e6e928fa2be7f853579dd76b63425ac9b Mon Sep 17 00:00:00 2001 From: Likith B Date: Fri, 25 Aug 2023 15:54:03 +0530 Subject: [PATCH 3/7] MB-54131: Geoshape query decode optimization - Removed initialization of buffers in the pool - Modified code to fit GeoBufferPool --- search/searcher/search_geoshape.go | 24 ++++++++++-------------- 1 file changed, 10 insertions(+), 14 deletions(-) diff --git a/search/searcher/search_geoshape.go b/search/searcher/search_geoshape.go index 334734b1e..8c6e7cec5 100644 --- a/search/searcher/search_geoshape.go +++ b/search/searcher/search_geoshape.go @@ -22,6 +22,7 @@ import ( "github.com/blevesearch/bleve/v2/search" index "github.com/blevesearch/bleve_index_api" "github.com/blevesearch/geo/geojson" + "github.com/blevesearch/geo/s2" ) func NewGeoShapeSearcher(ctx context.Context, indexReader index.IndexReader, shape index.GeoJSON, @@ -63,6 +64,12 @@ func NewGeoShapeSearcher(ctx context.Context, indexReader index.IndexReader, sha // implementation of doc values. var termSeparatorSplitSlice = []byte{0xff} +// Assigning the size of the largest buffer in the pool to 24KB and +// the smallest buffer to 24 bytes. The pools are used to read a +// sequence of vertices which are always 24 bytes each. 
+var maxBufPoolSize = 24 * 1024 +var minBufPoolSize = 24 + func buildRelationFilterOnShapes(ctx context.Context, dvReader index.DocValueReader, field string, relation string, shape index.GeoJSON) FilterFunc { // this is for accumulating the shape's actual complete value @@ -70,19 +77,8 @@ func buildRelationFilterOnShapes(ctx context.Context, dvReader index.DocValueRea var dvShapeValue []byte var startReading, finishReading bool var reader *bytes.Reader - - bufPool := make([][]byte, 11) - bufPool[0] = make([]byte, 8192*3) - bufPool[1] = make([]byte, 4096*3) - bufPool[2] = make([]byte, 2048*3) - bufPool[3] = make([]byte, 1024*3) - bufPool[4] = make([]byte, 512*3) - bufPool[5] = make([]byte, 256*3) - bufPool[6] = make([]byte, 128*3) - bufPool[7] = make([]byte, 64*3) - bufPool[8] = make([]byte, 32*3) - bufPool[9] = make([]byte, 16*3) - bufPool[10] = make([]byte, 8*3) + + bufPool := s2.NewGeoBufferPool(maxBufPoolSize, minBufPoolSize) return func(d *search.DocumentMatch) bool { var found bool @@ -118,7 +114,7 @@ func buildRelationFilterOnShapes(ctx context.Context, dvReader index.DocValueRea // apply the filter once the entire docvalue is finished reading. 
if finishReading { v, err := geojson.FilterGeoShapesOnRelation(shape, - dvShapeValue, relation, &reader, &bufPool) + dvShapeValue, relation, &reader, bufPool) if err == nil && v { found = true } From 05a85da6c82f9882e648c33f9811a98143dec4ce Mon Sep 17 00:00:00 2001 From: Likith B Date: Mon, 28 Aug 2023 17:09:33 +0530 Subject: [PATCH 4/7] MB-54131: Geoshape query decode optimization - Added buffer pool to context - Added relevant key and callback functions in util.go --- index_impl.go | 9 +++++++++ search/searcher/search_geoshape.go | 9 +-------- search/util.go | 16 +++++++++++++++- 3 files changed, 25 insertions(+), 9 deletions(-) diff --git a/index_impl.go b/index_impl.go index b5f115411..022c4081f 100644 --- a/index_impl.go +++ b/index_impl.go @@ -35,6 +35,7 @@ import ( "github.com/blevesearch/bleve/v2/search/facet" "github.com/blevesearch/bleve/v2/search/highlight" index "github.com/blevesearch/bleve_index_api" + "github.com/blevesearch/geo/s2" ) type indexImpl struct { @@ -482,6 +483,14 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr ctx = context.WithValue(ctx, search.SearchIOStatsCallbackKey, search.SearchIOStatsCallbackFunc(sendBytesRead)) + bufPool := s2.NewGeoBufferPool(24 * 1024, 24) + getBufferPool := func() *s2.GeoBufferPool { + return bufPool + } + + ctx = context.WithValue(ctx, search.GeoBufferPoolCallbackKey, + search.GeoBufferPoolCallbackFunc(getBufferPool)) + searcher, err := req.Query.Searcher(ctx, indexReader, i.m, search.SearcherOptions{ Explain: req.Explain, IncludeTermVectors: req.IncludeLocations || req.Highlight != nil, diff --git a/search/searcher/search_geoshape.go b/search/searcher/search_geoshape.go index 8c6e7cec5..940b28c79 100644 --- a/search/searcher/search_geoshape.go +++ b/search/searcher/search_geoshape.go @@ -22,7 +22,6 @@ import ( "github.com/blevesearch/bleve/v2/search" index "github.com/blevesearch/bleve_index_api" "github.com/blevesearch/geo/geojson" - "github.com/blevesearch/geo/s2" ) func
NewGeoShapeSearcher(ctx context.Context, indexReader index.IndexReader, shape index.GeoJSON, @@ -64,12 +63,6 @@ func NewGeoShapeSearcher(ctx context.Context, indexReader index.IndexReader, sha // implementation of doc values. var termSeparatorSplitSlice = []byte{0xff} -// Assigning the size of the largest buffer in the pool to 24KB and -// the smallest buffer to 24 bytes. The pools are used to read a -// sequence of vertices which are always 24 bytes each. -var maxBufPoolSize = 24 * 1024 -var minBufPoolSize = 24 - func buildRelationFilterOnShapes(ctx context.Context, dvReader index.DocValueReader, field string, relation string, shape index.GeoJSON) FilterFunc { // this is for accumulating the shape's actual complete value @@ -78,7 +71,7 @@ func buildRelationFilterOnShapes(ctx context.Context, dvReader index.DocValueRea var startReading, finishReading bool var reader *bytes.Reader - bufPool := s2.NewGeoBufferPool(maxBufPoolSize, minBufPoolSize) + bufPool := ctx.Value(search.GeoBufferPoolCallbackKey).(search.GeoBufferPoolCallbackFunc)() return func(d *search.DocumentMatch) bool { var found bool diff --git a/search/util.go b/search/util.go index 7a946868e..ceb022758 100644 --- a/search/util.go +++ b/search/util.go @@ -14,7 +14,11 @@ package search -import "context" +import ( + "context" + + "github.com/blevesearch/geo/s2" +) func MergeLocations(locations []FieldTermLocationMap) FieldTermLocationMap { rv := locations[0] @@ -118,3 +122,13 @@ func RecordSearchCost(ctx context.Context, } } } + +const GeoBufferPoolCallbackKey = "_geo_buffer_pool_callback_key" + +// Assigning the size of the largest buffer in the pool to 24KB and +// the smallest buffer to 24 bytes. The pools are used to read a +// sequence of vertices which are always 24 bytes each. 
+const MaxBufPoolSize = 24 * 1024 +const MinBufPoolSize = 24 + +type GeoBufferPoolCallbackFunc func() *s2.GeoBufferPool From 2af4aa7188dcec7e447f6b8a1d17fa8b9090b33f Mon Sep 17 00:00:00 2001 From: Likith B Date: Tue, 29 Aug 2023 13:18:52 +0530 Subject: [PATCH 5/7] MB-54131: Geoshape query decode optimization - Changed bufPool initialization to be used only when query has geoshape component --- index_impl.go | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/index_impl.go b/index_impl.go index 022c4081f..3c20d8c0b 100644 --- a/index_impl.go +++ b/index_impl.go @@ -483,8 +483,12 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr ctx = context.WithValue(ctx, search.SearchIOStatsCallbackKey, search.SearchIOStatsCallbackFunc(sendBytesRead)) - bufPool := s2.NewGeoBufferPool(24 * 1024, 24) + var bufPool *s2.GeoBufferPool getBufferPool := func() *s2.GeoBufferPool { + if bufPool == nil { + bufPool = s2.NewGeoBufferPool(search.MaxBufPoolSize, search.MinBufPoolSize) + } + return bufPool } From c7943c3f63cdbc8670c162b8eb44f051a1bd3f78 Mon Sep 17 00:00:00 2001 From: Likith B Date: Thu, 31 Aug 2023 11:28:53 +0530 Subject: [PATCH 6/7] MB-54131: Geoshape query decode optimization - Changed constant names to better reflect their purpose --- index_impl.go | 2 +- search/util.go | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/index_impl.go b/index_impl.go index 3c20d8c0b..2bc954650 100644 --- a/index_impl.go +++ b/index_impl.go @@ -486,7 +486,7 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr var bufPool *s2.GeoBufferPool getBufferPool := func() *s2.GeoBufferPool { if bufPool == nil { - bufPool = s2.NewGeoBufferPool(search.MaxBufPoolSize, search.MinBufPoolSize) + bufPool = s2.NewGeoBufferPool(search.MaxGeoBufPoolSize, search.MinGeoBufPoolSize) } return bufPool diff --git a/search/util.go b/search/util.go index ceb022758..c164ac9e5 100644 --- a/search/util.go +++ 
b/search/util.go @@ -128,7 +128,7 @@ const GeoBufferPoolCallbackKey = "_geo_buffer_pool_callback_key" // Assigning the size of the largest buffer in the pool to 24KB and // the smallest buffer to 24 bytes. The pools are used to read a // sequence of vertices which are always 24 bytes each. -const MaxBufPoolSize = 24 * 1024 -const MinBufPoolSize = 24 +const MaxGeoBufPoolSize = 24 * 1024 +const MinGeoBufPoolSize = 24 type GeoBufferPoolCallbackFunc func() *s2.GeoBufferPool From 2cdca4fedf5baa16cc853c091b7b1bb67793cc14 Mon Sep 17 00:00:00 2001 From: Likith B Date: Thu, 31 Aug 2023 19:20:35 +0530 Subject: [PATCH 7/7] MB-54131: Geoshape query decode optimization - Handled an edge case where searcher is called with nil context --- go.mod | 2 +- go.sum | 6 ++++-- search/searcher/search_geoshape.go | 8 ++++++-- 3 files changed, 11 insertions(+), 5 deletions(-) diff --git a/go.mod b/go.mod index d842f1c18..3870e5e32 100644 --- a/go.mod +++ b/go.mod @@ -6,7 +6,7 @@ require ( github.com/RoaringBitmap/roaring v1.2.3 github.com/bits-and-blooms/bitset v1.2.0 github.com/blevesearch/bleve_index_api v1.0.5 - github.com/blevesearch/geo v0.1.17 + github.com/blevesearch/geo v0.1.18 github.com/blevesearch/go-metrics v0.0.0-20201227073835-cf1acfcdf475 github.com/blevesearch/go-porterstemmer v1.0.3 github.com/blevesearch/goleveldb v1.0.1 diff --git a/go.sum b/go.sum index c5901a723..9e8a8e5da 100644 --- a/go.sum +++ b/go.sum @@ -4,8 +4,8 @@ github.com/bits-and-blooms/bitset v1.2.0 h1:Kn4yilvwNtMACtf1eYDlG8H77R07mZSPbMjL github.com/bits-and-blooms/bitset v1.2.0/go.mod h1:gIdJ4wp64HaoK2YrL1Q5/N7Y16edYb8uY+O0FJTyyDA= github.com/blevesearch/bleve_index_api v1.0.5 h1:Lc986kpC4Z0/n1g3gg8ul7H+lxgOQPcXb9SxvQGu+tw= github.com/blevesearch/bleve_index_api v1.0.5/go.mod h1:YXMDwaXFFXwncRS8UobWs7nvo0DmusriM1nztTlj1ms= -github.com/blevesearch/geo v0.1.17 h1:AguzI6/5mHXapzB0gE9IKWo+wWPHZmXZoscHcjFgAFA= -github.com/blevesearch/geo v0.1.17/go.mod h1:uRMGWG0HJYfWfFJpK3zTdnnr1K+ksZTuWKhXeSokfnM= 
+github.com/blevesearch/geo v0.1.18 h1:Np8jycHTZ5scFe7VEPLrDoHnnb9C4j636ue/CGrhtDw= +github.com/blevesearch/geo v0.1.18/go.mod h1:uRMGWG0HJYfWfFJpK3zTdnnr1K+ksZTuWKhXeSokfnM= github.com/blevesearch/go-metrics v0.0.0-20201227073835-cf1acfcdf475 h1:kDy+zgJFJJoJYBvdfBSiZYBbdsUL0XcjHYWezpQBGPA= github.com/blevesearch/go-metrics v0.0.0-20201227073835-cf1acfcdf475/go.mod h1:9eJDeqxJ3E7WnLebQUlPD7ZjSce7AnDb9vjGmMCbD0A= github.com/blevesearch/go-porterstemmer v1.0.3 h1:GtmsqID0aZdCSNiY8SkuPJ12pD4jI+DdXTAn4YRcHCo= @@ -92,8 +92,10 @@ golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.5.0 h1:MUK/U/4lj1t1oPg0HfuXDN/Z1wv31ZJ/YcPiGccS4DU= golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= golang.org/x/text v0.8.0 h1:57P1ETyNKtuIjB4SRd15iJxuhj8Gc416Y78H3qgMh68= golang.org/x/text v0.8.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8= +golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/fsnotify.v1 v1.4.7 h1:xOHLXZwVvI9hhs+cLKq5+I5onOuwQLhQwiu63xxlHs4= gopkg.in/fsnotify.v1 v1.4.7/go.mod h1:Tz8NjZHkW78fSQdbUxIjBTcgA1z1m8ZHf0WmKUhAMys= diff --git a/search/searcher/search_geoshape.go b/search/searcher/search_geoshape.go index 940b28c79..ae113107d 100644 --- a/search/searcher/search_geoshape.go +++ b/search/searcher/search_geoshape.go @@ -22,6 +22,7 @@ import ( "github.com/blevesearch/bleve/v2/search" index "github.com/blevesearch/bleve_index_api" "github.com/blevesearch/geo/geojson" + "github.com/blevesearch/geo/s2" ) func NewGeoShapeSearcher(ctx context.Context, indexReader index.IndexReader, shape index.GeoJSON, @@ -70,8 +71,11 @@ func buildRelationFilterOnShapes(ctx 
context.Context, dvReader index.DocValueRea var dvShapeValue []byte var startReading, finishReading bool var reader *bytes.Reader - - bufPool := ctx.Value(search.GeoBufferPoolCallbackKey).(search.GeoBufferPoolCallbackFunc)() + + var bufPool *s2.GeoBufferPool + if ctx != nil { + bufPool = ctx.Value(search.GeoBufferPoolCallbackKey).(search.GeoBufferPoolCallbackFunc)() + } return func(d *search.DocumentMatch) bool { var found bool