From d8d57e6990c3b928f2832b980158b1a69d8ff773 Mon Sep 17 00:00:00 2001 From: Stanislav Sokolov Date: Thu, 16 Feb 2017 12:44:52 +0500 Subject: [PATCH 001/728] Added Russian analyzer with snowball stemmer --- analysis/lang/ru/analyzer_ru.go | 57 ++ analysis/lang/ru/analyzer_ru_test.go | 70 ++ analysis/lang/ru/snowball/stem_Unicode.go | 737 ++++++++++++++++++++++ analysis/lang/ru/stemmer_ru.go | 50 ++ analysis/lang/ru/stemmer_ru_test.go | 67 ++ analysis/lang/ru/stop_filter_ru.go | 33 + analysis/lang/ru/stop_words_ru.go | 256 ++++++++ analysis/token/snowball/snowball.go | 59 ++ analysis/token/snowball/snowball_test.go | 115 ++++ 9 files changed, 1444 insertions(+) create mode 100644 analysis/lang/ru/analyzer_ru.go create mode 100644 analysis/lang/ru/analyzer_ru_test.go create mode 100644 analysis/lang/ru/snowball/stem_Unicode.go create mode 100644 analysis/lang/ru/stemmer_ru.go create mode 100644 analysis/lang/ru/stemmer_ru_test.go create mode 100644 analysis/lang/ru/stop_filter_ru.go create mode 100644 analysis/lang/ru/stop_words_ru.go create mode 100644 analysis/token/snowball/snowball.go create mode 100644 analysis/token/snowball/snowball_test.go diff --git a/analysis/lang/ru/analyzer_ru.go b/analysis/lang/ru/analyzer_ru.go new file mode 100644 index 000000000..3b3404037 --- /dev/null +++ b/analysis/lang/ru/analyzer_ru.go @@ -0,0 +1,57 @@ +// Copyright (c) 2014 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package ru + +import ( + "github.com/blevesearch/bleve/analysis" + "github.com/blevesearch/bleve/registry" + + "github.com/blevesearch/bleve/analysis/token/lowercase" + "github.com/blevesearch/bleve/analysis/tokenizer/unicode" +) + +const AnalyzerName = "ru" + +func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (*analysis.Analyzer, error) { + tokenizer, err := cache.TokenizerNamed(unicode.Name) + if err != nil { + return nil, err + } + toLowerFilter, err := cache.TokenFilterNamed(lowercase.Name) + if err != nil { + return nil, err + } + stopRuFilter, err := cache.TokenFilterNamed(StopName) + if err != nil { + return nil, err + } + stemmerRuFilter, err := cache.TokenFilterNamed(SnowballStemmerName) + if err != nil { + return nil, err + } + rv := analysis.Analyzer{ + Tokenizer: tokenizer, + TokenFilters: []analysis.TokenFilter{ + toLowerFilter, + stopRuFilter, + stemmerRuFilter, + }, + } + return &rv, nil +} + +func init() { + registry.RegisterAnalyzer(AnalyzerName, AnalyzerConstructor) +} diff --git a/analysis/lang/ru/analyzer_ru_test.go b/analysis/lang/ru/analyzer_ru_test.go new file mode 100644 index 000000000..a7ffef414 --- /dev/null +++ b/analysis/lang/ru/analyzer_ru_test.go @@ -0,0 +1,70 @@ +// Copyright (c) 2014 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package ru + +import ( + "reflect" + "testing" + + "github.com/blevesearch/bleve/analysis" + "github.com/blevesearch/bleve/registry" +) + +func TestRussianAnalyzer(t *testing.T) { + tests := []struct { + input []byte + output analysis.TokenStream + }{ + // stemming + { + input: []byte("километрах"), + output: analysis.TokenStream{ + &analysis.Token{ + Term: []byte("километр"), + }, + }, + }, + { + input: []byte("актеров"), + output: analysis.TokenStream{ + &analysis.Token{ + Term: []byte("актер"), + }, + }, + }, + // stop word + { + input: []byte("как"), + output: analysis.TokenStream{}, + }, + } + + cache := registry.NewCache() + analyzer, err := cache.AnalyzerNamed(AnalyzerName) + if err != nil { + t.Fatal(err) + } + for _, test := range tests { + actual := analyzer.Analyze(test.input) + if len(actual) != len(test.output) { + t.Fatalf("expected length: %d, got %d", len(test.output), len(actual)) + } + for i, tok := range actual { + if !reflect.DeepEqual(tok.Term, test.output[i].Term) { + t.Errorf("expected term %s (% x) got %s (% x)", test.output[i].Term, test.output[i].Term, tok.Term, tok.Term) + } + } + } +} diff --git a/analysis/lang/ru/snowball/stem_Unicode.go b/analysis/lang/ru/snowball/stem_Unicode.go new file mode 100644 index 000000000..dfd2f2eb0 --- /dev/null +++ b/analysis/lang/ru/snowball/stem_Unicode.go @@ -0,0 +1,737 @@ +//! This file was generated automatically by the Snowball to Go compiler +//! 
http://snowballstem.org/ + +package snowball + +import ( + snowballRuntime "github.com/snowballstem/snowball/go" +) + +var A_0 = []*snowballRuntime.Among{ + &snowballRuntime.Among{Str: "\u0432\u0448\u0438\u0441\u044C", A: -1, B: 1, F: nil}, + &snowballRuntime.Among{Str: "\u044B\u0432\u0448\u0438\u0441\u044C", A: 0, B: 2, F: nil}, + &snowballRuntime.Among{Str: "\u0438\u0432\u0448\u0438\u0441\u044C", A: 0, B: 2, F: nil}, + &snowballRuntime.Among{Str: "\u0432", A: -1, B: 1, F: nil}, + &snowballRuntime.Among{Str: "\u044B\u0432", A: 3, B: 2, F: nil}, + &snowballRuntime.Among{Str: "\u0438\u0432", A: 3, B: 2, F: nil}, + &snowballRuntime.Among{Str: "\u0432\u0448\u0438", A: -1, B: 1, F: nil}, + &snowballRuntime.Among{Str: "\u044B\u0432\u0448\u0438", A: 6, B: 2, F: nil}, + &snowballRuntime.Among{Str: "\u0438\u0432\u0448\u0438", A: 6, B: 2, F: nil}, +} + +var A_1 = []*snowballRuntime.Among{ + &snowballRuntime.Among{Str: "\u0435\u043C\u0443", A: -1, B: 1, F: nil}, + &snowballRuntime.Among{Str: "\u043E\u043C\u0443", A: -1, B: 1, F: nil}, + &snowballRuntime.Among{Str: "\u044B\u0445", A: -1, B: 1, F: nil}, + &snowballRuntime.Among{Str: "\u0438\u0445", A: -1, B: 1, F: nil}, + &snowballRuntime.Among{Str: "\u0443\u044E", A: -1, B: 1, F: nil}, + &snowballRuntime.Among{Str: "\u044E\u044E", A: -1, B: 1, F: nil}, + &snowballRuntime.Among{Str: "\u0435\u044E", A: -1, B: 1, F: nil}, + &snowballRuntime.Among{Str: "\u043E\u044E", A: -1, B: 1, F: nil}, + &snowballRuntime.Among{Str: "\u044F\u044F", A: -1, B: 1, F: nil}, + &snowballRuntime.Among{Str: "\u0430\u044F", A: -1, B: 1, F: nil}, + &snowballRuntime.Among{Str: "\u044B\u0435", A: -1, B: 1, F: nil}, + &snowballRuntime.Among{Str: "\u0435\u0435", A: -1, B: 1, F: nil}, + &snowballRuntime.Among{Str: "\u0438\u0435", A: -1, B: 1, F: nil}, + &snowballRuntime.Among{Str: "\u043E\u0435", A: -1, B: 1, F: nil}, + &snowballRuntime.Among{Str: "\u044B\u043C\u0438", A: -1, B: 1, F: nil}, + &snowballRuntime.Among{Str: "\u0438\u043C\u0438", A: -1, B: 1, F: 
nil}, + &snowballRuntime.Among{Str: "\u044B\u0439", A: -1, B: 1, F: nil}, + &snowballRuntime.Among{Str: "\u0435\u0439", A: -1, B: 1, F: nil}, + &snowballRuntime.Among{Str: "\u0438\u0439", A: -1, B: 1, F: nil}, + &snowballRuntime.Among{Str: "\u043E\u0439", A: -1, B: 1, F: nil}, + &snowballRuntime.Among{Str: "\u044B\u043C", A: -1, B: 1, F: nil}, + &snowballRuntime.Among{Str: "\u0435\u043C", A: -1, B: 1, F: nil}, + &snowballRuntime.Among{Str: "\u0438\u043C", A: -1, B: 1, F: nil}, + &snowballRuntime.Among{Str: "\u043E\u043C", A: -1, B: 1, F: nil}, + &snowballRuntime.Among{Str: "\u0435\u0433\u043E", A: -1, B: 1, F: nil}, + &snowballRuntime.Among{Str: "\u043E\u0433\u043E", A: -1, B: 1, F: nil}, +} + +var A_2 = []*snowballRuntime.Among{ + &snowballRuntime.Among{Str: "\u0432\u0448", A: -1, B: 1, F: nil}, + &snowballRuntime.Among{Str: "\u044B\u0432\u0448", A: 0, B: 2, F: nil}, + &snowballRuntime.Among{Str: "\u0438\u0432\u0448", A: 0, B: 2, F: nil}, + &snowballRuntime.Among{Str: "\u0449", A: -1, B: 1, F: nil}, + &snowballRuntime.Among{Str: "\u044E\u0449", A: 3, B: 1, F: nil}, + &snowballRuntime.Among{Str: "\u0443\u044E\u0449", A: 4, B: 2, F: nil}, + &snowballRuntime.Among{Str: "\u0435\u043C", A: -1, B: 1, F: nil}, + &snowballRuntime.Among{Str: "\u043D\u043D", A: -1, B: 1, F: nil}, +} + +var A_3 = []*snowballRuntime.Among{ + &snowballRuntime.Among{Str: "\u0441\u044C", A: -1, B: 1, F: nil}, + &snowballRuntime.Among{Str: "\u0441\u044F", A: -1, B: 1, F: nil}, +} + +var A_4 = []*snowballRuntime.Among{ + &snowballRuntime.Among{Str: "\u044B\u0442", A: -1, B: 2, F: nil}, + &snowballRuntime.Among{Str: "\u044E\u0442", A: -1, B: 1, F: nil}, + &snowballRuntime.Among{Str: "\u0443\u044E\u0442", A: 1, B: 2, F: nil}, + &snowballRuntime.Among{Str: "\u044F\u0442", A: -1, B: 2, F: nil}, + &snowballRuntime.Among{Str: "\u0435\u0442", A: -1, B: 1, F: nil}, + &snowballRuntime.Among{Str: "\u0443\u0435\u0442", A: 4, B: 2, F: nil}, + &snowballRuntime.Among{Str: "\u0438\u0442", A: -1, B: 2, F: nil}, + 
&snowballRuntime.Among{Str: "\u043D\u044B", A: -1, B: 1, F: nil}, + &snowballRuntime.Among{Str: "\u0435\u043D\u044B", A: 7, B: 2, F: nil}, + &snowballRuntime.Among{Str: "\u0442\u044C", A: -1, B: 1, F: nil}, + &snowballRuntime.Among{Str: "\u044B\u0442\u044C", A: 9, B: 2, F: nil}, + &snowballRuntime.Among{Str: "\u0438\u0442\u044C", A: 9, B: 2, F: nil}, + &snowballRuntime.Among{Str: "\u0435\u0448\u044C", A: -1, B: 1, F: nil}, + &snowballRuntime.Among{Str: "\u0438\u0448\u044C", A: -1, B: 2, F: nil}, + &snowballRuntime.Among{Str: "\u044E", A: -1, B: 2, F: nil}, + &snowballRuntime.Among{Str: "\u0443\u044E", A: 14, B: 2, F: nil}, + &snowballRuntime.Among{Str: "\u043B\u0430", A: -1, B: 1, F: nil}, + &snowballRuntime.Among{Str: "\u044B\u043B\u0430", A: 16, B: 2, F: nil}, + &snowballRuntime.Among{Str: "\u0438\u043B\u0430", A: 16, B: 2, F: nil}, + &snowballRuntime.Among{Str: "\u043D\u0430", A: -1, B: 1, F: nil}, + &snowballRuntime.Among{Str: "\u0435\u043D\u0430", A: 19, B: 2, F: nil}, + &snowballRuntime.Among{Str: "\u0435\u0442\u0435", A: -1, B: 1, F: nil}, + &snowballRuntime.Among{Str: "\u0438\u0442\u0435", A: -1, B: 2, F: nil}, + &snowballRuntime.Among{Str: "\u0439\u0442\u0435", A: -1, B: 1, F: nil}, + &snowballRuntime.Among{Str: "\u0443\u0439\u0442\u0435", A: 23, B: 2, F: nil}, + &snowballRuntime.Among{Str: "\u0435\u0439\u0442\u0435", A: 23, B: 2, F: nil}, + &snowballRuntime.Among{Str: "\u043B\u0438", A: -1, B: 1, F: nil}, + &snowballRuntime.Among{Str: "\u044B\u043B\u0438", A: 26, B: 2, F: nil}, + &snowballRuntime.Among{Str: "\u0438\u043B\u0438", A: 26, B: 2, F: nil}, + &snowballRuntime.Among{Str: "\u0439", A: -1, B: 1, F: nil}, + &snowballRuntime.Among{Str: "\u0443\u0439", A: 29, B: 2, F: nil}, + &snowballRuntime.Among{Str: "\u0435\u0439", A: 29, B: 2, F: nil}, + &snowballRuntime.Among{Str: "\u043B", A: -1, B: 1, F: nil}, + &snowballRuntime.Among{Str: "\u044B\u043B", A: 32, B: 2, F: nil}, + &snowballRuntime.Among{Str: "\u0438\u043B", A: 32, B: 2, F: nil}, + 
&snowballRuntime.Among{Str: "\u044B\u043C", A: -1, B: 2, F: nil}, + &snowballRuntime.Among{Str: "\u0435\u043C", A: -1, B: 1, F: nil}, + &snowballRuntime.Among{Str: "\u0438\u043C", A: -1, B: 2, F: nil}, + &snowballRuntime.Among{Str: "\u043D", A: -1, B: 1, F: nil}, + &snowballRuntime.Among{Str: "\u0435\u043D", A: 38, B: 2, F: nil}, + &snowballRuntime.Among{Str: "\u043B\u043E", A: -1, B: 1, F: nil}, + &snowballRuntime.Among{Str: "\u044B\u043B\u043E", A: 40, B: 2, F: nil}, + &snowballRuntime.Among{Str: "\u0438\u043B\u043E", A: 40, B: 2, F: nil}, + &snowballRuntime.Among{Str: "\u043D\u043E", A: -1, B: 1, F: nil}, + &snowballRuntime.Among{Str: "\u0435\u043D\u043E", A: 43, B: 2, F: nil}, + &snowballRuntime.Among{Str: "\u043D\u043D\u043E", A: 43, B: 1, F: nil}, +} + +var A_5 = []*snowballRuntime.Among{ + &snowballRuntime.Among{Str: "\u0443", A: -1, B: 1, F: nil}, + &snowballRuntime.Among{Str: "\u044F\u0445", A: -1, B: 1, F: nil}, + &snowballRuntime.Among{Str: "\u0438\u044F\u0445", A: 1, B: 1, F: nil}, + &snowballRuntime.Among{Str: "\u0430\u0445", A: -1, B: 1, F: nil}, + &snowballRuntime.Among{Str: "\u044B", A: -1, B: 1, F: nil}, + &snowballRuntime.Among{Str: "\u044C", A: -1, B: 1, F: nil}, + &snowballRuntime.Among{Str: "\u044E", A: -1, B: 1, F: nil}, + &snowballRuntime.Among{Str: "\u044C\u044E", A: 6, B: 1, F: nil}, + &snowballRuntime.Among{Str: "\u0438\u044E", A: 6, B: 1, F: nil}, + &snowballRuntime.Among{Str: "\u044F", A: -1, B: 1, F: nil}, + &snowballRuntime.Among{Str: "\u044C\u044F", A: 9, B: 1, F: nil}, + &snowballRuntime.Among{Str: "\u0438\u044F", A: 9, B: 1, F: nil}, + &snowballRuntime.Among{Str: "\u0430", A: -1, B: 1, F: nil}, + &snowballRuntime.Among{Str: "\u0435\u0432", A: -1, B: 1, F: nil}, + &snowballRuntime.Among{Str: "\u043E\u0432", A: -1, B: 1, F: nil}, + &snowballRuntime.Among{Str: "\u0435", A: -1, B: 1, F: nil}, + &snowballRuntime.Among{Str: "\u044C\u0435", A: 15, B: 1, F: nil}, + &snowballRuntime.Among{Str: "\u0438\u0435", A: 15, B: 1, F: nil}, + 
&snowballRuntime.Among{Str: "\u0438", A: -1, B: 1, F: nil}, + &snowballRuntime.Among{Str: "\u0435\u0438", A: 18, B: 1, F: nil}, + &snowballRuntime.Among{Str: "\u0438\u0438", A: 18, B: 1, F: nil}, + &snowballRuntime.Among{Str: "\u044F\u043C\u0438", A: 18, B: 1, F: nil}, + &snowballRuntime.Among{Str: "\u0438\u044F\u043C\u0438", A: 21, B: 1, F: nil}, + &snowballRuntime.Among{Str: "\u0430\u043C\u0438", A: 18, B: 1, F: nil}, + &snowballRuntime.Among{Str: "\u0439", A: -1, B: 1, F: nil}, + &snowballRuntime.Among{Str: "\u0435\u0439", A: 24, B: 1, F: nil}, + &snowballRuntime.Among{Str: "\u0438\u0435\u0439", A: 25, B: 1, F: nil}, + &snowballRuntime.Among{Str: "\u0438\u0439", A: 24, B: 1, F: nil}, + &snowballRuntime.Among{Str: "\u043E\u0439", A: 24, B: 1, F: nil}, + &snowballRuntime.Among{Str: "\u044F\u043C", A: -1, B: 1, F: nil}, + &snowballRuntime.Among{Str: "\u0438\u044F\u043C", A: 29, B: 1, F: nil}, + &snowballRuntime.Among{Str: "\u0430\u043C", A: -1, B: 1, F: nil}, + &snowballRuntime.Among{Str: "\u0435\u043C", A: -1, B: 1, F: nil}, + &snowballRuntime.Among{Str: "\u0438\u0435\u043C", A: 32, B: 1, F: nil}, + &snowballRuntime.Among{Str: "\u043E\u043C", A: -1, B: 1, F: nil}, + &snowballRuntime.Among{Str: "\u043E", A: -1, B: 1, F: nil}, +} + +var A_6 = []*snowballRuntime.Among{ + &snowballRuntime.Among{Str: "\u043E\u0441\u0442", A: -1, B: 1, F: nil}, + &snowballRuntime.Among{Str: "\u043E\u0441\u0442\u044C", A: -1, B: 1, F: nil}, +} + +var A_7 = []*snowballRuntime.Among{ + &snowballRuntime.Among{Str: "\u0435\u0439\u0448", A: -1, B: 1, F: nil}, + &snowballRuntime.Among{Str: "\u044C", A: -1, B: 3, F: nil}, + &snowballRuntime.Among{Str: "\u0435\u0439\u0448\u0435", A: -1, B: 1, F: nil}, + &snowballRuntime.Among{Str: "\u043D", A: -1, B: 2, F: nil}, +} + +var G_v = []byte{33, 65, 8, 232} + +type Context struct { + i_p2 int + i_pV int +} + +func r_mark_regions(env *snowballRuntime.Env, ctx interface{}) bool { + context := ctx.(*Context) + _ = context + // (, line 57 + context.i_pV = 
env.Limit + context.i_p2 = env.Limit + // do, line 61 + var v_1 = env.Cursor +lab0: + for { + // (, line 61 + // gopast, line 62 + golab1: + for { + lab2: + for { + if !env.InGrouping(G_v, 1072, 1103) { + break lab2 + } + break golab1 + } + if env.Cursor >= env.Limit { + break lab0 + } + env.NextChar() + } + // setmark pV, line 62 + context.i_pV = env.Cursor + // gopast, line 62 + golab3: + for { + lab4: + for { + if !env.OutGrouping(G_v, 1072, 1103) { + break lab4 + } + break golab3 + } + if env.Cursor >= env.Limit { + break lab0 + } + env.NextChar() + } + // gopast, line 63 + golab5: + for { + lab6: + for { + if !env.InGrouping(G_v, 1072, 1103) { + break lab6 + } + break golab5 + } + if env.Cursor >= env.Limit { + break lab0 + } + env.NextChar() + } + // gopast, line 63 + golab7: + for { + lab8: + for { + if !env.OutGrouping(G_v, 1072, 1103) { + break lab8 + } + break golab7 + } + if env.Cursor >= env.Limit { + break lab0 + } + env.NextChar() + } + // setmark p2, line 63 + context.i_p2 = env.Cursor + break lab0 + } + env.Cursor = v_1 + return true +} + +func r_R2(env *snowballRuntime.Env, ctx interface{}) bool { + context := ctx.(*Context) + _ = context + if !(context.i_p2 <= env.Cursor) { + return false + } + return true +} + +func r_perfective_gerund(env *snowballRuntime.Env, ctx interface{}) bool { + context := ctx.(*Context) + _ = context + var among_var int32 + // (, line 71 + // [, line 72 + env.Ket = env.Cursor + // substring, line 72 + among_var = env.FindAmongB(A_0, context) + if among_var == 0 { + return false + } + // ], line 72 + env.Bra = env.Cursor + if among_var == 0 { + return false + } else if among_var == 1 { + // (, line 76 + // or, line 76 + lab0: + for { + var v_1 = env.Limit - env.Cursor + lab1: + for { + // literal, line 76 + if !env.EqSB("\u0430") { + break lab1 + } + break lab0 + } + env.Cursor = env.Limit - v_1 + // literal, line 76 + if !env.EqSB("\u044F") { + return false + } + break lab0 + } + // delete, line 76 + if !env.SliceDel() { 
+ return false + } + } else if among_var == 2 { + // (, line 83 + // delete, line 83 + if !env.SliceDel() { + return false + } + } + return true +} + +func r_adjective(env *snowballRuntime.Env, ctx interface{}) bool { + context := ctx.(*Context) + _ = context + var among_var int32 + // (, line 87 + // [, line 88 + env.Ket = env.Cursor + // substring, line 88 + among_var = env.FindAmongB(A_1, context) + if among_var == 0 { + return false + } + // ], line 88 + env.Bra = env.Cursor + if among_var == 0 { + return false + } else if among_var == 1 { + // (, line 97 + // delete, line 97 + if !env.SliceDel() { + return false + } + } + return true +} + +func r_adjectival(env *snowballRuntime.Env, ctx interface{}) bool { + context := ctx.(*Context) + _ = context + var among_var int32 + // (, line 101 + // call adjective, line 102 + if !r_adjective(env, context) { + return false + } + // try, line 109 + var v_1 = env.Limit - env.Cursor +lab0: + for { + // (, line 109 + // [, line 110 + env.Ket = env.Cursor + // substring, line 110 + among_var = env.FindAmongB(A_2, context) + if among_var == 0 { + env.Cursor = env.Limit - v_1 + break lab0 + } + // ], line 110 + env.Bra = env.Cursor + if among_var == 0 { + env.Cursor = env.Limit - v_1 + break lab0 + } else if among_var == 1 { + // (, line 115 + // or, line 115 + lab1: + for { + var v_2 = env.Limit - env.Cursor + lab2: + for { + // literal, line 115 + if !env.EqSB("\u0430") { + break lab2 + } + break lab1 + } + env.Cursor = env.Limit - v_2 + // literal, line 115 + if !env.EqSB("\u044F") { + env.Cursor = env.Limit - v_1 + break lab0 + } + break lab1 + } + // delete, line 115 + if !env.SliceDel() { + return false + } + } else if among_var == 2 { + // (, line 122 + // delete, line 122 + if !env.SliceDel() { + return false + } + } + break lab0 + } + return true +} + +func r_reflexive(env *snowballRuntime.Env, ctx interface{}) bool { + context := ctx.(*Context) + _ = context + var among_var int32 + // (, line 128 + // [, line 129 + 
env.Ket = env.Cursor + // substring, line 129 + among_var = env.FindAmongB(A_3, context) + if among_var == 0 { + return false + } + // ], line 129 + env.Bra = env.Cursor + if among_var == 0 { + return false + } else if among_var == 1 { + // (, line 132 + // delete, line 132 + if !env.SliceDel() { + return false + } + } + return true +} + +func r_verb(env *snowballRuntime.Env, ctx interface{}) bool { + context := ctx.(*Context) + _ = context + var among_var int32 + // (, line 136 + // [, line 137 + env.Ket = env.Cursor + // substring, line 137 + among_var = env.FindAmongB(A_4, context) + if among_var == 0 { + return false + } + // ], line 137 + env.Bra = env.Cursor + if among_var == 0 { + return false + } else if among_var == 1 { + // (, line 143 + // or, line 143 + lab0: + for { + var v_1 = env.Limit - env.Cursor + lab1: + for { + // literal, line 143 + if !env.EqSB("\u0430") { + break lab1 + } + break lab0 + } + env.Cursor = env.Limit - v_1 + // literal, line 143 + if !env.EqSB("\u044F") { + return false + } + break lab0 + } + // delete, line 143 + if !env.SliceDel() { + return false + } + } else if among_var == 2 { + // (, line 151 + // delete, line 151 + if !env.SliceDel() { + return false + } + } + return true +} + +func r_noun(env *snowballRuntime.Env, ctx interface{}) bool { + context := ctx.(*Context) + _ = context + var among_var int32 + // (, line 159 + // [, line 160 + env.Ket = env.Cursor + // substring, line 160 + among_var = env.FindAmongB(A_5, context) + if among_var == 0 { + return false + } + // ], line 160 + env.Bra = env.Cursor + if among_var == 0 { + return false + } else if among_var == 1 { + // (, line 167 + // delete, line 167 + if !env.SliceDel() { + return false + } + } + return true +} + +func r_derivational(env *snowballRuntime.Env, ctx interface{}) bool { + context := ctx.(*Context) + _ = context + var among_var int32 + // (, line 175 + // [, line 176 + env.Ket = env.Cursor + // substring, line 176 + among_var = env.FindAmongB(A_6, 
context) + if among_var == 0 { + return false + } + // ], line 176 + env.Bra = env.Cursor + // call R2, line 176 + if !r_R2(env, context) { + return false + } + if among_var == 0 { + return false + } else if among_var == 1 { + // (, line 179 + // delete, line 179 + if !env.SliceDel() { + return false + } + } + return true +} + +func r_tidy_up(env *snowballRuntime.Env, ctx interface{}) bool { + context := ctx.(*Context) + _ = context + var among_var int32 + // (, line 183 + // [, line 184 + env.Ket = env.Cursor + // substring, line 184 + among_var = env.FindAmongB(A_7, context) + if among_var == 0 { + return false + } + // ], line 184 + env.Bra = env.Cursor + if among_var == 0 { + return false + } else if among_var == 1 { + // (, line 188 + // delete, line 188 + if !env.SliceDel() { + return false + } + // [, line 189 + env.Ket = env.Cursor + // literal, line 189 + if !env.EqSB("\u043D") { + return false + } + // ], line 189 + env.Bra = env.Cursor + // literal, line 189 + if !env.EqSB("\u043D") { + return false + } + // delete, line 189 + if !env.SliceDel() { + return false + } + } else if among_var == 2 { + // (, line 192 + // literal, line 192 + if !env.EqSB("\u043D") { + return false + } + // delete, line 192 + if !env.SliceDel() { + return false + } + } else if among_var == 3 { + // (, line 194 + // delete, line 194 + if !env.SliceDel() { + return false + } + } + return true +} + +func Stem(env *snowballRuntime.Env) bool { + var context = &Context{ + i_p2: 0, + i_pV: 0, + } + _ = context + // (, line 199 + // do, line 201 + var v_1 = env.Cursor +lab0: + for { + // call mark_regions, line 201 + if !r_mark_regions(env, context) { + break lab0 + } + break lab0 + } + env.Cursor = v_1 + // backwards, line 202 + env.LimitBackward = env.Cursor + env.Cursor = env.Limit + // setlimit, line 202 + var v_2 = env.Limit - env.Cursor + // tomark, line 202 + if env.Cursor < context.i_pV { + return false + } + env.Cursor = context.i_pV + var v_3 = env.LimitBackward + 
env.LimitBackward = env.Cursor + env.Cursor = env.Limit - v_2 + // (, line 202 + // do, line 203 + var v_4 = env.Limit - env.Cursor +lab1: + for { + // (, line 203 + // or, line 204 + lab2: + for { + var v_5 = env.Limit - env.Cursor + lab3: + for { + // call perfective_gerund, line 204 + if !r_perfective_gerund(env, context) { + break lab3 + } + break lab2 + } + env.Cursor = env.Limit - v_5 + // (, line 205 + // try, line 205 + var v_6 = env.Limit - env.Cursor + lab4: + for { + // call reflexive, line 205 + if !r_reflexive(env, context) { + env.Cursor = env.Limit - v_6 + break lab4 + } + break lab4 + } + // or, line 206 + lab5: + for { + var v_7 = env.Limit - env.Cursor + lab6: + for { + // call adjectival, line 206 + if !r_adjectival(env, context) { + break lab6 + } + break lab5 + } + env.Cursor = env.Limit - v_7 + lab7: + for { + // call verb, line 206 + if !r_verb(env, context) { + break lab7 + } + break lab5 + } + env.Cursor = env.Limit - v_7 + // call noun, line 206 + if !r_noun(env, context) { + break lab1 + } + break lab5 + } + break lab2 + } + break lab1 + } + env.Cursor = env.Limit - v_4 + // try, line 209 + var v_8 = env.Limit - env.Cursor +lab8: + for { + // (, line 209 + // [, line 209 + env.Ket = env.Cursor + // literal, line 209 + if !env.EqSB("\u0438") { + env.Cursor = env.Limit - v_8 + break lab8 + } + // ], line 209 + env.Bra = env.Cursor + // delete, line 209 + if !env.SliceDel() { + return false + } + break lab8 + } + // do, line 212 + var v_9 = env.Limit - env.Cursor +lab9: + for { + // call derivational, line 212 + if !r_derivational(env, context) { + break lab9 + } + break lab9 + } + env.Cursor = env.Limit - v_9 + // do, line 213 + var v_10 = env.Limit - env.Cursor +lab10: + for { + // call tidy_up, line 213 + if !r_tidy_up(env, context) { + break lab10 + } + break lab10 + } + env.Cursor = env.Limit - v_10 + env.LimitBackward = v_3 + env.Cursor = env.LimitBackward + return true +} diff --git a/analysis/lang/ru/stemmer_ru.go 
b/analysis/lang/ru/stemmer_ru.go new file mode 100644 index 000000000..6da9095b0 --- /dev/null +++ b/analysis/lang/ru/stemmer_ru.go @@ -0,0 +1,50 @@ +// Copyright (c) 2014 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package ru + +import ( + "github.com/blevesearch/bleve/analysis" + "github.com/blevesearch/bleve/analysis/lang/ru/snowball" + "github.com/blevesearch/bleve/registry" + + snowballRuntime "github.com/snowballstem/snowball/go" +) + +const SnowballStemmerName = "stemmer_ru_snowball" + +type RussianStemmerFilter struct { +} + +func NewRussianStemmerFilter() *RussianStemmerFilter { + return &RussianStemmerFilter{} +} + +func (s *RussianStemmerFilter) Filter(input analysis.TokenStream) analysis.TokenStream { + for _, token := range input { + + env := snowballRuntime.NewEnv(string(token.Term)) + snowball.Stem(env) + token.Term = []byte(env.Current()) + } + return input +} + +func RussianStemmerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) { + return NewRussianStemmerFilter(), nil +} + +func init() { + registry.RegisterTokenFilter(SnowballStemmerName, RussianStemmerFilterConstructor) +} diff --git a/analysis/lang/ru/stemmer_ru_test.go b/analysis/lang/ru/stemmer_ru_test.go new file mode 100644 index 000000000..1795497ff --- /dev/null +++ b/analysis/lang/ru/stemmer_ru_test.go @@ -0,0 +1,67 @@ +// Copyright (c) 2015 Couchbase, Inc. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package ru + +import ( + "reflect" + "testing" + + "github.com/blevesearch/bleve/analysis" + "github.com/blevesearch/bleve/registry" +) + +func TestSnowballRussianStemmer(t *testing.T) { + tests := []struct { + input analysis.TokenStream + output analysis.TokenStream + }{ + { + input: analysis.TokenStream{ + &analysis.Token{ + Term: []byte("актеров"), + }, + }, + output: analysis.TokenStream{ + &analysis.Token{ + Term: []byte("актер"), + }, + }, + }, + { + input: analysis.TokenStream{ + &analysis.Token{ + Term: []byte("километров"), + }, + }, + output: analysis.TokenStream{ + &analysis.Token{ + Term: []byte("километр"), + }, + }, + }, + } + + cache := registry.NewCache() + filter, err := cache.TokenFilterNamed(SnowballStemmerName) + if err != nil { + t.Fatal(err) + } + for _, test := range tests { + actual := filter.Filter(test.input) + if !reflect.DeepEqual(actual, test.output) { + t.Errorf("expected %s, got %s", test.output[0].Term, actual[0].Term) + } + } +} diff --git a/analysis/lang/ru/stop_filter_ru.go b/analysis/lang/ru/stop_filter_ru.go new file mode 100644 index 000000000..5679420a1 --- /dev/null +++ b/analysis/lang/ru/stop_filter_ru.go @@ -0,0 +1,33 @@ +// Copyright (c) 2014 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package ru + +import ( + "github.com/blevesearch/bleve/analysis" + "github.com/blevesearch/bleve/analysis/token/stop" + "github.com/blevesearch/bleve/registry" +) + +func StopTokenFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) { + tokenMap, err := cache.TokenMapNamed(StopName) + if err != nil { + return nil, err + } + return stop.NewStopTokensFilter(tokenMap), nil +} + +func init() { + registry.RegisterTokenFilter(StopName, StopTokenFilterConstructor) +} diff --git a/analysis/lang/ru/stop_words_ru.go b/analysis/lang/ru/stop_words_ru.go new file mode 100644 index 000000000..60bec0236 --- /dev/null +++ b/analysis/lang/ru/stop_words_ru.go @@ -0,0 +1,256 @@ +package ru + +import ( + "github.com/blevesearch/bleve/analysis" + "github.com/blevesearch/bleve/registry" +) + +const StopName = "stop_ru" + +var RussianStopWords = []byte(` | From http://snowball.tartarus.org/algorithms/russian/stop.txt + + + | a russian stop word list. comments begin with vertical bar. each stop + | word is at the start of a line. + + | this is a ranked list (commonest to rarest) of stopwords derived from + | a large text sample. + + | letter 'ё' is translated to 'е'. 
+ + и | and + в | in/into + во | alternative form + не | not + что | what/that + он | he + на | on/onto + я | i + с | from + со | alternative form + как | how + а | milder form of 'no' (but) + то | conjunction and form of 'that' + все | all + она | she + так | so, thus + его | him + но | but + да | yes/and + ты | thou + к | towards, by + у | around, chez + же | intensifier particle + вы | you + за | beyond, behind + бы | conditional/subj. particle + по | up to, along + только | only + ее | her + мне | to me + было | it was + вот | here is/are, particle + от | away from + меня | me + еще | still, yet, more + нет | no, there isnt/arent + о | about + из | out of + ему | to him + теперь | now + когда | when + даже | even + ну | so, well + вдруг | suddenly + ли | interrogative particle + если | if + уже | already, but homonym of 'narrower' + или | or + ни | neither + быть | to be + был | he was + него | prepositional form of его + до | up to + вас | you accusative + нибудь | indef. suffix preceded by hyphen + опять | again + уж | already, but homonym of 'adder' + вам | to you + сказал | he said + ведь | particle 'after all' + там | there + потом | then + себя | oneself + ничего | nothing + ей | to her + может | usually with 'быть' as 'maybe' + они | they + тут | here + где | where + есть | there is/are + надо | got to, must + ней | prepositional form of ей + для | for + мы | we + тебя | thee + их | them, their + чем | than + была | she was + сам | self + чтоб | in order to + без | without + будто | as if + человек | man, person, one + чего | genitive form of 'what' + раз | once + тоже | also + себе | to oneself + под | beneath + жизнь | life + будет | will be + ж | short form of intensifer particle 'же' + тогда | then + кто | who + этот | this + говорил | was saying + того | genitive form of 'that' + потому | for that reason + этого | genitive form of 'this' + какой | which + совсем | altogether + ним | prepositional form of 'его', 'они' + здесь | here + этом | 
prepositional form of 'этот' + один | one + почти | almost + мой | my + тем | instrumental/dative plural of 'тот', 'то' + чтобы | full form of 'in order that' + нее | her (acc.) + кажется | it seems + сейчас | now + были | they were + куда | where to + зачем | why + сказать | to say + всех | all (acc., gen. preposn. plural) + никогда | never + сегодня | today + можно | possible, one can + при | by + наконец | finally + два | two + об | alternative form of 'о', about + другой | another + хоть | even + после | after + над | above + больше | more + тот | that one (masc.) + через | across, in + эти | these + нас | us + про | about + всего | in all, only, of all + них | prepositional form of 'они' (they) + какая | which, feminine + много | lots + разве | interrogative particle + сказала | she said + три | three + эту | this, acc. fem. sing. + моя | my, feminine + впрочем | moreover, besides + хорошо | good + свою | ones own, acc. fem. sing. + этой | oblique form of 'эта', fem. 'this' + перед | in front of + иногда | sometimes + лучше | better + чуть | a little + том | preposn. form of 'that one' + нельзя | one must not + такой | such a one + им | to them + более | more + всегда | always + конечно | of course + всю | acc. fem. 
sing of 'all' + между | between + + + | b: some paradigms + | + | personal pronouns + | + | я меня мне мной [мною] + | ты тебя тебе тобой [тобою] + | он его ему им [него, нему, ним] + | она ее эи ею [нее, нэи, нею] + | оно его ему им [него, нему, ним] + | + | мы нас нам нами + | вы вас вам вами + | они их им ими [них, ним, ними] + | + | себя себе собой [собою] + | + | demonstrative pronouns: этот (this), тот (that) + | + | этот эта это эти + | этого эты это эти + | этого этой этого этих + | этому этой этому этим + | этим этой этим [этою] этими + | этом этой этом этих + | + | тот та то те + | того ту то те + | того той того тех + | тому той тому тем + | тем той тем [тою] теми + | том той том тех + | + | determinative pronouns + | + | (a) весь (all) + | + | весь вся все все + | всего всю все все + | всего всей всего всех + | всему всей всему всем + | всем всей всем [всею] всеми + | всем всей всем всех + | + | (b) сам (himself etc) + | + | сам сама само сами + | самого саму само самих + | самого самой самого самих + | самому самой самому самим + | самим самой самим [самою] самими + | самом самой самом самих + | + | stems of verbs 'to be', 'to have', 'to do' and modal + | + | быть бы буд быв есть суть + | име + | дел + | мог мож мочь + | уме + | хоч хот + | долж + | можн + | нужн + | нельзя +`) + +func TokenMapConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenMap, error) { + rv := analysis.NewTokenMap() + err := rv.LoadBytes(RussianStopWords) + return rv, err +} + +func init() { + registry.RegisterTokenMap(StopName, TokenMapConstructor) +} diff --git a/analysis/token/snowball/snowball.go b/analysis/token/snowball/snowball.go new file mode 100644 index 000000000..ae876137a --- /dev/null +++ b/analysis/token/snowball/snowball.go @@ -0,0 +1,59 @@ +// Copyright (c) 2014 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package snowball + +import ( + "fmt" + + "github.com/blevesearch/bleve/analysis" + "github.com/blevesearch/bleve/registry" + + "github.com/kljensen/snowball" +) + +const Name = "stemmer_snowball" + +type SnowballStemmer struct { + langauge string +} + +func NewSnowballStemmer(language string) *SnowballStemmer { + return &SnowballStemmer{ + langauge: language, + } +} + +func (s *SnowballStemmer) Filter(input analysis.TokenStream) analysis.TokenStream { + for _, token := range input { + // if it is not a protected keyword, stem it + if !token.KeyWord { + stemmed, _ := snowball.Stem(string(token.Term), s.langauge, true) + token.Term = []byte(stemmed) + } + } + return input +} + +func SnowballStemmerConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) { + language, ok := config["language"].(string) + if !ok { + return nil, fmt.Errorf("must specify language") + } + return NewSnowballStemmer(language), nil +} + +func init() { + registry.RegisterTokenFilter(Name, SnowballStemmerConstructor) +} diff --git a/analysis/token/snowball/snowball_test.go b/analysis/token/snowball/snowball_test.go new file mode 100644 index 000000000..80c2f6f47 --- /dev/null +++ b/analysis/token/snowball/snowball_test.go @@ -0,0 +1,115 @@ +// Copyright (c) 2014 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package snowball + +import ( + "reflect" + "testing" + + "github.com/blevesearch/bleve/analysis" +) + +func TestSnowballStemmer(t *testing.T) { + + inputTokenStream := analysis.TokenStream{ + &analysis.Token{ + Term: []byte("walking"), + }, + &analysis.Token{ + Term: []byte("talked"), + }, + &analysis.Token{ + Term: []byte("business"), + }, + &analysis.Token{ + Term: []byte("protected"), + KeyWord: true, + }, + &analysis.Token{ + Term: []byte("cat"), + }, + &analysis.Token{ + Term: []byte("done"), + }, + // a term which does stem, but does not change length + &analysis.Token{ + Term: []byte("marty"), + }, + } + + expectedTokenStream := analysis.TokenStream{ + &analysis.Token{ + Term: []byte("walk"), + }, + &analysis.Token{ + Term: []byte("talk"), + }, + &analysis.Token{ + Term: []byte("busi"), + }, + &analysis.Token{ + Term: []byte("protected"), + KeyWord: true, + }, + &analysis.Token{ + Term: []byte("cat"), + }, + &analysis.Token{ + Term: []byte("done"), + }, + &analysis.Token{ + Term: []byte("marti"), + }, + } + + filter := NewSnowballStemmer("english") + ouputTokenStream := filter.Filter(inputTokenStream) + if !reflect.DeepEqual(ouputTokenStream, expectedTokenStream) { + t.Errorf("expected %#v got %#v", expectedTokenStream[3], ouputTokenStream[3]) + } +} + +func BenchmarkSnowballStemmer(b *testing.B) { + + inputTokenStream := analysis.TokenStream{ + &analysis.Token{ + Term: []byte("walking"), + }, + &analysis.Token{ + Term: []byte("talked"), + }, + &analysis.Token{ + Term: []byte("business"), + }, + &analysis.Token{ + Term: 
[]byte("protected"), + KeyWord: true, + }, + &analysis.Token{ + Term: []byte("cat"), + }, + &analysis.Token{ + Term: []byte("done"), + }, + } + + filter := NewSnowballStemmer("english") + b.ResetTimer() + + for i := 0; i < b.N; i++ { + filter.Filter(inputTokenStream) + } + +} From dc9f994d95957101c5041e44096dac1d5e986253 Mon Sep 17 00:00:00 2001 From: Andrey Khomenko Date: Thu, 20 Jul 2017 12:06:45 -0400 Subject: [PATCH 002/728] Update index.go --- index.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/index.go b/index.go index 293ec9877..e85652d96 100644 --- a/index.go +++ b/index.go @@ -76,7 +76,7 @@ func (b *Batch) SetInternal(key, val []byte) { b.internal.SetInternal(key, val) } -// SetInternal adds the specified delete internal +// DeleteInternal adds the specified delete internal // operation to the batch. NOTE: the bleve Index is // not updated until the batch is executed. func (b *Batch) DeleteInternal(key []byte) { From 4ddc50e86deabe3fc6f23dcc28e869eefbf99250 Mon Sep 17 00:00:00 2001 From: Joachim Schwarm Date: Tue, 21 Nov 2017 16:35:07 +0100 Subject: [PATCH 003/728] typo in documentation --- query.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/query.go b/query.go index 1fecfa25c..523db5ec0 100644 --- a/query.go +++ b/query.go @@ -209,8 +209,8 @@ func NewGeoBoundingBoxQuery(topLeftLon, topLeftLat, bottomRightLon, bottomRightL return query.NewGeoBoundingBoxQuery(topLeftLon, topLeftLat, bottomRightLon, bottomRightLat) } -// NewGeoDistanceQuery creates a new Query for performing geo bounding -// box searches. The arguments describe a position and a distance. Documents +// NewGeoDistanceQuery creates a new Query for performing geo distance +// searches. The arguments describe a position and a distance. Documents // which have an indexed geo point which is less than or equal to the provided // distance from the given position will be returned. 
func NewGeoDistanceQuery(lon, lat float64, distance string) *query.GeoDistanceQuery { From 23f6dc1cc6c83ce028fa9d36d93243130b0c7605 Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Fri, 29 Sep 2017 12:42:37 -0400 Subject: [PATCH 004/728] working in-memory version --- index/analysis.go | 10 +- index/scorch/README.md | 420 ++++++++ index/scorch/field_dict_test.go | 164 ++++ index/scorch/introducer.go | 85 ++ index/scorch/reader.go | 84 ++ index/scorch/reader_test.go | 511 ++++++++++ index/scorch/scorch.go | 218 +++++ index/scorch/scorch_test.go | 1109 ++++++++++++++++++++++ index/scorch/segment/mem/build.go | 220 +++++ index/scorch/segment/mem/dict.go | 87 ++ index/scorch/segment/mem/posting.go | 160 ++++ index/scorch/segment/mem/segment.go | 132 +++ index/scorch/segment/mem/segment_test.go | 521 ++++++++++ index/scorch/segment/segment.go | 66 ++ index/scorch/snapshot_index.go | 300 ++++++ index/scorch/snapshot_index_dict.go | 78 ++ index/scorch/snapshot_index_doc.go | 53 ++ index/scorch/snapshot_index_tfr.go | 91 ++ index/scorch/snapshot_segment.go | 64 ++ index/scorch/stats.go | 33 + mapping/index.go | 2 +- 21 files changed, 4406 insertions(+), 2 deletions(-) create mode 100644 index/scorch/README.md create mode 100644 index/scorch/field_dict_test.go create mode 100644 index/scorch/introducer.go create mode 100644 index/scorch/reader.go create mode 100644 index/scorch/reader_test.go create mode 100644 index/scorch/scorch.go create mode 100644 index/scorch/scorch_test.go create mode 100644 index/scorch/segment/mem/build.go create mode 100644 index/scorch/segment/mem/dict.go create mode 100644 index/scorch/segment/mem/posting.go create mode 100644 index/scorch/segment/mem/segment.go create mode 100644 index/scorch/segment/mem/segment_test.go create mode 100644 index/scorch/segment/segment.go create mode 100644 index/scorch/snapshot_index.go create mode 100644 index/scorch/snapshot_index_dict.go create mode 100644 index/scorch/snapshot_index_doc.go create mode 100644 
index/scorch/snapshot_index_tfr.go create mode 100644 index/scorch/snapshot_segment.go create mode 100644 index/scorch/stats.go diff --git a/index/analysis.go b/index/analysis.go index b626b9f3e..840dad97a 100644 --- a/index/analysis.go +++ b/index/analysis.go @@ -14,7 +14,10 @@ package index -import "github.com/blevesearch/bleve/document" +import ( + "github.com/blevesearch/bleve/analysis" + "github.com/blevesearch/bleve/document" +) type IndexRow interface { KeySize() int @@ -29,6 +32,11 @@ type IndexRow interface { type AnalysisResult struct { DocID string Rows []IndexRow + + // scorch + Document *document.Document + Analyzed []analysis.TokenFrequencies + Length []int } type AnalysisWork struct { diff --git a/index/scorch/README.md b/index/scorch/README.md new file mode 100644 index 000000000..cec982ebd --- /dev/null +++ b/index/scorch/README.md @@ -0,0 +1,420 @@ +# scorch + +## Definitions + +Batch +- A collection of Documents to mutate in the index. + +Document +- Has a unique identifier (arbitrary bytes). +- Is comprised of a list of fields. + +Field +- Has a name (string). +- Has a type (text, number, date, geopoint). +- Has a value (depending on type). +- Can be indexed, stored, or both. +- If indexed, can be analyzed. +-m If indexed, can optionally store term vectors. + +## Scope + +Scorch *MUST* implement the bleve.index API without requiring any changes to this API. + +Scorch *MAY* introduce new interfaces, which can be discovered to allow use of new capabilities not in the current API. + +## Implementation + +The scorch implementation starts with the concept of a segmented index. + +A segment is simply a slice, subset, or portion of the entire index. A segmented index is one which is composed of one or more segments. Although segments are created in a particular order, knowing this ordering is not required to achieve correct semantics when querying. 
Because there is no ordering, this means that when searching an index, you can (and should) search all the segments concurrently. + +### Internal Wrapper + +In order to accommodate the existing APIs while also improving the implementation, the scorch implementation includes some wrapper functionality that must be described. + +#### \_id field + +In scorch, field 0 is prearranged to be named \_id. All documents have a value for this field, which is the documents external identifier. In this version the field *MUST* be both indexed AND stored. The scorch wrapper adds this field, as it will not be present in the Document from the calling bleve code. + +NOTE: If a document already contains a field \_id, it will be replaced. If this is problematic, the caller must ensure such a scenario does not happen. + +### Proposed Structures + +``` +type Segment interface { + + Dictionary(field string) TermDictionary + +} + +type TermDictionary interface { + + PostingsList(term string, excluding PostingsList) PostingsList + +} + +type PostingsList interface { + + Next() Posting + + And(other PostingsList) PostingsList + Or(other PostingsList) PostingsList + +} + +type Posting interface { + Number() uint64 + + Frequency() uint64 + Norm() float64 + + Locations() Locations +} + +type Locations interface { + Start() uint64 + End() uint64 + Pos() uint64 + ArrayPositions() ... +} + +type DeletedDocs { + +} + +type SegmentSnapshot struct { + segment Segment + deleted PostingsList +} + +type IndexSnapshot struct { + segment []SegmentSnapshot +} +``` +**What about errors?** +**What about memory mgmnt or context?** +**Postings List separate iterator to separate stateful from stateless** +### Mutating the Index + +The bleve.index API has methods for directly making individual mutations (Update/Delete/SetInternal/DeleteInternal), however for this first implementation, we assume that all of these calls can simply be turned into a Batch of size 1. 
This may be highly inefficient, but it will be correct. This decision is made based on the fact that Couchbase FTS always uses Batches. + +NOTE: As a side-effect of this decision, it should be clear that performance tuning may depend on the batch size, which may in-turn require changes in FTS. + +From this point forward, only Batch mutations will be discussed. + +Sequence of Operations: + +1. For each document in the batch, search through all existing segments. The goal is to build up a per-segment bitset which tells us which documents in that segment are obsoleted by the addition of the new segment we're currently building. NOTE: we're not ready for this change to take effect yet, so rather than this operation mutating anything, they simply return bitsets, which we can apply later. Logically, this is something like: + + ``` + foreach segment { + dict := segment.Dictionary("\_id") + postings := empty postings list + foreach docID { + postings = postings.Or(dict.PostingsList(docID, nil)) + } + } + ``` + + NOTE: it is illustrated above as nested for loops, but some or all of these could be done concurrently. The end result is that for each segment, we have a (possibly empty) bitset. + +2. Also concurrent with 1, the documents in the batch are analyzed. This analysis proceeds using the existing analyzer pool. + +3. (after 2 completes) Analyzed documents are fed into a function which builds a new Segment representing this information. + +4. We now have everything we need to update the state of the system to include this new snapshot. + + - Acquire a lock + - Create a new IndexSnapshot + - For each SegmentSnapshot in the IndexSnapshot, take the deleted PostingsList and OR it with the new postings list for this Segment. Construct a new SegmentSnapshot for the segment using this new deleted PostingsList. Append this SegmentSnapshot to the IndexSnapshot. + - Create a new SegmentSnapshot wrapping our new segment with nil deleted docs. 
+ - Append the new SegmentSnapshot to the IndexSnapshot + - Release the lock + +An ASCII art example: + ``` + 0 - Empty Index + + No segments + + IndexSnapshot + segments [] + deleted [] + + + 1 - Index Batch [ A B C ] + + segment 0 + numbers [ 1 2 3 ] + \_id [ A B C ] + + IndexSnapshot + segments [ 0 ] + deleted [ nil ] + + + 2 - Index Batch [ B' ] + + segment 0 1 + numbers [ 1 2 3 ] [ 1 ] + \_id [ A B C ] [ B ] + + Compute bitset segment-0-deleted-by-1: + [ 0 1 0 ] + + OR it with previous (nil) (call it 0-1) + [ 0 1 0 ] + + IndexSnapshot + segments [ 0 1 ] + deleted [ 0-1 nil ] + + 3 - Index Batch [ C' ] + + segment 0 1 2 + numbers [ 1 2 3 ] [ 1 ] [ 1 ] + \_id [ A B C ] [ B ] [ C ] + + Compute bitset segment-0-deleted-by-2: + [ 0 0 1 ] + + OR it with previous ([ 0 1 0 ]) (call it 0-12) + [ 0 1 1 ] + + Compute bitset segment-1-deleted-by-2: + [ 0 0 0 ] + + OR it with previous (nil) + still just nil + + + IndexSnapshot + segments [ 0 1 2 ] + deleted [ 0-12 nil nil ] + ``` + +**is there opportunity to stop early when doc is found in one segment** +**also, more efficient way to find bits for long lists of ids?** + +### Searching + +In the bleve.index API all searching starts by getting an IndexReader, which represents a snapshot of the index at a point in time. + +As described in the section above, our index implementation maintains a pointer to the current IndexSnapshot. When a caller gets an IndexReader, they get a copy of this pointer, and can use it as long as they like. The IndexSnapshot contains SegmentSnapshots, which only contain pointers to immutable segments. The deleted posting lists associated with a segment change over time, but the particular deleted posting list in YOUR snapshot is immutable. This gives a stable view of the data. + +#### Term Search + +Term search is the only searching primitive exposed in today's bleve.index API. 
This ultimately could limit our ability to take advantage of the indexing improvements, but it also means it will be easier to get a first version of this working. + +A term search for term T in field F will look something like this: + +``` + searchResultPostings = empty + foreach segment { + dict := segment.Dictionary(F) + segmentResultPostings = dict.PostingsList(T, segmentSnapshotDeleted) + // make segmentLocal numbers into global numbers, and flip bits in searchResultPostings + } +``` + +The searchResultPostings will be a new implementation of the TermFieldReader interface. + +As a reminder this interface is: + +``` +// TermFieldReader is the interface exposing the enumeration of documents +// containing a given term in a given field. Documents are returned in byte +// lexicographic order over their identifiers. +type TermFieldReader interface { + // Next returns the next document containing the term in this field, or nil + // when it reaches the end of the enumeration. The preAlloced TermFieldDoc + // is optional, and when non-nil, will be used instead of allocating memory. + Next(preAlloced *TermFieldDoc) (*TermFieldDoc, error) + + // Advance resets the enumeration at specified document or its immediate + // follower. + Advance(ID IndexInternalID, preAlloced *TermFieldDoc) (*TermFieldDoc, error) + + // Count returns the number of documents contains the term in this field. + Count() uint64 + Close() error +} +``` + +At first glance this appears problematic: we have no way to return documents in order of their identifiers. But it turns out the wording of this is perhaps too strong, or a bit ambiguous. Originally, this referred to the external identifiers, but with the introduction of a distinction between internal/external identifiers, returning them in order of their internal identifiers is also acceptable. 
**ASIDE**: the reason for this is that most callers just use Next() and literally don't care what the order is, they could be in any order and it would be fine. There is only one search that cares and that is the ConjunctionSearcher, which relies on Next/Advance having very specific semantics. Later in this document we will have a proposal to split into multiple interfaces: + +- The weakest interface, only supports Next() no ordering at all. +- Ordered, supporting Advance() +- And/Or'able capable of internally efficiently doing these ops with like interfaces (if not capable then can always fall back to external walking) + +But, the good news is that we don't even have to do that for our first implementation. As long as the global numbers we use for internal identifiers are consistent within this IndexSnapshot, then Next() will be ordered by ascending document number, and Advance() will still work correctly. + +NOTE: there is another place where we rely on the ordering of these hits, and that is in the "\_id" sort order. Previously this was the natural order, and a NOOP for the collector, now it must be implemented by actually sorting on the "\_id" field. We probably should introduce at least a marker interface to detect this. + +An ASCII art example: + +``` +Let's start with the IndexSnapshot we ended with earlier: + +3 - Index Batch [ C' ] + + segment 0 1 2 + numbers [ 1 2 3 ] [ 1 ] [ 1 ] + \_id [ A B C ] [ B ] [ C ] + + Compute bitset segment-0-deleted-by-2: + [ 0 0 1 ] + + OR it with previous ([ 0 1 0 ]) (call it 0-12) + [ 0 1 1 ] + +Compute bitset segment-1-deleted-by-2: + [ 0 0 0 ] + +OR it with previous (nil) + still just nil + + + IndexSnapshot + segments [ 0 1 2 ] + deleted [ 0-12 nil nil ] + +Now let's search for the term 'cat' in the field 'desc' and let's assume that Document C (both versions) would match it. 
+ +Concurrently: + + - Segment 0 + - Get Term Dictionary For Field 'desc' + - From it get Postings List for term 'cat' EXCLUDING 0-12 + - raw segment matches [ 0 0 1 ] but excluding [ 0 1 1 ] gives [ 0 0 0 ] + - Segment 1 + - Get Term Dictionary For Field 'desc' + - From it get Postings List for term 'cat' excluding nil + - [ 0 ] + - Segment 2 + - Get Term Dictionary For Field 'desc' + - From it get Postings List for term 'cat' excluding nil + - [ 1 ] + +Map local bitsets into global number space (global meaning cross-segment but still unique to this snapshot) + +IndexSnapshot already should have mapping something like: +0 - Offset 0 +1 - Offset 3 (because segment 0 had 3 docs) +2 - Offset 4 (because segment 1 had 1 doc) + +This maps to search result bitset: + +[ 0 0 0 0 1] + +Caller would call Next() and get doc number 5 (assuming 1 based indexing for now) + +Caller could then ask to get term locations, stored fields, external doc ID for document number 5. Internally in the IndexSnapshot, we can now convert that back, and realize doc number 5 comes from segment 2, 5-4=1 so we're looking for doc number 1 in segment 2. That happens to be C... + +``` + +#### Future improvements + +In the future, interfaces to detect these non-serially operating TermFieldReaders could expose their own And() and Or() up to the higher level Conjunction/Disjunction searchers. Doing this alone offers some win, but also means there would be greater burden on the Searcher code rewriting logical expressions for maximum performance. + +Another related topic is that of peak memory usage. With serially operating TermFieldReaders it was necessary to start them all at the same time and operate in unison. However, with these non-serially operating TermFieldReaders we have the option of doing a few at a time, consolidating them, disposing the intermediaries, and then doing a few more. For very complex queries with many clauses this could reduce peak memory usage. 
+ + +### Memory Tracking + +All segments must be able to produce two statistics, an estimate of their explicit memory usage, and their actual size on disk (if any). For in-memory segments, disk usage could be zero, and the memory usage represents the entire information content. For mmap-based disk segments, the memory could be as low as the size of the tracking structure itself (say just a few pointers). + +This would allow the implementation to throttle or block incoming mutations when a threshold memory usage has (or would be) exceeded. + +### Persistence + +Obviously, we want to support (but maybe not require) asynchronous persistence of segments. My expectation is that segments are initially built in memory. At some point they are persisted to disk. This poses some interesting challenges. + +At runtime, the state of an index (its IndexSnapshot) is not only the contents of the segments, but also the bitmasks of deleted documents. These bitmasks indirectly encode an ordering in which the segments were added. The reason is that the bitmasks encode which items have been obsoleted by other (subsequent or more future) segments. In the runtime implementation we compute bitmask deltas and then merge them at the same time we bring the new segment in. One idea is that we could take a similar approach on disk. When we persist a segment, we persist the bitmask deltas of segments known to exist at that time, and eventually these can get merged up into a base segment deleted bitmask. + +This also relates to the topic of rollback, addressed next... + + +### Rollback + +One desirable property in the Couchbase ecosystem is the ability to rollback to some previous (though typically not long ago) state. One idea for keeping this property in this design is to protect some of the most recent segments from merging. Then, if necessary, they could be "undone" to reveal previous states of the system. In these scenarios "undone" has to properly undo the deleted bitmasks on the other segments. 
Again, the current thinking is that rather than "undo" anything, it could be work that was deferred in the first place, thus making it easier to logically undo. + +Another possibly related approach would be to tie this into our existing snapshot mechanism. Perhaps simulating a slow reader (holding onto index snapshots) for some period of time can be the mechanism to achieve the desired end goal. + + +### Internal Storage + +The bleve.index API has support for "internal storage". The ability to store information under a separate name space. + +This is not used for high volume storage, so it is tempting to think we could just put a small k/v store alongside the rest of the index. But, the reality is that this storage is used to maintain key information related to the rollback scenario. Because of this, it's crucial that ordering and overwriting of key/value pairs correspond with actual segment persistence in the index. Based on this, I believe it's important to put the internal key/value pairs inside the segments themselves. But, this also means that they must follow a similar "deleted" bitmask approach to obsolete values in older segments. But, this also seems to substantially increase the complexity of the solution because of the separate name space, it would appear to require its own bitmask. Further, keys aren't numeric, which then implies yet another mapping from internal key to number, etc. + +More thought is required here. + +### Merging + +The segmented index approach requires merging to prevent the number of segments from growing too large. + +Recent experience with LSMs has taught us that having the correct merge strategy can make a huge difference in the overall performance of the system. In particular, a simple merge strategy which merges segments too aggressively can lead to high write amplification and unnecessarily rendering cached data useless. + +A few simple principles have been identified. 
+ +- Roughly we merge multiple smaller segments into a single larger one. +- The larger a segment gets the less likely we should be to ever merge it. +- Segments with large numbers of deleted/obsoleted items are good candidates as the merge will result in a space savings. +- Segments with all items deleted/obsoleted can be dropped. + +Merging of a segment should be able to proceed even if that segment is held by an ongoing snapshot, it should only delay the removal of it. + + +## TODO + +- need reference counting on the segments, to know when we can safely remove? + +- how well will bitmaps perform when large and possibly mmap'd? + + +----- +thinking out loud on storage + +- fields + - field name - field id +- term dictionary + - field id - FST (values postings ids) +- postings + - posting id - postings list +- freqs + - posting id - freqs list +- norms + - posting id - norms list +- stored + - docNum + - field id - field values + + + +---- + +race dialog with steve: + +state: 2, 4, 8 + +- introducing new segment X + - deleted bitmasks, 2, 4, 8 + +- merger, merge 4 and 8 + new segment Y + + +- merger wins + + state: 2, 9 + + introducer: need to recompute bitmask for 9, could lose again and keep losing race + +- introducer wins + + state: 2, 4, 8, X + 2-X, 4-X, 8-X, nil + + merger finishes: new segment Y, is not valid, need to be recomputed diff --git a/index/scorch/field_dict_test.go b/index/scorch/field_dict_test.go new file mode 100644 index 000000000..81285e76a --- /dev/null +++ b/index/scorch/field_dict_test.go @@ -0,0 +1,164 @@ +package scorch + +import ( + "reflect" + "testing" + + "github.com/blevesearch/bleve/document" + "github.com/blevesearch/bleve/index" +) + +func TestIndexFieldDict(t *testing.T) { + + analysisQueue := index.NewAnalysisQueue(1) + idx, err := NewScorch(Name, nil, analysisQueue) + if err != nil { + t.Fatal(err) + } + err = idx.Open() + if err != nil { + t.Errorf("error opening index: %v", err) + } + defer func() { + err := idx.Close() + 
if err != nil { + t.Fatal(err) + } + }() + + var expectedCount uint64 + doc := document.NewDocument("1") + doc.AddField(document.NewTextField("name", []uint64{}, []byte("test"))) + err = idx.Update(doc) + if err != nil { + t.Errorf("Error updating index: %v", err) + } + expectedCount++ + + doc = document.NewDocument("2") + doc.AddField(document.NewTextFieldWithAnalyzer("name", []uint64{}, []byte("test test test"), testAnalyzer)) + doc.AddField(document.NewTextFieldCustom("desc", []uint64{}, []byte("eat more rice"), document.IndexField|document.IncludeTermVectors, testAnalyzer)) + doc.AddField(document.NewTextFieldCustom("prefix", []uint64{}, []byte("bob cat cats catting dog doggy zoo"), document.IndexField|document.IncludeTermVectors, testAnalyzer)) + err = idx.Update(doc) + if err != nil { + t.Errorf("Error updating index: %v", err) + } + expectedCount++ + + indexReader, err := idx.Reader() + if err != nil { + t.Error(err) + } + defer func() { + err := indexReader.Close() + if err != nil { + t.Fatal(err) + } + }() + + dict, err := indexReader.FieldDict("name") + if err != nil { + t.Errorf("error creating reader: %v", err) + } + defer func() { + err := dict.Close() + if err != nil { + t.Fatal(err) + } + }() + + termCount := 0 + curr, err := dict.Next() + for err == nil && curr != nil { + termCount++ + if curr.Term != "test" { + t.Errorf("expected term to be 'test', got '%s'", curr.Term) + } + curr, err = dict.Next() + } + if termCount != 1 { + t.Errorf("expected 1 term for this field, got %d", termCount) + } + + dict2, err := indexReader.FieldDict("desc") + if err != nil { + t.Errorf("error creating reader: %v", err) + } + defer func() { + err := dict2.Close() + if err != nil { + t.Fatal(err) + } + }() + + termCount = 0 + terms := make([]string, 0) + curr, err = dict2.Next() + for err == nil && curr != nil { + termCount++ + terms = append(terms, curr.Term) + curr, err = dict2.Next() + } + if termCount != 3 { + t.Errorf("expected 3 term for this field, got %d", 
termCount) + } + expectedTerms := []string{"eat", "more", "rice"} + if !reflect.DeepEqual(expectedTerms, terms) { + t.Errorf("expected %#v, got %#v", expectedTerms, terms) + } + // test start and end range + dict3, err := indexReader.FieldDictRange("desc", []byte("fun"), []byte("nice")) + if err != nil { + t.Errorf("error creating reader: %v", err) + } + defer func() { + err := dict3.Close() + if err != nil { + t.Fatal(err) + } + }() + + termCount = 0 + terms = make([]string, 0) + curr, err = dict3.Next() + for err == nil && curr != nil { + termCount++ + terms = append(terms, curr.Term) + curr, err = dict3.Next() + } + if termCount != 1 { + t.Errorf("expected 1 term for this field, got %d", termCount) + } + expectedTerms = []string{"more"} + if !reflect.DeepEqual(expectedTerms, terms) { + t.Errorf("expected %#v, got %#v", expectedTerms, terms) + } + + // test use case for prefix + dict4, err := indexReader.FieldDictPrefix("prefix", []byte("cat")) + if err != nil { + t.Errorf("error creating reader: %v", err) + } + defer func() { + err := dict4.Close() + if err != nil { + t.Fatal(err) + } + }() + + termCount = 0 + terms = make([]string, 0) + curr, err = dict4.Next() + for err == nil && curr != nil { + termCount++ + terms = append(terms, curr.Term) + curr, err = dict4.Next() + } + if termCount != 3 { + t.Errorf("expected 3 term for this field, got %d", termCount) + } + expectedTerms = []string{"cat", "cats", "catting"} + if !reflect.DeepEqual(expectedTerms, terms) { + t.Errorf("expected %#v, got %#v", expectedTerms, terms) + } +} diff --git a/index/scorch/introducer.go b/index/scorch/introducer.go new file mode 100644 index 000000000..dc748ad85 --- /dev/null +++ b/index/scorch/introducer.go @@ -0,0 +1,85 @@ +package scorch + +import ( + "github.com/RoaringBitmap/roaring" + "github.com/blevesearch/bleve/index/scorch/segment" +) + +type segmentIntroduction struct { + id uint64 + data segment.Segment + obsoletes map[uint64]*roaring.Bitmap + ids []string + internal 
map[string][]byte + + applied chan struct{} +} + +func (s *Scorch) mainLoop() { + for { + select { + case <-s.closeCh: + return + + case next := <-s.introductions: + + // acquire lock + s.rootLock.Lock() + + // prepare new index snapshot, with curr size + 1 + newSnapshot := &IndexSnapshot{ + segment: make([]*SegmentSnapshot, len(s.root.segment)+1), + offsets: make([]uint64, len(s.root.segment)+1), + internal: make(map[string][]byte, len(s.root.segment)), + } + + // iterate through current segments + var running uint64 + for i := range s.root.segment { + // see if optimistic work included this segment + delta, ok := next.obsoletes[s.root.segment[i].id] + if !ok { + delta = s.root.segment[i].segment.DocNumbers(next.ids) + } + newSnapshot.segment[i] = &SegmentSnapshot{ + id: s.root.segment[i].id, + segment: s.root.segment[i].segment, + } + // apply new obsoletions + if s.root.segment[i].deleted == nil { + newSnapshot.segment[i].deleted = delta + } else { + newSnapshot.segment[i].deleted = s.root.segment[i].deleted.Clone() + newSnapshot.segment[i].deleted.Or(delta) + } + + newSnapshot.offsets[i] = running + running += s.root.segment[i].Count() + + } + // put new segment at end + newSnapshot.segment[len(s.root.segment)] = &SegmentSnapshot{ + id: next.id, + segment: next.data, + } + newSnapshot.offsets[len(s.root.segment)] = running + // copy old values + for key, oldVal := range s.root.internal { + newSnapshot.internal[key] = oldVal + } + // set new values and apply deletes + for key, newVal := range next.internal { + if newVal != nil { + newSnapshot.internal[key] = newVal + } else { + delete(newSnapshot.internal, key) + } + } + // swap in new segment + s.root = newSnapshot + // release lock + s.rootLock.Unlock() + close(next.applied) + } + } +} diff --git a/index/scorch/reader.go b/index/scorch/reader.go new file mode 100644 index 000000000..acb01905d --- /dev/null +++ b/index/scorch/reader.go @@ -0,0 +1,84 @@ +package scorch + +import ( + 
"github.com/blevesearch/bleve/document" + "github.com/blevesearch/bleve/index" +) + +type Reader struct { + root *IndexSnapshot +} + +func (r *Reader) TermFieldReader(term []byte, field string, includeFreq, + includeNorm, includeTermVectors bool) (index.TermFieldReader, error) { + return r.root.TermFieldReader(term, field, includeFreq, includeNorm, includeTermVectors) +} + +// DocIDReader returns an iterator over all doc ids +// The caller must close returned instance to release associated resources. +func (r *Reader) DocIDReaderAll() (index.DocIDReader, error) { + return r.root.DocIDReaderAll() +} + +func (r *Reader) DocIDReaderOnly(ids []string) (index.DocIDReader, error) { + return r.root.DocIDReaderOnly(ids) +} + +func (r *Reader) FieldDict(field string) (index.FieldDict, error) { + return r.root.FieldDict(field) +} + +// FieldDictRange is currently defined to include the start and end terms +func (r *Reader) FieldDictRange(field string, startTerm []byte, + endTerm []byte) (index.FieldDict, error) { + return r.root.FieldDictRange(field, startTerm, endTerm) +} + +func (r *Reader) FieldDictPrefix(field string, + termPrefix []byte) (index.FieldDict, error) { + return r.root.FieldDictPrefix(field, termPrefix) +} + +func (r *Reader) Document(id string) (*document.Document, error) { + return r.root.Document(id) +} +func (r *Reader) DocumentVisitFieldTerms(id index.IndexInternalID, fields []string, + visitor index.DocumentFieldTermVisitor) error { + panic("document visit field terms not implemented") +} + +func (r *Reader) Fields() ([]string, error) { + return r.root.Fields() +} + +func (r *Reader) GetInternal(key []byte) ([]byte, error) { + return r.root.GetInternal(key) +} + +func (r *Reader) DocCount() (uint64, error) { + return r.root.DocCount() +} + +func (r *Reader) ExternalID(id index.IndexInternalID) (string, error) { + return r.root.ExternalID(id) +} + +func (r *Reader) InternalID(id string) (index.IndexInternalID, error) { + return r.root.InternalID(id) +} + 
+func (r *Reader) DumpAll() chan interface{} { + panic("dumpall") +} + +func (r *Reader) DumpDoc(id string) chan interface{} { + panic("dumpdoc") +} + +func (r *Reader) DumpFields() chan interface{} { + panic("dumpfields") +} + +func (r *Reader) Close() error { + return nil +} diff --git a/index/scorch/reader_test.go b/index/scorch/reader_test.go new file mode 100644 index 000000000..a050bb44a --- /dev/null +++ b/index/scorch/reader_test.go @@ -0,0 +1,511 @@ +package scorch + +import ( + "reflect" + "testing" + + "github.com/blevesearch/bleve/document" + "github.com/blevesearch/bleve/index" +) + +func TestIndexReader(t *testing.T) { + analysisQueue := index.NewAnalysisQueue(1) + idx, err := NewScorch(Name, nil, analysisQueue) + if err != nil { + t.Fatal(err) + } + err = idx.Open() + if err != nil { + t.Errorf("error opening index: %v", err) + } + defer func() { + err := idx.Close() + if err != nil { + t.Fatal(err) + } + }() + + var expectedCount uint64 + doc := document.NewDocument("1") + doc.AddField(document.NewTextField("name", []uint64{}, []byte("test"))) + err = idx.Update(doc) + if err != nil { + t.Errorf("Error updating index: %v", err) + } + expectedCount++ + + doc = document.NewDocument("2") + doc.AddField(document.NewTextFieldWithAnalyzer("name", []uint64{}, []byte("test test test"), testAnalyzer)) + doc.AddField(document.NewTextFieldCustom("desc", []uint64{}, []byte("eat more rice"), document.IndexField|document.IncludeTermVectors, testAnalyzer)) + err = idx.Update(doc) + if err != nil { + t.Errorf("Error updating index: %v", err) + } + expectedCount++ + + indexReader, err := idx.Reader() + if err != nil { + t.Error(err) + } + defer func() { + err := indexReader.Close() + if err != nil { + t.Fatal(err) + } + }() + + // first look for a term that doesn't exist + reader, err := indexReader.TermFieldReader([]byte("nope"), "name", true, true, true) + if err != nil { + t.Errorf("Error accessing term field reader: %v", err) + } + count := reader.Count() + if 
count != 0 { + t.Errorf("Expected doc count to be: %d got: %d", 0, count) + } + err = reader.Close() + if err != nil { + t.Fatal(err) + } + + reader, err = indexReader.TermFieldReader([]byte("test"), "name", true, true, true) + if err != nil { + t.Errorf("Error accessing term field reader: %v", err) + } + + expectedCount = 2 + count = reader.Count() + if count != expectedCount { + t.Errorf("Exptected doc count to be: %d got: %d", expectedCount, count) + } + + var match *index.TermFieldDoc + var actualCount uint64 + match, err = reader.Next(nil) + for err == nil && match != nil { + match, err = reader.Next(nil) + if err != nil { + t.Errorf("unexpected error reading next") + } + actualCount++ + } + if actualCount != count { + t.Errorf("count was 2, but only saw %d", actualCount) + } + + internalID2, err := indexReader.InternalID("2") + if err != nil { + t.Fatal(err) + } + expectedMatch := &index.TermFieldDoc{ + ID: internalID2, + Freq: 1, + Norm: 0.5773502588272095, + Vectors: []*index.TermFieldVector{ + { + Field: "desc", + Pos: 3, + Start: 9, + End: 13, + }, + }, + } + tfr, err := indexReader.TermFieldReader([]byte("rice"), "desc", true, true, true) + if err != nil { + t.Errorf("unexpected error: %v", err) + } + match, err = tfr.Next(nil) + if err != nil { + t.Errorf("unexpected error: %v", err) + } + if !reflect.DeepEqual(expectedMatch, match) { + t.Errorf("got %#v, expected %#v", match, expectedMatch) + } + err = reader.Close() + if err != nil { + t.Fatal(err) + } + + // now test usage of advance + reader, err = indexReader.TermFieldReader([]byte("test"), "name", true, true, true) + if err != nil { + t.Errorf("Error accessing term field reader: %v", err) + } + + match, err = reader.Advance(internalID2, nil) + if err != nil { + t.Errorf("unexpected error: %v", err) + } + if match == nil { + t.Fatalf("Expected match, got nil") + } + if !match.ID.Equals(internalID2) { + t.Errorf("Expected ID '2', got '%s'", match.ID) + } + // NOTE: no point in changing this to 
internal id 3, there is no id 3 + // the test is looking for something that doens't exist and this doesn't + match, err = reader.Advance(index.IndexInternalID("3"), nil) + if err != nil { + t.Errorf("unexpected error: %v", err) + } + if match != nil { + t.Errorf("expected nil, got %v", match) + } + err = reader.Close() + if err != nil { + t.Fatal(err) + } + + // now test creating a reader for a field that doesn't exist + reader, err = indexReader.TermFieldReader([]byte("water"), "doesnotexist", true, true, true) + if err != nil { + t.Errorf("Error accessing term field reader: %v", err) + } + count = reader.Count() + if count != 0 { + t.Errorf("expected count 0 for reader of non-existent field") + } + match, err = reader.Next(nil) + if err != nil { + t.Errorf("unexpected error: %v", err) + } + if match != nil { + t.Errorf("expected nil, got %v", match) + } + match, err = reader.Advance(index.IndexInternalID("anywhere"), nil) + if err != nil { + t.Errorf("unexpected error: %v", err) + } + if match != nil { + t.Errorf("expected nil, got %v", match) + } + +} + +func TestIndexDocIdReader(t *testing.T) { + analysisQueue := index.NewAnalysisQueue(1) + idx, err := NewScorch(Name, nil, analysisQueue) + if err != nil { + t.Fatal(err) + } + err = idx.Open() + if err != nil { + t.Errorf("error opening index: %v", err) + } + defer func() { + err := idx.Close() + if err != nil { + t.Fatal(err) + } + }() + + var expectedCount uint64 + doc := document.NewDocument("1") + doc.AddField(document.NewTextField("name", []uint64{}, []byte("test"))) + err = idx.Update(doc) + if err != nil { + t.Errorf("Error updating index: %v", err) + } + expectedCount++ + + doc = document.NewDocument("2") + doc.AddField(document.NewTextField("name", []uint64{}, []byte("test test test"))) + doc.AddField(document.NewTextFieldWithIndexingOptions("desc", []uint64{}, []byte("eat more rice"), document.IndexField|document.IncludeTermVectors)) + err = idx.Update(doc) + if err != nil { + t.Errorf("Error updating 
index: %v", err) + } + expectedCount++ + + indexReader, err := idx.Reader() + if err != nil { + t.Error(err) + } + defer func() { + err := indexReader.Close() + if err != nil { + t.Error(err) + } + }() + + // first get all doc ids + reader, err := indexReader.DocIDReaderAll() + if err != nil { + t.Errorf("Error accessing doc id reader: %v", err) + } + defer func() { + err := reader.Close() + if err != nil { + t.Fatal(err) + } + }() + + id, err := reader.Next() + count := uint64(0) + for id != nil { + count++ + id, err = reader.Next() + } + if count != expectedCount { + t.Errorf("expected %d, got %d", expectedCount, count) + } + + // try it again, but jump to the second doc this time + reader2, err := indexReader.DocIDReaderAll() + if err != nil { + t.Errorf("Error accessing doc id reader: %v", err) + } + defer func() { + err := reader2.Close() + if err != nil { + t.Error(err) + } + }() + + internalID2, err := indexReader.InternalID("2") + if err != nil { + t.Fatal(err) + } + + id, err = reader2.Advance(internalID2) + if err != nil { + t.Error(err) + } + if !id.Equals(internalID2) { + t.Errorf("expected to find id '2', got '%s'", id) + } + + // again 3 doesn't exist cannot use internal id for 3 as there is none + // the important aspect is that this id doesn't exist, so its ok + id, err = reader2.Advance(index.IndexInternalID("3")) + if err != nil { + t.Error(err) + } + if id != nil { + t.Errorf("expected to find id '', got '%s'", id) + } +} + +func TestIndexDocIdOnlyReader(t *testing.T) { + analysisQueue := index.NewAnalysisQueue(1) + idx, err := NewScorch(Name, nil, analysisQueue) + if err != nil { + t.Fatal(err) + } + err = idx.Open() + if err != nil { + t.Errorf("error opening index: %v", err) + } + defer func() { + err := idx.Close() + if err != nil { + t.Fatal(err) + } + }() + + doc := document.NewDocument("1") + err = idx.Update(doc) + if err != nil { + t.Errorf("Error updating index: %v", err) + } + + doc = document.NewDocument("3") + err = idx.Update(doc) + 
if err != nil { + t.Errorf("Error updating index: %v", err) + } + + doc = document.NewDocument("5") + err = idx.Update(doc) + if err != nil { + t.Errorf("Error updating index: %v", err) + } + + doc = document.NewDocument("7") + err = idx.Update(doc) + if err != nil { + t.Errorf("Error updating index: %v", err) + } + + doc = document.NewDocument("9") + err = idx.Update(doc) + if err != nil { + t.Errorf("Error updating index: %v", err) + } + + indexReader, err := idx.Reader() + if err != nil { + t.Error(err) + } + defer func() { + err := indexReader.Close() + if err != nil { + t.Error(err) + } + }() + + onlyIds := []string{"1", "5", "9"} + reader, err := indexReader.DocIDReaderOnly(onlyIds) + if err != nil { + t.Errorf("Error accessing doc id reader: %v", err) + } + defer func() { + err := reader.Close() + if err != nil { + t.Fatal(err) + } + }() + + id, err := reader.Next() + count := uint64(0) + for id != nil { + count++ + id, err = reader.Next() + if err != nil { + t.Fatal(err) + } + } + if count != 3 { + t.Errorf("expected 3, got %d", count) + } + + // commented out because advance works with internal ids + // this test presumes we see items in external doc id order + // which is no longer the case, so simply converting external ids + // to internal ones is not logically correct + // not removing though because we need some way to test Advance() + + // // try it again, but jump + // reader2, err := indexReader.DocIDReaderOnly(onlyIds) + // if err != nil { + // t.Errorf("Error accessing doc id reader: %v", err) + // } + // defer func() { + // err := reader2.Close() + // if err != nil { + // t.Error(err) + // } + // }() + // + // id, err = reader2.Advance(index.IndexInternalID("5")) + // if err != nil { + // t.Error(err) + // } + // if !id.Equals(index.IndexInternalID("5")) { + // t.Errorf("expected to find id '5', got '%s'", id) + // } + // + // id, err = reader2.Advance(index.IndexInternalID("a")) + // if err != nil { + // t.Error(err) + // } + // if id != nil { 
+ // t.Errorf("expected to find id '', got '%s'", id) + // } + + // some keys aren't actually there + onlyIds = []string{"0", "2", "4", "5", "6", "8", "a"} + reader3, err := indexReader.DocIDReaderOnly(onlyIds) + if err != nil { + t.Errorf("Error accessing doc id reader: %v", err) + } + defer func() { + err := reader3.Close() + if err != nil { + t.Error(err) + } + }() + + id, err = reader3.Next() + count = uint64(0) + for id != nil { + count++ + id, err = reader3.Next() + } + if count != 1 { + t.Errorf("expected 1, got %d", count) + } + + // commented out because advance works with internal ids + // this test presumes we see items in external doc id order + // which is no longer the case, so simply converting external ids + // to internal ones is not logically correct + // not removing though because we need some way to test Advance() + + // // mix advance and next + // onlyIds = []string{"0", "1", "3", "5", "6", "9"} + // reader4, err := indexReader.DocIDReaderOnly(onlyIds) + // if err != nil { + // t.Errorf("Error accessing doc id reader: %v", err) + // } + // defer func() { + // err := reader4.Close() + // if err != nil { + // t.Error(err) + // } + // }() + // + // // first key is "1" + // id, err = reader4.Next() + // if err != nil { + // t.Error(err) + // } + // if !id.Equals(index.IndexInternalID("1")) { + // t.Errorf("expected to find id '1', got '%s'", id) + // } + // + // // advancing to key we dont have gives next + // id, err = reader4.Advance(index.IndexInternalID("2")) + // if err != nil { + // t.Error(err) + // } + // if !id.Equals(index.IndexInternalID("3")) { + // t.Errorf("expected to find id '3', got '%s'", id) + // } + // + // // next after advance works + // id, err = reader4.Next() + // if err != nil { + // t.Error(err) + // } + // if !id.Equals(index.IndexInternalID("5")) { + // t.Errorf("expected to find id '5', got '%s'", id) + // } + // + // // advancing to key we do have works + // id, err = reader4.Advance(index.IndexInternalID("9")) + // 
if err != nil { + // t.Error(err) + // } + // if !id.Equals(index.IndexInternalID("9")) { + // t.Errorf("expected to find id '9', got '%s'", id) + // } + // + // // advance backwards at end + // id, err = reader4.Advance(index.IndexInternalID("4")) + // if err != nil { + // t.Error(err) + // } + // if !id.Equals(index.IndexInternalID("5")) { + // t.Errorf("expected to find id '5', got '%s'", id) + // } + // + // // next after advance works + // id, err = reader4.Next() + // if err != nil { + // t.Error(err) + // } + // if !id.Equals(index.IndexInternalID("9")) { + // t.Errorf("expected to find id '9', got '%s'", id) + // } + // + // // advance backwards to key that exists, but not in only set + // id, err = reader4.Advance(index.IndexInternalID("7")) + // if err != nil { + // t.Error(err) + // } + // if !id.Equals(index.IndexInternalID("9")) { + // t.Errorf("expected to find id '9', got '%s'", id) + // } + +} diff --git a/index/scorch/scorch.go b/index/scorch/scorch.go new file mode 100644 index 000000000..d20261f5e --- /dev/null +++ b/index/scorch/scorch.go @@ -0,0 +1,218 @@ +package scorch + +import ( + "encoding/json" + "sync" + "sync/atomic" + "time" + + "github.com/RoaringBitmap/roaring" + "github.com/blevesearch/bleve/analysis" + "github.com/blevesearch/bleve/document" + "github.com/blevesearch/bleve/index" + "github.com/blevesearch/bleve/index/scorch/segment" + "github.com/blevesearch/bleve/index/scorch/segment/mem" + "github.com/blevesearch/bleve/index/store" + "github.com/blevesearch/bleve/registry" +) + +const Name = "scorch" + +const Version uint8 = 1 + +type Scorch struct { + version uint8 + storeConfig map[string]interface{} + analysisQueue *index.AnalysisQueue + stats *Stats + nextSegmentID uint64 + + rootLock sync.RWMutex + root *IndexSnapshot + + closeCh chan struct{} + introductions chan *segmentIntroduction +} + +func NewScorch(storeName string, storeConfig map[string]interface{}, analysisQueue *index.AnalysisQueue) (index.Index, error) { + rv := 
&Scorch{
+		version:       Version,
+		storeConfig:   storeConfig,
+		analysisQueue: analysisQueue,
+		stats:         &Stats{},
+		root:          &IndexSnapshot{},
+	}
+	return rv, nil
+}
+
+func (s *Scorch) Open() error {
+	s.closeCh = make(chan struct{})
+	s.introductions = make(chan *segmentIntroduction)
+	go s.mainLoop()
+	return nil
+}
+
+func (s *Scorch) Close() error {
+	close(s.closeCh)
+	return nil
+}
+
+func (s *Scorch) Update(doc *document.Document) error {
+	b := index.NewBatch()
+	b.Update(doc)
+	return s.Batch(b)
+}
+
+func (s *Scorch) Delete(id string) error {
+	b := index.NewBatch()
+	b.Delete(id)
+	return s.Batch(b)
+}
+
+// Batch applies a batch of changes to the index atomically
+func (s *Scorch) Batch(batch *index.Batch) error {
+
+	analysisStart := time.Now()
+
+	resultChan := make(chan *index.AnalysisResult, len(batch.IndexOps))
+
+	var numUpdates uint64
+	var numPlainTextBytes uint64
+	var ids []string
+	for docID, doc := range batch.IndexOps {
+		if doc != nil {
+			// insert _id field
+			doc.AddField(document.NewTextFieldCustom("_id", nil, []byte(doc.ID), document.IndexField|document.StoreField, nil))
+			numUpdates++
+			numPlainTextBytes += doc.NumPlainTextBytes()
+		}
+		ids = append(ids, docID)
+	}
+
+	// FIXME could sort ids list concurrent with analysis?
+ + go func() { + for _, doc := range batch.IndexOps { + if doc != nil { + aw := index.NewAnalysisWork(s, doc, resultChan) + // put the work on the queue + s.analysisQueue.Queue(aw) + } + } + }() + + // wait for analysis result + analysisResults := make([]*index.AnalysisResult, int(numUpdates)) + // newRowsMap := make(map[string][]index.IndexRow) + var itemsDeQueued uint64 + for itemsDeQueued < numUpdates { + result := <-resultChan + //newRowsMap[result.DocID] = result.Rows + analysisResults[itemsDeQueued] = result + itemsDeQueued++ + } + close(resultChan) + + atomic.AddUint64(&s.stats.analysisTime, uint64(time.Since(analysisStart))) + + var newSegment segment.Segment + if len(analysisResults) > 0 { + newSegment = mem.NewFromAnalyzedDocs(analysisResults) + } else { + newSegment = mem.New() + } + s.prepareSegment(newSegment, ids, batch.InternalOps) + + return nil +} + +func (s *Scorch) prepareSegment(newSegment segment.Segment, ids []string, + internalOps map[string][]byte) error { + + // new introduction + introduction := &segmentIntroduction{ + id: atomic.AddUint64(&s.nextSegmentID, 1), + data: newSegment, + ids: ids, + obsoletes: make(map[uint64]*roaring.Bitmap), + internal: internalOps, + applied: make(chan struct{}), + } + + // get read lock, to optimistically prepare obsoleted info + s.rootLock.RLock() + for i := range s.root.segment { + delta := s.root.segment[i].segment.DocNumbers(ids) + introduction.obsoletes[s.root.segment[i].id] = delta + } + s.rootLock.RUnlock() + + s.introductions <- introduction + + // block until this segment is applied + <-introduction.applied + + return nil +} + +func (s *Scorch) SetInternal(key, val []byte) error { + b := index.NewBatch() + b.SetInternal(key, val) + return s.Batch(b) +} + +func (s *Scorch) DeleteInternal(key []byte) error { + b := index.NewBatch() + b.DeleteInternal(key) + return s.Batch(b) +} + +// Reader returns a low-level accessor on the index data. Close it to +// release associated resources. 
+func (s *Scorch) Reader() (index.IndexReader, error) { + s.rootLock.RLock() + defer s.rootLock.RUnlock() + return &Reader{ + root: s.root, + }, nil +} + +func (s *Scorch) Stats() json.Marshaler { + return s.stats +} +func (s *Scorch) StatsMap() map[string]interface{} { + return s.stats.statsMap() +} + +func (s *Scorch) Analyze(d *document.Document) *index.AnalysisResult { + rv := &index.AnalysisResult{ + Document: d, + Analyzed: make([]analysis.TokenFrequencies, len(d.Fields)+len(d.CompositeFields)), + Length: make([]int, len(d.Fields)+len(d.CompositeFields)), + } + + for i, field := range d.Fields { + if field.Options().IsIndexed() { + fieldLength, tokenFreqs := field.Analyze() + rv.Analyzed[i] = tokenFreqs + rv.Length[i] = fieldLength + + if len(d.CompositeFields) > 0 { + // see if any of the composite fields need this + for _, compositeField := range d.CompositeFields { + compositeField.Compose(field.Name(), fieldLength, tokenFreqs) + } + } + } + } + + return rv +} + +func (s *Scorch) Advanced() (store.KVStore, error) { + return nil, nil +} + +func init() { + registry.RegisterIndexType(Name, NewScorch) +} diff --git a/index/scorch/scorch_test.go b/index/scorch/scorch_test.go new file mode 100644 index 000000000..48e80c8b8 --- /dev/null +++ b/index/scorch/scorch_test.go @@ -0,0 +1,1109 @@ +// Copyright (c) 2014 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package scorch + +import ( + "log" + "reflect" + "regexp" + "strconv" + "sync" + "testing" + "time" + + "github.com/blevesearch/bleve/analysis" + regexpTokenizer "github.com/blevesearch/bleve/analysis/tokenizer/regexp" + "github.com/blevesearch/bleve/document" + "github.com/blevesearch/bleve/index" +) + +var testAnalyzer = &analysis.Analyzer{ + Tokenizer: regexpTokenizer.NewRegexpTokenizer(regexp.MustCompile(`\w+`)), +} + +func TestIndexInsert(t *testing.T) { + analysisQueue := index.NewAnalysisQueue(1) + idx, err := NewScorch(Name, nil, analysisQueue) + if err != nil { + t.Fatal(err) + } + err = idx.Open() + if err != nil { + t.Errorf("error opening index: %v", err) + } + defer func() { + err := idx.Close() + if err != nil { + t.Fatal(err) + } + }() + + var expectedCount uint64 + reader, err := idx.Reader() + if err != nil { + t.Fatal(err) + } + docCount, err := reader.DocCount() + if err != nil { + t.Error(err) + } + if docCount != expectedCount { + t.Errorf("Expected document count to be %d got %d", expectedCount, docCount) + } + err = reader.Close() + if err != nil { + t.Fatal(err) + } + + doc := document.NewDocument("1") + doc.AddField(document.NewTextField("name", []uint64{}, []byte("test"))) + err = idx.Update(doc) + if err != nil { + t.Errorf("Error updating index: %v", err) + } + expectedCount++ + + reader, err = idx.Reader() + if err != nil { + t.Fatal(err) + } + docCount, err = reader.DocCount() + if err != nil { + t.Error(err) + } + if docCount != expectedCount { + t.Errorf("Expected document count to be %d got %d", expectedCount, docCount) + } + err = reader.Close() + if err != nil { + t.Fatal(err) + } +} + +func TestIndexInsertThenDelete(t *testing.T) { + analysisQueue := index.NewAnalysisQueue(1) + idx, err := NewScorch(Name, nil, analysisQueue) + if err != nil { + t.Fatal(err) + } + err = idx.Open() + if err != nil { + t.Errorf("error opening index: %v", err) + } + defer func() { + err := idx.Close() + if err != nil { + t.Fatal(err) + } + }() + + 
var expectedCount uint64 + reader, err := idx.Reader() + if err != nil { + t.Fatal(err) + } + docCount, err := reader.DocCount() + if err != nil { + t.Error(err) + } + if docCount != expectedCount { + t.Errorf("Expected document count to be %d got %d", expectedCount, docCount) + } + err = reader.Close() + if err != nil { + t.Fatal(err) + } + + doc := document.NewDocument("1") + doc.AddField(document.NewTextField("name", []uint64{}, []byte("test"))) + err = idx.Update(doc) + if err != nil { + t.Errorf("Error updating index: %v", err) + } + expectedCount++ + + doc2 := document.NewDocument("2") + doc2.AddField(document.NewTextField("name", []uint64{}, []byte("test"))) + err = idx.Update(doc2) + if err != nil { + t.Errorf("Error updating index: %v", err) + } + expectedCount++ + + reader, err = idx.Reader() + if err != nil { + t.Fatal(err) + } + docCount, err = reader.DocCount() + if err != nil { + t.Error(err) + } + if docCount != expectedCount { + t.Errorf("Expected document count to be %d got %d", expectedCount, docCount) + } + err = reader.Close() + if err != nil { + t.Fatal(err) + } + + err = idx.Delete("1") + if err != nil { + t.Errorf("Error deleting entry from index: %v", err) + } + expectedCount-- + + reader, err = idx.Reader() + if err != nil { + t.Fatal(err) + } + docCount, err = reader.DocCount() + if err != nil { + t.Error(err) + } + if docCount != expectedCount { + t.Errorf("Expected document count to be %d got %d", expectedCount, docCount) + } + err = reader.Close() + if err != nil { + t.Fatal(err) + } + + err = idx.Delete("2") + if err != nil { + t.Errorf("Error deleting entry from index: %v", err) + } + expectedCount-- + + reader, err = idx.Reader() + if err != nil { + t.Fatal(err) + } + docCount, err = reader.DocCount() + if err != nil { + t.Error(err) + } + if docCount != expectedCount { + t.Errorf("Expected document count to be %d got %d", expectedCount, docCount) + } + err = reader.Close() + if err != nil { + t.Fatal(err) + } +} + +func 
TestIndexInsertThenUpdate(t *testing.T) {
+	analysisQueue := index.NewAnalysisQueue(1)
+	idx, err := NewScorch(Name, nil, analysisQueue)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	var expectedCount uint64
+	err = idx.Open()
+	if err != nil {
+		t.Errorf("error opening index: %v", err)
+	}
+	defer func() {
+		err := idx.Close()
+		if err != nil {
+			t.Fatal(err)
+		}
+	}()
+
+	doc := document.NewDocument("1")
+	doc.AddField(document.NewTextField("name", []uint64{}, []byte("test")))
+	err = idx.Update(doc)
+	if err != nil {
+		t.Errorf("Error updating index: %v", err)
+	}
+	expectedCount++
+
+	// this update should overwrite one term, and introduce one new one
+	doc = document.NewDocument("1")
+	doc.AddField(document.NewTextFieldWithAnalyzer("name", []uint64{}, []byte("test fail"), testAnalyzer))
+	err = idx.Update(doc)
+	if err != nil {
+		t.Errorf("Error updating index: %v", err)
+	}
+
+	// now do another update that should remove one of the terms
+	doc = document.NewDocument("1")
+	doc.AddField(document.NewTextField("name", []uint64{}, []byte("fail")))
+	err = idx.Update(doc)
+	if err != nil {
+		t.Errorf("Error updating index: %v", err)
+	}
+
+	reader, err := idx.Reader()
+	if err != nil {
+		t.Fatal(err)
+	}
+	docCount, err := reader.DocCount()
+	if err != nil {
+		t.Error(err)
+	}
+	if docCount != expectedCount {
+		t.Errorf("Expected document count to be %d got %d", expectedCount, docCount)
+	}
+	err = reader.Close()
+	if err != nil {
+		t.Fatal(err)
+	}
+}
+
+func TestIndexInsertMultiple(t *testing.T) {
+	analysisQueue := index.NewAnalysisQueue(1)
+	idx, err := NewScorch(Name, nil, analysisQueue)
+	if err != nil {
+		t.Fatal(err)
+	}
+	err = idx.Open()
+	if err != nil {
+		t.Errorf("error opening index: %v", err)
+	}
+
+	var expectedCount uint64
+
+	doc := document.NewDocument("1")
+	doc.AddField(document.NewTextField("name", []uint64{}, []byte("test")))
+	err = idx.Update(doc)
+	if err != nil {
+		t.Errorf("Error updating index: %v", err)
+	}
+	
expectedCount++ + + doc = document.NewDocument("2") + doc.AddField(document.NewTextField("name", []uint64{}, []byte("test"))) + err = idx.Update(doc) + if err != nil { + t.Errorf("Error updating index: %v", err) + } + expectedCount++ + + doc = document.NewDocument("3") + doc.AddField(document.NewTextField("name", []uint64{}, []byte("test"))) + err = idx.Update(doc) + if err != nil { + t.Errorf("Error updating index: %v", err) + } + expectedCount++ + + reader, err := idx.Reader() + if err != nil { + t.Fatal(err) + } + docCount, err := reader.DocCount() + if err != nil { + t.Error(err) + } + if docCount != expectedCount { + t.Errorf("Expected document count to be %d got %d", expectedCount, docCount) + } + err = reader.Close() + if err != nil { + t.Fatal(err) + } +} + +func TestIndexInsertWithStore(t *testing.T) { + analysisQueue := index.NewAnalysisQueue(1) + idx, err := NewScorch(Name, nil, analysisQueue) + if err != nil { + t.Fatal(err) + } + err = idx.Open() + if err != nil { + t.Errorf("error opening index: %v", err) + } + defer func() { + err := idx.Close() + if err != nil { + t.Fatal(err) + } + }() + + var expectedCount uint64 + reader, err := idx.Reader() + if err != nil { + t.Fatal(err) + } + docCount, err := reader.DocCount() + if err != nil { + t.Error(err) + } + if docCount != expectedCount { + t.Errorf("Expected document count to be %d got %d", expectedCount, docCount) + } + err = reader.Close() + if err != nil { + t.Fatal(err) + } + + doc := document.NewDocument("1") + doc.AddField(document.NewTextFieldWithIndexingOptions("name", []uint64{}, []byte("test"), document.IndexField|document.StoreField)) + err = idx.Update(doc) + if err != nil { + t.Errorf("Error updating index: %v", err) + } + expectedCount++ + + reader, err = idx.Reader() + if err != nil { + t.Fatal(err) + } + docCount, err = reader.DocCount() + if err != nil { + t.Error(err) + } + if docCount != expectedCount { + t.Errorf("Expected document count to be %d got %d", expectedCount, docCount) + 
}
+	err = reader.Close()
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	indexReader, err := idx.Reader()
+	if err != nil {
+		t.Error(err)
+	}
+	defer func() {
+		err := indexReader.Close()
+		if err != nil {
+			t.Fatal(err)
+		}
+	}()
+
+	storedDoc, err := indexReader.Document("1")
+	if err != nil {
+		t.Error(err)
+	}
+
+	if len(storedDoc.Fields) != 2 {
+		t.Errorf("expected 2 stored fields, got %d", len(storedDoc.Fields))
+	}
+	for _, field := range storedDoc.Fields {
+		if field.Name() == "name" {
+			textField, ok := field.(*document.TextField)
+			if !ok {
+				t.Errorf("expected text field")
+			}
+			if string(textField.Value()) != "test" {
+				t.Errorf("expected field content 'test', got '%s'", string(textField.Value()))
+			}
+		} else if field.Name() == "_id" {
+			textField, ok := field.(*document.TextField)
+			if !ok {
+				t.Errorf("expected text field")
+			}
+			if string(textField.Value()) != "1" {
+				t.Errorf("expected field content '1', got '%s'", string(textField.Value()))
+			}
+		}
+	}
+}
+
+func TestIndexInternalCRUD(t *testing.T) {
+	analysisQueue := index.NewAnalysisQueue(1)
+	idx, err := NewScorch(Name, nil, analysisQueue)
+	if err != nil {
+		t.Fatal(err)
+	}
+	err = idx.Open()
+	if err != nil {
+		t.Errorf("error opening index: %v", err)
+	}
+	defer func() {
+		err := idx.Close()
+		if err != nil {
+			t.Fatal(err)
+		}
+	}()
+
+	indexReader, err := idx.Reader()
+	if err != nil {
+		t.Error(err)
+	}
+
+	// get something that doesn't exist yet
+	val, err := indexReader.GetInternal([]byte("key"))
+	if err != nil {
+		t.Error(err)
+	}
+	if val != nil {
+		t.Errorf("expected nil, got %s", val)
+	}
+
+	err = indexReader.Close()
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	// set
+	err = idx.SetInternal([]byte("key"), []byte("abc"))
+	if err != nil {
+		t.Error(err)
+	}
+
+	indexReader2, err := idx.Reader()
+	if err != nil {
+		t.Error(err)
+	}
+
+	// get
+	val, err = indexReader2.GetInternal([]byte("key"))
+	if err != nil {
+		t.Error(err)
+	}
+	if string(val) != "abc" {
+		t.Errorf("expected %s, got 
'%s'", "abc", val) + } + + err = indexReader2.Close() + if err != nil { + t.Fatal(err) + } + + // delete + err = idx.DeleteInternal([]byte("key")) + if err != nil { + t.Error(err) + } + + indexReader3, err := idx.Reader() + if err != nil { + t.Error(err) + } + + // get again + val, err = indexReader3.GetInternal([]byte("key")) + if err != nil { + t.Error(err) + } + if val != nil { + t.Errorf("expected nil, got %s", val) + } + + err = indexReader3.Close() + if err != nil { + t.Fatal(err) + } +} + +func TestIndexBatch(t *testing.T) { + analysisQueue := index.NewAnalysisQueue(1) + idx, err := NewScorch(Name, nil, analysisQueue) + if err != nil { + t.Fatal(err) + } + err = idx.Open() + if err != nil { + t.Errorf("error opening index: %v", err) + } + defer func() { + err := idx.Close() + if err != nil { + t.Fatal(err) + } + }() + + var expectedCount uint64 + + // first create 2 docs the old fashioned way + doc := document.NewDocument("1") + doc.AddField(document.NewTextField("name", []uint64{}, []byte("test"))) + err = idx.Update(doc) + if err != nil { + t.Errorf("Error updating index: %v", err) + } + expectedCount++ + + doc = document.NewDocument("2") + doc.AddField(document.NewTextField("name", []uint64{}, []byte("test2"))) + err = idx.Update(doc) + if err != nil { + t.Errorf("Error updating index: %v", err) + } + expectedCount++ + + // now create a batch which does 3 things + // insert new doc + // update existing doc + // delete existing doc + // net document count change 0 + + batch := index.NewBatch() + doc = document.NewDocument("3") + doc.AddField(document.NewTextField("name", []uint64{}, []byte("test3"))) + batch.Update(doc) + doc = document.NewDocument("2") + doc.AddField(document.NewTextField("name", []uint64{}, []byte("test2updated"))) + batch.Update(doc) + batch.Delete("1") + + err = idx.Batch(batch) + if err != nil { + t.Error(err) + } + + indexReader, err := idx.Reader() + if err != nil { + t.Error(err) + } + defer func() { + err := indexReader.Close() + 
if err != nil { + t.Fatal(err) + } + }() + + docCount, err := indexReader.DocCount() + if err != nil { + t.Fatal(err) + } + if docCount != expectedCount { + t.Errorf("Expected document count to be %d got %d", expectedCount, docCount) + } + + docIDReader, err := indexReader.DocIDReaderAll() + if err != nil { + t.Error(err) + } + var docIds []index.IndexInternalID + docID, err := docIDReader.Next() + for docID != nil && err == nil { + docIds = append(docIds, docID) + docID, err = docIDReader.Next() + } + if err != nil { + t.Error(err) + } + externalDocIds := map[string]struct{}{} + // convert back to external doc ids + for _, id := range docIds { + externalID, err := indexReader.ExternalID(id) + if err != nil { + t.Fatal(err) + } + externalDocIds[externalID] = struct{}{} + } + expectedDocIds := map[string]struct{}{ + "2": struct{}{}, + "3": struct{}{}, + } + if !reflect.DeepEqual(externalDocIds, expectedDocIds) { + t.Errorf("expected ids: %v, got ids: %v", expectedDocIds, externalDocIds) + } +} + +func TestIndexInsertUpdateDeleteWithMultipleTypesStored(t *testing.T) { + analysisQueue := index.NewAnalysisQueue(1) + idx, err := NewScorch(Name, nil, analysisQueue) + if err != nil { + t.Fatal(err) + } + err = idx.Open() + if err != nil { + t.Errorf("error opening index: %v", err) + } + defer func() { + err := idx.Close() + if err != nil { + t.Fatal(err) + } + }() + + var expectedCount uint64 + reader, err := idx.Reader() + if err != nil { + t.Fatal(err) + } + docCount, err := reader.DocCount() + if err != nil { + t.Error(err) + } + if docCount != expectedCount { + t.Errorf("Expected document count to be %d got %d", expectedCount, docCount) + } + err = reader.Close() + if err != nil { + t.Fatal(err) + } + + doc := document.NewDocument("1") + doc.AddField(document.NewTextFieldWithIndexingOptions("name", []uint64{}, []byte("test"), document.IndexField|document.StoreField)) + doc.AddField(document.NewNumericFieldWithIndexingOptions("age", []uint64{}, 35.99, 
document.IndexField|document.StoreField))
+	df, err := document.NewDateTimeFieldWithIndexingOptions("unixEpoch", []uint64{}, time.Unix(0, 0), document.IndexField|document.StoreField)
+	if err != nil {
+		t.Error(err)
+	}
+	doc.AddField(df)
+	err = idx.Update(doc)
+	if err != nil {
+		t.Errorf("Error updating index: %v", err)
+	}
+	expectedCount++
+
+	reader, err = idx.Reader()
+	if err != nil {
+		t.Fatal(err)
+	}
+	docCount, err = reader.DocCount()
+	if err != nil {
+		t.Error(err)
+	}
+	if docCount != expectedCount {
+		t.Errorf("Expected document count to be %d got %d", expectedCount, docCount)
+	}
+	err = reader.Close()
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	indexReader, err := idx.Reader()
+	if err != nil {
+		t.Error(err)
+	}
+
+	storedDoc, err := indexReader.Document("1")
+	if err != nil {
+		t.Error(err)
+	}
+
+	err = indexReader.Close()
+	if err != nil {
+		t.Error(err)
+	}
+
+	if len(storedDoc.Fields) != 4 {
+		t.Errorf("expected 4 stored fields, got %d", len(storedDoc.Fields))
+	}
+	for _, field := range storedDoc.Fields {
+
+		if field.Name() == "name" {
+			textField, ok := field.(*document.TextField)
+			if !ok {
+				t.Errorf("expected text field")
+			}
+			if string(textField.Value()) != "test" {
+				t.Errorf("expected field content 'test', got '%s'", string(textField.Value()))
+			}
+		} else if field.Name() == "age" {
+			numField, ok := field.(*document.NumericField)
+			if !ok {
+				t.Errorf("expected numeric field")
+			}
+			numFieldNumer, err := numField.Number()
+			if err != nil {
+				t.Error(err)
+			} else {
+				if numFieldNumer != 35.99 {
+					t.Errorf("expected numeric value 35.99, got %f", numFieldNumer)
+				}
+			}
+		} else if field.Name() == "unixEpoch" {
+			dateField, ok := field.(*document.DateTimeField)
+			if !ok {
+				t.Errorf("expected date field")
+			}
+			dateFieldDate, err := dateField.DateTime()
+			if err != nil {
+				t.Error(err)
+			} else {
+				if dateFieldDate != time.Unix(0, 0).UTC() {
+					t.Errorf("expected date value unix epoch, got %v", dateFieldDate)
+				}
+			}
+		}
+
+	}
+
+	// now 
update the document, but omit one of the fields
+	doc = document.NewDocument("1")
+	doc.AddField(document.NewTextFieldWithIndexingOptions("name", []uint64{}, []byte("testup"), document.IndexField|document.StoreField))
+	doc.AddField(document.NewNumericFieldWithIndexingOptions("age", []uint64{}, 36.99, document.IndexField|document.StoreField))
+	err = idx.Update(doc)
+	if err != nil {
+		t.Errorf("Error updating index: %v", err)
+	}
+
+	indexReader2, err := idx.Reader()
+	if err != nil {
+		t.Error(err)
+	}
+
+	// expected doc count shouldn't have changed
+	docCount, err = indexReader2.DocCount()
+	if err != nil {
+		t.Fatal(err)
+	}
+	if docCount != expectedCount {
+		t.Errorf("Expected document count to be %d got %d", expectedCount, docCount)
+	}
+
+	// should only get 3 fields back now though (name, age, _id)
+	storedDoc, err = indexReader2.Document("1")
+	if err != nil {
+		t.Error(err)
+	}
+
+	err = indexReader2.Close()
+	if err != nil {
+		t.Error(err)
+	}
+
+	if len(storedDoc.Fields) != 3 {
+		t.Errorf("expected 3 stored fields, got %d", len(storedDoc.Fields))
+	}
+
+	for _, field := range storedDoc.Fields {
+
+		if field.Name() == "name" {
+			textField, ok := field.(*document.TextField)
+			if !ok {
+				t.Errorf("expected text field")
+			}
+			if string(textField.Value()) != "testup" {
+				t.Errorf("expected field content 'testup', got '%s'", string(textField.Value()))
+			}
+		} else if field.Name() == "age" {
+			numField, ok := field.(*document.NumericField)
+			if !ok {
+				t.Errorf("expected numeric field")
+			}
+			numFieldNumer, err := numField.Number()
+			if err != nil {
+				t.Error(err)
+			} else {
+				if numFieldNumer != 36.99 {
+					t.Errorf("expected numeric value 36.99, got %f", numFieldNumer)
+				}
+			}
+		}
+	}
+
+	// now delete the document
+	err = idx.Delete("1")
+	expectedCount--
+
+	// doc count should now reflect the delete
+	reader, err = idx.Reader()
+	if err != nil {
+		t.Fatal(err)
+	}
+	docCount, err = reader.DocCount()
+	if err != nil {
+		t.Error(err)
+	}
+	if docCount != expectedCount {
+		
t.Errorf("Expected document count to be %d got %d", expectedCount, docCount) + } + err = reader.Close() + if err != nil { + t.Fatal(err) + } +} + +func TestIndexInsertFields(t *testing.T) { + analysisQueue := index.NewAnalysisQueue(1) + idx, err := NewScorch(Name, nil, analysisQueue) + if err != nil { + t.Fatal(err) + } + err = idx.Open() + if err != nil { + t.Errorf("error opening index: %v", err) + } + defer func() { + err := idx.Close() + if err != nil { + t.Fatal(err) + } + }() + + doc := document.NewDocument("1") + doc.AddField(document.NewTextFieldWithIndexingOptions("name", []uint64{}, []byte("test"), document.IndexField|document.StoreField)) + doc.AddField(document.NewNumericFieldWithIndexingOptions("age", []uint64{}, 35.99, document.IndexField|document.StoreField)) + dateField, err := document.NewDateTimeFieldWithIndexingOptions("unixEpoch", []uint64{}, time.Unix(0, 0), document.IndexField|document.StoreField) + if err != nil { + t.Error(err) + } + doc.AddField(dateField) + err = idx.Update(doc) + if err != nil { + t.Errorf("Error updating index: %v", err) + } + + indexReader, err := idx.Reader() + if err != nil { + t.Error(err) + } + defer func() { + err := indexReader.Close() + if err != nil { + t.Fatal(err) + } + }() + + fields, err := indexReader.Fields() + if err != nil { + t.Error(err) + } else { + fieldsMap := map[string]struct{}{} + for _, field := range fields { + fieldsMap[field] = struct{}{} + } + expectedFieldsMap := map[string]struct{}{ + "_id": struct{}{}, + "name": struct{}{}, + "age": struct{}{}, + "unixEpoch": struct{}{}, + } + if !reflect.DeepEqual(fieldsMap, expectedFieldsMap) { + t.Errorf("expected fields: %v, got %v", expectedFieldsMap, fieldsMap) + } + } + +} + +func TestIndexUpdateComposites(t *testing.T) { + analysisQueue := index.NewAnalysisQueue(1) + idx, err := NewScorch(Name, nil, analysisQueue) + if err != nil { + t.Fatal(err) + } + err = idx.Open() + if err != nil { + t.Errorf("error opening index: %v", err) + } + defer func() 
{
+		err := idx.Close()
+		if err != nil {
+			t.Fatal(err)
+		}
+	}()
+
+	doc := document.NewDocument("1")
+	doc.AddField(document.NewTextFieldWithIndexingOptions("name", []uint64{}, []byte("test"), document.IndexField|document.StoreField))
+	doc.AddField(document.NewTextFieldWithIndexingOptions("title", []uint64{}, []byte("mister"), document.IndexField|document.StoreField))
+	doc.AddField(document.NewCompositeFieldWithIndexingOptions("_all", true, nil, nil, document.IndexField))
+	err = idx.Update(doc)
+	if err != nil {
+		t.Errorf("Error updating index: %v", err)
+	}
+
+	// now lets update it
+	doc = document.NewDocument("1")
+	doc.AddField(document.NewTextFieldWithIndexingOptions("name", []uint64{}, []byte("testupdated"), document.IndexField|document.StoreField))
+	doc.AddField(document.NewTextFieldWithIndexingOptions("title", []uint64{}, []byte("misterupdated"), document.IndexField|document.StoreField))
+	doc.AddField(document.NewCompositeFieldWithIndexingOptions("_all", true, nil, nil, document.IndexField))
+	err = idx.Update(doc)
+	if err != nil {
+		t.Errorf("Error updating index: %v", err)
+	}
+
+	indexReader, err := idx.Reader()
+	if err != nil {
+		t.Error(err)
+	}
+	defer func() {
+		err := indexReader.Close()
+		if err != nil {
+			t.Fatal(err)
+		}
+	}()
+
+	// make sure new values are in index
+	storedDoc, err := indexReader.Document("1")
+	if err != nil {
+		t.Error(err)
+	}
+	if len(storedDoc.Fields) != 3 {
+		t.Errorf("expected 3 stored fields, got %d", len(storedDoc.Fields))
+	}
+	for _, field := range storedDoc.Fields {
+		if field.Name() == "name" {
+			textField, ok := field.(*document.TextField)
+			if !ok {
+				t.Errorf("expected text field")
+			}
+			if string(textField.Value()) != "testupdated" {
+				t.Errorf("expected field content 'testupdated', got '%s'", string(textField.Value()))
+			}
+		}
+	}
+}
+
+func TestIndexTermReaderCompositeFields(t *testing.T) {
+	analysisQueue := index.NewAnalysisQueue(1)
+	idx, err := NewScorch(Name, nil, analysisQueue)
+	if err != nil {
+		
t.Fatal(err) + } + err = idx.Open() + if err != nil { + t.Errorf("error opening index: %v", err) + } + defer func() { + err := idx.Close() + if err != nil { + t.Fatal(err) + } + }() + + doc := document.NewDocument("1") + doc.AddField(document.NewTextFieldWithIndexingOptions("name", []uint64{}, []byte("test"), document.IndexField|document.StoreField|document.IncludeTermVectors)) + doc.AddField(document.NewTextFieldWithIndexingOptions("title", []uint64{}, []byte("mister"), document.IndexField|document.StoreField|document.IncludeTermVectors)) + doc.AddField(document.NewCompositeFieldWithIndexingOptions("_all", true, nil, nil, document.IndexField|document.IncludeTermVectors)) + err = idx.Update(doc) + if err != nil { + t.Errorf("Error updating index: %v", err) + } + + indexReader, err := idx.Reader() + if err != nil { + t.Error(err) + } + defer func() { + err := indexReader.Close() + if err != nil { + t.Fatal(err) + } + }() + + termFieldReader, err := indexReader.TermFieldReader([]byte("mister"), "_all", true, true, true) + if err != nil { + t.Error(err) + } + + tfd, err := termFieldReader.Next(nil) + for tfd != nil && err == nil { + externalID, err := indexReader.ExternalID(tfd.ID) + if err != nil { + t.Fatal(err) + } + if externalID != "1" { + t.Errorf("expected to find document id 1") + } + tfd, err = termFieldReader.Next(nil) + } + if err != nil { + t.Error(err) + } +} + +func TestConcurrentUpdate(t *testing.T) { + analysisQueue := index.NewAnalysisQueue(1) + idx, err := NewScorch(Name, nil, analysisQueue) + if err != nil { + t.Fatal(err) + } + err = idx.Open() + if err != nil { + t.Errorf("error opening index: %v", err) + } + defer func() { + err := idx.Close() + if err != nil { + t.Fatal(err) + } + }() + + // do some concurrent updates + var wg sync.WaitGroup + for i := 0; i < 10; i++ { + wg.Add(1) + go func(i int) { + doc := document.NewDocument("1") + doc.AddField(document.NewTextFieldWithIndexingOptions(strconv.Itoa(i), []uint64{}, []byte(strconv.Itoa(i)), 
document.StoreField)) + err := idx.Update(doc) + if err != nil { + t.Errorf("Error updating index: %v", err) + } + wg.Done() + }(i) + } + wg.Wait() + + // now load the name field and see what we get + r, err := idx.Reader() + if err != nil { + log.Fatal(err) + } + + doc, err := r.Document("1") + if err != nil { + log.Fatal(err) + } + + if len(doc.Fields) > 2 { + t.Errorf("expected no more than 2 fields, found %d", len(doc.Fields)) + } +} + +func TestLargeField(t *testing.T) { + analysisQueue := index.NewAnalysisQueue(1) + idx, err := NewScorch(Name, nil, analysisQueue) + if err != nil { + t.Fatal(err) + } + err = idx.Open() + if err != nil { + t.Errorf("error opening index: %v", err) + } + defer func() { + err := idx.Close() + if err != nil { + t.Fatal(err) + } + }() + + var largeFieldValue []byte + for len(largeFieldValue) < 4096 { + largeFieldValue = append(largeFieldValue, bleveWikiArticle1K...) + } + + d := document.NewDocument("large") + f := document.NewTextFieldWithIndexingOptions("desc", nil, largeFieldValue, document.IndexField|document.StoreField) + d.AddField(f) + + err = idx.Update(d) + if err != nil { + t.Fatal(err) + } +} + +var bleveWikiArticle1K = []byte(`Boiling liquid expanding vapor explosion +From Wikipedia, the free encyclopedia +See also: Boiler explosion and Steam explosion + +Flames subsequent to a flammable liquid BLEVE from a tanker. BLEVEs do not necessarily involve fire. + +This article's tone or style may not reflect the encyclopedic tone used on Wikipedia. See Wikipedia's guide to writing better articles for suggestions. 
(July 2013) +A boiling liquid expanding vapor explosion (BLEVE, /ˈblɛviː/ blev-ee) is an explosion caused by the rupture of a vessel containing a pressurized liquid above its boiling point.[1] +Contents [hide] +1 Mechanism +1.1 Water example +1.2 BLEVEs without chemical reactions +2 Fires +3 Incidents +4 Safety measures +5 See also +6 References +7 External links +Mechanism[edit] + +This section needs additional citations for verification. Please help improve this article by adding citations to reliable sources. Unsourced material may be challenged and removed. (July 2013) +There are three characteristics of liquids which are relevant to the discussion of a BLEVE:`) diff --git a/index/scorch/segment/mem/build.go b/index/scorch/segment/mem/build.go new file mode 100644 index 000000000..ff6fb6056 --- /dev/null +++ b/index/scorch/segment/mem/build.go @@ -0,0 +1,220 @@ +package mem + +import ( + "math" + "sort" + + "github.com/RoaringBitmap/roaring" + "github.com/blevesearch/bleve/analysis" + "github.com/blevesearch/bleve/document" + "github.com/blevesearch/bleve/index" +) + +// NewFromAnalyzedDocs places the analyzed document mutations into this segment +func NewFromAnalyzedDocs(results []*index.AnalysisResult) *Segment { + s := New() + + // ensure that _id field get fieldID 0 + s.getOrDefineField("_id", false) + + // walk each doc + for _, result := range results { + s.processDocument(result) + } + + // go back and sort the dictKeys + for _, dict := range s.dictKeys { + sort.Strings(dict) + } + + // professional debugging + // + // log.Printf("fields: %v\n", s.fields) + // log.Printf("fieldsInv: %v\n", s.fieldsInv) + // log.Printf("fieldsLoc: %v\n", s.fieldsLoc) + // log.Printf("dicts: %v\n", s.dicts) + // log.Printf("dict keys: %v\n", s.dictKeys) + // for i, posting := range s.postings { + // log.Printf("posting %d: %v\n", i, posting) + // } + // for i, freq := range s.freqs { + // log.Printf("freq %d: %v\n", i, freq) + // } + // for i, norm := range s.norms { + // 
log.Printf("norm %d: %v\n", i, norm) + // } + // for i, field := range s.locfields { + // log.Printf("field %d: %v\n", i, field) + // } + // for i, start := range s.locstarts { + // log.Printf("start %d: %v\n", i, start) + // } + // for i, end := range s.locends { + // log.Printf("end %d: %v\n", i, end) + // } + // for i, pos := range s.locpos { + // log.Printf("pos %d: %v\n", i, pos) + // } + // for i, apos := range s.locarraypos { + // log.Printf("apos %d: %v\n", i, apos) + // } + // log.Printf("stored: %v\n", s.stored) + // log.Printf("stored types: %v\n", s.storedTypes) + // log.Printf("stored pos: %v\n", s.storedPos) + + return s +} + +func (s *Segment) processDocument(result *index.AnalysisResult) { + // used to collate information across fields + docMap := map[uint16]analysis.TokenFrequencies{} + fieldLens := map[uint16]int{} + docNum := uint64(s.addDocument()) + + processField := func(field uint16, name string, l int, tf analysis.TokenFrequencies) { + fieldLens[field] += l + if existingFreqs, ok := docMap[field]; ok { + existingFreqs.MergeAll(name, tf) + } else { + docMap[field] = tf + } + } + + storeField := func(docNum uint64, field uint16, typ byte, val []byte, pos []uint64) { + s.stored[docNum][field] = append(s.stored[docNum][field], val) + s.storedTypes[docNum][field] = append(s.storedTypes[docNum][field], typ) + s.storedPos[docNum][field] = append(s.storedPos[docNum][field], pos) + } + + // walk each composite field + for _, field := range result.Document.CompositeFields { + fieldID := uint16(s.getOrDefineField(field.Name(), false)) + l, tf := field.Analyze() + processField(fieldID, field.Name(), l, tf) + } + + // walk each field + for i, field := range result.Document.Fields { + fieldID := uint16(s.getOrDefineField(field.Name(), field.Options().IncludeTermVectors())) + l := result.Length[i] + tf := result.Analyzed[i] + processField(fieldID, field.Name(), l, tf) + if field.Options().IsStored() { + storeField(docNum, fieldID, encodeFieldType(field), 
field.Value(), field.ArrayPositions()) + } + } + + // now that its been rolled up into docMap, walk that + for fieldID, tokenFrequencies := range docMap { + for term, tokenFreq := range tokenFrequencies { + fieldTermPostings := s.dicts[fieldID][term] + + // FIXME this if/else block has duplicate code that has resulted in + // bugs fixed/missed more than once, need to refactor + if fieldTermPostings == 0 { + // need to build new posting + bs := roaring.New() + bs.AddInt(int(docNum)) + + newPostingID := uint64(len(s.postings) + 1) + // add this new bitset to the postings slice + s.postings = append(s.postings, bs) + // add this to the details slice + s.freqs = append(s.freqs, []uint64{uint64(tokenFreq.Frequency())}) + s.norms = append(s.norms, []float32{float32(1.0 / math.Sqrt(float64(fieldLens[fieldID])))}) + // add to locations + var locfields []uint16 + var locstarts []uint64 + var locends []uint64 + var locpos []uint64 + var locarraypos [][]uint64 + for _, loc := range tokenFreq.Locations { + var locf = fieldID + if loc.Field != "" { + locf = uint16(s.getOrDefineField(loc.Field, false)) + } + locfields = append(locfields, locf) + locstarts = append(locstarts, uint64(loc.Start)) + locends = append(locends, uint64(loc.End)) + locpos = append(locpos, uint64(loc.Position)) + if len(loc.ArrayPositions) > 0 { + locarraypos = append(locarraypos, loc.ArrayPositions) + } else { + locarraypos = append(locarraypos, nil) + } + } + s.locfields = append(s.locfields, locfields) + s.locstarts = append(s.locstarts, locstarts) + s.locends = append(s.locends, locends) + s.locpos = append(s.locpos, locpos) + s.locarraypos = append(s.locarraypos, locarraypos) + // record it + s.dicts[fieldID][term] = newPostingID + // this term was new for this field, add it to dictKeys + s.dictKeys[fieldID] = append(s.dictKeys[fieldID], term) + } else { + // posting already started for this field/term + // the actual offset is - 1, because 0 is zero value + bs := s.postings[fieldTermPostings-1] + 
bs.AddInt(int(docNum)) + s.freqs[fieldTermPostings-1] = append(s.freqs[fieldTermPostings-1], uint64(tokenFreq.Frequency())) + s.norms[fieldTermPostings-1] = append(s.norms[fieldTermPostings-1], float32(1.0/math.Sqrt(float64(fieldLens[fieldID])))) + for _, loc := range tokenFreq.Locations { + var locf = fieldID + if loc.Field != "" { + locf = uint16(s.getOrDefineField(loc.Field, false)) + } + s.locfields[fieldTermPostings-1] = append(s.locfields[fieldTermPostings-1], locf) + s.locstarts[fieldTermPostings-1] = append(s.locstarts[fieldTermPostings-1], uint64(loc.Start)) + s.locends[fieldTermPostings-1] = append(s.locends[fieldTermPostings-1], uint64(loc.End)) + s.locpos[fieldTermPostings-1] = append(s.locpos[fieldTermPostings-1], uint64(loc.Position)) + if len(loc.ArrayPositions) > 0 { + s.locarraypos[fieldTermPostings-1] = append(s.locarraypos[fieldTermPostings-1], loc.ArrayPositions) + } else { + s.locarraypos[fieldTermPostings-1] = append(s.locarraypos[fieldTermPostings-1], nil) + } + } + } + } + } +} + +func (s *Segment) getOrDefineField(name string, hasLoc bool) int { + fieldID, ok := s.fields[name] + if !ok { + fieldID = uint16(len(s.fieldsInv) + 1) + s.fields[name] = fieldID + s.fieldsInv = append(s.fieldsInv, name) + s.fieldsLoc = append(s.fieldsLoc, hasLoc) + s.dicts = append(s.dicts, make(map[string]uint64)) + s.dictKeys = append(s.dictKeys, make([]string, 0)) + } + return int(fieldID - 1) +} + +func (s *Segment) addDocument() int { + docNum := len(s.stored) + s.stored = append(s.stored, map[uint16][][]byte{}) + s.storedTypes = append(s.storedTypes, map[uint16][]byte{}) + s.storedPos = append(s.storedPos, map[uint16][][]uint64{}) + return docNum +} + +func encodeFieldType(f document.Field) byte { + fieldType := byte('x') + switch f.(type) { + case *document.TextField: + fieldType = 't' + case *document.NumericField: + fieldType = 'n' + case *document.DateTimeField: + fieldType = 'd' + case *document.BooleanField: + fieldType = 'b' + case 
*document.GeoPointField: + fieldType = 'g' + case *document.CompositeField: + fieldType = 'c' + } + return fieldType +} diff --git a/index/scorch/segment/mem/dict.go b/index/scorch/segment/mem/dict.go new file mode 100644 index 000000000..a7d609f6d --- /dev/null +++ b/index/scorch/segment/mem/dict.go @@ -0,0 +1,87 @@ +package mem + +import ( + "sort" + "strings" + + "github.com/RoaringBitmap/roaring" + "github.com/blevesearch/bleve/index" + "github.com/blevesearch/bleve/index/scorch/segment" +) + +// Dictionary is the in-memory representation of the term dictionary +type Dictionary struct { + segment *Segment + field string + fieldID uint16 +} + +// PostingsList returns the postings list for the specified term +func (d *Dictionary) PostingsList(term string, except *roaring.Bitmap) segment.PostingsList { + return &PostingsList{ + dictionary: d, + term: term, + postingsID: d.segment.dicts[d.fieldID][term], + except: except, + } +} + +// Iterator returns an iterator for this dictionary +func (d *Dictionary) Iterator() segment.DictionaryIterator { + return &DictionaryIterator{ + d: d, + } +} + +// PrefixIterator returns an iterator which only visits terms having the +// the specified prefix +func (d *Dictionary) PrefixIterator(prefix string) segment.DictionaryIterator { + offset := sort.SearchStrings(d.segment.dictKeys[d.fieldID], prefix) + return &DictionaryIterator{ + d: d, + prefix: prefix, + offset: offset, + } +} + +// RangeIterator returns an iterator which only visits terms between the +// start and end terms. NOTE: bleve.index API specifies the end is inclusive. 
+func (d *Dictionary) RangeIterator(start, end string) segment.DictionaryIterator { + offset := sort.SearchStrings(d.segment.dictKeys[d.fieldID], start) + return &DictionaryIterator{ + d: d, + offset: offset, + end: end, + } +} + +// DictionaryIterator is an iterator for term dictionary +type DictionaryIterator struct { + d *Dictionary + prefix string + end string + offset int +} + +// Next returns the next entry in the dictionary +func (d *DictionaryIterator) Next() (*index.DictEntry, error) { + if d.offset > len(d.d.segment.dictKeys[d.d.fieldID])-1 { + return nil, nil + } + next := d.d.segment.dictKeys[d.d.fieldID][d.offset] + // check prefix + if d.prefix != "" && !strings.HasPrefix(next, d.prefix) { + return nil, nil + } + // check end (bleve.index API demands inclusive end) + if d.end != "" && next > d.end { + return nil, nil + } + + d.offset++ + postingID := d.d.segment.dicts[d.d.fieldID][next] + return &index.DictEntry{ + Term: next, + Count: d.d.segment.postings[postingID-1].GetCardinality(), + }, nil +} diff --git a/index/scorch/segment/mem/posting.go b/index/scorch/segment/mem/posting.go new file mode 100644 index 000000000..ac8b2fb72 --- /dev/null +++ b/index/scorch/segment/mem/posting.go @@ -0,0 +1,160 @@ +package mem + +import ( + "github.com/RoaringBitmap/roaring" + "github.com/blevesearch/bleve/index/scorch/segment" +) + +// PostingsList is an in-memory represenation of a postings list +type PostingsList struct { + dictionary *Dictionary + term string + postingsID uint64 + except *roaring.Bitmap +} + +// Count returns the number of items on this postings list +func (p *PostingsList) Count() uint64 { + var rv uint64 + if p.postingsID > 0 { + rv = p.dictionary.segment.postings[p.postingsID-1].GetCardinality() + if p.except != nil { + except := p.except.GetCardinality() + if except > rv { + // avoid underflow + except = rv + } + rv -= except + } + } + return rv +} + +// Iterator returns an iterator for this postings list +func (p *PostingsList) 
Iterator() segment.PostingsIterator { + rv := &PostingsIterator{ + postings: p, + } + if p.postingsID > 0 { + allbits := p.dictionary.segment.postings[p.postingsID-1] + rv.all = allbits.Iterator() + if p.except != nil { + allExcept := allbits.Clone() + allExcept.AndNot(p.except) + rv.actual = allExcept.Iterator() + } else { + rv.actual = allbits.Iterator() + } + } + + return rv +} + +// PostingsIterator provides a way to iterate through the postings list +type PostingsIterator struct { + postings *PostingsList + all roaring.IntIterable + offset int + locoffset int + actual roaring.IntIterable +} + +// Next returns the next posting on the postings list, or nil at the end +func (i *PostingsIterator) Next() segment.Posting { + if i.actual == nil || !i.actual.HasNext() { + return nil + } + n := i.actual.Next() + allN := i.all.Next() + + // n is the next actual hit (excluding some postings) + // allN is the next hit in the full postings + // if they don't match, adjust offsets to factor in item we're skipping over + // incr the all iterator, and check again + for allN != n { + i.locoffset += int(i.postings.dictionary.segment.freqs[i.postings.postingsID-1][i.offset]) + i.offset++ + allN = i.all.Next() + } + rv := &Posting{ + iterator: i, + docNum: uint64(n), + offset: i.offset, + locoffset: i.locoffset, + } + + i.locoffset += int(i.postings.dictionary.segment.freqs[i.postings.postingsID-1][i.offset]) + i.offset++ + return rv +} + +// Posting is a single entry in a postings list +type Posting struct { + iterator *PostingsIterator + docNum uint64 + offset int + locoffset int +} + +// Number returns the document number of this posting in this segment +func (p *Posting) Number() uint64 { + return p.docNum +} + +// Frequency returns the frequence of occurance of this term in this doc/field +func (p *Posting) Frequency() uint64 { + return p.iterator.postings.dictionary.segment.freqs[p.iterator.postings.postingsID-1][p.offset] +} + +// Norm returns the normalization factor for 
this posting +func (p *Posting) Norm() float64 { + return float64(p.iterator.postings.dictionary.segment.norms[p.iterator.postings.postingsID-1][p.offset]) +} + +// Locations returns the location information for each occurance +func (p *Posting) Locations() []segment.Location { + if !p.iterator.postings.dictionary.segment.fieldsLoc[p.iterator.postings.dictionary.fieldID] { + return nil + } + freq := int(p.Frequency()) + rv := make([]segment.Location, freq) + for i := 0; i < freq; i++ { + rv[i] = &Location{ + p: p, + offset: p.locoffset + i, + } + } + return rv +} + +// Location represents the location of a single occurance +type Location struct { + p *Posting + offset int +} + +// Field returns the name of the field (useful in composite fields to know +// which original field the value came from) +func (l *Location) Field() string { + return l.p.iterator.postings.dictionary.segment.fieldsInv[l.p.iterator.postings.dictionary.segment.locfields[l.p.iterator.postings.postingsID-1][l.offset]] +} + +// Start returns the start byte offset of this occurance +func (l *Location) Start() uint64 { + return l.p.iterator.postings.dictionary.segment.locstarts[l.p.iterator.postings.postingsID-1][l.offset] +} + +// End returns the end byte offset of this occurance +func (l *Location) End() uint64 { + return l.p.iterator.postings.dictionary.segment.locends[l.p.iterator.postings.postingsID-1][l.offset] +} + +// Pos returns the 1-based phrase position of this occurance +func (l *Location) Pos() uint64 { + return l.p.iterator.postings.dictionary.segment.locpos[l.p.iterator.postings.postingsID-1][l.offset] +} + +// ArrayPositions returns the array position vector associated with this occurance +func (l *Location) ArrayPositions() []uint64 { + return l.p.iterator.postings.dictionary.segment.locarraypos[l.p.iterator.postings.postingsID-1][l.offset] +} diff --git a/index/scorch/segment/mem/segment.go b/index/scorch/segment/mem/segment.go new file mode 100644 index 000000000..fdec1b2e6 --- 
/dev/null +++ b/index/scorch/segment/mem/segment.go @@ -0,0 +1,132 @@ +package mem + +import ( + "github.com/RoaringBitmap/roaring" + "github.com/blevesearch/bleve/index/scorch/segment" +) + +// KNOWN ISSUES +// - LIMITATION - we decided whether or not to store term vectors for a field +// at the segment level, based on the first definition of a +// field we see. in normal bleve usage this is fine, all +// instances of a field definition will be the same. however, +// advanced users may violate this and provide unique field +// definitions with each document. this segment does not +// support this usage. + +// TODO +// - need better testing of multiple docs, iterating freqs, locations and +// and verifying the correct results are returned +// - need tests for term dictionary iteration + +// Segment is an in memory implementation of scorch.Segment +type Segment struct { + + // fields name -> id+1 + fields map[string]uint16 + // fields id -> name + fieldsInv []string + // field id -> has location info + fieldsLoc []bool + + // term dictionary + // field id -> term -> posting id + 1 + dicts []map[string]uint64 + + // term dictionary keys + // field id -> []dictionary keys + dictKeys [][]string + + // postings list + // postings list id -> postings bitmap + postings []*roaring.Bitmap + + // term frequencies + // postings list id -> freqs (one for each hit in bitmap) + freqs [][]uint64 + + // field norms + // postings list id -> norms (one for each hit in bitmap) + norms [][]float32 + + // field/start/end/pos/locarraypos + // postings list id -> start/end/pos/locarraypos (one for each freq) + locfields [][]uint16 + locstarts [][]uint64 + locends [][]uint64 + locpos [][]uint64 + locarraypos [][][]uint64 + + // stored field values + // docNum -> field id -> slice of values (each value []byte) + stored []map[uint16][][]byte + + // stored field types + // docNum -> field id -> slice of types (each type byte) + storedTypes []map[uint16][]byte + + // stored field array 
positions + // docNum -> field id -> slice of array positions (each is []uint64) + storedPos []map[uint16][][]uint64 +} + +// New builds a new empty Segment +func New() *Segment { + return &Segment{ + fields: map[string]uint16{}, + } +} + +// Fields returns the field names used in this segment +func (s *Segment) Fields() []string { + return s.fieldsInv +} + +// VisitDocument invokes the DocFieldValueVistor for each stored field +// for the specified doc number +func (s *Segment) VisitDocument(num uint64, visitor segment.DocumentFieldValueVisitor) error { + // ensure document number exists + if int(num) > len(s.stored)-1 { + return nil + } + docFields := s.stored[int(num)] + for field, values := range docFields { + for i, value := range values { + keepGoing := visitor(s.fieldsInv[field], s.storedTypes[int(num)][field][i], value, s.storedPos[int(num)][field][i]) + if !keepGoing { + return nil + } + } + } + return nil +} + +// Dictionary returns the term dictionary for the specified field +func (s *Segment) Dictionary(field string) segment.TermDictionary { + return &Dictionary{ + segment: s, + field: field, + fieldID: uint16(s.getOrDefineField(field, false)), + } +} + +// Count returns the number of documents in this segment +// (this has no notion of deleted docs) +func (s *Segment) Count() uint64 { + return uint64(len(s.stored)) +} + +// DocNumbers returns a bitset corresponding to the doc numbers of all the +// provided _id strings +func (s *Segment) DocNumbers(ids []string) *roaring.Bitmap { + + idDictionary := s.dicts[s.getOrDefineField("_id", false)] + rv := roaring.New() + for _, id := range ids { + postingID := idDictionary[id] + if postingID > 0 { + rv.Or(s.postings[postingID-1]) + } + } + return rv +} diff --git a/index/scorch/segment/mem/segment_test.go b/index/scorch/segment/mem/segment_test.go new file mode 100644 index 000000000..bc840e147 --- /dev/null +++ b/index/scorch/segment/mem/segment_test.go @@ -0,0 +1,521 @@ +package mem + +import ( + "math" + 
"testing" + + "github.com/blevesearch/bleve/analysis" + "github.com/blevesearch/bleve/document" + "github.com/blevesearch/bleve/index" +) + +func TestEmpty(t *testing.T) { + + emptySegment := New() + + if emptySegment.Count() != 0 { + t.Errorf("expected count 0, got %d", emptySegment.Count()) + } + + dict := emptySegment.Dictionary("name") + if dict == nil { + t.Fatal("got nil dict, expected non-nil") + } + + postingsList := dict.PostingsList("marty", nil) + if postingsList == nil { + t.Fatal("got nil postings list, expected non-nil") + } + + postingsItr := postingsList.Iterator() + if postingsItr == nil { + t.Fatal("got nil iterator, expected non-nil") + } + + count := 0 + nextPosting := postingsItr.Next() + for nextPosting != nil { + count++ + nextPosting = postingsItr.Next() + } + + if count != 0 { + t.Errorf("expected count to be 0, got %d", count) + } + + // now try and visit a document + emptySegment.VisitDocument(0, func(field string, typ byte, value []byte, pos []uint64) bool { + t.Errorf("document visitor called, not expected") + return true + }) +} + +func TestSingle(t *testing.T) { + + doc := &document.Document{ + ID: "a", + Fields: []document.Field{ + document.NewTextFieldCustom("_id", nil, []byte("a"), document.IndexField|document.StoreField, nil), + document.NewTextFieldCustom("name", nil, []byte("wow"), document.IndexField|document.StoreField|document.IncludeTermVectors, nil), + document.NewTextFieldCustom("desc", nil, []byte("some thing"), document.IndexField|document.StoreField|document.IncludeTermVectors, nil), + document.NewTextFieldCustom("tag", []uint64{0}, []byte("cold"), document.IndexField|document.StoreField|document.IncludeTermVectors, nil), + document.NewTextFieldCustom("tag", []uint64{1}, []byte("dark"), document.IndexField|document.StoreField|document.IncludeTermVectors, nil), + }, + CompositeFields: []*document.CompositeField{ + document.NewCompositeField("_all", true, nil, nil), + }, + } + + // forge analyzed docs + results := 
[]*index.AnalysisResult{ + &index.AnalysisResult{ + Document: doc, + Analyzed: []analysis.TokenFrequencies{ + analysis.TokenFrequency(analysis.TokenStream{ + &analysis.Token{ + Start: 0, + End: 1, + Position: 1, + Term: []byte("a"), + }, + }, nil, false), + analysis.TokenFrequency(analysis.TokenStream{ + &analysis.Token{ + Start: 0, + End: 3, + Position: 1, + Term: []byte("wow"), + }, + }, nil, true), + analysis.TokenFrequency(analysis.TokenStream{ + &analysis.Token{ + Start: 0, + End: 4, + Position: 1, + Term: []byte("some"), + }, + &analysis.Token{ + Start: 5, + End: 10, + Position: 2, + Term: []byte("thing"), + }, + }, nil, true), + analysis.TokenFrequency(analysis.TokenStream{ + &analysis.Token{ + Start: 0, + End: 4, + Position: 1, + Term: []byte("cold"), + }, + }, nil, true), + analysis.TokenFrequency(analysis.TokenStream{ + &analysis.Token{ + Start: 0, + End: 4, + Position: 1, + Term: []byte("dark"), + }, + }, nil, true), + }, + Length: []int{ + 1, + 1, + 2, + 1, + 1, + }, + }, + } + + // fix up composite fields + for _, ar := range results { + for i, f := range ar.Document.Fields { + for _, cf := range ar.Document.CompositeFields { + cf.Compose(f.Name(), ar.Length[i], ar.Analyzed[i]) + } + } + } + + segment := NewFromAnalyzedDocs(results) + if segment == nil { + t.Fatalf("segment nil, not expected") + } + + if segment.Count() != 1 { + t.Errorf("expected count 1, got %d", segment.Count()) + } + + // check the _id field + dict := segment.Dictionary("_id") + if dict == nil { + t.Fatal("got nil dict, expected non-nil") + } + + postingsList := dict.PostingsList("a", nil) + if postingsList == nil { + t.Fatal("got nil postings list, expected non-nil") + } + + postingsItr := postingsList.Iterator() + if postingsItr == nil { + t.Fatal("got nil iterator, expected non-nil") + } + + count := 0 + nextPosting := postingsItr.Next() + for nextPosting != nil { + count++ + if nextPosting.Frequency() != 1 { + t.Errorf("expected frequency 1, got %d", nextPosting.Frequency()) + 
} + if nextPosting.Number() != 0 { + t.Errorf("expected doc number 0, got %d", nextPosting.Number()) + } + if nextPosting.Norm() != 1.0 { + t.Errorf("expected norm 1.0, got %f", nextPosting.Norm()) + } + + nextPosting = postingsItr.Next() + } + + if count != 1 { + t.Errorf("expected count to be 1, got %d", count) + } + + // check the name field + dict = segment.Dictionary("name") + if dict == nil { + t.Fatal("got nil dict, expected non-nil") + } + + postingsList = dict.PostingsList("wow", nil) + if postingsList == nil { + t.Fatal("got nil postings list, expected non-nil") + } + + postingsItr = postingsList.Iterator() + if postingsItr == nil { + t.Fatal("got nil iterator, expected non-nil") + } + + count = 0 + nextPosting = postingsItr.Next() + for nextPosting != nil { + count++ + if nextPosting.Frequency() != 1 { + t.Errorf("expected frequency 1, got %d", nextPosting.Frequency()) + } + if nextPosting.Number() != 0 { + t.Errorf("expected doc number 0, got %d", nextPosting.Number()) + } + if nextPosting.Norm() != 1.0 { + t.Errorf("expected norm 1.0, got %f", nextPosting.Norm()) + } + for _, loc := range nextPosting.Locations() { + if loc.Start() != 0 { + t.Errorf("expected loc start to be 0, got %d", loc.Start()) + } + if loc.End() != 3 { + t.Errorf("expected loc end to be 3, got %d", loc.End()) + } + if loc.Pos() != 1 { + t.Errorf("expected loc pos to be 1, got %d", loc.Pos()) + } + if loc.ArrayPositions() != nil { + t.Errorf("expect loc array pos to be nil, got %v", loc.ArrayPositions()) + } + } + + nextPosting = postingsItr.Next() + } + + if count != 1 { + t.Errorf("expected count to be 1, got %d", count) + } + + // check the _all field (composite) + dict = segment.Dictionary("_all") + if dict == nil { + t.Fatal("got nil dict, expected non-nil") + } + + postingsList = dict.PostingsList("wow", nil) + if postingsList == nil { + t.Fatal("got nil postings list, expected non-nil") + } + + postingsItr = postingsList.Iterator() + if postingsItr == nil { + t.Fatal("got 
nil iterator, expected non-nil") + } + + count = 0 + nextPosting = postingsItr.Next() + for nextPosting != nil { + count++ + if nextPosting.Frequency() != 1 { + t.Errorf("expected frequency 1, got %d", nextPosting.Frequency()) + } + if nextPosting.Number() != 0 { + t.Errorf("expected doc number 0, got %d", nextPosting.Number()) + } + expectedNorm := float32(1.0 / math.Sqrt(float64(6))) + if nextPosting.Norm() != float64(expectedNorm) { + t.Errorf("expected norm %f, got %f", expectedNorm, nextPosting.Norm()) + } + for _, loc := range nextPosting.Locations() { + if loc.Start() != 0 { + t.Errorf("expected loc start to be 0, got %d", loc.Start()) + } + if loc.End() != 3 { + t.Errorf("expected loc end to be 3, got %d", loc.End()) + } + if loc.Pos() != 1 { + t.Errorf("expected loc pos to be 1, got %d", loc.Pos()) + } + if loc.ArrayPositions() != nil { + t.Errorf("expect loc array pos to be nil, got %v", loc.ArrayPositions()) + } + } + + nextPosting = postingsItr.Next() + } + + if count != 1 { + t.Errorf("expected count to be 1, got %d", count) + } + + // now try and visit a document + var fieldValuesSeen int + segment.VisitDocument(0, func(field string, typ byte, value []byte, pos []uint64) bool { + fieldValuesSeen++ + return true + }) + if fieldValuesSeen != 5 { + t.Errorf("expected 5 field values, got %d", fieldValuesSeen) + } + +} + +func TestMultiple(t *testing.T) { + + doc := &document.Document{ + ID: "a", + Fields: []document.Field{ + document.NewTextFieldCustom("_id", nil, []byte("a"), document.IndexField|document.StoreField, nil), + document.NewTextFieldCustom("name", nil, []byte("wow"), document.IndexField|document.StoreField|document.IncludeTermVectors, nil), + document.NewTextFieldCustom("desc", nil, []byte("some thing"), document.IndexField|document.StoreField|document.IncludeTermVectors, nil), + document.NewTextFieldCustom("tag", []uint64{0}, []byte("cold"), document.IndexField|document.StoreField|document.IncludeTermVectors, nil), + 
document.NewTextFieldCustom("tag", []uint64{1}, []byte("dark"), document.IndexField|document.StoreField|document.IncludeTermVectors, nil), + }, + CompositeFields: []*document.CompositeField{ + document.NewCompositeField("_all", true, nil, nil), + }, + } + + doc2 := &document.Document{ + ID: "b", + Fields: []document.Field{ + document.NewTextFieldCustom("_id", nil, []byte("b"), document.IndexField|document.StoreField, nil), + document.NewTextFieldCustom("name", nil, []byte("who"), document.IndexField|document.StoreField|document.IncludeTermVectors, nil), + document.NewTextFieldCustom("desc", nil, []byte("some thing"), document.IndexField|document.StoreField|document.IncludeTermVectors, nil), + document.NewTextFieldCustom("tag", []uint64{0}, []byte("cold"), document.IndexField|document.StoreField|document.IncludeTermVectors, nil), + document.NewTextFieldCustom("tag", []uint64{1}, []byte("dark"), document.IndexField|document.StoreField|document.IncludeTermVectors, nil), + }, + CompositeFields: []*document.CompositeField{ + document.NewCompositeField("_all", true, nil, nil), + }, + } + + // forge analyzed docs + results := []*index.AnalysisResult{ + &index.AnalysisResult{ + Document: doc, + Analyzed: []analysis.TokenFrequencies{ + analysis.TokenFrequency(analysis.TokenStream{ + &analysis.Token{ + Start: 0, + End: 1, + Position: 1, + Term: []byte("a"), + }, + }, nil, false), + analysis.TokenFrequency(analysis.TokenStream{ + &analysis.Token{ + Start: 0, + End: 3, + Position: 1, + Term: []byte("wow"), + }, + }, nil, true), + analysis.TokenFrequency(analysis.TokenStream{ + &analysis.Token{ + Start: 0, + End: 4, + Position: 1, + Term: []byte("some"), + }, + &analysis.Token{ + Start: 5, + End: 10, + Position: 2, + Term: []byte("thing"), + }, + }, nil, true), + analysis.TokenFrequency(analysis.TokenStream{ + &analysis.Token{ + Start: 0, + End: 4, + Position: 1, + Term: []byte("cold"), + }, + }, nil, true), + analysis.TokenFrequency(analysis.TokenStream{ + &analysis.Token{ + 
Start: 0, + End: 4, + Position: 1, + Term: []byte("dark"), + }, + }, nil, true), + }, + Length: []int{ + 1, + 1, + 2, + 1, + 1, + }, + }, + &index.AnalysisResult{ + Document: doc2, + Analyzed: []analysis.TokenFrequencies{ + analysis.TokenFrequency(analysis.TokenStream{ + &analysis.Token{ + Start: 0, + End: 1, + Position: 1, + Term: []byte("b"), + }, + }, nil, false), + analysis.TokenFrequency(analysis.TokenStream{ + &analysis.Token{ + Start: 0, + End: 3, + Position: 1, + Term: []byte("who"), + }, + }, nil, true), + analysis.TokenFrequency(analysis.TokenStream{ + &analysis.Token{ + Start: 0, + End: 4, + Position: 1, + Term: []byte("some"), + }, + &analysis.Token{ + Start: 5, + End: 10, + Position: 2, + Term: []byte("thing"), + }, + }, nil, true), + analysis.TokenFrequency(analysis.TokenStream{ + &analysis.Token{ + Start: 0, + End: 4, + Position: 1, + Term: []byte("cold"), + }, + }, nil, true), + analysis.TokenFrequency(analysis.TokenStream{ + &analysis.Token{ + Start: 0, + End: 4, + Position: 1, + Term: []byte("dark"), + }, + }, nil, true), + }, + Length: []int{ + 1, + 1, + 2, + 1, + 1, + }, + }, + } + + // fix up composite fields + for _, ar := range results { + for i, f := range ar.Document.Fields { + for _, cf := range ar.Document.CompositeFields { + cf.Compose(f.Name(), ar.Length[i], ar.Analyzed[i]) + } + } + } + + segment := NewFromAnalyzedDocs(results) + if segment == nil { + t.Fatalf("segment nil, not expected") + } + + if segment.Count() != 2 { + t.Errorf("expected count 2, got %d", segment.Count()) + } + + // check the desc field + dict := segment.Dictionary("desc") + if dict == nil { + t.Fatal("got nil dict, expected non-nil") + } + + postingsList := dict.PostingsList("thing", nil) + if postingsList == nil { + t.Fatal("got nil postings list, expected non-nil") + } + + postingsItr := postingsList.Iterator() + if postingsItr == nil { + t.Fatal("got nil iterator, expected non-nil") + } + + count := 0 + nextPosting := postingsItr.Next() + for nextPosting != 
nil { + count++ + nextPosting = postingsItr.Next() + } + + if count != 2 { + t.Errorf("expected count to be 2, got %d", count) + } + + // get docnum of a + exclude := segment.DocNumbers([]string{"a"}) + + // look for term 'thing' excluding doc 'a' + postingsListExcluding := dict.PostingsList("thing", exclude) + if postingsList == nil { + t.Fatal("got nil postings list, expected non-nil") + } + + postingsItrExcluding := postingsListExcluding.Iterator() + if postingsItr == nil { + t.Fatal("got nil iterator, expected non-nil") + } + + count = 0 + nextPosting = postingsItrExcluding.Next() + for nextPosting != nil { + count++ + nextPosting = postingsItrExcluding.Next() + } + + if count != 1 { + t.Errorf("expected count to be 1, got %d", count) + } + +} diff --git a/index/scorch/segment/segment.go b/index/scorch/segment/segment.go new file mode 100644 index 000000000..5cd3d5d7f --- /dev/null +++ b/index/scorch/segment/segment.go @@ -0,0 +1,66 @@ +package segment + +import ( + "github.com/RoaringBitmap/roaring" + "github.com/blevesearch/bleve/index" +) + +// DocumentFieldValueVisitor defines a callback to be visited for each +// stored field value. The return value determines if the visitor +// should keep going. Returning true continues visiting, false stops. 
+type DocumentFieldValueVisitor func(field string, typ byte, value []byte, pos []uint64) bool + +type Segment interface { + Dictionary(field string) TermDictionary + + VisitDocument(num uint64, visitor DocumentFieldValueVisitor) error + Count() uint64 + + DocNumbers([]string) *roaring.Bitmap + + Fields() []string +} + +type TermDictionary interface { + PostingsList(term string, except *roaring.Bitmap) PostingsList + + Iterator() DictionaryIterator + PrefixIterator(prefix string) DictionaryIterator + RangeIterator(start, end string) DictionaryIterator +} + +type DictionaryIterator interface { + Next() (*index.DictEntry, error) +} + +type PostingsList interface { + Iterator() PostingsIterator + + Count() uint64 + + // NOTE deferred for future work + + // And(other PostingsList) PostingsList + // Or(other PostingsList) PostingsList +} + +type PostingsIterator interface { + Next() Posting +} + +type Posting interface { + Number() uint64 + + Frequency() uint64 + Norm() float64 + + Locations() []Location +} + +type Location interface { + Field() string + Start() uint64 + End() uint64 + Pos() uint64 + ArrayPositions() []uint64 +} diff --git a/index/scorch/snapshot_index.go b/index/scorch/snapshot_index.go new file mode 100644 index 000000000..a33cc58e1 --- /dev/null +++ b/index/scorch/snapshot_index.go @@ -0,0 +1,300 @@ +package scorch + +import ( + "bytes" + "container/heap" + "encoding/binary" + "fmt" + + "github.com/RoaringBitmap/roaring" + "github.com/blevesearch/bleve/document" + "github.com/blevesearch/bleve/index" + "github.com/blevesearch/bleve/index/scorch/segment" +) + +type IndexSnapshot struct { + segment []*SegmentSnapshot + offsets []uint64 + internal map[string][]byte +} + +func (i *IndexSnapshot) newIndexSnapshotFieldDict(field string, makeItr func(i segment.TermDictionary) segment.DictionaryIterator) (*IndexSnapshotFieldDict, error) { + + results := make(chan segment.DictionaryIterator) + for index, segment := range i.segment { + go func(index int, 
segment *SegmentSnapshot) { + dict := segment.Dictionary(field) + results <- makeItr(dict) + }(index, segment) + } + + rv := &IndexSnapshotFieldDict{ + snapshot: i, + cursors: make([]*segmentDictCursor, 0, len(i.segment)), + } + for count := 0; count < len(i.segment); count++ { + di := <-results + next, err := di.Next() + if err != nil { + return nil, err + } + if next != nil { + rv.cursors = append(rv.cursors, &segmentDictCursor{ + itr: di, + curr: next, + }) + } + } + // prepare heap + heap.Init(rv) + + return rv, nil +} + +func (i *IndexSnapshot) FieldDict(field string) (index.FieldDict, error) { + return i.newIndexSnapshotFieldDict(field, func(i segment.TermDictionary) segment.DictionaryIterator { + return i.Iterator() + }) +} + +func (i *IndexSnapshot) FieldDictRange(field string, startTerm []byte, + endTerm []byte) (index.FieldDict, error) { + return i.newIndexSnapshotFieldDict(field, func(i segment.TermDictionary) segment.DictionaryIterator { + return i.RangeIterator(string(startTerm), string(endTerm)) + }) +} + +func (i *IndexSnapshot) FieldDictPrefix(field string, + termPrefix []byte) (index.FieldDict, error) { + return i.newIndexSnapshotFieldDict(field, func(i segment.TermDictionary) segment.DictionaryIterator { + return i.PrefixIterator(string(termPrefix)) + }) +} + +func (i *IndexSnapshot) DocIDReaderAll() (index.DocIDReader, error) { + + type segmentDocNumsResult struct { + index int + docs *roaring.Bitmap + } + + results := make(chan *segmentDocNumsResult) + for index, segment := range i.segment { + go func(index int, segment *SegmentSnapshot) { + docnums := roaring.NewBitmap() + docnums.AddRange(0, segment.Count()) + results <- &segmentDocNumsResult{ + index: index, + docs: docnums, + } + }(index, segment) + } + + rv := &IndexSnapshotDocIDReader{ + snapshot: i, + iterators: make([]roaring.IntIterable, len(i.segment)), + } + for count := 0; count < len(i.segment); count++ { + sdnr := <-results + rv.iterators[sdnr.index] = sdnr.docs.Iterator() + } + + 
return rv, nil +} + +func (i *IndexSnapshot) DocIDReaderOnly(ids []string) (index.DocIDReader, error) { + + type segmentDocNumsResult struct { + index int + docs *roaring.Bitmap + } + + results := make(chan *segmentDocNumsResult) + for index, segment := range i.segment { + go func(index int, segment *SegmentSnapshot) { + docnums := segment.DocNumbers(ids) + results <- &segmentDocNumsResult{ + index: index, + docs: docnums, + } + }(index, segment) + } + + rv := &IndexSnapshotDocIDReader{ + snapshot: i, + iterators: make([]roaring.IntIterable, len(i.segment)), + } + for count := 0; count < len(i.segment); count++ { + sdnr := <-results + rv.iterators[count] = sdnr.docs.Iterator() + } + + return rv, nil +} + +func (i *IndexSnapshot) Fields() ([]string, error) { + // FIXME not making this concurrent for now as it's not used in hot path + // of any searches at the moment (just a debug aid) + fieldsMap := map[string]struct{}{} + for _, segment := range i.segment { + fields := segment.Fields() + for _, field := range fields { + fieldsMap[field] = struct{}{} + } + } + rv := make([]string, 0, len(fieldsMap)) + for k := range fieldsMap { + rv = append(rv, k) + } + return rv, nil +} + +func (i *IndexSnapshot) GetInternal(key []byte) ([]byte, error) { + return i.internal[string(key)], nil +} + +func (i *IndexSnapshot) DocCount() (uint64, error) { + var rv uint64 + for _, segment := range i.segment { + rv += segment.Count() + } + return rv, nil +} + +func (i *IndexSnapshot) Document(id string) (*document.Document, error) { + // FIXME could be done more efficiently directly, but reusing for simplicity + tfr, err := i.TermFieldReader([]byte(id), "_id", false, false, false) + if err != nil { + return nil, err + } + defer tfr.Close() + + next, err := tfr.Next(nil) + if err != nil { + return nil, err + } + + docNum := docInternalToNumber(next.ID) + segmentIndex, localDocNum := i.segmentIndexAndLocalDocNumFromGlobal(docNum) + + rv := document.NewDocument(id) + 
i.segment[segmentIndex].VisitDocument(localDocNum, func(name string, typ byte, value []byte, pos []uint64) bool { + switch typ { + case 't': + rv.AddField(document.NewTextField(name, pos, value)) + case 'n': + rv.AddField(document.NewNumericFieldFromBytes(name, pos, value)) + case 'd': + rv.AddField(document.NewDateTimeFieldFromBytes(name, pos, value)) + case 'b': + rv.AddField(document.NewBooleanFieldFromBytes(name, pos, value)) + case 'g': + rv.AddField(document.NewGeoPointFieldFromBytes(name, pos, value)) + } + + return true + }) + + return rv, nil +} + +func (i *IndexSnapshot) segmentIndexAndLocalDocNumFromGlobal(docNum uint64) (int, uint64) { + var segmentIndex uint64 + for j := 1; j < len(i.offsets); j++ { + if docNum >= i.offsets[j] { + segmentIndex = uint64(j) + } else { + break + } + } + + localDocNum := docNum - i.offsets[segmentIndex] + return int(segmentIndex), localDocNum +} + +func (i *IndexSnapshot) ExternalID(id index.IndexInternalID) (string, error) { + docNum := docInternalToNumber(id) + segmentIndex, localDocNum := i.segmentIndexAndLocalDocNumFromGlobal(docNum) + + var found bool + var rv string + i.segment[segmentIndex].VisitDocument(localDocNum, func(field string, typ byte, value []byte, pos []uint64) bool { + if field == "_id" { + found = true + rv = string(value) + return false + } + return true + }) + + if found { + return rv, nil + } + return "", fmt.Errorf("document number %d not found", docNum) +} + +func (i *IndexSnapshot) InternalID(id string) (index.IndexInternalID, error) { + // FIXME could be done more efficiently directly, but reusing for simplicity + tfr, err := i.TermFieldReader([]byte(id), "_id", false, false, false) + if err != nil { + return nil, err + } + defer tfr.Close() + + next, err := tfr.Next(nil) + if err != nil { + return nil, err + } + + return next.ID, nil +} + +func (i *IndexSnapshot) TermFieldReader(term []byte, field string, includeFreq, + includeNorm, includeTermVectors bool) (index.TermFieldReader, error) { + + 
type segmentPostingResult struct { + index int + postings segment.PostingsList + } + + results := make(chan *segmentPostingResult) + for index, segment := range i.segment { + go func(index int, segment *SegmentSnapshot) { + dict := segment.Dictionary(field) + pl := dict.PostingsList(string(term), nil) + results <- &segmentPostingResult{ + index: index, + postings: pl, + } + }(index, segment) + } + + rv := &IndexSnapshotTermFieldReader{ + snapshot: i, + postings: make([]segment.PostingsList, len(i.segment)), + iterators: make([]segment.PostingsIterator, len(i.segment)), + includeFreq: includeFreq, + includeNorm: includeNorm, + includeTermVectors: includeTermVectors, + } + for count := 0; count < len(i.segment); count++ { + spr := <-results + rv.postings[spr.index] = spr.postings + rv.iterators[spr.index] = spr.postings.Iterator() + } + + return rv, nil +} + +func docNumberToBytes(in uint64) []byte { + + buf := new(bytes.Buffer) + _ = binary.Write(buf, binary.BigEndian, in) + return buf.Bytes() +} + +func docInternalToNumber(in index.IndexInternalID) uint64 { + var res uint64 + binary.Read(bytes.NewReader(in), binary.BigEndian, &res) + return res +} diff --git a/index/scorch/snapshot_index_dict.go b/index/scorch/snapshot_index_dict.go new file mode 100644 index 000000000..443e401e6 --- /dev/null +++ b/index/scorch/snapshot_index_dict.go @@ -0,0 +1,78 @@ +package scorch + +import ( + "container/heap" + + "github.com/blevesearch/bleve/index" + "github.com/blevesearch/bleve/index/scorch/segment" +) + +type segmentDictCursor struct { + itr segment.DictionaryIterator + curr *index.DictEntry +} + +type IndexSnapshotFieldDict struct { + snapshot *IndexSnapshot + cursors []*segmentDictCursor +} + +func (i *IndexSnapshotFieldDict) Len() int { return len(i.cursors) } +func (i *IndexSnapshotFieldDict) Less(a, b int) bool { + return i.cursors[a].curr.Term < i.cursors[b].curr.Term +} +func (i *IndexSnapshotFieldDict) Swap(a, b int) { + i.cursors[a], i.cursors[b] = i.cursors[b], 
i.cursors[a] +} + +func (i *IndexSnapshotFieldDict) Push(x interface{}) { + i.cursors = append(i.cursors, x.(*segmentDictCursor)) +} + +func (i *IndexSnapshotFieldDict) Pop() interface{} { + n := len(i.cursors) + x := i.cursors[n-1] + i.cursors = i.cursors[0 : n-1] + return x +} + +func (i *IndexSnapshotFieldDict) Next() (*index.DictEntry, error) { + if len(i.cursors) <= 0 { + return nil, nil + } + rv := i.cursors[0].curr + next, err := i.cursors[0].itr.Next() + if err != nil { + return nil, err + } + if next == nil { + // at end of this cursor, remove it + heap.Pop(i) + } else { + // modified heap, fix it + i.cursors[0].curr = next + heap.Fix(i, 0) + } + // look for any other entries with the exact same term + for len(i.cursors) > 0 && i.cursors[0].curr.Term == rv.Term { + rv.Count += i.cursors[0].curr.Count + next, err := i.cursors[0].itr.Next() + if err != nil { + return nil, err + } + if next == nil { + // at end of this cursor, remove it + heap.Pop(i) + } else { + // modified heap, fix it + i.cursors[0].curr = next + heap.Fix(i, 0) + } + } + + return rv, nil +} + +func (i *IndexSnapshotFieldDict) Close() error { + return nil +} diff --git a/index/scorch/snapshot_index_doc.go b/index/scorch/snapshot_index_doc.go new file mode 100644 index 000000000..2b1144874 --- /dev/null +++ b/index/scorch/snapshot_index_doc.go @@ -0,0 +1,53 @@ +package scorch + +import ( + "bytes" + + "github.com/RoaringBitmap/roaring" + "github.com/blevesearch/bleve/index" +) + +type IndexSnapshotDocIDReader struct { + snapshot *IndexSnapshot + iterators []roaring.IntIterable + segmentOffset int +} + +func (i *IndexSnapshotDocIDReader) Next() (index.IndexInternalID, error) { + for i.segmentOffset < len(i.iterators) { + if !i.iterators[i.segmentOffset].HasNext() { + i.segmentOffset++ + continue + } + next := i.iterators[i.segmentOffset].Next() + // make segment number into global number by adding offset + globalOffset := i.snapshot.offsets[i.segmentOffset] + return 
docNumberToBytes(uint64(next) + globalOffset), nil + } + return nil, nil +} + +func (i *IndexSnapshotDocIDReader) Advance(ID index.IndexInternalID) (index.IndexInternalID, error) { + // FIXME do something better + next, err := i.Next() + if err != nil { + return nil, err + } + if next == nil { + return nil, nil + } + for bytes.Compare(next, ID) < 0 { + next, err = i.Next() + if err != nil { + return nil, err + } + if next == nil { + break + } + } + return next, nil +} + +func (i *IndexSnapshotDocIDReader) Close() error { + return nil +} diff --git a/index/scorch/snapshot_index_tfr.go b/index/scorch/snapshot_index_tfr.go new file mode 100644 index 000000000..44172f3d0 --- /dev/null +++ b/index/scorch/snapshot_index_tfr.go @@ -0,0 +1,91 @@ +package scorch + +import ( + "bytes" + + "github.com/blevesearch/bleve/index" + "github.com/blevesearch/bleve/index/scorch/segment" +) + +type IndexSnapshotTermFieldReader struct { + snapshot *IndexSnapshot + postings []segment.PostingsList + iterators []segment.PostingsIterator + segmentOffset int + includeFreq bool + includeNorm bool + includeTermVectors bool +} + +func (i *IndexSnapshotTermFieldReader) Next(preAlloced *index.TermFieldDoc) (*index.TermFieldDoc, error) { + rv := preAlloced + if rv == nil { + rv = &index.TermFieldDoc{} + } + // find the next hit + for i.segmentOffset < len(i.postings) { + next := i.iterators[i.segmentOffset].Next() + if next != nil { + // make segment number into global number by adding offset + globalOffset := i.snapshot.offsets[i.segmentOffset] + nnum := next.Number() + rv.ID = docNumberToBytes(nnum + globalOffset) + if i.includeFreq { + rv.Freq = next.Frequency() + } + if i.includeNorm { + rv.Norm = next.Norm() + } + if i.includeTermVectors { + locs := next.Locations() + rv.Vectors = make([]*index.TermFieldVector, len(locs)) + for i, loc := range locs { + rv.Vectors[i] = &index.TermFieldVector{ + Start: loc.Start(), + End: loc.End(), + Pos: loc.Pos(), + ArrayPositions: loc.ArrayPositions(), + 
Field: loc.Field(), + } + } + } + + return rv, nil + } + i.segmentOffset++ + } + return nil, nil +} + +func (i *IndexSnapshotTermFieldReader) Advance(ID index.IndexInternalID, preAlloced *index.TermFieldDoc) (*index.TermFieldDoc, error) { + // FIXME do something better + next, err := i.Next(preAlloced) + if err != nil { + return nil, err + } + if next == nil { + return nil, nil + } + for bytes.Compare(next.ID, ID) < 0 { + next, err = i.Next(preAlloced) + if err != nil { + return nil, err + } + if next == nil { + break + } + } + return next, nil +} + +func (i *IndexSnapshotTermFieldReader) Count() uint64 { + var rv uint64 + for _, posting := range i.postings { + rv += posting.Count() + } + return rv +} + +func (i *IndexSnapshotTermFieldReader) Close() error { + return nil +} diff --git a/index/scorch/snapshot_segment.go b/index/scorch/snapshot_segment.go new file mode 100644 index 000000000..b32725e96 --- /dev/null +++ b/index/scorch/snapshot_segment.go @@ -0,0 +1,64 @@ +package scorch + +import ( + "github.com/RoaringBitmap/roaring" + "github.com/blevesearch/bleve/index/scorch/segment" +) + +type SegmentDictionarySnapshot struct { + s *SegmentSnapshot + d segment.TermDictionary +} + +func (s *SegmentDictionarySnapshot) PostingsList(term string, except *roaring.Bitmap) segment.PostingsList { + return s.d.PostingsList(term, s.s.deleted) +} + +func (s *SegmentDictionarySnapshot) Iterator() segment.DictionaryIterator { + return s.d.Iterator() +} + +func (s *SegmentDictionarySnapshot) PrefixIterator(prefix string) segment.DictionaryIterator { + return s.d.PrefixIterator(prefix) +} + +func (s *SegmentDictionarySnapshot) RangeIterator(start, end string) segment.DictionaryIterator { + return s.d.RangeIterator(start, end) +} + +type SegmentSnapshot struct { + id uint64 + segment segment.Segment + deleted *roaring.Bitmap +} + +func (s *SegmentSnapshot) VisitDocument(num uint64, visitor segment.DocumentFieldValueVisitor) error { + return s.segment.VisitDocument(num, visitor) 
+} + +func (s *SegmentSnapshot) Count() uint64 { + rv := s.segment.Count() + if s.deleted != nil { + rv -= s.deleted.GetCardinality() + } + return rv +} + +func (s *SegmentSnapshot) Dictionary(field string) segment.TermDictionary { + return &SegmentDictionarySnapshot{ + s: s, + d: s.segment.Dictionary(field), + } +} + +func (s *SegmentSnapshot) DocNumbers(docIDs []string) *roaring.Bitmap { + rv := s.segment.DocNumbers(docIDs) + if s.deleted != nil { + rv.AndNot(s.deleted) + } + return rv +} + +func (s *SegmentSnapshot) Fields() []string { + return s.segment.Fields() +} diff --git a/index/scorch/stats.go b/index/scorch/stats.go new file mode 100644 index 000000000..f49c8178c --- /dev/null +++ b/index/scorch/stats.go @@ -0,0 +1,33 @@ +package scorch + +import ( + "encoding/json" + "sync/atomic" +) + +// Stats tracks statistics about the index +type Stats struct { + analysisTime, indexTime uint64 +} + +// FIXME wire up these other stats again +func (s *Stats) statsMap() map[string]interface{} { + m := map[string]interface{}{} + // m["updates"] = atomic.LoadUint64(&i.updates) + // m["deletes"] = atomic.LoadUint64(&i.deletes) + // m["batches"] = atomic.LoadUint64(&i.batches) + // m["errors"] = atomic.LoadUint64(&i.errors) + m["analysis_time"] = atomic.LoadUint64(&s.analysisTime) + m["index_time"] = atomic.LoadUint64(&s.indexTime) + // m["term_searchers_started"] = atomic.LoadUint64(&i.termSearchersStarted) + // m["term_searchers_finished"] = atomic.LoadUint64(&i.termSearchersFinished) + // m["num_plain_text_bytes_indexed"] = atomic.LoadUint64(&i.numPlainTextBytesIndexed) + + return m +} + +// MarshalJSON implements json.Marshaler +func (s *Stats) MarshalJSON() ([]byte, error) { + m := s.statsMap() + return json.Marshal(m) +} diff --git a/mapping/index.go b/mapping/index.go index 737f26ffe..cefa59803 100644 --- a/mapping/index.go +++ b/mapping/index.go @@ -339,7 +339,7 @@ func (im *IndexMappingImpl) newWalkContext(doc *document.Document, dm *DocumentM doc: doc, im: im, 
dm: dm, - excludedFromAll: []string{}, + excludedFromAll: []string{"_id"}, } } From 848aca4639566905f79f39f688a784f1424185cf Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Wed, 29 Nov 2017 13:34:15 -0500 Subject: [PATCH 005/728] fix issues identified by errcheck --- index/scorch/scorch.go | 4 +--- index/scorch/segment/mem/segment_test.go | 10 ++++++-- index/scorch/snapshot_index.go | 30 +++++++++++++++++------- 3 files changed, 31 insertions(+), 13 deletions(-) diff --git a/index/scorch/scorch.go b/index/scorch/scorch.go index d20261f5e..1f99b42fe 100644 --- a/index/scorch/scorch.go +++ b/index/scorch/scorch.go @@ -121,9 +121,7 @@ func (s *Scorch) Batch(batch *index.Batch) error { } else { newSegment = mem.New() } - s.prepareSegment(newSegment, ids, batch.InternalOps) - - return nil + return s.prepareSegment(newSegment, ids, batch.InternalOps) } func (s *Scorch) prepareSegment(newSegment segment.Segment, ids []string, diff --git a/index/scorch/segment/mem/segment_test.go b/index/scorch/segment/mem/segment_test.go index bc840e147..4c056d7ad 100644 --- a/index/scorch/segment/mem/segment_test.go +++ b/index/scorch/segment/mem/segment_test.go @@ -44,10 +44,13 @@ func TestEmpty(t *testing.T) { } // now try and visit a document - emptySegment.VisitDocument(0, func(field string, typ byte, value []byte, pos []uint64) bool { + err := emptySegment.VisitDocument(0, func(field string, typ byte, value []byte, pos []uint64) bool { t.Errorf("document visitor called, not expected") return true }) + if err != nil { + t.Fatal(err) + } } func TestSingle(t *testing.T) { @@ -288,10 +291,13 @@ func TestSingle(t *testing.T) { // now try and visit a document var fieldValuesSeen int - segment.VisitDocument(0, func(field string, typ byte, value []byte, pos []uint64) bool { + err := segment.VisitDocument(0, func(field string, typ byte, value []byte, pos []uint64) bool { fieldValuesSeen++ return true }) + if err != nil { + t.Fatal(err) + } if fieldValuesSeen != 5 { t.Errorf("expected 5 
field values, got %d", fieldValuesSeen) } diff --git a/index/scorch/snapshot_index.go b/index/scorch/snapshot_index.go index a33cc58e1..8c6ea1aaf 100644 --- a/index/scorch/snapshot_index.go +++ b/index/scorch/snapshot_index.go @@ -161,13 +161,17 @@ func (i *IndexSnapshot) DocCount() (uint64, error) { return rv, nil } -func (i *IndexSnapshot) Document(id string) (*document.Document, error) { +func (i *IndexSnapshot) Document(id string) (rv *document.Document, err error) { // FIXME could be done more efficiently directly, but reusing for simplicity tfr, err := i.TermFieldReader([]byte(id), "_id", false, false, false) if err != nil { return nil, err } - defer tfr.Close() + defer func() { + if cerr := tfr.Close(); err == nil && cerr != nil { + err = cerr + } + }() next, err := tfr.Next(nil) if err != nil { @@ -177,8 +181,8 @@ func (i *IndexSnapshot) Document(id string) (*document.Document, error) { docNum := docInternalToNumber(next.ID) segmentIndex, localDocNum := i.segmentIndexAndLocalDocNumFromGlobal(docNum) - rv := document.NewDocument(id) - i.segment[segmentIndex].VisitDocument(localDocNum, func(name string, typ byte, value []byte, pos []uint64) bool { + rv = document.NewDocument(id) + err = i.segment[segmentIndex].VisitDocument(localDocNum, func(name string, typ byte, value []byte, pos []uint64) bool { switch typ { case 't': rv.AddField(document.NewTextField(name, pos, value)) @@ -194,6 +198,9 @@ func (i *IndexSnapshot) Document(id string) (*document.Document, error) { return true }) + if err != nil { + return nil, err + } return rv, nil } @@ -218,7 +225,7 @@ func (i *IndexSnapshot) ExternalID(id index.IndexInternalID) (string, error) { var found bool var rv string - i.segment[segmentIndex].VisitDocument(localDocNum, func(field string, typ byte, value []byte, pos []uint64) bool { + err := i.segment[segmentIndex].VisitDocument(localDocNum, func(field string, typ byte, value []byte, pos []uint64) bool { if field == "_id" { found = true rv = string(value) @@ -226,6 
+233,9 @@ func (i *IndexSnapshot) ExternalID(id index.IndexInternalID) (string, error) { } return true }) + if err != nil { + return "", err + } if found { return rv, nil @@ -233,13 +243,17 @@ func (i *IndexSnapshot) ExternalID(id index.IndexInternalID) (string, error) { return "", fmt.Errorf("document number %d not found", docNum) } -func (i *IndexSnapshot) InternalID(id string) (index.IndexInternalID, error) { +func (i *IndexSnapshot) InternalID(id string) (rv index.IndexInternalID, err error) { // FIXME could be done more efficiently directly, but reusing for simplicity tfr, err := i.TermFieldReader([]byte(id), "_id", false, false, false) if err != nil { return nil, err } - defer tfr.Close() + defer func() { + if cerr := tfr.Close(); err == nil && cerr != nil { + err = cerr + } + }() next, err := tfr.Next(nil) if err != nil { @@ -295,6 +309,6 @@ func docNumberToBytes(in uint64) []byte { func docInternalToNumber(in index.IndexInternalID) uint64 { var res uint64 - binary.Read(bytes.NewReader(in), binary.BigEndian, &res) + _ = binary.Read(bytes.NewReader(in), binary.BigEndian, &res) return res } From 67986d41bfab5634bc11d81f7a02bd7279782476 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Thu, 30 Nov 2017 08:36:01 -0800 Subject: [PATCH 006/728] scorch InternalID() handles case of unknown docId --- index/scorch/reader_test.go | 8 ++++++++ index/scorch/snapshot_index.go | 2 +- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/index/scorch/reader_test.go b/index/scorch/reader_test.go index a050bb44a..f673b198b 100644 --- a/index/scorch/reader_test.go +++ b/index/scorch/reader_test.go @@ -93,6 +93,14 @@ func TestIndexReader(t *testing.T) { t.Errorf("count was 2, but only saw %d", actualCount) } + internalIDBogus, err := indexReader.InternalID("a-bogus-docId") + if err != nil { + t.Fatal(err) + } + if internalIDBogus != nil { + t.Errorf("expected bogus docId to have nil InternalID") + } + internalID2, err := indexReader.InternalID("2") if err != nil { 
t.Fatal(err) diff --git a/index/scorch/snapshot_index.go b/index/scorch/snapshot_index.go index 8c6ea1aaf..11fc063c5 100644 --- a/index/scorch/snapshot_index.go +++ b/index/scorch/snapshot_index.go @@ -256,7 +256,7 @@ func (i *IndexSnapshot) InternalID(id string) (rv index.IndexInternalID, err err }() next, err := tfr.Next(nil) - if err != nil { + if err != nil || next == nil { return nil, err } From 398dcb19b3c7bc96e74f42a838784307715a92da Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Thu, 30 Nov 2017 10:37:02 -0800 Subject: [PATCH 007/728] scorch introducer uses the roaring.Or(x, y) API Instead of cloning an input bitmap, the roaring.Or(x, y) implementation fills a brand new result bitmap, which should be allow for more efficient packing and memory utilization. --- index/scorch/introducer.go | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/index/scorch/introducer.go b/index/scorch/introducer.go index dc748ad85..37a66dc84 100644 --- a/index/scorch/introducer.go +++ b/index/scorch/introducer.go @@ -49,8 +49,7 @@ func (s *Scorch) mainLoop() { if s.root.segment[i].deleted == nil { newSnapshot.segment[i].deleted = delta } else { - newSnapshot.segment[i].deleted = s.root.segment[i].deleted.Clone() - newSnapshot.segment[i].deleted.Or(delta) + newSnapshot.segment[i].deleted = roaring.Or(s.root.segment[i].deleted, delta) } newSnapshot.offsets[i] = running From 395458ce830fc0b86963a37b8728b03be594c65c Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Fri, 1 Dec 2017 07:26:47 -0500 Subject: [PATCH 008/728] refactor to make mem segment contents exported --- index/scorch/segment/mem/build.go | 72 ++++++++++++++--------------- index/scorch/segment/mem/dict.go | 14 +++--- index/scorch/segment/mem/posting.go | 24 +++++----- index/scorch/segment/mem/segment.go | 62 ++++++++++++------------- 4 files changed, 86 insertions(+), 86 deletions(-) diff --git a/index/scorch/segment/mem/build.go b/index/scorch/segment/mem/build.go index ff6fb6056..5154c182f 100644 --- 
a/index/scorch/segment/mem/build.go +++ b/index/scorch/segment/mem/build.go @@ -23,7 +23,7 @@ func NewFromAnalyzedDocs(results []*index.AnalysisResult) *Segment { } // go back and sort the dictKeys - for _, dict := range s.dictKeys { + for _, dict := range s.DictKeys { sort.Strings(dict) } @@ -81,9 +81,9 @@ func (s *Segment) processDocument(result *index.AnalysisResult) { } storeField := func(docNum uint64, field uint16, typ byte, val []byte, pos []uint64) { - s.stored[docNum][field] = append(s.stored[docNum][field], val) - s.storedTypes[docNum][field] = append(s.storedTypes[docNum][field], typ) - s.storedPos[docNum][field] = append(s.storedPos[docNum][field], pos) + s.Stored[docNum][field] = append(s.Stored[docNum][field], val) + s.StoredTypes[docNum][field] = append(s.StoredTypes[docNum][field], typ) + s.StoredPos[docNum][field] = append(s.StoredPos[docNum][field], pos) } // walk each composite field @@ -107,7 +107,7 @@ func (s *Segment) processDocument(result *index.AnalysisResult) { // now that its been rolled up into docMap, walk that for fieldID, tokenFrequencies := range docMap { for term, tokenFreq := range tokenFrequencies { - fieldTermPostings := s.dicts[fieldID][term] + fieldTermPostings := s.Dicts[fieldID][term] // FIXME this if/else block has duplicate code that has resulted in // bugs fixed/missed more than once, need to refactor @@ -116,12 +116,12 @@ func (s *Segment) processDocument(result *index.AnalysisResult) { bs := roaring.New() bs.AddInt(int(docNum)) - newPostingID := uint64(len(s.postings) + 1) + newPostingID := uint64(len(s.Postings) + 1) // add this new bitset to the postings slice - s.postings = append(s.postings, bs) + s.Postings = append(s.Postings, bs) // add this to the details slice - s.freqs = append(s.freqs, []uint64{uint64(tokenFreq.Frequency())}) - s.norms = append(s.norms, []float32{float32(1.0 / math.Sqrt(float64(fieldLens[fieldID])))}) + s.Freqs = append(s.Freqs, []uint64{uint64(tokenFreq.Frequency())}) + s.Norms = 
append(s.Norms, []float32{float32(1.0 / math.Sqrt(float64(fieldLens[fieldID])))}) // add to locations var locfields []uint16 var locstarts []uint64 @@ -143,35 +143,35 @@ func (s *Segment) processDocument(result *index.AnalysisResult) { locarraypos = append(locarraypos, nil) } } - s.locfields = append(s.locfields, locfields) - s.locstarts = append(s.locstarts, locstarts) - s.locends = append(s.locends, locends) - s.locpos = append(s.locpos, locpos) - s.locarraypos = append(s.locarraypos, locarraypos) + s.Locfields = append(s.Locfields, locfields) + s.Locstarts = append(s.Locstarts, locstarts) + s.Locends = append(s.Locends, locends) + s.Locpos = append(s.Locpos, locpos) + s.Locarraypos = append(s.Locarraypos, locarraypos) // record it - s.dicts[fieldID][term] = newPostingID + s.Dicts[fieldID][term] = newPostingID // this term was new for this field, add it to dictKeys - s.dictKeys[fieldID] = append(s.dictKeys[fieldID], term) + s.DictKeys[fieldID] = append(s.DictKeys[fieldID], term) } else { // posting already started for this field/term // the actual offset is - 1, because 0 is zero value - bs := s.postings[fieldTermPostings-1] + bs := s.Postings[fieldTermPostings-1] bs.AddInt(int(docNum)) - s.freqs[fieldTermPostings-1] = append(s.freqs[fieldTermPostings-1], uint64(tokenFreq.Frequency())) - s.norms[fieldTermPostings-1] = append(s.norms[fieldTermPostings-1], float32(1.0/math.Sqrt(float64(fieldLens[fieldID])))) + s.Freqs[fieldTermPostings-1] = append(s.Freqs[fieldTermPostings-1], uint64(tokenFreq.Frequency())) + s.Norms[fieldTermPostings-1] = append(s.Norms[fieldTermPostings-1], float32(1.0/math.Sqrt(float64(fieldLens[fieldID])))) for _, loc := range tokenFreq.Locations { var locf = fieldID if loc.Field != "" { locf = uint16(s.getOrDefineField(loc.Field, false)) } - s.locfields[fieldTermPostings-1] = append(s.locfields[fieldTermPostings-1], locf) - s.locstarts[fieldTermPostings-1] = append(s.locstarts[fieldTermPostings-1], uint64(loc.Start)) - 
s.locends[fieldTermPostings-1] = append(s.locends[fieldTermPostings-1], uint64(loc.End)) - s.locpos[fieldTermPostings-1] = append(s.locpos[fieldTermPostings-1], uint64(loc.Position)) + s.Locfields[fieldTermPostings-1] = append(s.Locfields[fieldTermPostings-1], locf) + s.Locstarts[fieldTermPostings-1] = append(s.Locstarts[fieldTermPostings-1], uint64(loc.Start)) + s.Locends[fieldTermPostings-1] = append(s.Locends[fieldTermPostings-1], uint64(loc.End)) + s.Locpos[fieldTermPostings-1] = append(s.Locpos[fieldTermPostings-1], uint64(loc.Position)) if len(loc.ArrayPositions) > 0 { - s.locarraypos[fieldTermPostings-1] = append(s.locarraypos[fieldTermPostings-1], loc.ArrayPositions) + s.Locarraypos[fieldTermPostings-1] = append(s.Locarraypos[fieldTermPostings-1], loc.ArrayPositions) } else { - s.locarraypos[fieldTermPostings-1] = append(s.locarraypos[fieldTermPostings-1], nil) + s.Locarraypos[fieldTermPostings-1] = append(s.Locarraypos[fieldTermPostings-1], nil) } } } @@ -180,23 +180,23 @@ func (s *Segment) processDocument(result *index.AnalysisResult) { } func (s *Segment) getOrDefineField(name string, hasLoc bool) int { - fieldID, ok := s.fields[name] + fieldID, ok := s.FieldsMap[name] if !ok { - fieldID = uint16(len(s.fieldsInv) + 1) - s.fields[name] = fieldID - s.fieldsInv = append(s.fieldsInv, name) - s.fieldsLoc = append(s.fieldsLoc, hasLoc) - s.dicts = append(s.dicts, make(map[string]uint64)) - s.dictKeys = append(s.dictKeys, make([]string, 0)) + fieldID = uint16(len(s.FieldsInv) + 1) + s.FieldsMap[name] = fieldID + s.FieldsInv = append(s.FieldsInv, name) + s.FieldsLoc = append(s.FieldsLoc, hasLoc) + s.Dicts = append(s.Dicts, make(map[string]uint64)) + s.DictKeys = append(s.DictKeys, make([]string, 0)) } return int(fieldID - 1) } func (s *Segment) addDocument() int { - docNum := len(s.stored) - s.stored = append(s.stored, map[uint16][][]byte{}) - s.storedTypes = append(s.storedTypes, map[uint16][]byte{}) - s.storedPos = append(s.storedPos, map[uint16][][]uint64{}) + 
docNum := len(s.Stored) + s.Stored = append(s.Stored, map[uint16][][]byte{}) + s.StoredTypes = append(s.StoredTypes, map[uint16][]byte{}) + s.StoredPos = append(s.StoredPos, map[uint16][][]uint64{}) return docNum } diff --git a/index/scorch/segment/mem/dict.go b/index/scorch/segment/mem/dict.go index a7d609f6d..c724493da 100644 --- a/index/scorch/segment/mem/dict.go +++ b/index/scorch/segment/mem/dict.go @@ -21,7 +21,7 @@ func (d *Dictionary) PostingsList(term string, except *roaring.Bitmap) segment.P return &PostingsList{ dictionary: d, term: term, - postingsID: d.segment.dicts[d.fieldID][term], + postingsID: d.segment.Dicts[d.fieldID][term], except: except, } } @@ -36,7 +36,7 @@ func (d *Dictionary) Iterator() segment.DictionaryIterator { // PrefixIterator returns an iterator which only visits terms having the // the specified prefix func (d *Dictionary) PrefixIterator(prefix string) segment.DictionaryIterator { - offset := sort.SearchStrings(d.segment.dictKeys[d.fieldID], prefix) + offset := sort.SearchStrings(d.segment.DictKeys[d.fieldID], prefix) return &DictionaryIterator{ d: d, prefix: prefix, @@ -47,7 +47,7 @@ func (d *Dictionary) PrefixIterator(prefix string) segment.DictionaryIterator { // RangeIterator returns an iterator which only visits terms between the // start and end terms. NOTE: bleve.index API specifies the end is inclusive. 
func (d *Dictionary) RangeIterator(start, end string) segment.DictionaryIterator { - offset := sort.SearchStrings(d.segment.dictKeys[d.fieldID], start) + offset := sort.SearchStrings(d.segment.DictKeys[d.fieldID], start) return &DictionaryIterator{ d: d, offset: offset, @@ -65,10 +65,10 @@ type DictionaryIterator struct { // Next returns the next entry in the dictionary func (d *DictionaryIterator) Next() (*index.DictEntry, error) { - if d.offset > len(d.d.segment.dictKeys[d.d.fieldID])-1 { + if d.offset > len(d.d.segment.DictKeys[d.d.fieldID])-1 { return nil, nil } - next := d.d.segment.dictKeys[d.d.fieldID][d.offset] + next := d.d.segment.DictKeys[d.d.fieldID][d.offset] // check prefix if d.prefix != "" && !strings.HasPrefix(next, d.prefix) { return nil, nil @@ -79,9 +79,9 @@ func (d *DictionaryIterator) Next() (*index.DictEntry, error) { } d.offset++ - postingID := d.d.segment.dicts[d.d.fieldID][next] + postingID := d.d.segment.Dicts[d.d.fieldID][next] return &index.DictEntry{ Term: next, - Count: d.d.segment.postings[postingID-1].GetCardinality(), + Count: d.d.segment.Postings[postingID-1].GetCardinality(), }, nil } diff --git a/index/scorch/segment/mem/posting.go b/index/scorch/segment/mem/posting.go index ac8b2fb72..fa1c0a00f 100644 --- a/index/scorch/segment/mem/posting.go +++ b/index/scorch/segment/mem/posting.go @@ -17,7 +17,7 @@ type PostingsList struct { func (p *PostingsList) Count() uint64 { var rv uint64 if p.postingsID > 0 { - rv = p.dictionary.segment.postings[p.postingsID-1].GetCardinality() + rv = p.dictionary.segment.Postings[p.postingsID-1].GetCardinality() if p.except != nil { except := p.except.GetCardinality() if except > rv { @@ -36,7 +36,7 @@ func (p *PostingsList) Iterator() segment.PostingsIterator { postings: p, } if p.postingsID > 0 { - allbits := p.dictionary.segment.postings[p.postingsID-1] + allbits := p.dictionary.segment.Postings[p.postingsID-1] rv.all = allbits.Iterator() if p.except != nil { allExcept := allbits.Clone() @@ -72,7 
+72,7 @@ func (i *PostingsIterator) Next() segment.Posting { // if they don't match, adjust offsets to factor in item we're skipping over // incr the all iterator, and check again for allN != n { - i.locoffset += int(i.postings.dictionary.segment.freqs[i.postings.postingsID-1][i.offset]) + i.locoffset += int(i.postings.dictionary.segment.Freqs[i.postings.postingsID-1][i.offset]) i.offset++ allN = i.all.Next() } @@ -83,7 +83,7 @@ func (i *PostingsIterator) Next() segment.Posting { locoffset: i.locoffset, } - i.locoffset += int(i.postings.dictionary.segment.freqs[i.postings.postingsID-1][i.offset]) + i.locoffset += int(i.postings.dictionary.segment.Freqs[i.postings.postingsID-1][i.offset]) i.offset++ return rv } @@ -103,17 +103,17 @@ func (p *Posting) Number() uint64 { // Frequency returns the frequence of occurance of this term in this doc/field func (p *Posting) Frequency() uint64 { - return p.iterator.postings.dictionary.segment.freqs[p.iterator.postings.postingsID-1][p.offset] + return p.iterator.postings.dictionary.segment.Freqs[p.iterator.postings.postingsID-1][p.offset] } // Norm returns the normalization factor for this posting func (p *Posting) Norm() float64 { - return float64(p.iterator.postings.dictionary.segment.norms[p.iterator.postings.postingsID-1][p.offset]) + return float64(p.iterator.postings.dictionary.segment.Norms[p.iterator.postings.postingsID-1][p.offset]) } // Locations returns the location information for each occurance func (p *Posting) Locations() []segment.Location { - if !p.iterator.postings.dictionary.segment.fieldsLoc[p.iterator.postings.dictionary.fieldID] { + if !p.iterator.postings.dictionary.segment.FieldsLoc[p.iterator.postings.dictionary.fieldID] { return nil } freq := int(p.Frequency()) @@ -136,25 +136,25 @@ type Location struct { // Field returns the name of the field (useful in composite fields to know // which original field the value came from) func (l *Location) Field() string { - return 
l.p.iterator.postings.dictionary.segment.fieldsInv[l.p.iterator.postings.dictionary.segment.locfields[l.p.iterator.postings.postingsID-1][l.offset]] + return l.p.iterator.postings.dictionary.segment.FieldsInv[l.p.iterator.postings.dictionary.segment.Locfields[l.p.iterator.postings.postingsID-1][l.offset]] } // Start returns the start byte offset of this occurance func (l *Location) Start() uint64 { - return l.p.iterator.postings.dictionary.segment.locstarts[l.p.iterator.postings.postingsID-1][l.offset] + return l.p.iterator.postings.dictionary.segment.Locstarts[l.p.iterator.postings.postingsID-1][l.offset] } // End returns the end byte offset of this occurance func (l *Location) End() uint64 { - return l.p.iterator.postings.dictionary.segment.locends[l.p.iterator.postings.postingsID-1][l.offset] + return l.p.iterator.postings.dictionary.segment.Locends[l.p.iterator.postings.postingsID-1][l.offset] } // Pos returns the 1-based phrase position of this occurance func (l *Location) Pos() uint64 { - return l.p.iterator.postings.dictionary.segment.locpos[l.p.iterator.postings.postingsID-1][l.offset] + return l.p.iterator.postings.dictionary.segment.Locpos[l.p.iterator.postings.postingsID-1][l.offset] } // ArrayPositions returns the array position vector associated with this occurance func (l *Location) ArrayPositions() []uint64 { - return l.p.iterator.postings.dictionary.segment.locarraypos[l.p.iterator.postings.postingsID-1][l.offset] + return l.p.iterator.postings.dictionary.segment.Locarraypos[l.p.iterator.postings.postingsID-1][l.offset] } diff --git a/index/scorch/segment/mem/segment.go b/index/scorch/segment/mem/segment.go index fdec1b2e6..fe71f17c6 100644 --- a/index/scorch/segment/mem/segment.go +++ b/index/scorch/segment/mem/segment.go @@ -22,77 +22,77 @@ import ( // Segment is an in memory implementation of scorch.Segment type Segment struct { - // fields name -> id+1 - fields map[string]uint16 + // FieldsMap name -> id+1 + FieldsMap map[string]uint16 // fields 
id -> name - fieldsInv []string + FieldsInv []string // field id -> has location info - fieldsLoc []bool + FieldsLoc []bool // term dictionary // field id -> term -> posting id + 1 - dicts []map[string]uint64 + Dicts []map[string]uint64 // term dictionary keys // field id -> []dictionary keys - dictKeys [][]string + DictKeys [][]string - // postings list - // postings list id -> postings bitmap - postings []*roaring.Bitmap + // Postings list + // Postings list id -> Postings bitmap + Postings []*roaring.Bitmap // term frequencies - // postings list id -> freqs (one for each hit in bitmap) - freqs [][]uint64 + // postings list id -> Freqs (one for each hit in bitmap) + Freqs [][]uint64 - // field norms - // postings list id -> norms (one for each hit in bitmap) - norms [][]float32 + // field Norms + // postings list id -> Norms (one for each hit in bitmap) + Norms [][]float32 // field/start/end/pos/locarraypos // postings list id -> start/end/pos/locarraypos (one for each freq) - locfields [][]uint16 - locstarts [][]uint64 - locends [][]uint64 - locpos [][]uint64 - locarraypos [][][]uint64 + Locfields [][]uint16 + Locstarts [][]uint64 + Locends [][]uint64 + Locpos [][]uint64 + Locarraypos [][][]uint64 - // stored field values + // Stored field values // docNum -> field id -> slice of values (each value []byte) - stored []map[uint16][][]byte + Stored []map[uint16][][]byte // stored field types // docNum -> field id -> slice of types (each type byte) - storedTypes []map[uint16][]byte + StoredTypes []map[uint16][]byte // stored field array positions // docNum -> field id -> slice of array positions (each is []uint64) - storedPos []map[uint16][][]uint64 + StoredPos []map[uint16][][]uint64 } // New builds a new empty Segment func New() *Segment { return &Segment{ - fields: map[string]uint16{}, + FieldsMap: map[string]uint16{}, } } // Fields returns the field names used in this segment func (s *Segment) Fields() []string { - return s.fieldsInv + return s.FieldsInv } // 
VisitDocument invokes the DocFieldValueVistor for each stored field // for the specified doc number func (s *Segment) VisitDocument(num uint64, visitor segment.DocumentFieldValueVisitor) error { // ensure document number exists - if int(num) > len(s.stored)-1 { + if int(num) > len(s.Stored)-1 { return nil } - docFields := s.stored[int(num)] + docFields := s.Stored[int(num)] for field, values := range docFields { for i, value := range values { - keepGoing := visitor(s.fieldsInv[field], s.storedTypes[int(num)][field][i], value, s.storedPos[int(num)][field][i]) + keepGoing := visitor(s.FieldsInv[field], s.StoredTypes[int(num)][field][i], value, s.StoredPos[int(num)][field][i]) if !keepGoing { return nil } @@ -113,19 +113,19 @@ func (s *Segment) Dictionary(field string) segment.TermDictionary { // Count returns the number of documents in this segment // (this has no notion of deleted docs) func (s *Segment) Count() uint64 { - return uint64(len(s.stored)) + return uint64(len(s.Stored)) } // DocNumbers returns a bitset corresponding to the doc numbers of all the // provided _id strings func (s *Segment) DocNumbers(ids []string) *roaring.Bitmap { - idDictionary := s.dicts[s.getOrDefineField("_id", false)] + idDictionary := s.Dicts[s.getOrDefineField("_id", false)] rv := roaring.New() for _, id := range ids { postingID := idDictionary[id] if postingID > 0 { - rv.Or(s.postings[postingID-1]) + rv.Or(s.Postings[postingID-1]) } } return rv From bcd4bdc3d10a8dc2cf1887e5459dc12ae4cafbe0 Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Fri, 1 Dec 2017 07:27:04 -0500 Subject: [PATCH 009/728] added initial bolt thought to README --- index/scorch/README.md | 92 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 91 insertions(+), 1 deletion(-) diff --git a/index/scorch/README.md b/index/scorch/README.md index cec982ebd..690e7d3d5 100644 --- a/index/scorch/README.md +++ b/index/scorch/README.md @@ -183,7 +183,7 @@ An ASCII art example: [ 0 1 1 ] Compute bitset 
segment-1-deleted-by-2: - [ 0 0 0 ] + [ 0 ] OR it with previous (nil) still just nil @@ -418,3 +418,93 @@ state: 2, 4, 8 2-X, 4-X, 8-X, nil merger finishes: new segment Y, is not valid, need to be recomputed + + +### Bolt Segment Proposal + +Bucket + +"f" field storage + + Key Val + field name field id (var uint16) + + // TODO field location bits + +"d" term dictionary storage + Key Val + field id (var uint16) Vellum FST (mapping term to posting id uint64) + + +"p" postings list storage + Key Val + posting id (var uint64) Roaring Bitmap Serialization (doc numbers) - see FromBuffer + + +"x" chunked data storage + Key Val + chunk id (var uint64) sub-bucket + + Key Val + posting id (var uint64) sub-bucket + + + ALL Compressed Integer Encoding []uint64 + Key Val + "f" freqs 1 value per hit + "n" norms 1 value per hit + "i" fields values per hit + "s" start values per hit + "e" end values per hit + "p" pos values per hit + "a" array pos + entries + each entry is count + followed by uint64 + +"s" stored field data + Key Val + doc num (var uint64) sub-bucket + + Key Val + "m" mossy-like meta packed + + 16 bits - field id + 8 bits - field type + 2? bits - array pos length + + X bits - offset + X bits - length + + "d" raw []byte data (possibly compressed, need segment level config?) + + "a" array position info, packed slice uint64 + + + + + +Notes: + +It is assumed that each IndexReader (snapshot) starts a new Bolt TX (read-only) immediately, and holds it up until it is no longer needed. This allows us to use (unsafely) the raw bytes coming out of BoltDB as return values. Bolt guarantees they will be safe for the duration of the transaction (which we arrange to be the life of the index snapshot). + +Only physically store the fields in one direction, even though at runtime we need both. Upon opening the index, we can read in all the k/v pairs in the "f" bucket. 
We use the unsafe package to create a []string inverted mapping pointing to the underlying []byte in the BoltDB values. + +The term dictionary is stored opaquely as Vellum FST for each field. When accessing these keys, the []byte return to us is mmap'd by bolt under the hood. We then pass this to vellum using its []byte API, which then operates on it without ever forcing whole thing into memory unless needed. + +We do not need to persist the dictkeys slice since it is only there to support the dictionary iterator prefix/range searches, which are supported directly by the FST. + +Theory of operation of chunked storage is as follows. The postings list iterators only allow starting at the beginning, and have no "advance" capability. In the memory version, this means we always know the Nth hit in the postings list is the Nth entry in some other densely packed slice. However, while OK when everything is in RAM, this is not as suitable for a structure on disk, where wading through detailed info of records you don't care about is too expensive. Instead, we assume some fixed chunking, say 1024. All detailed info for document number N can be found inside of chunk N/1024. Now, the Advance operation still has to Next it's way through the posting list. But, now when it reaches a hit, it knows the chunk index as well as the hit index inside that chunk. Further, we push the chunk offsets to the top of the bolt structure, under the theory that we're likely to access data inside a chunk at the same time. For example, you're likely to access the frequency and norm values for a document hit together, so by organizing by chunk first, we increase the likelihood that this info is nearby on disk. + +The "f" and "n" sub-buckets inside a posting have 1 entry for each hit. (you must next-next-next within the chunk) + +The "i", "s", "e", "p", sub-buckets have entries for each hit. 
(you must have read and know the freq) + +The "a" sub-bucket has groupings, where each grouping starts with a count, followed by entries. + +For example, lets say hit docNum 27 has freq of 2. The first location for the hit has array positions (0, 1) length 2, and the second location for the hit has array positions (1, 3, 2) length 3. The entries in the slice for this hit look like: + +2 0 1 3 1 3 2 +^ ^ +| next entry, number of ints to follow for it +number of ints to follow for this entry From c2047dcdf9cf911a4b2212f03cb5921ce6f26dba Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Fri, 1 Dec 2017 08:54:39 -0500 Subject: [PATCH 010/728] refactor doc id reader creation to share more code fix issue identified by steve --- index/scorch/snapshot_index.go | 41 ++++++++++---------------------- index/scorch/snapshot_segment.go | 10 ++++++++ 2 files changed, 23 insertions(+), 28 deletions(-) diff --git a/index/scorch/snapshot_index.go b/index/scorch/snapshot_index.go index 11fc063c5..fa2113ccd 100644 --- a/index/scorch/snapshot_index.go +++ b/index/scorch/snapshot_index.go @@ -72,61 +72,46 @@ func (i *IndexSnapshot) FieldDictPrefix(field string, } func (i *IndexSnapshot) DocIDReaderAll() (index.DocIDReader, error) { - - type segmentDocNumsResult struct { - index int - docs *roaring.Bitmap - } - results := make(chan *segmentDocNumsResult) for index, segment := range i.segment { go func(index int, segment *SegmentSnapshot) { - docnums := roaring.NewBitmap() - docnums.AddRange(0, segment.Count()) results <- &segmentDocNumsResult{ index: index, - docs: docnums, + docs: segment.DocNumbersLive(), } }(index, segment) } - rv := &IndexSnapshotDocIDReader{ - snapshot: i, - iterators: make([]roaring.IntIterable, len(i.segment)), - } - for count := 0; count < len(i.segment); count++ { - sdnr := <-results - rv.iterators[sdnr.index] = sdnr.docs.Iterator() - } - - return rv, nil + return i.newDocIDReader(results) } func (i *IndexSnapshot) DocIDReaderOnly(ids []string) 
(index.DocIDReader, error) { - - type segmentDocNumsResult struct { - index int - docs *roaring.Bitmap - } - results := make(chan *segmentDocNumsResult) for index, segment := range i.segment { go func(index int, segment *SegmentSnapshot) { - docnums := segment.DocNumbers(ids) results <- &segmentDocNumsResult{ index: index, - docs: docnums, + docs: segment.DocNumbers(ids), } }(index, segment) } + return i.newDocIDReader(results) +} + +type segmentDocNumsResult struct { + index int + docs *roaring.Bitmap +} + +func (i *IndexSnapshot) newDocIDReader(results chan *segmentDocNumsResult) (index.DocIDReader, error) { rv := &IndexSnapshotDocIDReader{ snapshot: i, iterators: make([]roaring.IntIterable, len(i.segment)), } for count := 0; count < len(i.segment); count++ { sdnr := <-results - rv.iterators[count] = sdnr.docs.Iterator() + rv.iterators[sdnr.index] = sdnr.docs.Iterator() } return rv, nil diff --git a/index/scorch/snapshot_segment.go b/index/scorch/snapshot_segment.go index b32725e96..ffacb5287 100644 --- a/index/scorch/snapshot_segment.go +++ b/index/scorch/snapshot_segment.go @@ -59,6 +59,16 @@ func (s *SegmentSnapshot) DocNumbers(docIDs []string) *roaring.Bitmap { return rv } +// DocNumbersLive returns bitsit containing doc numbers for all live docs +func (s *SegmentSnapshot) DocNumbersLive() *roaring.Bitmap { + rv := roaring.NewBitmap() + rv.AddRange(0, s.segment.Count()) + if s.deleted != nil { + rv.AndNot(s.deleted) + } + return rv +} + func (s *SegmentSnapshot) Fields() []string { return s.segment.Fields() } From 7c964de8bfdd93087169e8478064a2e293200e62 Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Fri, 1 Dec 2017 09:26:51 -0500 Subject: [PATCH 011/728] switch to binary search for finding segment from global doc num added unit tests for this function specifically --- index/scorch/reader_test.go | 104 +++++++++++++++++++++++++++++++++ index/scorch/snapshot_index.go | 13 ++--- 2 files changed, 109 insertions(+), 8 deletions(-) diff --git 
a/index/scorch/reader_test.go b/index/scorch/reader_test.go index f673b198b..6b801faa3 100644 --- a/index/scorch/reader_test.go +++ b/index/scorch/reader_test.go @@ -517,3 +517,107 @@ func TestIndexDocIdOnlyReader(t *testing.T) { // } } + +func TestSegmentIndexAndLocalDocNumFromGlobal(t *testing.T) { + tests := []struct { + offsets []uint64 + globalDocNum uint64 + segmentIndex int + localDocNum uint64 + }{ + // just 1 segment + { + offsets: []uint64{0}, + globalDocNum: 0, + segmentIndex: 0, + localDocNum: 0, + }, + { + offsets: []uint64{0}, + globalDocNum: 1, + segmentIndex: 0, + localDocNum: 1, + }, + { + offsets: []uint64{0}, + globalDocNum: 25, + segmentIndex: 0, + localDocNum: 25, + }, + // now 2 segments, 30 docs in first + { + offsets: []uint64{0, 30}, + globalDocNum: 0, + segmentIndex: 0, + localDocNum: 0, + }, + { + offsets: []uint64{0, 30}, + globalDocNum: 1, + segmentIndex: 0, + localDocNum: 1, + }, + { + offsets: []uint64{0, 30}, + globalDocNum: 25, + segmentIndex: 0, + localDocNum: 25, + }, + { + offsets: []uint64{0, 30}, + globalDocNum: 30, + segmentIndex: 1, + localDocNum: 0, + }, + { + offsets: []uint64{0, 30}, + globalDocNum: 35, + segmentIndex: 1, + localDocNum: 5, + }, + // lots of segments + { + offsets: []uint64{0, 30, 40, 70, 99, 172, 800, 25000}, + globalDocNum: 0, + segmentIndex: 0, + localDocNum: 0, + }, + { + offsets: []uint64{0, 30, 40, 70, 99, 172, 800, 25000}, + globalDocNum: 25, + segmentIndex: 0, + localDocNum: 25, + }, + { + offsets: []uint64{0, 30, 40, 70, 99, 172, 800, 25000}, + globalDocNum: 35, + segmentIndex: 1, + localDocNum: 5, + }, + { + offsets: []uint64{0, 30, 40, 70, 99, 172, 800, 25000}, + globalDocNum: 100, + segmentIndex: 4, + localDocNum: 1, + }, + { + offsets: []uint64{0, 30, 40, 70, 99, 172, 800, 25000}, + globalDocNum: 825, + segmentIndex: 6, + localDocNum: 25, + }, + } + + for _, test := range tests { + i := &IndexSnapshot{ + offsets: test.offsets, + } + gotSegmentIndex, gotLocalDocNum := 
i.segmentIndexAndLocalDocNumFromGlobal(test.globalDocNum) + if gotSegmentIndex != test.segmentIndex { + t.Errorf("got segment index %d expected %d for offsets %v globalDocNum %d", gotSegmentIndex, test.segmentIndex, test.offsets, test.globalDocNum) + } + if gotLocalDocNum != test.localDocNum { + t.Errorf("got localDocNum %d expected %d for offsets %v globalDocNum %d", gotLocalDocNum, test.localDocNum, test.offsets, test.globalDocNum) + } + } +} diff --git a/index/scorch/snapshot_index.go b/index/scorch/snapshot_index.go index fa2113ccd..2c85b8fee 100644 --- a/index/scorch/snapshot_index.go +++ b/index/scorch/snapshot_index.go @@ -5,6 +5,7 @@ import ( "container/heap" "encoding/binary" "fmt" + "sort" "github.com/RoaringBitmap/roaring" "github.com/blevesearch/bleve/document" @@ -191,14 +192,10 @@ func (i *IndexSnapshot) Document(id string) (rv *document.Document, err error) { } func (i *IndexSnapshot) segmentIndexAndLocalDocNumFromGlobal(docNum uint64) (int, uint64) { - var segmentIndex uint64 - for j := 1; j < len(i.offsets); j++ { - if docNum >= i.offsets[j] { - segmentIndex = uint64(j) - } else { - break - } - } + segmentIndex := sort.Search(len(i.offsets), + func(x int) bool { + return i.offsets[x] > docNum + }) - 1 localDocNum := docNum - i.offsets[segmentIndex] return int(segmentIndex), localDocNum From eb256f78bc63fef2d97a8aec81d06df375345879 Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Fri, 1 Dec 2017 09:30:07 -0500 Subject: [PATCH 012/728] switch to constant referring to id field id 0 this avoids potentially mutating something that is intended to be immutable --- index/scorch/segment/mem/segment.go | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/index/scorch/segment/mem/segment.go b/index/scorch/segment/mem/segment.go index fe71f17c6..e9a361c2f 100644 --- a/index/scorch/segment/mem/segment.go +++ b/index/scorch/segment/mem/segment.go @@ -5,6 +5,9 @@ import ( "github.com/blevesearch/bleve/index/scorch/segment" ) +// _id field is 
always guaranteed to have fieldID of 0 +const idFieldID uint16 = 0 + // KNOWN ISSUES // - LIMITATION - we decided whether or not to store term vectors for a field // at the segment level, based on the first definition of a @@ -120,7 +123,7 @@ func (s *Segment) Count() uint64 { // provided _id strings func (s *Segment) DocNumbers(ids []string) *roaring.Bitmap { - idDictionary := s.Dicts[s.getOrDefineField("_id", false)] + idDictionary := s.Dicts[idFieldID] rv := roaring.New() for _, id := range ids { postingID := idDictionary[id] From cff14f12121ffd362f0796cbc99621b8c8503d43 Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Fri, 1 Dec 2017 09:50:27 -0500 Subject: [PATCH 013/728] fix crash in DocNumbers when segment is empty --- index/scorch/segment/mem/segment.go | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/index/scorch/segment/mem/segment.go b/index/scorch/segment/mem/segment.go index e9a361c2f..f4294ebd7 100644 --- a/index/scorch/segment/mem/segment.go +++ b/index/scorch/segment/mem/segment.go @@ -122,13 +122,17 @@ func (s *Segment) Count() uint64 { // DocNumbers returns a bitset corresponding to the doc numbers of all the // provided _id strings func (s *Segment) DocNumbers(ids []string) *roaring.Bitmap { - - idDictionary := s.Dicts[idFieldID] rv := roaring.New() - for _, id := range ids { - postingID := idDictionary[id] - if postingID > 0 { - rv.Or(s.Postings[postingID-1]) + + // guard against empty segment + if len(s.FieldsMap) > 0 { + idDictionary := s.Dicts[idFieldID] + + for _, id := range ids { + postingID := idDictionary[id] + if postingID > 0 { + rv.Or(s.Postings[postingID-1]) + } } } return rv From 89aa02cf5b7fb3ad9c36fdb79068e8ce69fd98fe Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Fri, 1 Dec 2017 15:12:08 -0500 Subject: [PATCH 014/728] fix highlighting of composite fields updated log statements for refactored names --- index/scorch/segment/mem/build.go | 34 +++++++++++++++---------------- 1 file changed, 17 
insertions(+), 17 deletions(-) diff --git a/index/scorch/segment/mem/build.go b/index/scorch/segment/mem/build.go index 5154c182f..d995dd041 100644 --- a/index/scorch/segment/mem/build.go +++ b/index/scorch/segment/mem/build.go @@ -29,38 +29,38 @@ func NewFromAnalyzedDocs(results []*index.AnalysisResult) *Segment { // professional debugging // - // log.Printf("fields: %v\n", s.fields) - // log.Printf("fieldsInv: %v\n", s.fieldsInv) - // log.Printf("fieldsLoc: %v\n", s.fieldsLoc) - // log.Printf("dicts: %v\n", s.dicts) - // log.Printf("dict keys: %v\n", s.dictKeys) - // for i, posting := range s.postings { + // log.Printf("fields: %v\n", s.FieldsMap) + // log.Printf("fieldsInv: %v\n", s.FieldsInv) + // log.Printf("fieldsLoc: %v\n", s.FieldsLoc) + // log.Printf("dicts: %v\n", s.Dicts) + // log.Printf("dict keys: %v\n", s.DictKeys) + // for i, posting := range s.Postings { // log.Printf("posting %d: %v\n", i, posting) // } - // for i, freq := range s.freqs { + // for i, freq := range s.Freqs { // log.Printf("freq %d: %v\n", i, freq) // } - // for i, norm := range s.norms { + // for i, norm := range s.Norms { // log.Printf("norm %d: %v\n", i, norm) // } - // for i, field := range s.locfields { + // for i, field := range s.Locfields { // log.Printf("field %d: %v\n", i, field) // } - // for i, start := range s.locstarts { + // for i, start := range s.Locstarts { // log.Printf("start %d: %v\n", i, start) // } - // for i, end := range s.locends { + // for i, end := range s.Locends { // log.Printf("end %d: %v\n", i, end) // } - // for i, pos := range s.locpos { + // for i, pos := range s.Locpos { // log.Printf("pos %d: %v\n", i, pos) // } - // for i, apos := range s.locarraypos { + // for i, apos := range s.Locarraypos { // log.Printf("apos %d: %v\n", i, apos) // } - // log.Printf("stored: %v\n", s.stored) - // log.Printf("stored types: %v\n", s.storedTypes) - // log.Printf("stored pos: %v\n", s.storedPos) + // log.Printf("stored: %v\n", s.Stored) + // log.Printf("stored 
types: %v\n", s.StoredTypes) + // log.Printf("stored pos: %v\n", s.StoredPos) return s } @@ -88,7 +88,7 @@ func (s *Segment) processDocument(result *index.AnalysisResult) { // walk each composite field for _, field := range result.Document.CompositeFields { - fieldID := uint16(s.getOrDefineField(field.Name(), false)) + fieldID := uint16(s.getOrDefineField(field.Name(), true)) l, tf := field.Analyze() processField(fieldID, field.Name(), l, tf) } From b74cf4b08171b5c372f50bd777d8864b338f41bc Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Fri, 1 Dec 2017 15:42:50 -0500 Subject: [PATCH 015/728] add copyright header to all new files in scorch --- index/scorch/field_dict_test.go | 14 ++++++++++++++ index/scorch/introducer.go | 14 ++++++++++++++ index/scorch/reader.go | 14 ++++++++++++++ index/scorch/reader_test.go | 14 ++++++++++++++ index/scorch/scorch.go | 14 ++++++++++++++ index/scorch/scorch_test.go | 2 +- index/scorch/segment/mem/build.go | 14 ++++++++++++++ index/scorch/segment/mem/dict.go | 14 ++++++++++++++ index/scorch/segment/mem/posting.go | 14 ++++++++++++++ index/scorch/segment/mem/segment.go | 14 ++++++++++++++ index/scorch/segment/mem/segment_test.go | 14 ++++++++++++++ index/scorch/segment/segment.go | 14 ++++++++++++++ index/scorch/snapshot_index.go | 14 ++++++++++++++ index/scorch/snapshot_index_dict.go | 14 ++++++++++++++ index/scorch/snapshot_index_doc.go | 14 ++++++++++++++ index/scorch/snapshot_index_tfr.go | 14 ++++++++++++++ index/scorch/snapshot_segment.go | 14 ++++++++++++++ index/scorch/stats.go | 14 ++++++++++++++ 18 files changed, 239 insertions(+), 1 deletion(-) diff --git a/index/scorch/field_dict_test.go b/index/scorch/field_dict_test.go index 81285e76a..856f3d6cf 100644 --- a/index/scorch/field_dict_test.go +++ b/index/scorch/field_dict_test.go @@ -1,3 +1,17 @@ +// Copyright (c) 2017 Couchbase, Inc. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + package scorch import ( diff --git a/index/scorch/introducer.go b/index/scorch/introducer.go index 37a66dc84..8b333b309 100644 --- a/index/scorch/introducer.go +++ b/index/scorch/introducer.go @@ -1,3 +1,17 @@ +// Copyright (c) 2017 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + package scorch import ( diff --git a/index/scorch/reader.go b/index/scorch/reader.go index acb01905d..0e643f7ca 100644 --- a/index/scorch/reader.go +++ b/index/scorch/reader.go @@ -1,3 +1,17 @@ +// Copyright (c) 2017 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + package scorch import ( diff --git a/index/scorch/reader_test.go b/index/scorch/reader_test.go index 6b801faa3..ef5d6d4f5 100644 --- a/index/scorch/reader_test.go +++ b/index/scorch/reader_test.go @@ -1,3 +1,17 @@ +// Copyright (c) 2017 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + package scorch import ( diff --git a/index/scorch/scorch.go b/index/scorch/scorch.go index 1f99b42fe..90b2fa0d3 100644 --- a/index/scorch/scorch.go +++ b/index/scorch/scorch.go @@ -1,3 +1,17 @@ +// Copyright (c) 2017 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + package scorch import ( diff --git a/index/scorch/scorch_test.go b/index/scorch/scorch_test.go index 48e80c8b8..aeb6c997a 100644 --- a/index/scorch/scorch_test.go +++ b/index/scorch/scorch_test.go @@ -1,4 +1,4 @@ -// Copyright (c) 2014 Couchbase, Inc. +// Copyright (c) 2017 Couchbase, Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/index/scorch/segment/mem/build.go b/index/scorch/segment/mem/build.go index d995dd041..dbba39b13 100644 --- a/index/scorch/segment/mem/build.go +++ b/index/scorch/segment/mem/build.go @@ -1,3 +1,17 @@ +// Copyright (c) 2017 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + package mem import ( diff --git a/index/scorch/segment/mem/dict.go b/index/scorch/segment/mem/dict.go index c724493da..015578949 100644 --- a/index/scorch/segment/mem/dict.go +++ b/index/scorch/segment/mem/dict.go @@ -1,3 +1,17 @@ +// Copyright (c) 2017 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + package mem import ( diff --git a/index/scorch/segment/mem/posting.go b/index/scorch/segment/mem/posting.go index fa1c0a00f..2ce889888 100644 --- a/index/scorch/segment/mem/posting.go +++ b/index/scorch/segment/mem/posting.go @@ -1,3 +1,17 @@ +// Copyright (c) 2017 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + package mem import ( diff --git a/index/scorch/segment/mem/segment.go b/index/scorch/segment/mem/segment.go index f4294ebd7..5d964e183 100644 --- a/index/scorch/segment/mem/segment.go +++ b/index/scorch/segment/mem/segment.go @@ -1,3 +1,17 @@ +// Copyright (c) 2017 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + package mem import ( diff --git a/index/scorch/segment/mem/segment_test.go b/index/scorch/segment/mem/segment_test.go index 4c056d7ad..612bb947e 100644 --- a/index/scorch/segment/mem/segment_test.go +++ b/index/scorch/segment/mem/segment_test.go @@ -1,3 +1,17 @@ +// Copyright (c) 2017 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + package mem import ( diff --git a/index/scorch/segment/segment.go b/index/scorch/segment/segment.go index 5cd3d5d7f..a6ef16e40 100644 --- a/index/scorch/segment/segment.go +++ b/index/scorch/segment/segment.go @@ -1,3 +1,17 @@ +// Copyright (c) 2017 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + package segment import ( diff --git a/index/scorch/snapshot_index.go b/index/scorch/snapshot_index.go index 2c85b8fee..bf076b91c 100644 --- a/index/scorch/snapshot_index.go +++ b/index/scorch/snapshot_index.go @@ -1,3 +1,17 @@ +// Copyright (c) 2017 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + package scorch import ( diff --git a/index/scorch/snapshot_index_dict.go b/index/scorch/snapshot_index_dict.go index 443e401e6..3c902cad6 100644 --- a/index/scorch/snapshot_index_dict.go +++ b/index/scorch/snapshot_index_dict.go @@ -1,3 +1,17 @@ +// Copyright (c) 2017 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + package scorch import ( diff --git a/index/scorch/snapshot_index_doc.go b/index/scorch/snapshot_index_doc.go index 2b1144874..4656079b0 100644 --- a/index/scorch/snapshot_index_doc.go +++ b/index/scorch/snapshot_index_doc.go @@ -1,3 +1,17 @@ +// Copyright (c) 2017 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + package scorch import ( diff --git a/index/scorch/snapshot_index_tfr.go b/index/scorch/snapshot_index_tfr.go index 44172f3d0..c2123a1dd 100644 --- a/index/scorch/snapshot_index_tfr.go +++ b/index/scorch/snapshot_index_tfr.go @@ -1,3 +1,17 @@ +// Copyright (c) 2017 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + package scorch import ( diff --git a/index/scorch/snapshot_segment.go b/index/scorch/snapshot_segment.go index ffacb5287..67cdfb900 100644 --- a/index/scorch/snapshot_segment.go +++ b/index/scorch/snapshot_segment.go @@ -1,3 +1,17 @@ +// Copyright (c) 2017 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + package scorch import ( diff --git a/index/scorch/stats.go b/index/scorch/stats.go index f49c8178c..13668480d 100644 --- a/index/scorch/stats.go +++ b/index/scorch/stats.go @@ -1,3 +1,17 @@ +// Copyright (c) 2017 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + package scorch import ( From 22ffc8940e091388f529a0531356e529759c2886 Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Mon, 4 Dec 2017 18:06:06 -0500 Subject: [PATCH 016/728] update segment API to return error in key places --- index/scorch/introducer.go | 6 +- index/scorch/scorch.go | 5 +- index/scorch/segment/mem/dict.go | 5 +- index/scorch/segment/mem/posting.go | 6 +- index/scorch/segment/mem/segment.go | 8 +- index/scorch/segment/mem/segment_test.go | 118 +++++++++++++++------ index/scorch/segment/segment.go | 8 +- index/scorch/snapshot_index.go | 126 +++++++++++++++-------- index/scorch/snapshot_index_tfr.go | 5 +- index/scorch/snapshot_segment.go | 21 ++-- 10 files changed, 212 insertions(+), 96 deletions(-) diff --git a/index/scorch/introducer.go b/index/scorch/introducer.go index 8b333b309..d2978a2df 100644 --- a/index/scorch/introducer.go +++ b/index/scorch/introducer.go @@ -53,7 +53,11 @@ func (s *Scorch) mainLoop() { // see if optimistic work included this segment delta, ok := next.obsoletes[s.root.segment[i].id] if !ok { - delta = s.root.segment[i].segment.DocNumbers(next.ids) + var err error + delta, err = s.root.segment[i].segment.DocNumbers(next.ids) + if err != nil { + panic(err) + } } newSnapshot.segment[i] = &SegmentSnapshot{ id: s.root.segment[i].id, diff --git a/index/scorch/scorch.go b/index/scorch/scorch.go index 90b2fa0d3..b80ff2482 100644 --- a/index/scorch/scorch.go +++ b/index/scorch/scorch.go @@ -154,7 +154,10 @@ func (s *Scorch) prepareSegment(newSegment segment.Segment, ids []string, // get read lock, to optimistically prepare obsoleted info s.rootLock.RLock() for i := range s.root.segment { - delta := s.root.segment[i].segment.DocNumbers(ids) + delta, err := s.root.segment[i].segment.DocNumbers(ids) + if err != nil { + return err + } introduction.obsoletes[s.root.segment[i].id] = delta } s.rootLock.RUnlock() diff --git 
a/index/scorch/segment/mem/dict.go b/index/scorch/segment/mem/dict.go index 015578949..939c287e9 100644 --- a/index/scorch/segment/mem/dict.go +++ b/index/scorch/segment/mem/dict.go @@ -31,13 +31,14 @@ type Dictionary struct { } // PostingsList returns the postings list for the specified term -func (d *Dictionary) PostingsList(term string, except *roaring.Bitmap) segment.PostingsList { +func (d *Dictionary) PostingsList(term string, + except *roaring.Bitmap) (segment.PostingsList, error) { return &PostingsList{ dictionary: d, term: term, postingsID: d.segment.Dicts[d.fieldID][term], except: except, - } + }, nil } // Iterator returns an iterator for this dictionary diff --git a/index/scorch/segment/mem/posting.go b/index/scorch/segment/mem/posting.go index 2ce889888..b6fd0c6a7 100644 --- a/index/scorch/segment/mem/posting.go +++ b/index/scorch/segment/mem/posting.go @@ -74,9 +74,9 @@ type PostingsIterator struct { } // Next returns the next posting on the postings list, or nil at the end -func (i *PostingsIterator) Next() segment.Posting { +func (i *PostingsIterator) Next() (segment.Posting, error) { if i.actual == nil || !i.actual.HasNext() { - return nil + return nil, nil } n := i.actual.Next() allN := i.all.Next() @@ -99,7 +99,7 @@ func (i *PostingsIterator) Next() segment.Posting { i.locoffset += int(i.postings.dictionary.segment.Freqs[i.postings.postingsID-1][i.offset]) i.offset++ - return rv + return rv, nil } // Posting is a single entry in a postings list diff --git a/index/scorch/segment/mem/segment.go b/index/scorch/segment/mem/segment.go index 5d964e183..a1eb29e27 100644 --- a/index/scorch/segment/mem/segment.go +++ b/index/scorch/segment/mem/segment.go @@ -119,12 +119,12 @@ func (s *Segment) VisitDocument(num uint64, visitor segment.DocumentFieldValueVi } // Dictionary returns the term dictionary for the specified field -func (s *Segment) Dictionary(field string) segment.TermDictionary { +func (s *Segment) Dictionary(field string) 
(segment.TermDictionary, error) { return &Dictionary{ segment: s, field: field, fieldID: uint16(s.getOrDefineField(field, false)), - } + }, nil } // Count returns the number of documents in this segment @@ -135,7 +135,7 @@ func (s *Segment) Count() uint64 { // DocNumbers returns a bitset corresponding to the doc numbers of all the // provided _id strings -func (s *Segment) DocNumbers(ids []string) *roaring.Bitmap { +func (s *Segment) DocNumbers(ids []string) (*roaring.Bitmap, error) { rv := roaring.New() // guard against empty segment @@ -149,5 +149,5 @@ func (s *Segment) DocNumbers(ids []string) *roaring.Bitmap { } } } - return rv + return rv, nil } diff --git a/index/scorch/segment/mem/segment_test.go b/index/scorch/segment/mem/segment_test.go index 612bb947e..2b8452051 100644 --- a/index/scorch/segment/mem/segment_test.go +++ b/index/scorch/segment/mem/segment_test.go @@ -31,12 +31,18 @@ func TestEmpty(t *testing.T) { t.Errorf("expected count 0, got %d", emptySegment.Count()) } - dict := emptySegment.Dictionary("name") + dict, err := emptySegment.Dictionary("name") + if err != nil { + t.Fatal(err) + } if dict == nil { t.Fatal("got nil dict, expected non-nil") } - postingsList := dict.PostingsList("marty", nil) + postingsList, err := dict.PostingsList("marty", nil) + if err != nil { + t.Fatal(err) + } if postingsList == nil { t.Fatal("got nil postings list, expected non-nil") } @@ -47,10 +53,13 @@ func TestEmpty(t *testing.T) { } count := 0 - nextPosting := postingsItr.Next() - for nextPosting != nil { + nextPosting, err := postingsItr.Next() + for nextPosting != nil && err == nil { count++ - nextPosting = postingsItr.Next() + nextPosting, err = postingsItr.Next() + } + if err != nil { + t.Fatal(err) } if count != 0 { @@ -58,7 +67,7 @@ func TestEmpty(t *testing.T) { } // now try and visit a document - err := emptySegment.VisitDocument(0, func(field string, typ byte, value []byte, pos []uint64) bool { + err = emptySegment.VisitDocument(0, func(field string, typ 
byte, value []byte, pos []uint64) bool { t.Errorf("document visitor called, not expected") return true }) @@ -164,12 +173,18 @@ func TestSingle(t *testing.T) { } // check the _id field - dict := segment.Dictionary("_id") + dict, err := segment.Dictionary("_id") + if err != nil { + t.Fatal(err) + } if dict == nil { t.Fatal("got nil dict, expected non-nil") } - postingsList := dict.PostingsList("a", nil) + postingsList, err := dict.PostingsList("a", nil) + if err != nil { + t.Fatal(err) + } if postingsList == nil { t.Fatal("got nil postings list, expected non-nil") } @@ -180,8 +195,8 @@ func TestSingle(t *testing.T) { } count := 0 - nextPosting := postingsItr.Next() - for nextPosting != nil { + nextPosting, err := postingsItr.Next() + for nextPosting != nil && err == nil { count++ if nextPosting.Frequency() != 1 { t.Errorf("expected frequency 1, got %d", nextPosting.Frequency()) @@ -193,7 +208,10 @@ func TestSingle(t *testing.T) { t.Errorf("expected norm 1.0, got %f", nextPosting.Norm()) } - nextPosting = postingsItr.Next() + nextPosting, err = postingsItr.Next() + } + if err != nil { + t.Fatal(err) } if count != 1 { @@ -201,12 +219,18 @@ func TestSingle(t *testing.T) { } // check the name field - dict = segment.Dictionary("name") + dict, err = segment.Dictionary("name") + if err != nil { + t.Fatal(err) + } if dict == nil { t.Fatal("got nil dict, expected non-nil") } - postingsList = dict.PostingsList("wow", nil) + postingsList, err = dict.PostingsList("wow", nil) + if err != nil { + t.Fatal(err) + } if postingsList == nil { t.Fatal("got nil postings list, expected non-nil") } @@ -217,8 +241,8 @@ func TestSingle(t *testing.T) { } count = 0 - nextPosting = postingsItr.Next() - for nextPosting != nil { + nextPosting, err = postingsItr.Next() + for nextPosting != nil && err == nil { count++ if nextPosting.Frequency() != 1 { t.Errorf("expected frequency 1, got %d", nextPosting.Frequency()) @@ -244,7 +268,10 @@ func TestSingle(t *testing.T) { } } - nextPosting = 
postingsItr.Next() + nextPosting, err = postingsItr.Next() + } + if err != nil { + t.Fatal(err) } if count != 1 { @@ -252,12 +279,18 @@ func TestSingle(t *testing.T) { } // check the _all field (composite) - dict = segment.Dictionary("_all") + dict, err = segment.Dictionary("_all") + if err != nil { + t.Fatal(err) + } if dict == nil { t.Fatal("got nil dict, expected non-nil") } - postingsList = dict.PostingsList("wow", nil) + postingsList, err = dict.PostingsList("wow", nil) + if err != nil { + t.Fatal(err) + } if postingsList == nil { t.Fatal("got nil postings list, expected non-nil") } @@ -268,8 +301,8 @@ func TestSingle(t *testing.T) { } count = 0 - nextPosting = postingsItr.Next() - for nextPosting != nil { + nextPosting, err = postingsItr.Next() + for nextPosting != nil && err == nil { count++ if nextPosting.Frequency() != 1 { t.Errorf("expected frequency 1, got %d", nextPosting.Frequency()) @@ -296,7 +329,10 @@ func TestSingle(t *testing.T) { } } - nextPosting = postingsItr.Next() + nextPosting, err = postingsItr.Next() + } + if err != nil { + t.Fatal(err) } if count != 1 { @@ -305,7 +341,7 @@ func TestSingle(t *testing.T) { // now try and visit a document var fieldValuesSeen int - err := segment.VisitDocument(0, func(field string, typ byte, value []byte, pos []uint64) bool { + err = segment.VisitDocument(0, func(field string, typ byte, value []byte, pos []uint64) bool { fieldValuesSeen++ return true }) @@ -487,12 +523,18 @@ func TestMultiple(t *testing.T) { } // check the desc field - dict := segment.Dictionary("desc") + dict, err := segment.Dictionary("desc") + if err != nil { + t.Fatal(err) + } if dict == nil { t.Fatal("got nil dict, expected non-nil") } - postingsList := dict.PostingsList("thing", nil) + postingsList, err := dict.PostingsList("thing", nil) + if err != nil { + t.Fatal(err) + } if postingsList == nil { t.Fatal("got nil postings list, expected non-nil") } @@ -503,10 +545,13 @@ func TestMultiple(t *testing.T) { } count := 0 - nextPosting := 
postingsItr.Next() - for nextPosting != nil { + nextPosting, err := postingsItr.Next() + for nextPosting != nil && err == nil { count++ - nextPosting = postingsItr.Next() + nextPosting, err = postingsItr.Next() + } + if err != nil { + t.Fatal(err) } if count != 2 { @@ -514,10 +559,16 @@ func TestMultiple(t *testing.T) { } // get docnum of a - exclude := segment.DocNumbers([]string{"a"}) + exclude, err := segment.DocNumbers([]string{"a"}) + if err != nil { + t.Fatal(err) + } // look for term 'thing' excluding doc 'a' - postingsListExcluding := dict.PostingsList("thing", exclude) + postingsListExcluding, err := dict.PostingsList("thing", exclude) + if err != nil { + t.Fatal(err) + } if postingsList == nil { t.Fatal("got nil postings list, expected non-nil") } @@ -528,10 +579,13 @@ func TestMultiple(t *testing.T) { } count = 0 - nextPosting = postingsItrExcluding.Next() - for nextPosting != nil { + nextPosting, err = postingsItrExcluding.Next() + for nextPosting != nil && err == nil { count++ - nextPosting = postingsItrExcluding.Next() + nextPosting, err = postingsItrExcluding.Next() + } + if err != nil { + t.Fatal(err) } if count != 1 { diff --git a/index/scorch/segment/segment.go b/index/scorch/segment/segment.go index a6ef16e40..77ab13857 100644 --- a/index/scorch/segment/segment.go +++ b/index/scorch/segment/segment.go @@ -25,18 +25,18 @@ import ( type DocumentFieldValueVisitor func(field string, typ byte, value []byte, pos []uint64) bool type Segment interface { - Dictionary(field string) TermDictionary + Dictionary(field string) (TermDictionary, error) VisitDocument(num uint64, visitor DocumentFieldValueVisitor) error Count() uint64 - DocNumbers([]string) *roaring.Bitmap + DocNumbers([]string) (*roaring.Bitmap, error) Fields() []string } type TermDictionary interface { - PostingsList(term string, except *roaring.Bitmap) PostingsList + PostingsList(term string, except *roaring.Bitmap) (PostingsList, error) Iterator() DictionaryIterator PrefixIterator(prefix 
string) DictionaryIterator @@ -59,7 +59,7 @@ type PostingsList interface { } type PostingsIterator interface { - Next() Posting + Next() (Posting, error) } type Posting interface { diff --git a/index/scorch/snapshot_index.go b/index/scorch/snapshot_index.go index bf076b91c..c059fe734 100644 --- a/index/scorch/snapshot_index.go +++ b/index/scorch/snapshot_index.go @@ -27,6 +27,17 @@ import ( "github.com/blevesearch/bleve/index/scorch/segment" ) +type asynchSegmentResult struct { + dictItr segment.DictionaryIterator + + index int + docs *roaring.Bitmap + + postings segment.PostingsList + + err error +} + type IndexSnapshot struct { segment []*SegmentSnapshot offsets []uint64 @@ -35,31 +46,44 @@ type IndexSnapshot struct { func (i *IndexSnapshot) newIndexSnapshotFieldDict(field string, makeItr func(i segment.TermDictionary) segment.DictionaryIterator) (*IndexSnapshotFieldDict, error) { - results := make(chan segment.DictionaryIterator) + results := make(chan *asynchSegmentResult) for index, segment := range i.segment { go func(index int, segment *SegmentSnapshot) { - dict := segment.Dictionary(field) - results <- makeItr(dict) + dict, err := segment.Dictionary(field) + if err != nil { + results <- &asynchSegmentResult{err: err} + } else { + results <- &asynchSegmentResult{dictItr: makeItr(dict)} + } }(index, segment) } + var err error rv := &IndexSnapshotFieldDict{ snapshot: i, cursors: make([]*segmentDictCursor, 0, len(i.segment)), } for count := 0; count < len(i.segment); count++ { - di := <-results - next, err := di.Next() - if err != nil { - return nil, err - } - if next != nil { - rv.cursors = append(rv.cursors, &segmentDictCursor{ - itr: di, - curr: next, - }) + asr := <-results + if asr.err != nil && err == nil { + err = asr.err + } else { + next, err2 := asr.dictItr.Next() + if err2 != nil && err == nil { + err = err2 + } + if next != nil { + rv.cursors = append(rv.cursors, &segmentDictCursor{ + itr: asr.dictItr, + curr: next, + }) + } } } + // after ensuring 
we've read all items on channel + if err != nil { + return nil, err + } // prepare heap heap.Init(rv) @@ -87,10 +111,10 @@ func (i *IndexSnapshot) FieldDictPrefix(field string, } func (i *IndexSnapshot) DocIDReaderAll() (index.DocIDReader, error) { - results := make(chan *segmentDocNumsResult) + results := make(chan *asynchSegmentResult) for index, segment := range i.segment { go func(index int, segment *SegmentSnapshot) { - results <- &segmentDocNumsResult{ + results <- &asynchSegmentResult{ index: index, docs: segment.DocNumbersLive(), } @@ -101,12 +125,17 @@ func (i *IndexSnapshot) DocIDReaderAll() (index.DocIDReader, error) { } func (i *IndexSnapshot) DocIDReaderOnly(ids []string) (index.DocIDReader, error) { - results := make(chan *segmentDocNumsResult) + results := make(chan *asynchSegmentResult) for index, segment := range i.segment { go func(index int, segment *SegmentSnapshot) { - results <- &segmentDocNumsResult{ - index: index, - docs: segment.DocNumbers(ids), + docs, err := segment.DocNumbers(ids) + if err != nil { + results <- &asynchSegmentResult{err: err} + } else { + results <- &asynchSegmentResult{ + index: index, + docs: docs, + } } }(index, segment) } @@ -114,19 +143,23 @@ func (i *IndexSnapshot) DocIDReaderOnly(ids []string) (index.DocIDReader, error) return i.newDocIDReader(results) } -type segmentDocNumsResult struct { - index int - docs *roaring.Bitmap -} - -func (i *IndexSnapshot) newDocIDReader(results chan *segmentDocNumsResult) (index.DocIDReader, error) { +func (i *IndexSnapshot) newDocIDReader(results chan *asynchSegmentResult) (index.DocIDReader, error) { rv := &IndexSnapshotDocIDReader{ snapshot: i, iterators: make([]roaring.IntIterable, len(i.segment)), } + var err error for count := 0; count < len(i.segment); count++ { - sdnr := <-results - rv.iterators[sdnr.index] = sdnr.docs.Iterator() + asr := <-results + if asr.err != nil && err != nil { + err = asr.err + } else { + rv.iterators[asr.index] = asr.docs.Iterator() + } + } + + if 
err != nil { + return nil, err } return rv, nil @@ -262,23 +295,27 @@ func (i *IndexSnapshot) InternalID(id string) (rv index.IndexInternalID, err err func (i *IndexSnapshot) TermFieldReader(term []byte, field string, includeFreq, includeNorm, includeTermVectors bool) (index.TermFieldReader, error) { - type segmentPostingResult struct { - index int - postings segment.PostingsList - } - - results := make(chan *segmentPostingResult) + results := make(chan *asynchSegmentResult) for index, segment := range i.segment { go func(index int, segment *SegmentSnapshot) { - dict := segment.Dictionary(field) - pl := dict.PostingsList(string(term), nil) - results <- &segmentPostingResult{ - index: index, - postings: pl, + dict, err := segment.Dictionary(field) + if err != nil { + results <- &asynchSegmentResult{err: err} + } else { + pl, err := dict.PostingsList(string(term), nil) + if err != nil { + results <- &asynchSegmentResult{err: err} + } else { + results <- &asynchSegmentResult{ + index: index, + postings: pl, + } + } } }(index, segment) } + var err error rv := &IndexSnapshotTermFieldReader{ snapshot: i, postings: make([]segment.PostingsList, len(i.segment)), @@ -288,9 +325,16 @@ func (i *IndexSnapshot) TermFieldReader(term []byte, field string, includeFreq, includeTermVectors: includeTermVectors, } for count := 0; count < len(i.segment); count++ { - spr := <-results - rv.postings[spr.index] = spr.postings - rv.iterators[spr.index] = spr.postings.Iterator() + asr := <-results + if asr.err != nil && err == nil { + err = asr.err + } else { + rv.postings[asr.index] = asr.postings + rv.iterators[asr.index] = asr.postings.Iterator() + } + } + if err != nil { + return nil, err } return rv, nil diff --git a/index/scorch/snapshot_index_tfr.go b/index/scorch/snapshot_index_tfr.go index c2123a1dd..936704906 100644 --- a/index/scorch/snapshot_index_tfr.go +++ b/index/scorch/snapshot_index_tfr.go @@ -38,7 +38,10 @@ func (i *IndexSnapshotTermFieldReader) Next(preAlloced 
*index.TermFieldDoc) (*in } // find the next hit for i.segmentOffset < len(i.postings) { - next := i.iterators[i.segmentOffset].Next() + next, err := i.iterators[i.segmentOffset].Next() + if err != nil { + return nil, err + } if next != nil { // make segment number into global number by adding offset globalOffset := i.snapshot.offsets[i.segmentOffset] diff --git a/index/scorch/snapshot_segment.go b/index/scorch/snapshot_segment.go index 67cdfb900..6380a15fd 100644 --- a/index/scorch/snapshot_segment.go +++ b/index/scorch/snapshot_segment.go @@ -24,7 +24,7 @@ type SegmentDictionarySnapshot struct { d segment.TermDictionary } -func (s *SegmentDictionarySnapshot) PostingsList(term string, except *roaring.Bitmap) segment.PostingsList { +func (s *SegmentDictionarySnapshot) PostingsList(term string, except *roaring.Bitmap) (segment.PostingsList, error) { return s.d.PostingsList(term, s.s.deleted) } @@ -58,19 +58,26 @@ func (s *SegmentSnapshot) Count() uint64 { return rv } -func (s *SegmentSnapshot) Dictionary(field string) segment.TermDictionary { +func (s *SegmentSnapshot) Dictionary(field string) (segment.TermDictionary, error) { + d, err := s.segment.Dictionary(field) + if err != nil { + return nil, err + } return &SegmentDictionarySnapshot{ s: s, - d: s.segment.Dictionary(field), - } + d: d, + }, nil } -func (s *SegmentSnapshot) DocNumbers(docIDs []string) *roaring.Bitmap { - rv := s.segment.DocNumbers(docIDs) +func (s *SegmentSnapshot) DocNumbers(docIDs []string) (*roaring.Bitmap, error) { + rv, err := s.segment.DocNumbers(docIDs) + if err != nil { + return nil, err + } if s.deleted != nil { rv.AndNot(s.deleted) } - return rv + return rv, nil } // DocNumbersLive returns bitsit containing doc numbers for all live docs From ed067f45dd0562380606d93f50d432891438abf7 Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Tue, 5 Dec 2017 09:31:02 -0500 Subject: [PATCH 017/728] added Close() method to Segment --- index/scorch/segment/mem/segment.go | 5 +++++ 
index/scorch/segment/segment.go | 2 ++ 2 files changed, 7 insertions(+) diff --git a/index/scorch/segment/mem/segment.go b/index/scorch/segment/mem/segment.go index a1eb29e27..a0611947d 100644 --- a/index/scorch/segment/mem/segment.go +++ b/index/scorch/segment/mem/segment.go @@ -151,3 +151,8 @@ func (s *Segment) DocNumbers(ids []string) (*roaring.Bitmap, error) { } return rv, nil } + +// Close releases all resources associated with this segment +func (s *Segment) Close() error { + return nil +} diff --git a/index/scorch/segment/segment.go b/index/scorch/segment/segment.go index 77ab13857..6a9d70730 100644 --- a/index/scorch/segment/segment.go +++ b/index/scorch/segment/segment.go @@ -33,6 +33,8 @@ type Segment interface { DocNumbers([]string) (*roaring.Bitmap, error) Fields() []string + + Close() error } type TermDictionary interface { From 87e2627551841cecba0bd25067350eb2f199f5ec Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Tue, 5 Dec 2017 09:49:41 -0500 Subject: [PATCH 018/728] added dictionary tests to mem segment --- index/scorch/segment/mem/dict_test.go | 160 ++++++++++++++++++++++++++ 1 file changed, 160 insertions(+) create mode 100644 index/scorch/segment/mem/dict_test.go diff --git a/index/scorch/segment/mem/dict_test.go b/index/scorch/segment/mem/dict_test.go new file mode 100644 index 000000000..adfa4957d --- /dev/null +++ b/index/scorch/segment/mem/dict_test.go @@ -0,0 +1,160 @@ +// Copyright (c) 2017 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +package mem + +import ( + "reflect" + "testing" + + "github.com/blevesearch/bleve/analysis" + "github.com/blevesearch/bleve/document" + "github.com/blevesearch/bleve/index" +) + +func TestDictionary(t *testing.T) { + doc := &document.Document{ + ID: "a", + Fields: []document.Field{ + document.NewTextFieldCustom("_id", nil, []byte("a"), document.IndexField|document.StoreField, nil), + document.NewTextFieldCustom("desc", nil, []byte("apple ball cat dog egg fish bat"), document.IndexField|document.StoreField|document.IncludeTermVectors, nil), + }, + } + + // forge analyzed docs + results := []*index.AnalysisResult{ + &index.AnalysisResult{ + Document: doc, + Analyzed: []analysis.TokenFrequencies{ + analysis.TokenFrequency(analysis.TokenStream{ + &analysis.Token{ + Start: 0, + End: 1, + Position: 1, + Term: []byte("a"), + }, + }, nil, false), + analysis.TokenFrequency(analysis.TokenStream{ + &analysis.Token{ + Start: 0, + End: 5, + Position: 1, + Term: []byte("apple"), + }, + &analysis.Token{ + Start: 6, + End: 10, + Position: 2, + Term: []byte("ball"), + }, + &analysis.Token{ + Start: 11, + End: 14, + Position: 3, + Term: []byte("cat"), + }, + &analysis.Token{ + Start: 15, + End: 18, + Position: 4, + Term: []byte("dog"), + }, + &analysis.Token{ + Start: 19, + End: 22, + Position: 5, + Term: []byte("egg"), + }, + &analysis.Token{ + Start: 20, + End: 24, + Position: 6, + Term: []byte("fish"), + }, + &analysis.Token{ + Start: 25, + End: 28, + Position: 7, + Term: []byte("bat"), + }, + }, nil, true), + }, + Length: []int{ + 1, + 7, + }, + }, + } + + segment := NewFromAnalyzedDocs(results) + if segment == nil { + t.Fatalf("segment nil, not expected") + } + + dict, err := segment.Dictionary("desc") + if err != nil { + t.Fatal(err) + } + + // test basic full iterator + expected := []string{"apple", "ball", "bat", "cat", "dog", "egg", "fish"} + var got []string + itr 
:= dict.Iterator() + next, err := itr.Next() + for next != nil && err == nil { + got = append(got, next.Term) + next, err = itr.Next() + } + if err != nil { + t.Fatalf("dict itr error: %v", err) + } + + if !reflect.DeepEqual(expected, got) { + t.Errorf("expected: %v, got: %v", expected, got) + } + + // test prefix iterator + expected = []string{"ball", "bat"} + got = got[:0] + itr = dict.PrefixIterator("b") + next, err = itr.Next() + for next != nil && err == nil { + got = append(got, next.Term) + next, err = itr.Next() + } + if err != nil { + t.Fatalf("dict itr error: %v", err) + } + + if !reflect.DeepEqual(expected, got) { + t.Errorf("expected: %v, got: %v", expected, got) + } + + // test range iterator + expected = []string{"cat", "dog", "egg"} + got = got[:0] + itr = dict.RangeIterator("cat", "egg") + next, err = itr.Next() + for next != nil && err == nil { + got = append(got, next.Term) + next, err = itr.Next() + } + if err != nil { + t.Fatalf("dict itr error: %v", err) + } + + if !reflect.DeepEqual(expected, got) { + t.Errorf("expected: %v, got: %v", expected, got) + } +} From e08fdab54a570fe50bee5b541dd7297dea6c39e3 Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Tue, 5 Dec 2017 10:13:27 -0500 Subject: [PATCH 019/728] remove todo item --- index/scorch/segment/mem/segment.go | 1 - 1 file changed, 1 deletion(-) diff --git a/index/scorch/segment/mem/segment.go b/index/scorch/segment/mem/segment.go index a0611947d..4d3d1d113 100644 --- a/index/scorch/segment/mem/segment.go +++ b/index/scorch/segment/mem/segment.go @@ -34,7 +34,6 @@ const idFieldID uint16 = 0 // TODO // - need better testing of multiple docs, iterating freqs, locations and // and verifying the correct results are returned -// - need tests for term dictionary iteration // Segment is an in memory implementation of scorch.Segment type Segment struct { From 7a6b5483f23b119fad5f2bad7e96a2f1a1ae0327 Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Tue, 5 Dec 2017 11:58:05 -0500 Subject: [PATCH 
020/728] add validation that all locations were seen --- index/scorch/segment/mem/segment_test.go | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/index/scorch/segment/mem/segment_test.go b/index/scorch/segment/mem/segment_test.go index 2b8452051..74fb870d8 100644 --- a/index/scorch/segment/mem/segment_test.go +++ b/index/scorch/segment/mem/segment_test.go @@ -253,7 +253,9 @@ func TestSingle(t *testing.T) { if nextPosting.Norm() != 1.0 { t.Errorf("expected norm 1.0, got %f", nextPosting.Norm()) } + var numLocs uint64 for _, loc := range nextPosting.Locations() { + numLocs++ if loc.Start() != 0 { t.Errorf("expected loc start to be 0, got %d", loc.Start()) } @@ -267,6 +269,9 @@ func TestSingle(t *testing.T) { t.Errorf("expect loc array pos to be nil, got %v", loc.ArrayPositions()) } } + if numLocs != nextPosting.Frequency() { + t.Errorf("expected %d locations, got %d", nextPosting.Frequency(), numLocs) + } nextPosting, err = postingsItr.Next() } @@ -314,7 +319,9 @@ func TestSingle(t *testing.T) { if nextPosting.Norm() != float64(expectedNorm) { t.Errorf("expected norm %f, got %f", expectedNorm, nextPosting.Norm()) } + var numLocs uint64 for _, loc := range nextPosting.Locations() { + numLocs++ if loc.Start() != 0 { t.Errorf("expected loc start to be 0, got %d", loc.Start()) } @@ -328,6 +335,9 @@ func TestSingle(t *testing.T) { t.Errorf("expect loc array pos to be nil, got %v", loc.ArrayPositions()) } } + if numLocs != nextPosting.Frequency() { + t.Errorf("expected %d locations, got %d", nextPosting.Frequency(), numLocs) + } nextPosting, err = postingsItr.Next() } From 8f0350865b2d12b27f0afcacfa14a4de67ca7f01 Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Tue, 5 Dec 2017 12:17:56 -0500 Subject: [PATCH 021/728] add test for segment fields method --- index/scorch/segment/mem/segment_test.go | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/index/scorch/segment/mem/segment_test.go b/index/scorch/segment/mem/segment_test.go index 
74fb870d8..0d9ec4534 100644 --- a/index/scorch/segment/mem/segment_test.go +++ b/index/scorch/segment/mem/segment_test.go @@ -168,6 +168,23 @@ func TestSingle(t *testing.T) { t.Fatalf("segment nil, not expected") } + expectFields := map[string]struct{}{ + "_id": struct{}{}, + "_all": struct{}{}, + "name": struct{}{}, + "desc": struct{}{}, + "tag": struct{}{}, + } + fields := segment.Fields() + if len(fields) != len(expectFields) { + t.Errorf("expected %d fields, only got %d", len(expectFields), len(fields)) + } + for _, field := range fields { + if _, ok := expectFields[field]; !ok { + t.Errorf("got unexpected field: %s", field) + } + } + if segment.Count() != 1 { t.Errorf("expected count 1, got %d", segment.Count()) } From 8d9d45115ff04d843e37f343cd1e7a34bb194365 Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Tue, 5 Dec 2017 12:20:06 -0500 Subject: [PATCH 022/728] add test of location field --- index/scorch/segment/mem/segment_test.go | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/index/scorch/segment/mem/segment_test.go b/index/scorch/segment/mem/segment_test.go index 0d9ec4534..78c242416 100644 --- a/index/scorch/segment/mem/segment_test.go +++ b/index/scorch/segment/mem/segment_test.go @@ -273,6 +273,9 @@ func TestSingle(t *testing.T) { var numLocs uint64 for _, loc := range nextPosting.Locations() { numLocs++ + if loc.Field() != "name" { + t.Errorf("expected loc field to be 'name', got '%s'", loc.Field()) + } if loc.Start() != 0 { t.Errorf("expected loc start to be 0, got %d", loc.Start()) } @@ -339,6 +342,9 @@ func TestSingle(t *testing.T) { var numLocs uint64 for _, loc := range nextPosting.Locations() { numLocs++ + if loc.Field() != "name" { + t.Errorf("expected loc field to be 'name', got '%s'", loc.Field()) + } if loc.Start() != 0 { t.Errorf("expected loc start to be 0, got %d", loc.Start()) } From 30e9d6daa547cd0196e6e9b63a35e747c3971938 Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Tue, 5 Dec 2017 12:54:44 -0500 Subject: [PATCH 023/728] 
add better testing of array positions --- index/scorch/segment/mem/segment_test.go | 74 ++++++++++++++++++++++-- 1 file changed, 68 insertions(+), 6 deletions(-) diff --git a/index/scorch/segment/mem/segment_test.go b/index/scorch/segment/mem/segment_test.go index 78c242416..603fd5a9b 100644 --- a/index/scorch/segment/mem/segment_test.go +++ b/index/scorch/segment/mem/segment_test.go @@ -16,6 +16,7 @@ package mem import ( "math" + "reflect" "testing" "github.com/blevesearch/bleve/analysis" @@ -134,7 +135,7 @@ func TestSingle(t *testing.T) { Position: 1, Term: []byte("cold"), }, - }, nil, true), + }, []uint64{0}, true), analysis.TokenFrequency(analysis.TokenStream{ &analysis.Token{ Start: 0, @@ -142,7 +143,7 @@ func TestSingle(t *testing.T) { Position: 1, Term: []byte("dark"), }, - }, nil, true), + }, []uint64{1}, true), }, Length: []int{ 1, @@ -372,6 +373,67 @@ func TestSingle(t *testing.T) { t.Errorf("expected count to be 1, got %d", count) } + // now try a field with array positions + dict, err = segment.Dictionary("tag") + if err != nil { + t.Fatal(err) + } + if dict == nil { + t.Fatal("got nil dict, expected non-nil") + } + + postingsList, err = dict.PostingsList("dark", nil) + if err != nil { + t.Fatal(err) + } + if postingsList == nil { + t.Fatal("got nil postings list, expected non-nil") + } + + postingsItr = postingsList.Iterator() + if postingsItr == nil { + t.Fatal("got nil iterator, expected non-nil") + } + + nextPosting, err = postingsItr.Next() + for nextPosting != nil && err == nil { + + if nextPosting.Frequency() != 1 { + t.Errorf("expected frequency 1, got %d", nextPosting.Frequency()) + } + if nextPosting.Number() != 0 { + t.Errorf("expected doc number 0, got %d", nextPosting.Number()) + } + var numLocs uint64 + for _, loc := range nextPosting.Locations() { + numLocs++ + if loc.Field() != "tag" { + t.Errorf("expected loc field to be 'name', got '%s'", loc.Field()) + } + if loc.Start() != 0 { + t.Errorf("expected loc start to be 0, got %d", 
loc.Start()) + } + if loc.End() != 4 { + t.Errorf("expected loc end to be 3, got %d", loc.End()) + } + if loc.Pos() != 1 { + t.Errorf("expected loc pos to be 1, got %d", loc.Pos()) + } + expectArrayPos := []uint64{1} + if !reflect.DeepEqual(loc.ArrayPositions(), expectArrayPos) { + t.Errorf("expect loc array pos to be %v, got %v", expectArrayPos, loc.ArrayPositions()) + } + } + if numLocs != nextPosting.Frequency() { + t.Errorf("expected %d locations, got %d", nextPosting.Frequency(), numLocs) + } + + nextPosting, err = postingsItr.Next() + } + if err != nil { + t.Fatal(err) + } + // now try and visit a document var fieldValuesSeen int err = segment.VisitDocument(0, func(field string, typ byte, value []byte, pos []uint64) bool { @@ -459,7 +521,7 @@ func TestMultiple(t *testing.T) { Position: 1, Term: []byte("cold"), }, - }, nil, true), + }, []uint64{0}, true), analysis.TokenFrequency(analysis.TokenStream{ &analysis.Token{ Start: 0, @@ -467,7 +529,7 @@ func TestMultiple(t *testing.T) { Position: 1, Term: []byte("dark"), }, - }, nil, true), + }, []uint64{1}, true), }, Length: []int{ 1, @@ -517,7 +579,7 @@ func TestMultiple(t *testing.T) { Position: 1, Term: []byte("cold"), }, - }, nil, true), + }, []uint64{0}, true), analysis.TokenFrequency(analysis.TokenStream{ &analysis.Token{ Start: 0, @@ -525,7 +587,7 @@ func TestMultiple(t *testing.T) { Position: 1, Term: []byte("dark"), }, - }, nil, true), + }, []uint64{1}, true), }, Length: []int{ 1, From f6be841668e562a623e486b09a41b2389e5b32c7 Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Tue, 5 Dec 2017 13:01:36 -0500 Subject: [PATCH 024/728] add test for postings list count method --- index/scorch/segment/mem/segment_test.go | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/index/scorch/segment/mem/segment_test.go b/index/scorch/segment/mem/segment_test.go index 603fd5a9b..7eb691476 100644 --- a/index/scorch/segment/mem/segment_test.go +++ b/index/scorch/segment/mem/segment_test.go @@ -668,6 +668,11 @@ func 
TestMultiple(t *testing.T) { t.Fatal("got nil postings list, expected non-nil") } + postingsListExcludingCount := postingsListExcluding.Count() + if postingsListExcludingCount != 1 { + t.Errorf("expected count from postings list to be 1, got %d", postingsListExcludingCount) + } + postingsItrExcluding := postingsListExcluding.Iterator() if postingsItr == nil { t.Fatal("got nil iterator, expected non-nil") From ece27ef21551f2be790d9fa75a7ce2a798542ea1 Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Tue, 5 Dec 2017 13:05:12 -0500 Subject: [PATCH 025/728] adding initial version of bolt persisted segment --- index/scorch/segment/bolt/build.go | 500 +++++++++++++++++++++ index/scorch/segment/bolt/build_test.go | 288 ++++++++++++ index/scorch/segment/bolt/dict.go | 161 +++++++ index/scorch/segment/bolt/dict_test.go | 183 ++++++++ index/scorch/segment/bolt/int.go | 94 ++++ index/scorch/segment/bolt/int_test.go | 96 ++++ index/scorch/segment/bolt/posting.go | 323 ++++++++++++++ index/scorch/segment/bolt/segment.go | 309 +++++++++++++ index/scorch/segment/bolt/segment_test.go | 517 ++++++++++++++++++++++ 9 files changed, 2471 insertions(+) create mode 100644 index/scorch/segment/bolt/build.go create mode 100644 index/scorch/segment/bolt/build_test.go create mode 100644 index/scorch/segment/bolt/dict.go create mode 100644 index/scorch/segment/bolt/dict_test.go create mode 100644 index/scorch/segment/bolt/int.go create mode 100644 index/scorch/segment/bolt/int_test.go create mode 100644 index/scorch/segment/bolt/posting.go create mode 100644 index/scorch/segment/bolt/segment.go create mode 100644 index/scorch/segment/bolt/segment_test.go diff --git a/index/scorch/segment/bolt/build.go b/index/scorch/segment/bolt/build.go new file mode 100644 index 000000000..6ed5719d3 --- /dev/null +++ b/index/scorch/segment/bolt/build.go @@ -0,0 +1,500 @@ +// Copyright (c) 2017 Couchbase, Inc. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package bolt + +import ( + "bytes" + "encoding/binary" + "math" + + "github.com/RoaringBitmap/roaring" + "github.com/Smerity/govarint" + "github.com/blevesearch/bleve/index/scorch/segment/mem" + "github.com/boltdb/bolt" + "github.com/couchbaselabs/vellum" + "github.com/golang/snappy" +) + +var fieldsBucket = []byte{'a'} +var dictBucket = []byte{'b'} +var postingsBucket = []byte{'c'} +var postingDetailsBucket = []byte{'d'} +var storedBucket = []byte{'e'} +var configBucket = []byte{'x'} + +var indexLocsKey = []byte{'l'} + +var freqNormKey = []byte{'a'} +var locKey = []byte{'b'} + +var metaKey = []byte{'a'} +var dataKey = []byte{'b'} + +var chunkKey = []byte{'c'} +var versionKey = []byte{'v'} + +var version = 0 + +func persistSegment(memSegment *mem.Segment, path string, chunkFactor uint32) (err error) { + + db, err := bolt.Open(path, 0777, nil) + if err != nil { + return err + } + defer func() { + if cerr := db.Close(); err == nil && cerr != nil { + err = cerr + } + }() + + tx, err := db.Begin(true) + if err != nil { + return err + } + defer func() { + if err == nil { + err = tx.Commit() + } else { + _ = tx.Rollback() + } + }() + + err = persistFields(memSegment, tx) + if err != nil { + return err + } + + err = persistDictionary(memSegment, tx) + if err != nil { + return err + } + + err = persistPostings(memSegment, tx) + if err != nil { + return err + } + + err = persistPostingsDetails(memSegment, tx, 
chunkFactor) + if err != nil { + return err + } + + err = persistStored(memSegment, tx) + if err != nil { + return err + } + + err = persistConfig(tx, chunkFactor) + if err != nil { + return err + } + + return nil +} + +// persistFields puts the fields as separate k/v pairs in the fields bucket +// makes very little attempt to squeeze a lot of perf because it is expected +// this is usually somewhat small, and when re-opened it will be read once and +// kept on the heap, and not read out of the file subsequently +func persistFields(memSegment *mem.Segment, tx *bolt.Tx) error { + bucket, err := tx.CreateBucket(fieldsBucket) + if err != nil { + return err + } + bucket.FillPercent = 1.0 + + // build/persist a bitset corresponding to the field locs array + indexLocs := roaring.NewBitmap() + for i, indexLoc := range memSegment.FieldsLoc { + if indexLoc { + indexLocs.AddInt(i) + } + } + var indexLocsBuffer bytes.Buffer + _, err = indexLocs.WriteTo(&indexLocsBuffer) + if err != nil { + return err + } + err = bucket.Put(indexLocsKey, indexLocsBuffer.Bytes()) + if err != nil { + return err + } + + // we use special varint which is still guaranteed to sort correctly + fieldBuf := make([]byte, 0, maxVarintSize) + for fieldID, fieldName := range memSegment.FieldsInv { + if fieldID != 0 { + // reset buffer if necessary + fieldBuf = fieldBuf[:0] + } + fieldBuf = EncodeUvarintAscending(fieldBuf, uint64(fieldID)) + err = bucket.Put(fieldBuf, []byte(fieldName)) + if err != nil { + return err + } + } + return nil +} + +func persistDictionary(memSegment *mem.Segment, tx *bolt.Tx) error { + bucket, err := tx.CreateBucket(dictBucket) + if err != nil { + return err + } + bucket.FillPercent = 1.0 + + // TODO consider whether or not there is benefit to building the vellums + // concurrently. While we have to insert them into the bolt in order, + // the (presumably) heavier lifting involved in building the FST could + // be done concurrently. 
+ + fieldBuf := make([]byte, 0, maxVarintSize) + for fieldID, fieldTerms := range memSegment.DictKeys { + if fieldID != 0 { + // reset buffers if necessary + fieldBuf = fieldBuf[:0] + } + // start a new vellum for this field + var buffer bytes.Buffer + builder, err := vellum.New(&buffer, nil) + if err != nil { + return err + } + + dict := memSegment.Dicts[fieldID] + // now walk the dictionary in order of fieldTerms (already sorted) + for i := range fieldTerms { + err = builder.Insert([]byte(fieldTerms[i]), dict[fieldTerms[i]]-1) + if err != nil { + return err + } + } + err = builder.Close() + if err != nil { + return err + } + + // put this FST into bolt + // we use special varint which is still guaranteed to sort correctly + fieldBuf = EncodeUvarintAscending(fieldBuf, uint64(fieldID)) + err = bucket.Put(fieldBuf, buffer.Bytes()) + if err != nil { + return err + } + } + + return nil +} + +func persistPostings(memSegment *mem.Segment, tx *bolt.Tx) error { + bucket, err := tx.CreateBucket(postingsBucket) + if err != nil { + return err + } + bucket.FillPercent = 1.0 + + postingIDBuf := make([]byte, 0, maxVarintSize) + for postingID := range memSegment.Postings { + if postingID != 0 { + // reset buffers if necessary + postingIDBuf = postingIDBuf[:0] + } + postingIDBuf = EncodeUvarintAscending(postingIDBuf, uint64(postingID)) + var postingsBuf bytes.Buffer + _, err := memSegment.Postings[postingID].WriteTo(&postingsBuf) + if err != nil { + return err + } + err = bucket.Put(postingIDBuf, postingsBuf.Bytes()) + if err != nil { + return err + } + } + + return nil +} + +func persistPostingsDetails(memSegment *mem.Segment, tx *bolt.Tx, + chunkFactor uint32) error { + bucket, err := tx.CreateBucket(postingDetailsBucket) + if err != nil { + return err + } + bucket.FillPercent = 1.0 + + postingIDBuf := make([]byte, 0, maxVarintSize) + for postingID := range memSegment.Postings { + if postingID != 0 { + // reset buffers if necessary + postingIDBuf = postingIDBuf[:0] + } + 
postingIDBuf = EncodeUvarintAscending(postingIDBuf, uint64(postingID)) + + // make bucket for posting details + postingBucket, err := bucket.CreateBucket(postingIDBuf) + if err != nil { + return err + } + postingBucket.FillPercent = 1.0 + + err = persistPostingDetails(memSegment, postingBucket, postingID, chunkFactor) + if err != nil { + return err + } + } + + return nil +} + +func persistPostingDetails(memSegment *mem.Segment, postingBucket *bolt.Bucket, + postingID int, chunkFactor uint32) error { + // walk the postings list + var err error + var chunkBucket *bolt.Bucket + var currChunk uint32 + chunkIDBuf := make([]byte, 0, maxVarintSize) + postingsListItr := memSegment.Postings[postingID].Iterator() + var encoder *govarint.Base128Encoder + var locEncoder *govarint.Base128Encoder + + encodingBuf := &bytes.Buffer{} + locEncodingBuf := &bytes.Buffer{} + + var offset int + var locOffset int + for postingsListItr.HasNext() { + docNum := postingsListItr.Next() + chunk := docNum / chunkFactor + + // create new chunk bucket if necessary + if chunkBucket == nil || currChunk != chunk { + + // close out last chunk + if chunkBucket != nil { + + // fix me write freq/norms + encoder.Close() + err = chunkBucket.Put(freqNormKey, encodingBuf.Bytes()) + if err != nil { + return err + } + locEncoder.Close() + err = chunkBucket.Put(locKey, locEncodingBuf.Bytes()) + if err != nil { + return err + } + + // reset for next + chunkIDBuf = chunkIDBuf[:0] + encodingBuf = &bytes.Buffer{} + locEncodingBuf = &bytes.Buffer{} + } + + // prepare next chunk + chunkIDBuf = EncodeUvarintAscending(chunkIDBuf, uint64(chunk)) + chunkBucket, err = postingBucket.CreateBucket(chunkIDBuf) + if err != nil { + return err + } + chunkBucket.FillPercent = 1.0 + currChunk = chunk + + encoder = govarint.NewU64Base128Encoder(encodingBuf) + locEncoder = govarint.NewU64Base128Encoder(locEncodingBuf) + } + + // put freq + _, err = encoder.PutU64(memSegment.Freqs[postingID][offset]) + if err != nil { + return err + 
} + + // put norm + norm := memSegment.Norms[postingID][offset] + normBits := math.Float32bits(norm) + _, err = encoder.PutU32(normBits) + if err != nil { + return err + } + + // put locations + + for i := 0; i < int(memSegment.Freqs[postingID][offset]); i++ { + + if len(memSegment.Locfields[postingID]) > 0 { + // put field + _, err = locEncoder.PutU64(uint64(memSegment.Locfields[postingID][locOffset])) + if err != nil { + return err + } + + // put pos + _, err = locEncoder.PutU64(memSegment.Locpos[postingID][locOffset]) + if err != nil { + return err + } + + // put start + _, err = locEncoder.PutU64(memSegment.Locstarts[postingID][locOffset]) + if err != nil { + return err + } + + // put end + _, err = locEncoder.PutU64(memSegment.Locends[postingID][locOffset]) + if err != nil { + return err + } + + // put array positions + num := len(memSegment.Locarraypos[postingID][locOffset]) + + // put the number of array positions to follow + _, err = locEncoder.PutU64(uint64(num)) + if err != nil { + return err + } + + // put each array position + for j := 0; j < num; j++ { + _, err = locEncoder.PutU64(memSegment.Locarraypos[postingID][locOffset][j]) + if err != nil { + return err + } + } + } + + locOffset++ + } + + offset++ + } + + // close out last chunk + + if chunkBucket != nil { + // fix me write freq/norms + encoder.Close() + err = chunkBucket.Put(freqNormKey, encodingBuf.Bytes()) + if err != nil { + return err + } + locEncoder.Close() + err = chunkBucket.Put(locKey, locEncodingBuf.Bytes()) + if err != nil { + return err + } + } + + return nil +} + +func persistStored(memSegment *mem.Segment, tx *bolt.Tx) error { + bucket, err := tx.CreateBucket(storedBucket) + if err != nil { + return err + } + bucket.FillPercent = 1.0 + + var curr int + // we use special varint which is still guaranteed to sort correctly + docNumBuf := make([]byte, 0, maxVarintSize) + for docNum, storedValues := range memSegment.Stored { + var metaBuf bytes.Buffer + var data, compressed []byte + if 
docNum != 0 { + // reset buffer if necessary + docNumBuf = docNumBuf[:0] + curr = 0 + } + // create doc sub-bucket + docNumBuf = EncodeUvarintAscending(docNumBuf, uint64(docNum)) + docBucket, err := bucket.CreateBucket(docNumBuf) + if err != nil { + return err + } + docBucket.FillPercent = 1.0 + + metaEncoder := govarint.NewU64Base128Encoder(&metaBuf) + + // encode fields in order + for fieldID := range memSegment.FieldsInv { + if storedFieldValues, ok := storedValues[uint16(fieldID)]; ok { + // has stored values for this field + num := len(storedFieldValues) + + // process each value + for i := 0; i < num; i++ { + // encode field + metaEncoder.PutU64(uint64(fieldID)) + // encode type + metaEncoder.PutU64(uint64(memSegment.StoredTypes[docNum][uint16(fieldID)][i])) + // encode start offset + metaEncoder.PutU64(uint64(curr)) + // end len + metaEncoder.PutU64(uint64(len(storedFieldValues[i]))) + // encode number of array pos + metaEncoder.PutU64(uint64(len(memSegment.StoredPos[docNum][uint16(fieldID)][i]))) + // encode all array positions + for j := 0; j < len(memSegment.StoredPos[docNum][uint16(fieldID)][i]); j++ { + metaEncoder.PutU64(memSegment.StoredPos[docNum][uint16(fieldID)][i][j]) + } + // append data + data = append(data, storedFieldValues[i]...) 
+ // update curr + curr += len(storedFieldValues[i]) + } + } + } + metaEncoder.Close() + + err = docBucket.Put(metaKey, metaBuf.Bytes()) + if err != nil { + return err + } + + // compress data + compressed = snappy.Encode(compressed, data) + + err = docBucket.Put(dataKey, compressed) + if err != nil { + return err + } + + } + + return nil +} + +func persistConfig(tx *bolt.Tx, chunkFactor uint32) error { + bucket, err := tx.CreateBucket(configBucket) + if err != nil { + return err + } + + chunkVal := make([]byte, 4) + binary.BigEndian.PutUint32(chunkVal, chunkFactor) + err = bucket.Put(chunkKey, chunkVal) + if err != nil { + return err + } + + err = bucket.Put(versionKey, []byte{byte(version)}) + if err != nil { + return err + } + + return nil +} diff --git a/index/scorch/segment/bolt/build_test.go b/index/scorch/segment/bolt/build_test.go new file mode 100644 index 000000000..d4b93f159 --- /dev/null +++ b/index/scorch/segment/bolt/build_test.go @@ -0,0 +1,288 @@ +// Copyright (c) 2017 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package bolt + +import ( + "os" + "testing" + + "github.com/blevesearch/bleve/analysis" + "github.com/blevesearch/bleve/document" + "github.com/blevesearch/bleve/index" + "github.com/blevesearch/bleve/index/scorch/segment/mem" +) + +func TestBuild(t *testing.T) { + os.RemoveAll("/tmp/scorch.bolt") + + memSegment := buildMemSegment() + err := persistSegment(memSegment, "/tmp/scorch.bolt", 1024) + if err != nil { + t.Fatal(err) + } +} + +func buildMemSegment() *mem.Segment { + doc := &document.Document{ + ID: "a", + Fields: []document.Field{ + document.NewTextFieldCustom("_id", nil, []byte("a"), document.IndexField|document.StoreField, nil), + document.NewTextFieldCustom("name", nil, []byte("wow"), document.IndexField|document.StoreField|document.IncludeTermVectors, nil), + document.NewTextFieldCustom("desc", nil, []byte("some thing"), document.IndexField|document.StoreField|document.IncludeTermVectors, nil), + document.NewTextFieldCustom("tag", []uint64{0}, []byte("cold"), document.IndexField|document.StoreField|document.IncludeTermVectors, nil), + document.NewTextFieldCustom("tag", []uint64{1}, []byte("dark"), document.IndexField|document.StoreField|document.IncludeTermVectors, nil), + }, + CompositeFields: []*document.CompositeField{ + document.NewCompositeField("_all", true, nil, []string{"_id"}), + }, + } + + // forge analyzed docs + results := []*index.AnalysisResult{ + &index.AnalysisResult{ + Document: doc, + Analyzed: []analysis.TokenFrequencies{ + analysis.TokenFrequency(analysis.TokenStream{ + &analysis.Token{ + Start: 0, + End: 1, + Position: 1, + Term: []byte("a"), + }, + }, nil, false), + analysis.TokenFrequency(analysis.TokenStream{ + &analysis.Token{ + Start: 0, + End: 3, + Position: 1, + Term: []byte("wow"), + }, + }, nil, true), + analysis.TokenFrequency(analysis.TokenStream{ + &analysis.Token{ + Start: 0, + End: 4, + Position: 1, + Term: []byte("some"), + }, + &analysis.Token{ + Start: 5, + End: 10, + Position: 2, + Term: []byte("thing"), + }, + 
}, nil, true), + analysis.TokenFrequency(analysis.TokenStream{ + &analysis.Token{ + Start: 0, + End: 4, + Position: 1, + Term: []byte("cold"), + }, + }, []uint64{0}, true), + analysis.TokenFrequency(analysis.TokenStream{ + &analysis.Token{ + Start: 0, + End: 4, + Position: 1, + Term: []byte("dark"), + }, + }, []uint64{1}, true), + }, + Length: []int{ + 1, + 1, + 2, + 1, + 1, + }, + }, + } + + // fix up composite fields + for _, ar := range results { + for i, f := range ar.Document.Fields { + for _, cf := range ar.Document.CompositeFields { + cf.Compose(f.Name(), ar.Length[i], ar.Analyzed[i]) + } + } + } + + return mem.NewFromAnalyzedDocs(results) +} + +func buildMemSegmentMulti() *mem.Segment { + + doc := &document.Document{ + ID: "a", + Fields: []document.Field{ + document.NewTextFieldCustom("_id", nil, []byte("a"), document.IndexField|document.StoreField, nil), + document.NewTextFieldCustom("name", nil, []byte("wow"), document.IndexField|document.StoreField|document.IncludeTermVectors, nil), + document.NewTextFieldCustom("desc", nil, []byte("some thing"), document.IndexField|document.StoreField|document.IncludeTermVectors, nil), + document.NewTextFieldCustom("tag", []uint64{0}, []byte("cold"), document.IndexField|document.StoreField|document.IncludeTermVectors, nil), + document.NewTextFieldCustom("tag", []uint64{1}, []byte("dark"), document.IndexField|document.StoreField|document.IncludeTermVectors, nil), + }, + CompositeFields: []*document.CompositeField{ + document.NewCompositeField("_all", true, nil, []string{"_id"}), + }, + } + + doc2 := &document.Document{ + ID: "b", + Fields: []document.Field{ + document.NewTextFieldCustom("_id", nil, []byte("b"), document.IndexField|document.StoreField, nil), + document.NewTextFieldCustom("name", nil, []byte("who"), document.IndexField|document.StoreField|document.IncludeTermVectors, nil), + document.NewTextFieldCustom("desc", nil, []byte("some thing"), document.IndexField|document.StoreField|document.IncludeTermVectors, 
nil), + document.NewTextFieldCustom("tag", []uint64{0}, []byte("cold"), document.IndexField|document.StoreField|document.IncludeTermVectors, nil), + document.NewTextFieldCustom("tag", []uint64{1}, []byte("dark"), document.IndexField|document.StoreField|document.IncludeTermVectors, nil), + }, + CompositeFields: []*document.CompositeField{ + document.NewCompositeField("_all", true, nil, []string{"_id"}), + }, + } + + // forge analyzed docs + results := []*index.AnalysisResult{ + &index.AnalysisResult{ + Document: doc, + Analyzed: []analysis.TokenFrequencies{ + analysis.TokenFrequency(analysis.TokenStream{ + &analysis.Token{ + Start: 0, + End: 1, + Position: 1, + Term: []byte("a"), + }, + }, nil, false), + analysis.TokenFrequency(analysis.TokenStream{ + &analysis.Token{ + Start: 0, + End: 3, + Position: 1, + Term: []byte("wow"), + }, + }, nil, true), + analysis.TokenFrequency(analysis.TokenStream{ + &analysis.Token{ + Start: 0, + End: 4, + Position: 1, + Term: []byte("some"), + }, + &analysis.Token{ + Start: 5, + End: 10, + Position: 2, + Term: []byte("thing"), + }, + }, nil, true), + analysis.TokenFrequency(analysis.TokenStream{ + &analysis.Token{ + Start: 0, + End: 4, + Position: 1, + Term: []byte("cold"), + }, + }, []uint64{0}, true), + analysis.TokenFrequency(analysis.TokenStream{ + &analysis.Token{ + Start: 0, + End: 4, + Position: 1, + Term: []byte("dark"), + }, + }, []uint64{1}, true), + }, + Length: []int{ + 1, + 1, + 2, + 1, + 1, + }, + }, + &index.AnalysisResult{ + Document: doc2, + Analyzed: []analysis.TokenFrequencies{ + analysis.TokenFrequency(analysis.TokenStream{ + &analysis.Token{ + Start: 0, + End: 1, + Position: 1, + Term: []byte("b"), + }, + }, nil, false), + analysis.TokenFrequency(analysis.TokenStream{ + &analysis.Token{ + Start: 0, + End: 3, + Position: 1, + Term: []byte("who"), + }, + }, nil, true), + analysis.TokenFrequency(analysis.TokenStream{ + &analysis.Token{ + Start: 0, + End: 4, + Position: 1, + Term: []byte("some"), + }, + 
&analysis.Token{ + Start: 5, + End: 10, + Position: 2, + Term: []byte("thing"), + }, + }, nil, true), + analysis.TokenFrequency(analysis.TokenStream{ + &analysis.Token{ + Start: 0, + End: 4, + Position: 1, + Term: []byte("cold"), + }, + }, []uint64{0}, true), + analysis.TokenFrequency(analysis.TokenStream{ + &analysis.Token{ + Start: 0, + End: 4, + Position: 1, + Term: []byte("dark"), + }, + }, []uint64{1}, true), + }, + Length: []int{ + 1, + 1, + 2, + 1, + 1, + }, + }, + } + + // fix up composite fields + for _, ar := range results { + for i, f := range ar.Document.Fields { + for _, cf := range ar.Document.CompositeFields { + cf.Compose(f.Name(), ar.Length[i], ar.Analyzed[i]) + } + } + } + + segment := mem.NewFromAnalyzedDocs(results) + + return segment +} diff --git a/index/scorch/segment/bolt/dict.go b/index/scorch/segment/bolt/dict.go new file mode 100644 index 000000000..0d7ab5eca --- /dev/null +++ b/index/scorch/segment/bolt/dict.go @@ -0,0 +1,161 @@ +// Copyright (c) 2017 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package bolt + +import ( + "fmt" + + "github.com/RoaringBitmap/roaring" + "github.com/blevesearch/bleve/index" + "github.com/blevesearch/bleve/index/scorch/segment" + "github.com/couchbaselabs/vellum" + "github.com/couchbaselabs/vellum/regexp" +) + +// Dictionary is the bolt representation of the term dictionary +type Dictionary struct { + segment *Segment + field string + fieldID uint16 + fst *vellum.FST +} + +// PostingsList returns the postings list for the specified term +func (d *Dictionary) PostingsList(term string, except *roaring.Bitmap) (segment.PostingsList, error) { + return d.postingsList(term, except) +} + +func (d *Dictionary) postingsList(term string, except *roaring.Bitmap) (*PostingsList, error) { + rv := &PostingsList{ + dictionary: d, + term: term, + except: except, + } + + if d.fst != nil { + postingsID, exists, err := d.fst.Get([]byte(term)) + if err != nil { + return nil, fmt.Errorf("vellum err: %v", err) + } + if exists { + rv.postingsID = postingsID + postingsIDKey := EncodeUvarintAscending(nil, postingsID) + bucket := d.segment.tx.Bucket(postingsBucket) + if bucket == nil { + return nil, fmt.Errorf("postings bucket missing") + } + + roaringBytes := bucket.Get(postingsIDKey) + if roaringBytes == nil { + return nil, fmt.Errorf("postings for postingsID %d missing", postingsID) + } + bitmap := roaring.NewBitmap() + _, err = bitmap.FromBuffer(roaringBytes) + if err != nil { + return nil, fmt.Errorf("error loading roaring bitmap: %v", err) + } + + rv.postings = bitmap + rv.postingKey = postingsIDKey + } + } + + return rv, nil +} + +// Iterator returns an iterator for this dictionary +func (d *Dictionary) Iterator() segment.DictionaryIterator { + + rv := &DictionaryIterator{ + d: d, + } + + if d.fst != nil { + itr, err := d.fst.Iterator(nil, nil) + if err == nil { + rv.itr = itr + } + } + + return rv +} + +// PrefixIterator returns an iterator which only visits terms having the +// the specified prefix +func (d *Dictionary) 
PrefixIterator(prefix string) segment.DictionaryIterator { + rv := &DictionaryIterator{ + d: d, + } + + if d.fst != nil { + r, err := regexp.New(prefix + ".*") + if err == nil { + itr, err := d.fst.Search(r, nil, nil) + if err == nil { + rv.itr = itr + } + } + } + + return rv +} + +// RangeIterator returns an iterator which only visits terms between the +// start and end terms. NOTE: bleve.index API specifies the end is inclusive. +func (d *Dictionary) RangeIterator(start, end string) segment.DictionaryIterator { + rv := &DictionaryIterator{ + d: d, + } + + // need to increment the end position to be inclusive + endBytes := []byte(end) + if endBytes[len(endBytes)-1] < 0xff { + endBytes[len(endBytes)-1]++ + } else { + endBytes = append(endBytes, 0xff) + } + + if d.fst != nil { + itr, err := d.fst.Iterator([]byte(start), endBytes) + if err == nil { + rv.itr = itr + } + } + + return rv +} + +// DictionaryIterator is an iterator for term dictionary +type DictionaryIterator struct { + d *Dictionary + itr vellum.Iterator + err error +} + +// Next returns the next entry in the dictionary +func (i *DictionaryIterator) Next() (*index.DictEntry, error) { + if i.err == vellum.ErrIteratorDone { + return nil, nil + } else if i.err != nil { + return nil, i.err + } + term, count := i.itr.Current() + rv := &index.DictEntry{ + Term: string(term), + Count: count, + } + i.err = i.itr.Next() + return rv, nil +} diff --git a/index/scorch/segment/bolt/dict_test.go b/index/scorch/segment/bolt/dict_test.go new file mode 100644 index 000000000..6b3926a87 --- /dev/null +++ b/index/scorch/segment/bolt/dict_test.go @@ -0,0 +1,183 @@ +// Copyright (c) 2017 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package bolt + +import ( + "os" + "reflect" + "testing" + + "github.com/blevesearch/bleve/analysis" + "github.com/blevesearch/bleve/document" + "github.com/blevesearch/bleve/index" + "github.com/blevesearch/bleve/index/scorch/segment/mem" +) + +func buildMemSegmentForDict() *mem.Segment { + doc := &document.Document{ + ID: "a", + Fields: []document.Field{ + document.NewTextFieldCustom("_id", nil, []byte("a"), document.IndexField|document.StoreField, nil), + document.NewTextFieldCustom("desc", nil, []byte("apple ball cat dog egg fish bat"), document.IndexField|document.StoreField|document.IncludeTermVectors, nil), + }, + } + + // forge analyzed docs + results := []*index.AnalysisResult{ + &index.AnalysisResult{ + Document: doc, + Analyzed: []analysis.TokenFrequencies{ + analysis.TokenFrequency(analysis.TokenStream{ + &analysis.Token{ + Start: 0, + End: 1, + Position: 1, + Term: []byte("a"), + }, + }, nil, false), + analysis.TokenFrequency(analysis.TokenStream{ + &analysis.Token{ + Start: 0, + End: 5, + Position: 1, + Term: []byte("apple"), + }, + &analysis.Token{ + Start: 6, + End: 10, + Position: 2, + Term: []byte("ball"), + }, + &analysis.Token{ + Start: 11, + End: 14, + Position: 3, + Term: []byte("cat"), + }, + &analysis.Token{ + Start: 15, + End: 18, + Position: 4, + Term: []byte("dog"), + }, + &analysis.Token{ + Start: 19, + End: 22, + Position: 5, + Term: []byte("egg"), + }, + &analysis.Token{ + Start: 20, + End: 24, + Position: 6, + Term: []byte("fish"), + }, + &analysis.Token{ + Start: 25, + End: 28, + Position: 7, + Term: []byte("bat"), 
+ }, + }, nil, true), + }, + Length: []int{ + 1, + 7, + }, + }, + } + + segment := mem.NewFromAnalyzedDocs(results) + + return segment +} + +func TestDictionary(t *testing.T) { + + _ = os.RemoveAll("/tmp/scorch.bolt") + + memSegment := buildMemSegmentForDict() + err := persistSegment(memSegment, "/tmp/scorch.bolt", 1024) + if err != nil { + t.Fatalf("error persisting segment: %v", err) + } + + segment, err := Open("/tmp/scorch.bolt") + if err != nil { + t.Fatalf("error opening segment: %v", err) + } + defer func() { + cerr := segment.Close() + if cerr != nil { + t.Fatalf("error closing segment: %v", cerr) + } + }() + + dict, err := segment.Dictionary("desc") + if err != nil { + t.Fatal(err) + } + + // test basic full iterator + expected := []string{"apple", "ball", "bat", "cat", "dog", "egg", "fish"} + var got []string + itr := dict.Iterator() + next, err := itr.Next() + for next != nil && err == nil { + got = append(got, next.Term) + next, err = itr.Next() + } + if err != nil { + t.Fatalf("dict itr error: %v", err) + } + + if !reflect.DeepEqual(expected, got) { + t.Errorf("expected: %v, got: %v", expected, got) + } + + // test prefix iterator + expected = []string{"ball", "bat"} + got = got[:0] + itr = dict.PrefixIterator("b") + next, err = itr.Next() + for next != nil && err == nil { + got = append(got, next.Term) + next, err = itr.Next() + } + if err != nil { + t.Fatalf("dict itr error: %v", err) + } + + if !reflect.DeepEqual(expected, got) { + t.Errorf("expected: %v, got: %v", expected, got) + } + + // test range iterator + expected = []string{"cat", "dog", "egg"} + got = got[:0] + itr = dict.RangeIterator("cat", "egg") + next, err = itr.Next() + for next != nil && err == nil { + got = append(got, next.Term) + next, err = itr.Next() + } + if err != nil { + t.Fatalf("dict itr error: %v", err) + } + + if !reflect.DeepEqual(expected, got) { + t.Errorf("expected: %v, got: %v", expected, got) + } +} diff --git a/index/scorch/segment/bolt/int.go
b/index/scorch/segment/bolt/int.go new file mode 100644 index 000000000..a4af3a7a8 --- /dev/null +++ b/index/scorch/segment/bolt/int.go @@ -0,0 +1,94 @@ +// Copyright 2014 The Cockroach Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +// implied. See the License for the specific language governing +// permissions and limitations under the License. + +// This code originated from: +// https://github.com/cockroachdb/cockroach/blob/2dd65dde5d90c157f4b93f92502ca1063b904e1d/pkg/util/encoding/encoding.go + +// Modified to not use pkg/errors + +package bolt + +import "fmt" + +const ( + maxVarintSize = 9 + + // IntMin is chosen such that the range of int tags does not overlap the + // ascii character set that is frequently used in testing. + IntMin = 0x80 // 128 + intMaxWidth = 8 + intZero = IntMin + intMaxWidth // 136 + intSmall = IntMax - intZero - intMaxWidth // 109 + // IntMax is the maximum int tag value. + IntMax = 0xfd // 253 +) + +// EncodeUvarintAscending encodes the uint64 value using a variable length +// (length-prefixed) representation. The length is encoded as a single +// byte indicating the number of encoded bytes (-8) to follow. See +// EncodeVarintAscending for rationale. The encoded bytes are appended to the +// supplied buffer and the final buffer is returned. 
+func EncodeUvarintAscending(b []byte, v uint64) []byte { + switch { + case v <= intSmall: + return append(b, intZero+byte(v)) + case v <= 0xff: + return append(b, IntMax-7, byte(v)) + case v <= 0xffff: + return append(b, IntMax-6, byte(v>>8), byte(v)) + case v <= 0xffffff: + return append(b, IntMax-5, byte(v>>16), byte(v>>8), byte(v)) + case v <= 0xffffffff: + return append(b, IntMax-4, byte(v>>24), byte(v>>16), byte(v>>8), byte(v)) + case v <= 0xffffffffff: + return append(b, IntMax-3, byte(v>>32), byte(v>>24), byte(v>>16), byte(v>>8), + byte(v)) + case v <= 0xffffffffffff: + return append(b, IntMax-2, byte(v>>40), byte(v>>32), byte(v>>24), byte(v>>16), + byte(v>>8), byte(v)) + case v <= 0xffffffffffffff: + return append(b, IntMax-1, byte(v>>48), byte(v>>40), byte(v>>32), byte(v>>24), + byte(v>>16), byte(v>>8), byte(v)) + default: + return append(b, IntMax, byte(v>>56), byte(v>>48), byte(v>>40), byte(v>>32), + byte(v>>24), byte(v>>16), byte(v>>8), byte(v)) + } +} + +// DecodeUvarintAscending decodes a varint encoded uint64 from the input +// buffer. The remainder of the input buffer and the decoded uint64 +// are returned. +func DecodeUvarintAscending(b []byte) ([]byte, uint64, error) { + if len(b) == 0 { + return nil, 0, fmt.Errorf("insufficient bytes to decode uvarint value") + } + length := int(b[0]) - intZero + b = b[1:] // skip length byte + if length <= intSmall { + return b, uint64(length), nil + } + length -= intSmall + if length < 0 || length > 8 { + return nil, 0, fmt.Errorf("invalid uvarint length of %d", length) + } else if len(b) < length { + return nil, 0, fmt.Errorf("insufficient bytes to decode uvarint value: %q", b) + } + var v uint64 + // It is faster to range over the elements in a slice than to index + // into the slice on each loop iteration. 
+ for _, t := range b[:length] { + v = (v << 8) | uint64(t) + } + return b[length:], v, nil +} diff --git a/index/scorch/segment/bolt/int_test.go b/index/scorch/segment/bolt/int_test.go new file mode 100644 index 000000000..e59918c8b --- /dev/null +++ b/index/scorch/segment/bolt/int_test.go @@ -0,0 +1,96 @@ +// Copyright 2014 The Cockroach Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +// implied. See the License for the specific language governing +// permissions and limitations under the License. + +// This code originated from: +// https://github.com/cockroachdb/cockroach/blob/2dd65dde5d90c157f4b93f92502ca1063b904e1d/pkg/util/encoding/encoding_test.go + +// Modified to only test the parts we borrowed + +package bolt + +import ( + "bytes" + "math" + "testing" +) + +type testCaseUint64 struct { + value uint64 + expEnc []byte +} + +func TestEncodeDecodeUvarint(t *testing.T) { + testBasicEncodeDecodeUint64(EncodeUvarintAscending, DecodeUvarintAscending, false, t) + testCases := []testCaseUint64{ + {0, []byte{0x88}}, + {1, []byte{0x89}}, + {109, []byte{0xf5}}, + {110, []byte{0xf6, 0x6e}}, + {1 << 8, []byte{0xf7, 0x01, 0x00}}, + {math.MaxUint64, []byte{0xfd, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}}, + } + testCustomEncodeUint64(testCases, EncodeUvarintAscending, t) +} + +func testBasicEncodeDecodeUint64( + encFunc func([]byte, uint64) []byte, + decFunc func([]byte) ([]byte, uint64, error), + descending bool, t *testing.T, +) { + testCases := []uint64{ + 0, 1, + 1<<8 - 1, 1 << 8, + 1<<16 - 1, 1 << 16, + 1<<24 - 1, 1 << 24, + 1<<32 - 1, 1 << 32, + 1<<40 - 
1, 1 << 40, + 1<<48 - 1, 1 << 48, + 1<<56 - 1, 1 << 56, + math.MaxUint64 - 1, math.MaxUint64, + } + + var lastEnc []byte + for i, v := range testCases { + enc := encFunc(nil, v) + if i > 0 { + if (descending && bytes.Compare(enc, lastEnc) >= 0) || + (!descending && bytes.Compare(enc, lastEnc) < 0) { + t.Errorf("ordered constraint violated for %d: [% x] vs. [% x]", v, enc, lastEnc) + } + } + b, decode, err := decFunc(enc) + if err != nil { + t.Error(err) + continue + } + if len(b) != 0 { + t.Errorf("leftover bytes: [% x]", b) + } + if decode != v { + t.Errorf("decode yielded different value than input: %d vs. %d", decode, v) + } + lastEnc = enc + } +} + +func testCustomEncodeUint64( + testCases []testCaseUint64, encFunc func([]byte, uint64) []byte, t *testing.T, +) { + for _, test := range testCases { + enc := encFunc(nil, test.value) + if !bytes.Equal(enc, test.expEnc) { + t.Errorf("expected [% x]; got [% x] (value: %d)", test.expEnc, enc, test.value) + } + } +} diff --git a/index/scorch/segment/bolt/posting.go b/index/scorch/segment/bolt/posting.go new file mode 100644 index 000000000..e5d6c8938 --- /dev/null +++ b/index/scorch/segment/bolt/posting.go @@ -0,0 +1,323 @@ +// Copyright (c) 2017 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package bolt + +import ( + "bytes" + "fmt" + "math" + + "github.com/RoaringBitmap/roaring" + "github.com/Smerity/govarint" + "github.com/blevesearch/bleve/index/scorch/segment" + "github.com/boltdb/bolt" +) + +// PostingsList is an in-memory represenation of a postings list +type PostingsList struct { + dictionary *Dictionary + term string + postingsID uint64 + postings *roaring.Bitmap + except *roaring.Bitmap + postingKey []byte +} + +// Iterator returns an iterator for this postings list +func (p *PostingsList) Iterator() segment.PostingsIterator { + rv := &PostingsIterator{ + postings: p, + } + if p.postings != nil { + detailsBucket := p.dictionary.segment.tx.Bucket(postingDetailsBucket) + rv.detailBucket = detailsBucket.Bucket(p.postingKey) + rv.all = p.postings.Iterator() + if p.except != nil { + allExcept := p.postings.Clone() + allExcept.AndNot(p.except) + rv.actual = allExcept.Iterator() + } else { + rv.actual = p.postings.Iterator() + } + } + + return rv +} + +// Count returns the number of items on this postings list +func (p *PostingsList) Count() uint64 { + var rv uint64 + if p.postings != nil { + rv = p.postings.GetCardinality() + if p.except != nil { + except := p.except.GetCardinality() + if except > rv { + // avoid underflow + except = rv + } + rv -= except + } + } + return rv +} + +// PostingsIterator provides a way to iterate through the postings list +type PostingsIterator struct { + postings *PostingsList + all roaring.IntIterable + offset int + locoffset int + actual roaring.IntIterable + detailBucket *bolt.Bucket + + currChunk uint32 + currChunkFreqNorm []byte + currChunkLoc []byte + freqNormDecoder *govarint.Base128Decoder + locDecoder *govarint.Base128Decoder +} + +func (i *PostingsIterator) loadChunk(chunk int) error { + // load correct chunk bytes + chunkID := EncodeUvarintAscending(nil, uint64(chunk)) + chunkBucket := i.detailBucket.Bucket(chunkID) + if chunkBucket == nil { + return fmt.Errorf("chunk %d missing", chunkID) + } + 
i.currChunkFreqNorm = chunkBucket.Get(freqNormKey) + i.freqNormDecoder = govarint.NewU64Base128Decoder(bytes.NewReader(i.currChunkFreqNorm)) + i.currChunkLoc = chunkBucket.Get(locKey) + i.locDecoder = govarint.NewU64Base128Decoder(bytes.NewReader(i.currChunkLoc)) + i.currChunk = uint32(chunk) + return nil +} + +func (i *PostingsIterator) readFreqNorm() (uint64, uint64, error) { + freq, err := i.freqNormDecoder.GetU64() + if err != nil { + return 0, 0, fmt.Errorf("error reading frequency: %v", err) + } + normBits, err := i.freqNormDecoder.GetU64() + if err != nil { + return 0, 0, fmt.Errorf("error reading norm: %v", err) + } + return freq, normBits, err +} + +// readLocation processes all the integers on the stream representing a single +// location. if you care about it, pass in a non-nil location struct, and we +// will fill it. if you don't care about it, pass in nil and we safely consume +// the contents. +func (i *PostingsIterator) readLocation(l *Location) error { + // read off field + fieldID, err := i.locDecoder.GetU64() + if err != nil { + return fmt.Errorf("error reading location field: %v", err) + } + // read off pos + pos, err := i.locDecoder.GetU64() + if err != nil { + return fmt.Errorf("error reading location pos: %v", err) + } + // read off start + start, err := i.locDecoder.GetU64() + if err != nil { + return fmt.Errorf("error reading location start: %v", err) + } + // read off end + end, err := i.locDecoder.GetU64() + if err != nil { + return fmt.Errorf("error reading location end: %v", err) + } + // read off num array pos + numArrayPos, err := i.locDecoder.GetU64() + if err != nil { + return fmt.Errorf("error reading location num array pos: %v", err) + } + + // group these together for less branching + if l != nil { + l.field = i.postings.dictionary.segment.fieldsInv[fieldID] + l.pos = pos + l.start = start + l.end = end + if numArrayPos > 0 { + l.ap = make([]uint64, int(numArrayPos)) + } + } + + // read off array positions + for k := 0; k < 
int(numArrayPos); k++ { + ap, err := i.locDecoder.GetU64() + if err != nil { + return fmt.Errorf("error reading array position: %v", err) + } + if l != nil { + l.ap[k] = ap + } + } + + return nil +} + +// Next returns the next posting on the postings list, or nil at the end +func (i *PostingsIterator) Next() (segment.Posting, error) { + if i.actual == nil || !i.actual.HasNext() { + return nil, nil + } + n := i.actual.Next() + nChunk := n / i.postings.dictionary.segment.chunkFactor + allN := i.all.Next() + allNChunk := allN / i.postings.dictionary.segment.chunkFactor + + // n is the next actual hit (excluding some postings) + // allN is the next hit in the full postings + // if they don't match, adjust offsets to factor in item we're skipping over + // incr the all iterator, and check again + for allN != n { + + // in different chunks, reset offsets + if allNChunk != nChunk { + i.locoffset = 0 + i.offset = 0 + } else { + + if i.currChunk != nChunk || i.currChunkFreqNorm == nil { + err := i.loadChunk(int(nChunk)) + if err != nil { + return nil, fmt.Errorf("error loading chunk: %v", err) + } + } + + // read off freq/offsets even though we don't care about them + freq, _, err := i.readFreqNorm() + if err != nil { + return nil, err + } + if i.postings.dictionary.segment.fieldsLoc[i.postings.dictionary.fieldID] { + for j := 0; j < int(freq); j++ { + err := i.readLocation(nil) + if err != nil { + return nil, err + } + } + } + + // in same chunk, need to account for offsets + i.offset++ + } + + allN = i.all.Next() + } + + if i.currChunk != nChunk || i.currChunkFreqNorm == nil { + err := i.loadChunk(int(nChunk)) + if err != nil { + return nil, fmt.Errorf("error loading chunk: %v", err) + } + } + + rv := &Posting{ + iterator: i, + docNum: uint64(n), + } + + var err error + var normBits uint64 + rv.freq, normBits, err = i.readFreqNorm() + if err != nil { + return nil, err + } + rv.norm = math.Float32frombits(uint32(normBits)) + if 
i.postings.dictionary.segment.fieldsLoc[i.postings.dictionary.fieldID] { + // read off 'freq' locations + rv.locs = make([]segment.Location, rv.freq) + locs := make([]Location, rv.freq) + for j := 0; j < int(rv.freq); j++ { + err := i.readLocation(&locs[j]) + if err != nil { + return nil, err + } + rv.locs[j] = &locs[j] + } + } + + return rv, nil +} + +// Posting is a single entry in a postings list +type Posting struct { + iterator *PostingsIterator + docNum uint64 + + freq uint64 + norm float32 + locs []segment.Location +} + +// Number returns the document number of this posting in this segment +func (p *Posting) Number() uint64 { + return p.docNum +} + +// Frequency returns the frequence of occurance of this term in this doc/field +func (p *Posting) Frequency() uint64 { + return p.freq +} + +// Norm returns the normalization factor for this posting +func (p *Posting) Norm() float64 { + return float64(p.norm) +} + +// Locations returns the location information for each occurance +func (p *Posting) Locations() []segment.Location { + return p.locs +} + +// Location represents the location of a single occurance +type Location struct { + field string + pos uint64 + start uint64 + end uint64 + ap []uint64 +} + +// Field returns the name of the field (useful in composite fields to know +// which original field the value came from) +func (l *Location) Field() string { + return l.field +} + +// Start returns the start byte offset of this occurance +func (l *Location) Start() uint64 { + return l.start +} + +// End returns the end byte offset of this occurance +func (l *Location) End() uint64 { + return l.end +} + +// Pos returns the 1-based phrase position of this occurance +func (l *Location) Pos() uint64 { + return l.pos +} + +// ArrayPositions returns the array position vector associated with this occurance +func (l *Location) ArrayPositions() []uint64 { + return l.ap +} diff --git a/index/scorch/segment/bolt/segment.go b/index/scorch/segment/bolt/segment.go new file 
mode 100644 index 000000000..835313b87 --- /dev/null +++ b/index/scorch/segment/bolt/segment.go @@ -0,0 +1,309 @@ +// Copyright (c) 2017 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package bolt + +import ( + "bytes" + "encoding/binary" + "fmt" + "io" + + "github.com/RoaringBitmap/roaring" + "github.com/Smerity/govarint" + "github.com/blevesearch/bleve/index/scorch/segment" + "github.com/boltdb/bolt" + "github.com/couchbaselabs/vellum" + "github.com/golang/snappy" +) + +var readOnlyOptions = &bolt.Options{ + ReadOnly: true, +} + +// _id field is always guaranteed to have fieldID of 0 +const idFieldID uint16 = 0 + +// Open returns a boltdb impl of a segment +func Open(path string) (segment.Segment, error) { + + db, err := bolt.Open(path, 0600, readOnlyOptions) + if err != nil { + return nil, err + } + + tx, err := db.Begin(false) + if err != nil { + _ = db.Close() + return nil, err + } + + rv := &Segment{ + db: db, + tx: tx, + fieldsMap: make(map[string]uint16), + } + + err = rv.loadConfig() + if err != nil { + _ = db.Close() + return nil, err + } + + err = rv.loadFields() + if err != nil { + _ = db.Close() + return nil, err + } + + return rv, nil +} + +// Segment implements a boltdb based implementation of a segment +type Segment struct { + version uint8 + chunkFactor uint32 + db *bolt.DB + tx *bolt.Tx + + fieldsMap map[string]uint16 + fieldsInv []string + fieldsLoc []bool +} + +func (s *Segment) loadConfig() (err error) { + bucket := 
s.tx.Bucket(configBucket) + if bucket == nil { + return fmt.Errorf("config bucket missing") + } + + ver := bucket.Get(versionKey) + if ver == nil { + return fmt.Errorf("version key missing") + } + s.version = ver[0] + + chunk := bucket.Get(chunkKey) + if chunk == nil { + return fmt.Errorf("chunk key is missing") + } + s.chunkFactor = binary.BigEndian.Uint32(chunk) + + return nil +} + +// loadFields reads the fields info from the segment so that we never have to go +// back to disk to access this (small and used frequently) +func (s *Segment) loadFields() (err error) { + + bucket := s.tx.Bucket(fieldsBucket) + if bucket == nil { + return fmt.Errorf("fields bucket missing") + } + + indexLocs := roaring.NewBitmap() + err = bucket.ForEach(func(k []byte, v []byte) error { + + // process index locations bitset + if k[0] == indexLocsKey[0] { + _, err2 := indexLocs.FromBuffer(v) + if err2 != nil { + return fmt.Errorf("error loading indexLocs: %v", err2) + } + } else { + + _, fieldID, err2 := DecodeUvarintAscending(k) + if err2 != nil { + return err2 + } + // we store fieldID+1 in so we can discern the zero value + s.fieldsMap[string(v)] = uint16(fieldID + 1) + } + return nil + }) + if err != nil { + return err + } + + // now setup the inverse (should have same size as map and be keyed 0-(len-1)) + s.fieldsInv = make([]string, len(s.fieldsMap)) + for k, v := range s.fieldsMap { + s.fieldsInv[int(v)-1] = k + } + s.fieldsLoc = make([]bool, len(s.fieldsInv)) + for i := range s.fieldsInv { + if indexLocs.ContainsInt(i) { + s.fieldsLoc[i] = true + } + } + + return nil +} + +// Fields returns the field names used in this segment +func (s *Segment) Fields() []string { + return s.fieldsInv +} + +// Count returns the number of documents in this segment +// (this has no notion of deleted docs) +func (s *Segment) Count() uint64 { + return uint64(s.tx.Bucket(storedBucket).Stats().BucketN - 1) +} + +// Dictionary returns the term dictionary for the specified field +func (s *Segment) 
Dictionary(field string) (segment.TermDictionary, error) { + return s.dictionary(field) +} + +func (s *Segment) dictionary(field string) (*Dictionary, error) { + + rv := &Dictionary{ + segment: s, + field: field, + } + + rv.fieldID = s.fieldsMap[field] + if rv.fieldID > 0 { + rv.fieldID = rv.fieldID - 1 + fieldIDKey := EncodeUvarintAscending(nil, uint64(rv.fieldID)) + bucket := s.tx.Bucket(dictBucket) + if bucket == nil { + return nil, fmt.Errorf("dictionary bucket missing") + } + fstBytes := bucket.Get(fieldIDKey) + if fstBytes == nil { + return nil, fmt.Errorf("dictionary field %s bytes nil", field) + } + if fstBytes != nil { + fst, err := vellum.Load(fstBytes) + if err != nil { + return nil, fmt.Errorf("dictionary field %s vellum err: %v", field, err) + } + if err == nil { + rv.fst = fst + } + } + + } + + return rv, nil +} + +// VisitDocument invokes the DocFieldValueVistor for each stored field +// for the specified doc number +func (s *Segment) VisitDocument(num uint64, visitor segment.DocumentFieldValueVisitor) error { + storedBuucket := s.tx.Bucket(storedBucket) + if storedBuucket == nil { + return fmt.Errorf("stored bucket missing") + } + docNumKey := EncodeUvarintAscending(nil, num) + docBucket := storedBuucket.Bucket(docNumKey) + if docBucket == nil { + return fmt.Errorf("segment has no doc number %d", num) + } + metaBytes := docBucket.Get(metaKey) + if metaBytes == nil { + return fmt.Errorf("stored meta bytes for doc number %d is nil", num) + } + dataBytes := docBucket.Get(dataKey) + if dataBytes == nil { + return fmt.Errorf("stored data bytes for doc number %d is nil", num) + } + uncompressed, err := snappy.Decode(nil, dataBytes) + if err != nil { + return err + } + + reader := bytes.NewReader(metaBytes) + decoder := govarint.NewU64Base128Decoder(reader) + + keepGoing := true + for keepGoing { + field, err := decoder.GetU64() + if err == io.EOF { + break + } + if err != nil { + return err + } + typ, err := decoder.GetU64() + if err != nil { + return err 
+ } + offset, err := decoder.GetU64() + if err != nil { + return err + } + l, err := decoder.GetU64() + if err != nil { + return err + } + numap, err := decoder.GetU64() + if err != nil { + return err + } + var arrayPos []uint64 + if numap > 0 { + arrayPos = make([]uint64, numap) + for i := 0; i < int(numap); i++ { + ap, err := decoder.GetU64() + if err != nil { + return err + } + arrayPos[i] = ap + } + } + + value := uncompressed[offset : offset+l] + keepGoing = visitor(s.fieldsInv[field], byte(typ), value, arrayPos) + } + + return nil +} + +// DocNumbers returns a bitset corresponding to the doc numbers of all the +// provided _id strings +func (s *Segment) DocNumbers(ids []string) (*roaring.Bitmap, error) { + rv := roaring.New() + + if len(s.fieldsMap) > 0 { + idDict, err := s.dictionary("_id") + if err != nil { + return nil, err + } + + for _, id := range ids { + postings, err := idDict.postingsList(id, nil) + if err != nil { + return nil, err + } + if postings.postings != nil { + rv.Or(postings.postings) + } + } + } + + return rv, nil +} + +// Close releases all resources associated with this segment +func (s *Segment) Close() error { + err := s.tx.Rollback() + if err != nil { + _ = s.db.Close() + return err + } + return s.db.Close() +} diff --git a/index/scorch/segment/bolt/segment_test.go b/index/scorch/segment/bolt/segment_test.go new file mode 100644 index 000000000..b00c71926 --- /dev/null +++ b/index/scorch/segment/bolt/segment_test.go @@ -0,0 +1,517 @@ +// Copyright (c) 2017 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +package bolt + +import ( + "math" + "os" + "reflect" + "testing" +) + +func TestOpen(t *testing.T) { + _ = os.RemoveAll("/tmp/scorch.bolt") + + memSegment := buildMemSegment() + err := persistSegment(memSegment, "/tmp/scorch.bolt", 1024) + if err != nil { + t.Fatalf("error persisting segment: %v", err) + } + + segment, err := Open("/tmp/scorch.bolt") + if err != nil { + t.Fatalf("error opening segment: %v", err) + } + defer func() { + cerr := segment.Close() + if cerr != nil { + t.Fatalf("error closing segment: %v", err) + } + }() + + expectFields := map[string]struct{}{ + "_id": struct{}{}, + "_all": struct{}{}, + "name": struct{}{}, + "desc": struct{}{}, + "tag": struct{}{}, + } + fields := segment.Fields() + if len(fields) != len(expectFields) { + t.Errorf("expected %d fields, only got %d", len(expectFields), len(fields)) + } + for _, field := range fields { + if _, ok := expectFields[field]; !ok { + t.Errorf("got unexpected field: %s", field) + } + } + + docCount := segment.Count() + if docCount != 1 { + t.Errorf("expected count 1, got %d", docCount) + } + + // check the _id field + dict, err := segment.Dictionary("_id") + if err != nil { + t.Fatal(err) + } + if dict == nil { + t.Fatal("got nil dict, expected non-nil") + } + + postingsList, err := dict.PostingsList("a", nil) + if err != nil { + t.Fatal(err) + } + if postingsList == nil { + t.Fatal("got nil postings list, expected non-nil") + } + + postingsItr := postingsList.Iterator() + if postingsItr == nil { + t.Fatal("got nil iterator, expected non-nil") + } + + count := 0 + nextPosting, err := postingsItr.Next() + for nextPosting != nil && err == nil { + count++ + if nextPosting.Frequency() != 1 { + t.Errorf("expected frequency 1, got %d", nextPosting.Frequency()) + } + if nextPosting.Number() != 0 { + t.Errorf("expected doc number 0, got %d", nextPosting.Number()) + } + if nextPosting.Norm() != 
1.0 { + t.Errorf("expected norm 1.0, got %f", nextPosting.Norm()) + } + + nextPosting, err = postingsItr.Next() + } + if err != nil { + t.Fatal(err) + } + + if count != 1 { + t.Errorf("expected count to be 1, got %d", count) + } + + // check the name field + dict, err = segment.Dictionary("name") + if err != nil { + t.Fatal(err) + } + if dict == nil { + t.Fatal("got nil dict, expected non-nil") + } + + postingsList, err = dict.PostingsList("wow", nil) + if err != nil { + t.Fatal(err) + } + if postingsList == nil { + t.Fatal("got nil postings list, expected non-nil") + } + + postingsItr = postingsList.Iterator() + if postingsItr == nil { + t.Fatal("got nil iterator, expected non-nil") + } + + count = 0 + nextPosting, err = postingsItr.Next() + for nextPosting != nil && err == nil { + count++ + if nextPosting.Frequency() != 1 { + t.Errorf("expected frequency 1, got %d", nextPosting.Frequency()) + } + if nextPosting.Number() != 0 { + t.Errorf("expected doc number 0, got %d", nextPosting.Number()) + } + if nextPosting.Norm() != 1.0 { + t.Errorf("expected norm 1.0, got %f", nextPosting.Norm()) + } + var numLocs uint64 + for _, loc := range nextPosting.Locations() { + numLocs++ + if loc.Field() != "name" { + t.Errorf("expected loc field to be 'name', got '%s'", loc.Field()) + } + if loc.Start() != 0 { + t.Errorf("expected loc start to be 0, got %d", loc.Start()) + } + if loc.End() != 3 { + t.Errorf("expected loc end to be 3, got %d", loc.End()) + } + if loc.Pos() != 1 { + t.Errorf("expected loc pos to be 1, got %d", loc.Pos()) + } + if loc.ArrayPositions() != nil { + t.Errorf("expect loc array pos to be nil, got %v", loc.ArrayPositions()) + } + } + if numLocs != nextPosting.Frequency() { + t.Errorf("expected %d locations, got %d", nextPosting.Frequency(), numLocs) + } + + nextPosting, err = postingsItr.Next() + } + if err != nil { + t.Fatal(err) + } + + if count != 1 { + t.Errorf("expected count to be 1, got %d", count) + } + + // check the _all field (composite) + dict, 
err = segment.Dictionary("_all") + if err != nil { + t.Fatal(err) + } + if dict == nil { + t.Fatal("got nil dict, expected non-nil") + } + + postingsList, err = dict.PostingsList("wow", nil) + if err != nil { + t.Fatal(err) + } + if postingsList == nil { + t.Fatal("got nil postings list, expected non-nil") + } + + postingsItr = postingsList.Iterator() + if postingsItr == nil { + t.Fatal("got nil iterator, expected non-nil") + } + + count = 0 + nextPosting, err = postingsItr.Next() + for nextPosting != nil && err == nil { + count++ + if nextPosting.Frequency() != 1 { + t.Errorf("expected frequency 1, got %d", nextPosting.Frequency()) + } + if nextPosting.Number() != 0 { + t.Errorf("expected doc number 0, got %d", nextPosting.Number()) + } + expectedNorm := float32(1.0 / math.Sqrt(float64(5))) + if nextPosting.Norm() != float64(expectedNorm) { + t.Errorf("expected norm %f, got %f", expectedNorm, nextPosting.Norm()) + } + var numLocs uint64 + for _, loc := range nextPosting.Locations() { + numLocs++ + if loc.Field() != "name" { + t.Errorf("expected loc field to be 'name', got '%s'", loc.Field()) + } + if loc.Start() != 0 { + t.Errorf("expected loc start to be 0, got %d", loc.Start()) + } + if loc.End() != 3 { + t.Errorf("expected loc end to be 3, got %d", loc.End()) + } + if loc.Pos() != 1 { + t.Errorf("expected loc pos to be 1, got %d", loc.Pos()) + } + if loc.ArrayPositions() != nil { + t.Errorf("expect loc array pos to be nil, got %v", loc.ArrayPositions()) + } + } + if numLocs != nextPosting.Frequency() { + t.Errorf("expected %d locations, got %d", nextPosting.Frequency(), numLocs) + } + + nextPosting, err = postingsItr.Next() + } + if err != nil { + t.Fatal(err) + } + + if count != 1 { + t.Errorf("expected count to be 1, got %d", count) + } + + // now try a field with array positions + dict, err = segment.Dictionary("tag") + if err != nil { + t.Fatal(err) + } + if dict == nil { + t.Fatal("got nil dict, expected non-nil") + } + + postingsList, err = 
dict.PostingsList("dark", nil) + if err != nil { + t.Fatal(err) + } + if postingsList == nil { + t.Fatal("got nil postings list, expected non-nil") + } + + postingsItr = postingsList.Iterator() + if postingsItr == nil { + t.Fatal("got nil iterator, expected non-nil") + } + + nextPosting, err = postingsItr.Next() + for nextPosting != nil && err == nil { + + if nextPosting.Frequency() != 1 { + t.Errorf("expected frequency 1, got %d", nextPosting.Frequency()) + } + if nextPosting.Number() != 0 { + t.Errorf("expected doc number 0, got %d", nextPosting.Number()) + } + var numLocs uint64 + for _, loc := range nextPosting.Locations() { + numLocs++ + if loc.Field() != "tag" { + t.Errorf("expected loc field to be 'name', got '%s'", loc.Field()) + } + if loc.Start() != 0 { + t.Errorf("expected loc start to be 0, got %d", loc.Start()) + } + if loc.End() != 4 { + t.Errorf("expected loc end to be 3, got %d", loc.End()) + } + if loc.Pos() != 1 { + t.Errorf("expected loc pos to be 1, got %d", loc.Pos()) + } + expectArrayPos := []uint64{1} + if !reflect.DeepEqual(loc.ArrayPositions(), expectArrayPos) { + t.Errorf("expect loc array pos to be %v, got %v", expectArrayPos, loc.ArrayPositions()) + } + } + if numLocs != nextPosting.Frequency() { + t.Errorf("expected %d locations, got %d", nextPosting.Frequency(), numLocs) + } + + nextPosting, err = postingsItr.Next() + } + if err != nil { + t.Fatal(err) + } + + // now try and visit a document + var fieldValuesSeen int + err = segment.VisitDocument(0, func(field string, typ byte, value []byte, pos []uint64) bool { + fieldValuesSeen++ + return true + }) + if err != nil { + t.Fatal(err) + } + if fieldValuesSeen != 5 { + t.Errorf("expected 5 field values, got %d", fieldValuesSeen) + } +} + +func TestOpenMulti(t *testing.T) { + _ = os.RemoveAll("/tmp/scorch.bolt") + + memSegment := buildMemSegmentMulti() + err := persistSegment(memSegment, "/tmp/scorch.bolt", 1024) + if err != nil { + t.Fatalf("error persisting segment: %v", err) + } + + 
segment, err := Open("/tmp/scorch.bolt") + if err != nil { + t.Fatalf("error opening segment: %v", err) + } + defer func() { + cerr := segment.Close() + if cerr != nil { + t.Fatalf("error closing segment: %v", err) + } + }() + + if segment.Count() != 2 { + t.Errorf("expected count 2, got %d", segment.Count()) + } + + // check the desc field + dict, err := segment.Dictionary("desc") + if err != nil { + t.Fatal(err) + } + if dict == nil { + t.Fatal("got nil dict, expected non-nil") + } + + postingsList, err := dict.PostingsList("thing", nil) + if err != nil { + t.Fatal(err) + } + if postingsList == nil { + t.Fatal("got nil postings list, expected non-nil") + } + + postingsItr := postingsList.Iterator() + if postingsItr == nil { + t.Fatal("got nil iterator, expected non-nil") + } + + count := 0 + nextPosting, err := postingsItr.Next() + for nextPosting != nil && err == nil { + count++ + nextPosting, err = postingsItr.Next() + } + if err != nil { + t.Fatal(err) + } + + if count != 2 { + t.Errorf("expected count to be 2, got %d", count) + } + + // get docnum of a + exclude, err := segment.DocNumbers([]string{"a"}) + if err != nil { + t.Fatal(err) + } + + // look for term 'thing' excluding doc 'a' + postingsListExcluding, err := dict.PostingsList("thing", exclude) + if err != nil { + t.Fatal(err) + } + if postingsList == nil { + t.Fatal("got nil postings list, expected non-nil") + } + + postingsListExcludingCount := postingsListExcluding.Count() + if postingsListExcludingCount != 1 { + t.Errorf("expected count from postings list to be 1, got %d", postingsListExcludingCount) + } + + postingsItrExcluding := postingsListExcluding.Iterator() + if postingsItr == nil { + t.Fatal("got nil iterator, expected non-nil") + } + + count = 0 + nextPosting, err = postingsItrExcluding.Next() + for nextPosting != nil && err == nil { + count++ + nextPosting, err = postingsItrExcluding.Next() + } + if err != nil { + t.Fatal(err) + } + + if count != 1 { + t.Errorf("expected count to be 1, 
got %d", count) + } +} + +func TestOpenMultiWithTwoChunks(t *testing.T) { + _ = os.RemoveAll("/tmp/scorch.bolt") + + memSegment := buildMemSegmentMulti() + err := persistSegment(memSegment, "/tmp/scorch.bolt", 1) + if err != nil { + t.Fatalf("error persisting segment: %v", err) + } + + segment, err := Open("/tmp/scorch.bolt") + if err != nil { + t.Fatalf("error opening segment: %v", err) + } + defer func() { + cerr := segment.Close() + if cerr != nil { + t.Fatalf("error closing segment: %v", err) + } + }() + + if segment.Count() != 2 { + t.Errorf("expected count 2, got %d", segment.Count()) + } + + // check the desc field + dict, err := segment.Dictionary("desc") + if err != nil { + t.Fatal(err) + } + if dict == nil { + t.Fatal("got nil dict, expected non-nil") + } + + postingsList, err := dict.PostingsList("thing", nil) + if err != nil { + t.Fatal(err) + } + if postingsList == nil { + t.Fatal("got nil postings list, expected non-nil") + } + + postingsItr := postingsList.Iterator() + if postingsItr == nil { + t.Fatal("got nil iterator, expected non-nil") + } + + count := 0 + nextPosting, err := postingsItr.Next() + for nextPosting != nil && err == nil { + count++ + nextPosting, err = postingsItr.Next() + } + if err != nil { + t.Fatal(err) + } + + if count != 2 { + t.Errorf("expected count to be 2, got %d", count) + } + + // get docnum of a + exclude, err := segment.DocNumbers([]string{"a"}) + if err != nil { + t.Fatal(err) + } + + // look for term 'thing' excluding doc 'a' + postingsListExcluding, err := dict.PostingsList("thing", exclude) + if err != nil { + t.Fatal(err) + } + if postingsList == nil { + t.Fatal("got nil postings list, expected non-nil") + } + + postingsItrExcluding := postingsListExcluding.Iterator() + if postingsItr == nil { + t.Fatal("got nil iterator, expected non-nil") + } + + count = 0 + nextPosting, err = postingsItrExcluding.Next() + for nextPosting != nil && err == nil { + count++ + nextPosting, err = postingsItrExcluding.Next() + } + if 
err != nil { + t.Fatal(err) + } + + if count != 1 { + t.Errorf("expected count to be 1, got %d", count) + } +} From 898a6b1e8579d04eb5852eac298371df180c0a73 Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Tue, 5 Dec 2017 13:32:57 -0500 Subject: [PATCH 026/728] fix errcheck issues --- index/scorch/segment/bolt/build.go | 30 ++++++++++++++++++++----- index/scorch/segment/bolt/build_test.go | 2 +- 2 files changed, 25 insertions(+), 7 deletions(-) diff --git a/index/scorch/segment/bolt/build.go b/index/scorch/segment/bolt/build.go index 6ed5719d3..0c68bab59 100644 --- a/index/scorch/segment/bolt/build.go +++ b/index/scorch/segment/bolt/build.go @@ -438,18 +438,36 @@ func persistStored(memSegment *mem.Segment, tx *bolt.Tx) error { // process each value for i := 0; i < num; i++ { // encode field - metaEncoder.PutU64(uint64(fieldID)) + _, err2 := metaEncoder.PutU64(uint64(fieldID)) + if err2 != nil { + return err2 + } // encode type - metaEncoder.PutU64(uint64(memSegment.StoredTypes[docNum][uint16(fieldID)][i])) + _, err2 = metaEncoder.PutU64(uint64(memSegment.StoredTypes[docNum][uint16(fieldID)][i])) + if err2 != nil { + return err2 + } // encode start offset - metaEncoder.PutU64(uint64(curr)) + _, err2 = metaEncoder.PutU64(uint64(curr)) + if err2 != nil { + return err2 + } // end len - metaEncoder.PutU64(uint64(len(storedFieldValues[i]))) + _, err2 = metaEncoder.PutU64(uint64(len(storedFieldValues[i]))) + if err2 != nil { + return err2 + } // encode number of array pos - metaEncoder.PutU64(uint64(len(memSegment.StoredPos[docNum][uint16(fieldID)][i]))) + _, err2 = metaEncoder.PutU64(uint64(len(memSegment.StoredPos[docNum][uint16(fieldID)][i]))) + if err2 != nil { + return err2 + } // encode all array positions for j := 0; j < len(memSegment.StoredPos[docNum][uint16(fieldID)][i]); j++ { - metaEncoder.PutU64(memSegment.StoredPos[docNum][uint16(fieldID)][i][j]) + _, err2 = metaEncoder.PutU64(memSegment.StoredPos[docNum][uint16(fieldID)][i][j]) + if err2 != nil { + return 
err2 + } } // append data data = append(data, storedFieldValues[i]...) diff --git a/index/scorch/segment/bolt/build_test.go b/index/scorch/segment/bolt/build_test.go index d4b93f159..3f869d86a 100644 --- a/index/scorch/segment/bolt/build_test.go +++ b/index/scorch/segment/bolt/build_test.go @@ -25,7 +25,7 @@ import ( ) func TestBuild(t *testing.T) { - os.RemoveAll("/tmp/scorch.bolt") + _ = os.RemoveAll("/tmp/scorch.bolt") memSegment := buildMemSegment() err := persistSegment(memSegment, "/tmp/scorch.bolt", 1024) From b1346b4c8a51a2a787c5f157a82087cea9b517ca Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Tue, 5 Dec 2017 16:09:00 -0500 Subject: [PATCH 027/728] add readme describing our use of bolt as a segment format --- index/scorch/segment/bolt/README.md | 306 ++++++++++++++++++++++++++++ 1 file changed, 306 insertions(+) create mode 100644 index/scorch/segment/bolt/README.md diff --git a/index/scorch/segment/bolt/README.md b/index/scorch/segment/bolt/README.md new file mode 100644 index 000000000..2c6cd31d5 --- /dev/null +++ b/index/scorch/segment/bolt/README.md @@ -0,0 +1,306 @@ +# bolt segment format + +## top level key space (all sub-buckets, as bolt has no root bucket) + +We have chosen to letter these starting with 'a' and in the code refer to them with more meaningful names. The reason is that we intend to write them in order, and this lets us rearrange them more easily later. + +- 'a' field storage +- 'b' term dictionaries +- 'c' postings list +- 'd' postings details +- 'e' stored fields +- 'x' configuration + +## variable length integers that sort correctly (insert order same as numeric) + +We use numbers as keys in several places. We want those keys to be small, so we prefer to use a variable length key to minimize space, but we also want to insert these in order, so the encoding has to sort correctly. 
+ 
+We have chosen to use the scheme found in [CockroachDB](https://github.com/cockroachdb/cockroach/blob/2dd65dde5d90c157f4b93f92502ca1063b904e1d/pkg/util/encoding/encoding.go).
+
+In short, the first byte indicates how many bytes will follow, with a few other nice properties.
+- values 0-127 are not used in the first byte (this means we can still use any ASCII values we want and avoid collision)
+- very small values are packed directly into this first byte
+For the full details see the link above.
+
+## field storage bucket
+
+Contains one row for each field, the key is the integer field ID, and the value is the string name associated with the field.
+
+There is one additional row with key 'l'. The value is a binary serialization of a [roaring bitmap](https://github.com/RoaringBitmap/roaring), with bits set for each field id which also index location details with each posting.
+
+## term dictionary bucket
+
+Contains one row for each field, the key is the integer field ID, and the value is a binary serialization of the [Vellum](https://github.com/couchbaselabs/vellum) FST. The Vellum FST maps from term (utf-8 string) to a posting ID (uint64).
+
+## postings list bucket
+
+Contains one row for each postings list, the key is the integer posting ID, the value is a binary serialization of a [roaring bitmap](https://github.com/RoaringBitmap/roaring). The roaring bitmap has bits set for each doc number that used this term in this field.
+
+## posting details bucket
+
+Contains one sub-bucket for each postings list, the name of the sub-bucket is the posting ID.
+
+### individual posting detail sub-bucket
+
+Contains one sub-bucket for each chunk. A chunk contains details for a sub-section of the docNum key space. By default, the chunk size is 1024, so all posting details for the first 1024 docs are in chunk zero, then the next 1024 in chunk one, and so on. 
+ 
+The purpose of the chunking is so that when trying to Seek/Advance through a large number of hits to something much further ahead, we still have to seek through the roaring bitmap, but we can jump to the nearest chunk for details, and only seek within the details of the current chunk.
+
+#### chunk posting detail sub-bucket
+
+Contains two key/value pairs:
+
+Key 'a' contains a [govarint](https://github.com/Smerity/govarint) compressed slice of uint64 values. For each hit in the postings list, there are two values on this list, the first is the term frequency (uint64) and the second is the norm factor (float32).
+
+Key 'b' contains a [govarint](https://github.com/Smerity/govarint) compressed slice of uint64 values. For each location (there will be one location for each 'frequency' in the list above) there will be a variable number of uint64 values as follows:
+
+- field ID (uint16)
+- pos (uint64)
+- start (uint64)
+- end (uint64)
+- number of array position entries that follow (uint64)
+- variable number of array positions (each uint64)
+
+## stored field values sub-bucket
+
+Contains one sub-bucket for each doc number (uint64).
+
+## stored field doc specific sub-bucket
+
+Contains two key/value pairs:
+
+Key 'a' contains a [govarint](https://github.com/Smerity/govarint) compressed slice of uint64 values. For each stored field there are a variable number of uint64 values as follows:
+
+- field ID (uint16)
+- value type (byte) (string/number/date/geo/etc)
+- start offset (in the uncompressed slice of data)
+- length (in the uncompressed slice of data)
+- number of array position entries that follow (uint64)
+- variable number of array positions (each uint64)
+
+Key 'b' contains a [snappy](https://github.com/golang/snappy) compressed sequence of bytes. The input to the snappy compression was a slice of bytes containing the field values, in the same order the metadata slice was created. 
+ +## configuration sub-bucket + +Currently contains two key/value pairs: + +Key 'c' contains a BigEndian encoded uint32 chunk size. This chunk size must be used when computing doc number to chunk conversions in this segment. + +Key 'v' contains a version number, currently 0. + +## Example + +The following is a dump of the boltdb bucket/key/value space for a segment which contains two documents: + +``` +{ + "_id": "a", + "name": "wow", + "desc": "some thing", + "tag": ["cold", "dark"] +} + +{ + "_id": "b", + "name": "who", + "desc": "some thing", + "tag": ["cold", "dark"] +} +``` + +``` +[61] ('a' - field storage) + 6c ('l' - roaring bitmap of field IDs which have index location data) + 3a 30 00 00 01 00 00 00 00 00 03 00 10 00 00 00 01 00 02 00 03 00 04 00 + 88 (field ID 0) + 5f 69 64 (utf-8 string '_id') + 89 (field ID 1) + 5f 61 6c 6c (utf-8 string '_all') + 8a (field ID 2) + 6e 61 6d 65 (utf-8 string 'name') + 8b (field ID 3) + 64 65 73 63 (utf-8 string 'desc') + 8c (field ID 4) + 74 61 67 (utf-8 string 'tag') +[62] ('b' - term dictionary) + 88 (field ID 0) + 01 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 0b 05 00 00 62 61 11 02 02 00 00 00 00 00 00 00 17 00 00 00 00 00 00 00 (vellum FST data) + 89 + 01 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 10 92 cf c4 00 10 a7 c7 c5 00 10 82 d0 c4 00 10 97 cb c8 ce 00 10 84 00 10 8c 00 0d 01 04 6f 68 11 02 00 02 01 04 03 01 0f 15 1a 1f 77 74 73 64 63 11 05 06 00 00 00 00 00 00 00 43 00 00 00 00 00 00 00 + 8a + 01 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 10 84 00 10 8c 00 06 01 04 6f 68 11 02 06 01 11 8c 02 00 00 00 00 00 00 00 21 00 00 00 00 00 00 00 + 8b + 01 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 10 82 d0 c4 00 10 97 cb c8 ce 08 07 01 07 74 73 11 02 02 00 00 00 00 00 00 00 22 00 00 00 00 00 00 00 + 8c + 01 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 10 92 cf c4 00 10 a7 c7 c5 0a 09 01 06 64 63 11 02 02 00 00 00 00 00 00 00 21 00 00 00 00 00 00 00 +[63] ('c' - postings lists) + 88 (field ID 0) + 
3a 30 00 00 01 00 00 00 00 00 00 00 10 00 00 00 00 00 (roaring bitmap data) + 89 + 3a 30 00 00 01 00 00 00 00 00 01 00 10 00 00 00 00 00 01 00 + 8a + 3a 30 00 00 01 00 00 00 00 00 01 00 10 00 00 00 00 00 01 00 + 8b + 3a 30 00 00 01 00 00 00 00 00 01 00 10 00 00 00 00 00 01 00 + 8c + 3a 30 00 00 01 00 00 00 00 00 01 00 10 00 00 00 00 00 01 00 + 8d + 3a 30 00 00 01 00 00 00 00 00 00 00 10 00 00 00 00 00 + 8e + 3a 30 00 00 01 00 00 00 00 00 00 00 10 00 00 00 00 00 + 8f + 3a 30 00 00 01 00 00 00 00 00 01 00 10 00 00 00 00 00 01 00 + 90 + 3a 30 00 00 01 00 00 00 00 00 01 00 10 00 00 00 00 00 01 00 + 91 + 3a 30 00 00 01 00 00 00 00 00 01 00 10 00 00 00 00 00 01 00 + 92 + 3a 30 00 00 01 00 00 00 00 00 01 00 10 00 00 00 00 00 01 00 + 93 + 3a 30 00 00 01 00 00 00 00 00 00 00 10 00 00 00 01 00 + 94 + 3a 30 00 00 01 00 00 00 00 00 00 00 10 00 00 00 01 00 + 95 + 3a 30 00 00 01 00 00 00 00 00 00 00 10 00 00 00 01 00 +[64] ('d' - postings details) + [88] (posting ID 0) + [88] (chunk ID 0) + 61 ('a' term freq/norm data) + 01 ae f2 93 f7 03 + 62 ('b' term location data) + 02 01 00 03 00 + [89] (posting ID 1) + [88] (chunk ID 0) + 61 ('a' term freq/norm data) + 01 ae f2 93 f7 03 + 62 ('b' term location data) + 03 01 00 04 00 + [89] (chunk ID 1) + 61 ('a' term freq/norm data) + 01 ae f2 93 f7 03 + 62 ('b' term location data) + 03 01 00 04 00 + [8a] + [88] + 61 + 01 ae f2 93 f7 03 + 62 + 03 02 05 0a 00 + [89] + 61 + 01 ae f2 93 f7 03 + 62 + 03 02 05 0a 00 + [8b] + [88] + 61 + 01 ae f2 93 f7 03 + 62 + 04 01 00 04 01 00 + [89] + 61 + 01 ae f2 93 f7 03 + 62 + 04 01 00 04 01 00 + [8c] + [88] + 61 + 01 ae f2 93 f7 03 + 62 + 04 01 00 04 01 01 + [89] + 61 + 01 ae f2 93 f7 03 + 62 + 04 01 00 04 01 01 + [8d] + [88] + 61 + 01 80 80 80 fc 03 + 62 + + [8e] + [88] + 61 + 01 80 80 80 fc 03 + 62 + 02 01 00 03 00 + [8f] + [88] + 61 + 01 f3 89 d4 f9 03 + 62 + 03 01 00 04 00 + [89] + 61 + 01 f3 89 d4 f9 03 + 62 + 03 01 00 04 00 + [90] + [88] + 61 + 01 f3 89 d4 f9 03 + 62 + 03 02 05 0a 00 + [89] + 61 + 
01 f3 89 d4 f9 03 + 62 + 03 02 05 0a 00 + [91] + [88] + 61 + 01 f3 89 d4 f9 03 + 62 + 04 01 00 04 01 00 + [89] + 61 + 01 f3 89 d4 f9 03 + 62 + 04 01 00 04 01 00 + [92] + [88] + 61 + 01 f3 89 d4 f9 03 + 62 + 04 01 00 04 01 01 + [89] + 61 + 01 f3 89 d4 f9 03 + 62 + 04 01 00 04 01 01 + [93] + [89] + 61 + 01 80 80 80 fc 03 + 62 + + [94] + [89] + 61 + 01 80 80 80 fc 03 + 62 + 02 01 00 03 00 + [95] + [89] + 61 + 01 ae f2 93 f7 03 + 62 + 02 01 00 03 00 +[65] ('e' - stored fields) + [88] (doc num 0) + 61 ('a' - stored field meta slice) + 00 74 00 01 00 02 74 01 03 00 03 74 04 0a 00 04 74 0e 04 01 00 04 74 12 04 01 01 + 62 ('b' - snappy compressed value bytes) + 16 54 61 77 6f 77 73 6f 6d 65 20 74 68 69 6e 67 63 6f 6c 64 64 61 72 6b + [89] + 61 + 00 74 00 01 00 02 74 01 03 00 03 74 04 0a 00 04 74 0e 04 01 00 04 74 12 04 01 01 + 62 + 16 54 62 77 68 6f 73 6f 6d 65 20 74 68 69 6e 67 63 6f 6c 64 64 61 72 6b +[78] ('x' - configuration) + 63 ('c' - chunk size) + 00 00 00 01 (big endian 1) + 76 ('v' - version) + 00 (single byte 0) +``` From adac4f41db1e8c226b3f5339f9bbe75940ecf09a Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Wed, 6 Dec 2017 18:33:47 -0500 Subject: [PATCH 028/728] initial version of scorch which persists index to disk --- index/scorch/field_dict_test.go | 18 +- index/scorch/introducer.go | 32 +- index/scorch/persister.go | 329 ++++++++++++++++++++ index/scorch/reader_test.go | 33 +- index/scorch/scorch.go | 106 +++++-- index/scorch/scorch_test.go | 261 ++++++++++++++-- index/scorch/segment/bolt/build.go | 28 +- index/scorch/segment/bolt/build_test.go | 2 +- index/scorch/segment/bolt/dict.go | 2 +- index/scorch/segment/bolt/dict_test.go | 2 +- index/scorch/segment/bolt/posting.go | 2 +- index/scorch/segment/bolt/segment.go | 18 +- index/scorch/segment/bolt/segment_test.go | 6 +- index/scorch/segment/empty.go | 61 ++++ index/scorch/segment/{bolt => }/int.go | 4 +- index/scorch/segment/{bolt => }/int_test.go | 2 +- index/scorch/segment/mem/segment.go | 17 +- 
index/scorch/snapshot_index.go | 1 + index/scorch/snapshot_segment.go | 7 + 19 files changed, 839 insertions(+), 92 deletions(-) create mode 100644 index/scorch/persister.go create mode 100644 index/scorch/segment/empty.go rename index/scorch/segment/{bolt => }/int.go (98%) rename index/scorch/segment/{bolt => }/int_test.go (99%) diff --git a/index/scorch/field_dict_test.go b/index/scorch/field_dict_test.go index 856f3d6cf..a25c5c984 100644 --- a/index/scorch/field_dict_test.go +++ b/index/scorch/field_dict_test.go @@ -23,20 +23,26 @@ import ( ) func TestIndexFieldDict(t *testing.T) { + defer func() { + err := DestroyTest() + if err != nil { + t.Fatal(err) + } + }() analysisQueue := index.NewAnalysisQueue(1) - idx, err := NewScorch(Name, nil, analysisQueue) + idx, err := NewScorch(Name, testConfig, analysisQueue) if err != nil { t.Fatal(err) } err = idx.Open() if err != nil { - t.Errorf("error opening index: %v", err) + t.Fatalf("error opening index: %v", err) } defer func() { - err := idx.Close() - if err != nil { - t.Fatal(err) + cerr := idx.Close() + if cerr != nil { + t.Fatal(cerr) } }() @@ -96,7 +102,7 @@ func TestIndexFieldDict(t *testing.T) { dict2, err := indexReader.FieldDict("desc") if err != nil { - t.Errorf("error creating reader: %v", err) + t.Fatalf("error creating reader: %v", err) } defer func() { err := dict2.Close() diff --git a/index/scorch/introducer.go b/index/scorch/introducer.go index d2978a2df..0b30044a1 100644 --- a/index/scorch/introducer.go +++ b/index/scorch/introducer.go @@ -15,6 +15,8 @@ package scorch import ( + "fmt" + "github.com/RoaringBitmap/roaring" "github.com/blevesearch/bleve/index/scorch/segment" ) @@ -26,17 +28,21 @@ type segmentIntroduction struct { ids []string internal map[string][]byte - applied chan struct{} + applied chan error + persisted chan error } func (s *Scorch) mainLoop() { + var notify notificationChan +OUTER: for { select { case <-s.closeCh: - return + break OUTER - case next := <-s.introductions: + case 
notify = <-s.introducerNotifier: + case next := <-s.introductions: // acquire lock s.rootLock.Lock() @@ -45,7 +51,9 @@ func (s *Scorch) mainLoop() { segment: make([]*SegmentSnapshot, len(s.root.segment)+1), offsets: make([]uint64, len(s.root.segment)+1), internal: make(map[string][]byte, len(s.root.segment)), + epoch: s.nextSnapshotEpoch, } + s.nextSnapshotEpoch++ // iterate through current segments var running uint64 @@ -56,12 +64,15 @@ func (s *Scorch) mainLoop() { var err error delta, err = s.root.segment[i].segment.DocNumbers(next.ids) if err != nil { - panic(err) + next.applied <- fmt.Errorf("error computing doc numbers: %v", err) + close(next.applied) + continue OUTER } } newSnapshot.segment[i] = &SegmentSnapshot{ id: s.root.segment[i].id, segment: s.root.segment[i].segment, + notify: s.root.segment[i].notify, } // apply new obsoletions if s.root.segment[i].deleted == nil { @@ -80,6 +91,12 @@ func (s *Scorch) mainLoop() { segment: next.data, } newSnapshot.offsets[len(s.root.segment)] = running + if !s.unsafeBatch { + newSnapshot.segment[len(s.root.segment)].notify = append( + newSnapshot.segment[len(s.root.segment)].notify, + next.persisted, + ) + } // copy old values for key, oldVal := range s.root.internal { newSnapshot.internal[key] = oldVal @@ -97,6 +114,13 @@ func (s *Scorch) mainLoop() { // release lock s.rootLock.Unlock() close(next.applied) + + if notify != nil { + close(notify) + notify = nil + } } } + + s.asyncTasks.Done() } diff --git a/index/scorch/persister.go b/index/scorch/persister.go new file mode 100644 index 000000000..0e3318e8c --- /dev/null +++ b/index/scorch/persister.go @@ -0,0 +1,329 @@ +// Copyright (c) 2017 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package scorch + +import ( + "bytes" + "fmt" + "log" + "os" + "strings" + + "github.com/RoaringBitmap/roaring" + "github.com/blevesearch/bleve/index/scorch/segment" + scorchBolt "github.com/blevesearch/bleve/index/scorch/segment/bolt" + "github.com/blevesearch/bleve/index/scorch/segment/mem" + "github.com/boltdb/bolt" +) + +type notificationChan chan struct{} + +func (s *Scorch) persisterLoop() { + var lastPersistedEpoch uint64 +OUTER: + for { + select { + case <-s.closeCh: + break OUTER + + default: + // check to see if there is a new snapshot to persist + s.rootLock.RLock() + ourSnapshot := s.root + s.rootLock.RUnlock() + + //for ourSnapshot.epoch != lastPersistedEpoch { + if ourSnapshot.epoch != lastPersistedEpoch { + // lets get started + err := s.persistSnapshot(ourSnapshot) + if err != nil { + log.Printf("got err persisting snapshot: %v", err) + continue OUTER + } + lastPersistedEpoch = ourSnapshot.epoch + } + + // tell the introducer we're waiting for changes + // first make a notification chan + notifyUs := make(notificationChan) + + // give it to the introducer + select { + case <-s.closeCh: + break OUTER + case s.introducerNotifier <- notifyUs: + } + + // check again + s.rootLock.RLock() + ourSnapshot = s.root + s.rootLock.RUnlock() + if ourSnapshot.epoch != lastPersistedEpoch { + + // lets get started + err := s.persistSnapshot(ourSnapshot) + if err != nil { + log.Printf("got err persisting snapshot: %v", err) + continue OUTER + } + lastPersistedEpoch = ourSnapshot.epoch + } + + // now wait for it (but also detect close) + select { + 
case <-s.closeCh: + break OUTER + case <-notifyUs: + // woken up, next loop should pick up work + } + } + } + s.asyncTasks.Done() +} + +func (s *Scorch) persistSnapshot(snapshot *IndexSnapshot) error { + // start a write transaction + tx, err := s.rootBolt.Begin(true) + if err != nil { + return err + } + defer func() { + if err == nil { + err = tx.Commit() + } else { + _ = tx.Rollback() + } + }() + + snapshotsBucket, err := tx.CreateBucketIfNotExists(boltSnapshotsBucket) + if err != nil { + return err + } + newSnapshotKey := segment.EncodeUvarintAscending(nil, snapshot.epoch) + snapshotBucket, err := snapshotsBucket.CreateBucketIfNotExists(newSnapshotKey) + if err != nil { + return err + } + + // persist internal values + internalBucket, err := snapshotBucket.CreateBucketIfNotExists(boltInternalKey) + if err != nil { + return err + } + // TODO optimize writing these in order? + for k, v := range snapshot.internal { + internalBucket.Put([]byte(k), v) + } + + newSegmentPaths := make(map[uint64]string) + + // first ensure that each segment in this snapshot has been persisted + for i, segmentSnapshot := range snapshot.segment { + snapshotSegmentKey := segment.EncodeUvarintAscending(nil, uint64(i)) + snapshotSegmentBucket, err2 := snapshotBucket.CreateBucketIfNotExists(snapshotSegmentKey) + if err2 != nil { + return err2 + } + switch seg := segmentSnapshot.segment.(type) { + case *mem.Segment: + // need to persist this to disk + filename := fmt.Sprintf("%x.bolt", segmentSnapshot.id) + path := s.path + string(os.PathSeparator) + filename + err2 := scorchBolt.PersistSegment(seg, path, 1024) + if err2 != nil { + return fmt.Errorf("error persisting segment: %v", err2) + } + newSegmentPaths[segmentSnapshot.id] = path + snapshotSegmentBucket.Put(boltPathKey, []byte(filename)) + case *scorchBolt.Segment: + + path := seg.Path() + filename := strings.TrimPrefix(path, s.path+string(os.PathSeparator)) + snapshotSegmentBucket.Put(boltPathKey, []byte(filename)) + default: + return 
fmt.Errorf("unknown segment type: %T", seg) + } + // store current deleted bits + var roaringBuf bytes.Buffer + if segmentSnapshot.deleted != nil { + _, err = segmentSnapshot.deleted.WriteTo(&roaringBuf) + if err != nil { + return fmt.Errorf("error persisting roaring bytes: %v", err) + } + snapshotSegmentBucket.Put(boltDeletedKey, roaringBuf.Bytes()) + } + } + + // now try to open all the new snapshots + newSegments := make(map[uint64]segment.Segment) + for segmentID, path := range newSegmentPaths { + newSegments[segmentID], err = scorchBolt.Open(path) + if err != nil { + return fmt.Errorf("error opening new segment at %s, %v", path, err) + } + } + + // get write lock and update the current snapshot with disk-based versions + var notifications []chan error + + s.rootLock.Lock() + newIndexSnapshot := &IndexSnapshot{ + epoch: s.root.epoch, + segment: make([]*SegmentSnapshot, len(s.root.segment)), + offsets: make([]uint64, len(s.root.offsets)), + internal: make(map[string][]byte, len(s.root.internal)), + } + for i, segmentSnapshot := range s.root.segment { + // see if this segment has been replaced + if replacement, ok := newSegments[segmentSnapshot.id]; ok { + newSegmentSnapshot := &SegmentSnapshot{ + segment: replacement, + deleted: segmentSnapshot.deleted, + id: segmentSnapshot.id, + } + newIndexSnapshot.segment[i] = newSegmentSnapshot + // add the old segment snapshots notifications to the list + for _, notification := range segmentSnapshot.notify { + notifications = append(notifications, notification) + } + } else { + newIndexSnapshot.segment[i] = s.root.segment[i] + } + newIndexSnapshot.offsets[i] = s.root.offsets[i] + } + for k, v := range s.root.internal { + newIndexSnapshot.internal[k] = v + } + s.root = newIndexSnapshot + s.rootLock.Unlock() + + // now that we've given up the lock, notify everyone that we've safely + // persisted their data + for _, notification := range notifications { + close(notification) + } + + return nil +} + +// bolt snapshot code + 
+var boltSnapshotsBucket = []byte{'s'} +var boltPathKey = []byte{'p'} +var boltDeletedKey = []byte{'d'} +var boltInternalKey = []byte{'i'} + +func (s *Scorch) loadFromBolt() error { + return s.rootBolt.View(func(tx *bolt.Tx) error { + snapshots := tx.Bucket(boltSnapshotsBucket) + if snapshots == nil { + return nil + } + c := snapshots.Cursor() + for k, _ := c.Last(); k != nil; k, _ = c.Prev() { + _, snapshotEpoch, err := segment.DecodeUvarintAscending(k) + if err != nil { + log.Printf("unable to parse segment epoch % x, contiuing", k) + continue + } + snapshot := snapshots.Bucket(k) + if snapshot == nil { + log.Printf("snapshot key, but bucket missing % x, continuing", k) + continue + } + indexSnapshot, err := s.loadSnapshot(snapshot) + if err != nil { + log.Printf("unable to load snapshot, %v continuing", err) + continue + } + indexSnapshot.epoch = snapshotEpoch + // set the nextSegmentID + for _, segment := range indexSnapshot.segment { + if segment.id > s.nextSegmentID { + s.nextSegmentID = segment.id + } + } + s.nextSegmentID++ + s.nextSnapshotEpoch = snapshotEpoch + 1 + s.root = indexSnapshot + break + } + return nil + }) +} + +func (s *Scorch) loadSnapshot(snapshot *bolt.Bucket) (*IndexSnapshot, error) { + + rv := &IndexSnapshot{ + internal: make(map[string][]byte), + } + var running uint64 + c := snapshot.Cursor() + for k, _ := c.First(); k != nil; k, _ = c.Next() { + if k[0] == boltInternalKey[0] { + internalBucket := snapshot.Bucket(k) + internalBucket.ForEach(func(key []byte, val []byte) error { + copiedVal := append([]byte(nil), val...) 
+ rv.internal[string(key)] = copiedVal + return nil + }) + } else { + segmentBucket := snapshot.Bucket(k) + if segmentBucket == nil { + return nil, fmt.Errorf("segment key, but bucket missing % x", k) + } + segmentSnapshot, err := s.loadSegment(segmentBucket) + if err != nil { + return nil, fmt.Errorf("failed to load segment: %v", err) + } + _, segmentSnapshot.id, err = segment.DecodeUvarintAscending(k) + if err != nil { + return nil, fmt.Errorf("failed to decode segment id: %v", err) + } + rv.segment = append(rv.segment, segmentSnapshot) + rv.offsets = append(rv.offsets, running) + running += segmentSnapshot.segment.Count() + } + } + return rv, nil +} + +func (s *Scorch) loadSegment(segmentBucket *bolt.Bucket) (*SegmentSnapshot, error) { + pathBytes := segmentBucket.Get(boltPathKey) + if pathBytes == nil { + return nil, fmt.Errorf("segment path missing") + } + segmentPath := s.path + string(os.PathSeparator) + string(pathBytes) + segment, err := scorchBolt.Open(segmentPath) + if err != nil { + return nil, fmt.Errorf("error opening bolt segment: %v", err) + } + + rv := &SegmentSnapshot{ + segment: segment, + } + deletedBytes := segmentBucket.Get(boltDeletedKey) + if deletedBytes != nil { + deletedBitmap := roaring.NewBitmap() + r := bytes.NewReader(deletedBytes) + _, err := deletedBitmap.ReadFrom(r) + if err != nil { + return nil, fmt.Errorf("error reading deleted bytes: %v", err) + } + rv.deleted = deletedBitmap + } + + return rv, nil +} diff --git a/index/scorch/reader_test.go b/index/scorch/reader_test.go index ef5d6d4f5..2cd42fe47 100644 --- a/index/scorch/reader_test.go +++ b/index/scorch/reader_test.go @@ -23,14 +23,21 @@ import ( ) func TestIndexReader(t *testing.T) { + defer func() { + err := DestroyTest() + if err != nil { + t.Fatal(err) + } + }() + analysisQueue := index.NewAnalysisQueue(1) - idx, err := NewScorch(Name, nil, analysisQueue) + idx, err := NewScorch(Name, testConfig, analysisQueue) if err != nil { t.Fatal(err) } err = idx.Open() if err != 
nil { - t.Errorf("error opening index: %v", err) + t.Fatalf("error opening index: %v", err) } defer func() { err := idx.Close() @@ -205,14 +212,21 @@ func TestIndexReader(t *testing.T) { } func TestIndexDocIdReader(t *testing.T) { + defer func() { + err := DestroyTest() + if err != nil { + t.Fatal(err) + } + }() + analysisQueue := index.NewAnalysisQueue(1) - idx, err := NewScorch(Name, nil, analysisQueue) + idx, err := NewScorch(Name, testConfig, analysisQueue) if err != nil { t.Fatal(err) } err = idx.Open() if err != nil { - t.Errorf("error opening index: %v", err) + t.Fatalf("error opening index: %v", err) } defer func() { err := idx.Close() @@ -309,14 +323,21 @@ func TestIndexDocIdReader(t *testing.T) { } func TestIndexDocIdOnlyReader(t *testing.T) { + defer func() { + err := DestroyTest() + if err != nil { + t.Fatal(err) + } + }() + analysisQueue := index.NewAnalysisQueue(1) - idx, err := NewScorch(Name, nil, analysisQueue) + idx, err := NewScorch(Name, testConfig, analysisQueue) if err != nil { t.Fatal(err) } err = idx.Open() if err != nil { - t.Errorf("error opening index: %v", err) + t.Fatalf("error opening index: %v", err) } defer func() { err := idx.Close() diff --git a/index/scorch/scorch.go b/index/scorch/scorch.go index b80ff2482..678349216 100644 --- a/index/scorch/scorch.go +++ b/index/scorch/scorch.go @@ -16,6 +16,8 @@ package scorch import ( "encoding/json" + "fmt" + "os" "sync" "sync/atomic" "time" @@ -28,6 +30,7 @@ import ( "github.com/blevesearch/bleve/index/scorch/segment/mem" "github.com/blevesearch/bleve/index/store" "github.com/blevesearch/bleve/registry" + "github.com/boltdb/bolt" ) const Name = "scorch" @@ -35,40 +38,95 @@ const Name = "scorch" const Version uint8 = 1 type Scorch struct { - version uint8 - storeConfig map[string]interface{} - analysisQueue *index.AnalysisQueue - stats *Stats - nextSegmentID uint64 + version uint8 + config map[string]interface{} + analysisQueue *index.AnalysisQueue + stats *Stats + nextSegmentID uint64 + 
nextSnapshotEpoch uint64 + path string + + unsafeBatch bool rootLock sync.RWMutex root *IndexSnapshot - closeCh chan struct{} - introductions chan *segmentIntroduction + closeCh chan struct{} + introductions chan *segmentIntroduction + introducerNotifier chan notificationChan + rootBolt *bolt.DB + asyncTasks sync.WaitGroup } -func NewScorch(storeName string, storeConfig map[string]interface{}, analysisQueue *index.AnalysisQueue) (index.Index, error) { +func NewScorch(storeName string, config map[string]interface{}, analysisQueue *index.AnalysisQueue) (index.Index, error) { rv := &Scorch{ - version: Version, - storeConfig: storeConfig, - analysisQueue: analysisQueue, - stats: &Stats{}, - root: &IndexSnapshot{}, + version: Version, + config: config, + analysisQueue: analysisQueue, + stats: &Stats{}, + root: &IndexSnapshot{}, + nextSnapshotEpoch: 1, } return rv, nil } func (s *Scorch) Open() error { + var ok bool + s.path, ok = s.config["path"].(string) + if !ok { + return fmt.Errorf("must specify path") + } + if s.path == "" { + return os.ErrInvalid + } + + err := os.MkdirAll(s.path, 0700) + if err != nil { + return err + } + + rootBoltPath := s.path + string(os.PathSeparator) + "root.bolt" + + s.rootBolt, err = bolt.Open(rootBoltPath, 0600, nil) + if err != nil { + return err + } + + // now see if there is any existing state to load + err = s.loadFromBolt() + if err != nil { + return err + } + s.closeCh = make(chan struct{}) s.introductions = make(chan *segmentIntroduction) + s.introducerNotifier = make(chan notificationChan) + + s.asyncTasks.Add(1) go s.mainLoop() + s.asyncTasks.Add(1) + go s.persisterLoop() + return nil } -func (s *Scorch) Close() error { +func (s *Scorch) Close() (err error) { + // signal to async tasks we want to close close(s.closeCh) - return nil + // wait for them to close + s.asyncTasks.Wait() + // now close the root bolt + + err = s.rootBolt.Close() + s.rootLock.Lock() + for _, segment := range s.root.segment { + cerr := segment.Close() + 
if err == nil { + err = cerr + } + } + + return } func (s *Scorch) Update(doc *document.Document) error { @@ -85,7 +143,6 @@ func (s *Scorch) Delete(id string) error { // Batch applices a batch of changes to the index atomically func (s *Scorch) Batch(batch *index.Batch) error { - analysisStart := time.Now() resultChan := make(chan *index.AnalysisResult, len(batch.IndexOps)) @@ -148,7 +205,11 @@ func (s *Scorch) prepareSegment(newSegment segment.Segment, ids []string, ids: ids, obsoletes: make(map[uint64]*roaring.Bitmap), internal: internalOps, - applied: make(chan struct{}), + applied: make(chan error), + } + + if !s.unsafeBatch { + introduction.persisted = make(chan error) } // get read lock, to optimistically prepare obsoleted info @@ -165,9 +226,16 @@ func (s *Scorch) prepareSegment(newSegment segment.Segment, ids []string, s.introductions <- introduction // block until this segment is applied - <-introduction.applied + err := <-introduction.applied + if err != nil { + return err + } - return nil + if !s.unsafeBatch { + err = <-introduction.persisted + } + + return err } func (s *Scorch) SetInternal(key, val []byte) error { diff --git a/index/scorch/scorch_test.go b/index/scorch/scorch_test.go index aeb6c997a..52a86ab2e 100644 --- a/index/scorch/scorch_test.go +++ b/index/scorch/scorch_test.go @@ -16,6 +16,7 @@ package scorch import ( "log" + "os" "reflect" "regexp" "strconv" @@ -29,13 +30,85 @@ import ( "github.com/blevesearch/bleve/index" ) +func DestroyTest() error { + return os.RemoveAll("/tmp/bleve-scorch-test") +} + +var testConfig = map[string]interface{}{ + "path": "/tmp/bleve-scorch-test", +} + var testAnalyzer = &analysis.Analyzer{ Tokenizer: regexpTokenizer.NewRegexpTokenizer(regexp.MustCompile(`\w+`)), } -func TestIndexInsert(t *testing.T) { +func TestIndexOpenReopen(t *testing.T) { + defer func() { + err := DestroyTest() + if err != nil { + t.Fatal(err) + } + }() + analysisQueue := index.NewAnalysisQueue(1) - idx, err := NewScorch(Name, nil, 
analysisQueue) + idx, err := NewScorch(Name, testConfig, analysisQueue) + if err != nil { + t.Fatal(err) + } + err = idx.Open() + if err != nil { + t.Errorf("error opening index: %v", err) + } + + var expectedCount uint64 + reader, err := idx.Reader() + if err != nil { + t.Fatal(err) + } + docCount, err := reader.DocCount() + if err != nil { + t.Error(err) + } + if docCount != expectedCount { + t.Errorf("Expected document count to be %d got %d", expectedCount, docCount) + } + err = reader.Close() + if err != nil { + t.Fatal(err) + } + + // insert a doc + doc := document.NewDocument("1") + doc.AddField(document.NewTextField("name", []uint64{}, []byte("test"))) + err = idx.Update(doc) + if err != nil { + t.Errorf("Error updating index: %v", err) + } + expectedCount++ + + reader, err = idx.Reader() + if err != nil { + t.Fatal(err) + } + docCount, err = reader.DocCount() + if err != nil { + t.Error(err) + } + if docCount != expectedCount { + t.Errorf("Expected document count to be %d got %d", expectedCount, docCount) + } + err = reader.Close() + if err != nil { + t.Fatal(err) + } + + // now close it + err = idx.Close() + if err != nil { + t.Fatal(err) + } + + idx, err = NewScorch(Name, testConfig, analysisQueue) if err != nil { t.Fatal(err) } @@ -43,6 +116,48 @@ func TestIndexInsert(t *testing.T) { if err != nil { t.Errorf("error opening index: %v", err) } + + // check the doc count again after reopening it + reader, err = idx.Reader() + if err != nil { + t.Fatal(err) + } + docCount, err = reader.DocCount() + if err != nil { + t.Error(err) + } + if docCount != expectedCount { + t.Errorf("Expected document count to be %d got %d", expectedCount, docCount) + } + err = reader.Close() + if err != nil { + t.Fatal(err) + } + + // now close it + err = idx.Close() + if err != nil { + t.Fatal(err) + } +} + +func TestIndexInsert(t *testing.T) { + defer func() { + err := DestroyTest() + if err != nil { + t.Fatal(err) + } + }() + + analysisQueue := index.NewAnalysisQueue(1) + idx, 
err := NewScorch(Name, testConfig, analysisQueue) + if err != nil { + t.Fatal(err) + } + err = idx.Open() + if err != nil { + t.Fatalf("error opening index: %v", err) + } defer func() { err := idx.Close() if err != nil { @@ -93,14 +208,21 @@ func TestIndexInsert(t *testing.T) { } func TestIndexInsertThenDelete(t *testing.T) { + defer func() { + err := DestroyTest() + if err != nil { + t.Fatal(err) + } + }() + analysisQueue := index.NewAnalysisQueue(1) - idx, err := NewScorch(Name, nil, analysisQueue) + idx, err := NewScorch(Name, testConfig, analysisQueue) if err != nil { t.Fatal(err) } err = idx.Open() if err != nil { - t.Errorf("error opening index: %v", err) + t.Fatalf("error opening index: %v", err) } defer func() { err := idx.Close() @@ -204,8 +326,15 @@ func TestIndexInsertThenDelete(t *testing.T) { } func TestIndexInsertThenUpdate(t *testing.T) { + defer func() { + err := DestroyTest() + if err != nil { + t.Fatal(err) + } + }() + analysisQueue := index.NewAnalysisQueue(1) - idx, err := NewScorch(Name, nil, analysisQueue) + idx, err := NewScorch(Name, testConfig, analysisQueue) if err != nil { t.Fatal(err) } @@ -213,7 +342,7 @@ func TestIndexInsertThenUpdate(t *testing.T) { var expectedCount uint64 err = idx.Open() if err != nil { - t.Errorf("error opening index: %v", err) + t.Fatalf("error opening index: %v", err) } defer func() { err := idx.Close() @@ -264,15 +393,28 @@ func TestIndexInsertThenUpdate(t *testing.T) { } func TestIndexInsertMultiple(t *testing.T) { + defer func() { + err := DestroyTest() + if err != nil { + t.Fatal(err) + } + }() + analysisQueue := index.NewAnalysisQueue(1) - idx, err := NewScorch(Name, nil, analysisQueue) + idx, err := NewScorch(Name, testConfig, analysisQueue) if err != nil { t.Fatal(err) } err = idx.Open() if err != nil { - t.Errorf("error opening index: %v", err) + t.Fatalf("error opening index: %v", err) } + defer func() { + err := idx.Close() + if err != nil { + t.Fatal(err) + } + }() var expectedCount uint64 @@ -318,19 
+460,26 @@ func TestIndexInsertMultiple(t *testing.T) { } func TestIndexInsertWithStore(t *testing.T) { + defer func() { + err := DestroyTest() + if err != nil { + t.Fatal(err) + } + }() + analysisQueue := index.NewAnalysisQueue(1) - idx, err := NewScorch(Name, nil, analysisQueue) + idx, err := NewScorch(Name, testConfig, analysisQueue) if err != nil { t.Fatal(err) } err = idx.Open() if err != nil { - t.Errorf("error opening index: %v", err) + t.Fatalf("error opening index: %v", err) } defer func() { - err := idx.Close() + cerr := idx.Close() if err != nil { - t.Fatal(err) + t.Fatal(cerr) } }() @@ -416,14 +565,21 @@ func TestIndexInsertWithStore(t *testing.T) { } func TestIndexInternalCRUD(t *testing.T) { + defer func() { + err := DestroyTest() + if err != nil { + t.Fatal(err) + } + }() + analysisQueue := index.NewAnalysisQueue(1) - idx, err := NewScorch(Name, nil, analysisQueue) + idx, err := NewScorch(Name, testConfig, analysisQueue) if err != nil { t.Fatal(err) } err = idx.Open() if err != nil { - t.Errorf("error opening index: %v", err) + t.Fatalf("error opening index: %v", err) } defer func() { err := idx.Close() @@ -503,14 +659,21 @@ func TestIndexInternalCRUD(t *testing.T) { } func TestIndexBatch(t *testing.T) { + defer func() { + err := DestroyTest() + if err != nil { + t.Fatal(err) + } + }() + analysisQueue := index.NewAnalysisQueue(1) - idx, err := NewScorch(Name, nil, analysisQueue) + idx, err := NewScorch(Name, testConfig, analysisQueue) if err != nil { t.Fatal(err) } err = idx.Open() if err != nil { - t.Errorf("error opening index: %v", err) + t.Fatalf("error opening index: %v", err) } defer func() { err := idx.Close() @@ -609,14 +772,21 @@ func TestIndexBatch(t *testing.T) { } func TestIndexInsertUpdateDeleteWithMultipleTypesStored(t *testing.T) { + defer func() { + err := DestroyTest() + if err != nil { + t.Fatal(err) + } + }() + analysisQueue := index.NewAnalysisQueue(1) - idx, err := NewScorch(Name, nil, analysisQueue) + idx, err := NewScorch(Name, 
testConfig, analysisQueue) if err != nil { t.Fatal(err) } err = idx.Open() if err != nil { - t.Errorf("error opening index: %v", err) + t.Fatalf("error opening index: %v", err) } defer func() { err := idx.Close() @@ -817,14 +987,21 @@ func TestIndexInsertUpdateDeleteWithMultipleTypesStored(t *testing.T) { } func TestIndexInsertFields(t *testing.T) { + defer func() { + err := DestroyTest() + if err != nil { + t.Fatal(err) + } + }() + analysisQueue := index.NewAnalysisQueue(1) - idx, err := NewScorch(Name, nil, analysisQueue) + idx, err := NewScorch(Name, testConfig, analysisQueue) if err != nil { t.Fatal(err) } err = idx.Open() if err != nil { - t.Errorf("error opening index: %v", err) + t.Fatalf("error opening index: %v", err) } defer func() { err := idx.Close() @@ -879,14 +1056,21 @@ func TestIndexInsertFields(t *testing.T) { } func TestIndexUpdateComposites(t *testing.T) { + defer func() { + err := DestroyTest() + if err != nil { + t.Fatal(err) + } + }() + analysisQueue := index.NewAnalysisQueue(1) - idx, err := NewScorch(Name, nil, analysisQueue) + idx, err := NewScorch(Name, testConfig, analysisQueue) if err != nil { t.Fatal(err) } err = idx.Open() if err != nil { - t.Errorf("error opening index: %v", err) + t.Fatalf("error opening index: %v", err) } defer func() { err := idx.Close() @@ -947,14 +1131,21 @@ func TestIndexUpdateComposites(t *testing.T) { } func TestIndexTermReaderCompositeFields(t *testing.T) { + defer func() { + err := DestroyTest() + if err != nil { + t.Fatal(err) + } + }() + analysisQueue := index.NewAnalysisQueue(1) - idx, err := NewScorch(Name, nil, analysisQueue) + idx, err := NewScorch(Name, testConfig, analysisQueue) if err != nil { t.Fatal(err) } err = idx.Open() if err != nil { - t.Errorf("error opening index: %v", err) + t.Fatalf("error opening index: %v", err) } defer func() { err := idx.Close() @@ -1005,14 +1196,21 @@ func TestIndexTermReaderCompositeFields(t *testing.T) { } func TestConcurrentUpdate(t *testing.T) { + defer func() { 
+ err := DestroyTest() + if err != nil { + t.Fatal(err) + } + }() + analysisQueue := index.NewAnalysisQueue(1) - idx, err := NewScorch(Name, nil, analysisQueue) + idx, err := NewScorch(Name, testConfig, analysisQueue) if err != nil { t.Fatal(err) } err = idx.Open() if err != nil { - t.Errorf("error opening index: %v", err) + t.Fatalf("error opening index: %v", err) } defer func() { err := idx.Close() @@ -1054,14 +1252,21 @@ func TestConcurrentUpdate(t *testing.T) { } func TestLargeField(t *testing.T) { + defer func() { + err := DestroyTest() + if err != nil { + t.Fatal(err) + } + }() + analysisQueue := index.NewAnalysisQueue(1) - idx, err := NewScorch(Name, nil, analysisQueue) + idx, err := NewScorch(Name, testConfig, analysisQueue) if err != nil { t.Fatal(err) } err = idx.Open() if err != nil { - t.Errorf("error opening index: %v", err) + t.Fatalf("error opening index: %v", err) } defer func() { err := idx.Close() diff --git a/index/scorch/segment/bolt/build.go b/index/scorch/segment/bolt/build.go index 0c68bab59..ac01f9b8d 100644 --- a/index/scorch/segment/bolt/build.go +++ b/index/scorch/segment/bolt/build.go @@ -21,6 +21,7 @@ import ( "github.com/RoaringBitmap/roaring" "github.com/Smerity/govarint" + "github.com/blevesearch/bleve/index/scorch/segment" "github.com/blevesearch/bleve/index/scorch/segment/mem" "github.com/boltdb/bolt" "github.com/couchbaselabs/vellum" @@ -47,8 +48,7 @@ var versionKey = []byte{'v'} var version = 0 -func persistSegment(memSegment *mem.Segment, path string, chunkFactor uint32) (err error) { - +func PersistSegment(memSegment *mem.Segment, path string, chunkFactor uint32) (err error) { db, err := bolt.Open(path, 0777, nil) if err != nil { return err @@ -133,13 +133,13 @@ func persistFields(memSegment *mem.Segment, tx *bolt.Tx) error { } // we use special varint which is still guaranteed to sort correctly - fieldBuf := make([]byte, 0, maxVarintSize) + fieldBuf := make([]byte, 0, segment.MaxVarintSize) for fieldID, fieldName := range 
memSegment.FieldsInv { if fieldID != 0 { // reset buffer if necessary fieldBuf = fieldBuf[:0] } - fieldBuf = EncodeUvarintAscending(fieldBuf, uint64(fieldID)) + fieldBuf = segment.EncodeUvarintAscending(fieldBuf, uint64(fieldID)) err = bucket.Put(fieldBuf, []byte(fieldName)) if err != nil { return err @@ -160,7 +160,7 @@ func persistDictionary(memSegment *mem.Segment, tx *bolt.Tx) error { // the (presumably) heavier lifting involved in building the FST could // be done concurrently. - fieldBuf := make([]byte, 0, maxVarintSize) + fieldBuf := make([]byte, 0, segment.MaxVarintSize) for fieldID, fieldTerms := range memSegment.DictKeys { if fieldID != 0 { // reset buffers if necessary @@ -188,7 +188,7 @@ func persistDictionary(memSegment *mem.Segment, tx *bolt.Tx) error { // put this FST into bolt // we use special varint which is still guaranteed to sort correctly - fieldBuf = EncodeUvarintAscending(fieldBuf, uint64(fieldID)) + fieldBuf = segment.EncodeUvarintAscending(fieldBuf, uint64(fieldID)) err = bucket.Put(fieldBuf, buffer.Bytes()) if err != nil { return err @@ -205,13 +205,13 @@ func persistPostings(memSegment *mem.Segment, tx *bolt.Tx) error { } bucket.FillPercent = 1.0 - postingIDBuf := make([]byte, 0, maxVarintSize) + postingIDBuf := make([]byte, 0, segment.MaxVarintSize) for postingID := range memSegment.Postings { if postingID != 0 { // reset buffers if necessary postingIDBuf = postingIDBuf[:0] } - postingIDBuf = EncodeUvarintAscending(postingIDBuf, uint64(postingID)) + postingIDBuf = segment.EncodeUvarintAscending(postingIDBuf, uint64(postingID)) var postingsBuf bytes.Buffer _, err := memSegment.Postings[postingID].WriteTo(&postingsBuf) if err != nil { @@ -234,13 +234,13 @@ func persistPostingsDetails(memSegment *mem.Segment, tx *bolt.Tx, } bucket.FillPercent = 1.0 - postingIDBuf := make([]byte, 0, maxVarintSize) + postingIDBuf := make([]byte, 0, segment.MaxVarintSize) for postingID := range memSegment.Postings { if postingID != 0 { // reset buffers if 
necessary postingIDBuf = postingIDBuf[:0] } - postingIDBuf = EncodeUvarintAscending(postingIDBuf, uint64(postingID)) + postingIDBuf = segment.EncodeUvarintAscending(postingIDBuf, uint64(postingID)) // make bucket for posting details postingBucket, err := bucket.CreateBucket(postingIDBuf) @@ -264,7 +264,7 @@ func persistPostingDetails(memSegment *mem.Segment, postingBucket *bolt.Bucket, var err error var chunkBucket *bolt.Bucket var currChunk uint32 - chunkIDBuf := make([]byte, 0, maxVarintSize) + chunkIDBuf := make([]byte, 0, segment.MaxVarintSize) postingsListItr := memSegment.Postings[postingID].Iterator() var encoder *govarint.Base128Encoder var locEncoder *govarint.Base128Encoder @@ -303,7 +303,7 @@ func persistPostingDetails(memSegment *mem.Segment, postingBucket *bolt.Bucket, } // prepare next chunk - chunkIDBuf = EncodeUvarintAscending(chunkIDBuf, uint64(chunk)) + chunkIDBuf = segment.EncodeUvarintAscending(chunkIDBuf, uint64(chunk)) chunkBucket, err = postingBucket.CreateBucket(chunkIDBuf) if err != nil { return err @@ -410,7 +410,7 @@ func persistStored(memSegment *mem.Segment, tx *bolt.Tx) error { var curr int // we use special varint which is still guaranteed to sort correctly - docNumBuf := make([]byte, 0, maxVarintSize) + docNumBuf := make([]byte, 0, segment.MaxVarintSize) for docNum, storedValues := range memSegment.Stored { var metaBuf bytes.Buffer var data, compressed []byte @@ -420,7 +420,7 @@ func persistStored(memSegment *mem.Segment, tx *bolt.Tx) error { curr = 0 } // create doc sub-bucket - docNumBuf = EncodeUvarintAscending(docNumBuf, uint64(docNum)) + docNumBuf = segment.EncodeUvarintAscending(docNumBuf, uint64(docNum)) docBucket, err := bucket.CreateBucket(docNumBuf) if err != nil { return err diff --git a/index/scorch/segment/bolt/build_test.go b/index/scorch/segment/bolt/build_test.go index 3f869d86a..deb4157d4 100644 --- a/index/scorch/segment/bolt/build_test.go +++ b/index/scorch/segment/bolt/build_test.go @@ -28,7 +28,7 @@ func 
TestBuild(t *testing.T) { _ = os.RemoveAll("/tmp/scorch.bolt") memSegment := buildMemSegment() - err := persistSegment(memSegment, "/tmp/scorch.bolt", 1024) + err := PersistSegment(memSegment, "/tmp/scorch.bolt", 1024) if err != nil { t.Fatal(err) } diff --git a/index/scorch/segment/bolt/dict.go b/index/scorch/segment/bolt/dict.go index 0d7ab5eca..0f38a3d60 100644 --- a/index/scorch/segment/bolt/dict.go +++ b/index/scorch/segment/bolt/dict.go @@ -51,7 +51,7 @@ func (d *Dictionary) postingsList(term string, except *roaring.Bitmap) (*Posting } if exists { rv.postingsID = postingsID - postingsIDKey := EncodeUvarintAscending(nil, postingsID) + postingsIDKey := segment.EncodeUvarintAscending(nil, postingsID) bucket := d.segment.tx.Bucket(postingsBucket) if bucket == nil { return nil, fmt.Errorf("postings bucket missing") diff --git a/index/scorch/segment/bolt/dict_test.go b/index/scorch/segment/bolt/dict_test.go index 6b3926a87..2df57d67f 100644 --- a/index/scorch/segment/bolt/dict_test.go +++ b/index/scorch/segment/bolt/dict_test.go @@ -109,7 +109,7 @@ func TestDictionary(t *testing.T) { _ = os.RemoveAll("/tmp/scorch.bolt") memSegment := buildMemSegmentForDict() - err := persistSegment(memSegment, "/tmp/scorch.bolt", 1024) + err := PersistSegment(memSegment, "/tmp/scorch.bolt", 1024) if err != nil { t.Fatalf("error persisting segment: %v", err) } diff --git a/index/scorch/segment/bolt/posting.go b/index/scorch/segment/bolt/posting.go index e5d6c8938..bd038a575 100644 --- a/index/scorch/segment/bolt/posting.go +++ b/index/scorch/segment/bolt/posting.go @@ -91,7 +91,7 @@ type PostingsIterator struct { func (i *PostingsIterator) loadChunk(chunk int) error { // load correct chunk bytes - chunkID := EncodeUvarintAscending(nil, uint64(chunk)) + chunkID := segment.EncodeUvarintAscending(nil, uint64(chunk)) chunkBucket := i.detailBucket.Bucket(chunkID) if chunkBucket == nil { return fmt.Errorf("chunk %d missing", chunkID) diff --git a/index/scorch/segment/bolt/segment.go 
b/index/scorch/segment/bolt/segment.go index 835313b87..f53a98fe6 100644 --- a/index/scorch/segment/bolt/segment.go +++ b/index/scorch/segment/bolt/segment.go @@ -123,7 +123,7 @@ func (s *Segment) loadFields() (err error) { } } else { - _, fieldID, err2 := DecodeUvarintAscending(k) + _, fieldID, err2 := segment.DecodeUvarintAscending(k) if err2 != nil { return err2 } @@ -164,7 +164,11 @@ func (s *Segment) Count() uint64 { // Dictionary returns the term dictionary for the specified field func (s *Segment) Dictionary(field string) (segment.TermDictionary, error) { - return s.dictionary(field) + dict, err := s.dictionary(field) + if err == nil && dict == nil { + return &segment.EmptyDictionary{}, nil + } + return dict, err } func (s *Segment) dictionary(field string) (*Dictionary, error) { @@ -177,7 +181,7 @@ func (s *Segment) dictionary(field string) (*Dictionary, error) { rv.fieldID = s.fieldsMap[field] if rv.fieldID > 0 { rv.fieldID = rv.fieldID - 1 - fieldIDKey := EncodeUvarintAscending(nil, uint64(rv.fieldID)) + fieldIDKey := segment.EncodeUvarintAscending(nil, uint64(rv.fieldID)) bucket := s.tx.Bucket(dictBucket) if bucket == nil { return nil, fmt.Errorf("dictionary bucket missing") @@ -196,6 +200,8 @@ func (s *Segment) dictionary(field string) (*Dictionary, error) { } } + } else { + return nil, nil } return rv, nil @@ -208,7 +214,7 @@ func (s *Segment) VisitDocument(num uint64, visitor segment.DocumentFieldValueVi if storedBuucket == nil { return fmt.Errorf("stored bucket missing") } - docNumKey := EncodeUvarintAscending(nil, num) + docNumKey := segment.EncodeUvarintAscending(nil, num) docBucket := storedBuucket.Bucket(docNumKey) if docBucket == nil { return fmt.Errorf("segment has no doc number %d", num) @@ -307,3 +313,7 @@ func (s *Segment) Close() error { } return s.db.Close() } + +func (s *Segment) Path() string { + return s.db.Path() +} diff --git a/index/scorch/segment/bolt/segment_test.go b/index/scorch/segment/bolt/segment_test.go index 
b00c71926..16ac2cadd 100644 --- a/index/scorch/segment/bolt/segment_test.go +++ b/index/scorch/segment/bolt/segment_test.go @@ -25,7 +25,7 @@ func TestOpen(t *testing.T) { _ = os.RemoveAll("/tmp/scorch.bolt") memSegment := buildMemSegment() - err := persistSegment(memSegment, "/tmp/scorch.bolt", 1024) + err := PersistSegment(memSegment, "/tmp/scorch.bolt", 1024) if err != nil { t.Fatalf("error persisting segment: %v", err) } @@ -325,7 +325,7 @@ func TestOpenMulti(t *testing.T) { _ = os.RemoveAll("/tmp/scorch.bolt") memSegment := buildMemSegmentMulti() - err := persistSegment(memSegment, "/tmp/scorch.bolt", 1024) + err := PersistSegment(memSegment, "/tmp/scorch.bolt", 1024) if err != nil { t.Fatalf("error persisting segment: %v", err) } @@ -425,7 +425,7 @@ func TestOpenMultiWithTwoChunks(t *testing.T) { _ = os.RemoveAll("/tmp/scorch.bolt") memSegment := buildMemSegmentMulti() - err := persistSegment(memSegment, "/tmp/scorch.bolt", 1) + err := PersistSegment(memSegment, "/tmp/scorch.bolt", 1) if err != nil { t.Fatalf("error persisting segment: %v", err) } diff --git a/index/scorch/segment/empty.go b/index/scorch/segment/empty.go new file mode 100644 index 000000000..0913eeba8 --- /dev/null +++ b/index/scorch/segment/empty.go @@ -0,0 +1,61 @@ +// Copyright (c) 2017 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package segment + +import ( + "github.com/RoaringBitmap/roaring" + "github.com/blevesearch/bleve/index" +) + +type EmptyDictionary struct{} + +func (e *EmptyDictionary) PostingsList(term string, + except *roaring.Bitmap) (PostingsList, error) { + return &EmptyPostingsList{}, nil +} + +func (e *EmptyDictionary) Iterator() DictionaryIterator { + return &EmptyDictionaryIterator{} +} + +func (e *EmptyDictionary) PrefixIterator(prefix string) DictionaryIterator { + return &EmptyDictionaryIterator{} +} + +func (e *EmptyDictionary) RangeIterator(start, end string) DictionaryIterator { + return &EmptyDictionaryIterator{} +} + +type EmptyDictionaryIterator struct{} + +func (e *EmptyDictionaryIterator) Next() (*index.DictEntry, error) { + return nil, nil +} + +type EmptyPostingsList struct{} + +func (e *EmptyPostingsList) Iterator() PostingsIterator { + return &EmptyPostingsIterator{} +} + +func (e *EmptyPostingsList) Count() uint64 { + return 0 +} + +type EmptyPostingsIterator struct{} + +func (e *EmptyPostingsIterator) Next() (Posting, error) { + return nil, nil +} diff --git a/index/scorch/segment/bolt/int.go b/index/scorch/segment/int.go similarity index 98% rename from index/scorch/segment/bolt/int.go rename to index/scorch/segment/int.go index a4af3a7a8..a4836ebf8 100644 --- a/index/scorch/segment/bolt/int.go +++ b/index/scorch/segment/int.go @@ -17,12 +17,12 @@ // Modified to not use pkg/errors -package bolt +package segment import "fmt" const ( - maxVarintSize = 9 + MaxVarintSize = 9 // IntMin is chosen such that the range of int tags does not overlap the // ascii character set that is frequently used in testing. 
diff --git a/index/scorch/segment/bolt/int_test.go b/index/scorch/segment/int_test.go similarity index 99% rename from index/scorch/segment/bolt/int_test.go rename to index/scorch/segment/int_test.go index e59918c8b..3d2ab6fd7 100644 --- a/index/scorch/segment/bolt/int_test.go +++ b/index/scorch/segment/int_test.go @@ -17,7 +17,7 @@ // Modified to only test the parts we borrowed -package bolt +package segment import ( "bytes" diff --git a/index/scorch/segment/mem/segment.go b/index/scorch/segment/mem/segment.go index 4d3d1d113..4940bb4cf 100644 --- a/index/scorch/segment/mem/segment.go +++ b/index/scorch/segment/mem/segment.go @@ -15,6 +15,8 @@ package mem import ( + "fmt" + "github.com/RoaringBitmap/roaring" "github.com/blevesearch/bleve/index/scorch/segment" ) @@ -117,12 +119,25 @@ func (s *Segment) VisitDocument(num uint64, visitor segment.DocumentFieldValueVi return nil } +func (s *Segment) getField(name string) (int, error) { + fieldID, ok := s.FieldsMap[name] + if !ok { + return 0, fmt.Errorf("no field named %s", name) + } + return int(fieldID - 1), nil +} + // Dictionary returns the term dictionary for the specified field func (s *Segment) Dictionary(field string) (segment.TermDictionary, error) { + fieldID, err := s.getField(field) + if err != nil { + // no such field, return empty dictionary + return &segment.EmptyDictionary{}, nil + } return &Dictionary{ segment: s, field: field, - fieldID: uint16(s.getOrDefineField(field, false)), + fieldID: uint16(fieldID), }, nil } diff --git a/index/scorch/snapshot_index.go b/index/scorch/snapshot_index.go index c059fe734..c96082c54 100644 --- a/index/scorch/snapshot_index.go +++ b/index/scorch/snapshot_index.go @@ -42,6 +42,7 @@ type IndexSnapshot struct { segment []*SegmentSnapshot offsets []uint64 internal map[string][]byte + epoch uint64 } func (i *IndexSnapshot) newIndexSnapshotFieldDict(field string, makeItr func(i segment.TermDictionary) segment.DictionaryIterator) (*IndexSnapshotFieldDict, error) { diff --git 
a/index/scorch/snapshot_segment.go b/index/scorch/snapshot_segment.go index 6380a15fd..1a50eb6cc 100644 --- a/index/scorch/snapshot_segment.go +++ b/index/scorch/snapshot_segment.go @@ -44,6 +44,12 @@ type SegmentSnapshot struct { id uint64 segment segment.Segment deleted *roaring.Bitmap + + notify []chan error +} + +func (s *SegmentSnapshot) Close() error { + return s.segment.Close() } func (s *SegmentSnapshot) VisitDocument(num uint64, visitor segment.DocumentFieldValueVisitor) error { @@ -51,6 +57,7 @@ func (s *SegmentSnapshot) VisitDocument(num uint64, visitor segment.DocumentFiel } func (s *SegmentSnapshot) Count() uint64 { + rv := s.segment.Count() if s.deleted != nil { rv -= s.deleted.GetCardinality() From e47010563583f05ea5e72062725fd92d9d3c6447 Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Wed, 6 Dec 2017 18:36:14 -0500 Subject: [PATCH 029/728] fix issues identified by errcheck --- index/scorch/persister.go | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/index/scorch/persister.go b/index/scorch/persister.go index 0e3318e8c..44393e961 100644 --- a/index/scorch/persister.go +++ b/index/scorch/persister.go @@ -124,7 +124,10 @@ func (s *Scorch) persistSnapshot(snapshot *IndexSnapshot) error { } // TODO optimize writing these in order? 
for k, v := range snapshot.internal { - internalBucket.Put([]byte(k), v) + err = internalBucket.Put([]byte(k), v) + if err != nil { + return err + } } newSegmentPaths := make(map[uint64]string) @@ -146,12 +149,18 @@ func (s *Scorch) persistSnapshot(snapshot *IndexSnapshot) error { return fmt.Errorf("error persisting segment: %v", err2) } newSegmentPaths[segmentSnapshot.id] = path - snapshotSegmentBucket.Put(boltPathKey, []byte(filename)) + err = snapshotSegmentBucket.Put(boltPathKey, []byte(filename)) + if err != nil { + return err + } case *scorchBolt.Segment: path := seg.Path() filename := strings.TrimPrefix(path, s.path+string(os.PathSeparator)) - snapshotSegmentBucket.Put(boltPathKey, []byte(filename)) + err = snapshotSegmentBucket.Put(boltPathKey, []byte(filename)) + if err != nil { + return err + } default: return fmt.Errorf("unknown segment type: %T", seg) } @@ -162,7 +171,10 @@ func (s *Scorch) persistSnapshot(snapshot *IndexSnapshot) error { if err != nil { return fmt.Errorf("error persisting roaring bytes: %v", err) } - snapshotSegmentBucket.Put(boltDeletedKey, roaringBuf.Bytes()) + err = snapshotSegmentBucket.Put(boltDeletedKey, roaringBuf.Bytes()) + if err != nil { + return err + } } } @@ -274,11 +286,14 @@ func (s *Scorch) loadSnapshot(snapshot *bolt.Bucket) (*IndexSnapshot, error) { for k, _ := c.First(); k != nil; k, _ = c.Next() { if k[0] == boltInternalKey[0] { internalBucket := snapshot.Bucket(k) - internalBucket.ForEach(func(key []byte, val []byte) error { + err := internalBucket.ForEach(func(key []byte, val []byte) error { copiedVal := append([]byte(nil), val...) 
rv.internal[string(key)] = copiedVal return nil }) + if err != nil { + return nil, err + } } else { segmentBucket := snapshot.Bucket(k) if segmentBucket == nil { From ff2e6b98e4673c427fb526dc5d5101f10c70f2e4 Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Sat, 9 Dec 2017 12:43:02 -0500 Subject: [PATCH 030/728] added empty segment --- index/scorch/segment/empty.go | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/index/scorch/segment/empty.go b/index/scorch/segment/empty.go index 0913eeba8..724195007 100644 --- a/index/scorch/segment/empty.go +++ b/index/scorch/segment/empty.go @@ -19,6 +19,33 @@ import ( "github.com/blevesearch/bleve/index" ) +type EmptySegment struct{} + +func (e *EmptySegment) Dictionary(field string) (TermDictionary, error) { + return &EmptyDictionary{}, nil +} + +func (e *EmptySegment) VisitDocument(num uint64, visitor DocumentFieldValueVisitor) error { + return nil +} + +func (e *EmptySegment) Count() uint64 { + return 0 +} + +func (e *EmptySegment) DocNumbers([]string) (*roaring.Bitmap, error) { + r := roaring.NewBitmap() + return r, nil +} + +func (e *EmptySegment) Fields() []string { + return []string{} +} + +func (e *EmptySegment) Close() error { + return nil +} + type EmptyDictionary struct{} func (e *EmptyDictionary) PostingsList(term string, From 9781d9b08963160181d706b8ec0f1805452187e8 Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Sat, 9 Dec 2017 14:28:33 -0500 Subject: [PATCH 031/728] add initial version of zap file format --- index/scorch/segment/zap/README.md | 120 ++++ index/scorch/segment/zap/build.go | 615 ++++++++++++++++++ index/scorch/segment/zap/build_test.go | 288 ++++++++ index/scorch/segment/zap/cmd/zap/README.md | 3 + index/scorch/segment/zap/cmd/zap/cmd/dict.go | 72 ++ .../scorch/segment/zap/cmd/zap/cmd/explore.go | 124 ++++ .../scorch/segment/zap/cmd/zap/cmd/footer.go | 43 ++ index/scorch/segment/zap/cmd/zap/cmd/root.go | 58 ++ .../scorch/segment/zap/cmd/zap/cmd/stored.go | 73 +++ 
index/scorch/segment/zap/cmd/zap/main.go | 23 + index/scorch/segment/zap/count.go | 55 ++ index/scorch/segment/zap/dict.go | 165 +++++ index/scorch/segment/zap/dict_test.go | 183 ++++++ index/scorch/segment/zap/posting.go | 362 +++++++++++ index/scorch/segment/zap/segment.go | 352 ++++++++++ index/scorch/segment/zap/segment_test.go | 517 +++++++++++++++ 16 files changed, 3053 insertions(+) create mode 100644 index/scorch/segment/zap/README.md create mode 100644 index/scorch/segment/zap/build.go create mode 100644 index/scorch/segment/zap/build_test.go create mode 100644 index/scorch/segment/zap/cmd/zap/README.md create mode 100644 index/scorch/segment/zap/cmd/zap/cmd/dict.go create mode 100644 index/scorch/segment/zap/cmd/zap/cmd/explore.go create mode 100644 index/scorch/segment/zap/cmd/zap/cmd/footer.go create mode 100644 index/scorch/segment/zap/cmd/zap/cmd/root.go create mode 100644 index/scorch/segment/zap/cmd/zap/cmd/stored.go create mode 100644 index/scorch/segment/zap/cmd/zap/main.go create mode 100644 index/scorch/segment/zap/count.go create mode 100644 index/scorch/segment/zap/dict.go create mode 100644 index/scorch/segment/zap/dict_test.go create mode 100644 index/scorch/segment/zap/posting.go create mode 100644 index/scorch/segment/zap/segment.go create mode 100644 index/scorch/segment/zap/segment_test.go diff --git a/index/scorch/segment/zap/README.md b/index/scorch/segment/zap/README.md new file mode 100644 index 000000000..079eef9b3 --- /dev/null +++ b/index/scorch/segment/zap/README.md @@ -0,0 +1,120 @@ +# zap file format + +## stored fields section + +- for each document + - preparation phase: + - produce a slice of metadata bytes and data bytes + - produce these slices in field id order + - field value is appended to the data slice + - metadata slice is govarint encoded with the following values for each field value + - field id (uint16) + - field type (byte) + - field value start offset in uncompressed data slice (uint64) + - field value length 
(uint64) + - field number of array positions (uint64) + - one additional value for each array position (uint64) + - compress the data slice using snappy + - file writing phase: + - remember the start offset for this document + - write out meta data length (varint uint64) + - write out compressed data length (varint uint64) + - write out the metadata bytes + - write out the compressed data bytes + +## stored fields idx + +- for each document + - write start offset (remembered from previous section) of stored data (big endian uint64) + +With this index and a known document number, we have direct access to all the stored field data. + +## posting details (freq/norm) section + +- for each posting list + - produce a slice containing multiple consecutive chunks (each chunk is govarint stream) + - produce a slice remembering offsets of where each chunk starts + - preparation phase: + - for each hit in the posting list + - if this hit is in next chunk close out encoding of last chunk and record offset start of next + - encode term frequency (uint64) + - encode norm factor (float32) + - file writing phase: + - remember start position for this posting list details + - write out number of chunks that follow (varint uint64) + - write out length of each chunk (each a varint uint64) + - write out the byte slice containing all the chunk data + +If you know the doc number you're interested in, this format lets you jump to the correct chunk (docNum/chunkFactor) directly and then seek within that chunk until you find it. 
+ +## posting details (location) section + +- for each posting list + - produce a slice containing multiple consecutive chunks (each chunk is govarint stream) + - produce a slice remembering offsets of where each chunk starts + - preparation phase: + - for each hit in the posting list + - if this hit is in next chunk close out encoding of last chunk and record offset start of next + - encode field (uint16) + - encode field pos (uint64) + - encode field start (uint64) + - encode field end (uint64) + - encode number of array positions to follow (uint64) + - encode each array position (each uint64) + - file writing phase: + - remember start position for this posting list details + - write out number of chunks that follow (varint uint64) + - write out length of each chunk (each a varint uint64) + - write out the byte slice containing all the chunk data + +If you know the doc number you're interested in, this format lets you jump to the correct chunk (docNum/chunkFactor) directly and then seek within that chunk until you find it. 
+ +## postings list section + +- for each posting list + - preparation phase: + - encode roaring bitmap posting list to bytes (so we know the length) + - file writing phase: + - remember the start position for this posting list + - write freq/norm details offset (remembered from previous, as varint uint64) + - write location details offset (remembered from previous, as varint uint64) + - write length of encoded roaring bitmap + - write the serialized roaring bitmap data + +## dictionary + +- for each field + - preparation phase: + - encode vellum FST with dictionary data pointing to file offset of posting list (remembered from previous) + - file writing phase: + - remember the start position of this persistDictionary + - write length of vellum data (varint uint64) + - write out vellum data + +## fields section + +- for each field + - file writing phase: + - remember start offset for each field + - write 1 if field has location info indexed, 0 if not (varint uint64) + - write dictionary address (remembered from previous) (varint uint64) + - write length of field name (varint uint64) + - write field name bytes + +## fields idx + +- for each field + - file writing phase: + - write big endian uint64 of start offset for each field + +NOTE: currently we don't know or record the length of this fields index. Instead we rely on the fact that we know it immediately precedes a footer of known size. 
+ +## footer + +- file writing phase + - write number of docs (big endian uint64) + - write stored field index location (big endian uint64) + - write field index location (big endian uint64) + - write out chunk factor (big endian uint32) + - write out version (big endian uint32) + - write out file CRC of everything preceding this (big endian uint32) diff --git a/index/scorch/segment/zap/build.go b/index/scorch/segment/zap/build.go new file mode 100644 index 000000000..1ed95e80f --- /dev/null +++ b/index/scorch/segment/zap/build.go @@ -0,0 +1,615 @@ +// Copyright (c) 2017 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package zap + +import ( + "bufio" + "bytes" + "encoding/binary" + "math" + "os" + + "github.com/Smerity/govarint" + "github.com/blevesearch/bleve/index/scorch/segment/mem" + "github.com/couchbaselabs/vellum" + "github.com/golang/snappy" +) + +var version uint32 + +// PersistSegment takes the in-memory segment and persists it to the specified +// path in the zap file format. 
+func PersistSegment(memSegment *mem.Segment, path string, chunkFactor uint32) (err error) { + + flag := os.O_RDWR | os.O_CREATE + + f, err := os.OpenFile(path, flag, 0600) + if err != nil { + return err + } + + // bufer the output + br := bufio.NewWriter(f) + + // wrap it for counting (tracking offsets) + cr := NewCountHashWriter(br) + + var storedIndexOffset uint64 + storedIndexOffset, err = persistStored(memSegment, cr) + if err != nil { + return err + } + + var freqOffsets, locOffsets []uint64 + freqOffsets, locOffsets, err = persistPostingDetails(memSegment, cr, chunkFactor) + if err != nil { + return err + } + + var postingsLocs []uint64 + postingsLocs, err = persistPostingsLists(memSegment, cr, freqOffsets, locOffsets) + if err != nil { + return err + } + + var dictLocs []uint64 + dictLocs, err = persistDictionary(memSegment, cr, postingsLocs) + if err != nil { + return err + } + + var fieldIndexStart uint64 + fieldIndexStart, err = persistFields(memSegment, cr, dictLocs) + if err != nil { + return err + } + + err = persistFooter(uint64(len(memSegment.Stored)), storedIndexOffset, + fieldIndexStart, chunkFactor, cr) + if err != nil { + return err + } + + err = br.Flush() + if err != nil { + return err + } + + err = f.Close() + if err != nil { + return err + } + + return nil +} + +func persistStored(memSegment *mem.Segment, w *CountHashWriter) (uint64, error) { + + var curr int + var metaBuf bytes.Buffer + var data, compressed []byte + + docNumOffsets := make(map[int]uint64, len(memSegment.Stored)) + + for docNum, storedValues := range memSegment.Stored { + if docNum != 0 { + // reset buffer if necessary + metaBuf.Reset() + data = data[:0] + compressed = compressed[:0] + curr = 0 + } + + metaEncoder := govarint.NewU64Base128Encoder(&metaBuf) + + // encode fields in order + for fieldID := range memSegment.FieldsInv { + if storedFieldValues, ok := storedValues[uint16(fieldID)]; ok { + // has stored values for this field + num := len(storedFieldValues) + + // 
process each value + for i := 0; i < num; i++ { + // encode field + _, err2 := metaEncoder.PutU64(uint64(fieldID)) + if err2 != nil { + return 0, err2 + } + // encode type + _, err2 = metaEncoder.PutU64(uint64(memSegment.StoredTypes[docNum][uint16(fieldID)][i])) + if err2 != nil { + return 0, err2 + } + // encode start offset + _, err2 = metaEncoder.PutU64(uint64(curr)) + if err2 != nil { + return 0, err2 + } + // end len + _, err2 = metaEncoder.PutU64(uint64(len(storedFieldValues[i]))) + if err2 != nil { + return 0, err2 + } + // encode number of array pos + _, err2 = metaEncoder.PutU64(uint64(len(memSegment.StoredPos[docNum][uint16(fieldID)][i]))) + if err2 != nil { + return 0, err2 + } + // encode all array positions + for j := 0; j < len(memSegment.StoredPos[docNum][uint16(fieldID)][i]); j++ { + _, err2 = metaEncoder.PutU64(memSegment.StoredPos[docNum][uint16(fieldID)][i][j]) + if err2 != nil { + return 0, err2 + } + } + // append data + data = append(data, storedFieldValues[i]...) + // update curr + curr += len(storedFieldValues[i]) + } + } + } + metaEncoder.Close() + + metaBytes := metaBuf.Bytes() + + // compress the data + compressed = snappy.Encode(compressed, data) + + // record where we're about to start writing + docNumOffsets[docNum] = uint64(w.Count()) + + buf := make([]byte, binary.MaxVarintLen64) + // write out the meta length + n := binary.PutUvarint(buf, uint64(len(metaBytes))) + _, err := w.Write(buf[:n]) + if err != nil { + return 0, err + } + // write out the compressed data length + n = binary.PutUvarint(buf, uint64(len(compressed))) + _, err = w.Write(buf[:n]) + if err != nil { + return 0, err + } + // now write the meta + _, err = w.Write(metaBytes) + if err != nil { + return 0, err + } + // now write the compressed data + _, err = w.Write(compressed) + if err != nil { + return 0, err + } + } + + // return value is the start of the stored index + rv := uint64(w.Count()) + // now write out the stored doc index + for docNum := range 
memSegment.Stored { + err := binary.Write(w, binary.BigEndian, docNumOffsets[docNum]) + if err != nil { + return 0, err + } + } + + return rv, nil +} + +func persistPostingDetails(memSegment *mem.Segment, w *CountHashWriter, chunkFactor uint32) ([]uint64, []uint64, error) { + var freqOffsets, locOfffsets []uint64 + for postingID := range memSegment.Postings { + postingsListItr := memSegment.Postings[postingID].Iterator() + + total := uint64(len(memSegment.Stored))/uint64(chunkFactor) + 1 + + var freqNormBuf []byte + var offset int + + var encodingBuf bytes.Buffer + encoder := govarint.NewU64Base128Encoder(&encodingBuf) + + chunkLens := make([]uint64, total) + var currChunk uint64 + for postingsListItr.HasNext() { + docNum := postingsListItr.Next() + chunk := uint64(docNum) / uint64(chunkFactor) + + if chunk != currChunk { + // starting a new chunk + if encoder != nil { + // close out last + encoder.Close() + encodingBytes := encodingBuf.Bytes() + chunkLens[currChunk] = uint64(len(encodingBytes)) + freqNormBuf = append(freqNormBuf, encodingBytes...) + encodingBuf.Reset() + encoder = govarint.NewU64Base128Encoder(&encodingBuf) + } + + currChunk = chunk + } + + // put freq + _, err := encoder.PutU64(memSegment.Freqs[postingID][offset]) + if err != nil { + return nil, nil, err + } + + // put norm + norm := memSegment.Norms[postingID][offset] + normBits := math.Float32bits(norm) + _, err = encoder.PutU32(normBits) + if err != nil { + return nil, nil, err + } + + offset++ + } + + // close out last chunk + if encoder != nil { + // fix me write freq/norms + encoder.Close() + encodingBytes := encodingBuf.Bytes() + chunkLens[currChunk] = uint64(len(encodingBytes)) + freqNormBuf = append(freqNormBuf, encodingBytes...) 
+ } + + // record where this postings freq info starts + freqOffsets = append(freqOffsets, uint64(w.Count())) + + buf := make([]byte, binary.MaxVarintLen64) + // write out the number of chunks + n := binary.PutUvarint(buf, uint64(total)) + _, err := w.Write(buf[:n]) + if err != nil { + return nil, nil, err + } + // write out the chunk lens + for _, chunkLen := range chunkLens { + n := binary.PutUvarint(buf, uint64(chunkLen)) + _, err = w.Write(buf[:n]) + if err != nil { + return nil, nil, err + } + } + // write out the data + _, err = w.Write(freqNormBuf) + if err != nil { + return nil, nil, err + } + + } + + // now do it again for the locations + for postingID := range memSegment.Postings { + postingsListItr := memSegment.Postings[postingID].Iterator() + + total := uint64(len(memSegment.Stored))/uint64(chunkFactor) + 1 + + var locBuf []byte + var offset int + var locOffset int + + var encodingBuf bytes.Buffer + encoder := govarint.NewU64Base128Encoder(&encodingBuf) + + chunkLens := make([]uint64, total) + var currChunk uint64 + for postingsListItr.HasNext() { + docNum := postingsListItr.Next() + chunk := uint64(docNum) / uint64(chunkFactor) + + if chunk != currChunk { + // starting a new chunk + if encoder != nil { + // close out last + encoder.Close() + encodingBytes := encodingBuf.Bytes() + chunkLens[currChunk] = uint64(len(encodingBytes)) + locBuf = append(locBuf, encodingBytes...) 
+ encodingBuf.Reset() + encoder = govarint.NewU64Base128Encoder(&encodingBuf) + } + currChunk = chunk + } + + for i := 0; i < int(memSegment.Freqs[postingID][offset]); i++ { + + if len(memSegment.Locfields[postingID]) > 0 { + // put field + _, err := encoder.PutU64(uint64(memSegment.Locfields[postingID][locOffset])) + if err != nil { + return nil, nil, err + } + + // put pos + _, err = encoder.PutU64(memSegment.Locpos[postingID][locOffset]) + if err != nil { + return nil, nil, err + } + + // put start + _, err = encoder.PutU64(memSegment.Locstarts[postingID][locOffset]) + if err != nil { + return nil, nil, err + } + + // put end + _, err = encoder.PutU64(memSegment.Locends[postingID][locOffset]) + if err != nil { + return nil, nil, err + } + + // put array positions + num := len(memSegment.Locarraypos[postingID][locOffset]) + + // put the number of array positions to follow + _, err = encoder.PutU64(uint64(num)) + if err != nil { + return nil, nil, err + } + + // put each array position + for j := 0; j < num; j++ { + _, err = encoder.PutU64(memSegment.Locarraypos[postingID][locOffset][j]) + if err != nil { + return nil, nil, err + } + } + } + + locOffset++ + } + offset++ + } + + // close out last chunk + if encoder != nil { + // fix me write freq/norms + encoder.Close() + encodingBytes := encodingBuf.Bytes() + chunkLens[currChunk] = uint64(len(encodingBytes)) + locBuf = append(locBuf, encodingBytes...) 
+ } + + // record where this postings loc info starts + locOfffsets = append(locOfffsets, uint64(w.Count())) + + buf := make([]byte, binary.MaxVarintLen64) + // write out the number of chunks + n := binary.PutUvarint(buf, uint64(total)) + _, err := w.Write(buf[:n]) + if err != nil { + return nil, nil, err + } + // write out the chunk lens + for _, chunkLen := range chunkLens { + n := binary.PutUvarint(buf, uint64(chunkLen)) + _, err = w.Write(buf[:n]) + if err != nil { + return nil, nil, err + } + } + // write out the data + _, err = w.Write(locBuf) + if err != nil { + return nil, nil, err + } + + } + return freqOffsets, locOfffsets, nil +} + +func persistPostingsLists(memSegment *mem.Segment, w *CountHashWriter, freqOffsets, locOffsets []uint64) ([]uint64, error) { + var rv []uint64 + + var postingsBuf bytes.Buffer + for postingID := range memSegment.Postings { + if postingID != 0 { + postingsBuf.Reset() + } + + // record where we start this posting list + rv = append(rv, uint64(w.Count())) + + // write out postings list to memory so we know the len + postingsListLen, err := memSegment.Postings[postingID].WriteTo(&postingsBuf) + if err != nil { + return nil, err + } + + // write out the start of the term info + buf := make([]byte, binary.MaxVarintLen64) + n := binary.PutUvarint(buf, freqOffsets[postingID]) + _, err = w.Write(buf[:n]) + if err != nil { + return nil, err + } + + // write out the start of the loc info + n = binary.PutUvarint(buf, locOffsets[postingID]) + _, err = w.Write(buf[:n]) + if err != nil { + return nil, err + } + + // write out the length of this postings list + n = binary.PutUvarint(buf, uint64(postingsListLen)) + _, err = w.Write(buf[:n]) + if err != nil { + return nil, err + } + + // write out the postings list itself + _, err = w.Write(postingsBuf.Bytes()) + if err != nil { + return nil, err + } + } + + return rv, nil +} + +func persistDictionary(memSegment *mem.Segment, w *CountHashWriter, postingsLocs []uint64) ([]uint64, error) { + var 
rv []uint64 + + var buffer bytes.Buffer + for fieldID, fieldTerms := range memSegment.DictKeys { + if fieldID != 0 { + buffer.Reset() + } + + // start a new vellum for this field + builder, err := vellum.New(&buffer, nil) + if err != nil { + return nil, err + } + + dict := memSegment.Dicts[fieldID] + // now walk the dictionary in order of fieldTerms (already sorted) + for i := range fieldTerms { + postingID := dict[fieldTerms[i]] - 1 + postingsAddr := postingsLocs[postingID] + err = builder.Insert([]byte(fieldTerms[i]), postingsAddr) + if err != nil { + return nil, err + } + } + err = builder.Close() + if err != nil { + return nil, err + } + + // record where this dictionary starts + rv = append(rv, uint64(w.Count())) + + vellumData := buffer.Bytes() + + // write out the length of the vellum data + buf := make([]byte, binary.MaxVarintLen64) + // write out the number of chunks + n := binary.PutUvarint(buf, uint64(len(vellumData))) + _, err = w.Write(buf[:n]) + if err != nil { + return nil, err + } + + // write this vellum to disk + _, err = w.Write(vellumData) + if err != nil { + return nil, err + } + } + + return rv, nil +} + +func persistFields(memSegment *mem.Segment, w *CountHashWriter, dictLocs []uint64) (uint64, error) { + var rv uint64 + + var fieldStarts []uint64 + for fieldID, fieldName := range memSegment.FieldsInv { + + // record start of this field + fieldStarts = append(fieldStarts, uint64(w.Count())) + + buf := make([]byte, binary.MaxVarintLen64) + // write out if the field has indexed locs (0 or 1) + var indexedLoc uint64 + if memSegment.FieldsLoc[fieldID] { + indexedLoc = 1 + } + n := binary.PutUvarint(buf, indexedLoc) + _, err := w.Write(buf[:n]) + if err != nil { + return 0, err + } + + // write out dict location for this field + n = binary.PutUvarint(buf, dictLocs[fieldID]) + _, err = w.Write(buf[:n]) + if err != nil { + return 0, err + } + + // write out the length of the field name + n = binary.PutUvarint(buf, uint64(len(fieldName))) + _, err = 
w.Write(buf[:n]) + if err != nil { + return 0, err + } + + // write out the field name + _, err = w.Write([]byte(fieldName)) + if err != nil { + return 0, err + } + } + + // now write out the fields index + rv = uint64(w.Count()) + + // now write out the stored doc index + for fieldID := range memSegment.FieldsInv { + err := binary.Write(w, binary.BigEndian, fieldStarts[fieldID]) + if err != nil { + return 0, err + } + } + + return rv, nil +} + +// NOTE: update if you make the footer bigger +// crc + ver + chunk + field offset + stored offset + num docs +const footerSize = 4 + 4 + 4 + 8 + 8 + 8 + +func persistFooter(numDocs, storedIndexOffset, fieldIndexOffset uint64, + chunkFactor uint32, w *CountHashWriter) error { + // write out the number of docs + err := binary.Write(w, binary.BigEndian, numDocs) + if err != nil { + return err + } + // write out the stored field index location: + err = binary.Write(w, binary.BigEndian, storedIndexOffset) + if err != nil { + return err + } + // write out the field index location + err = binary.Write(w, binary.BigEndian, fieldIndexOffset) + if err != nil { + return err + } + // write out 32-bit chunk factor + err = binary.Write(w, binary.BigEndian, chunkFactor) + if err != nil { + return err + } + // write out 32-bit version + err = binary.Write(w, binary.BigEndian, version) + if err != nil { + return err + } + // write out CRC-32 of everything upto but not including this CRC + err = binary.Write(w, binary.BigEndian, w.Sum32()) + if err != nil { + return err + } + return nil +} diff --git a/index/scorch/segment/zap/build_test.go b/index/scorch/segment/zap/build_test.go new file mode 100644 index 000000000..da0e12ba6 --- /dev/null +++ b/index/scorch/segment/zap/build_test.go @@ -0,0 +1,288 @@ +// Copyright (c) 2017 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package zap + +import ( + "os" + "testing" + + "github.com/blevesearch/bleve/analysis" + "github.com/blevesearch/bleve/document" + "github.com/blevesearch/bleve/index" + "github.com/blevesearch/bleve/index/scorch/segment/mem" +) + +func TestBuild(t *testing.T) { + _ = os.RemoveAll("/tmp/scorch.zap") + + memSegment := buildMemSegment() + err := PersistSegment(memSegment, "/tmp/scorch.zap", 1024) + if err != nil { + t.Fatal(err) + } +} + +func buildMemSegment() *mem.Segment { + doc := &document.Document{ + ID: "a", + Fields: []document.Field{ + document.NewTextFieldCustom("_id", nil, []byte("a"), document.IndexField|document.StoreField, nil), + document.NewTextFieldCustom("name", nil, []byte("wow"), document.IndexField|document.StoreField|document.IncludeTermVectors, nil), + document.NewTextFieldCustom("desc", nil, []byte("some thing"), document.IndexField|document.StoreField|document.IncludeTermVectors, nil), + document.NewTextFieldCustom("tag", []uint64{0}, []byte("cold"), document.IndexField|document.StoreField|document.IncludeTermVectors, nil), + document.NewTextFieldCustom("tag", []uint64{1}, []byte("dark"), document.IndexField|document.StoreField|document.IncludeTermVectors, nil), + }, + CompositeFields: []*document.CompositeField{ + document.NewCompositeField("_all", true, nil, []string{"_id"}), + }, + } + + // forge analyzed docs + results := []*index.AnalysisResult{ + &index.AnalysisResult{ + Document: doc, + Analyzed: []analysis.TokenFrequencies{ + analysis.TokenFrequency(analysis.TokenStream{ + &analysis.Token{ + Start: 0, + End: 1, + 
Position: 1, + Term: []byte("a"), + }, + }, nil, false), + analysis.TokenFrequency(analysis.TokenStream{ + &analysis.Token{ + Start: 0, + End: 3, + Position: 1, + Term: []byte("wow"), + }, + }, nil, true), + analysis.TokenFrequency(analysis.TokenStream{ + &analysis.Token{ + Start: 0, + End: 4, + Position: 1, + Term: []byte("some"), + }, + &analysis.Token{ + Start: 5, + End: 10, + Position: 2, + Term: []byte("thing"), + }, + }, nil, true), + analysis.TokenFrequency(analysis.TokenStream{ + &analysis.Token{ + Start: 0, + End: 4, + Position: 1, + Term: []byte("cold"), + }, + }, []uint64{0}, true), + analysis.TokenFrequency(analysis.TokenStream{ + &analysis.Token{ + Start: 0, + End: 4, + Position: 1, + Term: []byte("dark"), + }, + }, []uint64{1}, true), + }, + Length: []int{ + 1, + 1, + 2, + 1, + 1, + }, + }, + } + + // fix up composite fields + for _, ar := range results { + for i, f := range ar.Document.Fields { + for _, cf := range ar.Document.CompositeFields { + cf.Compose(f.Name(), ar.Length[i], ar.Analyzed[i]) + } + } + } + + return mem.NewFromAnalyzedDocs(results) +} + +func buildMemSegmentMulti() *mem.Segment { + + doc := &document.Document{ + ID: "a", + Fields: []document.Field{ + document.NewTextFieldCustom("_id", nil, []byte("a"), document.IndexField|document.StoreField, nil), + document.NewTextFieldCustom("name", nil, []byte("wow"), document.IndexField|document.StoreField|document.IncludeTermVectors, nil), + document.NewTextFieldCustom("desc", nil, []byte("some thing"), document.IndexField|document.StoreField|document.IncludeTermVectors, nil), + document.NewTextFieldCustom("tag", []uint64{0}, []byte("cold"), document.IndexField|document.StoreField|document.IncludeTermVectors, nil), + document.NewTextFieldCustom("tag", []uint64{1}, []byte("dark"), document.IndexField|document.StoreField|document.IncludeTermVectors, nil), + }, + CompositeFields: []*document.CompositeField{ + document.NewCompositeField("_all", true, nil, []string{"_id"}), + }, + } + + doc2 := 
&document.Document{ + ID: "b", + Fields: []document.Field{ + document.NewTextFieldCustom("_id", nil, []byte("b"), document.IndexField|document.StoreField, nil), + document.NewTextFieldCustom("name", nil, []byte("who"), document.IndexField|document.StoreField|document.IncludeTermVectors, nil), + document.NewTextFieldCustom("desc", nil, []byte("some thing"), document.IndexField|document.StoreField|document.IncludeTermVectors, nil), + document.NewTextFieldCustom("tag", []uint64{0}, []byte("cold"), document.IndexField|document.StoreField|document.IncludeTermVectors, nil), + document.NewTextFieldCustom("tag", []uint64{1}, []byte("dark"), document.IndexField|document.StoreField|document.IncludeTermVectors, nil), + }, + CompositeFields: []*document.CompositeField{ + document.NewCompositeField("_all", true, nil, []string{"_id"}), + }, + } + + // forge analyzed docs + results := []*index.AnalysisResult{ + &index.AnalysisResult{ + Document: doc, + Analyzed: []analysis.TokenFrequencies{ + analysis.TokenFrequency(analysis.TokenStream{ + &analysis.Token{ + Start: 0, + End: 1, + Position: 1, + Term: []byte("a"), + }, + }, nil, false), + analysis.TokenFrequency(analysis.TokenStream{ + &analysis.Token{ + Start: 0, + End: 3, + Position: 1, + Term: []byte("wow"), + }, + }, nil, true), + analysis.TokenFrequency(analysis.TokenStream{ + &analysis.Token{ + Start: 0, + End: 4, + Position: 1, + Term: []byte("some"), + }, + &analysis.Token{ + Start: 5, + End: 10, + Position: 2, + Term: []byte("thing"), + }, + }, nil, true), + analysis.TokenFrequency(analysis.TokenStream{ + &analysis.Token{ + Start: 0, + End: 4, + Position: 1, + Term: []byte("cold"), + }, + }, []uint64{0}, true), + analysis.TokenFrequency(analysis.TokenStream{ + &analysis.Token{ + Start: 0, + End: 4, + Position: 1, + Term: []byte("dark"), + }, + }, []uint64{1}, true), + }, + Length: []int{ + 1, + 1, + 2, + 1, + 1, + }, + }, + &index.AnalysisResult{ + Document: doc2, + Analyzed: []analysis.TokenFrequencies{ + 
analysis.TokenFrequency(analysis.TokenStream{ + &analysis.Token{ + Start: 0, + End: 1, + Position: 1, + Term: []byte("b"), + }, + }, nil, false), + analysis.TokenFrequency(analysis.TokenStream{ + &analysis.Token{ + Start: 0, + End: 3, + Position: 1, + Term: []byte("who"), + }, + }, nil, true), + analysis.TokenFrequency(analysis.TokenStream{ + &analysis.Token{ + Start: 0, + End: 4, + Position: 1, + Term: []byte("some"), + }, + &analysis.Token{ + Start: 5, + End: 10, + Position: 2, + Term: []byte("thing"), + }, + }, nil, true), + analysis.TokenFrequency(analysis.TokenStream{ + &analysis.Token{ + Start: 0, + End: 4, + Position: 1, + Term: []byte("cold"), + }, + }, []uint64{0}, true), + analysis.TokenFrequency(analysis.TokenStream{ + &analysis.Token{ + Start: 0, + End: 4, + Position: 1, + Term: []byte("dark"), + }, + }, []uint64{1}, true), + }, + Length: []int{ + 1, + 1, + 2, + 1, + 1, + }, + }, + } + + // fix up composite fields + for _, ar := range results { + for i, f := range ar.Document.Fields { + for _, cf := range ar.Document.CompositeFields { + cf.Compose(f.Name(), ar.Length[i], ar.Analyzed[i]) + } + } + } + + segment := mem.NewFromAnalyzedDocs(results) + + return segment +} diff --git a/index/scorch/segment/zap/cmd/zap/README.md b/index/scorch/segment/zap/cmd/zap/README.md new file mode 100644 index 000000000..99f55d365 --- /dev/null +++ b/index/scorch/segment/zap/cmd/zap/README.md @@ -0,0 +1,3 @@ +# zap command line utility + +Kind of a hack just put together quickly to let me debug some issues. diff --git a/index/scorch/segment/zap/cmd/zap/cmd/dict.go b/index/scorch/segment/zap/cmd/zap/cmd/dict.go new file mode 100644 index 000000000..74e59e902 --- /dev/null +++ b/index/scorch/segment/zap/cmd/zap/cmd/dict.go @@ -0,0 +1,72 @@ +// Copyright (c) 2017 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package cmd + +import ( + "encoding/binary" + "fmt" + + "github.com/couchbaselabs/vellum" + "github.com/spf13/cobra" +) + +// dictCmd represents the dict command +var dictCmd = &cobra.Command{ + Use: "dict [path] [field]", + Short: "dict prints the term dictionary for the specified field", + Long: `The dict command lets you print the term dictionary for the specified field.`, + RunE: func(cmd *cobra.Command, args []string) error { + if len(args) < 2 { + return fmt.Errorf("must specify field") + } + + data := segment.Data() + + addr, err := segment.DictAddr(args[1]) + if err != nil { + return fmt.Errorf("error determing address: %v", err) + } + fmt.Printf("dictionary for field starts at %d (%x)\n", addr, addr) + + vellumLen, read := binary.Uvarint(data[addr : addr+binary.MaxVarintLen64]) + fmt.Printf("vellum length: %d\n", vellumLen) + fstBytes := data[addr+uint64(read) : addr+uint64(read)+vellumLen] + fmt.Printf("raw vellum data % x\n", fstBytes) + fmt.Printf("dictionary:\n\n") + if fstBytes != nil { + fst, err := vellum.Load(fstBytes) + if err != nil { + return fmt.Errorf("dictionary field %s vellum err: %v", args[1], err) + } + + itr, err := fst.Iterator(nil, nil) + for err == nil { + currTerm, currVal := itr.Current() + fmt.Printf("%s - %d (%x)\n", currTerm, currVal, currVal) + err = itr.Next() + } + if err != nil && err != vellum.ErrIteratorDone { + return fmt.Errorf("error iterating dictionary: %v", err) + } + + } + + return nil + }, +} + +func init() { + RootCmd.AddCommand(dictCmd) +} diff --git 
a/index/scorch/segment/zap/cmd/zap/cmd/explore.go b/index/scorch/segment/zap/cmd/zap/cmd/explore.go new file mode 100644 index 000000000..42ab82732 --- /dev/null +++ b/index/scorch/segment/zap/cmd/zap/cmd/explore.go @@ -0,0 +1,124 @@ +// Copyright (c) 2017 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package cmd + +import ( + "encoding/binary" + "fmt" + "log" + + "github.com/couchbaselabs/vellum" + "github.com/spf13/cobra" +) + +// exploreCmd represents the explore command +var exploreCmd = &cobra.Command{ + Use: "explore [path] [field] ", + Short: "explores the index by field, then term (optional), and then docNum (optional)", + Long: `The explore command lets you explore the index in order of field, then optionally by term, then optionally again by doc number.`, + RunE: func(cmd *cobra.Command, args []string) error { + if len(args) < 2 { + return fmt.Errorf("must specify field") + } + + data := segment.Data() + + addr, err := segment.DictAddr(args[1]) + if err != nil { + return fmt.Errorf("error determing address: %v", err) + } + fmt.Printf("dictionary for field starts at %d (%x)\n", addr, addr) + + vellumLen, read := binary.Uvarint(data[addr : addr+binary.MaxVarintLen64]) + fmt.Printf("vellum length: %d\n", vellumLen) + fstBytes := data[addr+uint64(read) : addr+uint64(read)+vellumLen] + fmt.Printf("raw vellum data % x\n", fstBytes) + + if len(args) >= 3 { + if fstBytes != nil { + fst, err := vellum.Load(fstBytes) + if err != nil { + 
return fmt.Errorf("dictionary field %s vellum err: %v", args[1], err) + } + postingsAddr, exists, err := fst.Get([]byte(args[2])) + if err != nil { + return fmt.Errorf("error looking for term : %v", err) + } + if exists { + fmt.Printf("postings list begins at %d (%x)\n", postingsAddr, postingsAddr) + + var n uint64 + freqAddr, read := binary.Uvarint(data[postingsAddr : postingsAddr+binary.MaxVarintLen64]) + n += uint64(read) + + var locAddr uint64 + locAddr, read = binary.Uvarint(data[postingsAddr+n : postingsAddr+n+binary.MaxVarintLen64]) + n += uint64(read) + + var postingListLen uint64 + postingListLen, _ = binary.Uvarint(data[postingsAddr+n : postingsAddr+n+binary.MaxVarintLen64]) + + fmt.Printf("Posting List Length: %d\n", postingListLen) + + fmt.Printf("Freq details at: %d (%x)\n", freqAddr, freqAddr) + numChunks, r2 := binary.Uvarint(data[freqAddr : freqAddr+binary.MaxVarintLen64]) + n = uint64(r2) + + var freqOffsets []uint64 + for j := uint64(0); j < numChunks; j++ { + chunkLen, r3 := binary.Uvarint(data[freqAddr+n : freqAddr+n+binary.MaxVarintLen64]) + n += uint64(r3) + freqOffsets = append(freqOffsets, chunkLen) + } + running := freqAddr + n + for k, offset := range freqOffsets { + fmt.Printf("freq chunk: %d, len %d, start at %d (%x) end %d (%x)\n", k, offset, running, running, running+offset, running+offset) + running += offset + } + + fmt.Printf("Loc details at: %d (%x)\n", locAddr, locAddr) + numLChunks, r4 := binary.Uvarint(data[locAddr : locAddr+binary.MaxVarintLen64]) + n = uint64(r4) + fmt.Printf("there are %d loc chunks\n", numLChunks) + + var locOffsets []uint64 + for j := uint64(0); j < numLChunks; j++ { + log.Printf("reading from %d(%x)\n", locAddr+n, locAddr+n) + log.Printf("data i see here: % x\n", data[locAddr+n:locAddr+n+binary.MaxVarintLen64]) + lchunkLen, r4 := binary.Uvarint(data[locAddr+n : locAddr+n+binary.MaxVarintLen64]) + n += uint64(r4) + log.Printf("see chunk len %d(%x)\n", lchunkLen, lchunkLen) + locOffsets = append(locOffsets, 
lchunkLen) + } + + running2 := locAddr + n + for k, offset := range locOffsets { + fmt.Printf("loc chunk: %d, len %d(%x), start at %d (%x) end %d (%x)\n", k, offset, offset, running2, running2, running2+offset, running2+offset) + running2 += offset + } + + } else { + fmt.Printf("dictionary does not contain term '%s'\n", args[2]) + } + } + } + + return nil + }, +} + +func init() { + RootCmd.AddCommand(exploreCmd) +} diff --git a/index/scorch/segment/zap/cmd/zap/cmd/footer.go b/index/scorch/segment/zap/cmd/zap/cmd/footer.go new file mode 100644 index 000000000..177f4e71b --- /dev/null +++ b/index/scorch/segment/zap/cmd/zap/cmd/footer.go @@ -0,0 +1,43 @@ +// Copyright (c) 2017 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package cmd + +import ( + "fmt" + + "github.com/spf13/cobra" +) + +// footerCmd represents the footer command +var footerCmd = &cobra.Command{ + Use: "footer [path]", + Short: "prints the contents of the zap footer", + Long: `The footer command will print the contents of the footer.`, + RunE: func(cmd *cobra.Command, args []string) error { + data := segment.Data() + fmt.Printf("Length: %d\n", len(data)) + fmt.Printf("CRC: %#x\n", segment.CRC()) + fmt.Printf("Version: %d\n", segment.Version()) + fmt.Printf("Chunk Factor: %d\n", segment.ChunkFactor()) + fmt.Printf("Fields Idx: %d (%#x)\n", segment.FieldsIndexOffset(), segment.FieldsIndexOffset()) + fmt.Printf("Stored Idx: %d (%#x)\n", segment.StoredIndexOffset(), segment.StoredIndexOffset()) + fmt.Printf("Num Docs: %d\n", segment.NumDocs()) + return nil + }, +} + +func init() { + RootCmd.AddCommand(footerCmd) +} diff --git a/index/scorch/segment/zap/cmd/zap/cmd/root.go b/index/scorch/segment/zap/cmd/zap/cmd/root.go new file mode 100644 index 000000000..f969bbf13 --- /dev/null +++ b/index/scorch/segment/zap/cmd/zap/cmd/root.go @@ -0,0 +1,58 @@ +// Copyright (c) 2017 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package cmd + +import ( + "fmt" + "os" + + "github.com/blevesearch/bleve/index/scorch/segment/zap" + "github.com/spf13/cobra" +) + +var segment *zap.Segment + +// RootCmd represents the base command when called without any subcommands +var RootCmd = &cobra.Command{ + Use: "zap", + Short: "command-line tool to interact with a zap file", + Long: `Zap is a command-line tool to interact with a zap file.`, + PersistentPreRunE: func(cmd *cobra.Command, args []string) error { + + if len(args) < 1 { + return fmt.Errorf("must specify path to zap file") + } + + segInf, err := zap.Open(args[0]) + if err != nil { + return fmt.Errorf("error opening zap file: %v", err) + } + segment = segInf.(*zap.Segment) + + return nil + }, + PersistentPostRunE: func(cmd *cobra.Command, args []string) error { + return nil + }, +} + +// Execute adds all child commands to the root command sets flags appropriately. +// This is called by main.main(). It only needs to happen once to the rootCmd. +func Execute() { + if err := RootCmd.Execute(); err != nil { + fmt.Println(err) + os.Exit(-1) + } +} diff --git a/index/scorch/segment/zap/cmd/zap/cmd/stored.go b/index/scorch/segment/zap/cmd/zap/cmd/stored.go new file mode 100644 index 000000000..64e42c7e6 --- /dev/null +++ b/index/scorch/segment/zap/cmd/zap/cmd/stored.go @@ -0,0 +1,73 @@ +// Copyright (c) 2017 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package cmd + +import ( + "encoding/binary" + "fmt" + "strconv" + + "github.com/golang/snappy" + "github.com/spf13/cobra" +) + +// storedCmd represents the stored command +var storedCmd = &cobra.Command{ + Use: "stored [path] [docNum]", + Short: "prints the stored section for a doc number", + Long: `The stored command will print the raw stored data bytes for the specified document number.`, + RunE: func(cmd *cobra.Command, args []string) error { + if len(args) < 2 { + return fmt.Errorf("must specify doc number") + } + docNum, err := strconv.Atoi(args[1]) + if err != nil { + return fmt.Errorf("unable to parse doc number: %v", err) + } + if docNum >= int(segment.NumDocs()) { + return fmt.Errorf("invalid doc number %d (valid 0 - %d)", docNum, segment.NumDocs()-1) + } + data := segment.Data() + storedIdx := segment.StoredIndexOffset() + // read docNum entry in the index + indexPos := storedIdx + (8 * uint64(docNum)) + storedStartAddr := binary.BigEndian.Uint64(data[indexPos : indexPos+8]) + fmt.Printf("Stored field starts at %d (%#x)\n", storedStartAddr, storedStartAddr) + + var n uint64 + metaLen, read := binary.Uvarint(data[storedStartAddr : storedStartAddr+binary.MaxVarintLen64]) + n += uint64(read) + fmt.Printf("Meta Len: %d\n", metaLen) + var dataLen uint64 + dataLen, read = binary.Uvarint(data[storedStartAddr+n : storedStartAddr+n+binary.MaxVarintLen64]) + n += uint64(read) + fmt.Printf("Data Len: %d\n", dataLen) + meta := data[storedStartAddr+n : storedStartAddr+n+metaLen] + fmt.Printf("Raw meta: % x\n", meta) + raw := data[storedStartAddr+n+metaLen : storedStartAddr+n+metaLen+dataLen] + fmt.Printf("Raw data (len %d): % x\n", len(raw), raw) + uncompressed, err := snappy.Decode(nil, raw) + if err != nil { + panic(err) + } + fmt.Printf("Uncompressed data (len %d): % x\n", len(uncompressed), uncompressed) + + return nil + }, +} + +func init() { + RootCmd.AddCommand(storedCmd) +} diff --git a/index/scorch/segment/zap/cmd/zap/main.go 
b/index/scorch/segment/zap/cmd/zap/main.go new file mode 100644 index 000000000..23c500a33 --- /dev/null +++ b/index/scorch/segment/zap/cmd/zap/main.go @@ -0,0 +1,23 @@ +// Copyright (c) 2017 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package main + +import ( + "github.com/blevesearch/bleve/index/scorch/segment/zap/cmd/zap/cmd" +) + +func main() { + cmd.Execute() +} diff --git a/index/scorch/segment/zap/count.go b/index/scorch/segment/zap/count.go new file mode 100644 index 000000000..2f0b92de2 --- /dev/null +++ b/index/scorch/segment/zap/count.go @@ -0,0 +1,55 @@ +// Copyright (c) 2017 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package zap + +import ( + "hash" + "hash/crc32" + "io" +) + +// CountHashWriter is a wrapper around a Writer which counts the number of +// bytes which have been written +type CountHashWriter struct { + w io.Writer + h hash.Hash32 + n int +} + +// NewCountHashWriter returns a CountHashWriter which wraps the provided Writer +func NewCountHashWriter(w io.Writer) *CountHashWriter { + return &CountHashWriter{ + w: w, + h: crc32.NewIEEE(), + } +} + +// Write writes the provided bytes to the wrapped writer and counts the bytes +func (c *CountHashWriter) Write(b []byte) (int, error) { + n, err := c.w.Write(b) + c.n += n + _, _ = c.h.Write(b) + return n, err +} + +// Count returns the number of bytes written +func (c *CountHashWriter) Count() int { + return c.n +} + +// Sum32 returns the CRC-32 hash of the content written to this writer +func (c *CountHashWriter) Sum32() uint32 { + return c.h.Sum32() +} diff --git a/index/scorch/segment/zap/dict.go b/index/scorch/segment/zap/dict.go new file mode 100644 index 000000000..d69195958 --- /dev/null +++ b/index/scorch/segment/zap/dict.go @@ -0,0 +1,165 @@ +// Copyright (c) 2017 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package zap + +import ( + "encoding/binary" + "fmt" + + "github.com/RoaringBitmap/roaring" + "github.com/blevesearch/bleve/index" + "github.com/blevesearch/bleve/index/scorch/segment" + "github.com/couchbaselabs/vellum" + "github.com/couchbaselabs/vellum/regexp" +) + +// Dictionary is the zap representation of the term dictionary +type Dictionary struct { + segment *Segment + field string + fieldID uint16 + fst *vellum.FST +} + +// PostingsList returns the postings list for the specified term +func (d *Dictionary) PostingsList(term string, except *roaring.Bitmap) (segment.PostingsList, error) { + return d.postingsList(term, except) +} + +func (d *Dictionary) postingsList(term string, except *roaring.Bitmap) (*PostingsList, error) { + rv := &PostingsList{ + dictionary: d, + term: term, + except: except, + } + + if d.fst != nil { + postingsOffset, exists, err := d.fst.Get([]byte(term)) + if err != nil { + return nil, fmt.Errorf("vellum err: %v", err) + } + if exists { + rv.postingsOffset = postingsOffset + // read the location of the freq/norm details + var n uint64 + var read int + + rv.freqOffset, read = binary.Uvarint(d.segment.mm[postingsOffset+n : postingsOffset+binary.MaxVarintLen64]) + n += uint64(read) + rv.locOffset, read = binary.Uvarint(d.segment.mm[postingsOffset+n : postingsOffset+n+binary.MaxVarintLen64]) + n += uint64(read) + var postingsLen uint64 + postingsLen, read = binary.Uvarint(d.segment.mm[postingsOffset+n : postingsOffset+n+binary.MaxVarintLen64]) + n += uint64(read) + + roaringBytes := d.segment.mm[postingsOffset+n : postingsOffset+n+postingsLen] + + bitmap := roaring.NewBitmap() + _, err = bitmap.FromBuffer(roaringBytes) + if err != nil { + return nil, fmt.Errorf("error loading roaring bitmap: %v", err) + } + + rv.postings = bitmap + } + } + + return rv, nil +} + +// Iterator returns an iterator for this dictionary +func (d *Dictionary) Iterator() segment.DictionaryIterator { + + rv := &DictionaryIterator{ + d: d, + } + + if d.fst != nil 
{ + itr, err := d.fst.Iterator(nil, nil) + if err == nil { + rv.itr = itr + } + } + + return rv +} + +// PrefixIterator returns an iterator which only visits terms having the +// the specified prefix +func (d *Dictionary) PrefixIterator(prefix string) segment.DictionaryIterator { + rv := &DictionaryIterator{ + d: d, + } + + if d.fst != nil { + r, err := regexp.New(prefix + ".*") + if err == nil { + itr, err := d.fst.Search(r, nil, nil) + if err == nil { + rv.itr = itr + } + } + } + + return rv +} + +// RangeIterator returns an iterator which only visits terms between the +// start and end terms. NOTE: bleve.index API specifies the end is inclusive. +func (d *Dictionary) RangeIterator(start, end string) segment.DictionaryIterator { + rv := &DictionaryIterator{ + d: d, + } + + // need to increment the end position to be inclusive + endBytes := []byte(end) + if endBytes[len(endBytes)-1] < 0xff { + endBytes[len(endBytes)-1]++ + } else { + endBytes = append(endBytes, 0xff) + } + + if d.fst != nil { + itr, err := d.fst.Iterator([]byte(start), endBytes) + if err == nil { + rv.itr = itr + } + } + + return rv +} + +// DictionaryIterator is an iterator for term dictionary +type DictionaryIterator struct { + d *Dictionary + itr vellum.Iterator + err error +} + +// Next returns the next entry in the dictionary +func (i *DictionaryIterator) Next() (*index.DictEntry, error) { + if i.itr == nil || i.err == vellum.ErrIteratorDone { + return nil, nil + } else if i.err != nil { + return nil, i.err + } + term, count := i.itr.Current() + rv := &index.DictEntry{ + Term: string(term), + Count: count, + } + i.err = i.itr.Next() + return rv, nil +} diff --git a/index/scorch/segment/zap/dict_test.go b/index/scorch/segment/zap/dict_test.go new file mode 100644 index 000000000..336fb37ca --- /dev/null +++ b/index/scorch/segment/zap/dict_test.go @@ -0,0 +1,183 @@ +// Copyright (c) 2017 Couchbase, Inc. 
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// 		http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package zap

import (
	"os"
	"reflect"
	"testing"

	"github.com/blevesearch/bleve/analysis"
	"github.com/blevesearch/bleve/document"
	"github.com/blevesearch/bleve/index"
	"github.com/blevesearch/bleve/index/scorch/segment/mem"
)

// buildMemSegmentForDict forges an analyzed single-document in-memory
// segment whose "desc" field contains seven known terms, used by the
// dictionary iterator test below.
func buildMemSegmentForDict() *mem.Segment {
	doc := &document.Document{
		ID: "a",
		Fields: []document.Field{
			document.NewTextFieldCustom("_id", nil, []byte("a"), document.IndexField|document.StoreField, nil),
			document.NewTextFieldCustom("desc", nil, []byte("apple ball cat dog egg fish bat"), document.IndexField|document.StoreField|document.IncludeTermVectors, nil),
		},
	}

	// forge analyzed docs (token byte offsets are hand-written fixtures;
	// the dictionary test only depends on the Term values)
	results := []*index.AnalysisResult{
		&index.AnalysisResult{
			Document: doc,
			Analyzed: []analysis.TokenFrequencies{
				analysis.TokenFrequency(analysis.TokenStream{
					&analysis.Token{
						Start:    0,
						End:      1,
						Position: 1,
						Term:     []byte("a"),
					},
				}, nil, false),
				analysis.TokenFrequency(analysis.TokenStream{
					&analysis.Token{
						Start:    0,
						End:      5,
						Position: 1,
						Term:     []byte("apple"),
					},
					&analysis.Token{
						Start:    6,
						End:      10,
						Position: 2,
						Term:     []byte("ball"),
					},
					&analysis.Token{
						Start:    11,
						End:      14,
						Position: 3,
						Term:     []byte("cat"),
					},
					&analysis.Token{
						Start:    15,
						End:      18,
						Position: 4,
						Term:     []byte("dog"),
					},
					&analysis.Token{
						Start:    19,
						End:      22,
						Position: 5,
						Term:     []byte("egg"),
					},
					&analysis.Token{
						Start:    20,
						End:      24,
						Position: 6,
						Term:     []byte("fish"),
					},
					&analysis.Token{
						Start:    25,
						End:      28,
						Position: 7,
						Term:     []byte("bat"),
					},
				}, nil, true),
			},
			Length: []int{
				1,
				7,
			},
		},
	}

	segment := mem.NewFromAnalyzedDocs(results)

	return segment
}

// TestDictionary persists the forged segment, reopens it, and verifies the
// full, prefix, and range dictionary iterators return the expected terms
// in sorted order.
func TestDictionary(t *testing.T) {

	_ = os.RemoveAll("/tmp/scorch.zap")

	memSegment := buildMemSegmentForDict()
	err := PersistSegment(memSegment, "/tmp/scorch.zap", 1024)
	if err != nil {
		t.Fatalf("error persisting segment: %v", err)
	}

	segment, err := Open("/tmp/scorch.zap")
	if err != nil {
		t.Fatalf("error opening segment: %v", err)
	}
	defer func() {
		cerr := segment.Close()
		if cerr != nil {
			t.Fatalf("error closing segment: %v", err)
		}
	}()

	dict, err := segment.Dictionary("desc")
	if err != nil {
		t.Fatal(err)
	}

	// test basic full iterator
	expected := []string{"apple", "ball", "bat", "cat", "dog", "egg", "fish"}
	var got []string
	itr := dict.Iterator()
	next, err := itr.Next()
	for next != nil && err == nil {
		got = append(got, next.Term)
		next, err = itr.Next()
	}
	if err != nil {
		t.Fatalf("dict itr error: %v", err)
	}

	if !reflect.DeepEqual(expected, got) {
		t.Errorf("expected: %v, got: %v", expected, got)
	}

	// test prefix iterator
	expected = []string{"ball", "bat"}
	got = got[:0]
	itr = dict.PrefixIterator("b")
	next, err = itr.Next()
	for next != nil && err == nil {
		got = append(got, next.Term)
		next, err = itr.Next()
	}
	if err != nil {
		t.Fatalf("dict itr error: %v", err)
	}

	if !reflect.DeepEqual(expected, got) {
		t.Errorf("expected: %v, got: %v", expected, got)
	}

	// test range iterator (end is inclusive)
	expected = []string{"cat", "dog", "egg"}
	got = got[:0]
	itr = dict.RangeIterator("cat", "egg")
	next, err = itr.Next()
	for next != nil && err == nil {
		got = append(got, next.Term)
		next, err = itr.Next()
	}
	if err != nil {
		t.Fatalf("dict itr error: %v", err)
	}

	if !reflect.DeepEqual(expected, got) {
		t.Errorf("expected: %v, got: %v", expected, got)
	}
}

// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// 		http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package zap

import (
	"bytes"
	"encoding/binary"
	"fmt"
	"math"

	"github.com/RoaringBitmap/roaring"
	"github.com/Smerity/govarint"
	"github.com/blevesearch/bleve/index/scorch/segment"
)

// PostingsList is an in-memory representation of a postings list
type PostingsList struct {
	dictionary     *Dictionary
	term           string
	postingsOffset uint64
	freqOffset     uint64
	locOffset      uint64
	postings       *roaring.Bitmap
	except         *roaring.Bitmap
	postingKey     []byte
}

// Iterator returns an iterator for this postings list. It eagerly decodes
// the freq and loc chunk-length tables from the mmap'd data so the
// iterator can seek to any chunk.
func (p *PostingsList) Iterator() segment.PostingsIterator {
	rv := &PostingsIterator{
		postings: p,
	}
	if p.postings != nil {
		// prepare the freq chunk details: a uvarint count followed by one
		// uvarint length per chunk
		var n uint64
		var read int
		var numFreqChunks uint64
		numFreqChunks, read = binary.Uvarint(p.dictionary.segment.mm[p.freqOffset+n : p.freqOffset+n+binary.MaxVarintLen64])
		n += uint64(read)
		rv.freqChunkLens = make([]uint64, int(numFreqChunks))
		for i := 0; i < int(numFreqChunks); i++ {
			rv.freqChunkLens[i], read = binary.Uvarint(p.dictionary.segment.mm[p.freqOffset+n : p.freqOffset+n+binary.MaxVarintLen64])
			n += uint64(read)
		}
		rv.freqChunkStart = p.freqOffset + n

		// prepare the loc chunk details (same layout as the freq chunks)
		n = 0
		var numLocChunks uint64
		numLocChunks, read = binary.Uvarint(p.dictionary.segment.mm[p.locOffset+n : p.locOffset+n+binary.MaxVarintLen64])
		n += uint64(read)
		rv.locChunkLens = make([]uint64, int(numLocChunks))
		for i := 0; i < int(numLocChunks); i++ {
			rv.locChunkLens[i], read = binary.Uvarint(p.dictionary.segment.mm[p.locOffset+n : p.locOffset+n+binary.MaxVarintLen64])
			n += uint64(read)
		}
		rv.locChunkStart = p.locOffset + n

		rv.all = p.postings.Iterator()
		if p.except != nil {
			allExcept := p.postings.Clone()
			allExcept.AndNot(p.except)
			rv.actual = allExcept.Iterator()
		} else {
			rv.actual = p.postings.Iterator()
		}
	}

	return rv
}

// Count returns the number of items on this postings list
func (p *PostingsList) Count() uint64 {
	var rv uint64
	if p.postings != nil {
		rv = p.postings.GetCardinality()
		if p.except != nil {
			except := p.except.GetCardinality()
			if except > rv {
				// avoid underflow
				except = rv
			}
			rv -= except
		}
	}
	return rv
}

// PostingsIterator provides a way to iterate through the postings list.
// It walks two roaring iterators in lockstep: 'all' (every posting) and
// 'actual' (postings minus the except set), decoding freq/norm/loc data
// chunk by chunk as it advances.
type PostingsIterator struct {
	postings  *PostingsList
	all       roaring.IntIterable
	offset    int
	locoffset int
	actual    roaring.IntIterable

	currChunk         uint32
	currChunkFreqNorm []byte
	currChunkLoc      []byte
	freqNormDecoder   *govarint.Base128Decoder
	locDecoder        *govarint.Base128Decoder

	freqChunkLens  []uint64
	freqChunkStart uint64

	locChunkLens  []uint64
	locChunkStart uint64
}

// loadChunk positions the freq/norm and loc decoders at the start of the
// requested chunk by summing the preceding chunk lengths.
func (i *PostingsIterator) loadChunk(chunk int) error {
	if chunk >= len(i.freqChunkLens) || chunk >= len(i.locChunkLens) {
		return fmt.Errorf("tried to load chunk that doesn't exist %d/(%d %d)", chunk, len(i.freqChunkLens), len(i.locChunkLens))
	}
	// load correct chunk bytes
	start := i.freqChunkStart
	for j := 0; j < chunk; j++ {
		start += i.freqChunkLens[j]
	}
	end := start + i.freqChunkLens[chunk]
	i.currChunkFreqNorm = i.postings.dictionary.segment.mm[start:end]
	i.freqNormDecoder = govarint.NewU64Base128Decoder(bytes.NewReader(i.currChunkFreqNorm))

	start = i.locChunkStart
	for j := 0; j < chunk; j++ {
		start += i.locChunkLens[j]
	}
	end = start + i.locChunkLens[chunk]
	i.currChunkLoc = i.postings.dictionary.segment.mm[start:end]
	i.locDecoder = govarint.NewU64Base128Decoder(bytes.NewReader(i.currChunkLoc))
	i.currChunk = uint32(chunk)
	return nil
}

// readFreqNorm reads the next freq and norm-bits pair from the current
// freq/norm chunk.
func (i *PostingsIterator) readFreqNorm() (uint64, uint64, error) {
	freq, err := i.freqNormDecoder.GetU64()
	if err != nil {
		return 0, 0, fmt.Errorf("error reading frequency: %v", err)
	}
	normBits, err := i.freqNormDecoder.GetU64()
	if err != nil {
		return 0, 0, fmt.Errorf("error reading norm: %v", err)
	}
	return freq, normBits, err
}

// readLocation processes all the integers on the stream representing a single
// location. if you care about it, pass in a non-nil location struct, and we
// will fill it. if you don't care about it, pass in nil and we safely consume
// the contents.
func (i *PostingsIterator) readLocation(l *Location) error {
	// read off field
	fieldID, err := i.locDecoder.GetU64()
	if err != nil {
		return fmt.Errorf("error reading location field: %v", err)
	}
	// read off pos
	pos, err := i.locDecoder.GetU64()
	if err != nil {
		return fmt.Errorf("error reading location pos: %v", err)
	}
	// read off start
	start, err := i.locDecoder.GetU64()
	if err != nil {
		return fmt.Errorf("error reading location start: %v", err)
	}
	// read off end
	end, err := i.locDecoder.GetU64()
	if err != nil {
		return fmt.Errorf("error reading location end: %v", err)
	}
	// read off num array pos
	numArrayPos, err := i.locDecoder.GetU64()
	if err != nil {
		return fmt.Errorf("error reading location num array pos: %v", err)
	}

	// group these together for less branching
	if l != nil {
		l.field = i.postings.dictionary.segment.fieldsInv[fieldID]
		l.pos = pos
		l.start = start
		l.end = end
		if numArrayPos > 0 {
			l.ap = make([]uint64, int(numArrayPos))
		}
	}

	// read off array positions (must be consumed even when l is nil, to
	// keep the decoder aligned with the stream)
	for k := 0; k < int(numArrayPos); k++ {
		ap, err := i.locDecoder.GetU64()
		if err != nil {
			return fmt.Errorf("error reading array position: %v", err)
		}
		if l != nil {
			l.ap[k] = ap
		}
	}

	return nil
}

// Next returns the next posting on the postings list, or nil at the end
func (i *PostingsIterator) Next() (segment.Posting, error) {
	if i.actual == nil || !i.actual.HasNext() {
		return nil, nil
	}
	n := i.actual.Next()
	nChunk := n / i.postings.dictionary.segment.chunkFactor
	allN := i.all.Next()
	allNChunk := allN / i.postings.dictionary.segment.chunkFactor

	// n is the next actual hit (excluding some postings)
	// allN is the next hit in the full postings
	// if they don't match, adjust offsets to factor in item we're skipping over
	// incr the all iterator, and check again
	// NOTE(review): allNChunk is computed once before the loop and not
	// recomputed as allN advances — confirm skipped items spanning a chunk
	// boundary are handled as intended
	for allN != n {

		// in different chunks, reset offsets
		if allNChunk != nChunk {
			i.locoffset = 0
			i.offset = 0
		} else {

			if i.currChunk != nChunk || i.currChunkFreqNorm == nil {
				err := i.loadChunk(int(nChunk))
				if err != nil {
					return nil, fmt.Errorf("error loading chunk: %v", err)
				}
			}

			// read off freq/offsets even though we don't care about them
			freq, _, err := i.readFreqNorm()
			if err != nil {
				return nil, err
			}
			if i.postings.dictionary.segment.fieldsLoc[i.postings.dictionary.fieldID] {
				for j := 0; j < int(freq); j++ {
					err := i.readLocation(nil)
					if err != nil {
						return nil, err
					}
				}
			}

			// in same chunk, need to account for offsets
			i.offset++
		}

		allN = i.all.Next()
	}

	if i.currChunk != nChunk || i.currChunkFreqNorm == nil {
		err := i.loadChunk(int(nChunk))
		if err != nil {
			return nil, fmt.Errorf("error loading chunk: %v", err)
		}
	}

	rv := &Posting{
		iterator: i,
		docNum:   uint64(n),
	}

	var err error
	var normBits uint64
	rv.freq, normBits, err = i.readFreqNorm()
	if err != nil {
		return nil, err
	}
	rv.norm = math.Float32frombits(uint32(normBits))
	if i.postings.dictionary.segment.fieldsLoc[i.postings.dictionary.fieldID] {
		// read off 'freq' locations
		rv.locs = make([]segment.Location, rv.freq)
		locs := make([]Location, rv.freq)
		for j := 0; j < int(rv.freq); j++ {
			err := i.readLocation(&locs[j])
			if err != nil {
				return nil, err
			}
			rv.locs[j] = &locs[j]
		}
	}

	return rv, nil
}

// Posting is a single entry in a postings list
type Posting struct {
	iterator *PostingsIterator
	docNum   uint64

	freq uint64
	norm float32
	locs []segment.Location
}

// Number returns the document number of this posting in this segment
func (p *Posting) Number() uint64 {
	return p.docNum
}

// Frequency returns the frequency of occurrence of this term in this doc/field
func (p *Posting) Frequency() uint64 {
	return p.freq
}

// Norm returns the normalization factor for this posting
func (p *Posting) Norm() float64 {
	return float64(p.norm)
}

// Locations returns the location information for each occurrence
func (p *Posting) Locations() []segment.Location {
	return p.locs
}

// Location represents the location of a single occurrence
type Location struct {
	field string
	pos   uint64
	start uint64
	end   uint64
	ap    []uint64
}

// Field returns the name of the field (useful in composite fields to know
// which original field the value came from)
func (l *Location) Field() string {
	return l.field
}

// Start returns the start byte offset of this occurrence
func (l *Location) Start() uint64 {
	return l.start
}

// End returns the end byte offset of this occurrence
func (l *Location) End() uint64 {
	return l.end
}

// Pos returns the 1-based phrase position of this occurrence
func (l *Location) Pos() uint64 {
	return l.pos
}

// ArrayPositions returns the array position vector associated with this occurrence
func (l *Location) ArrayPositions() []uint64 {
	return l.ap
}

// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// 		http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package zap

import (
	"bytes"
	"encoding/binary"
	"fmt"
	"io"
	"os"

	"github.com/RoaringBitmap/roaring"
	"github.com/Smerity/govarint"
	"github.com/blevesearch/bleve/index/scorch/segment"
	"github.com/couchbaselabs/vellum"
	mmap "github.com/edsrzf/mmap-go"
	"github.com/golang/snappy"
)

// Open returns a zap impl of a segment, mmap'ing the file at path and
// loading the footer config and field index eagerly.
func Open(path string) (segment.Segment, error) {
	f, err := os.Open(path)
	if err != nil {
		return nil, err
	}
	mm, err := mmap.Map(f, mmap.RDONLY, 0)
	if err != nil {
		// mmap failed, try to close the file
		_ = f.Close()
		return nil, err
	}

	rv := &Segment{
		f:         f,
		mm:        mm,
		path:      path,
		fieldsMap: make(map[string]uint16),
	}

	err = rv.loadConfig()
	if err != nil {
		_ = rv.Close()
		return nil, err
	}

	err = rv.loadFields()
	if err != nil {
		_ = rv.Close()
		return nil, err
	}

	return rv, nil
}

// Segment implements the segment.Segment interface over top the zap file format
type Segment struct {
	f                 *os.File
	mm                mmap.MMap
	path              string
	crc               uint32
	version           uint32
	chunkFactor       uint32
	numDocs           uint64
	storedIndexOffset uint64
	fieldsIndexOffset uint64

	// fieldsMap maps field name -> fieldID+1 (0 means "not present")
	fieldsMap     map[string]uint16
	fieldsInv     []string
	fieldsLoc     []bool
	fieldsOffsets []uint64
}

// loadConfig reads the fixed-size footer fields (CRC, version, chunk
// factor, fields/stored index offsets, doc count) working backward from
// the end of the mmap'd file.
func (s *Segment) loadConfig() error {
	crcOffset := len(s.mm) - 4
	s.crc = binary.BigEndian.Uint32(s.mm[crcOffset : crcOffset+4])
	verOffset := crcOffset - 4
	s.version = binary.BigEndian.Uint32(s.mm[verOffset : verOffset+4])
	if s.version != version {
		return fmt.Errorf("unsupported version %d", s.version)
	}
	chunkOffset := verOffset - 4
	s.chunkFactor = binary.BigEndian.Uint32(s.mm[chunkOffset : chunkOffset+4])
	fieldsOffset := chunkOffset - 8
	s.fieldsIndexOffset = binary.BigEndian.Uint64(s.mm[fieldsOffset : fieldsOffset+8])
	storedOffset := fieldsOffset - 8
	s.storedIndexOffset = binary.BigEndian.Uint64(s.mm[storedOffset : storedOffset+8])
	docNumOffset := storedOffset - 8
	s.numDocs = binary.BigEndian.Uint64(s.mm[docNumOffset : docNumOffset+8])
	return nil

}

// loadFields walks the fields index, populating the per-field name,
// has-locations flag, and dictionary offset tables.
func (s *Segment) loadFields() error {
	// NOTE for now we assume the fields index immediately precedes the footer
	// if this changes, need to adjust accordingly (or store explicit length)
	fieldsIndexEnd := uint64(len(s.mm) - footerSize)

	// iterate through fields index
	var fieldID uint64
	for s.fieldsIndexOffset+(8*fieldID) < fieldsIndexEnd {
		addr := binary.BigEndian.Uint64(s.mm[s.fieldsIndexOffset+(8*fieldID) : s.fieldsIndexOffset+(8*fieldID)+8])
		var n uint64
		hasStoredLoc, read := binary.Uvarint(s.mm[addr:fieldsIndexEnd])
		n += uint64(read)
		if hasStoredLoc == 1 {
			s.fieldsLoc = append(s.fieldsLoc, true)
		} else {
			s.fieldsLoc = append(s.fieldsLoc, false)
		}

		var dictLoc uint64
		dictLoc, read = binary.Uvarint(s.mm[addr+n : fieldsIndexEnd])
		n += uint64(read)
		s.fieldsOffsets = append(s.fieldsOffsets, dictLoc)

		var nameLen uint64
		nameLen, read = binary.Uvarint(s.mm[addr+n : fieldsIndexEnd])
		n += uint64(read)

		name := string(s.mm[addr+n : addr+n+nameLen])
		s.fieldsInv = append(s.fieldsInv, name)
		// stored as fieldID+1 so that 0 can mean "unknown field"
		s.fieldsMap[name] = uint16(fieldID + 1)

		fieldID++
	}
	return nil
}

// Dictionary returns the term dictionary for the specified field
func (s *Segment) Dictionary(field string) (segment.TermDictionary, error) {
	dict, err := s.dictionary(field)
	if err == nil && dict == nil {
		return &segment.EmptyDictionary{}, nil
	}
	return dict, err
}

// dictionary loads the vellum FST for the field, or returns nil if the
// field is unknown in this segment.
func (s *Segment) dictionary(field string) (*Dictionary, error) {
	rv := &Dictionary{
		segment: s,
		field:   field,
	}

	rv.fieldID = s.fieldsMap[field]
	if rv.fieldID > 0 {
		rv.fieldID = rv.fieldID - 1

		dictStart := s.fieldsOffsets[rv.fieldID]

		// read the length of the vellum data
		vellumLen, read := binary.Uvarint(s.mm[dictStart : dictStart+binary.MaxVarintLen64])
		fstBytes := s.mm[dictStart+uint64(read) : dictStart+uint64(read)+vellumLen]
		if fstBytes != nil {
			fst, err := vellum.Load(fstBytes)
			if err != nil {
				return nil, fmt.Errorf("dictionary field %s vellum err: %v", field, err)
			}
			if err == nil {
				rv.fst = fst
			}
		}

	} else {
		return nil, nil
	}

	return rv, nil
}

// VisitDocument invokes the DocFieldValueVistor for each stored field
// for the specified doc number
func (s *Segment) VisitDocument(num uint64, visitor segment.DocumentFieldValueVisitor) error {
	// first make sure this is a valid number in this segment
	if num < s.numDocs {
		docStoredStartAddr := s.storedIndexOffset + (8 * num)
		docStoredStart := binary.BigEndian.Uint64(s.mm[docStoredStartAddr : docStoredStartAddr+8])
		var n uint64
		metaLen, read := binary.Uvarint(s.mm[docStoredStart : docStoredStart+binary.MaxVarintLen64])
		n += uint64(read)
		var dataLen uint64
		dataLen, read = binary.Uvarint(s.mm[docStoredStart+n : docStoredStart+n+binary.MaxVarintLen64])
		n += uint64(read)
		meta := s.mm[docStoredStart+n : docStoredStart+n+metaLen]
		data := s.mm[docStoredStart+n+metaLen : docStoredStart+n+metaLen+dataLen]
		// NOTE(review): decode failure panics rather than returning an
		// error — confirm whether corrupt stored data should be fatal here
		uncompressed, err := snappy.Decode(nil, data)
		if err != nil {
			panic(err)
		}
		// now decode meta and process: each entry is
		// field, type, offset, length, num-array-positions, [array positions...]
		reader := bytes.NewReader(meta)
		decoder := govarint.NewU64Base128Decoder(reader)

		keepGoing := true
		for keepGoing {
			field, err := decoder.GetU64()
			if err == io.EOF {
				break
			}
			if err != nil {
				return err
			}
			typ, err := decoder.GetU64()
			if err != nil {
				return err
			}
			offset, err := decoder.GetU64()
			if err != nil {
				return err
			}
			l, err := decoder.GetU64()
			if err != nil {
				return err
			}
			numap, err := decoder.GetU64()
			if err != nil {
				return err
			}
			var arrayPos []uint64
			if numap > 0 {
				arrayPos = make([]uint64, numap)
				for i := 0; i < int(numap); i++ {
					ap, err := decoder.GetU64()
					if err != nil {
						return err
					}
					arrayPos[i] = ap
				}
			}

			value := uncompressed[offset : offset+l]
			// visitor returning false terminates the iteration early
			keepGoing = visitor(s.fieldsInv[field], byte(typ), value, arrayPos)
		}
	}

	return nil
}

// Count returns the number of documents in this segment.
func (s *Segment) Count() uint64 {
	return s.numDocs
}

// DocNumbers returns a bitset corresponding to the doc numbers of all the
// provided _id strings
func (s *Segment) DocNumbers(ids []string) (*roaring.Bitmap, error) {
	rv := roaring.New()

	if len(s.fieldsMap) > 0 {
		idDict, err := s.dictionary("_id")
		if err != nil {
			return nil, err
		}

		for _, id := range ids {
			postings, err := idDict.postingsList(id, nil)
			if err != nil {
				return nil, err
			}
			if postings.postings != nil {
				rv.Or(postings.postings)
			}
		}
	}

	return rv, nil
}

// Fields returns the field names used in this segment
func (s *Segment) Fields() []string {
	return s.fieldsInv
}

// Path returns the path of this segment on disk
func (s *Segment) Path() string {
	return s.path
}

// Close releases all resources associated with this segment
func (s *Segment) Close() (err error) {
	if s.mm != nil {
		err = s.mm.Unmap()
	}
	// try to close file even if unmap failed
	if s.f != nil {
		err2 := s.f.Close()
		if err == nil {
			// try to return first error
			err = err2
		}
	}
	return
}

// some helpers i started adding for the command-line utility

// Data returns the underlying mmaped data slice
func (s *Segment) Data() []byte {
	return s.mm
}

// CRC returns the CRC value stored in the file footer
func (s *Segment) CRC() uint32 {
	return s.crc
}

// Version returns the file version in the file footer
func (s *Segment) Version() uint32 {
	return s.version
}

// ChunkFactor returns the chunk factor in the file footer
func (s *Segment) ChunkFactor() uint32 {
	return s.chunkFactor
}

// FieldsIndexOffset returns the fields index offset in the file footer
func (s *Segment) FieldsIndexOffset() uint64 {
	return s.fieldsIndexOffset
}

// StoredIndexOffset returns the stored value index offset in the file footer
func (s *Segment) StoredIndexOffset() uint64 {
+ return s.storedIndexOffset +} + +// NumDocs returns the number of documents in the file footer +func (s *Segment) NumDocs() uint64 { + return s.numDocs +} + +// DictAddr is a helper function to compute the file offset where the +// dictionary is stored for the specified field. +func (s *Segment) DictAddr(field string) (uint64, error) { + var fieldID uint16 + var ok bool + if fieldID, ok = s.fieldsMap[field]; !ok { + return 0, fmt.Errorf("no such field '%s'", field) + } + + return s.fieldsOffsets[fieldID-1], nil +} diff --git a/index/scorch/segment/zap/segment_test.go b/index/scorch/segment/zap/segment_test.go new file mode 100644 index 000000000..d4241c1d9 --- /dev/null +++ b/index/scorch/segment/zap/segment_test.go @@ -0,0 +1,517 @@ +// Copyright (c) 2017 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package zap + +import ( + "math" + "os" + "reflect" + "testing" +) + +func TestOpen(t *testing.T) { + _ = os.RemoveAll("/tmp/scorch.zap") + + memSegment := buildMemSegment() + err := PersistSegment(memSegment, "/tmp/scorch.zap", 1024) + if err != nil { + t.Fatalf("error persisting segment: %v", err) + } + + segment, err := Open("/tmp/scorch.zap") + if err != nil { + t.Fatalf("error opening segment: %v", err) + } + defer func() { + cerr := segment.Close() + if cerr != nil { + t.Fatalf("error closing segment: %v", err) + } + }() + + expectFields := map[string]struct{}{ + "_id": struct{}{}, + "_all": struct{}{}, + "name": struct{}{}, + "desc": struct{}{}, + "tag": struct{}{}, + } + fields := segment.Fields() + if len(fields) != len(expectFields) { + t.Errorf("expected %d fields, only got %d", len(expectFields), len(fields)) + } + for _, field := range fields { + if _, ok := expectFields[field]; !ok { + t.Errorf("got unexpected field: %s", field) + } + } + + docCount := segment.Count() + if docCount != 1 { + t.Errorf("expected count 1, got %d", docCount) + } + + // check the _id field + dict, err := segment.Dictionary("_id") + if err != nil { + t.Fatal(err) + } + if dict == nil { + t.Fatal("got nil dict, expected non-nil") + } + + postingsList, err := dict.PostingsList("a", nil) + if err != nil { + t.Fatal(err) + } + if postingsList == nil { + t.Fatal("got nil postings list, expected non-nil") + } + + postingsItr := postingsList.Iterator() + if postingsItr == nil { + t.Fatal("got nil iterator, expected non-nil") + } + + count := 0 + nextPosting, err := postingsItr.Next() + for nextPosting != nil && err == nil { + count++ + if nextPosting.Frequency() != 1 { + t.Errorf("expected frequency 1, got %d", nextPosting.Frequency()) + } + if nextPosting.Number() != 0 { + t.Errorf("expected doc number 0, got %d", nextPosting.Number()) + } + if nextPosting.Norm() != 1.0 { + t.Errorf("expected norm 1.0, got %f", nextPosting.Norm()) + } + + nextPosting, err = postingsItr.Next() + 
} + if err != nil { + t.Fatal(err) + } + + if count != 1 { + t.Errorf("expected count to be 1, got %d", count) + } + + // check the name field + dict, err = segment.Dictionary("name") + if err != nil { + t.Fatal(err) + } + if dict == nil { + t.Fatal("got nil dict, expected non-nil") + } + + postingsList, err = dict.PostingsList("wow", nil) + if err != nil { + t.Fatal(err) + } + if postingsList == nil { + t.Fatal("got nil postings list, expected non-nil") + } + + postingsItr = postingsList.Iterator() + if postingsItr == nil { + t.Fatal("got nil iterator, expected non-nil") + } + + count = 0 + nextPosting, err = postingsItr.Next() + for nextPosting != nil && err == nil { + count++ + if nextPosting.Frequency() != 1 { + t.Errorf("expected frequency 1, got %d", nextPosting.Frequency()) + } + if nextPosting.Number() != 0 { + t.Errorf("expected doc number 0, got %d", nextPosting.Number()) + } + if nextPosting.Norm() != 1.0 { + t.Errorf("expected norm 1.0, got %f", nextPosting.Norm()) + } + var numLocs uint64 + for _, loc := range nextPosting.Locations() { + numLocs++ + if loc.Field() != "name" { + t.Errorf("expected loc field to be 'name', got '%s'", loc.Field()) + } + if loc.Start() != 0 { + t.Errorf("expected loc start to be 0, got %d", loc.Start()) + } + if loc.End() != 3 { + t.Errorf("expected loc end to be 3, got %d", loc.End()) + } + if loc.Pos() != 1 { + t.Errorf("expected loc pos to be 1, got %d", loc.Pos()) + } + if loc.ArrayPositions() != nil { + t.Errorf("expect loc array pos to be nil, got %v", loc.ArrayPositions()) + } + } + if numLocs != nextPosting.Frequency() { + t.Errorf("expected %d locations, got %d", nextPosting.Frequency(), numLocs) + } + + nextPosting, err = postingsItr.Next() + } + if err != nil { + t.Fatal(err) + } + + if count != 1 { + t.Errorf("expected count to be 1, got %d", count) + } + + // check the _all field (composite) + dict, err = segment.Dictionary("_all") + if err != nil { + t.Fatal(err) + } + if dict == nil { + t.Fatal("got nil dict, 
expected non-nil") + } + + postingsList, err = dict.PostingsList("wow", nil) + if err != nil { + t.Fatal(err) + } + if postingsList == nil { + t.Fatal("got nil postings list, expected non-nil") + } + + postingsItr = postingsList.Iterator() + if postingsItr == nil { + t.Fatal("got nil iterator, expected non-nil") + } + + count = 0 + nextPosting, err = postingsItr.Next() + for nextPosting != nil && err == nil { + count++ + if nextPosting.Frequency() != 1 { + t.Errorf("expected frequency 1, got %d", nextPosting.Frequency()) + } + if nextPosting.Number() != 0 { + t.Errorf("expected doc number 0, got %d", nextPosting.Number()) + } + expectedNorm := float32(1.0 / math.Sqrt(float64(5))) + if nextPosting.Norm() != float64(expectedNorm) { + t.Errorf("expected norm %f, got %f", expectedNorm, nextPosting.Norm()) + } + var numLocs uint64 + for _, loc := range nextPosting.Locations() { + numLocs++ + if loc.Field() != "name" { + t.Errorf("expected loc field to be 'name', got '%s'", loc.Field()) + } + if loc.Start() != 0 { + t.Errorf("expected loc start to be 0, got %d", loc.Start()) + } + if loc.End() != 3 { + t.Errorf("expected loc end to be 3, got %d", loc.End()) + } + if loc.Pos() != 1 { + t.Errorf("expected loc pos to be 1, got %d", loc.Pos()) + } + if loc.ArrayPositions() != nil { + t.Errorf("expect loc array pos to be nil, got %v", loc.ArrayPositions()) + } + } + if numLocs != nextPosting.Frequency() { + t.Errorf("expected %d locations, got %d", nextPosting.Frequency(), numLocs) + } + + nextPosting, err = postingsItr.Next() + } + if err != nil { + t.Fatal(err) + } + + if count != 1 { + t.Errorf("expected count to be 1, got %d", count) + } + + // now try a field with array positions + dict, err = segment.Dictionary("tag") + if err != nil { + t.Fatal(err) + } + if dict == nil { + t.Fatal("got nil dict, expected non-nil") + } + + postingsList, err = dict.PostingsList("dark", nil) + if err != nil { + t.Fatal(err) + } + if postingsList == nil { + t.Fatal("got nil postings list, 
expected non-nil") + } + + postingsItr = postingsList.Iterator() + if postingsItr == nil { + t.Fatal("got nil iterator, expected non-nil") + } + + nextPosting, err = postingsItr.Next() + for nextPosting != nil && err == nil { + + if nextPosting.Frequency() != 1 { + t.Errorf("expected frequency 1, got %d", nextPosting.Frequency()) + } + if nextPosting.Number() != 0 { + t.Errorf("expected doc number 0, got %d", nextPosting.Number()) + } + var numLocs uint64 + for _, loc := range nextPosting.Locations() { + numLocs++ + if loc.Field() != "tag" { + t.Errorf("expected loc field to be 'name', got '%s'", loc.Field()) + } + if loc.Start() != 0 { + t.Errorf("expected loc start to be 0, got %d", loc.Start()) + } + if loc.End() != 4 { + t.Errorf("expected loc end to be 3, got %d", loc.End()) + } + if loc.Pos() != 1 { + t.Errorf("expected loc pos to be 1, got %d", loc.Pos()) + } + expectArrayPos := []uint64{1} + if !reflect.DeepEqual(loc.ArrayPositions(), expectArrayPos) { + t.Errorf("expect loc array pos to be %v, got %v", expectArrayPos, loc.ArrayPositions()) + } + } + if numLocs != nextPosting.Frequency() { + t.Errorf("expected %d locations, got %d", nextPosting.Frequency(), numLocs) + } + + nextPosting, err = postingsItr.Next() + } + if err != nil { + t.Fatal(err) + } + + // now try and visit a document + var fieldValuesSeen int + err = segment.VisitDocument(0, func(field string, typ byte, value []byte, pos []uint64) bool { + fieldValuesSeen++ + return true + }) + if err != nil { + t.Fatal(err) + } + if fieldValuesSeen != 5 { + t.Errorf("expected 5 field values, got %d", fieldValuesSeen) + } +} + +func TestOpenMulti(t *testing.T) { + _ = os.RemoveAll("/tmp/scorch.zap") + + memSegment := buildMemSegmentMulti() + err := PersistSegment(memSegment, "/tmp/scorch.zap", 1024) + if err != nil { + t.Fatalf("error persisting segment: %v", err) + } + + segment, err := Open("/tmp/scorch.zap") + if err != nil { + t.Fatalf("error opening segment: %v", err) + } + defer func() { + cerr := 
segment.Close() + if cerr != nil { + t.Fatalf("error closing segment: %v", err) + } + }() + + if segment.Count() != 2 { + t.Errorf("expected count 2, got %d", segment.Count()) + } + + // check the desc field + dict, err := segment.Dictionary("desc") + if err != nil { + t.Fatal(err) + } + if dict == nil { + t.Fatal("got nil dict, expected non-nil") + } + + postingsList, err := dict.PostingsList("thing", nil) + if err != nil { + t.Fatal(err) + } + if postingsList == nil { + t.Fatal("got nil postings list, expected non-nil") + } + + postingsItr := postingsList.Iterator() + if postingsItr == nil { + t.Fatal("got nil iterator, expected non-nil") + } + + count := 0 + nextPosting, err := postingsItr.Next() + for nextPosting != nil && err == nil { + count++ + nextPosting, err = postingsItr.Next() + } + if err != nil { + t.Fatal(err) + } + + if count != 2 { + t.Errorf("expected count to be 2, got %d", count) + } + + // get docnum of a + exclude, err := segment.DocNumbers([]string{"a"}) + if err != nil { + t.Fatal(err) + } + + // look for term 'thing' excluding doc 'a' + postingsListExcluding, err := dict.PostingsList("thing", exclude) + if err != nil { + t.Fatal(err) + } + if postingsList == nil { + t.Fatal("got nil postings list, expected non-nil") + } + + postingsListExcludingCount := postingsListExcluding.Count() + if postingsListExcludingCount != 1 { + t.Errorf("expected count from postings list to be 1, got %d", postingsListExcludingCount) + } + + postingsItrExcluding := postingsListExcluding.Iterator() + if postingsItr == nil { + t.Fatal("got nil iterator, expected non-nil") + } + + count = 0 + nextPosting, err = postingsItrExcluding.Next() + for nextPosting != nil && err == nil { + count++ + nextPosting, err = postingsItrExcluding.Next() + } + if err != nil { + t.Fatal(err) + } + + if count != 1 { + t.Errorf("expected count to be 1, got %d", count) + } +} + +func TestOpenMultiWithTwoChunks(t *testing.T) { + _ = os.RemoveAll("/tmp/scorch.zap") + + memSegment := 
buildMemSegmentMulti() + err := PersistSegment(memSegment, "/tmp/scorch.zap", 1) + if err != nil { + t.Fatalf("error persisting segment: %v", err) + } + + segment, err := Open("/tmp/scorch.zap") + if err != nil { + t.Fatalf("error opening segment: %v", err) + } + defer func() { + cerr := segment.Close() + if cerr != nil { + t.Fatalf("error closing segment: %v", err) + } + }() + + if segment.Count() != 2 { + t.Errorf("expected count 2, got %d", segment.Count()) + } + + // check the desc field + dict, err := segment.Dictionary("desc") + if err != nil { + t.Fatal(err) + } + if dict == nil { + t.Fatal("got nil dict, expected non-nil") + } + + postingsList, err := dict.PostingsList("thing", nil) + if err != nil { + t.Fatal(err) + } + if postingsList == nil { + t.Fatal("got nil postings list, expected non-nil") + } + + postingsItr := postingsList.Iterator() + if postingsItr == nil { + t.Fatal("got nil iterator, expected non-nil") + } + + count := 0 + nextPosting, err := postingsItr.Next() + for nextPosting != nil && err == nil { + count++ + nextPosting, err = postingsItr.Next() + } + if err != nil { + t.Fatal(err) + } + + if count != 2 { + t.Errorf("expected count to be 2, got %d", count) + } + + // get docnum of a + exclude, err := segment.DocNumbers([]string{"a"}) + if err != nil { + t.Fatal(err) + } + + // look for term 'thing' excluding doc 'a' + postingsListExcluding, err := dict.PostingsList("thing", exclude) + if err != nil { + t.Fatal(err) + } + if postingsList == nil { + t.Fatal("got nil postings list, expected non-nil") + } + + postingsItrExcluding := postingsListExcluding.Iterator() + if postingsItr == nil { + t.Fatal("got nil iterator, expected non-nil") + } + + count = 0 + nextPosting, err = postingsItrExcluding.Next() + for nextPosting != nil && err == nil { + count++ + nextPosting, err = postingsItrExcluding.Next() + } + if err != nil { + t.Fatal(err) + } + + if count != 1 { + t.Errorf("expected count to be 1, got %d", count) + } +} From 
414899618b647f72f9633960be20586416f118f3 Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Sat, 9 Dec 2017 14:28:50 -0500 Subject: [PATCH 032/728] switch from bolt format to zap in the persister --- index/scorch/persister.go | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/index/scorch/persister.go b/index/scorch/persister.go index 44393e961..8a171b311 100644 --- a/index/scorch/persister.go +++ b/index/scorch/persister.go @@ -23,8 +23,8 @@ import ( "github.com/RoaringBitmap/roaring" "github.com/blevesearch/bleve/index/scorch/segment" - scorchBolt "github.com/blevesearch/bleve/index/scorch/segment/bolt" "github.com/blevesearch/bleve/index/scorch/segment/mem" + "github.com/blevesearch/bleve/index/scorch/segment/zap" "github.com/boltdb/bolt" ) @@ -142,9 +142,9 @@ func (s *Scorch) persistSnapshot(snapshot *IndexSnapshot) error { switch seg := segmentSnapshot.segment.(type) { case *mem.Segment: // need to persist this to disk - filename := fmt.Sprintf("%x.bolt", segmentSnapshot.id) + filename := fmt.Sprintf("%x.zap", segmentSnapshot.id) path := s.path + string(os.PathSeparator) + filename - err2 := scorchBolt.PersistSegment(seg, path, 1024) + err2 := zap.PersistSegment(seg, path, 1024) if err2 != nil { return fmt.Errorf("error persisting segment: %v", err2) } @@ -153,8 +153,7 @@ func (s *Scorch) persistSnapshot(snapshot *IndexSnapshot) error { if err != nil { return err } - case *scorchBolt.Segment: - + case *zap.Segment: path := seg.Path() filename := strings.TrimPrefix(path, s.path+string(os.PathSeparator)) err = snapshotSegmentBucket.Put(boltPathKey, []byte(filename)) @@ -181,7 +180,7 @@ func (s *Scorch) persistSnapshot(snapshot *IndexSnapshot) error { // now try to open all the new snapshots newSegments := make(map[uint64]segment.Segment) for segmentID, path := range newSegmentPaths { - newSegments[segmentID], err = scorchBolt.Open(path) + newSegments[segmentID], err = zap.Open(path) if err != nil { return fmt.Errorf("error opening new 
segment at %s, %v", path, err) } @@ -321,7 +320,7 @@ func (s *Scorch) loadSegment(segmentBucket *bolt.Bucket) (*SegmentSnapshot, erro return nil, fmt.Errorf("segment path missing") } segmentPath := s.path + string(os.PathSeparator) + string(pathBytes) - segment, err := scorchBolt.Open(segmentPath) + segment, err := zap.Open(segmentPath) if err != nil { return nil, fmt.Errorf("error opening bolt segment: %v", err) } From e0d9828cd05bfd0ee9d118cec196af4aca7b8a42 Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Sat, 9 Dec 2017 14:42:36 -0500 Subject: [PATCH 033/728] add more detail to the readme --- index/scorch/segment/zap/README.md | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/index/scorch/segment/zap/README.md b/index/scorch/segment/zap/README.md index 079eef9b3..39735d34d 100644 --- a/index/scorch/segment/zap/README.md +++ b/index/scorch/segment/zap/README.md @@ -1,5 +1,25 @@ # zap file format +The file is written in the reverse order that we typically access data. This helps us write in one pass since later sections of the file require file offsets of things we've already written. + +Current usage: + +- mmap the entire file +- crc-32 bytes and version are in fixed position at end of the file +- reading remainder of footer could be version specific +- remainder of footer gives us: + - 2 important offsets (fields index and stored data index) + - 2 important values (number of docs and chunk factor) +- field data is processed once and memoized onto the heap so that we never have to go back to disk for it +- access to stored data by doc number means first navigating to the stored data index, then accessing a fixed position offset into that slice, which gives us the actual address of the data. the first bytes of that section tell us the size of data so that we know where it ends. 
+- access to all other indexed data follows the following pattern: + - first know the field name -> convert to id + - next navigate to term dictionary for that field + - some operations stop here and do dictionary ops + - next use dictionary to navigate to posting list for a specific term + - walk posting list + - if necessary, walk posting details as we go + ## stored fields section - for each document From dc0adc882739a6a1c819aa7b7c144ce4fbfe05a0 Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Sat, 9 Dec 2017 20:52:01 -0500 Subject: [PATCH 034/728] add fsync --- index/scorch/segment/zap/build.go | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/index/scorch/segment/zap/build.go b/index/scorch/segment/zap/build.go index 1ed95e80f..bef263ac1 100644 --- a/index/scorch/segment/zap/build.go +++ b/index/scorch/segment/zap/build.go @@ -87,6 +87,11 @@ func PersistSegment(memSegment *mem.Segment, path string, chunkFactor uint32) (e return err } + err = f.Sync() + if err != nil { + return err + } + err = f.Close() if err != nil { return err From 690cd39921945253fe3227649fec7cec89074575 Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Sun, 10 Dec 2017 08:55:59 -0500 Subject: [PATCH 035/728] add crazy slow but functional DocumentVisitFieldTerms --- index/scorch/reader.go | 2 +- index/scorch/scorch_test.go | 60 ++++++++++++++++++++++++++++++++ index/scorch/snapshot_index.go | 9 +++++ index/scorch/snapshot_segment.go | 45 ++++++++++++++++++++++++ 4 files changed, 115 insertions(+), 1 deletion(-) diff --git a/index/scorch/reader.go b/index/scorch/reader.go index 0e643f7ca..e4abbce64 100644 --- a/index/scorch/reader.go +++ b/index/scorch/reader.go @@ -58,7 +58,7 @@ func (r *Reader) Document(id string) (*document.Document, error) { } func (r *Reader) DocumentVisitFieldTerms(id index.IndexInternalID, fields []string, visitor index.DocumentFieldTermVisitor) error { - panic("document visit field terms not implemented") + return r.root.DocumentVisitFieldTerms(id, fields, 
visitor) } func (r *Reader) Fields() ([]string, error) { diff --git a/index/scorch/scorch_test.go b/index/scorch/scorch_test.go index 52a86ab2e..25854bcd3 100644 --- a/index/scorch/scorch_test.go +++ b/index/scorch/scorch_test.go @@ -1195,6 +1195,66 @@ func TestIndexTermReaderCompositeFields(t *testing.T) { } } +func TestIndexDocumentVisitFieldTerms(t *testing.T) { + defer func() { + err := DestroyTest() + if err != nil { + t.Fatal(err) + } + }() + + analysisQueue := index.NewAnalysisQueue(1) + idx, err := NewScorch(Name, testConfig, analysisQueue) + if err != nil { + t.Fatal(err) + } + err = idx.Open() + if err != nil { + t.Errorf("error opening index: %v", err) + } + defer func() { + err := idx.Close() + if err != nil { + t.Fatal(err) + } + }() + + doc := document.NewDocument("1") + doc.AddField(document.NewTextFieldWithIndexingOptions("name", []uint64{}, []byte("test"), document.IndexField|document.StoreField|document.IncludeTermVectors)) + doc.AddField(document.NewTextFieldWithIndexingOptions("title", []uint64{}, []byte("mister"), document.IndexField|document.StoreField|document.IncludeTermVectors)) + err = idx.Update(doc) + if err != nil { + t.Errorf("Error updating index: %v", err) + } + + indexReader, err := idx.Reader() + if err != nil { + t.Error(err) + } + defer func() { + err := indexReader.Close() + if err != nil { + t.Fatal(err) + } + }() + + fieldTerms := make(index.FieldTerms) + + err = indexReader.DocumentVisitFieldTerms(index.IndexInternalID("1"), []string{"name", "title"}, func(field string, term []byte) { + fieldTerms[field] = append(fieldTerms[field], string(term)) + }) + if err != nil { + t.Error(err) + } + expectedFieldTerms := index.FieldTerms{ + "name": []string{"test"}, + "title": []string{"mister"}, + } + if !reflect.DeepEqual(fieldTerms, expectedFieldTerms) { + t.Errorf("expected field terms: %#v, got: %#v", expectedFieldTerms, fieldTerms) + } +} + func TestConcurrentUpdate(t *testing.T) { defer func() { err := DestroyTest() diff --git 
a/index/scorch/snapshot_index.go b/index/scorch/snapshot_index.go index c96082c54..d2efa2dcd 100644 --- a/index/scorch/snapshot_index.go +++ b/index/scorch/snapshot_index.go @@ -341,6 +341,15 @@ func (i *IndexSnapshot) TermFieldReader(term []byte, field string, includeFreq, return rv, nil } +func (i *IndexSnapshot) DocumentVisitFieldTerms(id index.IndexInternalID, fields []string, + visitor index.DocumentFieldTermVisitor) error { + + docNum := docInternalToNumber(id) + segmentIndex, localDocNum := i.segmentIndexAndLocalDocNumFromGlobal(docNum) + + return i.segment[segmentIndex].DocumentVisitFieldTerms(localDocNum, fields, visitor) +} + func docNumberToBytes(in uint64) []byte { buf := new(bytes.Buffer) diff --git a/index/scorch/snapshot_segment.go b/index/scorch/snapshot_segment.go index 1a50eb6cc..14c49450b 100644 --- a/index/scorch/snapshot_segment.go +++ b/index/scorch/snapshot_segment.go @@ -16,6 +16,7 @@ package scorch import ( "github.com/RoaringBitmap/roaring" + "github.com/blevesearch/bleve/index" "github.com/blevesearch/bleve/index/scorch/segment" ) @@ -56,6 +57,50 @@ func (s *SegmentSnapshot) VisitDocument(num uint64, visitor segment.DocumentFiel return s.segment.VisitDocument(num, visitor) } +func (s *SegmentSnapshot) DocumentVisitFieldTerms(num uint64, fields []string, + visitor index.DocumentFieldTermVisitor) error { + collection := make(map[string][][]byte) + // collect field indexed values + for _, field := range fields { + dict, err := s.Dictionary(field) + if err != nil { + return err + } + dictItr := dict.Iterator() + var next *index.DictEntry + next, err = dictItr.Next() + for next != nil && err == nil { + postings, err2 := dict.PostingsList(next.Term, nil) + if err2 != nil { + return err2 + } + postingsItr := postings.Iterator() + nextPosting, err2 := postingsItr.Next() + for err2 == nil && nextPosting != nil && nextPosting.Number() <= num { + if nextPosting.Number() == num { + // got what we're looking for + collection[field] = 
append(collection[field], []byte(next.Term)) + } + nextPosting, err = postingsItr.Next() + } + if err2 != nil { + return err + } + next, err = dictItr.Next() + } + if err != nil { + return err + } + } + // invoke callback + for field, values := range collection { + for _, value := range values { + visitor(field, value) + } + } + return nil +} + func (s *SegmentSnapshot) Count() uint64 { rv := s.segment.Count() From e8cc7ac0bff37727e4dc8043e4bb4da09523c341 Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Mon, 11 Dec 2017 09:05:50 -0500 Subject: [PATCH 036/728] add new fields command to zap cmd-line util --- .../scorch/segment/zap/cmd/zap/cmd/fields.go | 70 +++++++++++++++++++ 1 file changed, 70 insertions(+) create mode 100644 index/scorch/segment/zap/cmd/zap/cmd/fields.go diff --git a/index/scorch/segment/zap/cmd/zap/cmd/fields.go b/index/scorch/segment/zap/cmd/zap/cmd/fields.go new file mode 100644 index 000000000..472d966f5 --- /dev/null +++ b/index/scorch/segment/zap/cmd/zap/cmd/fields.go @@ -0,0 +1,70 @@ +// Copyright (c) 2017 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package cmd + +import ( + "encoding/binary" + "fmt" + + "github.com/blevesearch/bleve/index/scorch/segment/zap" + "github.com/spf13/cobra" +) + +// fieldsCmd represents the fields command +var fieldsCmd = &cobra.Command{ + Use: "fields [path]", + Short: "fields prints the fields in the specified file", + Long: `The fields command lets you print the fields in the specified file.`, + RunE: func(cmd *cobra.Command, args []string) error { + + data := segment.Data() + + crcOffset := len(data) - 4 + verOffset := crcOffset - 4 + chunkOffset := verOffset - 4 + fieldsOffset := chunkOffset - 8 + fieldsIndexOffset := binary.BigEndian.Uint64(data[fieldsOffset : fieldsOffset+8]) + fieldsIndexEnd := uint64(len(data) - zap.FooterSize) + + // iterate through fields index + var fieldID uint64 + for fieldsIndexOffset+(8*fieldID) < fieldsIndexEnd { + addr := binary.BigEndian.Uint64(data[fieldsIndexOffset+(8*fieldID) : fieldsIndexOffset+(8*fieldID)+8]) + var n uint64 + indexedLoc, read := binary.Uvarint(data[addr:fieldsIndexEnd]) + n += uint64(read) + + var dictLoc uint64 + dictLoc, read = binary.Uvarint(data[addr+n : fieldsIndexEnd]) + n += uint64(read) + + var nameLen uint64 + nameLen, read = binary.Uvarint(data[addr+n : fieldsIndexEnd]) + n += uint64(read) + + name := string(data[addr+n : addr+n+nameLen]) + + fmt.Printf("field %d '%s' indexedLoc: %t starts at %d (%x)\n", fieldID, name, indexedLoc == 1, dictLoc, dictLoc) + + fieldID++ + } + + return nil + }, +} + +func init() { + RootCmd.AddCommand(fieldsCmd) +} From 8280859bb8a5c1d83b8db2eca1f19c65920bfb29 Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Mon, 11 Dec 2017 09:07:01 -0500 Subject: [PATCH 037/728] handle read-only and in-mem only cases --- index/scorch/scorch.go | 64 +++++++++++++++++++---------- index/scorch/segment/zap/build.go | 2 +- index/scorch/segment/zap/segment.go | 2 +- 3 files changed, 44 insertions(+), 24 deletions(-) diff --git a/index/scorch/scorch.go b/index/scorch/scorch.go index 678349216..d95cb05bc 
100644 --- a/index/scorch/scorch.go +++ b/index/scorch/scorch.go @@ -38,6 +38,7 @@ const Name = "scorch" const Version uint8 = 1 type Scorch struct { + readOnly bool version uint8 config map[string]interface{} analysisQueue *index.AnalysisQueue @@ -67,6 +68,10 @@ func NewScorch(storeName string, config map[string]interface{}, analysisQueue *i root: &IndexSnapshot{}, nextSnapshotEpoch: 1, } + ro, ok := config["read_only"].(bool) + if ok { + rv.readOnly = ro + } return rv, nil } @@ -77,25 +82,35 @@ func (s *Scorch) Open() error { return fmt.Errorf("must specify path") } if s.path == "" { - return os.ErrInvalid + s.unsafeBatch = true } - err := os.MkdirAll(s.path, 0700) - if err != nil { - return err + var rootBoltOpt *bolt.Options + if s.readOnly { + rootBoltOpt = &bolt.Options{ + ReadOnly: true, + } + } else { + if s.path != "" { + err := os.MkdirAll(s.path, 0700) + if err != nil { + return err + } + } } - rootBoltPath := s.path + string(os.PathSeparator) + "root.bolt" + var err error + if s.path != "" { + s.rootBolt, err = bolt.Open(rootBoltPath, 0600, rootBoltOpt) + if err != nil { + return err + } - s.rootBolt, err = bolt.Open(rootBoltPath, 0600, nil) - if err != nil { - return err - } - - // now see if there is any existing state to load - err = s.loadFromBolt() - if err != nil { - return err + // now see if there is any existing state to load + err = s.loadFromBolt() + if err != nil { + return err + } } s.closeCh = make(chan struct{}) @@ -104,8 +119,11 @@ func (s *Scorch) Open() error { s.asyncTasks.Add(1) go s.mainLoop() - s.asyncTasks.Add(1) - go s.persisterLoop() + + if !s.readOnly && s.path != "" { + s.asyncTasks.Add(1) + go s.persisterLoop() + } return nil } @@ -117,12 +135,14 @@ func (s *Scorch) Close() (err error) { s.asyncTasks.Wait() // now close the root bolt - err = s.rootBolt.Close() - s.rootLock.Lock() - for _, segment := range s.root.segment { - cerr := segment.Close() - if err == nil { - err = cerr + if s.rootBolt != nil { + err = 
s.rootBolt.Close() + s.rootLock.Lock() + for _, segment := range s.root.segment { + cerr := segment.Close() + if err == nil { + err = cerr + } } } diff --git a/index/scorch/segment/zap/build.go b/index/scorch/segment/zap/build.go index bef263ac1..0ccb6cb0c 100644 --- a/index/scorch/segment/zap/build.go +++ b/index/scorch/segment/zap/build.go @@ -582,7 +582,7 @@ func persistFields(memSegment *mem.Segment, w *CountHashWriter, dictLocs []uint6 // NOTE: update if you make the footer bigger // crc + ver + chunk + field offset + stored offset + num docs -const footerSize = 4 + 4 + 4 + 8 + 8 + 8 +const FooterSize = 4 + 4 + 4 + 8 + 8 + 8 func persistFooter(numDocs, storedIndexOffset, fieldIndexOffset uint64, chunkFactor uint32, w *CountHashWriter) error { diff --git a/index/scorch/segment/zap/segment.go b/index/scorch/segment/zap/segment.go index c8e8f389b..b8784921a 100644 --- a/index/scorch/segment/zap/segment.go +++ b/index/scorch/segment/zap/segment.go @@ -105,7 +105,7 @@ func (s *Segment) loadConfig() error { func (s *Segment) loadFields() error { // NOTE for now we assume the fields index immediately preceeds the footer // if this changes, need to adjust accordingly (or store epxlicit length) - fieldsIndexEnd := uint64(len(s.mm) - footerSize) + fieldsIndexEnd := uint64(len(s.mm) - FooterSize) // iterate through fields index var fieldID uint64 From eada7b209b30c188133877b344c4ea0addfbfdf8 Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Mon, 11 Dec 2017 10:16:56 -0500 Subject: [PATCH 038/728] fix test issue identified by sreekanth --- index/scorch/scorch_test.go | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/index/scorch/scorch_test.go b/index/scorch/scorch_test.go index 25854bcd3..b46a5ffd9 100644 --- a/index/scorch/scorch_test.go +++ b/index/scorch/scorch_test.go @@ -1240,7 +1240,12 @@ func TestIndexDocumentVisitFieldTerms(t *testing.T) { fieldTerms := make(index.FieldTerms) - err = 
indexReader.DocumentVisitFieldTerms(index.IndexInternalID("1"), []string{"name", "title"}, func(field string, term []byte) { + internalID, err := indexReader.GetInternal([]byte("1")) + if err != nil { + t.Fatal(err) + } + + err = indexReader.DocumentVisitFieldTerms(internalID, []string{"name", "title"}, func(field string, term []byte) { fieldTerms[field] = append(fieldTerms[field], string(term)) }) if err != nil { From d7eb223e146b1b2b048fd2e1689a4986ac45ade5 Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Mon, 11 Dec 2017 10:20:26 -0500 Subject: [PATCH 039/728] remove bolt segment format upcomning breaking changes and no desire to maintain --- index/scorch/segment/bolt/README.md | 306 ------------- index/scorch/segment/bolt/build.go | 518 ---------------------- index/scorch/segment/bolt/build_test.go | 288 ------------ index/scorch/segment/bolt/dict.go | 161 ------- index/scorch/segment/bolt/dict_test.go | 183 -------- index/scorch/segment/bolt/posting.go | 323 -------------- index/scorch/segment/bolt/segment.go | 319 ------------- index/scorch/segment/bolt/segment_test.go | 517 --------------------- 8 files changed, 2615 deletions(-) delete mode 100644 index/scorch/segment/bolt/README.md delete mode 100644 index/scorch/segment/bolt/build.go delete mode 100644 index/scorch/segment/bolt/build_test.go delete mode 100644 index/scorch/segment/bolt/dict.go delete mode 100644 index/scorch/segment/bolt/dict_test.go delete mode 100644 index/scorch/segment/bolt/posting.go delete mode 100644 index/scorch/segment/bolt/segment.go delete mode 100644 index/scorch/segment/bolt/segment_test.go diff --git a/index/scorch/segment/bolt/README.md b/index/scorch/segment/bolt/README.md deleted file mode 100644 index 2c6cd31d5..000000000 --- a/index/scorch/segment/bolt/README.md +++ /dev/null @@ -1,306 +0,0 @@ -# bolt segment format - -## top level key space (all sub-buckets, as bolt has no root bucket) - -We have chosen to letter these starting with 'a' and in the code refer to them 
with more meaningful names. The reason is that we intend to write them in order, and this lets us rearrange them more easily later. - -- 'a' field storage -- 'b' term dictionaries -- 'c' postings list -- 'd' postings details -- 'e' stored fields -- 'x' configuration - -## variable length integers that sort correctly (insert order same as numeric) - -We use numbers as keys in several places. We want those keys to be small, so we prefer to use a variable length key to minimize space, but we also want to insert these in order, so the encoding has to sort correctly. - -We have chosen to the the scheme found in [CockroachDB](https://github.com/cockroachdb/cockroach/blob/2dd65dde5d90c157f4b93f92502ca1063b904e1d/pkg/util/encoding/encoding.go). - -In short, the first byte indicates how many bytes will follow, with a few other nice properties. -- values 0-127 are not used in the first byte (this means we can still use any ASCII values we want and avoid collision) -- very small values are packed directly into this first byte -For the full details see the link above. - -## field storage bucket - -Contains one row for each field, the key is the integer field ID, and the value is the string name associated with the field. - -There is one additional row with key 'l'. The value is a binary serialization of a [roaring bitmap](https://github.com/RoaringBitmap/roaring), with bits set for each field id which also index location details with each posting. - -## term dictionary bucket - -Contains one row for each field, the key is the integer field ID, and the value is a binary serialization of the [Vellum](https://github.com/couchbaselabs/vellum) FST. The Vellum FST maps from term (utf-8 string) to a posting ID (uint64). - -## postings list bucket - -Contains one row for each postings list, the key is the integer posting ID, the value is a binary serialization of a [roaring bitmap](https://github.com/RoaringBitmap/roaring). 
The roaring bitmap has bits set for each doc number that used this term in this field. - -## posting details bucket - -Contains one sub-bucket for each postings list, the name of the sub-bucket is the posting ID. - -### individual posting detail sub-bucket - -Contains one sub-bucket for each chunk. A chunk contains details for sub-section of the docNum key space. By default, the chunk size is 1024, so all posting details for the first 1024 docs are in chunk zero, then the next 1024 in chunk one, and so on. - -The purpose of the chunking is so that when trying to Seek/Advance through a large number of hits to something much further ahead, we have to keep seeking through the roaring bitmap, but we can jump to the nearest chunk for details, and only seek within the details of the current chunk. - -#### chunk posting detail sub-bucket - -Contains two key/value pairs: - -Key 'a' contains a [govarint](https://github.com/Smerity/govarint) compressed slice of uint64 values. For each hit in the postings list, there are two values on this list, the first is the term frequency (uint64) and the second is the norm factor (float32). - -Key 'b' contains a [govarint](https://github.com/Smerity/govarint) compressed slice of uint64 values. For each location (there will be one location for each 'frequency' in the list above) there will be a variable number of uint64 values as follows: - -- field ID (uint16) -- pos (uint64) -- start (uint64) -- end (uint64) -- number of array position entries that follow (uint64) -- variable number of array positions (each uint64) - -## stored field values sub-bucket - -Contains one sub-bucket for each doc number (uint64). - -## stored field doc specific sub-bucket - -Contains two key/value pairs: - -Key 'a' contains a [govarint](https://github.com/Smerity/govarint) compressed slice of uint64 values. 
For each stored field there are a variable number of uint64 values as follows: - -- field ID (uint16) -- value type (byte) (string/number/date/geo/etc) -- start offset (in the uncompressed slice of data) -- length (in the uncompressed slice of data) -- number of array position entries that follow (uint64) -- variable number of array positions (each uint64) - -Key 'b' contains a [snappy]() compressed sequence of bytes. The input to the snappy compression was a slice of bytes containing the field values, in the same order the metadata slice was created. - -## configuration sub-bucket - -Currently contains two key/value pairs: - -Key 'c' contains a BigEndian encoded uint32 chunk size. This chunk size must be used when computing doc number to chunk conversions in this segment. - -Key 'v' contains a version number, currently 0. - -## Example - -The following is a dump of the boltdb bucket/key/value space for a segment which contains two documents: - -``` -{ - "_id": "a", - "name": "wow", - "desc": "some thing", - "tag": ["cold", "dark"] -} - -{ - "_id": "b", - "name": "who", - "desc": "some thing", - "tag": ["cold", "dark"] -} -``` - -``` -[61] ('a' - field storage) - 6c ('l' - roaring bitmap of field IDs which have index location data) - 3a 30 00 00 01 00 00 00 00 00 03 00 10 00 00 00 01 00 02 00 03 00 04 00 - 88 (field ID 0) - 5f 69 64 (utf-8 string '_id') - 89 (field ID 1) - 5f 61 6c 6c (utf-8 string '_all') - 8a (field ID 2) - 6e 61 6d 65 (utf-8 string 'name') - 8b (field ID 3) - 64 65 73 63 (utf-8 string 'desc') - 8c (field ID 4) - 74 61 67 (utf-8 string 'tag') -[62] ('b' - term dictionary) - 88 (field ID 0) - 01 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 0b 05 00 00 62 61 11 02 02 00 00 00 00 00 00 00 17 00 00 00 00 00 00 00 (vellum FST data) - 89 - 01 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 10 92 cf c4 00 10 a7 c7 c5 00 10 82 d0 c4 00 10 97 cb c8 ce 00 10 84 00 10 8c 00 0d 01 04 6f 68 11 02 00 02 01 04 03 01 0f 15 1a 1f 77 74 73 64 63 11 05 06 00 00 00 
00 00 00 00 43 00 00 00 00 00 00 00 - 8a - 01 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 10 84 00 10 8c 00 06 01 04 6f 68 11 02 06 01 11 8c 02 00 00 00 00 00 00 00 21 00 00 00 00 00 00 00 - 8b - 01 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 10 82 d0 c4 00 10 97 cb c8 ce 08 07 01 07 74 73 11 02 02 00 00 00 00 00 00 00 22 00 00 00 00 00 00 00 - 8c - 01 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 10 92 cf c4 00 10 a7 c7 c5 0a 09 01 06 64 63 11 02 02 00 00 00 00 00 00 00 21 00 00 00 00 00 00 00 -[63] ('c' - postings lists) - 88 (field ID 0) - 3a 30 00 00 01 00 00 00 00 00 00 00 10 00 00 00 00 00 (roaring bitmap data) - 89 - 3a 30 00 00 01 00 00 00 00 00 01 00 10 00 00 00 00 00 01 00 - 8a - 3a 30 00 00 01 00 00 00 00 00 01 00 10 00 00 00 00 00 01 00 - 8b - 3a 30 00 00 01 00 00 00 00 00 01 00 10 00 00 00 00 00 01 00 - 8c - 3a 30 00 00 01 00 00 00 00 00 01 00 10 00 00 00 00 00 01 00 - 8d - 3a 30 00 00 01 00 00 00 00 00 00 00 10 00 00 00 00 00 - 8e - 3a 30 00 00 01 00 00 00 00 00 00 00 10 00 00 00 00 00 - 8f - 3a 30 00 00 01 00 00 00 00 00 01 00 10 00 00 00 00 00 01 00 - 90 - 3a 30 00 00 01 00 00 00 00 00 01 00 10 00 00 00 00 00 01 00 - 91 - 3a 30 00 00 01 00 00 00 00 00 01 00 10 00 00 00 00 00 01 00 - 92 - 3a 30 00 00 01 00 00 00 00 00 01 00 10 00 00 00 00 00 01 00 - 93 - 3a 30 00 00 01 00 00 00 00 00 00 00 10 00 00 00 01 00 - 94 - 3a 30 00 00 01 00 00 00 00 00 00 00 10 00 00 00 01 00 - 95 - 3a 30 00 00 01 00 00 00 00 00 00 00 10 00 00 00 01 00 -[64] ('d' - postings details) - [88] (posting ID 0) - [88] (chunk ID 0) - 61 ('a' term freq/norm data) - 01 ae f2 93 f7 03 - 62 ('b' term location data) - 02 01 00 03 00 - [89] (posting ID 1) - [88] (chunk ID 0) - 61 ('a' term freq/norm data) - 01 ae f2 93 f7 03 - 62 ('b' term location data) - 03 01 00 04 00 - [89] (chunk ID 1) - 61 ('a' term freq/norm data) - 01 ae f2 93 f7 03 - 62 ('b' term location data) - 03 01 00 04 00 - [8a] - [88] - 61 - 01 ae f2 93 f7 03 - 62 - 03 02 05 0a 00 - [89] - 61 - 01 ae f2 93 f7 03 - 
62 - 03 02 05 0a 00 - [8b] - [88] - 61 - 01 ae f2 93 f7 03 - 62 - 04 01 00 04 01 00 - [89] - 61 - 01 ae f2 93 f7 03 - 62 - 04 01 00 04 01 00 - [8c] - [88] - 61 - 01 ae f2 93 f7 03 - 62 - 04 01 00 04 01 01 - [89] - 61 - 01 ae f2 93 f7 03 - 62 - 04 01 00 04 01 01 - [8d] - [88] - 61 - 01 80 80 80 fc 03 - 62 - - [8e] - [88] - 61 - 01 80 80 80 fc 03 - 62 - 02 01 00 03 00 - [8f] - [88] - 61 - 01 f3 89 d4 f9 03 - 62 - 03 01 00 04 00 - [89] - 61 - 01 f3 89 d4 f9 03 - 62 - 03 01 00 04 00 - [90] - [88] - 61 - 01 f3 89 d4 f9 03 - 62 - 03 02 05 0a 00 - [89] - 61 - 01 f3 89 d4 f9 03 - 62 - 03 02 05 0a 00 - [91] - [88] - 61 - 01 f3 89 d4 f9 03 - 62 - 04 01 00 04 01 00 - [89] - 61 - 01 f3 89 d4 f9 03 - 62 - 04 01 00 04 01 00 - [92] - [88] - 61 - 01 f3 89 d4 f9 03 - 62 - 04 01 00 04 01 01 - [89] - 61 - 01 f3 89 d4 f9 03 - 62 - 04 01 00 04 01 01 - [93] - [89] - 61 - 01 80 80 80 fc 03 - 62 - - [94] - [89] - 61 - 01 80 80 80 fc 03 - 62 - 02 01 00 03 00 - [95] - [89] - 61 - 01 ae f2 93 f7 03 - 62 - 02 01 00 03 00 -[65] ('e' - stored fields) - [88] (doc num 0) - 61 ('a' - stored field meta slice) - 00 74 00 01 00 02 74 01 03 00 03 74 04 0a 00 04 74 0e 04 01 00 04 74 12 04 01 01 - 62 ('b' - snappy compressed value bytes) - 16 54 61 77 6f 77 73 6f 6d 65 20 74 68 69 6e 67 63 6f 6c 64 64 61 72 6b - [89] - 61 - 00 74 00 01 00 02 74 01 03 00 03 74 04 0a 00 04 74 0e 04 01 00 04 74 12 04 01 01 - 62 - 16 54 62 77 68 6f 73 6f 6d 65 20 74 68 69 6e 67 63 6f 6c 64 64 61 72 6b -[78] ('x' - configuration) - 63 ('c' - chunk size) - 00 00 00 01 (big endian 1) - 76 ('v' - version) - 00 (single byte 0) -``` diff --git a/index/scorch/segment/bolt/build.go b/index/scorch/segment/bolt/build.go deleted file mode 100644 index ac01f9b8d..000000000 --- a/index/scorch/segment/bolt/build.go +++ /dev/null @@ -1,518 +0,0 @@ -// Copyright (c) 2017 Couchbase, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package bolt - -import ( - "bytes" - "encoding/binary" - "math" - - "github.com/RoaringBitmap/roaring" - "github.com/Smerity/govarint" - "github.com/blevesearch/bleve/index/scorch/segment" - "github.com/blevesearch/bleve/index/scorch/segment/mem" - "github.com/boltdb/bolt" - "github.com/couchbaselabs/vellum" - "github.com/golang/snappy" -) - -var fieldsBucket = []byte{'a'} -var dictBucket = []byte{'b'} -var postingsBucket = []byte{'c'} -var postingDetailsBucket = []byte{'d'} -var storedBucket = []byte{'e'} -var configBucket = []byte{'x'} - -var indexLocsKey = []byte{'l'} - -var freqNormKey = []byte{'a'} -var locKey = []byte{'b'} - -var metaKey = []byte{'a'} -var dataKey = []byte{'b'} - -var chunkKey = []byte{'c'} -var versionKey = []byte{'v'} - -var version = 0 - -func PersistSegment(memSegment *mem.Segment, path string, chunkFactor uint32) (err error) { - db, err := bolt.Open(path, 0777, nil) - if err != nil { - return err - } - defer func() { - if cerr := db.Close(); err == nil && cerr != nil { - err = cerr - } - }() - - tx, err := db.Begin(true) - if err != nil { - return err - } - defer func() { - if err == nil { - err = tx.Commit() - } else { - _ = tx.Rollback() - } - }() - - err = persistFields(memSegment, tx) - if err != nil { - return err - } - - err = persistDictionary(memSegment, tx) - if err != nil { - return err - } - - err = persistPostings(memSegment, tx) - if err != nil { - return err - } - - err = persistPostingsDetails(memSegment, tx, chunkFactor) - if err != nil { - return err - } - - err = persistStored(memSegment, tx) - if 
err != nil { - return err - } - - err = persistConfig(tx, chunkFactor) - if err != nil { - return err - } - - return nil -} - -// persistFields puts the fields as separate k/v pairs in the fields bucket -// makes very little attempt to squeeze a lot of perf because it is expected -// this is usually somewhat small, and when re-opened it will be read once and -// kept on the heap, and not read out of the file subsequently -func persistFields(memSegment *mem.Segment, tx *bolt.Tx) error { - bucket, err := tx.CreateBucket(fieldsBucket) - if err != nil { - return err - } - bucket.FillPercent = 1.0 - - // build/persist a bitset corresponding to the field locs array - indexLocs := roaring.NewBitmap() - for i, indexLoc := range memSegment.FieldsLoc { - if indexLoc { - indexLocs.AddInt(i) - } - } - var indexLocsBuffer bytes.Buffer - _, err = indexLocs.WriteTo(&indexLocsBuffer) - if err != nil { - return err - } - err = bucket.Put(indexLocsKey, indexLocsBuffer.Bytes()) - if err != nil { - return err - } - - // we use special varint which is still guaranteed to sort correctly - fieldBuf := make([]byte, 0, segment.MaxVarintSize) - for fieldID, fieldName := range memSegment.FieldsInv { - if fieldID != 0 { - // reset buffer if necessary - fieldBuf = fieldBuf[:0] - } - fieldBuf = segment.EncodeUvarintAscending(fieldBuf, uint64(fieldID)) - err = bucket.Put(fieldBuf, []byte(fieldName)) - if err != nil { - return err - } - } - return nil -} - -func persistDictionary(memSegment *mem.Segment, tx *bolt.Tx) error { - bucket, err := tx.CreateBucket(dictBucket) - if err != nil { - return err - } - bucket.FillPercent = 1.0 - - // TODO consider whether or not there is benefit to building the vellums - // concurrently. While we have to insert them into the bolt in order, - // the (presumably) heavier lifting involved in building the FST could - // be done concurrently. 
- - fieldBuf := make([]byte, 0, segment.MaxVarintSize) - for fieldID, fieldTerms := range memSegment.DictKeys { - if fieldID != 0 { - // reset buffers if necessary - fieldBuf = fieldBuf[:0] - } - // start a new vellum for this field - var buffer bytes.Buffer - builder, err := vellum.New(&buffer, nil) - if err != nil { - return err - } - - dict := memSegment.Dicts[fieldID] - // now walk the dictionary in order of fieldTerms (already sorted) - for i := range fieldTerms { - err = builder.Insert([]byte(fieldTerms[i]), dict[fieldTerms[i]]-1) - if err != nil { - return err - } - } - err = builder.Close() - if err != nil { - return err - } - - // put this FST into bolt - // we use special varint which is still guaranteed to sort correctly - fieldBuf = segment.EncodeUvarintAscending(fieldBuf, uint64(fieldID)) - err = bucket.Put(fieldBuf, buffer.Bytes()) - if err != nil { - return err - } - } - - return nil -} - -func persistPostings(memSegment *mem.Segment, tx *bolt.Tx) error { - bucket, err := tx.CreateBucket(postingsBucket) - if err != nil { - return err - } - bucket.FillPercent = 1.0 - - postingIDBuf := make([]byte, 0, segment.MaxVarintSize) - for postingID := range memSegment.Postings { - if postingID != 0 { - // reset buffers if necessary - postingIDBuf = postingIDBuf[:0] - } - postingIDBuf = segment.EncodeUvarintAscending(postingIDBuf, uint64(postingID)) - var postingsBuf bytes.Buffer - _, err := memSegment.Postings[postingID].WriteTo(&postingsBuf) - if err != nil { - return err - } - err = bucket.Put(postingIDBuf, postingsBuf.Bytes()) - if err != nil { - return err - } - } - - return nil -} - -func persistPostingsDetails(memSegment *mem.Segment, tx *bolt.Tx, - chunkFactor uint32) error { - bucket, err := tx.CreateBucket(postingDetailsBucket) - if err != nil { - return err - } - bucket.FillPercent = 1.0 - - postingIDBuf := make([]byte, 0, segment.MaxVarintSize) - for postingID := range memSegment.Postings { - if postingID != 0 { - // reset buffers if necessary - 
postingIDBuf = postingIDBuf[:0] - } - postingIDBuf = segment.EncodeUvarintAscending(postingIDBuf, uint64(postingID)) - - // make bucket for posting details - postingBucket, err := bucket.CreateBucket(postingIDBuf) - if err != nil { - return err - } - postingBucket.FillPercent = 1.0 - - err = persistPostingDetails(memSegment, postingBucket, postingID, chunkFactor) - if err != nil { - return err - } - } - - return nil -} - -func persistPostingDetails(memSegment *mem.Segment, postingBucket *bolt.Bucket, - postingID int, chunkFactor uint32) error { - // walk the postings list - var err error - var chunkBucket *bolt.Bucket - var currChunk uint32 - chunkIDBuf := make([]byte, 0, segment.MaxVarintSize) - postingsListItr := memSegment.Postings[postingID].Iterator() - var encoder *govarint.Base128Encoder - var locEncoder *govarint.Base128Encoder - - encodingBuf := &bytes.Buffer{} - locEncodingBuf := &bytes.Buffer{} - - var offset int - var locOffset int - for postingsListItr.HasNext() { - docNum := postingsListItr.Next() - chunk := docNum / chunkFactor - - // create new chunk bucket if necessary - if chunkBucket == nil || currChunk != chunk { - - // close out last chunk - if chunkBucket != nil { - - // fix me write freq/norms - encoder.Close() - err = chunkBucket.Put(freqNormKey, encodingBuf.Bytes()) - if err != nil { - return err - } - locEncoder.Close() - err = chunkBucket.Put(locKey, locEncodingBuf.Bytes()) - if err != nil { - return err - } - - // reset for next - chunkIDBuf = chunkIDBuf[:0] - encodingBuf = &bytes.Buffer{} - locEncodingBuf = &bytes.Buffer{} - } - - // prepare next chunk - chunkIDBuf = segment.EncodeUvarintAscending(chunkIDBuf, uint64(chunk)) - chunkBucket, err = postingBucket.CreateBucket(chunkIDBuf) - if err != nil { - return err - } - chunkBucket.FillPercent = 1.0 - currChunk = chunk - - encoder = govarint.NewU64Base128Encoder(encodingBuf) - locEncoder = govarint.NewU64Base128Encoder(locEncodingBuf) - } - - // put freq - _, err = 
encoder.PutU64(memSegment.Freqs[postingID][offset]) - if err != nil { - return err - } - - // put norm - norm := memSegment.Norms[postingID][offset] - normBits := math.Float32bits(norm) - _, err = encoder.PutU32(normBits) - if err != nil { - return err - } - - // put locations - - for i := 0; i < int(memSegment.Freqs[postingID][offset]); i++ { - - if len(memSegment.Locfields[postingID]) > 0 { - // put field - _, err = locEncoder.PutU64(uint64(memSegment.Locfields[postingID][locOffset])) - if err != nil { - return err - } - - // put pos - _, err = locEncoder.PutU64(memSegment.Locpos[postingID][locOffset]) - if err != nil { - return err - } - - // put start - _, err = locEncoder.PutU64(memSegment.Locstarts[postingID][locOffset]) - if err != nil { - return err - } - - // put end - _, err = locEncoder.PutU64(memSegment.Locends[postingID][locOffset]) - if err != nil { - return err - } - - // put array positions - num := len(memSegment.Locarraypos[postingID][locOffset]) - - // put the number of array positions to follow - _, err = locEncoder.PutU64(uint64(num)) - if err != nil { - return err - } - - // put each array position - for j := 0; j < num; j++ { - _, err = locEncoder.PutU64(memSegment.Locarraypos[postingID][locOffset][j]) - if err != nil { - return err - } - } - } - - locOffset++ - } - - offset++ - } - - // close out last chunk - - if chunkBucket != nil { - // fix me write freq/norms - encoder.Close() - err = chunkBucket.Put(freqNormKey, encodingBuf.Bytes()) - if err != nil { - return err - } - locEncoder.Close() - err = chunkBucket.Put(locKey, locEncodingBuf.Bytes()) - if err != nil { - return err - } - } - - return nil -} - -func persistStored(memSegment *mem.Segment, tx *bolt.Tx) error { - bucket, err := tx.CreateBucket(storedBucket) - if err != nil { - return err - } - bucket.FillPercent = 1.0 - - var curr int - // we use special varint which is still guaranteed to sort correctly - docNumBuf := make([]byte, 0, segment.MaxVarintSize) - for docNum, 
storedValues := range memSegment.Stored { - var metaBuf bytes.Buffer - var data, compressed []byte - if docNum != 0 { - // reset buffer if necessary - docNumBuf = docNumBuf[:0] - curr = 0 - } - // create doc sub-bucket - docNumBuf = segment.EncodeUvarintAscending(docNumBuf, uint64(docNum)) - docBucket, err := bucket.CreateBucket(docNumBuf) - if err != nil { - return err - } - docBucket.FillPercent = 1.0 - - metaEncoder := govarint.NewU64Base128Encoder(&metaBuf) - - // encode fields in order - for fieldID := range memSegment.FieldsInv { - if storedFieldValues, ok := storedValues[uint16(fieldID)]; ok { - // has stored values for this field - num := len(storedFieldValues) - - // process each value - for i := 0; i < num; i++ { - // encode field - _, err2 := metaEncoder.PutU64(uint64(fieldID)) - if err2 != nil { - return err2 - } - // encode type - _, err2 = metaEncoder.PutU64(uint64(memSegment.StoredTypes[docNum][uint16(fieldID)][i])) - if err2 != nil { - return err2 - } - // encode start offset - _, err2 = metaEncoder.PutU64(uint64(curr)) - if err2 != nil { - return err2 - } - // end len - _, err2 = metaEncoder.PutU64(uint64(len(storedFieldValues[i]))) - if err2 != nil { - return err2 - } - // encode number of array pos - _, err2 = metaEncoder.PutU64(uint64(len(memSegment.StoredPos[docNum][uint16(fieldID)][i]))) - if err2 != nil { - return err2 - } - // encode all array positions - for j := 0; j < len(memSegment.StoredPos[docNum][uint16(fieldID)][i]); j++ { - _, err2 = metaEncoder.PutU64(memSegment.StoredPos[docNum][uint16(fieldID)][i][j]) - if err2 != nil { - return err2 - } - } - // append data - data = append(data, storedFieldValues[i]...) 
- // update curr - curr += len(storedFieldValues[i]) - } - } - } - metaEncoder.Close() - - err = docBucket.Put(metaKey, metaBuf.Bytes()) - if err != nil { - return err - } - - // compress data - compressed = snappy.Encode(compressed, data) - - err = docBucket.Put(dataKey, compressed) - if err != nil { - return err - } - - } - - return nil -} - -func persistConfig(tx *bolt.Tx, chunkFactor uint32) error { - bucket, err := tx.CreateBucket(configBucket) - if err != nil { - return err - } - - chunkVal := make([]byte, 4) - binary.BigEndian.PutUint32(chunkVal, chunkFactor) - err = bucket.Put(chunkKey, chunkVal) - if err != nil { - return err - } - - err = bucket.Put(versionKey, []byte{byte(version)}) - if err != nil { - return err - } - - return nil -} diff --git a/index/scorch/segment/bolt/build_test.go b/index/scorch/segment/bolt/build_test.go deleted file mode 100644 index deb4157d4..000000000 --- a/index/scorch/segment/bolt/build_test.go +++ /dev/null @@ -1,288 +0,0 @@ -// Copyright (c) 2017 Couchbase, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package bolt - -import ( - "os" - "testing" - - "github.com/blevesearch/bleve/analysis" - "github.com/blevesearch/bleve/document" - "github.com/blevesearch/bleve/index" - "github.com/blevesearch/bleve/index/scorch/segment/mem" -) - -func TestBuild(t *testing.T) { - _ = os.RemoveAll("/tmp/scorch.bolt") - - memSegment := buildMemSegment() - err := PersistSegment(memSegment, "/tmp/scorch.bolt", 1024) - if err != nil { - t.Fatal(err) - } -} - -func buildMemSegment() *mem.Segment { - doc := &document.Document{ - ID: "a", - Fields: []document.Field{ - document.NewTextFieldCustom("_id", nil, []byte("a"), document.IndexField|document.StoreField, nil), - document.NewTextFieldCustom("name", nil, []byte("wow"), document.IndexField|document.StoreField|document.IncludeTermVectors, nil), - document.NewTextFieldCustom("desc", nil, []byte("some thing"), document.IndexField|document.StoreField|document.IncludeTermVectors, nil), - document.NewTextFieldCustom("tag", []uint64{0}, []byte("cold"), document.IndexField|document.StoreField|document.IncludeTermVectors, nil), - document.NewTextFieldCustom("tag", []uint64{1}, []byte("dark"), document.IndexField|document.StoreField|document.IncludeTermVectors, nil), - }, - CompositeFields: []*document.CompositeField{ - document.NewCompositeField("_all", true, nil, []string{"_id"}), - }, - } - - // forge analyzed docs - results := []*index.AnalysisResult{ - &index.AnalysisResult{ - Document: doc, - Analyzed: []analysis.TokenFrequencies{ - analysis.TokenFrequency(analysis.TokenStream{ - &analysis.Token{ - Start: 0, - End: 1, - Position: 1, - Term: []byte("a"), - }, - }, nil, false), - analysis.TokenFrequency(analysis.TokenStream{ - &analysis.Token{ - Start: 0, - End: 3, - Position: 1, - Term: []byte("wow"), - }, - }, nil, true), - analysis.TokenFrequency(analysis.TokenStream{ - &analysis.Token{ - Start: 0, - End: 4, - Position: 1, - Term: []byte("some"), - }, - &analysis.Token{ - Start: 5, - End: 10, - Position: 2, - Term: []byte("thing"), - 
}, - }, nil, true), - analysis.TokenFrequency(analysis.TokenStream{ - &analysis.Token{ - Start: 0, - End: 4, - Position: 1, - Term: []byte("cold"), - }, - }, []uint64{0}, true), - analysis.TokenFrequency(analysis.TokenStream{ - &analysis.Token{ - Start: 0, - End: 4, - Position: 1, - Term: []byte("dark"), - }, - }, []uint64{1}, true), - }, - Length: []int{ - 1, - 1, - 2, - 1, - 1, - }, - }, - } - - // fix up composite fields - for _, ar := range results { - for i, f := range ar.Document.Fields { - for _, cf := range ar.Document.CompositeFields { - cf.Compose(f.Name(), ar.Length[i], ar.Analyzed[i]) - } - } - } - - return mem.NewFromAnalyzedDocs(results) -} - -func buildMemSegmentMulti() *mem.Segment { - - doc := &document.Document{ - ID: "a", - Fields: []document.Field{ - document.NewTextFieldCustom("_id", nil, []byte("a"), document.IndexField|document.StoreField, nil), - document.NewTextFieldCustom("name", nil, []byte("wow"), document.IndexField|document.StoreField|document.IncludeTermVectors, nil), - document.NewTextFieldCustom("desc", nil, []byte("some thing"), document.IndexField|document.StoreField|document.IncludeTermVectors, nil), - document.NewTextFieldCustom("tag", []uint64{0}, []byte("cold"), document.IndexField|document.StoreField|document.IncludeTermVectors, nil), - document.NewTextFieldCustom("tag", []uint64{1}, []byte("dark"), document.IndexField|document.StoreField|document.IncludeTermVectors, nil), - }, - CompositeFields: []*document.CompositeField{ - document.NewCompositeField("_all", true, nil, []string{"_id"}), - }, - } - - doc2 := &document.Document{ - ID: "b", - Fields: []document.Field{ - document.NewTextFieldCustom("_id", nil, []byte("b"), document.IndexField|document.StoreField, nil), - document.NewTextFieldCustom("name", nil, []byte("who"), document.IndexField|document.StoreField|document.IncludeTermVectors, nil), - document.NewTextFieldCustom("desc", nil, []byte("some thing"), 
document.IndexField|document.StoreField|document.IncludeTermVectors, nil), - document.NewTextFieldCustom("tag", []uint64{0}, []byte("cold"), document.IndexField|document.StoreField|document.IncludeTermVectors, nil), - document.NewTextFieldCustom("tag", []uint64{1}, []byte("dark"), document.IndexField|document.StoreField|document.IncludeTermVectors, nil), - }, - CompositeFields: []*document.CompositeField{ - document.NewCompositeField("_all", true, nil, []string{"_id"}), - }, - } - - // forge analyzed docs - results := []*index.AnalysisResult{ - &index.AnalysisResult{ - Document: doc, - Analyzed: []analysis.TokenFrequencies{ - analysis.TokenFrequency(analysis.TokenStream{ - &analysis.Token{ - Start: 0, - End: 1, - Position: 1, - Term: []byte("a"), - }, - }, nil, false), - analysis.TokenFrequency(analysis.TokenStream{ - &analysis.Token{ - Start: 0, - End: 3, - Position: 1, - Term: []byte("wow"), - }, - }, nil, true), - analysis.TokenFrequency(analysis.TokenStream{ - &analysis.Token{ - Start: 0, - End: 4, - Position: 1, - Term: []byte("some"), - }, - &analysis.Token{ - Start: 5, - End: 10, - Position: 2, - Term: []byte("thing"), - }, - }, nil, true), - analysis.TokenFrequency(analysis.TokenStream{ - &analysis.Token{ - Start: 0, - End: 4, - Position: 1, - Term: []byte("cold"), - }, - }, []uint64{0}, true), - analysis.TokenFrequency(analysis.TokenStream{ - &analysis.Token{ - Start: 0, - End: 4, - Position: 1, - Term: []byte("dark"), - }, - }, []uint64{1}, true), - }, - Length: []int{ - 1, - 1, - 2, - 1, - 1, - }, - }, - &index.AnalysisResult{ - Document: doc2, - Analyzed: []analysis.TokenFrequencies{ - analysis.TokenFrequency(analysis.TokenStream{ - &analysis.Token{ - Start: 0, - End: 1, - Position: 1, - Term: []byte("b"), - }, - }, nil, false), - analysis.TokenFrequency(analysis.TokenStream{ - &analysis.Token{ - Start: 0, - End: 3, - Position: 1, - Term: []byte("who"), - }, - }, nil, true), - analysis.TokenFrequency(analysis.TokenStream{ - &analysis.Token{ - Start: 0, 
- End: 4, - Position: 1, - Term: []byte("some"), - }, - &analysis.Token{ - Start: 5, - End: 10, - Position: 2, - Term: []byte("thing"), - }, - }, nil, true), - analysis.TokenFrequency(analysis.TokenStream{ - &analysis.Token{ - Start: 0, - End: 4, - Position: 1, - Term: []byte("cold"), - }, - }, []uint64{0}, true), - analysis.TokenFrequency(analysis.TokenStream{ - &analysis.Token{ - Start: 0, - End: 4, - Position: 1, - Term: []byte("dark"), - }, - }, []uint64{1}, true), - }, - Length: []int{ - 1, - 1, - 2, - 1, - 1, - }, - }, - } - - // fix up composite fields - for _, ar := range results { - for i, f := range ar.Document.Fields { - for _, cf := range ar.Document.CompositeFields { - cf.Compose(f.Name(), ar.Length[i], ar.Analyzed[i]) - } - } - } - - segment := mem.NewFromAnalyzedDocs(results) - - return segment -} diff --git a/index/scorch/segment/bolt/dict.go b/index/scorch/segment/bolt/dict.go deleted file mode 100644 index 0f38a3d60..000000000 --- a/index/scorch/segment/bolt/dict.go +++ /dev/null @@ -1,161 +0,0 @@ -// Copyright (c) 2017 Couchbase, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package bolt - -import ( - "fmt" - - "github.com/RoaringBitmap/roaring" - "github.com/blevesearch/bleve/index" - "github.com/blevesearch/bleve/index/scorch/segment" - "github.com/couchbaselabs/vellum" - "github.com/couchbaselabs/vellum/regexp" -) - -// Dictionary is the bolt representation of the term dictionary -type Dictionary struct { - segment *Segment - field string - fieldID uint16 - fst *vellum.FST -} - -// PostingsList returns the postings list for the specified term -func (d *Dictionary) PostingsList(term string, except *roaring.Bitmap) (segment.PostingsList, error) { - return d.postingsList(term, except) -} - -func (d *Dictionary) postingsList(term string, except *roaring.Bitmap) (*PostingsList, error) { - rv := &PostingsList{ - dictionary: d, - term: term, - except: except, - } - - if d.fst != nil { - postingsID, exists, err := d.fst.Get([]byte(term)) - if err != nil { - return nil, fmt.Errorf("vellum err: %v", err) - } - if exists { - rv.postingsID = postingsID - postingsIDKey := segment.EncodeUvarintAscending(nil, postingsID) - bucket := d.segment.tx.Bucket(postingsBucket) - if bucket == nil { - return nil, fmt.Errorf("postings bucket missing") - } - - roaringBytes := bucket.Get(postingsIDKey) - if roaringBytes == nil { - return nil, fmt.Errorf("postings for postingsID %d missing", postingsID) - } - bitmap := roaring.NewBitmap() - _, err = bitmap.FromBuffer(roaringBytes) - if err != nil { - return nil, fmt.Errorf("error loading roaring bitmap: %v", err) - } - - rv.postings = bitmap - rv.postingKey = postingsIDKey - } - } - - return rv, nil -} - -// Iterator returns an iterator for this dictionary -func (d *Dictionary) Iterator() segment.DictionaryIterator { - - rv := &DictionaryIterator{ - d: d, - } - - if d.fst != nil { - itr, err := d.fst.Iterator(nil, nil) - if err == nil { - rv.itr = itr - } - } - - return rv -} - -// PrefixIterator returns an iterator which only visits terms having the -// the specified prefix -func (d *Dictionary) 
PrefixIterator(prefix string) segment.DictionaryIterator { - rv := &DictionaryIterator{ - d: d, - } - - if d.fst != nil { - r, err := regexp.New(prefix + ".*") - if err == nil { - itr, err := d.fst.Search(r, nil, nil) - if err == nil { - rv.itr = itr - } - } - } - - return rv -} - -// RangeIterator returns an iterator which only visits terms between the -// start and end terms. NOTE: bleve.index API specifies the end is inclusive. -func (d *Dictionary) RangeIterator(start, end string) segment.DictionaryIterator { - rv := &DictionaryIterator{ - d: d, - } - - // need to increment the end position to be inclusive - endBytes := []byte(end) - if endBytes[len(endBytes)-1] < 0xff { - endBytes[len(endBytes)-1]++ - } else { - endBytes = append(endBytes, 0xff) - } - - if d.fst != nil { - itr, err := d.fst.Iterator([]byte(start), endBytes) - if err == nil { - rv.itr = itr - } - } - - return rv -} - -// DictionaryIterator is an iterator for term dictionary -type DictionaryIterator struct { - d *Dictionary - itr vellum.Iterator - err error -} - -// Next returns the next entry in the dictionary -func (i *DictionaryIterator) Next() (*index.DictEntry, error) { - if i.err == vellum.ErrIteratorDone { - return nil, nil - } else if i.err != nil { - return nil, i.err - } - term, count := i.itr.Current() - rv := &index.DictEntry{ - Term: string(term), - Count: count, - } - i.err = i.itr.Next() - return rv, nil -} diff --git a/index/scorch/segment/bolt/dict_test.go b/index/scorch/segment/bolt/dict_test.go deleted file mode 100644 index 2df57d67f..000000000 --- a/index/scorch/segment/bolt/dict_test.go +++ /dev/null @@ -1,183 +0,0 @@ -// Copyright (c) 2017 Couchbase, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package bolt - -import ( - "os" - "reflect" - "testing" - - "github.com/blevesearch/bleve/analysis" - "github.com/blevesearch/bleve/document" - "github.com/blevesearch/bleve/index" - "github.com/blevesearch/bleve/index/scorch/segment/mem" -) - -func buildMemSegmentForDict() *mem.Segment { - doc := &document.Document{ - ID: "a", - Fields: []document.Field{ - document.NewTextFieldCustom("_id", nil, []byte("a"), document.IndexField|document.StoreField, nil), - document.NewTextFieldCustom("desc", nil, []byte("apple ball cat dog egg fish bat"), document.IndexField|document.StoreField|document.IncludeTermVectors, nil), - }, - } - - // forge analyzed docs - results := []*index.AnalysisResult{ - &index.AnalysisResult{ - Document: doc, - Analyzed: []analysis.TokenFrequencies{ - analysis.TokenFrequency(analysis.TokenStream{ - &analysis.Token{ - Start: 0, - End: 1, - Position: 1, - Term: []byte("a"), - }, - }, nil, false), - analysis.TokenFrequency(analysis.TokenStream{ - &analysis.Token{ - Start: 0, - End: 5, - Position: 1, - Term: []byte("apple"), - }, - &analysis.Token{ - Start: 6, - End: 10, - Position: 2, - Term: []byte("ball"), - }, - &analysis.Token{ - Start: 11, - End: 14, - Position: 3, - Term: []byte("cat"), - }, - &analysis.Token{ - Start: 15, - End: 18, - Position: 4, - Term: []byte("dog"), - }, - &analysis.Token{ - Start: 19, - End: 22, - Position: 5, - Term: []byte("egg"), - }, - &analysis.Token{ - Start: 20, - End: 24, - Position: 6, - Term: []byte("fish"), - }, - &analysis.Token{ - Start: 25, - End: 28, - Position: 7, - Term: []byte("bat"), 
- }, - }, nil, true), - }, - Length: []int{ - 1, - 7, - }, - }, - } - - segment := mem.NewFromAnalyzedDocs(results) - - return segment -} - -func TestDictionary(t *testing.T) { - - _ = os.RemoveAll("/tmp/scorch.bolt") - - memSegment := buildMemSegmentForDict() - err := PersistSegment(memSegment, "/tmp/scorch.bolt", 1024) - if err != nil { - t.Fatalf("error persisting segment: %v", err) - } - - segment, err := Open("/tmp/scorch.bolt") - if err != nil { - t.Fatalf("error opening segment: %v", err) - } - defer func() { - cerr := segment.Close() - if cerr != nil { - t.Fatalf("error closing segment: %v", err) - } - }() - - dict, err := segment.Dictionary("desc") - if err != nil { - t.Fatal(err) - } - - // test basic full iterator - expected := []string{"apple", "ball", "bat", "cat", "dog", "egg", "fish"} - var got []string - itr := dict.Iterator() - next, err := itr.Next() - for next != nil && err == nil { - got = append(got, next.Term) - next, err = itr.Next() - } - if err != nil { - t.Fatalf("dict itr error: %v", err) - } - - if !reflect.DeepEqual(expected, got) { - t.Errorf("expected: %v, got: %v", expected, got) - } - - // test prefix iterator - expected = []string{"ball", "bat"} - got = got[:0] - itr = dict.PrefixIterator("b") - next, err = itr.Next() - for next != nil && err == nil { - got = append(got, next.Term) - next, err = itr.Next() - } - if err != nil { - t.Fatalf("dict itr error: %v", err) - } - - if !reflect.DeepEqual(expected, got) { - t.Errorf("expected: %v, got: %v", expected, got) - } - - // test range iterator - expected = []string{"cat", "dog", "egg"} - got = got[:0] - itr = dict.RangeIterator("cat", "egg") - next, err = itr.Next() - for next != nil && err == nil { - got = append(got, next.Term) - next, err = itr.Next() - } - if err != nil { - t.Fatalf("dict itr error: %v", err) - } - - if !reflect.DeepEqual(expected, got) { - t.Errorf("expected: %v, got: %v", expected, got) - } -} diff --git a/index/scorch/segment/bolt/posting.go 
b/index/scorch/segment/bolt/posting.go deleted file mode 100644 index bd038a575..000000000 --- a/index/scorch/segment/bolt/posting.go +++ /dev/null @@ -1,323 +0,0 @@ -// Copyright (c) 2017 Couchbase, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package bolt - -import ( - "bytes" - "fmt" - "math" - - "github.com/RoaringBitmap/roaring" - "github.com/Smerity/govarint" - "github.com/blevesearch/bleve/index/scorch/segment" - "github.com/boltdb/bolt" -) - -// PostingsList is an in-memory represenation of a postings list -type PostingsList struct { - dictionary *Dictionary - term string - postingsID uint64 - postings *roaring.Bitmap - except *roaring.Bitmap - postingKey []byte -} - -// Iterator returns an iterator for this postings list -func (p *PostingsList) Iterator() segment.PostingsIterator { - rv := &PostingsIterator{ - postings: p, - } - if p.postings != nil { - detailsBucket := p.dictionary.segment.tx.Bucket(postingDetailsBucket) - rv.detailBucket = detailsBucket.Bucket(p.postingKey) - rv.all = p.postings.Iterator() - if p.except != nil { - allExcept := p.postings.Clone() - allExcept.AndNot(p.except) - rv.actual = allExcept.Iterator() - } else { - rv.actual = p.postings.Iterator() - } - } - - return rv -} - -// Count returns the number of items on this postings list -func (p *PostingsList) Count() uint64 { - var rv uint64 - if p.postings != nil { - rv = p.postings.GetCardinality() - if p.except != nil { - except := p.except.GetCardinality() - if 
except > rv { - // avoid underflow - except = rv - } - rv -= except - } - } - return rv -} - -// PostingsIterator provides a way to iterate through the postings list -type PostingsIterator struct { - postings *PostingsList - all roaring.IntIterable - offset int - locoffset int - actual roaring.IntIterable - detailBucket *bolt.Bucket - - currChunk uint32 - currChunkFreqNorm []byte - currChunkLoc []byte - freqNormDecoder *govarint.Base128Decoder - locDecoder *govarint.Base128Decoder -} - -func (i *PostingsIterator) loadChunk(chunk int) error { - // load correct chunk bytes - chunkID := segment.EncodeUvarintAscending(nil, uint64(chunk)) - chunkBucket := i.detailBucket.Bucket(chunkID) - if chunkBucket == nil { - return fmt.Errorf("chunk %d missing", chunkID) - } - i.currChunkFreqNorm = chunkBucket.Get(freqNormKey) - i.freqNormDecoder = govarint.NewU64Base128Decoder(bytes.NewReader(i.currChunkFreqNorm)) - i.currChunkLoc = chunkBucket.Get(locKey) - i.locDecoder = govarint.NewU64Base128Decoder(bytes.NewReader(i.currChunkLoc)) - i.currChunk = uint32(chunk) - return nil -} - -func (i *PostingsIterator) readFreqNorm() (uint64, uint64, error) { - freq, err := i.freqNormDecoder.GetU64() - if err != nil { - return 0, 0, fmt.Errorf("error reading frequency: %v", err) - } - normBits, err := i.freqNormDecoder.GetU64() - if err != nil { - return 0, 0, fmt.Errorf("error reading norm: %v", err) - } - return freq, normBits, err -} - -// readLocation processes all the integers on the stream representing a single -// location. if you care about it, pass in a non-nil location struct, and we -// will fill it. if you don't care about it, pass in nil and we safely consume -// the contents. 
-func (i *PostingsIterator) readLocation(l *Location) error { - // read off field - fieldID, err := i.locDecoder.GetU64() - if err != nil { - return fmt.Errorf("error reading location field: %v", err) - } - // read off pos - pos, err := i.locDecoder.GetU64() - if err != nil { - return fmt.Errorf("error reading location pos: %v", err) - } - // read off start - start, err := i.locDecoder.GetU64() - if err != nil { - return fmt.Errorf("error reading location start: %v", err) - } - // read off end - end, err := i.locDecoder.GetU64() - if err != nil { - return fmt.Errorf("error reading location end: %v", err) - } - // read off num array pos - numArrayPos, err := i.locDecoder.GetU64() - if err != nil { - return fmt.Errorf("error reading location num array pos: %v", err) - } - - // group these together for less branching - if l != nil { - l.field = i.postings.dictionary.segment.fieldsInv[fieldID] - l.pos = pos - l.start = start - l.end = end - if numArrayPos > 0 { - l.ap = make([]uint64, int(numArrayPos)) - } - } - - // read off array positions - for k := 0; k < int(numArrayPos); k++ { - ap, err := i.locDecoder.GetU64() - if err != nil { - return fmt.Errorf("error reading array position: %v", err) - } - if l != nil { - l.ap[k] = ap - } - } - - return nil -} - -// Next returns the next posting on the postings list, or nil at the end -func (i *PostingsIterator) Next() (segment.Posting, error) { - if i.actual == nil || !i.actual.HasNext() { - return nil, nil - } - n := i.actual.Next() - nChunk := n / i.postings.dictionary.segment.chunkFactor - allN := i.all.Next() - allNChunk := allN / i.postings.dictionary.segment.chunkFactor - - // n is the next actual hit (excluding some postings) - // allN is the next hit in the full postings - // if they don't match, adjust offsets to factor in item we're skipping over - // incr the all iterator, and check again - for allN != n { - - // in different chunks, reset offsets - if allNChunk != nChunk { - i.locoffset = 0 - i.offset = 0 - } 
else { - - if i.currChunk != nChunk || i.currChunkFreqNorm == nil { - err := i.loadChunk(int(nChunk)) - if err != nil { - return nil, fmt.Errorf("error loading chunk: %v", err) - } - } - - // read off freq/offsets even though we don't care about them - freq, _, err := i.readFreqNorm() - if err != nil { - return nil, err - } - if i.postings.dictionary.segment.fieldsLoc[i.postings.dictionary.fieldID] { - for j := 0; j < int(freq); j++ { - err := i.readLocation(nil) - if err != nil { - return nil, err - } - } - } - - // in same chunk, need to account for offsets - i.offset++ - } - - allN = i.all.Next() - } - - if i.currChunk != nChunk || i.currChunkFreqNorm == nil { - err := i.loadChunk(int(nChunk)) - if err != nil { - return nil, fmt.Errorf("error loading chunk: %v", err) - } - } - - rv := &Posting{ - iterator: i, - docNum: uint64(n), - } - - var err error - var normBits uint64 - rv.freq, normBits, err = i.readFreqNorm() - if err != nil { - return nil, err - } - rv.norm = math.Float32frombits(uint32(normBits)) - if i.postings.dictionary.segment.fieldsLoc[i.postings.dictionary.fieldID] { - // read off 'freq' locations - rv.locs = make([]segment.Location, rv.freq) - locs := make([]Location, rv.freq) - for j := 0; j < int(rv.freq); j++ { - err := i.readLocation(&locs[j]) - if err != nil { - return nil, err - } - rv.locs[j] = &locs[j] - } - } - - return rv, nil -} - -// Posting is a single entry in a postings list -type Posting struct { - iterator *PostingsIterator - docNum uint64 - - freq uint64 - norm float32 - locs []segment.Location -} - -// Number returns the document number of this posting in this segment -func (p *Posting) Number() uint64 { - return p.docNum -} - -// Frequency returns the frequence of occurance of this term in this doc/field -func (p *Posting) Frequency() uint64 { - return p.freq -} - -// Norm returns the normalization factor for this posting -func (p *Posting) Norm() float64 { - return float64(p.norm) -} - -// Locations returns the location 
information for each occurance -func (p *Posting) Locations() []segment.Location { - return p.locs -} - -// Location represents the location of a single occurance -type Location struct { - field string - pos uint64 - start uint64 - end uint64 - ap []uint64 -} - -// Field returns the name of the field (useful in composite fields to know -// which original field the value came from) -func (l *Location) Field() string { - return l.field -} - -// Start returns the start byte offset of this occurance -func (l *Location) Start() uint64 { - return l.start -} - -// End returns the end byte offset of this occurance -func (l *Location) End() uint64 { - return l.end -} - -// Pos returns the 1-based phrase position of this occurance -func (l *Location) Pos() uint64 { - return l.pos -} - -// ArrayPositions returns the array position vector associated with this occurance -func (l *Location) ArrayPositions() []uint64 { - return l.ap -} diff --git a/index/scorch/segment/bolt/segment.go b/index/scorch/segment/bolt/segment.go deleted file mode 100644 index f53a98fe6..000000000 --- a/index/scorch/segment/bolt/segment.go +++ /dev/null @@ -1,319 +0,0 @@ -// Copyright (c) 2017 Couchbase, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package bolt - -import ( - "bytes" - "encoding/binary" - "fmt" - "io" - - "github.com/RoaringBitmap/roaring" - "github.com/Smerity/govarint" - "github.com/blevesearch/bleve/index/scorch/segment" - "github.com/boltdb/bolt" - "github.com/couchbaselabs/vellum" - "github.com/golang/snappy" -) - -var readOnlyOptions = &bolt.Options{ - ReadOnly: true, -} - -// _id field is always guaranteed to have fieldID of 0 -const idFieldID uint16 = 0 - -// Open returns a boltdb impl of a segment -func Open(path string) (segment.Segment, error) { - - db, err := bolt.Open(path, 0600, readOnlyOptions) - if err != nil { - return nil, err - } - - tx, err := db.Begin(false) - if err != nil { - _ = db.Close() - return nil, err - } - - rv := &Segment{ - db: db, - tx: tx, - fieldsMap: make(map[string]uint16), - } - - err = rv.loadConfig() - if err != nil { - _ = db.Close() - return nil, err - } - - err = rv.loadFields() - if err != nil { - _ = db.Close() - return nil, err - } - - return rv, nil -} - -// Segment implements a boltdb based implementation of a segment -type Segment struct { - version uint8 - chunkFactor uint32 - db *bolt.DB - tx *bolt.Tx - - fieldsMap map[string]uint16 - fieldsInv []string - fieldsLoc []bool -} - -func (s *Segment) loadConfig() (err error) { - bucket := s.tx.Bucket(configBucket) - if bucket == nil { - return fmt.Errorf("config bucket missing") - } - - ver := bucket.Get(versionKey) - if ver == nil { - return fmt.Errorf("version key missing") - } - s.version = ver[0] - - chunk := bucket.Get(chunkKey) - if chunk == nil { - return fmt.Errorf("chunk key is missing") - } - s.chunkFactor = binary.BigEndian.Uint32(chunk) - - return nil -} - -// loadFields reads the fields info from the segment so that we never have to go -// back to disk to access this (small and used frequently) -func (s *Segment) loadFields() (err error) { - - bucket := s.tx.Bucket(fieldsBucket) - if bucket == nil { - return fmt.Errorf("fields bucket missing") - } - - indexLocs := 
roaring.NewBitmap() - err = bucket.ForEach(func(k []byte, v []byte) error { - - // process index locations bitset - if k[0] == indexLocsKey[0] { - _, err2 := indexLocs.FromBuffer(v) - if err2 != nil { - return fmt.Errorf("error loading indexLocs: %v", err2) - } - } else { - - _, fieldID, err2 := segment.DecodeUvarintAscending(k) - if err2 != nil { - return err2 - } - // we store fieldID+1 in so we can discern the zero value - s.fieldsMap[string(v)] = uint16(fieldID + 1) - } - return nil - }) - if err != nil { - return err - } - - // now setup the inverse (should have same size as map and be keyed 0-(len-1)) - s.fieldsInv = make([]string, len(s.fieldsMap)) - for k, v := range s.fieldsMap { - s.fieldsInv[int(v)-1] = k - } - s.fieldsLoc = make([]bool, len(s.fieldsInv)) - for i := range s.fieldsInv { - if indexLocs.ContainsInt(i) { - s.fieldsLoc[i] = true - } - } - - return nil -} - -// Fields returns the field names used in this segment -func (s *Segment) Fields() []string { - return s.fieldsInv -} - -// Count returns the number of documents in this segment -// (this has no notion of deleted docs) -func (s *Segment) Count() uint64 { - return uint64(s.tx.Bucket(storedBucket).Stats().BucketN - 1) -} - -// Dictionary returns the term dictionary for the specified field -func (s *Segment) Dictionary(field string) (segment.TermDictionary, error) { - dict, err := s.dictionary(field) - if err == nil && dict == nil { - return &segment.EmptyDictionary{}, nil - } - return dict, err -} - -func (s *Segment) dictionary(field string) (*Dictionary, error) { - - rv := &Dictionary{ - segment: s, - field: field, - } - - rv.fieldID = s.fieldsMap[field] - if rv.fieldID > 0 { - rv.fieldID = rv.fieldID - 1 - fieldIDKey := segment.EncodeUvarintAscending(nil, uint64(rv.fieldID)) - bucket := s.tx.Bucket(dictBucket) - if bucket == nil { - return nil, fmt.Errorf("dictionary bucket missing") - } - fstBytes := bucket.Get(fieldIDKey) - if fstBytes == nil { - return nil, fmt.Errorf("dictionary field 
%s bytes nil", field) - } - if fstBytes != nil { - fst, err := vellum.Load(fstBytes) - if err != nil { - return nil, fmt.Errorf("dictionary field %s vellum err: %v", field, err) - } - if err == nil { - rv.fst = fst - } - } - - } else { - return nil, nil - } - - return rv, nil -} - -// VisitDocument invokes the DocFieldValueVistor for each stored field -// for the specified doc number -func (s *Segment) VisitDocument(num uint64, visitor segment.DocumentFieldValueVisitor) error { - storedBuucket := s.tx.Bucket(storedBucket) - if storedBuucket == nil { - return fmt.Errorf("stored bucket missing") - } - docNumKey := segment.EncodeUvarintAscending(nil, num) - docBucket := storedBuucket.Bucket(docNumKey) - if docBucket == nil { - return fmt.Errorf("segment has no doc number %d", num) - } - metaBytes := docBucket.Get(metaKey) - if metaBytes == nil { - return fmt.Errorf("stored meta bytes for doc number %d is nil", num) - } - dataBytes := docBucket.Get(dataKey) - if dataBytes == nil { - return fmt.Errorf("stored data bytes for doc number %d is nil", num) - } - uncompressed, err := snappy.Decode(nil, dataBytes) - if err != nil { - return err - } - - reader := bytes.NewReader(metaBytes) - decoder := govarint.NewU64Base128Decoder(reader) - - keepGoing := true - for keepGoing { - field, err := decoder.GetU64() - if err == io.EOF { - break - } - if err != nil { - return err - } - typ, err := decoder.GetU64() - if err != nil { - return err - } - offset, err := decoder.GetU64() - if err != nil { - return err - } - l, err := decoder.GetU64() - if err != nil { - return err - } - numap, err := decoder.GetU64() - if err != nil { - return err - } - var arrayPos []uint64 - if numap > 0 { - arrayPos = make([]uint64, numap) - for i := 0; i < int(numap); i++ { - ap, err := decoder.GetU64() - if err != nil { - return err - } - arrayPos[i] = ap - } - } - - value := uncompressed[offset : offset+l] - keepGoing = visitor(s.fieldsInv[field], byte(typ), value, arrayPos) - } - - return nil -} - 
-// DocNumbers returns a bitset corresponding to the doc numbers of all the -// provided _id strings -func (s *Segment) DocNumbers(ids []string) (*roaring.Bitmap, error) { - rv := roaring.New() - - if len(s.fieldsMap) > 0 { - idDict, err := s.dictionary("_id") - if err != nil { - return nil, err - } - - for _, id := range ids { - postings, err := idDict.postingsList(id, nil) - if err != nil { - return nil, err - } - if postings.postings != nil { - rv.Or(postings.postings) - } - } - } - - return rv, nil -} - -// Close releases all resources associated with this segment -func (s *Segment) Close() error { - err := s.tx.Rollback() - if err != nil { - _ = s.db.Close() - return err - } - return s.db.Close() -} - -func (s *Segment) Path() string { - return s.db.Path() -} diff --git a/index/scorch/segment/bolt/segment_test.go b/index/scorch/segment/bolt/segment_test.go deleted file mode 100644 index 16ac2cadd..000000000 --- a/index/scorch/segment/bolt/segment_test.go +++ /dev/null @@ -1,517 +0,0 @@ -// Copyright (c) 2017 Couchbase, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package bolt - -import ( - "math" - "os" - "reflect" - "testing" -) - -func TestOpen(t *testing.T) { - _ = os.RemoveAll("/tmp/scorch.bolt") - - memSegment := buildMemSegment() - err := PersistSegment(memSegment, "/tmp/scorch.bolt", 1024) - if err != nil { - t.Fatalf("error persisting segment: %v", err) - } - - segment, err := Open("/tmp/scorch.bolt") - if err != nil { - t.Fatalf("error opening segment: %v", err) - } - defer func() { - cerr := segment.Close() - if cerr != nil { - t.Fatalf("error closing segment: %v", err) - } - }() - - expectFields := map[string]struct{}{ - "_id": struct{}{}, - "_all": struct{}{}, - "name": struct{}{}, - "desc": struct{}{}, - "tag": struct{}{}, - } - fields := segment.Fields() - if len(fields) != len(expectFields) { - t.Errorf("expected %d fields, only got %d", len(expectFields), len(fields)) - } - for _, field := range fields { - if _, ok := expectFields[field]; !ok { - t.Errorf("got unexpected field: %s", field) - } - } - - docCount := segment.Count() - if docCount != 1 { - t.Errorf("expected count 1, got %d", docCount) - } - - // check the _id field - dict, err := segment.Dictionary("_id") - if err != nil { - t.Fatal(err) - } - if dict == nil { - t.Fatal("got nil dict, expected non-nil") - } - - postingsList, err := dict.PostingsList("a", nil) - if err != nil { - t.Fatal(err) - } - if postingsList == nil { - t.Fatal("got nil postings list, expected non-nil") - } - - postingsItr := postingsList.Iterator() - if postingsItr == nil { - t.Fatal("got nil iterator, expected non-nil") - } - - count := 0 - nextPosting, err := postingsItr.Next() - for nextPosting != nil && err == nil { - count++ - if nextPosting.Frequency() != 1 { - t.Errorf("expected frequency 1, got %d", nextPosting.Frequency()) - } - if nextPosting.Number() != 0 { - t.Errorf("expected doc number 0, got %d", nextPosting.Number()) - } - if nextPosting.Norm() != 1.0 { - t.Errorf("expected norm 1.0, got %f", nextPosting.Norm()) - } - - nextPosting, err = 
postingsItr.Next() - } - if err != nil { - t.Fatal(err) - } - - if count != 1 { - t.Errorf("expected count to be 1, got %d", count) - } - - // check the name field - dict, err = segment.Dictionary("name") - if err != nil { - t.Fatal(err) - } - if dict == nil { - t.Fatal("got nil dict, expected non-nil") - } - - postingsList, err = dict.PostingsList("wow", nil) - if err != nil { - t.Fatal(err) - } - if postingsList == nil { - t.Fatal("got nil postings list, expected non-nil") - } - - postingsItr = postingsList.Iterator() - if postingsItr == nil { - t.Fatal("got nil iterator, expected non-nil") - } - - count = 0 - nextPosting, err = postingsItr.Next() - for nextPosting != nil && err == nil { - count++ - if nextPosting.Frequency() != 1 { - t.Errorf("expected frequency 1, got %d", nextPosting.Frequency()) - } - if nextPosting.Number() != 0 { - t.Errorf("expected doc number 0, got %d", nextPosting.Number()) - } - if nextPosting.Norm() != 1.0 { - t.Errorf("expected norm 1.0, got %f", nextPosting.Norm()) - } - var numLocs uint64 - for _, loc := range nextPosting.Locations() { - numLocs++ - if loc.Field() != "name" { - t.Errorf("expected loc field to be 'name', got '%s'", loc.Field()) - } - if loc.Start() != 0 { - t.Errorf("expected loc start to be 0, got %d", loc.Start()) - } - if loc.End() != 3 { - t.Errorf("expected loc end to be 3, got %d", loc.End()) - } - if loc.Pos() != 1 { - t.Errorf("expected loc pos to be 1, got %d", loc.Pos()) - } - if loc.ArrayPositions() != nil { - t.Errorf("expect loc array pos to be nil, got %v", loc.ArrayPositions()) - } - } - if numLocs != nextPosting.Frequency() { - t.Errorf("expected %d locations, got %d", nextPosting.Frequency(), numLocs) - } - - nextPosting, err = postingsItr.Next() - } - if err != nil { - t.Fatal(err) - } - - if count != 1 { - t.Errorf("expected count to be 1, got %d", count) - } - - // check the _all field (composite) - dict, err = segment.Dictionary("_all") - if err != nil { - t.Fatal(err) - } - if dict == nil { - 
t.Fatal("got nil dict, expected non-nil") - } - - postingsList, err = dict.PostingsList("wow", nil) - if err != nil { - t.Fatal(err) - } - if postingsList == nil { - t.Fatal("got nil postings list, expected non-nil") - } - - postingsItr = postingsList.Iterator() - if postingsItr == nil { - t.Fatal("got nil iterator, expected non-nil") - } - - count = 0 - nextPosting, err = postingsItr.Next() - for nextPosting != nil && err == nil { - count++ - if nextPosting.Frequency() != 1 { - t.Errorf("expected frequency 1, got %d", nextPosting.Frequency()) - } - if nextPosting.Number() != 0 { - t.Errorf("expected doc number 0, got %d", nextPosting.Number()) - } - expectedNorm := float32(1.0 / math.Sqrt(float64(5))) - if nextPosting.Norm() != float64(expectedNorm) { - t.Errorf("expected norm %f, got %f", expectedNorm, nextPosting.Norm()) - } - var numLocs uint64 - for _, loc := range nextPosting.Locations() { - numLocs++ - if loc.Field() != "name" { - t.Errorf("expected loc field to be 'name', got '%s'", loc.Field()) - } - if loc.Start() != 0 { - t.Errorf("expected loc start to be 0, got %d", loc.Start()) - } - if loc.End() != 3 { - t.Errorf("expected loc end to be 3, got %d", loc.End()) - } - if loc.Pos() != 1 { - t.Errorf("expected loc pos to be 1, got %d", loc.Pos()) - } - if loc.ArrayPositions() != nil { - t.Errorf("expect loc array pos to be nil, got %v", loc.ArrayPositions()) - } - } - if numLocs != nextPosting.Frequency() { - t.Errorf("expected %d locations, got %d", nextPosting.Frequency(), numLocs) - } - - nextPosting, err = postingsItr.Next() - } - if err != nil { - t.Fatal(err) - } - - if count != 1 { - t.Errorf("expected count to be 1, got %d", count) - } - - // now try a field with array positions - dict, err = segment.Dictionary("tag") - if err != nil { - t.Fatal(err) - } - if dict == nil { - t.Fatal("got nil dict, expected non-nil") - } - - postingsList, err = dict.PostingsList("dark", nil) - if err != nil { - t.Fatal(err) - } - if postingsList == nil { - 
t.Fatal("got nil postings list, expected non-nil") - } - - postingsItr = postingsList.Iterator() - if postingsItr == nil { - t.Fatal("got nil iterator, expected non-nil") - } - - nextPosting, err = postingsItr.Next() - for nextPosting != nil && err == nil { - - if nextPosting.Frequency() != 1 { - t.Errorf("expected frequency 1, got %d", nextPosting.Frequency()) - } - if nextPosting.Number() != 0 { - t.Errorf("expected doc number 0, got %d", nextPosting.Number()) - } - var numLocs uint64 - for _, loc := range nextPosting.Locations() { - numLocs++ - if loc.Field() != "tag" { - t.Errorf("expected loc field to be 'name', got '%s'", loc.Field()) - } - if loc.Start() != 0 { - t.Errorf("expected loc start to be 0, got %d", loc.Start()) - } - if loc.End() != 4 { - t.Errorf("expected loc end to be 3, got %d", loc.End()) - } - if loc.Pos() != 1 { - t.Errorf("expected loc pos to be 1, got %d", loc.Pos()) - } - expectArrayPos := []uint64{1} - if !reflect.DeepEqual(loc.ArrayPositions(), expectArrayPos) { - t.Errorf("expect loc array pos to be %v, got %v", expectArrayPos, loc.ArrayPositions()) - } - } - if numLocs != nextPosting.Frequency() { - t.Errorf("expected %d locations, got %d", nextPosting.Frequency(), numLocs) - } - - nextPosting, err = postingsItr.Next() - } - if err != nil { - t.Fatal(err) - } - - // now try and visit a document - var fieldValuesSeen int - err = segment.VisitDocument(0, func(field string, typ byte, value []byte, pos []uint64) bool { - fieldValuesSeen++ - return true - }) - if err != nil { - t.Fatal(err) - } - if fieldValuesSeen != 5 { - t.Errorf("expected 5 field values, got %d", fieldValuesSeen) - } -} - -func TestOpenMulti(t *testing.T) { - _ = os.RemoveAll("/tmp/scorch.bolt") - - memSegment := buildMemSegmentMulti() - err := PersistSegment(memSegment, "/tmp/scorch.bolt", 1024) - if err != nil { - t.Fatalf("error persisting segment: %v", err) - } - - segment, err := Open("/tmp/scorch.bolt") - if err != nil { - t.Fatalf("error opening segment: %v", 
err) - } - defer func() { - cerr := segment.Close() - if cerr != nil { - t.Fatalf("error closing segment: %v", err) - } - }() - - if segment.Count() != 2 { - t.Errorf("expected count 2, got %d", segment.Count()) - } - - // check the desc field - dict, err := segment.Dictionary("desc") - if err != nil { - t.Fatal(err) - } - if dict == nil { - t.Fatal("got nil dict, expected non-nil") - } - - postingsList, err := dict.PostingsList("thing", nil) - if err != nil { - t.Fatal(err) - } - if postingsList == nil { - t.Fatal("got nil postings list, expected non-nil") - } - - postingsItr := postingsList.Iterator() - if postingsItr == nil { - t.Fatal("got nil iterator, expected non-nil") - } - - count := 0 - nextPosting, err := postingsItr.Next() - for nextPosting != nil && err == nil { - count++ - nextPosting, err = postingsItr.Next() - } - if err != nil { - t.Fatal(err) - } - - if count != 2 { - t.Errorf("expected count to be 2, got %d", count) - } - - // get docnum of a - exclude, err := segment.DocNumbers([]string{"a"}) - if err != nil { - t.Fatal(err) - } - - // look for term 'thing' excluding doc 'a' - postingsListExcluding, err := dict.PostingsList("thing", exclude) - if err != nil { - t.Fatal(err) - } - if postingsList == nil { - t.Fatal("got nil postings list, expected non-nil") - } - - postingsListExcludingCount := postingsListExcluding.Count() - if postingsListExcludingCount != 1 { - t.Errorf("expected count from postings list to be 1, got %d", postingsListExcludingCount) - } - - postingsItrExcluding := postingsListExcluding.Iterator() - if postingsItr == nil { - t.Fatal("got nil iterator, expected non-nil") - } - - count = 0 - nextPosting, err = postingsItrExcluding.Next() - for nextPosting != nil && err == nil { - count++ - nextPosting, err = postingsItrExcluding.Next() - } - if err != nil { - t.Fatal(err) - } - - if count != 1 { - t.Errorf("expected count to be 1, got %d", count) - } -} - -func TestOpenMultiWithTwoChunks(t *testing.T) { - _ = 
os.RemoveAll("/tmp/scorch.bolt") - - memSegment := buildMemSegmentMulti() - err := PersistSegment(memSegment, "/tmp/scorch.bolt", 1) - if err != nil { - t.Fatalf("error persisting segment: %v", err) - } - - segment, err := Open("/tmp/scorch.bolt") - if err != nil { - t.Fatalf("error opening segment: %v", err) - } - defer func() { - cerr := segment.Close() - if cerr != nil { - t.Fatalf("error closing segment: %v", err) - } - }() - - if segment.Count() != 2 { - t.Errorf("expected count 2, got %d", segment.Count()) - } - - // check the desc field - dict, err := segment.Dictionary("desc") - if err != nil { - t.Fatal(err) - } - if dict == nil { - t.Fatal("got nil dict, expected non-nil") - } - - postingsList, err := dict.PostingsList("thing", nil) - if err != nil { - t.Fatal(err) - } - if postingsList == nil { - t.Fatal("got nil postings list, expected non-nil") - } - - postingsItr := postingsList.Iterator() - if postingsItr == nil { - t.Fatal("got nil iterator, expected non-nil") - } - - count := 0 - nextPosting, err := postingsItr.Next() - for nextPosting != nil && err == nil { - count++ - nextPosting, err = postingsItr.Next() - } - if err != nil { - t.Fatal(err) - } - - if count != 2 { - t.Errorf("expected count to be 2, got %d", count) - } - - // get docnum of a - exclude, err := segment.DocNumbers([]string{"a"}) - if err != nil { - t.Fatal(err) - } - - // look for term 'thing' excluding doc 'a' - postingsListExcluding, err := dict.PostingsList("thing", exclude) - if err != nil { - t.Fatal(err) - } - if postingsList == nil { - t.Fatal("got nil postings list, expected non-nil") - } - - postingsItrExcluding := postingsListExcluding.Iterator() - if postingsItr == nil { - t.Fatal("got nil iterator, expected non-nil") - } - - count = 0 - nextPosting, err = postingsItrExcluding.Next() - for nextPosting != nil && err == nil { - count++ - nextPosting, err = postingsItrExcluding.Next() - } - if err != nil { - t.Fatal(err) - } - - if count != 1 { - t.Errorf("expected count to 
be 1, got %d", count) - } -} From 00722aa2997272e4ac043d91359465e0169a01e7 Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Mon, 11 Dec 2017 15:38:44 -0500 Subject: [PATCH 040/728] disable http unit test which relied on debug functionality --- http/handlers_test.go | 27 --------------------------- 1 file changed, 27 deletions(-) diff --git a/http/handlers_test.go b/http/handlers_test.go index 830260ab8..b6911b0b5 100644 --- a/http/handlers_test.go +++ b/http/handlers_test.go @@ -500,33 +500,6 @@ func TestHandlers(t *testing.T) { Status: http.StatusNotFound, ResponseBody: []byte(`no such index 'tix'`), }, - { - Desc: "debug doc", - Handler: debugHandler, - Path: "/ti1/a/debug", - Method: "GET", - Params: url.Values{ - "indexName": []string{"ti1"}, - "docID": []string{"a"}, - }, - Status: http.StatusOK, - ResponseMatch: map[string]bool{ - `"key"`: true, - `"val"`: true, - }, - }, - { - Desc: "debug doc invalid index", - Handler: debugHandler, - Path: "/ti1/a/debug", - Method: "GET", - Params: url.Values{ - "indexName": []string{"tix"}, - "docID": []string{"a"}, - }, - Status: http.StatusNotFound, - ResponseBody: []byte(`no such index 'tix'`), - }, { Desc: "create alias", Handler: aliasHandler, From f13b786609ba6b13480b05fa9c1dcf8d74b1b2d1 Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Mon, 11 Dec 2017 15:47:41 -0500 Subject: [PATCH 041/728] fix up issues to get all bleve unit tests passing for scorch make scorch default --- config.go | 4 +- index/scorch/reader.go | 18 ++++- index/scorch/segment/mem/build.go | 23 ++++-- index/scorch/segment/mem/posting.go | 6 +- index/scorch/segment/mem/segment.go | 5 +- index/scorch/segment/zap/build.go | 70 ++++++++++++++----- .../scorch/segment/zap/cmd/zap/cmd/fields.go | 8 +-- index/scorch/segment/zap/dict.go | 2 + index/scorch/segment/zap/posting.go | 36 +++++++--- index/scorch/segment/zap/segment.go | 11 +-- index/scorch/snapshot_index.go | 7 +- index/scorch/snapshot_index_tfr.go | 59 +++++++++++----- 12 files changed, 172 
insertions(+), 77 deletions(-) diff --git a/config.go b/config.go index 74d407fdd..c1475db74 100644 --- a/config.go +++ b/config.go @@ -21,8 +21,8 @@ import ( "time" "github.com/blevesearch/bleve/index" + "github.com/blevesearch/bleve/index/scorch" "github.com/blevesearch/bleve/index/store/gtreap" - "github.com/blevesearch/bleve/index/upsidedown" "github.com/blevesearch/bleve/registry" "github.com/blevesearch/bleve/search/highlight/highlighter/html" ) @@ -69,7 +69,7 @@ func init() { Config.DefaultMemKVStore = gtreap.Name // default index - Config.DefaultIndexType = upsidedown.Name + Config.DefaultIndexType = scorch.Name bootDuration := time.Since(bootStart) bleveExpVar.Add("bootDuration", int64(bootDuration)) diff --git a/index/scorch/reader.go b/index/scorch/reader.go index e4abbce64..9a20aa013 100644 --- a/index/scorch/reader.go +++ b/index/scorch/reader.go @@ -82,15 +82,27 @@ func (r *Reader) InternalID(id string) (index.IndexInternalID, error) { } func (r *Reader) DumpAll() chan interface{} { - panic("dumpall") + rv := make(chan interface{}) + go func() { + close(rv) + }() + return rv } func (r *Reader) DumpDoc(id string) chan interface{} { - panic("dumpdoc") + rv := make(chan interface{}) + go func() { + close(rv) + }() + return rv } func (r *Reader) DumpFields() chan interface{} { - panic("dumpfields") + rv := make(chan interface{}) + go func() { + close(rv) + }() + return rv } func (r *Reader) Close() error { diff --git a/index/scorch/segment/mem/build.go b/index/scorch/segment/mem/build.go index dbba39b13..e111ce4f7 100644 --- a/index/scorch/segment/mem/build.go +++ b/index/scorch/segment/mem/build.go @@ -29,7 +29,7 @@ func NewFromAnalyzedDocs(results []*index.AnalysisResult) *Segment { s := New() // ensure that _id field get fieldID 0 - s.getOrDefineField("_id", false) + s.getOrDefineField("_id") // walk each doc for _, result := range results { @@ -102,14 +102,14 @@ func (s *Segment) processDocument(result *index.AnalysisResult) { // walk each composite 
field for _, field := range result.Document.CompositeFields { - fieldID := uint16(s.getOrDefineField(field.Name(), true)) + fieldID := uint16(s.getOrDefineField(field.Name())) l, tf := field.Analyze() processField(fieldID, field.Name(), l, tf) } // walk each field for i, field := range result.Document.Fields { - fieldID := uint16(s.getOrDefineField(field.Name(), field.Options().IncludeTermVectors())) + fieldID := uint16(s.getOrDefineField(field.Name())) l := result.Length[i] tf := result.Analyzed[i] processField(fieldID, field.Name(), l, tf) @@ -133,6 +133,9 @@ func (s *Segment) processDocument(result *index.AnalysisResult) { newPostingID := uint64(len(s.Postings) + 1) // add this new bitset to the postings slice s.Postings = append(s.Postings, bs) + + locationBS := roaring.New() + s.PostingsLocs = append(s.PostingsLocs, locationBS) // add this to the details slice s.Freqs = append(s.Freqs, []uint64{uint64(tokenFreq.Frequency())}) s.Norms = append(s.Norms, []float32{float32(1.0 / math.Sqrt(float64(fieldLens[fieldID])))}) @@ -142,10 +145,13 @@ func (s *Segment) processDocument(result *index.AnalysisResult) { var locends []uint64 var locpos []uint64 var locarraypos [][]uint64 + if len(tokenFreq.Locations) > 0 { + locationBS.AddInt(int(docNum)) + } for _, loc := range tokenFreq.Locations { var locf = fieldID if loc.Field != "" { - locf = uint16(s.getOrDefineField(loc.Field, false)) + locf = uint16(s.getOrDefineField(loc.Field)) } locfields = append(locfields, locf) locstarts = append(locstarts, uint64(loc.Start)) @@ -171,12 +177,16 @@ func (s *Segment) processDocument(result *index.AnalysisResult) { // the actual offset is - 1, because 0 is zero value bs := s.Postings[fieldTermPostings-1] bs.AddInt(int(docNum)) + locationBS := s.PostingsLocs[fieldTermPostings-1] s.Freqs[fieldTermPostings-1] = append(s.Freqs[fieldTermPostings-1], uint64(tokenFreq.Frequency())) s.Norms[fieldTermPostings-1] = append(s.Norms[fieldTermPostings-1], 
float32(1.0/math.Sqrt(float64(fieldLens[fieldID])))) + if len(tokenFreq.Locations) > 0 { + locationBS.AddInt(int(docNum)) + } for _, loc := range tokenFreq.Locations { var locf = fieldID if loc.Field != "" { - locf = uint16(s.getOrDefineField(loc.Field, false)) + locf = uint16(s.getOrDefineField(loc.Field)) } s.Locfields[fieldTermPostings-1] = append(s.Locfields[fieldTermPostings-1], locf) s.Locstarts[fieldTermPostings-1] = append(s.Locstarts[fieldTermPostings-1], uint64(loc.Start)) @@ -193,13 +203,12 @@ func (s *Segment) processDocument(result *index.AnalysisResult) { } } -func (s *Segment) getOrDefineField(name string, hasLoc bool) int { +func (s *Segment) getOrDefineField(name string) int { fieldID, ok := s.FieldsMap[name] if !ok { fieldID = uint16(len(s.FieldsInv) + 1) s.FieldsMap[name] = fieldID s.FieldsInv = append(s.FieldsInv, name) - s.FieldsLoc = append(s.FieldsLoc, hasLoc) s.Dicts = append(s.Dicts, make(map[string]uint64)) s.DictKeys = append(s.DictKeys, make([]string, 0)) } diff --git a/index/scorch/segment/mem/posting.go b/index/scorch/segment/mem/posting.go index b6fd0c6a7..d91a00561 100644 --- a/index/scorch/segment/mem/posting.go +++ b/index/scorch/segment/mem/posting.go @@ -51,6 +51,7 @@ func (p *PostingsList) Iterator() segment.PostingsIterator { } if p.postingsID > 0 { allbits := p.dictionary.segment.Postings[p.postingsID-1] + rv.locations = p.dictionary.segment.PostingsLocs[p.postingsID-1] rv.all = allbits.Iterator() if p.except != nil { allExcept := allbits.Clone() @@ -68,6 +69,7 @@ func (p *PostingsList) Iterator() segment.PostingsIterator { type PostingsIterator struct { postings *PostingsList all roaring.IntIterable + locations *roaring.Bitmap offset int locoffset int actual roaring.IntIterable @@ -95,6 +97,7 @@ func (i *PostingsIterator) Next() (segment.Posting, error) { docNum: uint64(n), offset: i.offset, locoffset: i.locoffset, + hasLoc: i.locations.Contains(n), } i.locoffset += 
int(i.postings.dictionary.segment.Freqs[i.postings.postingsID-1][i.offset]) @@ -108,6 +111,7 @@ type Posting struct { docNum uint64 offset int locoffset int + hasLoc bool } // Number returns the document number of this posting in this segment @@ -127,7 +131,7 @@ func (p *Posting) Norm() float64 { // Locations returns the location information for each occurance func (p *Posting) Locations() []segment.Location { - if !p.iterator.postings.dictionary.segment.FieldsLoc[p.iterator.postings.dictionary.fieldID] { + if !p.hasLoc { return nil } freq := int(p.Frequency()) diff --git a/index/scorch/segment/mem/segment.go b/index/scorch/segment/mem/segment.go index 4940bb4cf..cdbff5839 100644 --- a/index/scorch/segment/mem/segment.go +++ b/index/scorch/segment/mem/segment.go @@ -44,8 +44,6 @@ type Segment struct { FieldsMap map[string]uint16 // fields id -> name FieldsInv []string - // field id -> has location info - FieldsLoc []bool // term dictionary // field id -> term -> posting id + 1 @@ -59,6 +57,9 @@ type Segment struct { // Postings list id -> Postings bitmap Postings []*roaring.Bitmap + // Postings List has locations + PostingsLocs []*roaring.Bitmap + // term frequencies // postings list id -> Freqs (one for each hit in bitmap) Freqs [][]uint64 diff --git a/index/scorch/segment/zap/build.go b/index/scorch/segment/zap/build.go index 0ccb6cb0c..a5d2bec25 100644 --- a/index/scorch/segment/zap/build.go +++ b/index/scorch/segment/zap/build.go @@ -27,7 +27,7 @@ import ( "github.com/golang/snappy" ) -var version uint32 +const version uint32 = 1 // PersistSegment takes the in-memory segment and persists it to the specified // path in the zap file format. 
@@ -58,8 +58,14 @@ func PersistSegment(memSegment *mem.Segment, path string, chunkFactor uint32) (e return err } + var postingsListLocs []uint64 + postingsListLocs, err = persistPostingsLocs(memSegment, cr) + if err != nil { + return err + } + var postingsLocs []uint64 - postingsLocs, err = persistPostingsLists(memSegment, cr, freqOffsets, locOffsets) + postingsLocs, err = persistPostingsLists(memSegment, cr, postingsListLocs, freqOffsets, locOffsets) if err != nil { return err } @@ -420,7 +426,43 @@ func persistPostingDetails(memSegment *mem.Segment, w *CountHashWriter, chunkFac return freqOffsets, locOfffsets, nil } -func persistPostingsLists(memSegment *mem.Segment, w *CountHashWriter, freqOffsets, locOffsets []uint64) ([]uint64, error) { +func persistPostingsLocs(memSegment *mem.Segment, w *CountHashWriter) ([]uint64, error) { + var rv []uint64 + + var postingsBuf bytes.Buffer + for postingID := range memSegment.PostingsLocs { + if postingID != 0 { + postingsBuf.Reset() + } + + // record where we start this posting loc + rv = append(rv, uint64(w.Count())) + + // write out postings locs to memory so we know the len + postingsLocLen, err := memSegment.PostingsLocs[postingID].WriteTo(&postingsBuf) + if err != nil { + return nil, err + } + + buf := make([]byte, binary.MaxVarintLen64) + // write out the length of this postings locs + n := binary.PutUvarint(buf, uint64(postingsLocLen)) + _, err = w.Write(buf[:n]) + if err != nil { + return nil, err + } + + // write out the postings list itself + _, err = w.Write(postingsBuf.Bytes()) + if err != nil { + return nil, err + } + } + + return rv, nil +} + +func persistPostingsLists(memSegment *mem.Segment, w *CountHashWriter, postingsListLocs, freqOffsets, locOffsets []uint64) ([]uint64, error) { var rv []uint64 var postingsBuf bytes.Buffer @@ -453,6 +495,13 @@ func persistPostingsLists(memSegment *mem.Segment, w *CountHashWriter, freqOffse return nil, err } + // write out the start of the loc posting list + n = 
binary.PutUvarint(buf, postingsListLocs[postingID]) + _, err = w.Write(buf[:n]) + if err != nil { + return nil, err + } + // write out the length of this postings list n = binary.PutUvarint(buf, uint64(postingsListLen)) _, err = w.Write(buf[:n]) @@ -534,20 +583,9 @@ func persistFields(memSegment *mem.Segment, w *CountHashWriter, dictLocs []uint6 fieldStarts = append(fieldStarts, uint64(w.Count())) buf := make([]byte, binary.MaxVarintLen64) - // write out if the field has indexed locs (0 or 1) - var indexedLoc uint64 - if memSegment.FieldsLoc[fieldID] { - indexedLoc = 1 - } - n := binary.PutUvarint(buf, indexedLoc) - _, err := w.Write(buf[:n]) - if err != nil { - return 0, err - } - // write out dict location for this field - n = binary.PutUvarint(buf, dictLocs[fieldID]) - _, err = w.Write(buf[:n]) + n := binary.PutUvarint(buf, dictLocs[fieldID]) + _, err := w.Write(buf[:n]) if err != nil { return 0, err } diff --git a/index/scorch/segment/zap/cmd/zap/cmd/fields.go b/index/scorch/segment/zap/cmd/zap/cmd/fields.go index 472d966f5..98cdf9d73 100644 --- a/index/scorch/segment/zap/cmd/zap/cmd/fields.go +++ b/index/scorch/segment/zap/cmd/zap/cmd/fields.go @@ -43,11 +43,7 @@ var fieldsCmd = &cobra.Command{ for fieldsIndexOffset+(8*fieldID) < fieldsIndexEnd { addr := binary.BigEndian.Uint64(data[fieldsIndexOffset+(8*fieldID) : fieldsIndexOffset+(8*fieldID)+8]) var n uint64 - indexedLoc, read := binary.Uvarint(data[addr:fieldsIndexEnd]) - n += uint64(read) - - var dictLoc uint64 - dictLoc, read = binary.Uvarint(data[addr+n : fieldsIndexEnd]) + dictLoc, read := binary.Uvarint(data[addr+n : fieldsIndexEnd]) n += uint64(read) var nameLen uint64 @@ -56,7 +52,7 @@ var fieldsCmd = &cobra.Command{ name := string(data[addr+n : addr+n+nameLen]) - fmt.Printf("field %d '%s' indexedLoc: %t starts at %d (%x)\n", fieldID, name, indexedLoc == 1, dictLoc, dictLoc) + fmt.Printf("field %d '%s' starts at %d (%x)\n", fieldID, name, dictLoc, dictLoc) fieldID++ } diff --git 
a/index/scorch/segment/zap/dict.go b/index/scorch/segment/zap/dict.go index d69195958..e4824851e 100644 --- a/index/scorch/segment/zap/dict.go +++ b/index/scorch/segment/zap/dict.go @@ -60,6 +60,8 @@ func (d *Dictionary) postingsList(term string, except *roaring.Bitmap) (*Posting n += uint64(read) rv.locOffset, read = binary.Uvarint(d.segment.mm[postingsOffset+n : postingsOffset+n+binary.MaxVarintLen64]) n += uint64(read) + rv.locBitmapOffset, read = binary.Uvarint(d.segment.mm[postingsOffset+n : postingsOffset+n+binary.MaxVarintLen64]) + n += uint64(read) var postingsLen uint64 postingsLen, read = binary.Uvarint(d.segment.mm[postingsOffset+n : postingsOffset+n+binary.MaxVarintLen64]) n += uint64(read) diff --git a/index/scorch/segment/zap/posting.go b/index/scorch/segment/zap/posting.go index f29020093..051cfef8e 100644 --- a/index/scorch/segment/zap/posting.go +++ b/index/scorch/segment/zap/posting.go @@ -27,14 +27,15 @@ import ( // PostingsList is an in-memory represenation of a postings list type PostingsList struct { - dictionary *Dictionary - term string - postingsOffset uint64 - freqOffset uint64 - locOffset uint64 - postings *roaring.Bitmap - except *roaring.Bitmap - postingKey []byte + dictionary *Dictionary + term string + postingsOffset uint64 + freqOffset uint64 + locOffset uint64 + locBitmapOffset uint64 + postings *roaring.Bitmap + except *roaring.Bitmap + postingKey []byte } // Iterator returns an iterator for this postings list @@ -68,6 +69,18 @@ func (p *PostingsList) Iterator() segment.PostingsIterator { } rv.locChunkStart = p.locOffset + n + var locBitmapLen uint64 + locBitmapLen, read = binary.Uvarint(p.dictionary.segment.mm[p.locBitmapOffset : p.locBitmapOffset+binary.MaxVarintLen64]) + roaringBytes := p.dictionary.segment.mm[p.locBitmapOffset+uint64(read) : p.locBitmapOffset+uint64(read)+locBitmapLen] + bitmap := roaring.NewBitmap() + _, err := bitmap.FromBuffer(roaringBytes) + if err != nil { + // return nil, fmt.Errorf("error loading roaring 
bitmap: %v", err) + // FIXME dont break api yet + panic("i died") + } + rv.locBitmap = bitmap + rv.all = p.postings.Iterator() if p.except != nil { allExcept := p.postings.Clone() @@ -86,6 +99,7 @@ func (p *PostingsList) Count() uint64 { var rv uint64 if p.postings != nil { rv = p.postings.GetCardinality() + if p.except != nil { except := p.except.GetCardinality() if except > rv { @@ -117,6 +131,8 @@ type PostingsIterator struct { locChunkLens []uint64 locChunkStart uint64 + + locBitmap *roaring.Bitmap } func (i *PostingsIterator) loadChunk(chunk int) error { @@ -245,7 +261,7 @@ func (i *PostingsIterator) Next() (segment.Posting, error) { if err != nil { return nil, err } - if i.postings.dictionary.segment.fieldsLoc[i.postings.dictionary.fieldID] { + if i.locBitmap.Contains(allN) { for j := 0; j < int(freq); j++ { err := i.readLocation(nil) if err != nil { @@ -280,7 +296,7 @@ func (i *PostingsIterator) Next() (segment.Posting, error) { return nil, err } rv.norm = math.Float32frombits(uint32(normBits)) - if i.postings.dictionary.segment.fieldsLoc[i.postings.dictionary.fieldID] { + if i.locBitmap.Contains(n) { // read off 'freq' locations rv.locs = make([]segment.Location, rv.freq) locs := make([]Location, rv.freq) diff --git a/index/scorch/segment/zap/segment.go b/index/scorch/segment/zap/segment.go index b8784921a..2c6d0bfed 100644 --- a/index/scorch/segment/zap/segment.go +++ b/index/scorch/segment/zap/segment.go @@ -78,7 +78,6 @@ type Segment struct { fieldsMap map[string]uint16 fieldsInv []string - fieldsLoc []bool fieldsOffsets []uint64 } @@ -112,16 +111,8 @@ func (s *Segment) loadFields() error { for s.fieldsIndexOffset+(8*fieldID) < fieldsIndexEnd { addr := binary.BigEndian.Uint64(s.mm[s.fieldsIndexOffset+(8*fieldID) : s.fieldsIndexOffset+(8*fieldID)+8]) var n uint64 - hasStoredLoc, read := binary.Uvarint(s.mm[addr:fieldsIndexEnd]) - n += uint64(read) - if hasStoredLoc == 1 { - s.fieldsLoc = append(s.fieldsLoc, true) - } else { - s.fieldsLoc = 
append(s.fieldsLoc, false) - } - var dictLoc uint64 - dictLoc, read = binary.Uvarint(s.mm[addr+n : fieldsIndexEnd]) + dictLoc, read := binary.Uvarint(s.mm[addr+n : fieldsIndexEnd]) n += uint64(read) s.fieldsOffsets = append(s.fieldsOffsets, dictLoc) diff --git a/index/scorch/snapshot_index.go b/index/scorch/snapshot_index.go index d2efa2dcd..10d208efd 100644 --- a/index/scorch/snapshot_index.go +++ b/index/scorch/snapshot_index.go @@ -212,6 +212,11 @@ func (i *IndexSnapshot) Document(id string) (rv *document.Document, err error) { return nil, err } + if next == nil { + // no such doc exists + return nil, nil + } + docNum := docInternalToNumber(next.ID) segmentIndex, localDocNum := i.segmentIndexAndLocalDocNumFromGlobal(docNum) @@ -318,6 +323,7 @@ func (i *IndexSnapshot) TermFieldReader(term []byte, field string, includeFreq, var err error rv := &IndexSnapshotTermFieldReader{ + term: term, snapshot: i, postings: make([]segment.PostingsList, len(i.segment)), iterators: make([]segment.PostingsIterator, len(i.segment)), @@ -337,7 +343,6 @@ func (i *IndexSnapshot) TermFieldReader(term []byte, field string, includeFreq, if err != nil { return nil, err } - return rv, nil } diff --git a/index/scorch/snapshot_index_tfr.go b/index/scorch/snapshot_index_tfr.go index 936704906..1fbabdfbb 100644 --- a/index/scorch/snapshot_index_tfr.go +++ b/index/scorch/snapshot_index_tfr.go @@ -22,6 +22,7 @@ import ( ) type IndexSnapshotTermFieldReader struct { + term []byte snapshot *IndexSnapshot postings []segment.PostingsList iterators []segment.PostingsIterator @@ -29,6 +30,8 @@ type IndexSnapshotTermFieldReader struct { includeFreq bool includeNorm bool includeTermVectors bool + currPosting segment.Posting + currID index.IndexInternalID } func (i *IndexSnapshotTermFieldReader) Next(preAlloced *index.TermFieldDoc) (*index.TermFieldDoc, error) { @@ -47,26 +50,11 @@ func (i *IndexSnapshotTermFieldReader) Next(preAlloced *index.TermFieldDoc) (*in globalOffset := 
i.snapshot.offsets[i.segmentOffset] nnum := next.Number() rv.ID = docNumberToBytes(nnum + globalOffset) - if i.includeFreq { - rv.Freq = next.Frequency() - } - if i.includeNorm { - rv.Norm = next.Norm() - } - if i.includeTermVectors { - locs := next.Locations() - rv.Vectors = make([]*index.TermFieldVector, len(locs)) - for i, loc := range locs { - rv.Vectors[i] = &index.TermFieldVector{ - Start: loc.Start(), - End: loc.End(), - Pos: loc.Pos(), - ArrayPositions: loc.ArrayPositions(), - Field: loc.Field(), - } - } - } + i.postingToTermFieldDoc(next, rv) + + i.currID = rv.ID + i.currPosting = next return rv, nil } i.segmentOffset++ @@ -74,7 +62,40 @@ func (i *IndexSnapshotTermFieldReader) Next(preAlloced *index.TermFieldDoc) (*in return nil, nil } +func (i *IndexSnapshotTermFieldReader) postingToTermFieldDoc(next segment.Posting, rv *index.TermFieldDoc) { + if i.includeFreq { + rv.Freq = next.Frequency() + } + if i.includeNorm { + rv.Norm = next.Norm() + } + if i.includeTermVectors { + locs := next.Locations() + rv.Vectors = make([]*index.TermFieldVector, len(locs)) + for i, loc := range locs { + rv.Vectors[i] = &index.TermFieldVector{ + Start: loc.Start(), + End: loc.End(), + Pos: loc.Pos(), + ArrayPositions: loc.ArrayPositions(), + Field: loc.Field(), + } + } + } +} + +// Advance go fuck yourself editor func (i *IndexSnapshotTermFieldReader) Advance(ID index.IndexInternalID, preAlloced *index.TermFieldDoc) (*index.TermFieldDoc, error) { + // first make sure we aren't already pointing at the right thing, (due to way searchers work) + if i.currPosting != nil && bytes.Compare(i.currID, ID) >= 0 { + rv := preAlloced + if rv == nil { + rv = &index.TermFieldDoc{} + } + rv.ID = i.currID + i.postingToTermFieldDoc(i.currPosting, rv) + return rv, nil + } // FIXME do something better next, err := i.Next(preAlloced) if err != nil { From 74b2eeb14d4a27c68f0ed0c25e4d40cad7dc27e1 Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Mon, 11 Dec 2017 15:59:36 -0500 Subject: [PATCH 
042/728] refactor where we do some work so we can return error --- index/scorch/segment/zap/dict.go | 15 +++++++++++++- index/scorch/segment/zap/posting.go | 31 ++++++++++------------------- 2 files changed, 24 insertions(+), 22 deletions(-) diff --git a/index/scorch/segment/zap/dict.go b/index/scorch/segment/zap/dict.go index e4824851e..5d3c160ba 100644 --- a/index/scorch/segment/zap/dict.go +++ b/index/scorch/segment/zap/dict.go @@ -60,8 +60,21 @@ func (d *Dictionary) postingsList(term string, except *roaring.Bitmap) (*Posting n += uint64(read) rv.locOffset, read = binary.Uvarint(d.segment.mm[postingsOffset+n : postingsOffset+n+binary.MaxVarintLen64]) n += uint64(read) - rv.locBitmapOffset, read = binary.Uvarint(d.segment.mm[postingsOffset+n : postingsOffset+n+binary.MaxVarintLen64]) + + var locBitmapOffset uint64 + locBitmapOffset, read = binary.Uvarint(d.segment.mm[postingsOffset+n : postingsOffset+n+binary.MaxVarintLen64]) n += uint64(read) + + // go ahead and load loc bitmap + var locBitmapLen uint64 + locBitmapLen, read = binary.Uvarint(d.segment.mm[locBitmapOffset : locBitmapOffset+binary.MaxVarintLen64]) + locRoaringBytes := d.segment.mm[locBitmapOffset+uint64(read) : locBitmapOffset+uint64(read)+locBitmapLen] + rv.locBitmap = roaring.NewBitmap() + _, err := rv.locBitmap.FromBuffer(locRoaringBytes) + if err != nil { + return nil, fmt.Errorf("error loading roaring bitmap of locations with hits: %v", err) + } + var postingsLen uint64 postingsLen, read = binary.Uvarint(d.segment.mm[postingsOffset+n : postingsOffset+n+binary.MaxVarintLen64]) n += uint64(read) diff --git a/index/scorch/segment/zap/posting.go b/index/scorch/segment/zap/posting.go index 051cfef8e..eb5218d28 100644 --- a/index/scorch/segment/zap/posting.go +++ b/index/scorch/segment/zap/posting.go @@ -27,15 +27,15 @@ import ( // PostingsList is an in-memory represenation of a postings list type PostingsList struct { - dictionary *Dictionary - term string - postingsOffset uint64 - freqOffset uint64 
- locOffset uint64 - locBitmapOffset uint64 - postings *roaring.Bitmap - except *roaring.Bitmap - postingKey []byte + dictionary *Dictionary + term string + postingsOffset uint64 + freqOffset uint64 + locOffset uint64 + locBitmap *roaring.Bitmap + postings *roaring.Bitmap + except *roaring.Bitmap + postingKey []byte } // Iterator returns an iterator for this postings list @@ -68,18 +68,7 @@ func (p *PostingsList) Iterator() segment.PostingsIterator { n += uint64(read) } rv.locChunkStart = p.locOffset + n - - var locBitmapLen uint64 - locBitmapLen, read = binary.Uvarint(p.dictionary.segment.mm[p.locBitmapOffset : p.locBitmapOffset+binary.MaxVarintLen64]) - roaringBytes := p.dictionary.segment.mm[p.locBitmapOffset+uint64(read) : p.locBitmapOffset+uint64(read)+locBitmapLen] - bitmap := roaring.NewBitmap() - _, err := bitmap.FromBuffer(roaringBytes) - if err != nil { - // return nil, fmt.Errorf("error loading roaring bitmap: %v", err) - // FIXME dont break api yet - panic("i died") - } - rv.locBitmap = bitmap + rv.locBitmap = p.locBitmap rv.all = p.postings.Iterator() if p.except != nil { From f246e0e4c012ae87c9b4c86686a803ab329a85f9 Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Mon, 11 Dec 2017 16:22:29 -0500 Subject: [PATCH 043/728] update README for zap file format changes --- index/scorch/segment/zap/README.md | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/index/scorch/segment/zap/README.md b/index/scorch/segment/zap/README.md index 39735d34d..b7a1b9e67 100644 --- a/index/scorch/segment/zap/README.md +++ b/index/scorch/segment/zap/README.md @@ -19,6 +19,7 @@ Current usage: - next use dictionary to navigate to posting list for a specific term - walk posting list - if necessary, walk posting details as we go + - if location info is desired, consult location bitmap to see if it is there ## stored fields section @@ -89,6 +90,16 @@ If you know the doc number you're interested in, this format lets you jump to th If you know the doc 
number you're interested in, this format lets you jump to the correct chunk (docNum/chunkFactor) directly and then seek within that chunk until you find it. +## bitmaps of hits with location info + +- for each posting list + - preparation phase: + - encode roaring bitmap (inidicating which hits have location details indexed) posting list to bytes (so we know the length) + - file writing phase: + - remember the start position for this bitmap + - write length of encoded roaring bitmap + - write the serialized roaring bitmap data + ## postings list section - for each posting list @@ -98,6 +109,7 @@ If you know the doc number you're interested in, this format lets you jump to th - remember the start position for this posting list - write freq/norm details offset (remembered from previous, as varint uint64) - write location details offset (remembered from previous, as varint uint64) + - write location bitmap offset (remembered from pervious, as varint uint64) - write length of encoded roaring bitmap - write the serialized roaring bitmap data @@ -116,7 +128,6 @@ If you know the doc number you're interested in, this format lets you jump to th - for each field - file writing phase: - remember start offset for each field - - write 1 if field has location info indexed, 0 if not (varint uint64) - write dictionary address (remembered from previous) (varint uint64) - write length of field name (varint uint64) - write field name bytes From 58ef21a88aecbf245353615f652a5accfc756e2f Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Mon, 11 Dec 2017 16:24:46 -0500 Subject: [PATCH 044/728] fix golint issue --- index/scorch/segment/zap/build.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/index/scorch/segment/zap/build.go b/index/scorch/segment/zap/build.go index a5d2bec25..2bc520f63 100644 --- a/index/scorch/segment/zap/build.go +++ b/index/scorch/segment/zap/build.go @@ -618,8 +618,8 @@ func persistFields(memSegment *mem.Segment, w *CountHashWriter, dictLocs 
[]uint6 return rv, nil } -// NOTE: update if you make the footer bigger -// crc + ver + chunk + field offset + stored offset + num docs +// FooterSize is the size of the footer record in bytes +// crc + ver + chunk + field offset + stored offset + num docs const FooterSize = 4 + 4 + 4 + 8 + 8 + 8 func persistFooter(numDocs, storedIndexOffset, fieldIndexOffset uint64, From 3461fb741f1fb2b11d5fd45c262b2d1d0e3e1eb8 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Mon, 11 Dec 2017 10:40:35 -0800 Subject: [PATCH 045/728] mergeplan: a placeholder planner that merges all segments A stepping stone to fleshing out the API contract. --- index/scorch/mergeplan/merge_plan.go | 275 ++++++++++++++++++++++ index/scorch/mergeplan/merge_plan_test.go | 92 ++++++++ index/scorch/mergeplan/sort.go | 27 +++ 3 files changed, 394 insertions(+) create mode 100644 index/scorch/mergeplan/merge_plan.go create mode 100644 index/scorch/mergeplan/merge_plan_test.go create mode 100644 index/scorch/mergeplan/sort.go diff --git a/index/scorch/mergeplan/merge_plan.go b/index/scorch/mergeplan/merge_plan.go new file mode 100644 index 000000000..ff50cff37 --- /dev/null +++ b/index/scorch/mergeplan/merge_plan.go @@ -0,0 +1,275 @@ +// Copyright (c) 2017 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package mergeplan + +import ( + "math" + "sort" +) + +// A Segment represents the information that the planner needs to +// calculate segment merging. 
+type Segment interface { + // Unique id of the segment -- used for sorting. + Id() uint64 + + // Full segment size (the size before any logical deletions). + FullSize() int64 + + // Size of the live data of the segment; i.e., FullSize() minus + // any logical deletions. + LiveSize() int64 +} + +// Plan() will functionally compute a merge plan. A segment will be +// assigned to at most a single MergeTask in the output MergePlan. A +// segment not assigned to any MergeTask means the segment should +// remain unmerged. +func Plan(segments []Segment, o *MergePlanOptions) ( + result *MergePlan, err error) { + if len(segments) <= 1 { + return nil, nil + } + + // TODO: PLACEHOLDER implementation for now, that always merges + // all the candidates. + return &MergePlan{ + Tasks: []*MergeTask{ + &MergeTask{ + Segments: segments, + }, + }, + }, nil +} + +// A MergePlan is the result of the Plan() API. +// +// The planner doesn’t know how or whether these tasks are executed -- +// that’s up to a separate merge execution system, which might execute +// these tasks concurrently or not, and which might execute all the +// tasks or not. +type MergePlan struct { + Tasks []*MergeTask +} + +// A MergeTask represents several segments that should be merged +// together into a single segment. +type MergeTask struct { + Segments []Segment +} + +// The MergePlanOptions is designed to be reusable between planning calls. +type MergePlanOptions struct { + // Max # segments per logarithmic tier, or max width of any + // logarithmic “step”. Smaller values mean more merging but fewer + // segments. Should be >= SegmentsPerMergeTask, else you'll have + // too much merging. + MaxSegmentsPerTier int + + // Max size of any segment produced after merging. Actual + // merging, however, may produce segment sizes different than the + // planner’s predicted sizes. + MaxSegmentSize int64 + + // The number of segments in any resulting MergeTask. 
e.g., + // len(result.Tasks[ * ].Segments) == SegmentsPerMergeTask. + SegmentsPerMergeTask int + + // Small segments are rounded up to this size, i.e., treated as + // equal (floor) size for consideration. This is to prevent lots + // of tiny segments from resulting in a long tail in the index. + FloorSegmentSize int64 + + // Controls how aggressively merges that reclaim more deletions + // are favored. Higher values will more aggressively target + // merges that reclaim deletions, but be careful not to go so high + // that way too much merging takes place; a value of 3.0 is + // probably nearly too high. A value of 0.0 means deletions don't + // impact merge selection. + ReclaimDeletesWeight float64 + + // Only consider a segment for merging if its delete percentage is + // over this threshold. + MinDeletesPct float64 + + // Optional, defaults to mergeplan.CalcBudget(). + CalcBudget func(totalSize int64, firstTierSize int64, + o *MergePlanOptions) (budgetNumSegments int) + + // Optional, defaults to mergeplan.ScoreSegments(). + ScoreSegments func(segments []Segment, o *MergePlanOptions) float64 + + // Optional. + Logger func(string) +} + +// Returns the higher of the input or FloorSegmentSize. +func (o *MergePlanOptions) RaiseToFloorSegmentSize(s int64) int64 { + if s > o.FloorSegmentSize { + return s + } + return o.FloorSegmentSize +} + +// Suggested default options. +var DefaultMergePlanOptions = MergePlanOptions{ + MaxSegmentsPerTier: 10, + MaxSegmentSize: 5000000, + SegmentsPerMergeTask: 10, + FloorSegmentSize: 2000, + ReclaimDeletesWeight: 2.0, + MinDeletesPct: 10.0, +} + +// ------------------------------------------- + +func plan(segmentsIn []Segment, o *MergePlanOptions) ( + result *MergePlan, err error) { + if len(segmentsIn) <= 1 { + return nil, nil + } + + if o == nil { + o = &DefaultMergePlanOptions + } + + segments := append([]Segment(nil), segmentsIn...) // Copy. 
+ + sort.Sort(byLiveSizeDescending(segments)) + + var segmentsLiveSize int64 + + var minLiveSize int64 = math.MaxInt64 + + var eligible []Segment + var eligibleLiveSize int64 + + for _, segment := range segments { + segmentsLiveSize += segment.LiveSize() + + if minLiveSize > segment.LiveSize() { + minLiveSize = segment.LiveSize() + } + + // Only small-enough segments are eligible. + if segment.LiveSize() < o.MaxSegmentSize/2 { + eligible = append(eligible, segment) + eligibleLiveSize += segment.LiveSize() + } + } + + minLiveSize = o.RaiseToFloorSegmentSize(minLiveSize) + + calcBudget := o.CalcBudget + if calcBudget == nil { + calcBudget = CalcBudget + } + + budgetNumSegments := CalcBudget(eligibleLiveSize, minLiveSize, o) + + scoreSegments := o.ScoreSegments + if scoreSegments == nil { + scoreSegments = ScoreSegments + } + + rv := &MergePlan{} + + // While we’re over budget, keep looping, which might produce + // another MergeTask. + for len(eligible) > budgetNumSegments { + // Track a current best roster as we examine and score + // potential rosters of merges. + var bestRoster []Segment + var bestRosterScore float64 // Lower score is better. + + for startIdx := 0; startIdx < len(eligible)-o.SegmentsPerMergeTask; startIdx++ { + var roster []Segment + var rosterLiveSize int64 + + for idx := startIdx; idx < len(eligible) && len(roster) < o.SegmentsPerMergeTask; idx++ { + rosterCandidate := eligible[idx] + + if rosterLiveSize+rosterCandidate.LiveSize() > o.MaxSegmentSize { + // NOTE: We continue the loop, to try to “pack” + // the roster with smaller segments to get closer + // to the max size; but, we aren't doing full, + // comprehensive "bin-packing" permutations. 
+ continue + } + + roster = append(roster, rosterCandidate) + rosterLiveSize += rosterCandidate.LiveSize() + } + + rosterScore := scoreSegments(roster, o) + + if len(bestRoster) <= 0 || rosterScore < bestRosterScore { + bestRoster = roster + bestRosterScore = rosterScore + } + } + + if len(bestRoster) <= 0 { + return rv, nil + } + + rv.Tasks = append(rv.Tasks, &MergeTask{ + Segments: bestRoster, + }) + + eligible = removeSegments(eligible, bestRoster) + } + + return rv, nil +} + +// Compute the number of segments that would be needed to cover the +// totalSize, by climbing up a logarithmic staircase of segment tiers. +func CalcBudget(totalSize int64, firstTierSize int64, o *MergePlanOptions) ( + budgetNumSegments int) { + tierSize := firstTierSize + + for totalSize > 0 { + segmentsInTier := float64(totalSize) / float64(tierSize) + if segmentsInTier < float64(o.MaxSegmentsPerTier) { + budgetNumSegments += int(math.Ceil(segmentsInTier)) + break + } + + budgetNumSegments += o.MaxSegmentsPerTier + totalSize -= int64(o.MaxSegmentsPerTier) * tierSize + tierSize *= int64(o.SegmentsPerMergeTask) + } + + return budgetNumSegments +} + +// removeSegments() keeps the ordering of the result segments stable. +func removeSegments(segments []Segment, toRemove []Segment) (rv []Segment) { +OUTER: + for _, segment := range segments { + for _, r := range toRemove { + if segment == r { + continue OUTER + } + } + rv = append(rv, segment) + } + return rv +} + +func ScoreSegments(segments []Segment, o *MergePlanOptions) float64 { + return 0 // TODO. Bogus score. +} diff --git a/index/scorch/mergeplan/merge_plan_test.go b/index/scorch/mergeplan/merge_plan_test.go new file mode 100644 index 000000000..0f0f0fcca --- /dev/null +++ b/index/scorch/mergeplan/merge_plan_test.go @@ -0,0 +1,92 @@ +// Copyright (c) 2017 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package mergeplan + +import ( + "reflect" + "testing" +) + +// Implements the Segment interface for testing, +type segment struct { + id uint64 + fullSize int64 + liveSize int64 +} + +func (s *segment) Id() uint64 { return s.id } +func (s *segment) FullSize() int64 { return s.fullSize } +func (s *segment) LiveSize() int64 { return s.liveSize } + +func makeLinearSegments(n int) (rv []Segment) { + for i := 0; i < n; i++ { + rv = append(rv, &segment{ + id: uint64(i), + fullSize: int64(i), + liveSize: int64(i), + }) + } + return rv +} + +func TestSimplePlan(t *testing.T) { + segs := makeLinearSegments(10) + + tests := []struct { + desc string + segments []Segment + expectPlan *MergePlan + expectErr error + }{ + {"nil candidates", + nil, nil, nil}, + {"empty candidates", + []Segment{}, nil, nil}, + {"1 candidate", + []Segment{segs[0]}, + nil, + nil, + }, + {"2 candidates", + []Segment{ + segs[0], + segs[1], + }, + &MergePlan{ + []*MergeTask{ + &MergeTask{ + Segments: []Segment{ + segs[0], + segs[1], + }, + }, + }, + }, + nil, + }, + } + + for testi, test := range tests { + plan, err := Plan(test.segments, &DefaultMergePlanOptions) + if err != test.expectErr { + t.Errorf("testi: %d, test: %v, got err: %v", + testi, test, err) + } + if !reflect.DeepEqual(plan, test.expectPlan) { + t.Errorf("testi: %d, test: %v, got plan: %v", + testi, test, plan) + } + } +} diff --git a/index/scorch/mergeplan/sort.go b/index/scorch/mergeplan/sort.go new file mode 100644 index 000000000..4eabc31db --- /dev/null +++ b/index/scorch/mergeplan/sort.go @@ -0,0 +1,27 @@ +// 
Copyright (c) 2017 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package mergeplan + +type byLiveSizeDescending []Segment + +func (a byLiveSizeDescending) Len() int { return len(a) } +func (a byLiveSizeDescending) Swap(i, j int) { a[i], a[j] = a[j], a[i] } + +func (a byLiveSizeDescending) Less(i, j int) bool { + if a[i].LiveSize() != a[j].LiveSize() { + return a[i].LiveSize() < a[j].LiveSize() + } + return a[i].Id() < a[j].Id() +} From 3873237e46a1192a27b04c4381602698779f7170 Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Mon, 11 Dec 2017 22:09:26 -0500 Subject: [PATCH 046/728] try newer version of bolt (seeing random crashes on travis) --- vendor/manifest | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vendor/manifest b/vendor/manifest index 839e6fe5e..60c5fbb5e 100644 --- a/vendor/manifest +++ b/vendor/manifest @@ -21,7 +21,7 @@ "importpath": "github.com/boltdb/bolt", "repository": "https://github.com/boltdb/bolt", "vcs": "", - "revision": "144418e1475d8bf7abbdc48583500f1a20c62ea7", + "revision": "9da31745363232bc1e27dbab3569e77383a51585", "branch": "master", "notests": true }, @@ -127,4 +127,4 @@ "notests": true } ] -} \ No newline at end of file +} From 927216df8ccdd7f1cfe8eaa24664b9909f44f9e0 Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Tue, 12 Dec 2017 08:42:13 -0500 Subject: [PATCH 047/728] fix postings list count impl --- index/scorch/segment/zap/posting.go | 13 +++---------- 1 file changed, 3 
insertions(+), 10 deletions(-) diff --git a/index/scorch/segment/zap/posting.go b/index/scorch/segment/zap/posting.go index eb5218d28..68db04299 100644 --- a/index/scorch/segment/zap/posting.go +++ b/index/scorch/segment/zap/posting.go @@ -85,20 +85,13 @@ func (p *PostingsList) Iterator() segment.PostingsIterator { // Count returns the number of items on this postings list func (p *PostingsList) Count() uint64 { - var rv uint64 if p.postings != nil { - rv = p.postings.GetCardinality() - if p.except != nil { - except := p.except.GetCardinality() - if except > rv { - // avoid underflow - except = rv - } - rv -= except + return roaring.AndNot(p.postings, p.except).GetCardinality() } + return p.postings.GetCardinality() } - return rv + return 0 } // PostingsIterator provides a way to iterate through the postings list From 665c3c80ffddeedaf219cc9878618def4e00e7a2 Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Tue, 12 Dec 2017 11:21:55 -0500 Subject: [PATCH 048/728] initial cut of zap segment merging --- index/scorch/segment/zap/intcoder.go | 111 +++++ index/scorch/segment/zap/intcoder_test.go | 59 +++ index/scorch/segment/zap/merge.go | 526 ++++++++++++++++++++++ index/scorch/segment/zap/merge_test.go | 280 ++++++++++++ 4 files changed, 976 insertions(+) create mode 100644 index/scorch/segment/zap/intcoder.go create mode 100644 index/scorch/segment/zap/intcoder_test.go create mode 100644 index/scorch/segment/zap/merge.go create mode 100644 index/scorch/segment/zap/merge_test.go diff --git a/index/scorch/segment/zap/intcoder.go b/index/scorch/segment/zap/intcoder.go new file mode 100644 index 000000000..a682740f1 --- /dev/null +++ b/index/scorch/segment/zap/intcoder.go @@ -0,0 +1,111 @@ +package zap + +import ( + "bytes" + "encoding/binary" + "io" + + "github.com/Smerity/govarint" +) + +type chunkedIntCoder struct { + final []byte + maxDocNum uint64 + chunkSize uint64 + chunkBuf bytes.Buffer + encoder *govarint.Base128Encoder + chunkLens []uint64 + currChunk uint64 +} 
+ +// newChunkedIntCoder returns a new chunk int coder which packs data into +// chunks based on the provided chunkSize and supports up to the specified +// maxDocNum +func newChunkedIntCoder(chunkSize uint64, maxDocNum uint64) *chunkedIntCoder { + total := maxDocNum/chunkSize + 1 + rv := &chunkedIntCoder{ + chunkSize: chunkSize, + maxDocNum: maxDocNum, + chunkLens: make([]uint64, total), + } + rv.encoder = govarint.NewU64Base128Encoder(&rv.chunkBuf) + + return rv +} + +// Reset lets you reuse this chunked int coder. buffers are reset and reused +// from previous use. you cannot change the chunk size or max doc num. +func (c *chunkedIntCoder) Reset() { + c.final = c.final[:0] + c.chunkBuf.Reset() + c.currChunk = 0 + for i := range c.chunkLens { + c.chunkLens[i] = 0 + } +} + +// Add encodes the provided integers into the correct chunk for the provided +// doc num. You MUST call Add() with increasing docNums. +func (c *chunkedIntCoder) Add(docNum uint64, vals ...uint64) error { + chunk := docNum / c.chunkSize + if chunk != c.currChunk { + // starting a new chunk + if c.encoder != nil { + // close out last + c.encoder.Close() + encodingBytes := c.chunkBuf.Bytes() + c.chunkLens[c.currChunk] = uint64(len(encodingBytes)) + c.final = append(c.final, encodingBytes...) + c.chunkBuf.Reset() + c.encoder = govarint.NewU64Base128Encoder(&c.chunkBuf) + } + c.currChunk = chunk + } + + for _, val := range vals { + _, err := c.encoder.PutU64(val) + if err != nil { + return err + } + } + + return nil +} + +// Close indicates you are done calling Add() this allows the final chunk +// to be encoded. +func (c *chunkedIntCoder) Close() { + c.encoder.Close() + encodingBytes := c.chunkBuf.Bytes() + c.chunkLens[c.currChunk] = uint64(len(encodingBytes)) + c.final = append(c.final, encodingBytes...) +} + +// Write commits all the encoded chunked integers to the provided writer. 
+func (c *chunkedIntCoder) Write(w io.Writer) (int, error) { + var tw int + buf := make([]byte, binary.MaxVarintLen64) + // write out the number of chunks + n := binary.PutUvarint(buf, uint64(len(c.chunkLens))) + nw, err := w.Write(buf[:n]) + tw += nw + if err != nil { + return tw, err + } + // write out the chunk lens + for _, chunkLen := range c.chunkLens { + n := binary.PutUvarint(buf, uint64(chunkLen)) + nw, err = w.Write(buf[:n]) + tw += nw + if err != nil { + return tw, err + } + } + // write out the data + nw, err = w.Write(c.final) + tw += nw + if err != nil { + return tw, err + } + return tw, nil +} diff --git a/index/scorch/segment/zap/intcoder_test.go b/index/scorch/segment/zap/intcoder_test.go new file mode 100644 index 000000000..f2623a548 --- /dev/null +++ b/index/scorch/segment/zap/intcoder_test.go @@ -0,0 +1,59 @@ +package zap + +import ( + "bytes" + "reflect" + "testing" +) + +func TestChunkIntCoder(t *testing.T) { + tests := []struct { + maxDocNum uint64 + chunkSize uint64 + docNums []uint64 + vals [][]uint64 + expected []byte + }{ + { + maxDocNum: 0, + chunkSize: 1, + docNums: []uint64{0}, + vals: [][]uint64{ + []uint64{3}, + }, + // 1 chunk, chunk-0 length 1, value 3 + expected: []byte{0x1, 0x1, 0x3}, + }, + { + maxDocNum: 1, + chunkSize: 1, + docNums: []uint64{0, 1}, + vals: [][]uint64{ + []uint64{3}, + []uint64{7}, + }, + // 2 chunks, chunk-0 length 1, chunk-1 length 1, value 3, value 7 + expected: []byte{0x2, 0x1, 0x1, 0x3, 0x7}, + }, + } + + for _, test := range tests { + + cic := newChunkedIntCoder(test.chunkSize, test.maxDocNum) + for i, docNum := range test.docNums { + err := cic.Add(docNum, test.vals[i]...) 
+ if err != nil { + t.Fatalf("error adding to intcoder: %v", err) + } + } + cic.Close() + var actual bytes.Buffer + _, err := cic.Write(&actual) + if err != nil { + t.Fatalf("error writing: %v", err) + } + if !reflect.DeepEqual(test.expected, actual.Bytes()) { + t.Errorf("got % x, expected % x", actual.Bytes(), test.expected) + } + } +} diff --git a/index/scorch/segment/zap/merge.go b/index/scorch/segment/zap/merge.go new file mode 100644 index 000000000..fe5155d16 --- /dev/null +++ b/index/scorch/segment/zap/merge.go @@ -0,0 +1,526 @@ +package zap + +import ( + "bufio" + "bytes" + "encoding/binary" + "fmt" + "io" + "math" + "os" + + "github.com/RoaringBitmap/roaring" + "github.com/Smerity/govarint" + "github.com/couchbaselabs/vellum" + "github.com/golang/snappy" +) + +// Merge takes a slice of zap segments, bit masks describing which documents +// from the may be dropped, and creates a new segment containing the remaining +// data. This new segment is built at the specified path, with the provided +// chunkFactor. 
+func Merge(segments []*Segment, drops []*roaring.Bitmap, path string, chunkFactor uint32) error { + + flag := os.O_RDWR | os.O_CREATE + + f, err := os.OpenFile(path, flag, 0600) + if err != nil { + return err + } + + // bufer the output + br := bufio.NewWriter(f) + + // wrap it for counting (tracking offsets) + cr := NewCountHashWriter(br) + + fieldsInv := mergeFields(segments) + fieldsMap := mapFields(fieldsInv) + + newSegDocCount := computeNewDocCount(segments, drops) + + var newDocNums [][]uint64 + var storedIndexOffset uint64 + storedIndexOffset, newDocNums, err = mergeStoredAndRemap(segments, drops, + fieldsMap, fieldsInv, newSegDocCount, cr) + if err != nil { + return err + } + + // FIXME temp until computed + //dictLocs := make([]uint64, len(fieldsInv)) + + var dictLocs []uint64 + dictLocs, err = persistMergedRest(segments, drops, fieldsInv, fieldsMap, + newDocNums, newSegDocCount, cr) + if err != nil { + return err + } + + var fieldsIndexOffset uint64 + fieldsIndexOffset, err = persistMergedFields(fieldsInv, cr, dictLocs) + if err != nil { + return err + } + + err = persistFooter(newSegDocCount, storedIndexOffset, + fieldsIndexOffset, chunkFactor, cr) + if err != nil { + return err + } + + err = br.Flush() + if err != nil { + return err + } + + err = f.Sync() + if err != nil { + return err + } + + err = f.Close() + if err != nil { + return err + } + + return nil +} + +func mapFields(fields []string) map[string]uint16 { + rv := make(map[string]uint16) + for i, fieldName := range fields { + rv[fieldName] = uint16(i) + } + return rv +} + +func computeNewDocCount(segments []*Segment, drops []*roaring.Bitmap) uint64 { + var newSegDocCount uint64 + for segI, segment := range segments { + segIAfterDrop := segment.NumDocs() + if drops[segI] != nil { + segIAfterDrop -= drops[segI].GetCardinality() + } + newSegDocCount += segIAfterDrop + } + return newSegDocCount +} + +func writeRoaringWithLen(r *roaring.Bitmap, w io.Writer) (int, error) { + var buffer bytes.Buffer 
+ // write out postings list to memory so we know the len + postingsListLen, err := r.WriteTo(&buffer) + if err != nil { + return 0, err + } + var tw int + // write out the length of this postings list + buf := make([]byte, binary.MaxVarintLen64) + n := binary.PutUvarint(buf, uint64(postingsListLen)) + nw, err := w.Write(buf[:n]) + tw += nw + if err != nil { + return tw, err + } + + // write out the postings list itself + nw, err = w.Write(buffer.Bytes()) + tw += nw + if err != nil { + return tw, err + } + + return tw, nil +} + +func persistMergedRest(segments []*Segment, drops []*roaring.Bitmap, + fieldsInv []string, fieldsMap map[string]uint16, newDocNums [][]uint64, newSegDocCount uint64, + w *CountHashWriter) ([]uint64, error) { + + rv := make([]uint64, len(fieldsInv)) + + var vellumBuf bytes.Buffer + // for each field + for fieldID, fieldName := range fieldsInv { + if fieldID != 0 { + vellumBuf.Reset() + } + newVellum, err := vellum.New(&vellumBuf, nil) + if err != nil { + return nil, err + } + + // collect FTS iterators from all segments for this field + var dicts []*Dictionary + var itrs []vellum.Iterator + for _, segment := range segments { + dict, err2 := segment.dictionary(fieldName) + if err2 != nil { + return nil, err2 + } + dicts = append(dicts, dict) + + itr, err2 := dict.fst.Iterator(nil, nil) + if err2 != nil { + return nil, err2 + } + itrs = append(itrs, itr) + } + + // create merging iterator + mergeItr, err := vellum.NewMergeIterator(itrs, func(postingOffsets []uint64) uint64 { + // we don't actually use the merged value + return 0 + }) + + tfEncoder := newChunkedIntCoder(1024, newSegDocCount-1) + locEncoder := newChunkedIntCoder(1024, newSegDocCount-1) + for err == nil { + term, _ := mergeItr.Current() + + newRoaring := roaring.NewBitmap() + newRoaringLocs := roaring.NewBitmap() + tfEncoder.Reset() + locEncoder.Reset() + + // now go back and get posting list for this term + // but pass in the deleted docs for that segment + for dictI, dict := 
range dicts { + postings, err2 := dict.postingsList(string(term), drops[dictI]) + if err2 != nil { + return nil, err2 + } + + postItr := postings.Iterator() + next, err2 := postItr.Next() + for next != nil && err2 == nil { + hitNewDocNum := newDocNums[dictI][next.Number()] + if hitNewDocNum == docDropped { + return nil, fmt.Errorf("see hit with dropped doc num") + } + newRoaring.Add(uint32(hitNewDocNum)) + // encode norm bits + norm := next.Norm() + normBits := math.Float32bits(float32(norm)) + err3 := tfEncoder.Add(hitNewDocNum, next.Frequency(), uint64(normBits)) + if err3 != nil { + return nil, err3 + } + locs := next.Locations() + if len(locs) > 0 { + newRoaringLocs.Add(uint32(hitNewDocNum)) + for _, loc := range locs { + args := make([]uint64, 0, 5+len(loc.ArrayPositions())) + args = append(args, uint64(fieldsMap[loc.Field()])) + args = append(args, loc.Pos()) + args = append(args, loc.Start()) + args = append(args, loc.End()) + args = append(args, uint64(len(loc.ArrayPositions()))) + args = append(args, loc.ArrayPositions()...) + locEncoder.Add(hitNewDocNum, args...) 
+ } + } + next, err2 = postItr.Next() + } + if err != nil { + return nil, err + } + + } + tfEncoder.Close() + locEncoder.Close() + + if newRoaring.GetCardinality() > 0 { + // this field/term actually has hits in the new segment, lets write it down + freqOffset := uint64(w.Count()) + _, err = tfEncoder.Write(w) + if err != nil { + return nil, err + } + locOffset := uint64(w.Count()) + _, err = locEncoder.Write(w) + if err != nil { + return nil, err + } + postingLocOffset := uint64(w.Count()) + _, err = writeRoaringWithLen(newRoaringLocs, w) + if err != nil { + return nil, err + } + postingOffset := uint64(w.Count()) + // write out the start of the term info + buf := make([]byte, binary.MaxVarintLen64) + n := binary.PutUvarint(buf, freqOffset) + _, err = w.Write(buf[:n]) + if err != nil { + return nil, err + } + + // write out the start of the loc info + n = binary.PutUvarint(buf, locOffset) + _, err = w.Write(buf[:n]) + if err != nil { + return nil, err + } + + // write out the start of the loc posting list + n = binary.PutUvarint(buf, postingLocOffset) + _, err = w.Write(buf[:n]) + if err != nil { + return nil, err + } + _, err = writeRoaringWithLen(newRoaring, w) + if err != nil { + return nil, err + } + + newVellum.Insert(term, postingOffset) + } + + err = mergeItr.Next() + } + if err != nil && err != vellum.ErrIteratorDone { + return nil, err + } + + dictOffset := uint64(w.Count()) + err = newVellum.Close() + if err != nil { + return nil, err + } + vellumData := vellumBuf.Bytes() + + // write out the length of the vellum data + buf := make([]byte, binary.MaxVarintLen64) + // write out the number of chunks + n := binary.PutUvarint(buf, uint64(len(vellumData))) + _, err = w.Write(buf[:n]) + if err != nil { + return nil, err + } + + // write this vellum to disk + _, err = w.Write(vellumData) + if err != nil { + return nil, err + } + + rv[fieldID] = dictOffset + } + + return rv, nil +} + +const docDropped = math.MaxUint64 + +func mergeStoredAndRemap(segments 
[]*Segment, drops []*roaring.Bitmap, + fieldsMap map[string]uint16, fieldsInv []string, newSegDocCount uint64, + w *CountHashWriter) (uint64, [][]uint64, error) { + var rv [][]uint64 + var newDocNum int + + var curr int + var metaBuf bytes.Buffer + var data, compressed []byte + + docNumOffsets := make([]uint64, newSegDocCount) + + // for each segment + for segI, segment := range segments { + var segNewDocNums []uint64 + + // for each doc num + for docNum := uint64(0); docNum < segment.numDocs; docNum++ { + metaBuf.Reset() + data = data[:0] + compressed = compressed[:0] + curr = 0 + + metaEncoder := govarint.NewU64Base128Encoder(&metaBuf) + + if drops[segI] != nil && drops[segI].Contains(uint32(docNum)) { + segNewDocNums = append(segNewDocNums, docDropped) + } else { + segNewDocNums = append(segNewDocNums, uint64(newDocNum)) + // collect all the data + vals := make(map[uint16][][]byte) + typs := make(map[uint16][]byte) + poss := make(map[uint16][][]uint64) + err := segment.VisitDocument(docNum, func(field string, typ byte, value []byte, pos []uint64) bool { + fieldID := fieldsMap[field] + vals[fieldID] = append(vals[fieldID], value) + typs[fieldID] = append(typs[fieldID], typ) + poss[fieldID] = append(poss[fieldID], pos) + return true + }) + if err != nil { + return 0, nil, err + } + + // now walk the fields in order + for fieldID := range fieldsInv { + + if storedFieldValues, ok := vals[uint16(fieldID)]; ok { + + // has stored values for this field + num := len(storedFieldValues) + + // process each value + for i := 0; i < num; i++ { + // encode field + _, err2 := metaEncoder.PutU64(uint64(fieldID)) + if err2 != nil { + return 0, nil, err2 + } + // encode type + _, err2 = metaEncoder.PutU64(uint64(typs[uint16(fieldID)][i])) + if err2 != nil { + return 0, nil, err2 + } + // encode start offset + _, err2 = metaEncoder.PutU64(uint64(curr)) + if err2 != nil { + return 0, nil, err2 + } + // end len + _, err2 = metaEncoder.PutU64(uint64(len(storedFieldValues[i]))) + if 
err2 != nil { + return 0, nil, err2 + } + // encode number of array pos + _, err2 = metaEncoder.PutU64(uint64(len(poss[uint16(fieldID)][i]))) + if err2 != nil { + return 0, nil, err2 + } + // encode all array positions + for j := 0; j < len(poss[uint16(fieldID)][i]); j++ { + _, err2 = metaEncoder.PutU64(poss[uint16(fieldID)][i][j]) + if err2 != nil { + return 0, nil, err2 + } + } + // append data + data = append(data, storedFieldValues[i]...) + // update curr + curr += len(storedFieldValues[i]) + + } + } + } + + metaEncoder.Close() + metaBytes := metaBuf.Bytes() + compressed = snappy.Encode(compressed, data) + // record where we're about to start writing + docNumOffsets[newDocNum] = uint64(w.Count()) + + buf := make([]byte, binary.MaxVarintLen64) + // write out the meta length + n := binary.PutUvarint(buf, uint64(len(metaBytes))) + _, err = w.Write(buf[:n]) + if err != nil { + return 0, nil, err + } + // write out the compressed data length + n = binary.PutUvarint(buf, uint64(len(compressed))) + _, err = w.Write(buf[:n]) + if err != nil { + return 0, nil, err + } + // now write the meta + _, err = w.Write(metaBytes) + if err != nil { + return 0, nil, err + } + // now write the compressed data + _, err = w.Write(compressed) + if err != nil { + return 0, nil, err + } + + newDocNum++ + } + } + rv = append(rv, segNewDocNums) + } + + // return value is the start of the stored index + offset := uint64(w.Count()) + // now write out the stored doc index + for docNum := range docNumOffsets { + err := binary.Write(w, binary.BigEndian, docNumOffsets[docNum]) + if err != nil { + return 0, nil, err + } + } + + return offset, rv, nil +} + +// mergeFields builds a unified list of fields used across all the input segments +func mergeFields(segments []*Segment) []string { + fieldsMap := map[string]struct{}{} + + for _, segment := range segments { + fields := segment.Fields() + for _, field := range fields { + fieldsMap[field] = struct{}{} + } + } + rv := make([]string, 0, 
len(fieldsMap)) + // ensure _id stays first + rv = append(rv, "_id") + for k := range fieldsMap { + if k != "_id" { + rv = append(rv, k) + } + } + + return rv +} + +func persistMergedFields(fieldsInv []string, w *CountHashWriter, dictLocs []uint64) (uint64, error) { + var rv uint64 + + var fieldStarts []uint64 + for fieldID, fieldName := range fieldsInv { + + // record start of this field + fieldStarts = append(fieldStarts, uint64(w.Count())) + + buf := make([]byte, binary.MaxVarintLen64) + // write out dict location for this field + n := binary.PutUvarint(buf, dictLocs[fieldID]) + _, err := w.Write(buf[:n]) + if err != nil { + return 0, err + } + + // write out the length of the field name + n = binary.PutUvarint(buf, uint64(len(fieldName))) + _, err = w.Write(buf[:n]) + if err != nil { + return 0, err + } + + // write out the field name + _, err = w.Write([]byte(fieldName)) + if err != nil { + return 0, err + } + } + + // now write out the fields index + rv = uint64(w.Count()) + + // now write out the stored doc index + for fieldID := range fieldsInv { + err := binary.Write(w, binary.BigEndian, fieldStarts[fieldID]) + if err != nil { + return 0, err + } + } + + return rv, nil +} diff --git a/index/scorch/segment/zap/merge_test.go b/index/scorch/segment/zap/merge_test.go new file mode 100644 index 000000000..53bcde7fb --- /dev/null +++ b/index/scorch/segment/zap/merge_test.go @@ -0,0 +1,280 @@ +package zap + +import ( + "os" + "testing" + + "github.com/RoaringBitmap/roaring" + "github.com/blevesearch/bleve/analysis" + "github.com/blevesearch/bleve/document" + "github.com/blevesearch/bleve/index" + "github.com/blevesearch/bleve/index/scorch/segment/mem" +) + +func TestMerge(t *testing.T) { + _ = os.RemoveAll("/tmp/scorch.zap") + _ = os.RemoveAll("/tmp/scorch2.zap") + _ = os.RemoveAll("/tmp/scorch3.zap") + + memSegment := buildMemSegmentMulti() + err := PersistSegment(memSegment, "/tmp/scorch.zap", 1024) + if err != nil { + t.Fatal(err) + } + + memSegment2 := 
buildMemSegmentMulti2() + err = PersistSegment(memSegment2, "/tmp/scorch2.zap", 1024) + if err != nil { + t.Fatal(err) + } + + segment, err := Open("/tmp/scorch.zap") + if err != nil { + t.Fatalf("error opening segment: %v", err) + } + defer func() { + cerr := segment.Close() + if cerr != nil { + t.Fatalf("error closing segment: %v", err) + } + }() + + segment2, err := Open("/tmp/scorch2.zap") + if err != nil { + t.Fatalf("error opening segment: %v", err) + } + defer func() { + cerr := segment2.Close() + if cerr != nil { + t.Fatalf("error closing segment: %v", err) + } + }() + + segsToMerge := make([]*Segment, 2) + segsToMerge[0] = segment.(*Segment) + segsToMerge[1] = segment2.(*Segment) + + err = Merge(segsToMerge, []*roaring.Bitmap{nil, nil}, "/tmp/scorch3.zap", 1024) + if err != nil { + t.Fatal(err) + } +} + +func TestMergeAndDrop(t *testing.T) { + _ = os.RemoveAll("/tmp/scorch.zap") + _ = os.RemoveAll("/tmp/scorch2.zap") + _ = os.RemoveAll("/tmp/scorch3.zap") + + memSegment := buildMemSegmentMulti() + err := PersistSegment(memSegment, "/tmp/scorch.zap", 1024) + if err != nil { + t.Fatal(err) + } + + memSegment2 := buildMemSegmentMulti2() + err = PersistSegment(memSegment2, "/tmp/scorch2.zap", 1024) + if err != nil { + t.Fatal(err) + } + + segment, err := Open("/tmp/scorch.zap") + if err != nil { + t.Fatalf("error opening segment: %v", err) + } + defer func() { + cerr := segment.Close() + if cerr != nil { + t.Fatalf("error closing segment: %v", err) + } + }() + + segment2, err := Open("/tmp/scorch2.zap") + if err != nil { + t.Fatalf("error opening segment: %v", err) + } + defer func() { + cerr := segment2.Close() + if cerr != nil { + t.Fatalf("error closing segment: %v", err) + } + }() + + segsToMerge := make([]*Segment, 2) + segsToMerge[0] = segment.(*Segment) + segsToMerge[1] = segment2.(*Segment) + + docsToDrop := make([]*roaring.Bitmap, 2) + docsToDrop[0] = roaring.NewBitmap() + docsToDrop[0].AddInt(1) + docsToDrop[1] = roaring.NewBitmap() + 
docsToDrop[1].AddInt(1) + + err = Merge(segsToMerge, docsToDrop, "/tmp/scorch3.zap", 1024) + if err != nil { + t.Fatal(err) + } +} + +func buildMemSegmentMulti2() *mem.Segment { + + doc := &document.Document{ + ID: "c", + Fields: []document.Field{ + document.NewTextFieldCustom("_id", nil, []byte("c"), document.IndexField|document.StoreField, nil), + document.NewTextFieldCustom("name", nil, []byte("mat"), document.IndexField|document.StoreField|document.IncludeTermVectors, nil), + document.NewTextFieldCustom("desc", nil, []byte("some thing"), document.IndexField|document.StoreField|document.IncludeTermVectors, nil), + document.NewTextFieldCustom("tag", []uint64{0}, []byte("cold"), document.IndexField|document.StoreField|document.IncludeTermVectors, nil), + document.NewTextFieldCustom("tag", []uint64{1}, []byte("dark"), document.IndexField|document.StoreField|document.IncludeTermVectors, nil), + }, + CompositeFields: []*document.CompositeField{ + document.NewCompositeField("_all", true, nil, []string{"_id"}), + }, + } + + doc2 := &document.Document{ + ID: "d", + Fields: []document.Field{ + document.NewTextFieldCustom("_id", nil, []byte("d"), document.IndexField|document.StoreField, nil), + document.NewTextFieldCustom("name", nil, []byte("joa"), document.IndexField|document.StoreField|document.IncludeTermVectors, nil), + document.NewTextFieldCustom("desc", nil, []byte("some thing"), document.IndexField|document.StoreField|document.IncludeTermVectors, nil), + document.NewTextFieldCustom("tag", []uint64{0}, []byte("cold"), document.IndexField|document.StoreField|document.IncludeTermVectors, nil), + document.NewTextFieldCustom("tag", []uint64{1}, []byte("dark"), document.IndexField|document.StoreField|document.IncludeTermVectors, nil), + }, + CompositeFields: []*document.CompositeField{ + document.NewCompositeField("_all", true, nil, []string{"_id"}), + }, + } + + // forge analyzed docs + results := []*index.AnalysisResult{ + &index.AnalysisResult{ + Document: doc, + 
Analyzed: []analysis.TokenFrequencies{ + analysis.TokenFrequency(analysis.TokenStream{ + &analysis.Token{ + Start: 0, + End: 1, + Position: 1, + Term: []byte("c"), + }, + }, nil, false), + analysis.TokenFrequency(analysis.TokenStream{ + &analysis.Token{ + Start: 0, + End: 3, + Position: 1, + Term: []byte("mat"), + }, + }, nil, true), + analysis.TokenFrequency(analysis.TokenStream{ + &analysis.Token{ + Start: 0, + End: 4, + Position: 1, + Term: []byte("some"), + }, + &analysis.Token{ + Start: 5, + End: 10, + Position: 2, + Term: []byte("thing"), + }, + }, nil, true), + analysis.TokenFrequency(analysis.TokenStream{ + &analysis.Token{ + Start: 0, + End: 4, + Position: 1, + Term: []byte("cold"), + }, + }, []uint64{0}, true), + analysis.TokenFrequency(analysis.TokenStream{ + &analysis.Token{ + Start: 0, + End: 4, + Position: 1, + Term: []byte("dark"), + }, + }, []uint64{1}, true), + }, + Length: []int{ + 1, + 1, + 2, + 1, + 1, + }, + }, + &index.AnalysisResult{ + Document: doc2, + Analyzed: []analysis.TokenFrequencies{ + analysis.TokenFrequency(analysis.TokenStream{ + &analysis.Token{ + Start: 0, + End: 1, + Position: 1, + Term: []byte("d"), + }, + }, nil, false), + analysis.TokenFrequency(analysis.TokenStream{ + &analysis.Token{ + Start: 0, + End: 3, + Position: 1, + Term: []byte("joa"), + }, + }, nil, true), + analysis.TokenFrequency(analysis.TokenStream{ + &analysis.Token{ + Start: 0, + End: 4, + Position: 1, + Term: []byte("some"), + }, + &analysis.Token{ + Start: 5, + End: 10, + Position: 2, + Term: []byte("thing"), + }, + }, nil, true), + analysis.TokenFrequency(analysis.TokenStream{ + &analysis.Token{ + Start: 0, + End: 4, + Position: 1, + Term: []byte("cold"), + }, + }, []uint64{0}, true), + analysis.TokenFrequency(analysis.TokenStream{ + &analysis.Token{ + Start: 0, + End: 4, + Position: 1, + Term: []byte("dark"), + }, + }, []uint64{1}, true), + }, + Length: []int{ + 1, + 1, + 2, + 1, + 1, + }, + }, + } + + // fix up composite fields + for _, ar := range 
results { + for i, f := range ar.Document.Fields { + for _, cf := range ar.Document.CompositeFields { + cf.Compose(f.Name(), ar.Length[i], ar.Analyzed[i]) + } + } + } + + segment := mem.NewFromAnalyzedDocs(results) + + return segment +} From 57121e40a85624b112fd4333bc6c30ba2a7064bb Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Tue, 12 Dec 2017 11:41:14 -0500 Subject: [PATCH 049/728] fix issues identified by errcheck --- index/scorch/segment/zap/merge.go | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/index/scorch/segment/zap/merge.go b/index/scorch/segment/zap/merge.go index fe5155d16..b41dc4275 100644 --- a/index/scorch/segment/zap/merge.go +++ b/index/scorch/segment/zap/merge.go @@ -218,7 +218,10 @@ func persistMergedRest(segments []*Segment, drops []*roaring.Bitmap, args = append(args, loc.End()) args = append(args, uint64(len(loc.ArrayPositions()))) args = append(args, loc.ArrayPositions()...) - locEncoder.Add(hitNewDocNum, args...) + err = locEncoder.Add(hitNewDocNum, args...) 
+ if err != nil { + return nil, err + } } } next, err2 = postItr.Next() @@ -275,7 +278,10 @@ func persistMergedRest(segments []*Segment, drops []*roaring.Bitmap, return nil, err } - newVellum.Insert(term, postingOffset) + err = newVellum.Insert(term, postingOffset) + if err != nil { + return nil, err + } } err = mergeItr.Next() From 59a1e26300606112431b1c04b7a077dc0d164a45 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Mon, 11 Dec 2017 15:03:29 -0800 Subject: [PATCH 050/728] mergeplan: scoring implemented --- index/scorch/mergeplan/merge_plan.go | 126 ++++--- index/scorch/mergeplan/merge_plan_test.go | 422 ++++++++++++++++++++-- index/scorch/mergeplan/sort.go | 5 +- 3 files changed, 465 insertions(+), 88 deletions(-) diff --git a/index/scorch/mergeplan/merge_plan.go b/index/scorch/mergeplan/merge_plan.go index ff50cff37..18bb3ec16 100644 --- a/index/scorch/mergeplan/merge_plan.go +++ b/index/scorch/mergeplan/merge_plan.go @@ -37,21 +37,8 @@ type Segment interface { // assigned to at most a single MergeTask in the output MergePlan. A // segment not assigned to any MergeTask means the segment should // remain unmerged. -func Plan(segments []Segment, o *MergePlanOptions) ( - result *MergePlan, err error) { - if len(segments) <= 1 { - return nil, nil - } - - // TODO: PLACEHOLDER implementation for now, that always merges - // all the candidates. - return &MergePlan{ - Tasks: []*MergeTask{ - &MergeTask{ - Segments: segments, - }, - }, - }, nil +func Plan(segments []Segment, o *MergePlanOptions) (*MergePlan, error) { + return plan(segments, o) } // A MergePlan is the result of the Plan() API. @@ -100,10 +87,6 @@ type MergePlanOptions struct { // impact merge selection. ReclaimDeletesWeight float64 - // Only consider a segment for merging if its delete percentage is - // over this threshold. - MinDeletesPct float64 - // Optional, defaults to mergeplan.CalcBudget(). 
CalcBudget func(totalSize int64, firstTierSize int64, o *MergePlanOptions) (budgetNumSegments int) @@ -130,13 +113,11 @@ var DefaultMergePlanOptions = MergePlanOptions{ SegmentsPerMergeTask: 10, FloorSegmentSize: 2000, ReclaimDeletesWeight: 2.0, - MinDeletesPct: 10.0, } // ------------------------------------------- -func plan(segmentsIn []Segment, o *MergePlanOptions) ( - result *MergePlan, err error) { +func plan(segmentsIn []Segment, o *MergePlanOptions) (*MergePlan, error) { if len(segmentsIn) <= 1 { return nil, nil } @@ -153,8 +134,8 @@ func plan(segmentsIn []Segment, o *MergePlanOptions) ( var minLiveSize int64 = math.MaxInt64 - var eligible []Segment - var eligibleLiveSize int64 + var eligibles []Segment + var eligiblesLiveSize int64 for _, segment := range segments { segmentsLiveSize += segment.LiveSize() @@ -165,8 +146,8 @@ func plan(segmentsIn []Segment, o *MergePlanOptions) ( // Only small-enough segments are eligible. if segment.LiveSize() < o.MaxSegmentSize/2 { - eligible = append(eligible, segment) - eligibleLiveSize += segment.LiveSize() + eligibles = append(eligibles, segment) + eligiblesLiveSize += segment.LiveSize() } } @@ -177,7 +158,7 @@ func plan(segmentsIn []Segment, o *MergePlanOptions) ( calcBudget = CalcBudget } - budgetNumSegments := CalcBudget(eligibleLiveSize, minLiveSize, o) + budgetNumSegments := CalcBudget(eligiblesLiveSize, minLiveSize, o) scoreSegments := o.ScoreSegments if scoreSegments == nil { @@ -188,36 +169,32 @@ func plan(segmentsIn []Segment, o *MergePlanOptions) ( // While we’re over budget, keep looping, which might produce // another MergeTask. - for len(eligible) > budgetNumSegments { + for len(eligibles) > budgetNumSegments { // Track a current best roster as we examine and score // potential rosters of merges. var bestRoster []Segment var bestRosterScore float64 // Lower score is better. 
- for startIdx := 0; startIdx < len(eligible)-o.SegmentsPerMergeTask; startIdx++ { + for startIdx := 0; startIdx < len(eligibles)-o.SegmentsPerMergeTask; startIdx++ { var roster []Segment var rosterLiveSize int64 - for idx := startIdx; idx < len(eligible) && len(roster) < o.SegmentsPerMergeTask; idx++ { - rosterCandidate := eligible[idx] + for idx := startIdx; idx < len(eligibles) && len(roster) < o.SegmentsPerMergeTask; idx++ { + eligible := eligibles[idx] - if rosterLiveSize+rosterCandidate.LiveSize() > o.MaxSegmentSize { - // NOTE: We continue the loop, to try to “pack” - // the roster with smaller segments to get closer - // to the max size; but, we aren't doing full, - // comprehensive "bin-packing" permutations. - continue + if rosterLiveSize+eligible.LiveSize() < o.MaxSegmentSize { + roster = append(roster, eligible) + rosterLiveSize += eligible.LiveSize() } - - roster = append(roster, rosterCandidate) - rosterLiveSize += rosterCandidate.LiveSize() } - rosterScore := scoreSegments(roster, o) + if len(roster) > 0 { + rosterScore := scoreSegments(roster, o) - if len(bestRoster) <= 0 || rosterScore < bestRosterScore { - bestRoster = roster - bestRosterScore = rosterScore + if len(bestRoster) <= 0 || rosterScore < bestRosterScore { + bestRoster = roster + bestRosterScore = rosterScore + } } } @@ -225,11 +202,9 @@ func plan(segmentsIn []Segment, o *MergePlanOptions) ( return rv, nil } - rv.Tasks = append(rv.Tasks, &MergeTask{ - Segments: bestRoster, - }) + rv.Tasks = append(rv.Tasks, &MergeTask{Segments: bestRoster}) - eligible = removeSegments(eligible, bestRoster) + eligibles = removeSegments(eligibles, bestRoster) } return rv, nil @@ -240,24 +215,38 @@ func plan(segmentsIn []Segment, o *MergePlanOptions) ( func CalcBudget(totalSize int64, firstTierSize int64, o *MergePlanOptions) ( budgetNumSegments int) { tierSize := firstTierSize + if tierSize < 1 { + tierSize = 1 + } + + maxSegmentsPerTier := o.MaxSegmentsPerTier + if maxSegmentsPerTier < 1 { + 
maxSegmentsPerTier = 1 + } + + segmentsPerMergeTask := int64(o.SegmentsPerMergeTask) + if segmentsPerMergeTask < 2 { + segmentsPerMergeTask = 2 + } for totalSize > 0 { segmentsInTier := float64(totalSize) / float64(tierSize) - if segmentsInTier < float64(o.MaxSegmentsPerTier) { + if segmentsInTier < float64(maxSegmentsPerTier) { budgetNumSegments += int(math.Ceil(segmentsInTier)) break } - budgetNumSegments += o.MaxSegmentsPerTier - totalSize -= int64(o.MaxSegmentsPerTier) * tierSize - tierSize *= int64(o.SegmentsPerMergeTask) + budgetNumSegments += maxSegmentsPerTier + totalSize -= int64(maxSegmentsPerTier) * tierSize + tierSize *= segmentsPerMergeTask } return budgetNumSegments } -// removeSegments() keeps the ordering of the result segments stable. -func removeSegments(segments []Segment, toRemove []Segment) (rv []Segment) { +// Of note, removeSegments() keeps the ordering of the results stable. +func removeSegments(segments []Segment, toRemove []Segment) []Segment { + rv := make([]Segment, 0, len(segments)-len(toRemove)) OUTER: for _, segment := range segments { for _, r := range toRemove { @@ -270,6 +259,33 @@ OUTER: return rv } +// Smaller result score is better. func ScoreSegments(segments []Segment, o *MergePlanOptions) float64 { - return 0 // TODO. Bogus score. + var totBeforeSize int64 + var totAfterSize int64 + var totAfterSizeFloored int64 + + for _, segment := range segments { + totBeforeSize += segment.FullSize() + totAfterSize += segment.LiveSize() + totAfterSizeFloored += o.RaiseToFloorSegmentSize(segment.LiveSize()) + } + + // Roughly guess the "balance" of the segments -- whether the + // segments are about the same size. + balance := + float64(o.RaiseToFloorSegmentSize(segments[0].LiveSize())) / + float64(totAfterSizeFloored) + + // Gently favor smaller merges over bigger ones. We don't want to + // make the exponent too large else we end up with poor merges of + // small segments in order to avoid the large merges. 
+ score := balance * math.Pow(float64(totAfterSize), 0.05) + + // Strongly favor merges that reclaim deletes. + nonDelRatio := float64(totAfterSize) / float64(totBeforeSize) + + score *= math.Pow(nonDelRatio, o.ReclaimDeletesWeight) + + return score } diff --git a/index/scorch/mergeplan/merge_plan_test.go b/index/scorch/mergeplan/merge_plan_test.go index 0f0f0fcca..c1bca8459 100644 --- a/index/scorch/mergeplan/merge_plan_test.go +++ b/index/scorch/mergeplan/merge_plan_test.go @@ -15,61 +15,99 @@ package mergeplan import ( + "encoding/json" + "fmt" + "os" "reflect" + "sort" + "strings" "testing" ) // Implements the Segment interface for testing, type segment struct { - id uint64 - fullSize int64 - liveSize int64 + MyId uint64 + MyFullSize int64 + MyLiveSize int64 } -func (s *segment) Id() uint64 { return s.id } -func (s *segment) FullSize() int64 { return s.fullSize } -func (s *segment) LiveSize() int64 { return s.liveSize } +func (s *segment) Id() uint64 { return s.MyId } +func (s *segment) FullSize() int64 { return s.MyFullSize } +func (s *segment) LiveSize() int64 { return s.MyLiveSize } func makeLinearSegments(n int) (rv []Segment) { for i := 0; i < n; i++ { rv = append(rv, &segment{ - id: uint64(i), - fullSize: int64(i), - liveSize: int64(i), + MyId: uint64(i), + MyFullSize: int64(i), + MyLiveSize: int64(i), }) } return rv } +// ---------------------------------------- + func TestSimplePlan(t *testing.T) { segs := makeLinearSegments(10) tests := []struct { - desc string - segments []Segment - expectPlan *MergePlan - expectErr error + Desc string + Segments []Segment + Options *MergePlanOptions + ExpectPlan *MergePlan + ExpectErr error }{ - {"nil candidates", - nil, nil, nil}, - {"empty candidates", - []Segment{}, nil, nil}, - {"1 candidate", - []Segment{segs[0]}, + {"nil segments", + nil, nil, nil, nil}, + {"empty segments", + []Segment{}, nil, nil, nil}, + {"1 segment", + []Segment{segs[1]}, + nil, + nil, + nil, + }, + {"2 segments", + []Segment{ + segs[1], + 
segs[2], + }, nil, + &MergePlan{}, nil, }, - {"2 candidates", + {"3 segments", []Segment{ - segs[0], segs[1], + segs[2], + segs[9], + }, + nil, + &MergePlan{}, + nil, + }, + {"many segments", + []Segment{ + segs[1], + segs[2], + segs[3], + segs[4], + segs[5], + segs[6], + }, + &MergePlanOptions{ + MaxSegmentsPerTier: 1, + MaxSegmentSize: 1000, + SegmentsPerMergeTask: 2, + FloorSegmentSize: 1, }, &MergePlan{ - []*MergeTask{ + Tasks: []*MergeTask{ &MergeTask{ Segments: []Segment{ - segs[0], - segs[1], + segs[6], + segs[5], }, }, }, @@ -79,14 +117,336 @@ func TestSimplePlan(t *testing.T) { } for testi, test := range tests { - plan, err := Plan(test.segments, &DefaultMergePlanOptions) - if err != test.expectErr { - t.Errorf("testi: %d, test: %v, got err: %v", - testi, test, err) + plan, err := Plan(test.Segments, test.Options) + if err != test.ExpectErr { + testj, _ := json.Marshal(&test) + t.Errorf("testi: %d, test: %s, got err: %v", + testi, testj, err) } - if !reflect.DeepEqual(plan, test.expectPlan) { - t.Errorf("testi: %d, test: %v, got plan: %v", - testi, test, plan) + if !reflect.DeepEqual(plan, test.ExpectPlan) { + testj, _ := json.Marshal(&test) + planj, _ := json.Marshal(&plan) + t.Errorf("testi: %d, test: %s, got plan: %s", + testi, testj, planj) } } } + +// ---------------------------------------- + +func TestSort(t *testing.T) { + segs := makeLinearSegments(10) + + sort.Sort(byLiveSizeDescending(segs)) + + for i := 1; i < len(segs); i++ { + if segs[i].LiveSize() >= segs[i-1].LiveSize() { + t.Errorf("not descending") + } + } +} + +// ---------------------------------------- + +func TestCalcBudget(t *testing.T) { + tests := []struct { + totalSize int64 + firstTierSize int64 + o MergePlanOptions + expect int + }{ + {0, 0, MergePlanOptions{}, 0}, + {1, 0, MergePlanOptions{}, 1}, + {9, 0, MergePlanOptions{}, 4}, + {1, 1, + MergePlanOptions{ + MaxSegmentsPerTier: 1, + MaxSegmentSize: 1000, + SegmentsPerMergeTask: 2, + FloorSegmentSize: 1, + }, + 1, + }, + {21, 
1, + MergePlanOptions{ + MaxSegmentsPerTier: 1, + MaxSegmentSize: 1000, + SegmentsPerMergeTask: 2, + FloorSegmentSize: 1, + }, + 5, + }, + {21, 1, + MergePlanOptions{ + MaxSegmentsPerTier: 2, + MaxSegmentSize: 1000, + SegmentsPerMergeTask: 2, + FloorSegmentSize: 1, + }, + 7, + }, + } + + for testi, test := range tests { + res := CalcBudget(test.totalSize, test.firstTierSize, &test.o) + if res != test.expect { + t.Errorf("testi: %d, test: %#v, res: %v", + testi, test, res) + } + } +} + +// ---------------------------------------- + +func TestInsert1SameSizedSegmentBetweenMerges(t *testing.T) { + o := &MergePlanOptions{ + MaxSegmentSize: 1000, + MaxSegmentsPerTier: 3, + SegmentsPerMergeTask: 3, + } + + spec := testCyclesSpec{ + descrip: "i1sssbm", + verbose: os.Getenv("VERBOSE") == "i1sssbm" || os.Getenv("VERBOSE") == "y", + n: 200, + o: o, + beforePlan: func(spec *testCyclesSpec) { + spec.segments = append(spec.segments, &segment{ + MyId: spec.nextSegmentId, + MyFullSize: 1, + MyLiveSize: 1, + }) + spec.nextSegmentId++ + }, + } + + spec.runCycles(t) +} + +func TestInsertManySameSizedSegmentsBetweenMerges(t *testing.T) { + o := &MergePlanOptions{ + MaxSegmentSize: 1000, + MaxSegmentsPerTier: 3, + SegmentsPerMergeTask: 3, + } + + spec := testCyclesSpec{ + descrip: "imsssbm", + verbose: os.Getenv("VERBOSE") == "imsssbm" || os.Getenv("VERBOSE") == "y", + n: 20, + o: o, + beforePlan: func(spec *testCyclesSpec) { + for i := 0; i < 10; i++ { + spec.segments = append(spec.segments, &segment{ + MyId: spec.nextSegmentId, + MyFullSize: 1, + MyLiveSize: 1, + }) + spec.nextSegmentId++ + } + }, + } + + spec.runCycles(t) +} + +func TestInsertManyDifferentSizedSegmentsBetweenMerges(t *testing.T) { + o := &MergePlanOptions{ + MaxSegmentSize: 1000, + MaxSegmentsPerTier: 3, + SegmentsPerMergeTask: 3, + } + + spec := testCyclesSpec{ + descrip: "imdssbm", + verbose: os.Getenv("VERBOSE") == "imdssbm" || os.Getenv("VERBOSE") == "y", + n: 20, + o: o, + beforePlan: func(spec 
*testCyclesSpec) { + for i := 0; i < 10; i++ { + spec.segments = append(spec.segments, &segment{ + MyId: spec.nextSegmentId, + MyFullSize: int64(1 + (i % 5)), + MyLiveSize: int64(1 + (i % 5)), + }) + spec.nextSegmentId++ + } + }, + } + + spec.runCycles(t) +} + +func TestManySameSizedSegmentsWithDeletesBetweenMerges(t *testing.T) { + o := &MergePlanOptions{ + MaxSegmentSize: 1000, + MaxSegmentsPerTier: 3, + SegmentsPerMergeTask: 3, + } + + var numPlansWithTasks int + + spec := testCyclesSpec{ + descrip: "mssswdbm", + verbose: os.Getenv("VERBOSE") == "mssswdbm" || os.Getenv("VERBOSE") == "y", + n: 20, + o: o, + beforePlan: func(spec *testCyclesSpec) { + // Deletions are a shrinking of the live size. + for i, seg := range spec.segments { + if (spec.cycle+i)%5 == 0 { + s := seg.(*segment) + if s.MyLiveSize > 0 { + s.MyLiveSize -= 1 + } + } + } + + for i := 0; i < 10; i++ { + spec.segments = append(spec.segments, &segment{ + MyId: spec.nextSegmentId, + MyFullSize: 1, + MyLiveSize: 1, + }) + spec.nextSegmentId++ + } + }, + afterPlan: func(spec *testCyclesSpec, plan *MergePlan) { + if plan != nil && len(plan.Tasks) > 0 { + numPlansWithTasks++ + } + }, + } + + spec.runCycles(t) + + if numPlansWithTasks <= 0 { + t.Errorf("expected some plans with tasks") + } +} + +// ---------------------------------------- + +type testCyclesSpec struct { + descrip string + verbose bool + + n int // Number of cycles to run. 
+ o *MergePlanOptions + + beforePlan func(*testCyclesSpec) + afterPlan func(*testCyclesSpec, *MergePlan) + + cycle int + segments []Segment + nextSegmentId uint64 +} + +func (spec *testCyclesSpec) runCycles(t *testing.T) { + numPlansWithTasks := 0 + + for spec.cycle < spec.n { + if spec.verbose { + emit(spec.descrip, spec.cycle, 0, spec.segments, nil) + } + + if spec.beforePlan != nil { + spec.beforePlan(spec) + } + + if spec.verbose { + emit(spec.descrip, spec.cycle, 1, spec.segments, nil) + } + + plan, err := Plan(spec.segments, spec.o) + if err != nil { + t.Fatalf("expected no err, got: %v", err) + } + + if spec.afterPlan != nil { + spec.afterPlan(spec, plan) + } + + if spec.verbose { + emit(spec.descrip, spec.cycle, 2, spec.segments, plan) + } + + if plan != nil { + if len(plan.Tasks) > 0 { + numPlansWithTasks++ + } + + for _, task := range plan.Tasks { + spec.segments = removeSegments(spec.segments, task.Segments) + + var totLiveSize int64 + for _, segment := range task.Segments { + totLiveSize += segment.LiveSize() + } + + spec.segments = append(spec.segments, &segment{ + MyId: spec.nextSegmentId, + MyFullSize: totLiveSize, + MyLiveSize: totLiveSize, + }) + spec.nextSegmentId++ + } + } + + spec.cycle++ + } + + if numPlansWithTasks <= 0 { + t.Errorf("expected some plans with tasks") + } +} + +func emit(descrip string, cycle int, step int, segments []Segment, plan *MergePlan) { + if os.Getenv("VERBOSE") == "" { + return + } + + suffix := "" + if plan != nil && len(plan.Tasks) > 0 { + suffix = "hasPlan" + } + + fmt.Printf("%s %d-%d ---------- %s\n", descrip, cycle, step, suffix) + + var maxFullSize int64 + for _, segment := range segments { + if maxFullSize < segment.FullSize() { + maxFullSize = segment.FullSize() + } + } + + barMax := 100 + + for _, segment := range segments { + barFull := int(segment.FullSize()) + barLive := int(segment.LiveSize()) + + if maxFullSize > int64(barMax) { + barFull = int(float64(barMax) * float64(barFull) / float64(maxFullSize)) + 
barLive = int(float64(barMax) * float64(barLive) / float64(maxFullSize)) + } + + var barChar = "." + + if plan != nil { + TASK_LOOP: + for taski, task := range plan.Tasks { + for _, taskSegment := range task.Segments { + if taskSegment == segment { + barChar = fmt.Sprintf("%d", taski) + break TASK_LOOP + } + } + } + } + + bar := strings.Repeat(barChar, barLive) + strings.Repeat("x", barFull-barLive) + + fmt.Printf("%s %5d %5d - %s\n", descrip, segment.Id(), segment.FullSize(), bar) + } +} diff --git a/index/scorch/mergeplan/sort.go b/index/scorch/mergeplan/sort.go index 4eabc31db..d044b8d7c 100644 --- a/index/scorch/mergeplan/sort.go +++ b/index/scorch/mergeplan/sort.go @@ -16,12 +16,13 @@ package mergeplan type byLiveSizeDescending []Segment -func (a byLiveSizeDescending) Len() int { return len(a) } +func (a byLiveSizeDescending) Len() int { return len(a) } + func (a byLiveSizeDescending) Swap(i, j int) { a[i], a[j] = a[j], a[i] } func (a byLiveSizeDescending) Less(i, j int) bool { if a[i].LiveSize() != a[j].LiveSize() { - return a[i].LiveSize() < a[j].LiveSize() + return a[i].LiveSize() > a[j].LiveSize() } return a[i].Id() < a[j].Id() } From be7dd36ac650b80cfc416731717db77877e8a2f1 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Tue, 12 Dec 2017 10:36:16 -0800 Subject: [PATCH 051/728] mergeplan: more tests and bargraph tweaks --- index/scorch/mergeplan/merge_plan.go | 8 +-- index/scorch/mergeplan/merge_plan_test.go | 70 +++++++++++++++++++---- 2 files changed, 64 insertions(+), 14 deletions(-) diff --git a/index/scorch/mergeplan/merge_plan.go b/index/scorch/mergeplan/merge_plan.go index 18bb3ec16..f3f7b9e3b 100644 --- a/index/scorch/mergeplan/merge_plan.go +++ b/index/scorch/mergeplan/merge_plan.go @@ -130,16 +130,12 @@ func plan(segmentsIn []Segment, o *MergePlanOptions) (*MergePlan, error) { sort.Sort(byLiveSizeDescending(segments)) - var segmentsLiveSize int64 - var minLiveSize int64 = math.MaxInt64 var eligibles []Segment var eligiblesLiveSize int64 for _, 
segment := range segments { - segmentsLiveSize += segment.LiveSize() - if minLiveSize > segment.LiveSize() { minLiveSize = segment.LiveSize() } @@ -271,6 +267,10 @@ func ScoreSegments(segments []Segment, o *MergePlanOptions) float64 { totAfterSizeFloored += o.RaiseToFloorSegmentSize(segment.LiveSize()) } + if totBeforeSize <= 0 || totAfterSize <= 0 || totAfterSizeFloored <= 0 { + return 0 + } + // Roughly guess the "balance" of the segments -- whether the // segments are about the same size. balance := diff --git a/index/scorch/mergeplan/merge_plan_test.go b/index/scorch/mergeplan/merge_plan_test.go index c1bca8459..05dcaaf9f 100644 --- a/index/scorch/mergeplan/merge_plan_test.go +++ b/index/scorch/mergeplan/merge_plan_test.go @@ -12,6 +12,9 @@ // See the License for the specific language governing permissions and // limitations under the License. +// The segment merge planning approach was inspired by Lucene's +// TieredMergePolicy.java and descriptions like +// http://blog.mikemccandless.com/2011/02/visualizing-lucenes-segment-merges.html package mergeplan import ( @@ -250,6 +253,43 @@ func TestInsertManySameSizedSegmentsBetweenMerges(t *testing.T) { spec.runCycles(t) } +func TestInsertManySameSizedSegmentsWithDeletionsBetweenMerges(t *testing.T) { + o := &MergePlanOptions{ + MaxSegmentSize: 1000, + MaxSegmentsPerTier: 3, + SegmentsPerMergeTask: 3, + } + + spec := testCyclesSpec{ + descrip: "imssswdbm", + verbose: os.Getenv("VERBOSE") == "imssswdbm" || os.Getenv("VERBOSE") == "y", + n: 20, + o: o, + beforePlan: func(spec *testCyclesSpec) { + for i := 0; i < 10; i++ { + // Deletions are a shrinking of the live size. 
+ for i, seg := range spec.segments { + if (spec.cycle+i)%5 == 0 { + s := seg.(*segment) + if s.MyLiveSize > 0 { + s.MyLiveSize -= 1 + } + } + } + + spec.segments = append(spec.segments, &segment{ + MyId: spec.nextSegmentId, + MyFullSize: 1, + MyLiveSize: 1, + }) + spec.nextSegmentId++ + } + }, + } + + spec.runCycles(t) +} + func TestInsertManyDifferentSizedSegmentsBetweenMerges(t *testing.T) { o := &MergePlanOptions{ MaxSegmentSize: 1000, @@ -384,12 +424,14 @@ func (spec *testCyclesSpec) runCycles(t *testing.T) { totLiveSize += segment.LiveSize() } - spec.segments = append(spec.segments, &segment{ - MyId: spec.nextSegmentId, - MyFullSize: totLiveSize, - MyLiveSize: totLiveSize, - }) - spec.nextSegmentId++ + if totLiveSize > 0 { + spec.segments = append(spec.segments, &segment{ + MyId: spec.nextSegmentId, + MyFullSize: totLiveSize, + MyLiveSize: totLiveSize, + }) + spec.nextSegmentId++ + } } } @@ -411,7 +453,7 @@ func emit(descrip string, cycle int, step int, segments []Segment, plan *MergePl suffix = "hasPlan" } - fmt.Printf("%s %d-%d ---------- %s\n", descrip, cycle, step, suffix) + fmt.Printf("%s %d.%d ---------- %s\n", descrip, cycle, step, suffix) var maxFullSize int64 for _, segment := range segments { @@ -431,13 +473,15 @@ func emit(descrip string, cycle int, step int, segments []Segment, plan *MergePl barLive = int(float64(barMax) * float64(barLive) / float64(maxFullSize)) } - var barChar = "." + barKind := " " + barChar := "." 
if plan != nil { TASK_LOOP: for taski, task := range plan.Tasks { for _, taskSegment := range task.Segments { if taskSegment == segment { + barKind = "*" barChar = fmt.Sprintf("%d", taski) break TASK_LOOP } @@ -445,8 +489,14 @@ func emit(descrip string, cycle int, step int, segments []Segment, plan *MergePl } } - bar := strings.Repeat(barChar, barLive) + strings.Repeat("x", barFull-barLive) + bar := + strings.Repeat(barChar, barLive)[0:barLive] + + strings.Repeat("x", barFull-barLive)[0:barFull-barLive] - fmt.Printf("%s %5d %5d - %s\n", descrip, segment.Id(), segment.FullSize(), bar) + fmt.Printf("%s %5d: %5d /%5d - %s %s\n", descrip, + segment.Id(), + segment.LiveSize(), + segment.FullSize(), + barKind, bar) } } From c15c3c11cd93dc473ef89afae5c479f09442d206 Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Wed, 13 Dec 2017 13:31:18 -0500 Subject: [PATCH 052/728] extra protection if dict address is 0 (empty segment) --- index/scorch/segment/zap/segment.go | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/index/scorch/segment/zap/segment.go b/index/scorch/segment/zap/segment.go index 2c6d0bfed..fce80f933 100644 --- a/index/scorch/segment/zap/segment.go +++ b/index/scorch/segment/zap/segment.go @@ -150,16 +150,18 @@ func (s *Segment) dictionary(field string) (*Dictionary, error) { dictStart := s.fieldsOffsets[rv.fieldID] - // read the length of the vellum data - vellumLen, read := binary.Uvarint(s.mm[dictStart : dictStart+binary.MaxVarintLen64]) - fstBytes := s.mm[dictStart+uint64(read) : dictStart+uint64(read)+vellumLen] - if fstBytes != nil { - fst, err := vellum.Load(fstBytes) - if err != nil { - return nil, fmt.Errorf("dictionary field %s vellum err: %v", field, err) - } - if err == nil { - rv.fst = fst + if dictStart > 0 { + // read the length of the vellum data + vellumLen, read := binary.Uvarint(s.mm[dictStart : dictStart+binary.MaxVarintLen64]) + fstBytes := s.mm[dictStart+uint64(read) : 
dictStart+uint64(read)+vellumLen] + if fstBytes != nil { + fst, err := vellum.Load(fstBytes) + if err != nil { + return nil, fmt.Errorf("dictionary field %s vellum err: %v", field, err) + } + if err == nil { + rv.fst = fst + } } } From f83c9f2a2022a2cd1c45ac34b6cd020d9ab2f089 Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Wed, 13 Dec 2017 13:41:03 -0500 Subject: [PATCH 053/728] initial cut of merger that actually introduces changes --- index/scorch/introducer.go | 76 +++++++++++ index/scorch/merge.go | 167 +++++++++++++++++++++++++ index/scorch/persister.go | 10 ++ index/scorch/scorch.go | 6 + index/scorch/segment/zap/merge.go | 56 +++++---- index/scorch/segment/zap/merge_test.go | 4 +- index/scorch/snapshot_segment.go | 12 ++ 7 files changed, 303 insertions(+), 28 deletions(-) create mode 100644 index/scorch/merge.go diff --git a/index/scorch/introducer.go b/index/scorch/introducer.go index 0b30044a1..7998eae3b 100644 --- a/index/scorch/introducer.go +++ b/index/scorch/introducer.go @@ -42,6 +42,82 @@ OUTER: case notify = <-s.introducerNotifier: + case nextMerge := <-s.merges: + // acquire lock + s.rootLock.Lock() + + // prepare new index snapshot + currSize := len(s.root.segment) + newSize := currSize + 1 - len(nextMerge.old) + newSnapshot := &IndexSnapshot{ + segment: make([]*SegmentSnapshot, 0, newSize), + offsets: make([]uint64, 0, newSize), + internal: make(map[string][]byte, len(s.root.segment)), + epoch: s.nextSnapshotEpoch, + } + s.nextSnapshotEpoch++ + + // iterate through current segments + newSegmentDeleted := roaring.NewBitmap() + var running uint64 + for i := range s.root.segment { + segmentID := s.root.segment[i].id + if segSnapAtMerge, ok := nextMerge.old[segmentID]; ok { + // this segment is going away, see if anything else was deleted since we started the merge + if s.root.segment[i].deleted != nil { + // assume all these deletes are new + deletedSince := s.root.segment[i].deleted + // if we already knew about some of them, remove + if 
segSnapAtMerge.deleted != nil { + deletedSince = roaring.AndNot(s.root.segment[i].deleted, segSnapAtMerge.deleted) + } + deletedSinceItr := deletedSince.Iterator() + for deletedSinceItr.HasNext() { + oldDocNum := deletedSinceItr.Next() + newDocNum := nextMerge.oldNewDocNums[segmentID][oldDocNum] + newSegmentDeleted.Add(uint32(newDocNum)) + } + } + } else { + // this segment is staying + newSnapshot.segment = append(newSnapshot.segment, &SegmentSnapshot{ + id: s.root.segment[i].id, + segment: s.root.segment[i].segment, + notify: s.root.segment[i].notify, + deleted: s.root.segment[i].deleted, + }) + newSnapshot.offsets = append(newSnapshot.offsets, running) + running += s.root.segment[i].Count() + } + } + + // put new segment at end + newSnapshot.segment = append(newSnapshot.segment, &SegmentSnapshot{ + id: nextMerge.id, + segment: nextMerge.new, + deleted: newSegmentDeleted, + }) + newSnapshot.offsets = append(newSnapshot.offsets, running) + + // copy old values + for key, oldVal := range s.root.internal { + newSnapshot.internal[key] = oldVal + } + + // swap in new segment + s.root = newSnapshot + // release lock + s.rootLock.Unlock() + + // notify merger we incorporated this + close(nextMerge.notify) + + // notify persister + if notify != nil { + close(notify) + notify = nil + } + case next := <-s.introductions: // acquire lock s.rootLock.Lock() diff --git a/index/scorch/merge.go b/index/scorch/merge.go new file mode 100644 index 000000000..aef216646 --- /dev/null +++ b/index/scorch/merge.go @@ -0,0 +1,167 @@ +// Copyright (c) 2017 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package scorch + +import ( + "fmt" + "log" + "os" + "sync/atomic" + + "github.com/RoaringBitmap/roaring" + "github.com/blevesearch/bleve/index/scorch/mergeplan" + "github.com/blevesearch/bleve/index/scorch/segment" + "github.com/blevesearch/bleve/index/scorch/segment/zap" +) + +func (s *Scorch) mergerLoop() { + var lastEpochMergePlanned uint64 +OUTER: + for { + select { + case <-s.closeCh: + break OUTER + + default: + // check to see if there is a new snapshot to persist + s.rootLock.RLock() + ourSnapshot := s.root + s.rootLock.RUnlock() + + if ourSnapshot.epoch != lastEpochMergePlanned { + // lets get started + err := s.planMergeAtSnapshot(ourSnapshot) + if err != nil { + log.Printf("merging err: %v", err) + continue OUTER + } + lastEpochMergePlanned = ourSnapshot.epoch + } + + // tell the persister we're waiting for changes + // first make a notification chan + notifyUs := make(notificationChan) + + // give it to the persister + select { + case <-s.closeCh: + break OUTER + case s.persisterNotifier <- notifyUs: + } + + // check again + s.rootLock.RLock() + ourSnapshot = s.root + s.rootLock.RUnlock() + + if ourSnapshot.epoch != lastEpochMergePlanned { + // lets get started + err := s.planMergeAtSnapshot(ourSnapshot) + if err != nil { + continue OUTER + } + lastEpochMergePlanned = ourSnapshot.epoch + } + + // now wait for it (but also detect close) + select { + case <-s.closeCh: + break OUTER + case <-notifyUs: + // woken up, next loop should pick up work + } + } + } + s.asyncTasks.Done() +} + +func (s *Scorch) planMergeAtSnapshot(ourSnapshot 
*IndexSnapshot) error { + // build list of zap segments in this snapshot + var onlyZapSnapshots []mergeplan.Segment + for _, segmentSnapshot := range ourSnapshot.segment { + if _, ok := segmentSnapshot.segment.(*zap.Segment); ok { + onlyZapSnapshots = append(onlyZapSnapshots, segmentSnapshot) + } + } + + // give this list to the planner + resultMergePlan, err := mergeplan.Plan(onlyZapSnapshots, nil) + if err != nil { + return fmt.Errorf("merge planning err: %v", err) + } + if resultMergePlan == nil { + // nothing to do + return nil + } + + // process tasks in serial for now + var notifications []notificationChan + for _, task := range resultMergePlan.Tasks { + oldMap := make(map[uint64]*SegmentSnapshot) + newSegmentID := atomic.AddUint64(&s.nextSegmentID, 1) + segmentsToMerge := make([]*zap.Segment, 0, len(task.Segments)) + docsToDrop := make([]*roaring.Bitmap, 0, len(task.Segments)) + for _, planSegment := range task.Segments { + if segSnapshot, ok := planSegment.(*SegmentSnapshot); ok { + oldMap[segSnapshot.id] = segSnapshot + if zapSeg, ok := segSnapshot.segment.(*zap.Segment); ok { + segmentsToMerge = append(segmentsToMerge, zapSeg) + docsToDrop = append(docsToDrop, segSnapshot.deleted) + } + } + } + + filename := fmt.Sprintf("%x.zap", newSegmentID) + path := s.path + string(os.PathSeparator) + filename + newDocNums, err := zap.Merge(segmentsToMerge, docsToDrop, path, 1024) + if err != nil { + return fmt.Errorf("merging failed: %v", err) + } + segment, err := zap.Open(path) + if err != nil { + return err + } + sm := &segmentMerge{ + id: newSegmentID, + old: oldMap, + oldNewDocNums: make(map[uint64][]uint64), + new: segment, + notify: make(notificationChan), + } + notifications = append(notifications, sm.notify) + for i, segNewDocNums := range newDocNums { + sm.oldNewDocNums[task.Segments[i].Id()] = segNewDocNums + } + + // give it to the introducer + select { + case <-s.closeCh: + return nil + case s.merges <- sm: + } + } + for _, notification := range 
notifications { + <-notification + } + return nil +} + +type segmentMerge struct { + id uint64 + old map[uint64]*SegmentSnapshot + oldNewDocNums map[uint64][]uint64 + new segment.Segment + notify notificationChan +} diff --git a/index/scorch/persister.go b/index/scorch/persister.go index 8a171b311..78a1092ba 100644 --- a/index/scorch/persister.go +++ b/index/scorch/persister.go @@ -31,12 +31,14 @@ import ( type notificationChan chan struct{} func (s *Scorch) persisterLoop() { + var notify notificationChan var lastPersistedEpoch uint64 OUTER: for { select { case <-s.closeCh: break OUTER + case notify = <-s.persisterNotifier: default: // check to see if there is a new snapshot to persist @@ -53,6 +55,10 @@ OUTER: continue OUTER } lastPersistedEpoch = ourSnapshot.epoch + if notify != nil { + close(notify) + notify = nil + } } // tell the introducer we're waiting for changes @@ -79,6 +85,10 @@ OUTER: continue OUTER } lastPersistedEpoch = ourSnapshot.epoch + if notify != nil { + close(notify) + notify = nil + } } // now wait for it (but also detect close) diff --git a/index/scorch/scorch.go b/index/scorch/scorch.go index d95cb05bc..7373e6b43 100644 --- a/index/scorch/scorch.go +++ b/index/scorch/scorch.go @@ -54,7 +54,9 @@ type Scorch struct { closeCh chan struct{} introductions chan *segmentIntroduction + merges chan *segmentMerge introducerNotifier chan notificationChan + persisterNotifier chan notificationChan rootBolt *bolt.DB asyncTasks sync.WaitGroup } @@ -115,7 +117,9 @@ func (s *Scorch) Open() error { s.closeCh = make(chan struct{}) s.introductions = make(chan *segmentIntroduction) + s.merges = make(chan *segmentMerge) s.introducerNotifier = make(chan notificationChan) + s.persisterNotifier = make(chan notificationChan) s.asyncTasks.Add(1) go s.mainLoop() @@ -123,6 +127,8 @@ func (s *Scorch) Open() error { if !s.readOnly && s.path != "" { s.asyncTasks.Add(1) go s.persisterLoop() + s.asyncTasks.Add(1) + go s.mergerLoop() } return nil diff --git 
a/index/scorch/segment/zap/merge.go b/index/scorch/segment/zap/merge.go index b41dc4275..7652c2210 100644 --- a/index/scorch/segment/zap/merge.go +++ b/index/scorch/segment/zap/merge.go @@ -19,13 +19,13 @@ import ( // from the may be dropped, and creates a new segment containing the remaining // data. This new segment is built at the specified path, with the provided // chunkFactor. -func Merge(segments []*Segment, drops []*roaring.Bitmap, path string, chunkFactor uint32) error { - +func Merge(segments []*Segment, drops []*roaring.Bitmap, path string, + chunkFactor uint32) ([][]uint64, error) { flag := os.O_RDWR | os.O_CREATE f, err := os.OpenFile(path, flag, 0600) if err != nil { - return err + return nil, err } // bufer the output @@ -41,50 +41,49 @@ func Merge(segments []*Segment, drops []*roaring.Bitmap, path string, chunkFacto var newDocNums [][]uint64 var storedIndexOffset uint64 - storedIndexOffset, newDocNums, err = mergeStoredAndRemap(segments, drops, - fieldsMap, fieldsInv, newSegDocCount, cr) - if err != nil { - return err - } - - // FIXME temp until computed - //dictLocs := make([]uint64, len(fieldsInv)) + dictLocs := make([]uint64, len(fieldsInv)) + if newSegDocCount > 0 { + storedIndexOffset, newDocNums, err = mergeStoredAndRemap(segments, drops, + fieldsMap, fieldsInv, newSegDocCount, cr) + if err != nil { + return nil, err + } - var dictLocs []uint64 - dictLocs, err = persistMergedRest(segments, drops, fieldsInv, fieldsMap, - newDocNums, newSegDocCount, cr) - if err != nil { - return err + dictLocs, err = persistMergedRest(segments, drops, fieldsInv, fieldsMap, + newDocNums, newSegDocCount, cr) + if err != nil { + return nil, err + } } var fieldsIndexOffset uint64 fieldsIndexOffset, err = persistMergedFields(fieldsInv, cr, dictLocs) if err != nil { - return err + return nil, err } err = persistFooter(newSegDocCount, storedIndexOffset, fieldsIndexOffset, chunkFactor, cr) if err != nil { - return err + return nil, err } err = br.Flush() if err != nil 
{ - return err + return nil, err } err = f.Sync() if err != nil { - return err + return nil, err } err = f.Close() if err != nil { - return err + return nil, err } - return nil + return newDocNums, nil } func mapFields(fields []string) map[string]uint16 { @@ -161,11 +160,13 @@ func persistMergedRest(segments []*Segment, drops []*roaring.Bitmap, } dicts = append(dicts, dict) - itr, err2 := dict.fst.Iterator(nil, nil) - if err2 != nil { - return nil, err2 + if dict != nil && dict.fst != nil { + itr, err2 := dict.fst.Iterator(nil, nil) + if err2 != nil { + return nil, err2 + } + itrs = append(itrs, itr) } - itrs = append(itrs, itr) } // create merging iterator @@ -187,6 +188,9 @@ func persistMergedRest(segments []*Segment, drops []*roaring.Bitmap, // now go back and get posting list for this term // but pass in the deleted docs for that segment for dictI, dict := range dicts { + if dict == nil { + continue + } postings, err2 := dict.postingsList(string(term), drops[dictI]) if err2 != nil { return nil, err2 diff --git a/index/scorch/segment/zap/merge_test.go b/index/scorch/segment/zap/merge_test.go index 53bcde7fb..c8046efab 100644 --- a/index/scorch/segment/zap/merge_test.go +++ b/index/scorch/segment/zap/merge_test.go @@ -54,7 +54,7 @@ func TestMerge(t *testing.T) { segsToMerge[0] = segment.(*Segment) segsToMerge[1] = segment2.(*Segment) - err = Merge(segsToMerge, []*roaring.Bitmap{nil, nil}, "/tmp/scorch3.zap", 1024) + _, err = Merge(segsToMerge, []*roaring.Bitmap{nil, nil}, "/tmp/scorch3.zap", 1024) if err != nil { t.Fatal(err) } @@ -109,7 +109,7 @@ func TestMergeAndDrop(t *testing.T) { docsToDrop[1] = roaring.NewBitmap() docsToDrop[1].AddInt(1) - err = Merge(segsToMerge, docsToDrop, "/tmp/scorch3.zap", 1024) + _, err = Merge(segsToMerge, docsToDrop, "/tmp/scorch3.zap", 1024) if err != nil { t.Fatal(err) } diff --git a/index/scorch/snapshot_segment.go b/index/scorch/snapshot_segment.go index 14c49450b..ffd38cac5 100644 --- a/index/scorch/snapshot_segment.go +++ 
b/index/scorch/snapshot_segment.go @@ -49,6 +49,18 @@ type SegmentSnapshot struct { notify []chan error } +func (s *SegmentSnapshot) Id() uint64 { + return s.id +} + +func (s *SegmentSnapshot) FullSize() int64 { + return int64(s.segment.Count()) +} + +func (s SegmentSnapshot) LiveSize() int64 { + return int64(s.Count()) +} + func (s *SegmentSnapshot) Close() error { return s.segment.Close() } From cd45487cb3b5cee1b89dff8a0dd6f7b2d5be740a Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Wed, 13 Dec 2017 13:55:06 -0500 Subject: [PATCH 054/728] fsync rootBolt when persisting snapshot --- index/scorch/persister.go | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/index/scorch/persister.go b/index/scorch/persister.go index 78a1092ba..d54840864 100644 --- a/index/scorch/persister.go +++ b/index/scorch/persister.go @@ -109,6 +109,13 @@ func (s *Scorch) persistSnapshot(snapshot *IndexSnapshot) error { if err != nil { return err } + // defer fsync of the rootbolt + defer func() { + if err == nil { + err = s.rootBolt.Sync() + } + }() + // defer commit/rollback transaction defer func() { if err == nil { err = tx.Commit() From 1cd3fd7fbe661f0f29656155aa48fa864b0551e9 Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Wed, 13 Dec 2017 14:06:54 -0500 Subject: [PATCH 055/728] extrac common functionality between build/merge --- index/scorch/segment/zap/build.go | 47 +-------------- index/scorch/segment/zap/merge.go | 76 +------------------------ index/scorch/segment/zap/write.go | 95 +++++++++++++++++++++++++++++++ 3 files changed, 97 insertions(+), 121 deletions(-) create mode 100644 index/scorch/segment/zap/write.go diff --git a/index/scorch/segment/zap/build.go b/index/scorch/segment/zap/build.go index 2bc520f63..2cad61a58 100644 --- a/index/scorch/segment/zap/build.go +++ b/index/scorch/segment/zap/build.go @@ -77,7 +77,7 @@ func PersistSegment(memSegment *mem.Segment, path string, chunkFactor uint32) (e } var fieldIndexStart uint64 - fieldIndexStart, err = 
persistFields(memSegment, cr, dictLocs) + fieldIndexStart, err = persistFields(memSegment.FieldsInv, cr, dictLocs) if err != nil { return err } @@ -573,51 +573,6 @@ func persistDictionary(memSegment *mem.Segment, w *CountHashWriter, postingsLocs return rv, nil } -func persistFields(memSegment *mem.Segment, w *CountHashWriter, dictLocs []uint64) (uint64, error) { - var rv uint64 - - var fieldStarts []uint64 - for fieldID, fieldName := range memSegment.FieldsInv { - - // record start of this field - fieldStarts = append(fieldStarts, uint64(w.Count())) - - buf := make([]byte, binary.MaxVarintLen64) - // write out dict location for this field - n := binary.PutUvarint(buf, dictLocs[fieldID]) - _, err := w.Write(buf[:n]) - if err != nil { - return 0, err - } - - // write out the length of the field name - n = binary.PutUvarint(buf, uint64(len(fieldName))) - _, err = w.Write(buf[:n]) - if err != nil { - return 0, err - } - - // write out the field name - _, err = w.Write([]byte(fieldName)) - if err != nil { - return 0, err - } - } - - // now write out the fields index - rv = uint64(w.Count()) - - // now write out the stored doc index - for fieldID := range memSegment.FieldsInv { - err := binary.Write(w, binary.BigEndian, fieldStarts[fieldID]) - if err != nil { - return 0, err - } - } - - return rv, nil -} - // FooterSize is the size of the footer record in bytes // crc + ver + chunk + field offset + stored offset + num docs const FooterSize = 4 + 4 + 4 + 8 + 8 + 8 diff --git a/index/scorch/segment/zap/merge.go b/index/scorch/segment/zap/merge.go index 7652c2210..972b1d167 100644 --- a/index/scorch/segment/zap/merge.go +++ b/index/scorch/segment/zap/merge.go @@ -5,7 +5,6 @@ import ( "bytes" "encoding/binary" "fmt" - "io" "math" "os" @@ -36,7 +35,6 @@ func Merge(segments []*Segment, drops []*roaring.Bitmap, path string, fieldsInv := mergeFields(segments) fieldsMap := mapFields(fieldsInv) - newSegDocCount := computeNewDocCount(segments, drops) var newDocNums [][]uint64 @@ 
-57,7 +55,7 @@ func Merge(segments []*Segment, drops []*roaring.Bitmap, path string, } var fieldsIndexOffset uint64 - fieldsIndexOffset, err = persistMergedFields(fieldsInv, cr, dictLocs) + fieldsIndexOffset, err = persistFields(fieldsInv, cr, dictLocs) if err != nil { return nil, err } @@ -106,33 +104,6 @@ func computeNewDocCount(segments []*Segment, drops []*roaring.Bitmap) uint64 { return newSegDocCount } -func writeRoaringWithLen(r *roaring.Bitmap, w io.Writer) (int, error) { - var buffer bytes.Buffer - // write out postings list to memory so we know the len - postingsListLen, err := r.WriteTo(&buffer) - if err != nil { - return 0, err - } - var tw int - // write out the length of this postings list - buf := make([]byte, binary.MaxVarintLen64) - n := binary.PutUvarint(buf, uint64(postingsListLen)) - nw, err := w.Write(buf[:n]) - tw += nw - if err != nil { - return tw, err - } - - // write out the postings list itself - nw, err = w.Write(buffer.Bytes()) - tw += nw - if err != nil { - return tw, err - } - - return tw, nil -} - func persistMergedRest(segments []*Segment, drops []*roaring.Bitmap, fieldsInv []string, fieldsMap map[string]uint16, newDocNums [][]uint64, newSegDocCount uint64, w *CountHashWriter) ([]uint64, error) { @@ -489,48 +460,3 @@ func mergeFields(segments []*Segment) []string { return rv } - -func persistMergedFields(fieldsInv []string, w *CountHashWriter, dictLocs []uint64) (uint64, error) { - var rv uint64 - - var fieldStarts []uint64 - for fieldID, fieldName := range fieldsInv { - - // record start of this field - fieldStarts = append(fieldStarts, uint64(w.Count())) - - buf := make([]byte, binary.MaxVarintLen64) - // write out dict location for this field - n := binary.PutUvarint(buf, dictLocs[fieldID]) - _, err := w.Write(buf[:n]) - if err != nil { - return 0, err - } - - // write out the length of the field name - n = binary.PutUvarint(buf, uint64(len(fieldName))) - _, err = w.Write(buf[:n]) - if err != nil { - return 0, err - } - - // 
write out the field name - _, err = w.Write([]byte(fieldName)) - if err != nil { - return 0, err - } - } - - // now write out the fields index - rv = uint64(w.Count()) - - // now write out the stored doc index - for fieldID := range fieldsInv { - err := binary.Write(w, binary.BigEndian, fieldStarts[fieldID]) - if err != nil { - return 0, err - } - } - - return rv, nil -} diff --git a/index/scorch/segment/zap/write.go b/index/scorch/segment/zap/write.go new file mode 100644 index 000000000..9772b3a64 --- /dev/null +++ b/index/scorch/segment/zap/write.go @@ -0,0 +1,95 @@ +// Copyright (c) 2017 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package zap + +import ( + "bytes" + "encoding/binary" + "io" + + "github.com/RoaringBitmap/roaring" +) + +// writes out the length of the roaring bitmap in bytes as varint +// then writs out the roaring bitmap itself +func writeRoaringWithLen(r *roaring.Bitmap, w io.Writer) (int, error) { + var buffer bytes.Buffer + // write out postings list to memory so we know the len + postingsListLen, err := r.WriteTo(&buffer) + if err != nil { + return 0, err + } + var tw int + // write out the length of this postings list + buf := make([]byte, binary.MaxVarintLen64) + n := binary.PutUvarint(buf, uint64(postingsListLen)) + nw, err := w.Write(buf[:n]) + tw += nw + if err != nil { + return tw, err + } + + // write out the postings list itself + nw, err = w.Write(buffer.Bytes()) + tw += nw + if err != nil { + return tw, err + } + + return tw, nil +} + +func persistFields(fieldsInv []string, w *CountHashWriter, dictLocs []uint64) (uint64, error) { + var rv uint64 + + var fieldStarts []uint64 + for fieldID, fieldName := range fieldsInv { + + // record start of this field + fieldStarts = append(fieldStarts, uint64(w.Count())) + + buf := make([]byte, binary.MaxVarintLen64) + // write out dict location for this field + n := binary.PutUvarint(buf, dictLocs[fieldID]) + _, err := w.Write(buf[:n]) + if err != nil { + return 0, err + } + + // write out the length of the field name + n = binary.PutUvarint(buf, uint64(len(fieldName))) + _, err = w.Write(buf[:n]) + if err != nil { + return 0, err + } + + // write out the field name + _, err = w.Write([]byte(fieldName)) + if err != nil { + return 0, err + } + } + + // now write out the fields index + rv = uint64(w.Count()) + for fieldID := range fieldsInv { + err := binary.Write(w, binary.BigEndian, fieldStarts[fieldID]) + if err != nil { + return 0, err + } + } + + return rv, nil +} From 289dc398bd88ea1bd9c952772e2323b64c0ec2a5 Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Wed, 13 Dec 2017 14:26:11 -0500 Subject: [PATCH 056/728] more 
refacotring of build/merge --- index/scorch/segment/zap/build.go | 39 ------------------------------- index/scorch/segment/zap/merge.go | 3 +++ index/scorch/segment/zap/write.go | 39 +++++++++++++++++++++++++++++++ 3 files changed, 42 insertions(+), 39 deletions(-) diff --git a/index/scorch/segment/zap/build.go b/index/scorch/segment/zap/build.go index 2cad61a58..56061a2e9 100644 --- a/index/scorch/segment/zap/build.go +++ b/index/scorch/segment/zap/build.go @@ -572,42 +572,3 @@ func persistDictionary(memSegment *mem.Segment, w *CountHashWriter, postingsLocs return rv, nil } - -// FooterSize is the size of the footer record in bytes -// crc + ver + chunk + field offset + stored offset + num docs -const FooterSize = 4 + 4 + 4 + 8 + 8 + 8 - -func persistFooter(numDocs, storedIndexOffset, fieldIndexOffset uint64, - chunkFactor uint32, w *CountHashWriter) error { - // write out the number of docs - err := binary.Write(w, binary.BigEndian, numDocs) - if err != nil { - return err - } - // write out the stored field index location: - err = binary.Write(w, binary.BigEndian, storedIndexOffset) - if err != nil { - return err - } - // write out the field index location - err = binary.Write(w, binary.BigEndian, fieldIndexOffset) - if err != nil { - return err - } - // write out 32-bit chunk factor - err = binary.Write(w, binary.BigEndian, chunkFactor) - if err != nil { - return err - } - // write out 32-bit version - err = binary.Write(w, binary.BigEndian, version) - if err != nil { - return err - } - // write out CRC-32 of everything upto but not including this CRC - err = binary.Write(w, binary.BigEndian, w.Sum32()) - if err != nil { - return err - } - return nil -} diff --git a/index/scorch/segment/zap/merge.go b/index/scorch/segment/zap/merge.go index 972b1d167..670c8d795 100644 --- a/index/scorch/segment/zap/merge.go +++ b/index/scorch/segment/zap/merge.go @@ -84,6 +84,7 @@ func Merge(segments []*Segment, drops []*roaring.Bitmap, path string, return newDocNums, nil } +// 
mapFields takes the fieldsInv list and builds the map func mapFields(fields []string) map[string]uint16 { rv := make(map[string]uint16) for i, fieldName := range fields { @@ -92,6 +93,8 @@ func mapFields(fields []string) map[string]uint16 { return rv } +// computeNewDocCount determines how many documents will be in the newly +// merged segment when obsoleted docs are dropped func computeNewDocCount(segments []*Segment, drops []*roaring.Bitmap) uint64 { var newSegDocCount uint64 for segI, segment := range segments { diff --git a/index/scorch/segment/zap/write.go b/index/scorch/segment/zap/write.go index 9772b3a64..bd63e1472 100644 --- a/index/scorch/segment/zap/write.go +++ b/index/scorch/segment/zap/write.go @@ -93,3 +93,42 @@ func persistFields(fieldsInv []string, w *CountHashWriter, dictLocs []uint64) (u return rv, nil } + +// FooterSize is the size of the footer record in bytes +// crc + ver + chunk + field offset + stored offset + num docs +const FooterSize = 4 + 4 + 4 + 8 + 8 + 8 + +func persistFooter(numDocs, storedIndexOffset, fieldIndexOffset uint64, + chunkFactor uint32, w *CountHashWriter) error { + // write out the number of docs + err := binary.Write(w, binary.BigEndian, numDocs) + if err != nil { + return err + } + // write out the stored field index location: + err = binary.Write(w, binary.BigEndian, storedIndexOffset) + if err != nil { + return err + } + // write out the field index location + err = binary.Write(w, binary.BigEndian, fieldIndexOffset) + if err != nil { + return err + } + // write out 32-bit chunk factor + err = binary.Write(w, binary.BigEndian, chunkFactor) + if err != nil { + return err + } + // write out 32-bit version + err = binary.Write(w, binary.BigEndian, version) + if err != nil { + return err + } + // write out CRC-32 of everything upto but not including this CRC + err = binary.Write(w, binary.BigEndian, w.Sum32()) + if err != nil { + return err + } + return nil +} From 50441e5065699ca36156bf265974c7c6a025ba4b Mon Sep 17 
00:00:00 2001 From: Marty Schoch Date: Wed, 13 Dec 2017 14:41:20 -0500 Subject: [PATCH 057/728] refactor to reuse shared code --- index/scorch/segment/zap/build.go | 61 ++++--------------------------- 1 file changed, 7 insertions(+), 54 deletions(-) diff --git a/index/scorch/segment/zap/build.go b/index/scorch/segment/zap/build.go index 56061a2e9..31eacb1a9 100644 --- a/index/scorch/segment/zap/build.go +++ b/index/scorch/segment/zap/build.go @@ -426,60 +426,24 @@ func persistPostingDetails(memSegment *mem.Segment, w *CountHashWriter, chunkFac return freqOffsets, locOfffsets, nil } -func persistPostingsLocs(memSegment *mem.Segment, w *CountHashWriter) ([]uint64, error) { - var rv []uint64 - - var postingsBuf bytes.Buffer +func persistPostingsLocs(memSegment *mem.Segment, w *CountHashWriter) (rv []uint64, err error) { for postingID := range memSegment.PostingsLocs { - if postingID != 0 { - postingsBuf.Reset() - } - // record where we start this posting loc rv = append(rv, uint64(w.Count())) - - // write out postings locs to memory so we know the len - postingsLocLen, err := memSegment.PostingsLocs[postingID].WriteTo(&postingsBuf) - if err != nil { - return nil, err - } - - buf := make([]byte, binary.MaxVarintLen64) - // write out the length of this postings locs - n := binary.PutUvarint(buf, uint64(postingsLocLen)) - _, err = w.Write(buf[:n]) - if err != nil { - return nil, err - } - - // write out the postings list itself - _, err = w.Write(postingsBuf.Bytes()) + // write out the length and bitmap + _, err = writeRoaringWithLen(memSegment.PostingsLocs[postingID], w) if err != nil { return nil, err } } - return rv, nil } -func persistPostingsLists(memSegment *mem.Segment, w *CountHashWriter, postingsListLocs, freqOffsets, locOffsets []uint64) ([]uint64, error) { - var rv []uint64 - - var postingsBuf bytes.Buffer +func persistPostingsLists(memSegment *mem.Segment, w *CountHashWriter, + postingsListLocs, freqOffsets, locOffsets []uint64) (rv []uint64, err error) { for 
postingID := range memSegment.Postings { - if postingID != 0 { - postingsBuf.Reset() - } - // record where we start this posting list rv = append(rv, uint64(w.Count())) - - // write out postings list to memory so we know the len - postingsListLen, err := memSegment.Postings[postingID].WriteTo(&postingsBuf) - if err != nil { - return nil, err - } - // write out the start of the term info buf := make([]byte, binary.MaxVarintLen64) n := binary.PutUvarint(buf, freqOffsets[postingID]) @@ -487,35 +451,24 @@ func persistPostingsLists(memSegment *mem.Segment, w *CountHashWriter, postingsL if err != nil { return nil, err } - // write out the start of the loc info n = binary.PutUvarint(buf, locOffsets[postingID]) _, err = w.Write(buf[:n]) if err != nil { return nil, err } - // write out the start of the loc posting list n = binary.PutUvarint(buf, postingsListLocs[postingID]) _, err = w.Write(buf[:n]) if err != nil { return nil, err } - - // write out the length of this postings list - n = binary.PutUvarint(buf, uint64(postingsListLen)) - _, err = w.Write(buf[:n]) - if err != nil { - return nil, err - } - - // write out the postings list itself - _, err = w.Write(postingsBuf.Bytes()) + // write out the length and bitmap + _, err = writeRoaringWithLen(memSegment.Postings[postingID], w) if err != nil { return nil, err } } - return rv, nil } From 6e2207c445fd00fc12824dcd2e31941fba3e3022 Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Wed, 13 Dec 2017 15:22:13 -0500 Subject: [PATCH 058/728] additional refactoring of build/merge --- index/scorch/segment/zap/build.go | 34 +++++++---------------------- index/scorch/segment/zap/merge.go | 13 +++-------- index/scorch/segment/zap/read.go | 31 ++++++++++++++++++++++++++ index/scorch/segment/zap/segment.go | 13 ++--------- index/scorch/segment/zap/write.go | 27 +++++++++++++---------- 5 files changed, 60 insertions(+), 58 deletions(-) create mode 100644 index/scorch/segment/zap/read.go diff --git a/index/scorch/segment/zap/build.go 
b/index/scorch/segment/zap/build.go index 31eacb1a9..f3f9658e4 100644 --- a/index/scorch/segment/zap/build.go +++ b/index/scorch/segment/zap/build.go @@ -182,19 +182,12 @@ func persistStored(memSegment *mem.Segment, w *CountHashWriter) (uint64, error) // record where we're about to start writing docNumOffsets[docNum] = uint64(w.Count()) - buf := make([]byte, binary.MaxVarintLen64) - // write out the meta length - n := binary.PutUvarint(buf, uint64(len(metaBytes))) - _, err := w.Write(buf[:n]) - if err != nil { - return 0, err - } - // write out the compressed data length - n = binary.PutUvarint(buf, uint64(len(compressed))) - _, err = w.Write(buf[:n]) + // write out the meta len and compressed data len + _, err := writeUvarints(w, uint64(len(metaBytes)), uint64(len(compressed))) if err != nil { return 0, err } + // now write the meta _, err = w.Write(metaBytes) if err != nil { @@ -444,25 +437,14 @@ func persistPostingsLists(memSegment *mem.Segment, w *CountHashWriter, for postingID := range memSegment.Postings { // record where we start this posting list rv = append(rv, uint64(w.Count())) - // write out the start of the term info - buf := make([]byte, binary.MaxVarintLen64) - n := binary.PutUvarint(buf, freqOffsets[postingID]) - _, err = w.Write(buf[:n]) - if err != nil { - return nil, err - } - // write out the start of the loc info - n = binary.PutUvarint(buf, locOffsets[postingID]) - _, err = w.Write(buf[:n]) - if err != nil { - return nil, err - } - // write out the start of the loc posting list - n = binary.PutUvarint(buf, postingsListLocs[postingID]) - _, err = w.Write(buf[:n]) + + // write out the term info, loc info, and loc posting list offset + _, err = writeUvarints(w, freqOffsets[postingID], + locOffsets[postingID], postingsListLocs[postingID]) if err != nil { return nil, err } + // write out the length and bitmap _, err = writeRoaringWithLen(memSegment.Postings[postingID], w) if err != nil { diff --git a/index/scorch/segment/zap/merge.go 
b/index/scorch/segment/zap/merge.go index 670c8d795..8683eb648 100644 --- a/index/scorch/segment/zap/merge.go +++ b/index/scorch/segment/zap/merge.go @@ -399,16 +399,9 @@ func mergeStoredAndRemap(segments []*Segment, drops []*roaring.Bitmap, // record where we're about to start writing docNumOffsets[newDocNum] = uint64(w.Count()) - buf := make([]byte, binary.MaxVarintLen64) - // write out the meta length - n := binary.PutUvarint(buf, uint64(len(metaBytes))) - _, err = w.Write(buf[:n]) - if err != nil { - return 0, nil, err - } - // write out the compressed data length - n = binary.PutUvarint(buf, uint64(len(compressed))) - _, err = w.Write(buf[:n]) + // write out the meta len and compressed data len + _, err = writeUvarints(w, + uint64(len(metaBytes)), uint64(len(compressed))) if err != nil { return 0, nil, err } diff --git a/index/scorch/segment/zap/read.go b/index/scorch/segment/zap/read.go new file mode 100644 index 000000000..c9b3e7720 --- /dev/null +++ b/index/scorch/segment/zap/read.go @@ -0,0 +1,31 @@ +// Copyright (c) 2017 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package zap + +import "encoding/binary" + +func (s *Segment) getStoredMetaAndCompressed(docNum uint64) ([]byte, []byte) { + docStoredStartAddr := s.storedIndexOffset + (8 * docNum) + docStoredStart := binary.BigEndian.Uint64(s.mm[docStoredStartAddr : docStoredStartAddr+8]) + var n uint64 + metaLen, read := binary.Uvarint(s.mm[docStoredStart : docStoredStart+binary.MaxVarintLen64]) + n += uint64(read) + var dataLen uint64 + dataLen, read = binary.Uvarint(s.mm[docStoredStart+n : docStoredStart+n+binary.MaxVarintLen64]) + n += uint64(read) + meta := s.mm[docStoredStart+n : docStoredStart+n+metaLen] + data := s.mm[docStoredStart+n+metaLen : docStoredStart+n+metaLen+dataLen] + return meta, data +} diff --git a/index/scorch/segment/zap/segment.go b/index/scorch/segment/zap/segment.go index fce80f933..9f80b7037 100644 --- a/index/scorch/segment/zap/segment.go +++ b/index/scorch/segment/zap/segment.go @@ -177,17 +177,8 @@ func (s *Segment) dictionary(field string) (*Dictionary, error) { func (s *Segment) VisitDocument(num uint64, visitor segment.DocumentFieldValueVisitor) error { // first make sure this is a valid number in this segment if num < s.numDocs { - docStoredStartAddr := s.storedIndexOffset + (8 * num) - docStoredStart := binary.BigEndian.Uint64(s.mm[docStoredStartAddr : docStoredStartAddr+8]) - var n uint64 - metaLen, read := binary.Uvarint(s.mm[docStoredStart : docStoredStart+binary.MaxVarintLen64]) - n += uint64(read) - var dataLen uint64 - dataLen, read = binary.Uvarint(s.mm[docStoredStart+n : docStoredStart+n+binary.MaxVarintLen64]) - n += uint64(read) - meta := s.mm[docStoredStart+n : docStoredStart+n+metaLen] - data := s.mm[docStoredStart+n+metaLen : docStoredStart+n+metaLen+dataLen] - uncompressed, err := snappy.Decode(nil, data) + meta, compressed := s.getStoredMetaAndCompressed(num) + uncompressed, err := snappy.Decode(nil, compressed) if err != nil { panic(err) } diff --git a/index/scorch/segment/zap/write.go b/index/scorch/segment/zap/write.go 
index bd63e1472..a831ef6ae 100644 --- a/index/scorch/segment/zap/write.go +++ b/index/scorch/segment/zap/write.go @@ -60,17 +60,8 @@ func persistFields(fieldsInv []string, w *CountHashWriter, dictLocs []uint64) (u // record start of this field fieldStarts = append(fieldStarts, uint64(w.Count())) - buf := make([]byte, binary.MaxVarintLen64) - // write out dict location for this field - n := binary.PutUvarint(buf, dictLocs[fieldID]) - _, err := w.Write(buf[:n]) - if err != nil { - return 0, err - } - - // write out the length of the field name - n = binary.PutUvarint(buf, uint64(len(fieldName))) - _, err = w.Write(buf[:n]) + // write out the dict location and field name length + _, err := writeUvarints(w, dictLocs[fieldID], uint64(len(fieldName))) if err != nil { return 0, err } @@ -132,3 +123,17 @@ func persistFooter(numDocs, storedIndexOffset, fieldIndexOffset uint64, } return nil } + +func writeUvarints(w io.Writer, vals ...uint64) (tw int, err error) { + buf := make([]byte, binary.MaxVarintLen64) + for _, val := range vals { + n := binary.PutUvarint(buf, val) + var nw int + nw, err = w.Write(buf[:n]) + tw += nw + if err != nil { + return tw, err + } + } + return tw, err +} From 85e15628eed9e87801523f881afccffdb500ae24 Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Wed, 13 Dec 2017 16:10:06 -0500 Subject: [PATCH 059/728] major refactoring of posting details --- index/scorch/segment/zap/build.go | 192 ++++++++---------------------- index/scorch/segment/zap/merge.go | 13 +- 2 files changed, 60 insertions(+), 145 deletions(-) diff --git a/index/scorch/segment/zap/build.go b/index/scorch/segment/zap/build.go index f3f9658e4..60906d334 100644 --- a/index/scorch/segment/zap/build.go +++ b/index/scorch/segment/zap/build.go @@ -47,33 +47,39 @@ func PersistSegment(memSegment *mem.Segment, path string, chunkFactor uint32) (e cr := NewCountHashWriter(br) var storedIndexOffset uint64 - storedIndexOffset, err = persistStored(memSegment, cr) - if err != nil { - return err - 
} + var dictLocs []uint64 + if len(memSegment.Stored) > 0 { - var freqOffsets, locOffsets []uint64 - freqOffsets, locOffsets, err = persistPostingDetails(memSegment, cr, chunkFactor) - if err != nil { - return err - } + storedIndexOffset, err = persistStored(memSegment, cr) + if err != nil { + return err + } - var postingsListLocs []uint64 - postingsListLocs, err = persistPostingsLocs(memSegment, cr) - if err != nil { - return err - } + var freqOffsets, locOffsets []uint64 + freqOffsets, locOffsets, err = persistPostingDetails(memSegment, cr, chunkFactor) + if err != nil { + return err + } - var postingsLocs []uint64 - postingsLocs, err = persistPostingsLists(memSegment, cr, postingsListLocs, freqOffsets, locOffsets) - if err != nil { - return err - } + var postingsListLocs []uint64 + postingsListLocs, err = persistPostingsLocs(memSegment, cr) + if err != nil { + return err + } - var dictLocs []uint64 - dictLocs, err = persistDictionary(memSegment, cr, postingsLocs) - if err != nil { - return err + var postingsLocs []uint64 + postingsLocs, err = persistPostingsLists(memSegment, cr, postingsListLocs, freqOffsets, locOffsets) + if err != nil { + return err + } + + dictLocs, err = persistDictionary(memSegment, cr, postingsLocs) + if err != nil { + return err + } + + } else { + dictLocs = make([]uint64, len(memSegment.FieldsInv)) } var fieldIndexStart uint64 @@ -215,40 +221,19 @@ func persistStored(memSegment *mem.Segment, w *CountHashWriter) (uint64, error) func persistPostingDetails(memSegment *mem.Segment, w *CountHashWriter, chunkFactor uint32) ([]uint64, []uint64, error) { var freqOffsets, locOfffsets []uint64 + tfEncoder := newChunkedIntCoder(uint64(chunkFactor), uint64(len(memSegment.Stored)-1)) for postingID := range memSegment.Postings { + if postingID != 0 { + tfEncoder.Reset() + } postingsListItr := memSegment.Postings[postingID].Iterator() - - total := uint64(len(memSegment.Stored))/uint64(chunkFactor) + 1 - - var freqNormBuf []byte var offset int - - var 
encodingBuf bytes.Buffer - encoder := govarint.NewU64Base128Encoder(&encodingBuf) - - chunkLens := make([]uint64, total) - var currChunk uint64 for postingsListItr.HasNext() { - docNum := postingsListItr.Next() - chunk := uint64(docNum) / uint64(chunkFactor) - - if chunk != currChunk { - // starting a new chunk - if encoder != nil { - // close out last - encoder.Close() - encodingBytes := encodingBuf.Bytes() - chunkLens[currChunk] = uint64(len(encodingBytes)) - freqNormBuf = append(freqNormBuf, encodingBytes...) - encodingBuf.Reset() - encoder = govarint.NewU64Base128Encoder(&encodingBuf) - } - currChunk = chunk - } + docNum := uint64(postingsListItr.Next()) // put freq - _, err := encoder.PutU64(memSegment.Freqs[postingID][offset]) + err := tfEncoder.Add(docNum, memSegment.Freqs[postingID][offset]) if err != nil { return nil, nil, err } @@ -256,7 +241,7 @@ func persistPostingDetails(memSegment *mem.Segment, w *CountHashWriter, chunkFac // put norm norm := memSegment.Norms[postingID][offset] normBits := math.Float32bits(norm) - _, err = encoder.PutU32(normBits) + err = tfEncoder.Add(docNum, uint64(normBits)) if err != nil { return nil, nil, err } @@ -264,35 +249,11 @@ func persistPostingDetails(memSegment *mem.Segment, w *CountHashWriter, chunkFac offset++ } - // close out last chunk - if encoder != nil { - // fix me write freq/norms - encoder.Close() - encodingBytes := encodingBuf.Bytes() - chunkLens[currChunk] = uint64(len(encodingBytes)) - freqNormBuf = append(freqNormBuf, encodingBytes...) 
- } - // record where this postings freq info starts freqOffsets = append(freqOffsets, uint64(w.Count())) - buf := make([]byte, binary.MaxVarintLen64) - // write out the number of chunks - n := binary.PutUvarint(buf, uint64(total)) - _, err := w.Write(buf[:n]) - if err != nil { - return nil, nil, err - } - // write out the chunk lens - for _, chunkLen := range chunkLens { - n := binary.PutUvarint(buf, uint64(chunkLen)) - _, err = w.Write(buf[:n]) - if err != nil { - return nil, nil, err - } - } - // write out the data - _, err = w.Write(freqNormBuf) + tfEncoder.Close() + _, err := tfEncoder.Write(w) if err != nil { return nil, nil, err } @@ -300,61 +261,39 @@ func persistPostingDetails(memSegment *mem.Segment, w *CountHashWriter, chunkFac } // now do it again for the locations + locEncoder := newChunkedIntCoder(uint64(chunkFactor), uint64(len(memSegment.Stored)-1)) for postingID := range memSegment.Postings { + if postingID != 0 { + locEncoder.Reset() + } postingsListItr := memSegment.Postings[postingID].Iterator() - - total := uint64(len(memSegment.Stored))/uint64(chunkFactor) + 1 - - var locBuf []byte var offset int var locOffset int - - var encodingBuf bytes.Buffer - encoder := govarint.NewU64Base128Encoder(&encodingBuf) - - chunkLens := make([]uint64, total) - var currChunk uint64 for postingsListItr.HasNext() { - docNum := postingsListItr.Next() - chunk := uint64(docNum) / uint64(chunkFactor) - - if chunk != currChunk { - // starting a new chunk - if encoder != nil { - // close out last - encoder.Close() - encodingBytes := encodingBuf.Bytes() - chunkLens[currChunk] = uint64(len(encodingBytes)) - locBuf = append(locBuf, encodingBytes...) 
- encodingBuf.Reset() - encoder = govarint.NewU64Base128Encoder(&encodingBuf) - } - currChunk = chunk - } - + docNum := uint64(postingsListItr.Next()) for i := 0; i < int(memSegment.Freqs[postingID][offset]); i++ { - if len(memSegment.Locfields[postingID]) > 0 { // put field - _, err := encoder.PutU64(uint64(memSegment.Locfields[postingID][locOffset])) + err := locEncoder.Add(docNum, uint64(memSegment.Locfields[postingID][locOffset])) if err != nil { return nil, nil, err } // put pos - _, err = encoder.PutU64(memSegment.Locpos[postingID][locOffset]) + + err = locEncoder.Add(docNum, memSegment.Locpos[postingID][locOffset]) if err != nil { return nil, nil, err } // put start - _, err = encoder.PutU64(memSegment.Locstarts[postingID][locOffset]) + err = locEncoder.Add(docNum, memSegment.Locstarts[postingID][locOffset]) if err != nil { return nil, nil, err } // put end - _, err = encoder.PutU64(memSegment.Locends[postingID][locOffset]) + err = locEncoder.Add(docNum, memSegment.Locends[postingID][locOffset]) if err != nil { return nil, nil, err } @@ -363,58 +302,31 @@ func persistPostingDetails(memSegment *mem.Segment, w *CountHashWriter, chunkFac num := len(memSegment.Locarraypos[postingID][locOffset]) // put the number of array positions to follow - _, err = encoder.PutU64(uint64(num)) + err = locEncoder.Add(docNum, uint64(num)) if err != nil { return nil, nil, err } // put each array position for j := 0; j < num; j++ { - _, err = encoder.PutU64(memSegment.Locarraypos[postingID][locOffset][j]) + err = locEncoder.Add(docNum, memSegment.Locarraypos[postingID][locOffset][j]) if err != nil { return nil, nil, err } } } - locOffset++ } offset++ } - // close out last chunk - if encoder != nil { - // fix me write freq/norms - encoder.Close() - encodingBytes := encodingBuf.Bytes() - chunkLens[currChunk] = uint64(len(encodingBytes)) - locBuf = append(locBuf, encodingBytes...) 
- } - // record where this postings loc info starts locOfffsets = append(locOfffsets, uint64(w.Count())) - - buf := make([]byte, binary.MaxVarintLen64) - // write out the number of chunks - n := binary.PutUvarint(buf, uint64(total)) - _, err := w.Write(buf[:n]) + locEncoder.Close() + _, err := locEncoder.Write(w) if err != nil { return nil, nil, err } - // write out the chunk lens - for _, chunkLen := range chunkLens { - n := binary.PutUvarint(buf, uint64(chunkLen)) - _, err = w.Write(buf[:n]) - if err != nil { - return nil, nil, err - } - } - // write out the data - _, err = w.Write(locBuf) - if err != nil { - return nil, nil, err - } - } return freqOffsets, locOfffsets, nil } diff --git a/index/scorch/segment/zap/merge.go b/index/scorch/segment/zap/merge.go index 8683eb648..b1ea6c148 100644 --- a/index/scorch/segment/zap/merge.go +++ b/index/scorch/segment/zap/merge.go @@ -39,7 +39,7 @@ func Merge(segments []*Segment, drops []*roaring.Bitmap, path string, var newDocNums [][]uint64 var storedIndexOffset uint64 - dictLocs := make([]uint64, len(fieldsInv)) + var dictLocs []uint64 if newSegDocCount > 0 { storedIndexOffset, newDocNums, err = mergeStoredAndRemap(segments, drops, fieldsMap, fieldsInv, newSegDocCount, cr) @@ -48,10 +48,12 @@ func Merge(segments []*Segment, drops []*roaring.Bitmap, path string, } dictLocs, err = persistMergedRest(segments, drops, fieldsInv, fieldsMap, - newDocNums, newSegDocCount, cr) + newDocNums, newSegDocCount, chunkFactor, cr) if err != nil { return nil, err } + } else { + dictLocs = make([]uint64, len(fieldsInv)) } var fieldsIndexOffset uint64 @@ -108,7 +110,8 @@ func computeNewDocCount(segments []*Segment, drops []*roaring.Bitmap) uint64 { } func persistMergedRest(segments []*Segment, drops []*roaring.Bitmap, - fieldsInv []string, fieldsMap map[string]uint16, newDocNums [][]uint64, newSegDocCount uint64, + fieldsInv []string, fieldsMap map[string]uint16, newDocNums [][]uint64, + newSegDocCount uint64, chunkFactor uint32, w 
*CountHashWriter) ([]uint64, error) { rv := make([]uint64, len(fieldsInv)) @@ -149,8 +152,8 @@ func persistMergedRest(segments []*Segment, drops []*roaring.Bitmap, return 0 }) - tfEncoder := newChunkedIntCoder(1024, newSegDocCount-1) - locEncoder := newChunkedIntCoder(1024, newSegDocCount-1) + tfEncoder := newChunkedIntCoder(uint64(chunkFactor), newSegDocCount-1) + locEncoder := newChunkedIntCoder(uint64(chunkFactor), newSegDocCount-1) for err == nil { term, _ := mergeItr.Current() From a0e12b264080e720febad16e269d8ebb803123ea Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Wed, 13 Dec 2017 16:12:29 -0500 Subject: [PATCH 060/728] add license to a few files missing it --- index/scorch/segment/zap/intcoder.go | 14 ++++++++++++++ index/scorch/segment/zap/intcoder_test.go | 14 ++++++++++++++ index/scorch/segment/zap/merge.go | 14 ++++++++++++++ index/scorch/segment/zap/merge_test.go | 14 ++++++++++++++ 4 files changed, 56 insertions(+) diff --git a/index/scorch/segment/zap/intcoder.go b/index/scorch/segment/zap/intcoder.go index a682740f1..7e268bcf3 100644 --- a/index/scorch/segment/zap/intcoder.go +++ b/index/scorch/segment/zap/intcoder.go @@ -1,3 +1,17 @@ +// Copyright (c) 2017 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ package zap import ( diff --git a/index/scorch/segment/zap/intcoder_test.go b/index/scorch/segment/zap/intcoder_test.go index f2623a548..85d2c5a76 100644 --- a/index/scorch/segment/zap/intcoder_test.go +++ b/index/scorch/segment/zap/intcoder_test.go @@ -1,3 +1,17 @@ +// Copyright (c) 2017 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + package zap import ( diff --git a/index/scorch/segment/zap/merge.go b/index/scorch/segment/zap/merge.go index b1ea6c148..6d635bdfb 100644 --- a/index/scorch/segment/zap/merge.go +++ b/index/scorch/segment/zap/merge.go @@ -1,3 +1,17 @@ +// Copyright (c) 2017 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ package zap import ( diff --git a/index/scorch/segment/zap/merge_test.go b/index/scorch/segment/zap/merge_test.go index c8046efab..1e0110418 100644 --- a/index/scorch/segment/zap/merge_test.go +++ b/index/scorch/segment/zap/merge_test.go @@ -1,3 +1,17 @@ +// Copyright (c) 2017 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + package zap import ( From 50471003dc874b68f2e1c0d9f7d54fa62992f956 Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Wed, 13 Dec 2017 16:30:39 -0500 Subject: [PATCH 061/728] basic refactoring of introducer to make it more readable --- index/scorch/introducer.go | 292 +++++++++++++++++++------------------ 1 file changed, 150 insertions(+), 142 deletions(-) diff --git a/index/scorch/introducer.go b/index/scorch/introducer.go index 7998eae3b..76c145517 100644 --- a/index/scorch/introducer.go +++ b/index/scorch/introducer.go @@ -41,162 +41,170 @@ OUTER: break OUTER case notify = <-s.introducerNotifier: + continue case nextMerge := <-s.merges: - // acquire lock - s.rootLock.Lock() - - // prepare new index snapshot - currSize := len(s.root.segment) - newSize := currSize + 1 - len(nextMerge.old) - newSnapshot := &IndexSnapshot{ - segment: make([]*SegmentSnapshot, 0, newSize), - offsets: make([]uint64, 0, newSize), - internal: make(map[string][]byte, len(s.root.segment)), - epoch: s.nextSnapshotEpoch, - } - s.nextSnapshotEpoch++ - - // iterate through current segments - newSegmentDeleted := roaring.NewBitmap() - var running 
uint64 - for i := range s.root.segment { - segmentID := s.root.segment[i].id - if segSnapAtMerge, ok := nextMerge.old[segmentID]; ok { - // this segment is going away, see if anything else was deleted since we started the merge - if s.root.segment[i].deleted != nil { - // assume all these deletes are new - deletedSince := s.root.segment[i].deleted - // if we already knew about some of them, remove - if segSnapAtMerge.deleted != nil { - deletedSince = roaring.AndNot(s.root.segment[i].deleted, segSnapAtMerge.deleted) - } - deletedSinceItr := deletedSince.Iterator() - for deletedSinceItr.HasNext() { - oldDocNum := deletedSinceItr.Next() - newDocNum := nextMerge.oldNewDocNums[segmentID][oldDocNum] - newSegmentDeleted.Add(uint32(newDocNum)) - } - } - } else { - // this segment is staying - newSnapshot.segment = append(newSnapshot.segment, &SegmentSnapshot{ - id: s.root.segment[i].id, - segment: s.root.segment[i].segment, - notify: s.root.segment[i].notify, - deleted: s.root.segment[i].deleted, - }) - newSnapshot.offsets = append(newSnapshot.offsets, running) - running += s.root.segment[i].Count() - } - } + s.introduceMerge(nextMerge) - // put new segment at end - newSnapshot.segment = append(newSnapshot.segment, &SegmentSnapshot{ - id: nextMerge.id, - segment: nextMerge.new, - deleted: newSegmentDeleted, - }) - newSnapshot.offsets = append(newSnapshot.offsets, running) - - // copy old values - for key, oldVal := range s.root.internal { - newSnapshot.internal[key] = oldVal + case next := <-s.introductions: + err := s.introduceSegment(next) + if err != nil { + continue OUTER } + } + // notify persister + if notify != nil { + close(notify) + notify = nil + } + } - // swap in new segment - s.root = newSnapshot - // release lock - s.rootLock.Unlock() + s.asyncTasks.Done() +} - // notify merger we incorporated this - close(nextMerge.notify) +func (s *Scorch) introduceSegment(next *segmentIntroduction) error { + // acquire lock + s.rootLock.Lock() - // notify persister - if 
notify != nil { - close(notify) - notify = nil + // prepare new index snapshot, with curr size + 1 + newSnapshot := &IndexSnapshot{ + segment: make([]*SegmentSnapshot, len(s.root.segment)+1), + offsets: make([]uint64, len(s.root.segment)+1), + internal: make(map[string][]byte, len(s.root.segment)), + epoch: s.nextSnapshotEpoch, + } + s.nextSnapshotEpoch++ + + // iterate through current segments + var running uint64 + for i := range s.root.segment { + // see if optimistic work included this segment + delta, ok := next.obsoletes[s.root.segment[i].id] + if !ok { + var err error + delta, err = s.root.segment[i].segment.DocNumbers(next.ids) + if err != nil { + next.applied <- fmt.Errorf("error computing doc numbers: %v", err) + close(next.applied) + return err } + } + newSnapshot.segment[i] = &SegmentSnapshot{ + id: s.root.segment[i].id, + segment: s.root.segment[i].segment, + notify: s.root.segment[i].notify, + } + // apply new obsoletions + if s.root.segment[i].deleted == nil { + newSnapshot.segment[i].deleted = delta + } else { + newSnapshot.segment[i].deleted = roaring.Or(s.root.segment[i].deleted, delta) + } - case next := <-s.introductions: - // acquire lock - s.rootLock.Lock() - - // prepare new index snapshot, with curr size + 1 - newSnapshot := &IndexSnapshot{ - segment: make([]*SegmentSnapshot, len(s.root.segment)+1), - offsets: make([]uint64, len(s.root.segment)+1), - internal: make(map[string][]byte, len(s.root.segment)), - epoch: s.nextSnapshotEpoch, - } - s.nextSnapshotEpoch++ - - // iterate through current segments - var running uint64 - for i := range s.root.segment { - // see if optimistic work included this segment - delta, ok := next.obsoletes[s.root.segment[i].id] - if !ok { - var err error - delta, err = s.root.segment[i].segment.DocNumbers(next.ids) - if err != nil { - next.applied <- fmt.Errorf("error computing doc numbers: %v", err) - close(next.applied) - continue OUTER - } - } - newSnapshot.segment[i] = &SegmentSnapshot{ - id: 
s.root.segment[i].id, - segment: s.root.segment[i].segment, - notify: s.root.segment[i].notify, - } - // apply new obsoletions - if s.root.segment[i].deleted == nil { - newSnapshot.segment[i].deleted = delta - } else { - newSnapshot.segment[i].deleted = roaring.Or(s.root.segment[i].deleted, delta) - } + newSnapshot.offsets[i] = running + running += s.root.segment[i].Count() - newSnapshot.offsets[i] = running - running += s.root.segment[i].Count() + } + // put new segment at end + newSnapshot.segment[len(s.root.segment)] = &SegmentSnapshot{ + id: next.id, + segment: next.data, + } + newSnapshot.offsets[len(s.root.segment)] = running + if !s.unsafeBatch { + newSnapshot.segment[len(s.root.segment)].notify = append( + newSnapshot.segment[len(s.root.segment)].notify, + next.persisted, + ) + } + // copy old values + for key, oldVal := range s.root.internal { + newSnapshot.internal[key] = oldVal + } + // set new values and apply deletes + for key, newVal := range next.internal { + if newVal != nil { + newSnapshot.internal[key] = newVal + } else { + delete(newSnapshot.internal, key) + } + } + // swap in new segment + s.root = newSnapshot + // release lock + s.rootLock.Unlock() + close(next.applied) - } - // put new segment at end - newSnapshot.segment[len(s.root.segment)] = &SegmentSnapshot{ - id: next.id, - segment: next.data, - } - newSnapshot.offsets[len(s.root.segment)] = running - if !s.unsafeBatch { - newSnapshot.segment[len(s.root.segment)].notify = append( - newSnapshot.segment[len(s.root.segment)].notify, - next.persisted, - ) - } - // copy old values - for key, oldVal := range s.root.internal { - newSnapshot.internal[key] = oldVal - } - // set new values and apply deletes - for key, newVal := range next.internal { - if newVal != nil { - newSnapshot.internal[key] = newVal - } else { - delete(newSnapshot.internal, key) + return nil +} + +func (s *Scorch) introduceMerge(nextMerge *segmentMerge) { + // acquire lock + s.rootLock.Lock() + + // prepare new index 
snapshot + currSize := len(s.root.segment) + newSize := currSize + 1 - len(nextMerge.old) + newSnapshot := &IndexSnapshot{ + segment: make([]*SegmentSnapshot, 0, newSize), + offsets: make([]uint64, 0, newSize), + internal: make(map[string][]byte, len(s.root.segment)), + epoch: s.nextSnapshotEpoch, + } + s.nextSnapshotEpoch++ + + // iterate through current segments + newSegmentDeleted := roaring.NewBitmap() + var running uint64 + for i := range s.root.segment { + segmentID := s.root.segment[i].id + if segSnapAtMerge, ok := nextMerge.old[segmentID]; ok { + // this segment is going away, see if anything else was deleted since we started the merge + if s.root.segment[i].deleted != nil { + // assume all these deletes are new + deletedSince := s.root.segment[i].deleted + // if we already knew about some of them, remove + if segSnapAtMerge.deleted != nil { + deletedSince = roaring.AndNot(s.root.segment[i].deleted, segSnapAtMerge.deleted) + } + deletedSinceItr := deletedSince.Iterator() + for deletedSinceItr.HasNext() { + oldDocNum := deletedSinceItr.Next() + newDocNum := nextMerge.oldNewDocNums[segmentID][oldDocNum] + newSegmentDeleted.Add(uint32(newDocNum)) } } - // swap in new segment - s.root = newSnapshot - // release lock - s.rootLock.Unlock() - close(next.applied) - - if notify != nil { - close(notify) - notify = nil - } + } else { + // this segment is staying + newSnapshot.segment = append(newSnapshot.segment, &SegmentSnapshot{ + id: s.root.segment[i].id, + segment: s.root.segment[i].segment, + notify: s.root.segment[i].notify, + deleted: s.root.segment[i].deleted, + }) + newSnapshot.offsets = append(newSnapshot.offsets, running) + running += s.root.segment[i].Count() } } - s.asyncTasks.Done() + // put new segment at end + newSnapshot.segment = append(newSnapshot.segment, &SegmentSnapshot{ + id: nextMerge.id, + segment: nextMerge.new, + deleted: newSegmentDeleted, + }) + newSnapshot.offsets = append(newSnapshot.offsets, running) + + // copy old values + for key, 
oldVal := range s.root.internal { + newSnapshot.internal[key] = oldVal + } + + // swap in new segment + s.root = newSnapshot + // release lock + s.rootLock.Unlock() + + // notify merger we incorporated this + close(nextMerge.notify) } From c13ff85aaf682079e549674d54eff7d59d16eb3c Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Wed, 13 Dec 2017 13:10:44 -0800 Subject: [PATCH 062/728] scorch ref-counting Future commits will provide actual cleanup when ref-counts reach 0. --- index/scorch/introducer.go | 20 ++++++++++++++-- index/scorch/merge.go | 6 +++++ index/scorch/persister.go | 30 ++++++++++++++++++++---- index/scorch/reader.go | 4 ++-- index/scorch/reader_test.go | 5 ++++ index/scorch/scorch.go | 36 +++++++++++++++-------------- index/scorch/segment/empty.go | 7 ++++++ index/scorch/segment/mem/segment.go | 7 ++++++ index/scorch/segment/segment.go | 3 +++ index/scorch/segment/zap/segment.go | 25 ++++++++++++++++++++ index/scorch/snapshot_index.go | 27 ++++++++++++++++++++++ 11 files changed, 145 insertions(+), 25 deletions(-) diff --git a/index/scorch/introducer.go b/index/scorch/introducer.go index 76c145517..9402e5a48 100644 --- a/index/scorch/introducer.go +++ b/index/scorch/introducer.go @@ -72,6 +72,7 @@ func (s *Scorch) introduceSegment(next *segmentIntroduction) error { offsets: make([]uint64, len(s.root.segment)+1), internal: make(map[string][]byte, len(s.root.segment)), epoch: s.nextSnapshotEpoch, + refs: 1, } s.nextSnapshotEpoch++ @@ -86,6 +87,7 @@ func (s *Scorch) introduceSegment(next *segmentIntroduction) error { if err != nil { next.applied <- fmt.Errorf("error computing doc numbers: %v", err) close(next.applied) + _ = newSnapshot.DecRef() return err } } @@ -94,6 +96,7 @@ func (s *Scorch) introduceSegment(next *segmentIntroduction) error { segment: s.root.segment[i].segment, notify: s.root.segment[i].notify, } + s.root.segment[i].segment.AddRef() // apply new obsoletions if s.root.segment[i].deleted == nil { newSnapshot.segment[i].deleted = delta @@ 
-108,7 +111,7 @@ func (s *Scorch) introduceSegment(next *segmentIntroduction) error { // put new segment at end newSnapshot.segment[len(s.root.segment)] = &SegmentSnapshot{ id: next.id, - segment: next.data, + segment: next.data, // Take ownership of next.data's ref-count. } newSnapshot.offsets[len(s.root.segment)] = running if !s.unsafeBatch { @@ -130,9 +133,15 @@ func (s *Scorch) introduceSegment(next *segmentIntroduction) error { } } // swap in new segment + rootPrev := s.root s.root = newSnapshot // release lock s.rootLock.Unlock() + + if rootPrev != nil { + _ = rootPrev.DecRef() + } + close(next.applied) return nil @@ -150,6 +159,7 @@ func (s *Scorch) introduceMerge(nextMerge *segmentMerge) { offsets: make([]uint64, 0, newSize), internal: make(map[string][]byte, len(s.root.segment)), epoch: s.nextSnapshotEpoch, + refs: 1, } s.nextSnapshotEpoch++ @@ -182,6 +192,7 @@ func (s *Scorch) introduceMerge(nextMerge *segmentMerge) { notify: s.root.segment[i].notify, deleted: s.root.segment[i].deleted, }) + s.root.segment[i].segment.AddRef() newSnapshot.offsets = append(newSnapshot.offsets, running) running += s.root.segment[i].Count() } @@ -190,7 +201,7 @@ func (s *Scorch) introduceMerge(nextMerge *segmentMerge) { // put new segment at end newSnapshot.segment = append(newSnapshot.segment, &SegmentSnapshot{ id: nextMerge.id, - segment: nextMerge.new, + segment: nextMerge.new, // Take ownership for nextMerge.new's ref-count. 
deleted: newSegmentDeleted, }) newSnapshot.offsets = append(newSnapshot.offsets, running) @@ -201,10 +212,15 @@ func (s *Scorch) introduceMerge(nextMerge *segmentMerge) { } // swap in new segment + rootPrev := s.root s.root = newSnapshot // release lock s.rootLock.Unlock() + if rootPrev != nil { + _ = rootPrev.DecRef() + } + // notify merger we incorporated this close(nextMerge.notify) } diff --git a/index/scorch/merge.go b/index/scorch/merge.go index aef216646..818f3dbd1 100644 --- a/index/scorch/merge.go +++ b/index/scorch/merge.go @@ -38,6 +38,7 @@ OUTER: // check to see if there is a new snapshot to persist s.rootLock.RLock() ourSnapshot := s.root + ourSnapshot.AddRef() s.rootLock.RUnlock() if ourSnapshot.epoch != lastEpochMergePlanned { @@ -45,10 +46,12 @@ OUTER: err := s.planMergeAtSnapshot(ourSnapshot) if err != nil { log.Printf("merging err: %v", err) + _ = ourSnapshot.DecRef() continue OUTER } lastEpochMergePlanned = ourSnapshot.epoch } + _ = ourSnapshot.DecRef() // tell the persister we're waiting for changes // first make a notification chan @@ -64,16 +67,19 @@ OUTER: // check again s.rootLock.RLock() ourSnapshot = s.root + ourSnapshot.AddRef() s.rootLock.RUnlock() if ourSnapshot.epoch != lastEpochMergePlanned { // lets get started err := s.planMergeAtSnapshot(ourSnapshot) if err != nil { + _ = ourSnapshot.DecRef() continue OUTER } lastEpochMergePlanned = ourSnapshot.epoch } + _ = ourSnapshot.DecRef() // now wait for it (but also detect close) select { diff --git a/index/scorch/persister.go b/index/scorch/persister.go index d54840864..964e1db93 100644 --- a/index/scorch/persister.go +++ b/index/scorch/persister.go @@ -44,6 +44,7 @@ OUTER: // check to see if there is a new snapshot to persist s.rootLock.RLock() ourSnapshot := s.root + ourSnapshot.AddRef() s.rootLock.RUnlock() //for ourSnapshot.epoch != lastPersistedEpoch { @@ -52,6 +53,7 @@ OUTER: err := s.persistSnapshot(ourSnapshot) if err != nil { log.Printf("got err persisting snapshot: %v", err) + _ 
= ourSnapshot.DecRef() continue OUTER } lastPersistedEpoch = ourSnapshot.epoch @@ -60,6 +62,7 @@ OUTER: notify = nil } } + _ = ourSnapshot.DecRef() // tell the introducer we're waiting for changes // first make a notification chan @@ -75,13 +78,15 @@ OUTER: // check again s.rootLock.RLock() ourSnapshot = s.root + ourSnapshot.AddRef() s.rootLock.RUnlock() - if ourSnapshot.epoch != lastPersistedEpoch { + if ourSnapshot.epoch != lastPersistedEpoch { // lets get started err := s.persistSnapshot(ourSnapshot) if err != nil { log.Printf("got err persisting snapshot: %v", err) + _ = ourSnapshot.DecRef() continue OUTER } lastPersistedEpoch = ourSnapshot.epoch @@ -90,6 +95,7 @@ OUTER: notify = nil } } + _ = ourSnapshot.DecRef() // now wait for it (but also detect close) select { @@ -199,6 +205,9 @@ func (s *Scorch) persistSnapshot(snapshot *IndexSnapshot) error { for segmentID, path := range newSegmentPaths { newSegments[segmentID], err = zap.Open(path) if err != nil { + for _, s := range newSegments { + _ = s.Close() // cleanup segments that were successfully opened + } return fmt.Errorf("error opening new segment at %s, %v", path, err) } } @@ -212,6 +221,7 @@ func (s *Scorch) persistSnapshot(snapshot *IndexSnapshot) error { segment: make([]*SegmentSnapshot, len(s.root.segment)), offsets: make([]uint64, len(s.root.offsets)), internal: make(map[string][]byte, len(s.root.internal)), + refs: 1, } for i, segmentSnapshot := range s.root.segment { // see if this segment has been replaced @@ -228,15 +238,21 @@ func (s *Scorch) persistSnapshot(snapshot *IndexSnapshot) error { } } else { newIndexSnapshot.segment[i] = s.root.segment[i] + newIndexSnapshot.segment[i].segment.AddRef() } newIndexSnapshot.offsets[i] = s.root.offsets[i] } for k, v := range s.root.internal { newIndexSnapshot.internal[k] = v } + rootPrev := s.root s.root = newIndexSnapshot s.rootLock.Unlock() + if rootPrev != nil { + _ = rootPrev.DecRef() + } + // now that we've given up the lock, notify everyone that we've 
safely // persisted their data for _, notification := range notifications { @@ -263,17 +279,17 @@ func (s *Scorch) loadFromBolt() error { for k, _ := c.Last(); k != nil; k, _ = c.Prev() { _, snapshotEpoch, err := segment.DecodeUvarintAscending(k) if err != nil { - log.Printf("unable to parse segment epoch % x, contiuing", k) + log.Printf("unable to parse segment epoch %x, continuing", k) continue } snapshot := snapshots.Bucket(k) if snapshot == nil { - log.Printf("snapshot key, but bucket missing % x, continuing", k) + log.Printf("snapshot key, but bucket missing %x, continuing", k) continue } indexSnapshot, err := s.loadSnapshot(snapshot) if err != nil { - log.Printf("unable to load snapshot, %v continuing", err) + log.Printf("unable to load snapshot, %v, continuing", err) continue } indexSnapshot.epoch = snapshotEpoch @@ -296,6 +312,7 @@ func (s *Scorch) loadSnapshot(snapshot *bolt.Bucket) (*IndexSnapshot, error) { rv := &IndexSnapshot{ internal: make(map[string][]byte), + refs: 1, } var running uint64 c := snapshot.Cursor() @@ -308,19 +325,23 @@ func (s *Scorch) loadSnapshot(snapshot *bolt.Bucket) (*IndexSnapshot, error) { return nil }) if err != nil { + _ = rv.DecRef() return nil, err } } else { segmentBucket := snapshot.Bucket(k) if segmentBucket == nil { + _ = rv.DecRef() return nil, fmt.Errorf("segment key, but bucket missing % x", k) } segmentSnapshot, err := s.loadSegment(segmentBucket) if err != nil { + _ = rv.DecRef() return nil, fmt.Errorf("failed to load segment: %v", err) } _, segmentSnapshot.id, err = segment.DecodeUvarintAscending(k) if err != nil { + _ = rv.DecRef() return nil, fmt.Errorf("failed to decode segment id: %v", err) } rv.segment = append(rv.segment, segmentSnapshot) @@ -351,6 +372,7 @@ func (s *Scorch) loadSegment(segmentBucket *bolt.Bucket) (*SegmentSnapshot, erro r := bytes.NewReader(deletedBytes) _, err := deletedBitmap.ReadFrom(r) if err != nil { + _ = segment.Close() return nil, fmt.Errorf("error reading deleted bytes: %v", err) } 
rv.deleted = deletedBitmap diff --git a/index/scorch/reader.go b/index/scorch/reader.go index 9a20aa013..365ecb670 100644 --- a/index/scorch/reader.go +++ b/index/scorch/reader.go @@ -20,7 +20,7 @@ import ( ) type Reader struct { - root *IndexSnapshot + root *IndexSnapshot // Owns 1 ref-count on the index snapshot. } func (r *Reader) TermFieldReader(term []byte, field string, includeFreq, @@ -106,5 +106,5 @@ func (r *Reader) DumpFields() chan interface{} { } func (r *Reader) Close() error { - return nil + return r.root.DecRef() } diff --git a/index/scorch/reader_test.go b/index/scorch/reader_test.go index 2cd42fe47..4eb9b5fb9 100644 --- a/index/scorch/reader_test.go +++ b/index/scorch/reader_test.go @@ -646,6 +646,7 @@ func TestSegmentIndexAndLocalDocNumFromGlobal(t *testing.T) { for _, test := range tests { i := &IndexSnapshot{ offsets: test.offsets, + refs: 1, } gotSegmentIndex, gotLocalDocNum := i.segmentIndexAndLocalDocNumFromGlobal(test.globalDocNum) if gotSegmentIndex != test.segmentIndex { @@ -654,5 +655,9 @@ func TestSegmentIndexAndLocalDocNumFromGlobal(t *testing.T) { if gotLocalDocNum != test.localDocNum { t.Errorf("got localDocNum %d expected %d for offsets %v globalDocNum %d", gotLocalDocNum, test.localDocNum, test.offsets, test.globalDocNum) } + err := i.DecRef() + if err != nil { + t.Errorf("expected no err, got: %v", err) + } } } diff --git a/index/scorch/scorch.go b/index/scorch/scorch.go index 7373e6b43..d59563be2 100644 --- a/index/scorch/scorch.go +++ b/index/scorch/scorch.go @@ -50,7 +50,7 @@ type Scorch struct { unsafeBatch bool rootLock sync.RWMutex - root *IndexSnapshot + root *IndexSnapshot // holds 1 ref-count on the root closeCh chan struct{} introductions chan *segmentIntroduction @@ -67,7 +67,7 @@ func NewScorch(storeName string, config map[string]interface{}, analysisQueue *i config: config, analysisQueue: analysisQueue, stats: &Stats{}, - root: &IndexSnapshot{}, + root: &IndexSnapshot{refs: 1}, nextSnapshotEpoch: 1, } ro, ok := 
config["read_only"].(bool) @@ -140,16 +140,12 @@ func (s *Scorch) Close() (err error) { // wait for them to close s.asyncTasks.Wait() // now close the root bolt - if s.rootBolt != nil { err = s.rootBolt.Close() s.rootLock.Lock() - for _, segment := range s.root.segment { - cerr := segment.Close() - if err == nil { - err = cerr - } - } + _ = s.root.DecRef() + s.root = nil + s.rootLock.Unlock() } return @@ -218,7 +214,12 @@ func (s *Scorch) Batch(batch *index.Batch) error { } else { newSegment = mem.New() } - return s.prepareSegment(newSegment, ids, batch.InternalOps) + + err := s.prepareSegment(newSegment, ids, batch.InternalOps) + if err != nil { + _ = newSegment.Close() + } + return err } func (s *Scorch) prepareSegment(newSegment segment.Segment, ids []string, @@ -240,12 +241,13 @@ func (s *Scorch) prepareSegment(newSegment segment.Segment, ids []string, // get read lock, to optimistically prepare obsoleted info s.rootLock.RLock() - for i := range s.root.segment { - delta, err := s.root.segment[i].segment.DocNumbers(ids) + for _, seg := range s.root.segment { + delta, err := seg.segment.DocNumbers(ids) if err != nil { + s.rootLock.RUnlock() return err } - introduction.obsoletes[s.root.segment[i].id] = delta + introduction.obsoletes[seg.id] = delta } s.rootLock.RUnlock() @@ -280,10 +282,10 @@ func (s *Scorch) DeleteInternal(key []byte) error { // release associated resources. 
func (s *Scorch) Reader() (index.IndexReader, error) { s.rootLock.RLock() - defer s.rootLock.RUnlock() - return &Reader{ - root: s.root, - }, nil + rv := &Reader{root: s.root} + rv.root.AddRef() + s.rootLock.RUnlock() + return rv, nil } func (s *Scorch) Stats() json.Marshaler { diff --git a/index/scorch/segment/empty.go b/index/scorch/segment/empty.go index 724195007..83454644d 100644 --- a/index/scorch/segment/empty.go +++ b/index/scorch/segment/empty.go @@ -46,6 +46,13 @@ func (e *EmptySegment) Close() error { return nil } +func (e *EmptySegment) AddRef() { +} + +func (e *EmptySegment) DecRef() error { + return nil +} + type EmptyDictionary struct{} func (e *EmptyDictionary) PostingsList(term string, diff --git a/index/scorch/segment/mem/segment.go b/index/scorch/segment/mem/segment.go index cdbff5839..75ff50cc0 100644 --- a/index/scorch/segment/mem/segment.go +++ b/index/scorch/segment/mem/segment.go @@ -96,6 +96,13 @@ func New() *Segment { } } +func (s *Segment) AddRef() { +} + +func (s *Segment) DecRef() error { + return nil +} + // Fields returns the field names used in this segment func (s *Segment) Fields() []string { return s.FieldsInv diff --git a/index/scorch/segment/segment.go b/index/scorch/segment/segment.go index 6a9d70730..14b97ec80 100644 --- a/index/scorch/segment/segment.go +++ b/index/scorch/segment/segment.go @@ -35,6 +35,9 @@ type Segment interface { Fields() []string Close() error + + AddRef() + DecRef() error } type TermDictionary interface { diff --git a/index/scorch/segment/zap/segment.go b/index/scorch/segment/zap/segment.go index 9f80b7037..498699072 100644 --- a/index/scorch/segment/zap/segment.go +++ b/index/scorch/segment/zap/segment.go @@ -20,6 +20,7 @@ import ( "fmt" "io" "os" + "sync" "github.com/RoaringBitmap/roaring" "github.com/Smerity/govarint" @@ -47,6 +48,7 @@ func Open(path string) (segment.Segment, error) { mm: mm, path: path, fieldsMap: make(map[string]uint16), + refs: 1, } err = rv.loadConfig() @@ -79,6 +81,25 @@ type 
Segment struct { fieldsMap map[string]uint16 fieldsInv []string fieldsOffsets []uint64 + + m sync.Mutex // Protects the fields that follow. + refs int64 +} + +func (s *Segment) AddRef() { + s.m.Lock() + s.refs++ + s.m.Unlock() +} + +func (s *Segment) DecRef() (err error) { + s.m.Lock() + s.refs-- + if s.refs == 0 { + err = s.closeActual() + } + s.m.Unlock() + return err } func (s *Segment) loadConfig() error { @@ -272,6 +293,10 @@ func (s *Segment) Path() string { // Close releases all resources associated with this segment func (s *Segment) Close() (err error) { + return s.DecRef() +} + +func (s *Segment) closeActual() (err error) { if s.mm != nil { err = s.mm.Unmap() } diff --git a/index/scorch/snapshot_index.go b/index/scorch/snapshot_index.go index 10d208efd..19581f755 100644 --- a/index/scorch/snapshot_index.go +++ b/index/scorch/snapshot_index.go @@ -20,6 +20,7 @@ import ( "encoding/binary" "fmt" "sort" + "sync" "github.com/RoaringBitmap/roaring" "github.com/blevesearch/bleve/document" @@ -43,6 +44,32 @@ type IndexSnapshot struct { offsets []uint64 internal map[string][]byte epoch uint64 + + m sync.Mutex // Protects the fields that follow. 
+ refs int64 +} + +func (i *IndexSnapshot) AddRef() { + i.m.Lock() + i.refs++ + i.m.Unlock() +} + +func (i *IndexSnapshot) DecRef() (err error) { + i.m.Lock() + i.refs-- + if i.refs == 0 { + for _, s := range i.segment { + if s != nil { + err2 := s.segment.DecRef() + if err == nil { + err = err2 + } + } + } + } + i.m.Unlock() + return err } func (i *IndexSnapshot) newIndexSnapshotFieldDict(field string, makeItr func(i segment.TermDictionary) segment.DictionaryIterator) (*IndexSnapshotFieldDict, error) { From 74b882640e422f7e3db0205dcbb3e4d007a974e7 Mon Sep 17 00:00:00 2001 From: Damien Tournoud Date: Wed, 13 Dec 2017 13:45:58 -0800 Subject: [PATCH 063/728] mapping: Fix closestDocMapping selecting wrong mapping --- mapping/document.go | 1 + mapping/mapping_test.go | 23 +++++++++++++++++++++++ 2 files changed, 24 insertions(+) diff --git a/mapping/document.go b/mapping/document.go index d62675e52..d4c9a8f9d 100644 --- a/mapping/document.go +++ b/mapping/document.go @@ -179,6 +179,7 @@ OUTER: continue OUTER } } + break } return current } diff --git a/mapping/mapping_test.go b/mapping/mapping_test.go index 5cd86015f..735aef057 100644 --- a/mapping/mapping_test.go +++ b/mapping/mapping_test.go @@ -991,3 +991,26 @@ func TestMappingForNilTextMarshaler(t *testing.T) { } } + +func TestClosestDocDynamicMapping(t *testing.T) { + mapping := NewIndexMapping() + mapping.IndexDynamic = false + mapping.DefaultMapping = NewDocumentStaticMapping() + mapping.DefaultMapping.AddFieldMappingsAt("foo", NewTextFieldMapping()) + + doc := document.NewDocument("x") + err := mapping.MapDocument(doc, map[string]interface{}{ + "foo": "value", + "bar": map[string]string{ + "foo": "value2", + "baz": "value3", + }, + }) + if err != nil { + t.Fatal(err) + } + + if len(doc.Fields) != 1 { + t.Fatalf("expected 1 field, got: %d", len(doc.Fields)) + } +} From c0cc46a2be80bdf4538d1de27fb8683892f85abf Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Wed, 13 Dec 2017 14:54:58 -0800 Subject: [PATCH 064/728] 
scorch cleanup of the rootBolt of old snapshots A new global variable, NumSnapshotsToKeep, represents the default number of old snapshots that each scorch instance should maintain -- 0 is the default. Apps that need rollback'ability may want to increase this value in early initialization. The Scorch.eligibleForRemoval field tracks epoches which are safe to delete from the rootBolt. The eligibleForRemoval is appended to whenever the ref-count on an IndexSnapshot drops to 0. On startup, eligibleForRemoval is also initialized with any older epoch's found in the rootBolt. The newly introduced Scorch.removeOldSnapshots() method is called on every cycle of the persisterLoop(), where it maintains the eligibleForRemoval slice to under a size defined by the NumSnapshotsToKeep. A future commit will remove actual storage files in order to match the "source of truth" information found in the rootBolt. --- index/scorch/introducer.go | 2 + index/scorch/persister.go | 79 +++++++++++++++++++++++++++++++++- index/scorch/scorch.go | 12 +++++- index/scorch/snapshot_index.go | 4 ++ 4 files changed, 94 insertions(+), 3 deletions(-) diff --git a/index/scorch/introducer.go b/index/scorch/introducer.go index 9402e5a48..a80e5ac15 100644 --- a/index/scorch/introducer.go +++ b/index/scorch/introducer.go @@ -68,6 +68,7 @@ func (s *Scorch) introduceSegment(next *segmentIntroduction) error { // prepare new index snapshot, with curr size + 1 newSnapshot := &IndexSnapshot{ + parent: s, segment: make([]*SegmentSnapshot, len(s.root.segment)+1), offsets: make([]uint64, len(s.root.segment)+1), internal: make(map[string][]byte, len(s.root.segment)), @@ -155,6 +156,7 @@ func (s *Scorch) introduceMerge(nextMerge *segmentMerge) { currSize := len(s.root.segment) newSize := currSize + 1 - len(nextMerge.old) newSnapshot := &IndexSnapshot{ + parent: s, segment: make([]*SegmentSnapshot, 0, newSize), offsets: make([]uint64, 0, newSize), internal: make(map[string][]byte, len(s.root.segment)), diff --git 
a/index/scorch/persister.go b/index/scorch/persister.go index 964e1db93..3a6ce85b5 100644 --- a/index/scorch/persister.go +++ b/index/scorch/persister.go @@ -19,6 +19,7 @@ import ( "fmt" "log" "os" + "sort" "strings" "github.com/RoaringBitmap/roaring" @@ -35,6 +36,11 @@ func (s *Scorch) persisterLoop() { var lastPersistedEpoch uint64 OUTER: for { + err := s.removeOldSnapshots() + if err != nil { + log.Printf("got err removing old snapshots: %v", err) + } + select { case <-s.closeCh: break OUTER @@ -50,7 +56,7 @@ OUTER: //for ourSnapshot.epoch != lastPersistedEpoch { if ourSnapshot.epoch != lastPersistedEpoch { // lets get started - err := s.persistSnapshot(ourSnapshot) + err = s.persistSnapshot(ourSnapshot) if err != nil { log.Printf("got err persisting snapshot: %v", err) _ = ourSnapshot.DecRef() @@ -217,6 +223,7 @@ func (s *Scorch) persistSnapshot(snapshot *IndexSnapshot) error { s.rootLock.Lock() newIndexSnapshot := &IndexSnapshot{ + parent: s, epoch: s.root.epoch, segment: make([]*SegmentSnapshot, len(s.root.segment)), offsets: make([]uint64, len(s.root.offsets)), @@ -275,6 +282,7 @@ func (s *Scorch) loadFromBolt() error { if snapshots == nil { return nil } + foundRoot := false c := snapshots.Cursor() for k, _ := c.Last(); k != nil; k, _ = c.Prev() { _, snapshotEpoch, err := segment.DecodeUvarintAscending(k) @@ -282,14 +290,20 @@ func (s *Scorch) loadFromBolt() error { log.Printf("unable to parse segment epoch %x, continuing", k) continue } + if foundRoot { + s.eligibleForRemoval = append(s.eligibleForRemoval, snapshotEpoch) + continue + } snapshot := snapshots.Bucket(k) if snapshot == nil { log.Printf("snapshot key, but bucket missing %x, continuing", k) + s.eligibleForRemoval = append(s.eligibleForRemoval, snapshotEpoch) continue } indexSnapshot, err := s.loadSnapshot(snapshot) if err != nil { log.Printf("unable to load snapshot, %v, continuing", err) + s.eligibleForRemoval = append(s.eligibleForRemoval, snapshotEpoch) continue } indexSnapshot.epoch = 
snapshotEpoch @@ -301,8 +315,11 @@ func (s *Scorch) loadFromBolt() error { } s.nextSegmentID++ s.nextSnapshotEpoch = snapshotEpoch + 1 + if s.root != nil { + _ = s.root.DecRef() + } s.root = indexSnapshot - break + foundRoot = true } return nil }) @@ -311,6 +328,7 @@ func (s *Scorch) loadFromBolt() error { func (s *Scorch) loadSnapshot(snapshot *bolt.Bucket) (*IndexSnapshot, error) { rv := &IndexSnapshot{ + parent: s, internal: make(map[string][]byte), refs: 1, } @@ -380,3 +398,60 @@ func (s *Scorch) loadSegment(segmentBucket *bolt.Bucket) (*SegmentSnapshot, erro return rv, nil } + +type uint64Descending []uint64 + +func (p uint64Descending) Len() int { return len(p) } +func (p uint64Descending) Less(i, j int) bool { return p[i] > p[j] } +func (p uint64Descending) Swap(i, j int) { p[i], p[j] = p[j], p[i] } + +// NumSnapshotsToKeep represents how many recent, old snapshots to +// keep around per Scorch instance. Useful for apps that require +// rollback'ability. +var NumSnapshotsToKeep int + +// Removes enough snapshots from the rootBolt so that the +// s.eligibleForRemoval stays under the NumSnapshotsToKeep policy. +func (s *Scorch) removeOldSnapshots() error { + var epochsToRemove []uint64 + + s.rootLock.Lock() + if len(s.eligibleForRemoval) > NumSnapshotsToKeep { + sort.Sort(uint64Descending(s.eligibleForRemoval)) + epochsToRemove = append([]uint64(nil), s.eligibleForRemoval[NumSnapshotsToKeep:]...) // Copy. 
+ s.eligibleForRemoval = s.eligibleForRemoval[0:NumSnapshotsToKeep] + } + s.rootLock.Unlock() + + if len(epochsToRemove) <= 0 { + return nil + } + + tx, err := s.rootBolt.Begin(true) + if err != nil { + return err + } + defer func() { + if err == nil { + err = s.rootBolt.Sync() + } + }() + defer func() { + if err == nil { + err = tx.Commit() + } else { + _ = tx.Rollback() + } + }() + + for _, epochToRemove := range epochsToRemove { + k := segment.EncodeUvarintAscending(nil, epochToRemove) + err = tx.DeleteBucket(k) + if err == bolt.ErrBucketNotFound { + err = nil + } + } + + return err +} + diff --git a/index/scorch/scorch.go b/index/scorch/scorch.go index d59563be2..06e788ca2 100644 --- a/index/scorch/scorch.go +++ b/index/scorch/scorch.go @@ -59,6 +59,8 @@ type Scorch struct { persisterNotifier chan notificationChan rootBolt *bolt.DB asyncTasks sync.WaitGroup + + eligibleForRemoval []uint64 // Index snapshot epoch's that are safe to GC. } func NewScorch(storeName string, config map[string]interface{}, analysisQueue *index.AnalysisQueue) (index.Index, error) { @@ -67,9 +69,9 @@ func NewScorch(storeName string, config map[string]interface{}, analysisQueue *i config: config, analysisQueue: analysisQueue, stats: &Stats{}, - root: &IndexSnapshot{refs: 1}, nextSnapshotEpoch: 1, } + rv.root = &IndexSnapshot{parent: rv, refs: 1} ro, ok := config["read_only"].(bool) if ok { rv.readOnly = ro @@ -324,6 +326,14 @@ func (s *Scorch) Advanced() (store.KVStore, error) { return nil, nil } +func (s *Scorch) AddEligibleForRemoval(epoch uint64) { + s.rootLock.Lock() + if s.root == nil || s.root.epoch != epoch { + s.eligibleForRemoval = append(s.eligibleForRemoval, epoch) + } + s.rootLock.Unlock() +} + func init() { registry.RegisterIndexType(Name, NewScorch) } diff --git a/index/scorch/snapshot_index.go b/index/scorch/snapshot_index.go index 19581f755..6dd77ff4a 100644 --- a/index/scorch/snapshot_index.go +++ b/index/scorch/snapshot_index.go @@ -40,6 +40,7 @@ type 
asynchSegmentResult struct { } type IndexSnapshot struct { + parent *Scorch segment []*SegmentSnapshot offsets []uint64 internal map[string][]byte @@ -67,6 +68,9 @@ func (i *IndexSnapshot) DecRef() (err error) { } } } + if i.parent != nil { + go i.parent.AddEligibleForRemoval(i.epoch) + } } i.m.Unlock() return err From b7dff6669f0770eb5b4115f19d1f973f5f08a58a Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Wed, 13 Dec 2017 16:58:36 -0800 Subject: [PATCH 065/728] scorch cleanup of *.zap files not listed in the rootBolt --- index/scorch/persister.go | 99 +++++++++++++++++++++++++++++++++++---- 1 file changed, 89 insertions(+), 10 deletions(-) diff --git a/index/scorch/persister.go b/index/scorch/persister.go index 3a6ce85b5..d66bdea0d 100644 --- a/index/scorch/persister.go +++ b/index/scorch/persister.go @@ -17,8 +17,10 @@ package scorch import ( "bytes" "fmt" + "io/ioutil" "log" "os" + "path/filepath" "sort" "strings" @@ -32,15 +34,12 @@ import ( type notificationChan chan struct{} func (s *Scorch) persisterLoop() { + s.removeOldData(true) + var notify notificationChan var lastPersistedEpoch uint64 OUTER: for { - err := s.removeOldSnapshots() - if err != nil { - log.Printf("got err removing old snapshots: %v", err) - } - select { case <-s.closeCh: break OUTER @@ -56,7 +55,7 @@ OUTER: //for ourSnapshot.epoch != lastPersistedEpoch { if ourSnapshot.epoch != lastPersistedEpoch { // lets get started - err = s.persistSnapshot(ourSnapshot) + err := s.persistSnapshot(ourSnapshot) if err != nil { log.Printf("got err persisting snapshot: %v", err) _ = ourSnapshot.DecRef() @@ -111,6 +110,7 @@ OUTER: // woken up, next loop should pick up work } } + s.removeOldData(false) } s.asyncTasks.Done() } @@ -405,6 +405,20 @@ func (p uint64Descending) Len() int { return len(p) } func (p uint64Descending) Less(i, j int) bool { return p[i] > p[j] } func (p uint64Descending) Swap(i, j int) { p[i], p[j] = p[j], p[i] } +func (s *Scorch) removeOldData(force bool) { + removed, err := 
s.removeOldBoltSnapshots() + if err != nil { + log.Printf("got err removing old bolt snapshots: %v", err) + } + + if force || removed > 0 { + err = s.removeOldZapFiles() + if err != nil { + log.Printf("go err removing old zap files: %v", err) + } + } +} + // NumSnapshotsToKeep represents how many recent, old snapshots to // keep around per Scorch instance. Useful for apps that require // rollback'ability. @@ -412,7 +426,7 @@ var NumSnapshotsToKeep int // Removes enough snapshots from the rootBolt so that the // s.eligibleForRemoval stays under the NumSnapshotsToKeep policy. -func (s *Scorch) removeOldSnapshots() error { +func (s *Scorch) removeOldBoltSnapshots() (numRemoved int, err error) { var epochsToRemove []uint64 s.rootLock.Lock() @@ -424,12 +438,12 @@ func (s *Scorch) removeOldSnapshots() error { s.rootLock.Unlock() if len(epochsToRemove) <= 0 { - return nil + return 0, nil } tx, err := s.rootBolt.Begin(true) if err != nil { - return err + return 0, err } defer func() { if err == nil { @@ -450,8 +464,73 @@ func (s *Scorch) removeOldSnapshots() error { if err == bolt.ErrBucketNotFound { err = nil } + if err == nil { + numRemoved++ + } + } + + return numRemoved, err +} + +// Removes any *.zap files which aren't listed in the rootBolt. +func (s *Scorch) removeOldZapFiles() error { + liveFileNames, err := s.loadZapFileNames() + if err != nil { + return err + } + + currFileInfos, err := ioutil.ReadDir(s.path) + if err != nil { + return err + } + + for _, finfo := range currFileInfos { + fname := finfo.Name() + if filepath.Ext(fname) == ".zap" { + if _, exists := liveFileNames[fname]; !exists { + err := os.Remove(s.path + string(os.PathSeparator) + fname) + if err != nil { + log.Printf("got err removing file: %s, err: %v", fname, err) + } + } + } } - return err + return nil } +// Returns the *.zap file names that are listed in the rootBolt. 
+func (s *Scorch) loadZapFileNames() (map[string]struct{}, error) { + rv := map[string]struct{}{} + err := s.rootBolt.View(func(tx *bolt.Tx) error { + snapshots := tx.Bucket(boltSnapshotsBucket) + if snapshots == nil { + return nil + } + sc := snapshots.Cursor() + for sk, _ := sc.First(); sk != nil; sk, _ = sc.Next() { + snapshot := snapshots.Bucket(sk) + if snapshot == nil { + continue + } + segc := snapshot.Cursor() + for segk, _ := segc.First(); segk != nil; segk, _ = segc.Next() { + if segk[0] == boltInternalKey[0] { + continue + } + segmentBucket := snapshot.Bucket(segk) + if segmentBucket == nil { + continue + } + pathBytes := segmentBucket.Get(boltPathKey) + if pathBytes == nil { + continue + } + rv[string(pathBytes)] = struct{}{} + } + } + return nil + }) + + return rv, err +} From e1b0c61e2a529ddd703ce0a66e9f311cbec88597 Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Wed, 13 Dec 2017 22:07:37 -0500 Subject: [PATCH 066/728] fix bug in handling iterator-done --- index/scorch/segment/zap/merge.go | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/index/scorch/segment/zap/merge.go b/index/scorch/segment/zap/merge.go index 6d635bdfb..481d9272a 100644 --- a/index/scorch/segment/zap/merge.go +++ b/index/scorch/segment/zap/merge.go @@ -153,10 +153,12 @@ func persistMergedRest(segments []*Segment, drops []*roaring.Bitmap, if dict != nil && dict.fst != nil { itr, err2 := dict.fst.Iterator(nil, nil) - if err2 != nil { + if err2 != nil && err2 != vellum.ErrIteratorDone { return nil, err2 } - itrs = append(itrs, itr) + if itr != nil { + itrs = append(itrs, itr) + } } } From 1066ee7d221cc6f5b6e57f2fdc99d4fb1a2a26a1 Mon Sep 17 00:00:00 2001 From: Sreekanth Sivasankaran Date: Thu, 14 Dec 2017 12:38:29 +0530 Subject: [PATCH 067/728] DocumentVisitFieldTerms Scorch implementation level1 --- index/scorch/introducer.go | 2 + index/scorch/scorch_test.go | 152 +++++++++++++++++++++++++++++++ index/scorch/snapshot_index.go | 69 +++++++++++--- 
index/scorch/snapshot_segment.go | 98 +++++++++++++++++++- 4 files changed, 305 insertions(+), 16 deletions(-) diff --git a/index/scorch/introducer.go b/index/scorch/introducer.go index a80e5ac15..56c95143e 100644 --- a/index/scorch/introducer.go +++ b/index/scorch/introducer.go @@ -96,6 +96,7 @@ func (s *Scorch) introduceSegment(next *segmentIntroduction) error { id: s.root.segment[i].id, segment: s.root.segment[i].segment, notify: s.root.segment[i].notify, + cachedDocs: s.root.segment[i].cachedDocs, } s.root.segment[i].segment.AddRef() // apply new obsoletions @@ -113,6 +114,7 @@ func (s *Scorch) introduceSegment(next *segmentIntroduction) error { newSnapshot.segment[len(s.root.segment)] = &SegmentSnapshot{ id: next.id, segment: next.data, // Take ownership of next.data's ref-count. + cachedDocs: &cachedDocs{cache: nil}, } newSnapshot.offsets[len(s.root.segment)] = running if !s.unsafeBatch { diff --git a/index/scorch/scorch_test.go b/index/scorch/scorch_test.go index b46a5ffd9..8d2db4d24 100644 --- a/index/scorch/scorch_test.go +++ b/index/scorch/scorch_test.go @@ -1377,3 +1377,155 @@ Mechanism[edit] This section needs additional citations for verification. Please help improve this article by adding citations to reliable sources. Unsourced material may be challenged and removed. 
(July 2013) There are three characteristics of liquids which are relevant to the discussion of a BLEVE:`) + +func TestIndexDocumentVisitFieldTermsWithMultipleDocs(t *testing.T) { + defer func() { + err := DestroyTest() + if err != nil { + t.Fatal(err) + } + }() + + analysisQueue := index.NewAnalysisQueue(1) + idx, err := NewScorch(Name, testConfig, analysisQueue) + if err != nil { + t.Fatal(err) + } + err = idx.Open() + if err != nil { + t.Fatalf("error opening index: %v", err) + } + defer func() { + err := idx.Close() + if err != nil { + t.Fatal(err) + } + }() + + doc := document.NewDocument("1") + doc.AddField(document.NewTextFieldWithIndexingOptions("name", []uint64{}, []byte("test"), document.IndexField|document.StoreField|document.IncludeTermVectors)) + doc.AddField(document.NewTextFieldWithIndexingOptions("title", []uint64{}, []byte("mister"), document.IndexField|document.StoreField|document.IncludeTermVectors)) + + err = idx.Update(doc) + if err != nil { + t.Errorf("Error updating index: %v", err) + } + + indexReader, err := idx.Reader() + if err != nil { + t.Error(err) + } + + fieldTerms := make(index.FieldTerms) + docNumber, err := indexReader.InternalID("1") + if err != nil { + t.Fatal(err) + } + err = indexReader.DocumentVisitFieldTerms(docNumber, []string{"name", "title"}, func(field string, term []byte) { + fieldTerms[field] = append(fieldTerms[field], string(term)) + }) + if err != nil { + t.Error(err) + } + expectedFieldTerms := index.FieldTerms{ + "name": []string{"test"}, + "title": []string{"mister"}, + } + if !reflect.DeepEqual(fieldTerms, expectedFieldTerms) { + t.Errorf("expected field terms: %#v, got: %#v", expectedFieldTerms, fieldTerms) + } + err = indexReader.Close() + if err != nil { + t.Fatal(err) + } + + doc2 := document.NewDocument("2") + doc2.AddField(document.NewTextFieldWithIndexingOptions("name", []uint64{}, []byte("test2"), document.IndexField|document.StoreField|document.IncludeTermVectors)) + 
doc2.AddField(document.NewTextFieldWithIndexingOptions("title", []uint64{}, []byte("mister2"), document.IndexField|document.StoreField|document.IncludeTermVectors)) + err = idx.Update(doc2) + if err != nil { + t.Errorf("Error updating index: %v", err) + } + indexReader, err = idx.Reader() + if err != nil { + t.Error(err) + } + + fieldTerms = make(index.FieldTerms) + docNumber, err = indexReader.InternalID("2") + if err != nil { + t.Fatal(err) + } + err = indexReader.DocumentVisitFieldTerms(docNumber, []string{"name", "title"}, func(field string, term []byte) { + fieldTerms[field] = append(fieldTerms[field], string(term)) + }) + if err != nil { + t.Error(err) + } + expectedFieldTerms = index.FieldTerms{ + "name": []string{"test2"}, + "title": []string{"mister2"}, + } + if !reflect.DeepEqual(fieldTerms, expectedFieldTerms) { + t.Errorf("expected field terms: %#v, got: %#v", expectedFieldTerms, fieldTerms) + } + err = indexReader.Close() + if err != nil { + t.Fatal(err) + } + + doc3 := document.NewDocument("3") + doc3.AddField(document.NewTextFieldWithIndexingOptions("name3", []uint64{}, []byte("test3"), document.IndexField|document.StoreField|document.IncludeTermVectors)) + doc3.AddField(document.NewTextFieldWithIndexingOptions("title3", []uint64{}, []byte("mister3"), document.IndexField|document.StoreField|document.IncludeTermVectors)) + err = idx.Update(doc3) + if err != nil { + t.Errorf("Error updating index: %v", err) + } + indexReader, err = idx.Reader() + if err != nil { + t.Error(err) + } + + fieldTerms = make(index.FieldTerms) + docNumber, err = indexReader.InternalID("3") + if err != nil { + t.Fatal(err) + } + err = indexReader.DocumentVisitFieldTerms(docNumber, []string{"name3", "title3"}, func(field string, term []byte) { + fieldTerms[field] = append(fieldTerms[field], string(term)) + }) + if err != nil { + t.Error(err) + } + expectedFieldTerms = index.FieldTerms{ + "name3": []string{"test3"}, + "title3": []string{"mister3"}, + } + if 
!reflect.DeepEqual(fieldTerms, expectedFieldTerms) { + t.Errorf("expected field terms: %#v, got: %#v", expectedFieldTerms, fieldTerms) + } + + fieldTerms = make(index.FieldTerms) + docNumber, err = indexReader.InternalID("1") + if err != nil { + t.Fatal(err) + } + err = indexReader.DocumentVisitFieldTerms(docNumber, []string{"name", "title"}, func(field string, term []byte) { + fieldTerms[field] = append(fieldTerms[field], string(term)) + }) + if err != nil { + t.Error(err) + } + expectedFieldTerms = index.FieldTerms{ + "name": []string{"test"}, + "title": []string{"mister"}, + } + if !reflect.DeepEqual(fieldTerms, expectedFieldTerms) { + t.Errorf("expected field terms: %#v, got: %#v", expectedFieldTerms, fieldTerms) + } + err = indexReader.Close() + if err != nil { + t.Fatal(err) + } + +} diff --git a/index/scorch/snapshot_index.go b/index/scorch/snapshot_index.go index 6dd77ff4a..cb25efc65 100644 --- a/index/scorch/snapshot_index.go +++ b/index/scorch/snapshot_index.go @@ -248,7 +248,10 @@ func (i *IndexSnapshot) Document(id string) (rv *document.Document, err error) { return nil, nil } - docNum := docInternalToNumber(next.ID) + docNum, err := docInternalToNumber(next.ID) + if err != nil { + return nil, err + } segmentIndex, localDocNum := i.segmentIndexAndLocalDocNumFromGlobal(docNum) rv = document.NewDocument(id) @@ -286,12 +289,15 @@ func (i *IndexSnapshot) segmentIndexAndLocalDocNumFromGlobal(docNum uint64) (int } func (i *IndexSnapshot) ExternalID(id index.IndexInternalID) (string, error) { - docNum := docInternalToNumber(id) + docNum, err := docInternalToNumber(id) + if err != nil { + return "", err + } segmentIndex, localDocNum := i.segmentIndexAndLocalDocNumFromGlobal(docNum) var found bool var rv string - err := i.segment[segmentIndex].VisitDocument(localDocNum, func(field string, typ byte, value []byte, pos []uint64) bool { + err = i.segment[segmentIndex].VisitDocument(localDocNum, func(field string, typ byte, value []byte, pos []uint64) bool { if field 
== "_id" { found = true rv = string(value) @@ -377,15 +383,6 @@ func (i *IndexSnapshot) TermFieldReader(term []byte, field string, includeFreq, return rv, nil } -func (i *IndexSnapshot) DocumentVisitFieldTerms(id index.IndexInternalID, fields []string, - visitor index.DocumentFieldTermVisitor) error { - - docNum := docInternalToNumber(id) - segmentIndex, localDocNum := i.segmentIndexAndLocalDocNumFromGlobal(docNum) - - return i.segment[segmentIndex].DocumentVisitFieldTerms(localDocNum, fields, visitor) -} - func docNumberToBytes(in uint64) []byte { buf := new(bytes.Buffer) @@ -393,8 +390,50 @@ func docNumberToBytes(in uint64) []byte { return buf.Bytes() } -func docInternalToNumber(in index.IndexInternalID) uint64 { +func docInternalToNumber(in index.IndexInternalID) (uint64, error) { var res uint64 - _ = binary.Read(bytes.NewReader(in), binary.BigEndian, &res) - return res + err := binary.Read(bytes.NewReader(in), binary.BigEndian, &res) + if err != nil { + return res, err + } + return res, nil +} + +func (i *IndexSnapshot) DocumentVisitFieldTerms(id index.IndexInternalID, + fields []string, visitor index.DocumentFieldTermVisitor) error { + docNum, err := docInternalToNumber(id) + if err != nil { + return err + } + segmentIndex, localDocNum := i.segmentIndexAndLocalDocNumFromGlobal(docNum) + if segmentIndex >= len(i.segment) { + return nil + } + + i.m.Lock() + ss := i.segment[segmentIndex] + if ss.cachedDocs == nil { + ss.cachedDocs = &cachedDocs{cache: nil} + } + i.m.Unlock() + + err = ss.cachedDocs.prepareFields(localDocNum, fields, ss) + if err != nil { + return err + } + + for _, field := range fields { + if cachedFieldDocs, exists := ss.cachedDocs.cache[field]; exists { + if tlist, exists := cachedFieldDocs.docs[localDocNum]; exists { + terms := bytes.SplitN(tlist, TermSeparatorSplitSlice, -1) + for _, term := range terms { + if len(term) > 0 { + visitor(field, term) + } + } + } + } + } + + return nil } diff --git a/index/scorch/snapshot_segment.go 
b/index/scorch/snapshot_segment.go index ffd38cac5..836ec6fa7 100644 --- a/index/scorch/snapshot_segment.go +++ b/index/scorch/snapshot_segment.go @@ -15,11 +15,17 @@ package scorch import ( + "sync" + "github.com/RoaringBitmap/roaring" "github.com/blevesearch/bleve/index" "github.com/blevesearch/bleve/index/scorch/segment" ) +var TermSeparator byte = 0xff + +var TermSeparatorSplitSlice = []byte{TermSeparator} + type SegmentDictionarySnapshot struct { s *SegmentSnapshot d segment.TermDictionary @@ -46,7 +52,8 @@ type SegmentSnapshot struct { segment segment.Segment deleted *roaring.Bitmap - notify []chan error + notify []chan error + cachedDocs *cachedDocs } func (s *SegmentSnapshot) Id() uint64 { @@ -157,3 +164,92 @@ func (s *SegmentSnapshot) DocNumbersLive() *roaring.Bitmap { func (s *SegmentSnapshot) Fields() []string { return s.segment.Fields() } + +type cachedFieldDocs struct { + readyCh chan struct{} // closed when the cachedFieldDocs.docs is ready to be used. + err error // Non-nil if there was an error when preparing this cachedFieldDocs. + docs map[uint64][]byte // Keyed by localDocNum, value is a list of terms delimited by 0xFF. +} + +func (cfd *cachedFieldDocs) prepareFields(docNum uint64, field string, + ss *SegmentSnapshot) { + defer close(cfd.readyCh) + + dict, err := ss.segment.Dictionary(field) + if err != nil { + cfd.err = err + return + } + + dictItr := dict.Iterator() + next, err := dictItr.Next() + for next != nil && err == nil { + postings, err1 := dict.PostingsList(next.Term, nil) + if err1 != nil { + cfd.err = err1 + return + } + + postingsItr := postings.Iterator() + nextPosting, err2 := postingsItr.Next() + for err2 == nil && nextPosting != nil && nextPosting.Number() <= docNum { + if nextPosting.Number() == docNum { + // got what we're looking for + cfd.docs[docNum] = append(cfd.docs[docNum], []byte(next.Term)...) 
+ cfd.docs[docNum] = append(cfd.docs[docNum], TermSeparator) + } + nextPosting, err2 = postingsItr.Next() + } + + if err2 != nil { + cfd.err = err2 + return + } + + next, err = dictItr.Next() + } + + if err != nil { + cfd.err = err + return + } +} + +type cachedDocs struct { + m sync.Mutex // As the cache is asynchronously prepared, need a lock + cache map[string]*cachedFieldDocs // Keyed by field +} + +func (c *cachedDocs) prepareFields(docNum uint64, wantedFields []string, + ss *SegmentSnapshot) error { + c.m.Lock() + if c.cache == nil { + c.cache = make(map[string]*cachedFieldDocs, len(ss.Fields())) + } + + for _, field := range wantedFields { + _, exists := c.cache[field] + if !exists { + c.cache[field] = &cachedFieldDocs{ + readyCh: make(chan struct{}), + docs: make(map[uint64][]byte), + } + + go c.cache[field].prepareFields(docNum, field, ss) + } + } + + for _, field := range wantedFields { + cachedFieldDocs := c.cache[field] + c.m.Unlock() + <-cachedFieldDocs.readyCh + + if cachedFieldDocs.err != nil { + return cachedFieldDocs.err + } + c.m.Lock() + } + + c.m.Unlock() + return nil +} From 95b65ade3e266db161f900aa847f36f912f8abd1 Mon Sep 17 00:00:00 2001 From: Sreekanth Sivasankaran Date: Thu, 14 Dec 2017 17:16:47 +0530 Subject: [PATCH 068/728] getting right internalID for doc in UT --- index/scorch/scorch_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/index/scorch/scorch_test.go b/index/scorch/scorch_test.go index 8d2db4d24..6feabb306 100644 --- a/index/scorch/scorch_test.go +++ b/index/scorch/scorch_test.go @@ -1240,7 +1240,7 @@ func TestIndexDocumentVisitFieldTerms(t *testing.T) { fieldTerms := make(index.FieldTerms) - internalID, err := indexReader.GetInternal([]byte("1")) + internalID, err := indexReader.InternalID("1") if err != nil { t.Fatal(err) } From a4acd53c54b2de8927340707637a7ec9d3c5b692 Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Thu, 14 Dec 2017 07:52:38 -0500 Subject: [PATCH 069/728] try to make racey test safe 
--- index_test.go | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/index_test.go b/index_test.go index c4e99ce1e..03ef71929 100644 --- a/index_test.go +++ b/index_test.go @@ -674,16 +674,19 @@ func TestIndexMetadataRaceBug198(t *testing.T) { } }() + wg := sync.WaitGroup{} + wg.Add(1) done := make(chan struct{}) go func() { for { select { case <-done: + wg.Done() return default: - _, err := index.DocCount() - if err != nil { - t.Fatal(err) + _, err2 := index.DocCount() + if err2 != nil { + t.Fatal(err2) } } } @@ -701,6 +704,7 @@ func TestIndexMetadataRaceBug198(t *testing.T) { } } close(done) + wg.Wait() } func TestSortMatchSearch(t *testing.T) { From 149a26b5c159e1367fdb6d1e6b5fe32495005dcd Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Thu, 14 Dec 2017 10:27:39 -0500 Subject: [PATCH 070/728] merge deletion and cacheddocs fixes discussed in meeting --- index/scorch/introducer.go | 26 ++++++++++++++------------ index/scorch/merge.go | 2 +- index/scorch/persister.go | 27 +++++++++++++++++---------- index/scorch/snapshot_index.go | 6 +----- 4 files changed, 33 insertions(+), 28 deletions(-) diff --git a/index/scorch/introducer.go b/index/scorch/introducer.go index 56c95143e..a3a197b08 100644 --- a/index/scorch/introducer.go +++ b/index/scorch/introducer.go @@ -93,9 +93,9 @@ func (s *Scorch) introduceSegment(next *segmentIntroduction) error { } } newSnapshot.segment[i] = &SegmentSnapshot{ - id: s.root.segment[i].id, - segment: s.root.segment[i].segment, - notify: s.root.segment[i].notify, + id: s.root.segment[i].id, + segment: s.root.segment[i].segment, + notify: s.root.segment[i].notify, cachedDocs: s.root.segment[i].cachedDocs, } s.root.segment[i].segment.AddRef() @@ -112,8 +112,8 @@ func (s *Scorch) introduceSegment(next *segmentIntroduction) error { } // put new segment at end newSnapshot.segment[len(s.root.segment)] = &SegmentSnapshot{ - id: next.id, - segment: next.data, // Take ownership of next.data's ref-count. 
+ id: next.id, + segment: next.data, // Take ownership of next.data's ref-count. cachedDocs: &cachedDocs{cache: nil}, } newSnapshot.offsets[len(s.root.segment)] = running @@ -191,10 +191,11 @@ func (s *Scorch) introduceMerge(nextMerge *segmentMerge) { } else { // this segment is staying newSnapshot.segment = append(newSnapshot.segment, &SegmentSnapshot{ - id: s.root.segment[i].id, - segment: s.root.segment[i].segment, - notify: s.root.segment[i].notify, - deleted: s.root.segment[i].deleted, + id: s.root.segment[i].id, + segment: s.root.segment[i].segment, + notify: s.root.segment[i].notify, + deleted: s.root.segment[i].deleted, + cachedDocs: s.root.segment[i].cachedDocs, }) s.root.segment[i].segment.AddRef() newSnapshot.offsets = append(newSnapshot.offsets, running) @@ -204,9 +205,10 @@ func (s *Scorch) introduceMerge(nextMerge *segmentMerge) { // put new segment at end newSnapshot.segment = append(newSnapshot.segment, &SegmentSnapshot{ - id: nextMerge.id, - segment: nextMerge.new, // Take ownership for nextMerge.new's ref-count. - deleted: newSegmentDeleted, + id: nextMerge.id, + segment: nextMerge.new, // Take ownership for nextMerge.new's ref-count. 
+ deleted: newSegmentDeleted, + cachedDocs: &cachedDocs{cache: nil}, }) newSnapshot.offsets = append(newSnapshot.offsets, running) diff --git a/index/scorch/merge.go b/index/scorch/merge.go index 818f3dbd1..c077a1c7e 100644 --- a/index/scorch/merge.go +++ b/index/scorch/merge.go @@ -129,7 +129,7 @@ func (s *Scorch) planMergeAtSnapshot(ourSnapshot *IndexSnapshot) error { } } - filename := fmt.Sprintf("%x.zap", newSegmentID) + filename := fmt.Sprintf("%08x.zap", newSegmentID) path := s.path + string(os.PathSeparator) + filename newDocNums, err := zap.Merge(segmentsToMerge, docsToDrop, path, 1024) if err != nil { diff --git a/index/scorch/persister.go b/index/scorch/persister.go index d66bdea0d..1a1ea2833 100644 --- a/index/scorch/persister.go +++ b/index/scorch/persister.go @@ -171,7 +171,7 @@ func (s *Scorch) persistSnapshot(snapshot *IndexSnapshot) error { switch seg := segmentSnapshot.segment.(type) { case *mem.Segment: // need to persist this to disk - filename := fmt.Sprintf("%x.zap", segmentSnapshot.id) + filename := fmt.Sprintf("%08x.zap", segmentSnapshot.id) path := s.path + string(os.PathSeparator) + filename err2 := zap.PersistSegment(seg, path, 1024) if err2 != nil { @@ -234,9 +234,10 @@ func (s *Scorch) persistSnapshot(snapshot *IndexSnapshot) error { // see if this segment has been replaced if replacement, ok := newSegments[segmentSnapshot.id]; ok { newSegmentSnapshot := &SegmentSnapshot{ - segment: replacement, - deleted: segmentSnapshot.deleted, - id: segmentSnapshot.id, + segment: replacement, + deleted: segmentSnapshot.deleted, + id: segmentSnapshot.id, + cachedDocs: segmentSnapshot.cachedDocs, } newIndexSnapshot.segment[i] = newSegmentSnapshot // add the old segment snapshots notifications to the list @@ -382,7 +383,8 @@ func (s *Scorch) loadSegment(segmentBucket *bolt.Bucket) (*SegmentSnapshot, erro } rv := &SegmentSnapshot{ - segment: segment, + segment: segment, + cachedDocs: &cachedDocs{cache: nil}, } deletedBytes := 
segmentBucket.Get(boltDeletedKey) if deletedBytes != nil { @@ -474,7 +476,7 @@ func (s *Scorch) removeOldBoltSnapshots() (numRemoved int, err error) { // Removes any *.zap files which aren't listed in the rootBolt. func (s *Scorch) removeOldZapFiles() error { - liveFileNames, err := s.loadZapFileNames() + liveFileNames, highestName, err := s.loadZapFileNames() if err != nil { return err } @@ -487,7 +489,7 @@ func (s *Scorch) removeOldZapFiles() error { for _, finfo := range currFileInfos { fname := finfo.Name() if filepath.Ext(fname) == ".zap" { - if _, exists := liveFileNames[fname]; !exists { + if _, exists := liveFileNames[fname]; !exists && fname < highestName { err := os.Remove(s.path + string(os.PathSeparator) + fname) if err != nil { log.Printf("got err removing file: %s, err: %v", fname, err) @@ -500,8 +502,9 @@ func (s *Scorch) removeOldZapFiles() error { } // Returns the *.zap file names that are listed in the rootBolt. -func (s *Scorch) loadZapFileNames() (map[string]struct{}, error) { +func (s *Scorch) loadZapFileNames() (map[string]struct{}, string, error) { rv := map[string]struct{}{} + var highest string err := s.rootBolt.View(func(tx *bolt.Tx) error { snapshots := tx.Bucket(boltSnapshotsBucket) if snapshots == nil { @@ -526,11 +529,15 @@ func (s *Scorch) loadZapFileNames() (map[string]struct{}, error) { if pathBytes == nil { continue } - rv[string(pathBytes)] = struct{}{} + pathString := string(pathBytes) + if pathString > highest { + highest = pathString + } + rv[string(pathString)] = struct{}{} } } return nil }) - return rv, err + return rv, highest, err } diff --git a/index/scorch/snapshot_index.go b/index/scorch/snapshot_index.go index cb25efc65..201c1b24e 100644 --- a/index/scorch/snapshot_index.go +++ b/index/scorch/snapshot_index.go @@ -401,6 +401,7 @@ func docInternalToNumber(in index.IndexInternalID) (uint64, error) { func (i *IndexSnapshot) DocumentVisitFieldTerms(id index.IndexInternalID, fields []string, visitor 
index.DocumentFieldTermVisitor) error { + docNum, err := docInternalToNumber(id) if err != nil { return err @@ -410,12 +411,7 @@ func (i *IndexSnapshot) DocumentVisitFieldTerms(id index.IndexInternalID, return nil } - i.m.Lock() ss := i.segment[segmentIndex] - if ss.cachedDocs == nil { - ss.cachedDocs = &cachedDocs{cache: nil} - } - i.m.Unlock() err = ss.cachedDocs.prepareFields(localDocNum, fields, ss) if err != nil { From bd742caf6525d7e704ba5b79e028da96644eeeb9 Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Thu, 14 Dec 2017 10:29:19 -0500 Subject: [PATCH 071/728] don't try to close a nil segment if err opening --- index/scorch/persister.go | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/index/scorch/persister.go b/index/scorch/persister.go index 1a1ea2833..2ef55b69c 100644 --- a/index/scorch/persister.go +++ b/index/scorch/persister.go @@ -212,7 +212,9 @@ func (s *Scorch) persistSnapshot(snapshot *IndexSnapshot) error { newSegments[segmentID], err = zap.Open(path) if err != nil { for _, s := range newSegments { - _ = s.Close() // cleanup segments that were successfully opened + if s != nil { + _ = s.Close() // cleanup segments that were successfully opened + } } return fmt.Errorf("error opening new segment at %s, %v", path, err) } From 2be5eb4427a6c3a26812c8a3d567f8933db5bcf4 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Thu, 14 Dec 2017 10:49:33 -0800 Subject: [PATCH 072/728] scorch tracks zap files that can't be removed yet A race & solution found by Marty Schoch... consider a case when the merger might grab a nextSegmentID, like 4, but takes awhile to complete. Meanwhile, the persister grabs the nextSegmentID of 5, but finishes its persistence work fast, and then loops to cleanup any old files. The simple approach of checking a "highest segment ID" of 5 is wrong now, because the deleter now thinks that segment 4's zap file is (incorrectly) ok to delete. 
The solution in this commit is to track an ephemeral map of filenames which are ineligibleForRemoval, because they're still being written (by the merger) and haven't been fully incorporated into the rootBolt yet. The merger adds to that ineligibleForRemoval map as it starts a merged zap file, the persister cleans up entries from that map when it persists zap filenames into the rootBolt, and the deleter (part of the persister's loop) consults the map before performing any actual zap file deletions. --- index/scorch/merge.go | 5 ++++- index/scorch/persister.go | 39 ++++++++++++++++++++++--------------- index/scorch/scorch.go | 41 +++++++++++++++++++++++++++++++-------- 3 files changed, 60 insertions(+), 25 deletions(-) diff --git a/index/scorch/merge.go b/index/scorch/merge.go index c077a1c7e..cc3af774a 100644 --- a/index/scorch/merge.go +++ b/index/scorch/merge.go @@ -129,14 +129,17 @@ func (s *Scorch) planMergeAtSnapshot(ourSnapshot *IndexSnapshot) error { } } - filename := fmt.Sprintf("%08x.zap", newSegmentID) + filename := zapFileName(newSegmentID) + s.markIneligibleForRemoval(filename) path := s.path + string(os.PathSeparator) + filename newDocNums, err := zap.Merge(segmentsToMerge, docsToDrop, path, 1024) if err != nil { + s.unmarkIneligibleForRemoval(filename) return fmt.Errorf("merging failed: %v", err) } segment, err := zap.Open(path) if err != nil { + s.unmarkIneligibleForRemoval(filename) return err } sm := &segmentMerge{ diff --git a/index/scorch/persister.go b/index/scorch/persister.go index 2ef55b69c..f12d86f52 100644 --- a/index/scorch/persister.go +++ b/index/scorch/persister.go @@ -34,8 +34,6 @@ import ( type notificationChan chan struct{} func (s *Scorch) persisterLoop() { - s.removeOldData(true) - var notify notificationChan var lastPersistedEpoch uint64 OUTER: @@ -52,7 +50,6 @@ OUTER: ourSnapshot.AddRef() s.rootLock.RUnlock() - //for ourSnapshot.epoch != lastPersistedEpoch { if ourSnapshot.epoch != lastPersistedEpoch { // lets get started err := 
s.persistSnapshot(ourSnapshot) @@ -110,7 +107,7 @@ OUTER: // woken up, next loop should pick up work } } - s.removeOldData(false) + s.removeOldData() } s.asyncTasks.Done() } @@ -159,6 +156,7 @@ func (s *Scorch) persistSnapshot(snapshot *IndexSnapshot) error { } } + var filenames []string newSegmentPaths := make(map[uint64]string) // first ensure that each segment in this snapshot has been persisted @@ -171,7 +169,7 @@ func (s *Scorch) persistSnapshot(snapshot *IndexSnapshot) error { switch seg := segmentSnapshot.segment.(type) { case *mem.Segment: // need to persist this to disk - filename := fmt.Sprintf("%08x.zap", segmentSnapshot.id) + filename := zapFileName(segmentSnapshot.id) path := s.path + string(os.PathSeparator) + filename err2 := zap.PersistSegment(seg, path, 1024) if err2 != nil { @@ -182,6 +180,7 @@ func (s *Scorch) persistSnapshot(snapshot *IndexSnapshot) error { if err != nil { return err } + filenames = append(filenames, filename) case *zap.Segment: path := seg.Path() filename := strings.TrimPrefix(path, s.path+string(os.PathSeparator)) @@ -189,6 +188,7 @@ func (s *Scorch) persistSnapshot(snapshot *IndexSnapshot) error { if err != nil { return err } + filenames = append(filenames, filename) default: return fmt.Errorf("unknown segment type: %T", seg) } @@ -255,6 +255,9 @@ func (s *Scorch) persistSnapshot(snapshot *IndexSnapshot) error { for k, v := range s.root.internal { newIndexSnapshot.internal[k] = v } + for _, filename := range filenames { + delete(s.ineligibleForRemoval, filename) + } rootPrev := s.root s.root = newIndexSnapshot s.rootLock.Unlock() @@ -272,6 +275,10 @@ func (s *Scorch) persistSnapshot(snapshot *IndexSnapshot) error { return nil } +func zapFileName(epoch uint64) string { + return fmt.Sprintf("%012x.zap", epoch) +} + // bolt snapshot code var boltSnapshotsBucket = []byte{'s'} @@ -409,16 +416,16 @@ func (p uint64Descending) Len() int { return len(p) } func (p uint64Descending) Less(i, j int) bool { return p[i] > p[j] } func (p 
uint64Descending) Swap(i, j int) { p[i], p[j] = p[j], p[i] } -func (s *Scorch) removeOldData(force bool) { +func (s *Scorch) removeOldData() { removed, err := s.removeOldBoltSnapshots() if err != nil { log.Printf("got err removing old bolt snapshots: %v", err) } - if force || removed > 0 { + if removed > 0 { err = s.removeOldZapFiles() if err != nil { - log.Printf("go err removing old zap files: %v", err) + log.Printf("got err removing old zap files: %v", err) } } } @@ -478,7 +485,7 @@ func (s *Scorch) removeOldBoltSnapshots() (numRemoved int, err error) { // Removes any *.zap files which aren't listed in the rootBolt. func (s *Scorch) removeOldZapFiles() error { - liveFileNames, highestName, err := s.loadZapFileNames() + liveFileNames, err := s.loadZapFileNames() if err != nil { return err } @@ -488,10 +495,12 @@ func (s *Scorch) removeOldZapFiles() error { return err } + s.rootLock.RLock() + for _, finfo := range currFileInfos { fname := finfo.Name() if filepath.Ext(fname) == ".zap" { - if _, exists := liveFileNames[fname]; !exists && fname < highestName { + if _, exists := liveFileNames[fname]; !exists && !s.ineligibleForRemoval[fname] { err := os.Remove(s.path + string(os.PathSeparator) + fname) if err != nil { log.Printf("got err removing file: %s, err: %v", fname, err) @@ -500,13 +509,14 @@ func (s *Scorch) removeOldZapFiles() error { } } + s.rootLock.RUnlock() + return nil } // Returns the *.zap file names that are listed in the rootBolt. 
-func (s *Scorch) loadZapFileNames() (map[string]struct{}, string, error) { +func (s *Scorch) loadZapFileNames() (map[string]struct{}, error) { rv := map[string]struct{}{} - var highest string err := s.rootBolt.View(func(tx *bolt.Tx) error { snapshots := tx.Bucket(boltSnapshotsBucket) if snapshots == nil { @@ -532,14 +542,11 @@ func (s *Scorch) loadZapFileNames() (map[string]struct{}, string, error) { continue } pathString := string(pathBytes) - if pathString > highest { - highest = pathString - } rv[string(pathString)] = struct{}{} } } return nil }) - return rv, highest, err + return rv, err } diff --git a/index/scorch/scorch.go b/index/scorch/scorch.go index 06e788ca2..2e54bebce 100644 --- a/index/scorch/scorch.go +++ b/index/scorch/scorch.go @@ -60,16 +60,19 @@ type Scorch struct { rootBolt *bolt.DB asyncTasks sync.WaitGroup - eligibleForRemoval []uint64 // Index snapshot epoch's that are safe to GC. + eligibleForRemoval []uint64 // Index snapshot epochs that are safe to GC. + ineligibleForRemoval map[string]bool // Filenames that should not be GC'ed yet. 
} func NewScorch(storeName string, config map[string]interface{}, analysisQueue *index.AnalysisQueue) (index.Index, error) { rv := &Scorch{ - version: Version, - config: config, - analysisQueue: analysisQueue, - stats: &Stats{}, - nextSnapshotEpoch: 1, + version: Version, + config: config, + analysisQueue: analysisQueue, + stats: &Stats{}, + nextSnapshotEpoch: 1, + closeCh: make(chan struct{}), + ineligibleForRemoval: map[string]bool{}, } rv.root = &IndexSnapshot{parent: rv, refs: 1} ro, ok := config["read_only"].(bool) @@ -113,16 +116,24 @@ func (s *Scorch) Open() error { // now see if there is any existing state to load err = s.loadFromBolt() if err != nil { + _ = s.Close() return err } } - s.closeCh = make(chan struct{}) s.introductions = make(chan *segmentIntroduction) s.merges = make(chan *segmentMerge) s.introducerNotifier = make(chan notificationChan) s.persisterNotifier = make(chan notificationChan) + if !s.readOnly && s.path != "" { + err := s.removeOldZapFiles() // Before persister or merger create any new files. 
+ if err != nil { + _ = s.Close() + return err + } + } + s.asyncTasks.Add(1) go s.mainLoop() @@ -145,7 +156,9 @@ func (s *Scorch) Close() (err error) { if s.rootBolt != nil { err = s.rootBolt.Close() s.rootLock.Lock() - _ = s.root.DecRef() + if s.root != nil { + _ = s.root.DecRef() + } s.root = nil s.rootLock.Unlock() } @@ -334,6 +347,18 @@ func (s *Scorch) AddEligibleForRemoval(epoch uint64) { s.rootLock.Unlock() } +func (s *Scorch) markIneligibleForRemoval(filename string) { + s.rootLock.Lock() + s.ineligibleForRemoval[filename] = true + s.rootLock.Unlock() +} + +func (s *Scorch) unmarkIneligibleForRemoval(filename string) { + s.rootLock.Lock() + delete(s.ineligibleForRemoval, filename) + s.rootLock.Unlock() +} + func init() { registry.RegisterIndexType(Name, NewScorch) } From a8884e101136a9abb11d4c610001c3b0cd6ed58d Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Thu, 14 Dec 2017 13:16:06 -0800 Subject: [PATCH 073/728] scorch fix for TestSortMatchSearch The cachedDocs preparation has to happen for all docs in the field, not just on the currently requested docNum. Also, as part of this commit, there's a loop optimization where we no longer use bytes.Split() on the terms buffer, thus avoiding garbage creation. 
--- index/scorch/snapshot_index.go | 12 +++++++----- index/scorch/snapshot_segment.go | 20 ++++++++------------ 2 files changed, 15 insertions(+), 17 deletions(-) diff --git a/index/scorch/snapshot_index.go b/index/scorch/snapshot_index.go index 201c1b24e..86998bbdd 100644 --- a/index/scorch/snapshot_index.go +++ b/index/scorch/snapshot_index.go @@ -413,7 +413,7 @@ func (i *IndexSnapshot) DocumentVisitFieldTerms(id index.IndexInternalID, ss := i.segment[segmentIndex] - err = ss.cachedDocs.prepareFields(localDocNum, fields, ss) + err = ss.cachedDocs.prepareFields(fields, ss) if err != nil { return err } @@ -421,11 +421,13 @@ func (i *IndexSnapshot) DocumentVisitFieldTerms(id index.IndexInternalID, for _, field := range fields { if cachedFieldDocs, exists := ss.cachedDocs.cache[field]; exists { if tlist, exists := cachedFieldDocs.docs[localDocNum]; exists { - terms := bytes.SplitN(tlist, TermSeparatorSplitSlice, -1) - for _, term := range terms { - if len(term) > 0 { - visitor(field, term) + for { + i := bytes.Index(tlist, TermSeparatorSplitSlice) + if i < 0 { + break } + visitor(field, tlist[0:i]) + tlist = tlist[i+1:] } } } diff --git a/index/scorch/snapshot_segment.go b/index/scorch/snapshot_segment.go index 836ec6fa7..9b22eaedd 100644 --- a/index/scorch/snapshot_segment.go +++ b/index/scorch/snapshot_segment.go @@ -171,8 +171,7 @@ type cachedFieldDocs struct { docs map[uint64][]byte // Keyed by localDocNum, value is a list of terms delimited by 0xFF. 
} -func (cfd *cachedFieldDocs) prepareFields(docNum uint64, field string, - ss *SegmentSnapshot) { +func (cfd *cachedFieldDocs) prepareFields(field string, ss *SegmentSnapshot) { defer close(cfd.readyCh) dict, err := ss.segment.Dictionary(field) @@ -183,7 +182,7 @@ func (cfd *cachedFieldDocs) prepareFields(docNum uint64, field string, dictItr := dict.Iterator() next, err := dictItr.Next() - for next != nil && err == nil { + for err == nil && next != nil { postings, err1 := dict.PostingsList(next.Term, nil) if err1 != nil { cfd.err = err1 @@ -192,12 +191,10 @@ func (cfd *cachedFieldDocs) prepareFields(docNum uint64, field string, postingsItr := postings.Iterator() nextPosting, err2 := postingsItr.Next() - for err2 == nil && nextPosting != nil && nextPosting.Number() <= docNum { - if nextPosting.Number() == docNum { - // got what we're looking for - cfd.docs[docNum] = append(cfd.docs[docNum], []byte(next.Term)...) - cfd.docs[docNum] = append(cfd.docs[docNum], TermSeparator) - } + for err2 == nil && nextPosting != nil { + docNum := nextPosting.Number() + cfd.docs[docNum] = append(cfd.docs[docNum], []byte(next.Term)...) 
+ cfd.docs[docNum] = append(cfd.docs[docNum], TermSeparator) nextPosting, err2 = postingsItr.Next() } @@ -220,8 +217,7 @@ type cachedDocs struct { cache map[string]*cachedFieldDocs // Keyed by field } -func (c *cachedDocs) prepareFields(docNum uint64, wantedFields []string, - ss *SegmentSnapshot) error { +func (c *cachedDocs) prepareFields(wantedFields []string, ss *SegmentSnapshot) error { c.m.Lock() if c.cache == nil { c.cache = make(map[string]*cachedFieldDocs, len(ss.Fields())) @@ -235,7 +231,7 @@ func (c *cachedDocs) prepareFields(docNum uint64, wantedFields []string, docs: make(map[uint64][]byte), } - go c.cache[field].prepareFields(docNum, field, ss) + go c.cache[field].prepareFields(field, ss) } } From eb2f541d4f9f5756d524cb52cc9b9621f7c8eef6 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Thu, 14 Dec 2017 13:52:28 -0800 Subject: [PATCH 074/728] scorch filters _id from Reader.Document() results --- index/scorch/scorch_test.go | 29 ++++++++++++++--------------- index/scorch/snapshot_index.go | 3 +++ 2 files changed, 17 insertions(+), 15 deletions(-) diff --git a/index/scorch/scorch_test.go b/index/scorch/scorch_test.go index 6feabb306..d347ecec0 100644 --- a/index/scorch/scorch_test.go +++ b/index/scorch/scorch_test.go @@ -540,7 +540,7 @@ func TestIndexInsertWithStore(t *testing.T) { t.Error(err) } - if len(storedDoc.Fields) != 2 { + if len(storedDoc.Fields) != 1 { t.Errorf("expected 1 stored field, got %d", len(storedDoc.Fields)) } for _, field := range storedDoc.Fields { @@ -553,13 +553,7 @@ func TestIndexInsertWithStore(t *testing.T) { t.Errorf("expected field content 'test', got '%s'", string(textField.Value())) } } else if field.Name() == "_id" { - textField, ok := field.(*document.TextField) - if !ok { - t.Errorf("expected text field") - } - if string(textField.Value()) != "1" { - t.Errorf("expected field content '1', got '%s'", string(textField.Value())) - } + t.Errorf("not expecting _id field") } } } @@ -857,8 +851,8 @@ func 
TestIndexInsertUpdateDeleteWithMultipleTypesStored(t *testing.T) { t.Error(err) } - if len(storedDoc.Fields) != 4 { - t.Errorf("expected 4 stored field, got %d", len(storedDoc.Fields)) + if len(storedDoc.Fields) != 3 { + t.Errorf("expected 3 stored field, got %d", len(storedDoc.Fields)) } for _, field := range storedDoc.Fields { @@ -896,8 +890,9 @@ func TestIndexInsertUpdateDeleteWithMultipleTypesStored(t *testing.T) { t.Errorf("expected date value unix epoch, got %v", dateFieldDate) } } + } else if field.Name() == "_id" { + t.Errorf("not expecting _id field") } - } // now update the document, but omit one of the fields @@ -934,8 +929,8 @@ func TestIndexInsertUpdateDeleteWithMultipleTypesStored(t *testing.T) { t.Error(err) } - if len(storedDoc.Fields) != 3 { - t.Errorf("expected 3 stored field, got %d", len(storedDoc.Fields)) + if len(storedDoc.Fields) != 2 { + t.Errorf("expected 2 stored field, got %d", len(storedDoc.Fields)) } for _, field := range storedDoc.Fields { @@ -961,6 +956,8 @@ func TestIndexInsertUpdateDeleteWithMultipleTypesStored(t *testing.T) { t.Errorf("expeted numeric value 36.99, got %f", numFieldNumer) } } + } else if field.Name() == "_id" { + t.Errorf("not expecting _id field") } } @@ -1114,8 +1111,8 @@ func TestIndexUpdateComposites(t *testing.T) { if err != nil { t.Error(err) } - if len(storedDoc.Fields) != 3 { - t.Errorf("expected 3 stored field, got %d", len(storedDoc.Fields)) + if len(storedDoc.Fields) != 2 { + t.Errorf("expected 2 stored field, got %d", len(storedDoc.Fields)) } for _, field := range storedDoc.Fields { if field.Name() == "name" { @@ -1126,6 +1123,8 @@ func TestIndexUpdateComposites(t *testing.T) { if string(textField.Value()) != "testupdated" { t.Errorf("expected field content 'test', got '%s'", string(textField.Value())) } + } else if field.Name() == "_id" { + t.Errorf("not expecting _id field") } } } diff --git a/index/scorch/snapshot_index.go b/index/scorch/snapshot_index.go index 86998bbdd..5b54669b9 100644 --- 
a/index/scorch/snapshot_index.go +++ b/index/scorch/snapshot_index.go @@ -256,6 +256,9 @@ func (i *IndexSnapshot) Document(id string) (rv *document.Document, err error) { rv = document.NewDocument(id) err = i.segment[segmentIndex].VisitDocument(localDocNum, func(name string, typ byte, value []byte, pos []uint64) bool { + if name == "_id" { + return true + } switch typ { case 't': rv.AddField(document.NewTextField(name, pos, value)) From 6ab27e4afa51f56b2de03b5e62f5ab45edeec701 Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Thu, 14 Dec 2017 17:19:50 -0500 Subject: [PATCH 075/728] quick hack to disable safe batches in fts --- index/scorch/scorch.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/index/scorch/scorch.go b/index/scorch/scorch.go index 2e54bebce..585904979 100644 --- a/index/scorch/scorch.go +++ b/index/scorch/scorch.go @@ -79,6 +79,10 @@ func NewScorch(storeName string, config map[string]interface{}, analysisQueue *i if ok { rv.readOnly = ro } + // hack for now to disable safe batches in FTS + if storeName == "moss" { + rv.unsafeBatch = true + } return rv, nil } From 506aa1c325005fb1d955e0c8b5a9bf013011fe96 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Thu, 14 Dec 2017 14:40:33 -0800 Subject: [PATCH 076/728] scorch fix data race w/ AddEligibleForRemoval Found from "go test -race ./..." 
WARNING: DATA RACE Read at 0x00c420088060 by goroutine 48: github.com/blevesearch/bleve/index/scorch.(*Scorch).AddEligibleForRemoval() /Users/steveyen/go/src/github.com/blevesearch/bleve/index/scorch/scorch.go:348 +0x6d Previous write at 0x00c420088060 by goroutine 31: github.com/blevesearch/bleve/index/scorch.(*Scorch).loadFromBolt.func1() /Users/steveyen/go/src/github.com/blevesearch/bleve/index/scorch/persister.go:332 +0x87b github.com/boltdb/bolt.(*DB).View() /Users/steveyen/go/src/github.com/boltdb/bolt/db.go:629 +0xc1 github.com/blevesearch/bleve/index/scorch.(*Scorch).loadFromBolt() /Users/steveyen/go/src/github.com/blevesearch/bleve/index/scorch/persister.go:290 +0xa1 github.com/blevesearch/bleve/index/scorch.(*Scorch).Open() /Users/steveyen/go/src/github.com/blevesearch/bleve/index/scorch/scorch.go:121 +0x77f github.com/blevesearch/bleve/index/scorch.TestIndexOpenReopen() /Users/steveyen/go/src/github.com/blevesearch/bleve/index/scorch/scorch_test.go:115 +0x1351 testing.tRunner() /usr/local/Cellar/go/1.9/libexec/src/testing/testing.go:746 +0x16c Goroutine 48 (running) created at: github.com/blevesearch/bleve/index/scorch.(*IndexSnapshot).DecRef() /Users/steveyen/go/src/github.com/blevesearch/bleve/index/scorch/snapshot_index.go:72 +0x23e github.com/blevesearch/bleve/index/scorch.(*Scorch).loadFromBolt.func1() /Users/steveyen/go/src/github.com/blevesearch/bleve/index/scorch/persister.go:330 +0x8f4 github.com/boltdb/bolt.(*DB).View() /Users/steveyen/go/src/github.com/boltdb/bolt/db.go:629 +0xc1 github.com/blevesearch/bleve/index/scorch.(*Scorch).loadFromBolt() /Users/steveyen/go/src/github.com/blevesearch/bleve/index/scorch/persister.go:290 +0xa1 github.com/blevesearch/bleve/index/scorch.(*Scorch).Open() /Users/steveyen/go/src/github.com/blevesearch/bleve/index/scorch/scorch.go:121 +0x77f github.com/blevesearch/bleve/index/scorch.TestIndexOpenReopen() /Users/steveyen/go/src/github.com/blevesearch/bleve/index/scorch/scorch_test.go:115 +0x1351 
testing.tRunner() /usr/local/Cellar/go/1.9/libexec/src/testing/testing.go:746 +0x16c --- index/scorch/persister.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/index/scorch/persister.go b/index/scorch/persister.go index f12d86f52..d0f010e48 100644 --- a/index/scorch/persister.go +++ b/index/scorch/persister.go @@ -325,10 +325,12 @@ func (s *Scorch) loadFromBolt() error { } s.nextSegmentID++ s.nextSnapshotEpoch = snapshotEpoch + 1 + s.rootLock.Lock() if s.root != nil { _ = s.root.DecRef() } s.root = indexSnapshot + s.rootLock.Unlock() foundRoot = true } return nil From b5aa4ed22b6d73d1dd3ff589c7e21db536616df1 Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Thu, 14 Dec 2017 17:41:02 -0500 Subject: [PATCH 077/728] return err not panic --- index/scorch/segment/zap/segment.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/index/scorch/segment/zap/segment.go b/index/scorch/segment/zap/segment.go index 498699072..65c62ac9c 100644 --- a/index/scorch/segment/zap/segment.go +++ b/index/scorch/segment/zap/segment.go @@ -201,7 +201,7 @@ func (s *Segment) VisitDocument(num uint64, visitor segment.DocumentFieldValueVi meta, compressed := s.getStoredMetaAndCompressed(num) uncompressed, err := snappy.Decode(nil, compressed) if err != nil { - panic(err) + return err } // now decode meta and process reader := bytes.NewReader(meta) From f05794c6aace58cf791ee8f0d2108c5ecf9ea484 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Fri, 15 Dec 2017 11:11:18 -0800 Subject: [PATCH 078/728] scorch removed worker goroutines from TermFieldReader() On a couple of micro benchmarks on a dev macbook using bleve-query on an index of 50K wikipedia docs, scorch is now in more the same neighborhood of upsidedown/moss... high-freq term search "text:date"... 400 qps - upsidedown/moss 360 qps - scorch before 404 qps - scorch after zero-freq term search "text:mschoch"... 
100K qps - upsidedown/moss 55K qps - scorch before 99K qps - scorch after Of note, the scorch index had ~150 *.zap files in it, which likely made made the worker goroutine overhead more costly than for a case with few segments, where goroutine and channel related work appeared relatively prominently in the pprof SVG's. --- index/scorch/snapshot_index.go | 41 +++++++++------------------------- 1 file changed, 10 insertions(+), 31 deletions(-) diff --git a/index/scorch/snapshot_index.go b/index/scorch/snapshot_index.go index 5b54669b9..c0b50a25e 100644 --- a/index/scorch/snapshot_index.go +++ b/index/scorch/snapshot_index.go @@ -341,27 +341,6 @@ func (i *IndexSnapshot) InternalID(id string) (rv index.IndexInternalID, err err func (i *IndexSnapshot) TermFieldReader(term []byte, field string, includeFreq, includeNorm, includeTermVectors bool) (index.TermFieldReader, error) { - results := make(chan *asynchSegmentResult) - for index, segment := range i.segment { - go func(index int, segment *SegmentSnapshot) { - dict, err := segment.Dictionary(field) - if err != nil { - results <- &asynchSegmentResult{err: err} - } else { - pl, err := dict.PostingsList(string(term), nil) - if err != nil { - results <- &asynchSegmentResult{err: err} - } else { - results <- &asynchSegmentResult{ - index: index, - postings: pl, - } - } - } - }(index, segment) - } - - var err error rv := &IndexSnapshotTermFieldReader{ term: term, snapshot: i, @@ -371,17 +350,17 @@ func (i *IndexSnapshot) TermFieldReader(term []byte, field string, includeFreq, includeNorm: includeNorm, includeTermVectors: includeTermVectors, } - for count := 0; count < len(i.segment); count++ { - asr := <-results - if asr.err != nil && err == nil { - err = asr.err - } else { - rv.postings[asr.index] = asr.postings - rv.iterators[asr.index] = asr.postings.Iterator() + for i, segment := range i.segment { + dict, err := segment.Dictionary(field) + if err != nil { + return nil, err } - } - if err != nil { - return nil, err + pl, 
err := dict.PostingsList(string(term), nil) + if err != nil { + return nil, err + } + rv.postings[i] = pl + rv.iterators[i] = pl.Iterator() } return rv, nil } From 620dcdb6f87dcc78c5c1baec713d141f7c7e604d Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Fri, 15 Dec 2017 11:54:52 -0800 Subject: [PATCH 079/728] scorch uses prealloc'ed buffer for docNumberToBytes() On a couple of micro benchmarks on a dev macbook using bleve-query on an index of 50K wikipedia docs, scorch is now faster than upsidedown/moss on high-freq term search "text:date"... 400 qps - upsidedown/moss 404 qps - scorch before 565 qps - scorch after --- index/scorch/snapshot_index.go | 11 ++++++----- index/scorch/snapshot_index_doc.go | 2 +- index/scorch/snapshot_index_tfr.go | 3 +-- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/index/scorch/snapshot_index.go b/index/scorch/snapshot_index.go index c0b50a25e..7f5f41032 100644 --- a/index/scorch/snapshot_index.go +++ b/index/scorch/snapshot_index.go @@ -365,11 +365,12 @@ func (i *IndexSnapshot) TermFieldReader(term []byte, field string, includeFreq, return rv, nil } -func docNumberToBytes(in uint64) []byte { - - buf := new(bytes.Buffer) - _ = binary.Write(buf, binary.BigEndian, in) - return buf.Bytes() +func docNumberToBytes(buf []byte, in uint64) []byte { + if len(buf) != 8 { + buf = make([]byte, 8) + } + binary.BigEndian.PutUint64(buf, in) + return buf } func docInternalToNumber(in index.IndexInternalID) (uint64, error) { diff --git a/index/scorch/snapshot_index_doc.go b/index/scorch/snapshot_index_doc.go index 4656079b0..d1205ff8e 100644 --- a/index/scorch/snapshot_index_doc.go +++ b/index/scorch/snapshot_index_doc.go @@ -36,7 +36,7 @@ func (i *IndexSnapshotDocIDReader) Next() (index.IndexInternalID, error) { next := i.iterators[i.segmentOffset].Next() // make segment number into global number by adding offset globalOffset := i.snapshot.offsets[i.segmentOffset] - return docNumberToBytes(uint64(next) + globalOffset), nil + return 
docNumberToBytes(nil, uint64(next)+globalOffset), nil } return nil, nil } diff --git a/index/scorch/snapshot_index_tfr.go b/index/scorch/snapshot_index_tfr.go index 1fbabdfbb..d6c8dcd13 100644 --- a/index/scorch/snapshot_index_tfr.go +++ b/index/scorch/snapshot_index_tfr.go @@ -49,8 +49,7 @@ func (i *IndexSnapshotTermFieldReader) Next(preAlloced *index.TermFieldDoc) (*in // make segment number into global number by adding offset globalOffset := i.snapshot.offsets[i.segmentOffset] nnum := next.Number() - rv.ID = docNumberToBytes(nnum + globalOffset) - + rv.ID = docNumberToBytes(rv.ID, nnum+globalOffset) i.postingToTermFieldDoc(next, rv) i.currID = rv.ID From 45c212a0c227f1923a74fa16d5f115658ac13020 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Fri, 15 Dec 2017 13:25:37 -0800 Subject: [PATCH 080/728] scorch mergeplan package comments tweak Moving the package comment for mergeplan to the right place. --- index/scorch/mergeplan/merge_plan.go | 3 +++ index/scorch/mergeplan/merge_plan_test.go | 3 --- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/index/scorch/mergeplan/merge_plan.go b/index/scorch/mergeplan/merge_plan.go index f3f7b9e3b..61510c1c9 100644 --- a/index/scorch/mergeplan/merge_plan.go +++ b/index/scorch/mergeplan/merge_plan.go @@ -12,6 +12,9 @@ // See the License for the specific language governing permissions and // limitations under the License. +// Package mergeplan provides a segment merge planning approach that's +// inspired by Lucene's TieredMergePolicy.java and descriptions like +// http://blog.mikemccandless.com/2011/02/visualizing-lucenes-segment-merges.html package mergeplan import ( diff --git a/index/scorch/mergeplan/merge_plan_test.go b/index/scorch/mergeplan/merge_plan_test.go index 05dcaaf9f..bf2f24be1 100644 --- a/index/scorch/mergeplan/merge_plan_test.go +++ b/index/scorch/mergeplan/merge_plan_test.go @@ -12,9 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-// The segment merge planning approach was inspired by Lucene's -// TieredMergePolicy.java and descriptions like -// http://blog.mikemccandless.com/2011/02/visualizing-lucenes-segment-merges.html package mergeplan import ( From a575be4d56f89f805b4a4904834932095df5c977 Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Fri, 15 Dec 2017 19:26:23 -0500 Subject: [PATCH 081/728] fix issue where we incorrectly seed the nextSegmentID on Open() --- index/scorch/persister.go | 31 +++++++++++++++++++++++++++---- 1 file changed, 27 insertions(+), 4 deletions(-) diff --git a/index/scorch/persister.go b/index/scorch/persister.go index d0f010e48..73a0bd9f4 100644 --- a/index/scorch/persister.go +++ b/index/scorch/persister.go @@ -22,6 +22,7 @@ import ( "os" "path/filepath" "sort" + "strconv" "strings" "github.com/RoaringBitmap/roaring" @@ -318,10 +319,9 @@ func (s *Scorch) loadFromBolt() error { } indexSnapshot.epoch = snapshotEpoch // set the nextSegmentID - for _, segment := range indexSnapshot.segment { - if segment.id > s.nextSegmentID { - s.nextSegmentID = segment.id - } + s.nextSegmentID, err = s.maxSegmentIDOnDisk() + if err != nil { + return err } s.nextSegmentID++ s.nextSnapshotEpoch = snapshotEpoch + 1 @@ -485,6 +485,29 @@ func (s *Scorch) removeOldBoltSnapshots() (numRemoved int, err error) { return numRemoved, err } +func (s *Scorch) maxSegmentIDOnDisk() (uint64, error) { + currFileInfos, err := ioutil.ReadDir(s.path) + if err != nil { + return 0, err + } + + var rv uint64 + for _, finfo := range currFileInfos { + fname := finfo.Name() + if filepath.Ext(fname) == ".zap" { + prefix := strings.TrimSuffix(fname, ".zap") + id, err2 := strconv.ParseUint(prefix, 16, 64) + if err2 != nil { + return 0, err2 + } + if id > rv { + rv = id + } + } + } + return rv, err +} + // Removes any *.zap files which aren't listed in the rootBolt. 
func (s *Scorch) removeOldZapFiles() error { liveFileNames, err := s.loadZapFileNames() From 0539744e901cc74f2ba7d1dd89d6f8bdc893d79b Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Sat, 16 Dec 2017 08:39:10 -0800 Subject: [PATCH 082/728] scorch mergeplan.ToBarChart() refactored to callable API Refactored out API so it's usable from other places. --- index/scorch/mergeplan/merge_plan.go | 58 +++++++++++++++++++++++ index/scorch/mergeplan/merge_plan_test.go | 47 +----------------- 2 files changed, 59 insertions(+), 46 deletions(-) diff --git a/index/scorch/mergeplan/merge_plan.go b/index/scorch/mergeplan/merge_plan.go index 61510c1c9..c77fe37f8 100644 --- a/index/scorch/mergeplan/merge_plan.go +++ b/index/scorch/mergeplan/merge_plan.go @@ -18,8 +18,10 @@ package mergeplan import ( + "fmt" "math" "sort" + "strings" ) // A Segment represents the information that the planner needs to @@ -292,3 +294,59 @@ func ScoreSegments(segments []Segment, o *MergePlanOptions) float64 { return score } + +// ------------------------------------------ + +// ToBarChart returns an ASCII rendering of the segments and the plan. +// The barMax is the max width of the bars in the bar chart. +func ToBarChart(prefix string, barMax int, segments []Segment, plan *MergePlan) string { + rv := make([]string, 0, len(segments)) + + var maxFullSize int64 + for _, segment := range segments { + if maxFullSize < segment.FullSize() { + maxFullSize = segment.FullSize() + } + } + if maxFullSize < 0 { + maxFullSize = 1 + } + + for _, segment := range segments { + barFull := int(segment.FullSize()) + barLive := int(segment.LiveSize()) + + if maxFullSize > int64(barMax) { + barFull = int(float64(barMax) * float64(barFull) / float64(maxFullSize)) + barLive = int(float64(barMax) * float64(barLive) / float64(maxFullSize)) + } + + barKind := " " + barChar := "." 
+ + if plan != nil { + TASK_LOOP: + for taski, task := range plan.Tasks { + for _, taskSegment := range task.Segments { + if taskSegment == segment { + barKind = "*" + barChar = fmt.Sprintf("%d", taski) + break TASK_LOOP + } + } + } + } + + bar := + strings.Repeat(barChar, barLive)[0:barLive] + + strings.Repeat("x", barFull-barLive)[0:barFull-barLive] + + rv = append(rv, fmt.Sprintf("%s %5d: %5d /%5d - %s %s", prefix, + segment.Id(), + segment.LiveSize(), + segment.FullSize(), + barKind, bar)) + } + + return strings.Join(rv, "\n") +} diff --git a/index/scorch/mergeplan/merge_plan_test.go b/index/scorch/mergeplan/merge_plan_test.go index bf2f24be1..ca4d80bd1 100644 --- a/index/scorch/mergeplan/merge_plan_test.go +++ b/index/scorch/mergeplan/merge_plan_test.go @@ -20,7 +20,6 @@ import ( "os" "reflect" "sort" - "strings" "testing" ) @@ -451,49 +450,5 @@ func emit(descrip string, cycle int, step int, segments []Segment, plan *MergePl } fmt.Printf("%s %d.%d ---------- %s\n", descrip, cycle, step, suffix) - - var maxFullSize int64 - for _, segment := range segments { - if maxFullSize < segment.FullSize() { - maxFullSize = segment.FullSize() - } - } - - barMax := 100 - - for _, segment := range segments { - barFull := int(segment.FullSize()) - barLive := int(segment.LiveSize()) - - if maxFullSize > int64(barMax) { - barFull = int(float64(barMax) * float64(barFull) / float64(maxFullSize)) - barLive = int(float64(barMax) * float64(barLive) / float64(maxFullSize)) - } - - barKind := " " - barChar := "." 
- - if plan != nil { - TASK_LOOP: - for taski, task := range plan.Tasks { - for _, taskSegment := range task.Segments { - if taskSegment == segment { - barKind = "*" - barChar = fmt.Sprintf("%d", taski) - break TASK_LOOP - } - } - } - } - - bar := - strings.Repeat(barChar, barLive)[0:barLive] + - strings.Repeat("x", barFull-barLive)[0:barFull-barLive] - - fmt.Printf("%s %5d: %5d /%5d - %s %s\n", descrip, - segment.Id(), - segment.LiveSize(), - segment.FullSize(), - barKind, bar) - } + fmt.Printf("%s\n", ToBarChart(descrip, 100, segments, plan)) } From e98602600d3067a3e073380875727b5deeef310c Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Sat, 16 Dec 2017 14:22:15 -0800 Subject: [PATCH 083/728] scorch mergeplan added TierGrowth option Previously, CalcBudget() was treating MergePlanOptions.SegmentsPerMergeTask as the growth factor while computing the idealized staircase of segments. This change introduces a TierGrowth option to MergePlanOptions for more control and so that SegmentsPerMergeTask can be tweaked independently of the tier growth factor. --- index/scorch/mergeplan/merge_plan.go | 16 +++++++++++----- index/scorch/mergeplan/merge_plan_test.go | 11 ++++++++++- 2 files changed, 21 insertions(+), 6 deletions(-) diff --git a/index/scorch/mergeplan/merge_plan.go b/index/scorch/mergeplan/merge_plan.go index c77fe37f8..41ccae0c4 100644 --- a/index/scorch/mergeplan/merge_plan.go +++ b/index/scorch/mergeplan/merge_plan.go @@ -75,6 +75,10 @@ type MergePlanOptions struct { // planner’s predicted sizes. MaxSegmentSize int64 + // The growth factor for each tier in a staircase of idealized + // segments computed by CalcBudget(). + TierGrowth float64 + // The number of segments in any resulting MergeTask. e.g., // len(result.Tasks[ * ].Segments) == SegmentsPerMergeTask. 
SegmentsPerMergeTask int @@ -115,6 +119,7 @@ func (o *MergePlanOptions) RaiseToFloorSegmentSize(s int64) int64 { var DefaultMergePlanOptions = MergePlanOptions{ MaxSegmentsPerTier: 10, MaxSegmentSize: 5000000, + TierGrowth: 10.0, SegmentsPerMergeTask: 10, FloorSegmentSize: 2000, ReclaimDeletesWeight: 2.0, @@ -212,7 +217,8 @@ func plan(segmentsIn []Segment, o *MergePlanOptions) (*MergePlan, error) { } // Compute the number of segments that would be needed to cover the -// totalSize, by climbing up a logarithmic staircase of segment tiers. +// totalSize, by climbing up a logarithmically growing staircase of +// segment tiers. func CalcBudget(totalSize int64, firstTierSize int64, o *MergePlanOptions) ( budgetNumSegments int) { tierSize := firstTierSize @@ -225,9 +231,9 @@ func CalcBudget(totalSize int64, firstTierSize int64, o *MergePlanOptions) ( maxSegmentsPerTier = 1 } - segmentsPerMergeTask := int64(o.SegmentsPerMergeTask) - if segmentsPerMergeTask < 2 { - segmentsPerMergeTask = 2 + tierGrowth := o.TierGrowth + if tierGrowth < 1.0 { + tierGrowth = 1.0 } for totalSize > 0 { @@ -239,7 +245,7 @@ func CalcBudget(totalSize int64, firstTierSize int64, o *MergePlanOptions) ( budgetNumSegments += maxSegmentsPerTier totalSize -= int64(maxSegmentsPerTier) * tierSize - tierSize *= segmentsPerMergeTask + tierSize = int64(float64(tierSize) * tierGrowth) } return budgetNumSegments diff --git a/index/scorch/mergeplan/merge_plan_test.go b/index/scorch/mergeplan/merge_plan_test.go index ca4d80bd1..4db8eb1e8 100644 --- a/index/scorch/mergeplan/merge_plan_test.go +++ b/index/scorch/mergeplan/merge_plan_test.go @@ -98,6 +98,7 @@ func TestSimplePlan(t *testing.T) { &MergePlanOptions{ MaxSegmentsPerTier: 1, MaxSegmentSize: 1000, + TierGrowth: 2.0, SegmentsPerMergeTask: 2, FloorSegmentSize: 1, }, @@ -156,11 +157,12 @@ func TestCalcBudget(t *testing.T) { }{ {0, 0, MergePlanOptions{}, 0}, {1, 0, MergePlanOptions{}, 1}, - {9, 0, MergePlanOptions{}, 4}, + {9, 0, MergePlanOptions{}, 9}, {1, 
1, MergePlanOptions{ MaxSegmentsPerTier: 1, MaxSegmentSize: 1000, + TierGrowth: 2.0, SegmentsPerMergeTask: 2, FloorSegmentSize: 1, }, @@ -170,6 +172,7 @@ func TestCalcBudget(t *testing.T) { MergePlanOptions{ MaxSegmentsPerTier: 1, MaxSegmentSize: 1000, + TierGrowth: 2.0, SegmentsPerMergeTask: 2, FloorSegmentSize: 1, }, @@ -179,6 +182,7 @@ func TestCalcBudget(t *testing.T) { MergePlanOptions{ MaxSegmentsPerTier: 2, MaxSegmentSize: 1000, + TierGrowth: 2.0, SegmentsPerMergeTask: 2, FloorSegmentSize: 1, }, @@ -201,6 +205,7 @@ func TestInsert1SameSizedSegmentBetweenMerges(t *testing.T) { o := &MergePlanOptions{ MaxSegmentSize: 1000, MaxSegmentsPerTier: 3, + TierGrowth: 3.0, SegmentsPerMergeTask: 3, } @@ -226,6 +231,7 @@ func TestInsertManySameSizedSegmentsBetweenMerges(t *testing.T) { o := &MergePlanOptions{ MaxSegmentSize: 1000, MaxSegmentsPerTier: 3, + TierGrowth: 3.0, SegmentsPerMergeTask: 3, } @@ -253,6 +259,7 @@ func TestInsertManySameSizedSegmentsWithDeletionsBetweenMerges(t *testing.T) { o := &MergePlanOptions{ MaxSegmentSize: 1000, MaxSegmentsPerTier: 3, + TierGrowth: 3.0, SegmentsPerMergeTask: 3, } @@ -290,6 +297,7 @@ func TestInsertManyDifferentSizedSegmentsBetweenMerges(t *testing.T) { o := &MergePlanOptions{ MaxSegmentSize: 1000, MaxSegmentsPerTier: 3, + TierGrowth: 3.0, SegmentsPerMergeTask: 3, } @@ -317,6 +325,7 @@ func TestManySameSizedSegmentsWithDeletesBetweenMerges(t *testing.T) { o := &MergePlanOptions{ MaxSegmentSize: 1000, MaxSegmentsPerTier: 3, + TierGrowth: 3.0, SegmentsPerMergeTask: 3, } From ecbb3d2df4a573988e20255ef64651a1bf602ad6 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Sat, 16 Dec 2017 21:21:43 -0800 Subject: [PATCH 084/728] scorch handles non-updating batches better This commit improves handling when an incoming batch has internal-data updates only and no doc updates. In this case, a nil segment instead of an empty segment instance is used in the segmentIntroduction. 
The segmentIntroduction, that is, might now hold only internal-data updates only. To handle synchronous persistence, a new field that's a slice of persisted notification channels is added to the IndexSnapshot struct, which the persister goroutine will close as each IndexSnapshot is persisted. Also, as part of this change, instead of checking the unsafeBatch flag in several places, we instead check for non-nil'ness of these persisted chan's. --- index/scorch/introducer.go | 51 ++++++++++++++++---------------- index/scorch/persister.go | 7 +++-- index/scorch/scorch.go | 8 ++--- index/scorch/snapshot_index.go | 2 ++ index/scorch/snapshot_segment.go | 3 +- 5 files changed, 37 insertions(+), 34 deletions(-) diff --git a/index/scorch/introducer.go b/index/scorch/introducer.go index a3a197b08..ef7eaab68 100644 --- a/index/scorch/introducer.go +++ b/index/scorch/introducer.go @@ -66,12 +66,14 @@ func (s *Scorch) introduceSegment(next *segmentIntroduction) error { // acquire lock s.rootLock.Lock() - // prepare new index snapshot, with curr size + 1 + nsegs := len(s.root.segment) + + // prepare new index snapshot newSnapshot := &IndexSnapshot{ parent: s, - segment: make([]*SegmentSnapshot, len(s.root.segment)+1), - offsets: make([]uint64, len(s.root.segment)+1), - internal: make(map[string][]byte, len(s.root.segment)), + segment: make([]*SegmentSnapshot, nsegs, nsegs+1), + offsets: make([]uint64, nsegs, nsegs+1), + internal: make(map[string][]byte, len(s.root.internal)), epoch: s.nextSnapshotEpoch, refs: 1, } @@ -95,7 +97,7 @@ func (s *Scorch) introduceSegment(next *segmentIntroduction) error { newSnapshot.segment[i] = &SegmentSnapshot{ id: s.root.segment[i].id, segment: s.root.segment[i].segment, - notify: s.root.segment[i].notify, + persisted: s.root.segment[i].persisted, cachedDocs: s.root.segment[i].cachedDocs, } s.root.segment[i].segment.AddRef() @@ -110,18 +112,22 @@ func (s *Scorch) introduceSegment(next *segmentIntroduction) error { running += 
s.root.segment[i].Count() } - // put new segment at end - newSnapshot.segment[len(s.root.segment)] = &SegmentSnapshot{ - id: next.id, - segment: next.data, // Take ownership of next.data's ref-count. - cachedDocs: &cachedDocs{cache: nil}, - } - newSnapshot.offsets[len(s.root.segment)] = running - if !s.unsafeBatch { - newSnapshot.segment[len(s.root.segment)].notify = append( - newSnapshot.segment[len(s.root.segment)].notify, - next.persisted, - ) + // append new segment, if any, to end of the new index snapshot + if next.data != nil { + newSnapshot.segment = append(newSnapshot.segment, &SegmentSnapshot{ + id: next.id, + segment: next.data, // take ownership of next.data's ref-count + cachedDocs: &cachedDocs{cache: nil}, + }) + newSnapshot.offsets = append(newSnapshot.offsets, running) + if next.persisted != nil { + newSnapshot.segment[nsegs].persisted = + append(newSnapshot.segment[nsegs].persisted, next.persisted) + } + } else { // new segment might be nil when it's an internal data update only + if next.persisted != nil { + newSnapshot.persisted = append(newSnapshot.persisted, next.persisted) + } } // copy old values for key, oldVal := range s.root.internal { @@ -161,7 +167,7 @@ func (s *Scorch) introduceMerge(nextMerge *segmentMerge) { parent: s, segment: make([]*SegmentSnapshot, 0, newSize), offsets: make([]uint64, 0, newSize), - internal: make(map[string][]byte, len(s.root.segment)), + internal: s.root.internal, epoch: s.nextSnapshotEpoch, refs: 1, } @@ -193,8 +199,8 @@ func (s *Scorch) introduceMerge(nextMerge *segmentMerge) { newSnapshot.segment = append(newSnapshot.segment, &SegmentSnapshot{ id: s.root.segment[i].id, segment: s.root.segment[i].segment, - notify: s.root.segment[i].notify, deleted: s.root.segment[i].deleted, + persisted: s.root.segment[i].persisted, cachedDocs: s.root.segment[i].cachedDocs, }) s.root.segment[i].segment.AddRef() @@ -206,17 +212,12 @@ func (s *Scorch) introduceMerge(nextMerge *segmentMerge) { // put new segment at end 
newSnapshot.segment = append(newSnapshot.segment, &SegmentSnapshot{ id: nextMerge.id, - segment: nextMerge.new, // Take ownership for nextMerge.new's ref-count. + segment: nextMerge.new, // take ownership for nextMerge.new's ref-count deleted: newSegmentDeleted, cachedDocs: &cachedDocs{cache: nil}, }) newSnapshot.offsets = append(newSnapshot.offsets, running) - // copy old values - for key, oldVal := range s.root.internal { - newSnapshot.internal[key] = oldVal - } - // swap in new segment rootPrev := s.root s.root = newSnapshot diff --git a/index/scorch/persister.go b/index/scorch/persister.go index 73a0bd9f4..05f336642 100644 --- a/index/scorch/persister.go +++ b/index/scorch/persister.go @@ -222,7 +222,10 @@ func (s *Scorch) persistSnapshot(snapshot *IndexSnapshot) error { } // get write lock and update the current snapshot with disk-based versions - var notifications []chan error + snapshot.m.Lock() + notifications := snapshot.persisted + snapshot.persisted = nil + snapshot.m.Unlock() s.rootLock.Lock() newIndexSnapshot := &IndexSnapshot{ @@ -244,7 +247,7 @@ func (s *Scorch) persistSnapshot(snapshot *IndexSnapshot) error { } newIndexSnapshot.segment[i] = newSegmentSnapshot // add the old segment snapshots notifications to the list - for _, notification := range segmentSnapshot.notify { + for _, notification := range segmentSnapshot.persisted { notifications = append(notifications, notification) } } else { diff --git a/index/scorch/scorch.go b/index/scorch/scorch.go index 585904979..13dd32d33 100644 --- a/index/scorch/scorch.go +++ b/index/scorch/scorch.go @@ -215,11 +215,9 @@ func (s *Scorch) Batch(batch *index.Batch) error { // wait for analysis result analysisResults := make([]*index.AnalysisResult, int(numUpdates)) - // newRowsMap := make(map[string][]index.IndexRow) var itemsDeQueued uint64 for itemsDeQueued < numUpdates { result := <-resultChan - //newRowsMap[result.DocID] = result.Rows analysisResults[itemsDeQueued] = result itemsDeQueued++ } @@ -230,12 
+228,10 @@ func (s *Scorch) Batch(batch *index.Batch) error { var newSegment segment.Segment if len(analysisResults) > 0 { newSegment = mem.NewFromAnalyzedDocs(analysisResults) - } else { - newSegment = mem.New() } err := s.prepareSegment(newSegment, ids, batch.InternalOps) - if err != nil { + if err != nil && newSegment != nil { _ = newSegment.Close() } return err @@ -278,7 +274,7 @@ func (s *Scorch) prepareSegment(newSegment segment.Segment, ids []string, return err } - if !s.unsafeBatch { + if introduction.persisted != nil { err = <-introduction.persisted } diff --git a/index/scorch/snapshot_index.go b/index/scorch/snapshot_index.go index 7f5f41032..9b03a0e73 100644 --- a/index/scorch/snapshot_index.go +++ b/index/scorch/snapshot_index.go @@ -48,6 +48,8 @@ type IndexSnapshot struct { m sync.Mutex // Protects the fields that follow. refs int64 + + persisted []chan error } func (i *IndexSnapshot) AddRef() { diff --git a/index/scorch/snapshot_segment.go b/index/scorch/snapshot_segment.go index 9b22eaedd..fd15ed7fb 100644 --- a/index/scorch/snapshot_segment.go +++ b/index/scorch/snapshot_segment.go @@ -52,7 +52,8 @@ type SegmentSnapshot struct { segment segment.Segment deleted *roaring.Bitmap - notify []chan error + persisted []chan error // closed when the segment is persisted + cachedDocs *cachedDocs } From 34f5e2175fcd7a1692c6e483614f25d97a0f3199 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Sun, 17 Dec 2017 08:23:00 -0800 Subject: [PATCH 085/728] scorch fix persister for lost notifications on no-data batches With the previous commit, there can be a scenario where batches that had internal-updates-only can be rapidly introduced by the app, but the persisted notifications on only the very last IndexSnapshot would be fired. The persisted notifications on the in-between batches might be missed. 
The solution was to track the persisted notification channels at a higher Scorch struct level, instead of tracking the persisted channels at the IndexSnapshot and SegmentSnapshot levels. Also, the persister double-check looping was simplified, which avoids a race where an introducer might incorrectly not notify the persister. --- index/scorch/introducer.go | 46 ++++++----- index/scorch/persister.go | 133 ++++++++++++++----------------- index/scorch/scorch.go | 11 +-- index/scorch/snapshot_index.go | 2 - index/scorch/snapshot_segment.go | 2 - 5 files changed, 94 insertions(+), 100 deletions(-) diff --git a/index/scorch/introducer.go b/index/scorch/introducer.go index ef7eaab68..5cb43b9c0 100644 --- a/index/scorch/introducer.go +++ b/index/scorch/introducer.go @@ -32,16 +32,21 @@ type segmentIntroduction struct { persisted chan error } +type epochWatcher struct { + epoch uint64 + notifyCh notificationChan +} + func (s *Scorch) mainLoop() { - var notify notificationChan + var epochWatchers []*epochWatcher OUTER: for { select { case <-s.closeCh: break OUTER - case notify = <-s.introducerNotifier: - continue + case epochWatcher := <-s.introducerNotifier: + epochWatchers = append(epochWatchers, epochWatcher) case nextMerge := <-s.merges: s.introduceMerge(nextMerge) @@ -52,11 +57,22 @@ OUTER: continue OUTER } } - // notify persister - if notify != nil { - close(notify) - notify = nil + + var epochCurr uint64 + s.rootLock.RLock() + if s.root != nil { + epochCurr = s.root.epoch + } + s.rootLock.RUnlock() + var epochWatchersNext []*epochWatcher + for _, w := range epochWatchers { + if w.epoch < epochCurr { + close(w.notifyCh) + } else { + epochWatchersNext = append(epochWatchersNext, w) + } } + epochWatchers = epochWatchersNext } s.asyncTasks.Done() @@ -97,10 +113,10 @@ func (s *Scorch) introduceSegment(next *segmentIntroduction) error { newSnapshot.segment[i] = &SegmentSnapshot{ id: s.root.segment[i].id, segment: s.root.segment[i].segment, - persisted: 
s.root.segment[i].persisted, cachedDocs: s.root.segment[i].cachedDocs, } s.root.segment[i].segment.AddRef() + // apply new obsoletions if s.root.segment[i].deleted == nil { newSnapshot.segment[i].deleted = delta @@ -120,14 +136,6 @@ func (s *Scorch) introduceSegment(next *segmentIntroduction) error { cachedDocs: &cachedDocs{cache: nil}, }) newSnapshot.offsets = append(newSnapshot.offsets, running) - if next.persisted != nil { - newSnapshot.segment[nsegs].persisted = - append(newSnapshot.segment[nsegs].persisted, next.persisted) - } - } else { // new segment might be nil when it's an internal data update only - if next.persisted != nil { - newSnapshot.persisted = append(newSnapshot.persisted, next.persisted) - } } // copy old values for key, oldVal := range s.root.internal { @@ -141,7 +149,10 @@ func (s *Scorch) introduceSegment(next *segmentIntroduction) error { delete(newSnapshot.internal, key) } } - // swap in new segment + if next.persisted != nil { + s.rootPersisted = append(s.rootPersisted, next.persisted) + } + // swap in new index snapshot rootPrev := s.root s.root = newSnapshot // release lock @@ -200,7 +211,6 @@ func (s *Scorch) introduceMerge(nextMerge *segmentMerge) { id: s.root.segment[i].id, segment: s.root.segment[i].segment, deleted: s.root.segment[i].deleted, - persisted: s.root.segment[i].persisted, cachedDocs: s.root.segment[i].cachedDocs, }) s.root.segment[i].segment.AddRef() diff --git a/index/scorch/persister.go b/index/scorch/persister.go index 05f336642..969b449ce 100644 --- a/index/scorch/persister.go +++ b/index/scorch/persister.go @@ -35,82 +35,85 @@ import ( type notificationChan chan struct{} func (s *Scorch) persisterLoop() { - var notify notificationChan + defer s.asyncTasks.Done() + + var notifyChs []notificationChan var lastPersistedEpoch uint64 OUTER: for { select { case <-s.closeCh: break OUTER - case notify = <-s.persisterNotifier: - + case notifyCh := <-s.persisterNotifier: + notifyChs = append(notifyChs, notifyCh) default: - // 
check to see if there is a new snapshot to persist - s.rootLock.RLock() - ourSnapshot := s.root - ourSnapshot.AddRef() - s.rootLock.RUnlock() - - if ourSnapshot.epoch != lastPersistedEpoch { - // lets get started - err := s.persistSnapshot(ourSnapshot) - if err != nil { - log.Printf("got err persisting snapshot: %v", err) - _ = ourSnapshot.DecRef() - continue OUTER - } - lastPersistedEpoch = ourSnapshot.epoch - if notify != nil { - close(notify) - notify = nil - } - } - _ = ourSnapshot.DecRef() - - // tell the introducer we're waiting for changes - // first make a notification chan - notifyUs := make(notificationChan) + } - // give it to the introducer - select { - case <-s.closeCh: - break OUTER - case s.introducerNotifier <- notifyUs: - } + var ourSnapshot *IndexSnapshot + var ourPersisted []chan error - // check again - s.rootLock.RLock() + // check to see if there is a new snapshot to persist + s.rootLock.Lock() + if s.root != nil && s.root.epoch > lastPersistedEpoch { ourSnapshot = s.root ourSnapshot.AddRef() - s.rootLock.RUnlock() + ourPersisted = s.rootPersisted + s.rootPersisted = nil + } + s.rootLock.Unlock() - if ourSnapshot.epoch != lastPersistedEpoch { - // lets get started - err := s.persistSnapshot(ourSnapshot) + if ourSnapshot != nil { + err := s.persistSnapshot(ourSnapshot) + for _, ch := range ourPersisted { if err != nil { - log.Printf("got err persisting snapshot: %v", err) - _ = ourSnapshot.DecRef() - continue OUTER - } - lastPersistedEpoch = ourSnapshot.epoch - if notify != nil { - close(notify) - notify = nil + ch <- err } + close(ch) + } + if err != nil { + log.Printf("got err persisting snapshot: %v", err) + _ = ourSnapshot.DecRef() + continue OUTER } + lastPersistedEpoch = ourSnapshot.epoch + for _, notifyCh := range notifyChs { + close(notifyCh) + } + notifyChs = nil _ = ourSnapshot.DecRef() - // now wait for it (but also detect close) - select { - case <-s.closeCh: - break OUTER - case <-notifyUs: - // woken up, next loop should pick up 
work + changed := false + s.rootLock.RLock() + if s.root != nil && s.root.epoch != lastPersistedEpoch { + changed = true } + s.rootLock.RUnlock() + if changed { + continue OUTER + } + } + + // tell the introducer we're waiting for changes + w := &epochWatcher{ + epoch: lastPersistedEpoch, + notifyCh: make(notificationChan, 1), + } + + select { + case <-s.closeCh: + break OUTER + case s.introducerNotifier <- w: + } + + s.removeOldData() // might as well cleanup while waiting + + select { + case <-s.closeCh: + break OUTER + case <-w.notifyCh: + // woken up, next loop should pick up work } - s.removeOldData() } - s.asyncTasks.Done() } func (s *Scorch) persistSnapshot(snapshot *IndexSnapshot) error { @@ -221,12 +224,6 @@ func (s *Scorch) persistSnapshot(snapshot *IndexSnapshot) error { } } - // get write lock and update the current snapshot with disk-based versions - snapshot.m.Lock() - notifications := snapshot.persisted - snapshot.persisted = nil - snapshot.m.Unlock() - s.rootLock.Lock() newIndexSnapshot := &IndexSnapshot{ parent: s, @@ -240,16 +237,12 @@ func (s *Scorch) persistSnapshot(snapshot *IndexSnapshot) error { // see if this segment has been replaced if replacement, ok := newSegments[segmentSnapshot.id]; ok { newSegmentSnapshot := &SegmentSnapshot{ + id: segmentSnapshot.id, segment: replacement, deleted: segmentSnapshot.deleted, - id: segmentSnapshot.id, cachedDocs: segmentSnapshot.cachedDocs, } newIndexSnapshot.segment[i] = newSegmentSnapshot - // add the old segment snapshots notifications to the list - for _, notification := range segmentSnapshot.persisted { - notifications = append(notifications, notification) - } } else { newIndexSnapshot.segment[i] = s.root.segment[i] newIndexSnapshot.segment[i].segment.AddRef() @@ -270,12 +263,6 @@ func (s *Scorch) persistSnapshot(snapshot *IndexSnapshot) error { _ = rootPrev.DecRef() } - // now that we've given up the lock, notify everyone that we've safely - // persisted their data - for _, notification := range 
notifications { - close(notification) - } - return nil } diff --git a/index/scorch/scorch.go b/index/scorch/scorch.go index 13dd32d33..5bba9bf98 100644 --- a/index/scorch/scorch.go +++ b/index/scorch/scorch.go @@ -49,13 +49,14 @@ type Scorch struct { unsafeBatch bool - rootLock sync.RWMutex - root *IndexSnapshot // holds 1 ref-count on the root + rootLock sync.RWMutex + root *IndexSnapshot // holds 1 ref-count on the root + rootPersisted []chan error // closed when root is persisted closeCh chan struct{} introductions chan *segmentIntroduction merges chan *segmentMerge - introducerNotifier chan notificationChan + introducerNotifier chan *epochWatcher persisterNotifier chan notificationChan rootBolt *bolt.DB asyncTasks sync.WaitGroup @@ -127,7 +128,7 @@ func (s *Scorch) Open() error { s.introductions = make(chan *segmentIntroduction) s.merges = make(chan *segmentMerge) - s.introducerNotifier = make(chan notificationChan) + s.introducerNotifier = make(chan *epochWatcher, 1) s.persisterNotifier = make(chan notificationChan) if !s.readOnly && s.path != "" { @@ -251,7 +252,7 @@ func (s *Scorch) prepareSegment(newSegment segment.Segment, ids []string, } if !s.unsafeBatch { - introduction.persisted = make(chan error) + introduction.persisted = make(chan error, 1) } // get read lock, to optimistically prepare obsoleted info diff --git a/index/scorch/snapshot_index.go b/index/scorch/snapshot_index.go index 9b03a0e73..7f5f41032 100644 --- a/index/scorch/snapshot_index.go +++ b/index/scorch/snapshot_index.go @@ -48,8 +48,6 @@ type IndexSnapshot struct { m sync.Mutex // Protects the fields that follow. 
refs int64 - - persisted []chan error } func (i *IndexSnapshot) AddRef() { diff --git a/index/scorch/snapshot_segment.go b/index/scorch/snapshot_segment.go index fd15ed7fb..c543af826 100644 --- a/index/scorch/snapshot_segment.go +++ b/index/scorch/snapshot_segment.go @@ -52,8 +52,6 @@ type SegmentSnapshot struct { segment segment.Segment deleted *roaring.Bitmap - persisted []chan error // closed when the segment is persisted - cachedDocs *cachedDocs } From 20fe70770a9a43119c64319b7b59ddbc0d0bf7fd Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Sun, 17 Dec 2017 12:39:15 -0800 Subject: [PATCH 086/728] scorch added some tests on # of expected segments --- index/scorch/scorch_test.go | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/index/scorch/scorch_test.go b/index/scorch/scorch_test.go index d347ecec0..42e8df50a 100644 --- a/index/scorch/scorch_test.go +++ b/index/scorch/scorch_test.go @@ -587,6 +587,10 @@ func TestIndexInternalCRUD(t *testing.T) { t.Error(err) } + if len(indexReader.(*Reader).root.segment) != 0 { + t.Errorf("expected 0 segments") + } + // get something that doesn't exist yet val, err := indexReader.GetInternal([]byte("key")) if err != nil { @@ -612,6 +616,10 @@ func TestIndexInternalCRUD(t *testing.T) { t.Error(err) } + if len(indexReader2.(*Reader).root.segment) != 0 { + t.Errorf("expected 0 segments") + } + // get val, err = indexReader2.GetInternal([]byte("key")) if err != nil { @@ -637,6 +645,10 @@ func TestIndexInternalCRUD(t *testing.T) { t.Error(err) } + if len(indexReader3.(*Reader).root.segment) != 0 { + t.Errorf("expected 0 segments") + } + // get again val, err = indexReader3.GetInternal([]byte("key")) if err != nil { @@ -726,6 +738,11 @@ func TestIndexBatch(t *testing.T) { } }() + numSegments := len(indexReader.(*Reader).root.segment) + if numSegments <= 0 { + t.Errorf("expected some segments, got: %d", numSegments) + } + docCount, err := indexReader.DocCount() if err != nil { t.Fatal(err) From 
867bb2c0314205d922dba91c452bb1e7fe61456d Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Mon, 18 Dec 2017 11:33:19 -0800 Subject: [PATCH 087/728] scorch mergeplan explicitly weeds out empty segments Rather than waiting on scoring to weed out empty segments, this commit does the weeding out of empty segments explicitly and up front. --- index/scorch/mergeplan/merge_plan.go | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/index/scorch/mergeplan/merge_plan.go b/index/scorch/mergeplan/merge_plan.go index 41ccae0c4..0afc3ce5c 100644 --- a/index/scorch/mergeplan/merge_plan.go +++ b/index/scorch/mergeplan/merge_plan.go @@ -173,6 +173,17 @@ func plan(segmentsIn []Segment, o *MergePlanOptions) (*MergePlan, error) { rv := &MergePlan{} + var empties []Segment + for _, eligible := range eligibles { + if eligible.LiveSize() <= 0 { + empties = append(empties, eligible) + } + } + if len(empties) > 0 { + rv.Tasks = append(rv.Tasks, &MergeTask{Segments: empties}) + eligibles = removeSegments(eligibles, empties) + } + // While we’re over budget, keep looping, which might produce // another MergeTask. for len(eligibles) > budgetNumSegments { From 730d906a50487535e90b566eeb78b93b6f74ee68 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Fri, 15 Dec 2017 12:32:37 -0800 Subject: [PATCH 088/728] scorch reuses Posting instance in PostingsIterator.Next() With this change, there are no more memory allocations in the calls to PostingsIterator.Next() in the micro benchmarks of bleve-query. On a dev macbook, on an index of 50K wikipedia docs, using high frequency search of "text:date"... 
400 qps - upsidedown/moss 565 qps - scorch before 680 qps - scorch after --- index/scorch/segment/segment.go | 4 ++++ index/scorch/segment/zap/posting.go | 10 ++++++---- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/index/scorch/segment/segment.go b/index/scorch/segment/segment.go index 14b97ec80..2c91c0ef8 100644 --- a/index/scorch/segment/segment.go +++ b/index/scorch/segment/segment.go @@ -64,6 +64,10 @@ type PostingsList interface { } type PostingsIterator interface { + // The caller is responsible for copying whatever it needs from + // the returned Posting instance before calling Next(), as some + // implementations may return a shared instance to reduce memory + // allocations. Next() (Posting, error) } diff --git a/index/scorch/segment/zap/posting.go b/index/scorch/segment/zap/posting.go index 68db04299..e6b76bf90 100644 --- a/index/scorch/segment/zap/posting.go +++ b/index/scorch/segment/zap/posting.go @@ -115,6 +115,8 @@ type PostingsIterator struct { locChunkStart uint64 locBitmap *roaring.Bitmap + + next Posting } func (i *PostingsIterator) loadChunk(chunk int) error { @@ -266,10 +268,10 @@ func (i *PostingsIterator) Next() (segment.Posting, error) { } } - rv := &Posting{ - iterator: i, - docNum: uint64(n), - } + i.next = Posting{} // clear the struct. + rv := &i.next + rv.iterator = i + rv.docNum = uint64(n) var err error var normBits uint64 From f6b506134b0b2a62f9b12a1ea2200791284ea06d Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Tue, 19 Dec 2017 10:49:57 -0800 Subject: [PATCH 089/728] import couchbase/vellum instead of couchbaselabs/vellum Also, scrubbed an old couchbaselabs/moss reference in comments. Also, go fmt. 
--- index/scorch/segment/zap/build.go | 2 +- index/scorch/segment/zap/cmd/zap/cmd/dict.go | 2 +- index/scorch/segment/zap/cmd/zap/cmd/explore.go | 2 +- index/scorch/segment/zap/dict.go | 4 ++-- index/scorch/segment/zap/merge.go | 2 +- index/scorch/segment/zap/segment.go | 2 +- index/store/moss/lower.go | 2 +- index/store/moss/store.go | 2 +- search/search.go | 10 +++++----- 9 files changed, 14 insertions(+), 14 deletions(-) diff --git a/index/scorch/segment/zap/build.go b/index/scorch/segment/zap/build.go index 60906d334..8bd00601e 100644 --- a/index/scorch/segment/zap/build.go +++ b/index/scorch/segment/zap/build.go @@ -23,7 +23,7 @@ import ( "github.com/Smerity/govarint" "github.com/blevesearch/bleve/index/scorch/segment/mem" - "github.com/couchbaselabs/vellum" + "github.com/couchbase/vellum" "github.com/golang/snappy" ) diff --git a/index/scorch/segment/zap/cmd/zap/cmd/dict.go b/index/scorch/segment/zap/cmd/zap/cmd/dict.go index 74e59e902..fa8b3277e 100644 --- a/index/scorch/segment/zap/cmd/zap/cmd/dict.go +++ b/index/scorch/segment/zap/cmd/zap/cmd/dict.go @@ -18,7 +18,7 @@ import ( "encoding/binary" "fmt" - "github.com/couchbaselabs/vellum" + "github.com/couchbase/vellum" "github.com/spf13/cobra" ) diff --git a/index/scorch/segment/zap/cmd/zap/cmd/explore.go b/index/scorch/segment/zap/cmd/zap/cmd/explore.go index 42ab82732..012a829fe 100644 --- a/index/scorch/segment/zap/cmd/zap/cmd/explore.go +++ b/index/scorch/segment/zap/cmd/zap/cmd/explore.go @@ -19,7 +19,7 @@ import ( "fmt" "log" - "github.com/couchbaselabs/vellum" + "github.com/couchbase/vellum" "github.com/spf13/cobra" ) diff --git a/index/scorch/segment/zap/dict.go b/index/scorch/segment/zap/dict.go index 5d3c160ba..3221d0616 100644 --- a/index/scorch/segment/zap/dict.go +++ b/index/scorch/segment/zap/dict.go @@ -21,8 +21,8 @@ import ( "github.com/RoaringBitmap/roaring" "github.com/blevesearch/bleve/index" "github.com/blevesearch/bleve/index/scorch/segment" - "github.com/couchbaselabs/vellum" - 
"github.com/couchbaselabs/vellum/regexp" + "github.com/couchbase/vellum" + "github.com/couchbase/vellum/regexp" ) // Dictionary is the zap representation of the term dictionary diff --git a/index/scorch/segment/zap/merge.go b/index/scorch/segment/zap/merge.go index 481d9272a..842371e5f 100644 --- a/index/scorch/segment/zap/merge.go +++ b/index/scorch/segment/zap/merge.go @@ -24,7 +24,7 @@ import ( "github.com/RoaringBitmap/roaring" "github.com/Smerity/govarint" - "github.com/couchbaselabs/vellum" + "github.com/couchbase/vellum" "github.com/golang/snappy" ) diff --git a/index/scorch/segment/zap/segment.go b/index/scorch/segment/zap/segment.go index 65c62ac9c..62c147a2c 100644 --- a/index/scorch/segment/zap/segment.go +++ b/index/scorch/segment/zap/segment.go @@ -25,7 +25,7 @@ import ( "github.com/RoaringBitmap/roaring" "github.com/Smerity/govarint" "github.com/blevesearch/bleve/index/scorch/segment" - "github.com/couchbaselabs/vellum" + "github.com/couchbase/vellum" mmap "github.com/edsrzf/mmap-go" "github.com/golang/snappy" ) diff --git a/index/store/moss/lower.go b/index/store/moss/lower.go index 2aff2aea3..1133f95f6 100644 --- a/index/store/moss/lower.go +++ b/index/store/moss/lower.go @@ -13,7 +13,7 @@ // limitations under the License. // Package moss provides a KVStore implementation based on the -// github.com/couchbaselabs/moss library. +// github.com/couchbase/moss library. package moss diff --git a/index/store/moss/store.go b/index/store/moss/store.go index a7aa4d417..89ea553cf 100644 --- a/index/store/moss/store.go +++ b/index/store/moss/store.go @@ -13,7 +13,7 @@ // limitations under the License. // Package moss provides a KVStore implementation based on the -// github.com/couchbaselabs/moss library. +// github.com/couchbase/moss library. 
package moss diff --git a/search/search.go b/search/search.go index cbbcfbfd6..f9a92783b 100644 --- a/search/search.go +++ b/search/search.go @@ -37,12 +37,12 @@ func (ap ArrayPositions) Equals(other ArrayPositions) bool { type Location struct { // Pos is the position of the term within the field, starting at 1 - Pos uint64 `json:"pos"` - + Pos uint64 `json:"pos"` + // Start and End are the byte offsets of the term in the field - Start uint64 `json:"start"` - End uint64 `json:"end"` - + Start uint64 `json:"start"` + End uint64 `json:"end"` + // ArrayPositions contains the positions of the term within any elements. ArrayPositions ArrayPositions `json:"array_positions"` } From 679f1ce9c3cb0f9ccd95bcb11e3253fa0e66a983 Mon Sep 17 00:00:00 2001 From: abhinavdangeti Date: Fri, 15 Dec 2017 14:49:33 -0800 Subject: [PATCH 090/728] scorch APIs to support rollback - PreviousPersistedSnapshot - SnapshotRevert + unit test --- index/scorch/introducer.go | 58 +++++++++++++ index/scorch/scorch.go | 2 + index/scorch/snapshot_rollback.go | 102 ++++++++++++++++++++++ index/scorch/snapshot_rollback_test.go | 113 +++++++++++++++++++++++++ 4 files changed, 275 insertions(+) create mode 100644 index/scorch/snapshot_rollback.go create mode 100644 index/scorch/snapshot_rollback_test.go diff --git a/index/scorch/introducer.go b/index/scorch/introducer.go index 5cb43b9c0..41ce4ca1f 100644 --- a/index/scorch/introducer.go +++ b/index/scorch/introducer.go @@ -37,6 +37,11 @@ type epochWatcher struct { notifyCh notificationChan } +type snapshotReversion struct { + snapshot *IndexSnapshot + applied chan error +} + func (s *Scorch) mainLoop() { var epochWatchers []*epochWatcher OUTER: @@ -56,6 +61,12 @@ OUTER: if err != nil { continue OUTER } + + case revertTo := <-s.revertToSnapshots: + err := s.revertToSnapshot(revertTo) + if err != nil { + continue OUTER + } } var epochCurr uint64 @@ -241,3 +252,50 @@ func (s *Scorch) introduceMerge(nextMerge *segmentMerge) { // notify merger we incorporated 
this close(nextMerge.notify) } + +func (s *Scorch) revertToSnapshot(revertTo *snapshotReversion) error { + if revertTo.snapshot == nil { + err := fmt.Errorf("Cannot revert to a nil snapshot") + revertTo.applied <- err + return err + } + + // acquire lock + s.rootLock.Lock() + + // prepare a new index snapshot, based on next snapshot + newSnapshot := &IndexSnapshot{ + parent: s, + segment: make([]*SegmentSnapshot, len(revertTo.snapshot.segment)), + offsets: revertTo.snapshot.offsets, + internal: revertTo.snapshot.internal, + epoch: s.nextSnapshotEpoch, + refs: 1, + } + s.nextSnapshotEpoch++ + + // iterate through segments + for i, segmentSnapshot := range revertTo.snapshot.segment { + newSnapshot.segment[i] = &SegmentSnapshot{ + id: segmentSnapshot.id, + segment: segmentSnapshot.segment, + deleted: segmentSnapshot.deleted, + cachedDocs: segmentSnapshot.cachedDocs, + } + segmentSnapshot.segment.AddRef() + } + + // swap in new snapshot + rootPrev := s.root + s.root = newSnapshot + // release lock + s.rootLock.Unlock() + + if rootPrev != nil { + _ = rootPrev.DecRef() + } + + close(revertTo.applied) + + return nil +} diff --git a/index/scorch/scorch.go b/index/scorch/scorch.go index 5bba9bf98..9484920f3 100644 --- a/index/scorch/scorch.go +++ b/index/scorch/scorch.go @@ -57,6 +57,7 @@ type Scorch struct { introductions chan *segmentIntroduction merges chan *segmentMerge introducerNotifier chan *epochWatcher + revertToSnapshots chan *snapshotReversion persisterNotifier chan notificationChan rootBolt *bolt.DB asyncTasks sync.WaitGroup @@ -129,6 +130,7 @@ func (s *Scorch) Open() error { s.introductions = make(chan *segmentIntroduction) s.merges = make(chan *segmentMerge) s.introducerNotifier = make(chan *epochWatcher, 1) + s.revertToSnapshots = make(chan *snapshotReversion) s.persisterNotifier = make(chan notificationChan) if !s.readOnly && s.path != "" { diff --git a/index/scorch/snapshot_rollback.go b/index/scorch/snapshot_rollback.go new file mode 100644 index 
000000000..e31308a20 --- /dev/null +++ b/index/scorch/snapshot_rollback.go @@ -0,0 +1,102 @@ +// Copyright (c) 2017 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package scorch + +import ( + "bytes" + "log" + + "github.com/blevesearch/bleve/index/scorch/segment" +) + +// PreviousPersistedSnapshot returns the next older, previous +// IndexSnapshot based on the provided IndexSnapshot. If the provided +// argument is nil, the most recently persisted IndexSnapshot is returned. +// This API allows the application to walk backwards into the history +// of a store to previous points in time. A nil return value indicates +// that no previous snapshots are available. +func (s *Scorch) PreviousPersistedSnapshot(is *IndexSnapshot) (*IndexSnapshot, error) { + if s.rootBolt == nil { + return nil, nil + } + + // start a read-only transaction + tx, err := s.rootBolt.Begin(false) + if err != nil { + return nil, err + } + + // Read-only bolt transactions to be rolled back. 
+ defer func() { + _ = tx.Rollback() + }() + + snapshots := tx.Bucket(boltSnapshotsBucket) + if snapshots == nil { + return nil, nil + } + + pos := []byte(nil) + + if is != nil { + pos = segment.EncodeUvarintAscending(nil, is.epoch) + } + + c := snapshots.Cursor() + for k, _ := c.Last(); k != nil; k, _ = c.Prev() { + if pos == nil || bytes.Compare(k, pos) < 0 { + _, snapshotEpoch, err := segment.DecodeUvarintAscending(k) + if err != nil { + log.Printf("PreviousPersistedSnapshot:"+ + " unable to parse segment epoch %x, continuing", k) + continue + } + + snapshot := snapshots.Bucket(k) + if snapshot == nil { + log.Printf("PreviousPersistedSnapshot:"+ + " snapshot key, but bucket missing %x, continuing", k) + continue + } + + indexSnapshot, err := s.loadSnapshot(snapshot) + if err != nil { + log.Printf("PreviousPersistedSnapshot:"+ + " unable to load snapshot, %v, continuing", err) + continue + } + + indexSnapshot.epoch = snapshotEpoch + return indexSnapshot, nil + } + } + + return nil, nil +} + +// SnapshotRevert atomically brings the store back to the point in time +// as represented by the revertTo IndexSnapshot. SnapshotRevert() should +// only be passed an IndexSnapshot that came from the same store. +func (s *Scorch) SnapshotRevert(revertTo *IndexSnapshot) error { + revert := &snapshotReversion{ + snapshot: revertTo, + applied: make(chan error), + } + + s.revertToSnapshots <- revert + + // block until this IndexSnapshot is applied + return <-revert.applied +} diff --git a/index/scorch/snapshot_rollback_test.go b/index/scorch/snapshot_rollback_test.go new file mode 100644 index 000000000..3054c9bb8 --- /dev/null +++ b/index/scorch/snapshot_rollback_test.go @@ -0,0 +1,113 @@ +// Copyright (c) 2017 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package scorch + +import ( + "testing" + + "github.com/blevesearch/bleve/document" + "github.com/blevesearch/bleve/index" +) + +func TestIndexRollback(t *testing.T) { + defer func() { + err := DestroyTest() + if err != nil { + t.Fatal(err) + } + }() + + analysisQueue := index.NewAnalysisQueue(1) + idx, err := NewScorch(Name, testConfig, analysisQueue) + if err != nil { + t.Fatal(err) + } + err = idx.Open() + if err != nil { + t.Fatalf("error opening index: %v", err) + } + defer func() { + err := idx.Close() + if err != nil { + t.Fatal(err) + } + }() + + // create 2 docs + doc := document.NewDocument("1") + doc.AddField(document.NewTextField("name", []uint64{}, []byte("test1"))) + err = idx.Update(doc) + if err != nil { + t.Error(err) + } + + doc = document.NewDocument("2") + doc.AddField(document.NewTextField("name", []uint64{}, []byte("test2"))) + err = idx.Update(doc) + if err != nil { + t.Error(err) + } + + // create a batch, insert new doc, update existing doc, delete existing doc + batch := index.NewBatch() + doc = document.NewDocument("3") + doc.AddField(document.NewTextField("name", []uint64{}, []byte("test3"))) + batch.Update(doc) + doc = document.NewDocument("2") + doc.AddField(document.NewTextField("name", []uint64{}, []byte("test2updated"))) + batch.Update(doc) + batch.Delete("1") + + err = idx.Batch(batch) + if err != nil { + t.Error(err) + } + + sh, ok := idx.(*Scorch) + if !ok { + t.Errorf("Not a scorch index?") + } + + // Get Last persisted snapshot + ss, err := sh.PreviousPersistedSnapshot(nil) + if err != nil { + t.Error(err) + 
} + + // Retrieve the snapshot earlier + prev, err := sh.PreviousPersistedSnapshot(ss) + if err != nil { + t.Error(err) + } + + err = sh.SnapshotRevert(prev) + if err != nil { + t.Error(err) + } + + newRoot := sh.root + if newRoot != nil && prev != nil { + if newRoot.epoch <= prev.epoch { + t.Errorf("Unexpected epoch, %v <= %v", newRoot.epoch, prev.epoch) + } + } else { + if prev == nil { + t.Errorf("The last persisted snapshot before the revert was nil!") + } + if newRoot == nil { + t.Errorf("The new root has been set to nil?") + } + } +} From d0e4f850268ac69894a1e16d45512b2fa7fd2ea0 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Tue, 19 Dec 2017 13:37:04 -0800 Subject: [PATCH 091/728] scorch avoid extra clone by using roaring.AndNot(x, y) --- index/scorch/segment/zap/posting.go | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/index/scorch/segment/zap/posting.go b/index/scorch/segment/zap/posting.go index e6b76bf90..1ef85eef2 100644 --- a/index/scorch/segment/zap/posting.go +++ b/index/scorch/segment/zap/posting.go @@ -72,8 +72,7 @@ func (p *PostingsList) Iterator() segment.PostingsIterator { rv.all = p.postings.Iterator() if p.except != nil { - allExcept := p.postings.Clone() - allExcept.AndNot(p.except) + allExcept := roaring.AndNot(p.postings, p.except) rv.actual = allExcept.Iterator() } else { rv.actual = p.postings.Iterator() From 142ccdfaecc3631e4ff51ed013bf7d5671264d85 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Tue, 19 Dec 2017 13:46:13 -0800 Subject: [PATCH 092/728] scorch remove leftover doc comment I'm suspecting that Marty's editor is more exciting than mine. 
:-) --- index/scorch/snapshot_index_tfr.go | 1 - 1 file changed, 1 deletion(-) diff --git a/index/scorch/snapshot_index_tfr.go b/index/scorch/snapshot_index_tfr.go index d6c8dcd13..25cc0bd07 100644 --- a/index/scorch/snapshot_index_tfr.go +++ b/index/scorch/snapshot_index_tfr.go @@ -83,7 +83,6 @@ func (i *IndexSnapshotTermFieldReader) postingToTermFieldDoc(next segment.Postin } } -// Advance go fuck yourself editor func (i *IndexSnapshotTermFieldReader) Advance(ID index.IndexInternalID, preAlloced *index.TermFieldDoc) (*index.TermFieldDoc, error) { // first make sure we aren't already pointing at the right thing, (due to way searchers work) if i.currPosting != nil && bytes.Compare(i.currID, ID) >= 0 { From a0556ad65b939cad24da881043f71f534411bee2 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Tue, 19 Dec 2017 16:41:56 -0800 Subject: [PATCH 093/728] scorch added more cases to TestIndexInsertThenDelete --- index/scorch/scorch_test.go | 93 +++++++++++++++++++++++++++++++++++++ 1 file changed, 93 insertions(+) diff --git a/index/scorch/scorch_test.go b/index/scorch/scorch_test.go index 42e8df50a..a9df3e9e2 100644 --- a/index/scorch/scorch_test.go +++ b/index/scorch/scorch_test.go @@ -275,6 +275,18 @@ func TestIndexInsertThenDelete(t *testing.T) { if docCount != expectedCount { t.Errorf("Expected document count to be %d got %d", expectedCount, docCount) } + iid, err := reader.InternalID("1") + if err != nil || iid == nil { + t.Errorf("unexpected on doc id 1") + } + iid, err = reader.InternalID("2") + if err != nil || iid == nil { + t.Errorf("unexpected on doc id 2") + } + iid, err = reader.InternalID("3") + if err != nil || iid != nil { + t.Errorf("unexpected on doc id 3") + } err = reader.Close() if err != nil { t.Fatal(err) @@ -297,6 +309,73 @@ func TestIndexInsertThenDelete(t *testing.T) { if docCount != expectedCount { t.Errorf("Expected document count to be %d got %d", expectedCount, docCount) } + storedDoc, err := reader.Document("1") + if err != nil { + 
t.Error(err) + } + if storedDoc != nil { + t.Errorf("expected nil for deleted stored doc #1, got %v", storedDoc) + } + storedDoc, err = reader.Document("2") + if err != nil { + t.Error(err) + } + if storedDoc == nil { + t.Errorf("expected stored doc for #2, got nil") + } + err = reader.Close() + if err != nil { + t.Fatal(err) + } + + // now close it + err = idx.Close() + if err != nil { + t.Fatal(err) + } + + idx, err = NewScorch(Name, testConfig, analysisQueue) // reopen + if err != nil { + t.Fatal(err) + } + err = idx.Open() + if err != nil { + t.Errorf("error reopening index: %v", err) + } + + reader, err = idx.Reader() + if err != nil { + t.Fatal(err) + } + docCount, err = reader.DocCount() + if err != nil { + t.Error(err) + } + if docCount != expectedCount { + t.Errorf("Expected document count to be %d got %d", expectedCount, docCount) + } + storedDoc, err = reader.Document("1") + if err != nil { + t.Error(err) + } + if storedDoc != nil { + t.Errorf("expected nil for deleted stored doc #1, got %v", storedDoc) + } + storedDoc, err = reader.Document("2") + if err != nil { + t.Error(err) + } + if storedDoc == nil { + t.Errorf("expected stored doc for #2, got nil") + } + iid, err = reader.InternalID("1") + if err != nil || iid != nil { + t.Errorf("unexpected on doc id 1") + } + iid, err = reader.InternalID("2") + if err != nil || iid == nil { + t.Errorf("unexpected on doc id 2, should exist") + } err = reader.Close() if err != nil { t.Fatal(err) @@ -319,6 +398,20 @@ func TestIndexInsertThenDelete(t *testing.T) { if docCount != expectedCount { t.Errorf("Expected document count to be %d got %d", expectedCount, docCount) } + storedDoc, err = reader.Document("1") + if err != nil { + t.Error(err) + } + if storedDoc != nil { + t.Errorf("expected nil for deleted stored doc #1, got %v", storedDoc) + } + storedDoc, err = reader.Document("2") + if err != nil { + t.Error(err) + } + if storedDoc != nil { + t.Errorf("expected nil for deleted stored doc #2, got nil") + } err = 
reader.Close() if err != nil { t.Fatal(err) From 8f8333e01bb1345790fe54d2f2889397d4acf0b2 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Tue, 19 Dec 2017 17:44:25 -0800 Subject: [PATCH 094/728] scorch optimize zap Count() This proposed approach avoids building a temporary AndNot() bitmap, following the same kind of optimization used by mem segments. --- index/scorch/segment/zap/posting.go | 9 +++++++-- index/scorch/snapshot_segment.go | 1 + 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/index/scorch/segment/zap/posting.go b/index/scorch/segment/zap/posting.go index 1ef85eef2..1b7a0a587 100644 --- a/index/scorch/segment/zap/posting.go +++ b/index/scorch/segment/zap/posting.go @@ -85,10 +85,15 @@ func (p *PostingsList) Iterator() segment.PostingsIterator { // Count returns the number of items on this postings list func (p *PostingsList) Count() uint64 { if p.postings != nil { + n := p.postings.GetCardinality() if p.except != nil { - return roaring.AndNot(p.postings, p.except).GetCardinality() + e := p.except.GetCardinality() + if e > n { + e = n + } + return n - e } - return p.postings.GetCardinality() + return n } return 0 } diff --git a/index/scorch/snapshot_segment.go b/index/scorch/snapshot_segment.go index c543af826..f2bcfb065 100644 --- a/index/scorch/snapshot_segment.go +++ b/index/scorch/snapshot_segment.go @@ -32,6 +32,7 @@ type SegmentDictionarySnapshot struct { } func (s *SegmentDictionarySnapshot) PostingsList(term string, except *roaring.Bitmap) (segment.PostingsList, error) { + // TODO: if except is non-nil, perhaps need to OR it with s.s.deleted? 
return s.d.PostingsList(term, s.s.deleted) } From dbc88cf6b3990a905ced2641d3f7e60fb7ea25d3 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Tue, 19 Dec 2017 19:15:19 -0800 Subject: [PATCH 095/728] scorch docNumberToBytes() checks cap(buf) before allocating With more pprof focusing (zooming in on a particular func), there were still some memory allocations showing up with docNumberToBytes() in micro benchmarks of bleve-query. On a dev macbook, on an index of 50K wikipedia docs, using search of relatively common "text:date"... 400 qps - upsidedown/moss 680 qps - scorch before 775 qps - scorch after --- index/scorch/snapshot_index.go | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/index/scorch/snapshot_index.go b/index/scorch/snapshot_index.go index 7f5f41032..bb6a8be6b 100644 --- a/index/scorch/snapshot_index.go +++ b/index/scorch/snapshot_index.go @@ -367,7 +367,11 @@ func (i *IndexSnapshot) TermFieldReader(term []byte, field string, includeFreq, func docNumberToBytes(buf []byte, in uint64) []byte { if len(buf) != 8 { - buf = make([]byte, 8) + if cap(buf) >= 8 { + buf = buf[0:8] + } else { + buf = make([]byte, 8) + } } binary.BigEndian.PutUint64(buf, in) return buf From 1abbfadf0dca26a1dcfc0721fb1bdc57e0538af5 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Tue, 19 Dec 2017 22:26:17 -0800 Subject: [PATCH 096/728] scorch simplify err check after vellum load --- index/scorch/segment/zap/segment.go | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/index/scorch/segment/zap/segment.go b/index/scorch/segment/zap/segment.go index 62c147a2c..e78ac392f 100644 --- a/index/scorch/segment/zap/segment.go +++ b/index/scorch/segment/zap/segment.go @@ -159,8 +159,8 @@ func (s *Segment) Dictionary(field string) (segment.TermDictionary, error) { return dict, err } -func (s *Segment) dictionary(field string) (*Dictionary, error) { - rv := &Dictionary{ +func (s *Segment) dictionary(field string) (rv *Dictionary, err error) { + rv = 
&Dictionary{ segment: s, field: field, } @@ -170,19 +170,15 @@ func (s *Segment) dictionary(field string) (*Dictionary, error) { rv.fieldID = rv.fieldID - 1 dictStart := s.fieldsOffsets[rv.fieldID] - if dictStart > 0 { // read the length of the vellum data vellumLen, read := binary.Uvarint(s.mm[dictStart : dictStart+binary.MaxVarintLen64]) fstBytes := s.mm[dictStart+uint64(read) : dictStart+uint64(read)+vellumLen] if fstBytes != nil { - fst, err := vellum.Load(fstBytes) + rv.fst, err = vellum.Load(fstBytes) if err != nil { return nil, fmt.Errorf("dictionary field %s vellum err: %v", field, err) } - if err == nil { - rv.fst = fst - } } } From df6c8f4074f45a4c5375383cf0c2e444e8fba0b6 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Wed, 20 Dec 2017 10:06:50 -0800 Subject: [PATCH 097/728] scorch added kvconfig unsafe_batch option Added an option to the kvconfig JSON, called "unsafe_batch" (bool). Default is false, so Batch() calls are synchronously persisted by default. Advanced users may want to unsafe, asynchronous persistence to tradeoff performance (mutations are queryable sooner) over safety. { "index_type": "scorch", "kvconfig": { "unsafe_batch": true } } This change replaces the previous kvstore=="moss" workaround. 
--- index/scorch/scorch.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/index/scorch/scorch.go b/index/scorch/scorch.go index 9484920f3..d72a8f886 100644 --- a/index/scorch/scorch.go +++ b/index/scorch/scorch.go @@ -81,9 +81,9 @@ func NewScorch(storeName string, config map[string]interface{}, analysisQueue *i if ok { rv.readOnly = ro } - // hack for now to disable safe batches in FTS - if storeName == "moss" { - rv.unsafeBatch = true + ub, ok := config["unsafe_batch"].(bool) + if ok { + rv.unsafeBatch = ub } return rv, nil } From 04ac9d5b1ff31d7c16ead43c2909f0535ec72824 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Wed, 20 Dec 2017 14:43:08 -0800 Subject: [PATCH 098/728] scorch removeOldBoltSnapshots() deletes from correct bucket --- index/scorch/persister.go | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/index/scorch/persister.go b/index/scorch/persister.go index 969b449ce..4ad3df80d 100644 --- a/index/scorch/persister.go +++ b/index/scorch/persister.go @@ -448,22 +448,25 @@ func (s *Scorch) removeOldBoltSnapshots() (numRemoved int, err error) { if err != nil { return 0, err } - defer func() { - if err == nil { - err = s.rootBolt.Sync() - } - }() defer func() { if err == nil { err = tx.Commit() } else { _ = tx.Rollback() } + if err == nil { + err = s.rootBolt.Sync() + } }() + snapshots := tx.Bucket(boltSnapshotsBucket) + if snapshots == nil { + return 0, nil + } + for _, epochToRemove := range epochsToRemove { k := segment.EncodeUvarintAscending(nil, epochToRemove) - err = tx.DeleteBucket(k) + err = snapshots.DeleteBucket(k) if err == bolt.ErrBucketNotFound { err = nil } From ea4eb7301bb237773a9ae2d7b8c5c6cd570499a0 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Wed, 20 Dec 2017 14:51:10 -0800 Subject: [PATCH 099/728] scorch merger checks closeCh --- index/scorch/merge.go | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/index/scorch/merge.go b/index/scorch/merge.go index 
cc3af774a..35469d591 100644 --- a/index/scorch/merge.go +++ b/index/scorch/merge.go @@ -162,7 +162,11 @@ func (s *Scorch) planMergeAtSnapshot(ourSnapshot *IndexSnapshot) error { } } for _, notification := range notifications { - <-notification + select { + case <-s.closeCh: + return nil + case <-notification: + } } return nil } From c1552555063bf69e8a1025dd5e7997ce4843303e Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Wed, 20 Dec 2017 14:55:31 -0800 Subject: [PATCH 100/728] scorch optimize zap.Merge() to reuse some buffers --- index/scorch/segment/zap/merge.go | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/index/scorch/segment/zap/merge.go b/index/scorch/segment/zap/merge.go index 842371e5f..500830d03 100644 --- a/index/scorch/segment/zap/merge.go +++ b/index/scorch/segment/zap/merge.go @@ -41,7 +41,7 @@ func Merge(segments []*Segment, drops []*roaring.Bitmap, path string, return nil, err } - // bufer the output + // buffer the output br := bufio.NewWriter(f) // wrap it for counting (tracking offsets) @@ -128,6 +128,9 @@ func persistMergedRest(segments []*Segment, drops []*roaring.Bitmap, newSegDocCount uint64, chunkFactor uint32, w *CountHashWriter) ([]uint64, error) { + var bufMaxVarintLen64 []byte = make([]byte, binary.MaxVarintLen64) + var bufLoc []uint64 + rv := make([]uint64, len(fieldsInv)) var vellumBuf bytes.Buffer @@ -208,7 +211,10 @@ func persistMergedRest(segments []*Segment, drops []*roaring.Bitmap, if len(locs) > 0 { newRoaringLocs.Add(uint32(hitNewDocNum)) for _, loc := range locs { - args := make([]uint64, 0, 5+len(loc.ArrayPositions())) + if cap(bufLoc) < 5+len(loc.ArrayPositions()) { + bufLoc = make([]uint64, 0, 5+len(loc.ArrayPositions())) + } + args := bufLoc[0:0] args = append(args, uint64(fieldsMap[loc.Field()])) args = append(args, loc.Pos()) args = append(args, loc.Start()) @@ -250,7 +256,7 @@ func persistMergedRest(segments []*Segment, drops []*roaring.Bitmap, } postingOffset := uint64(w.Count()) // write 
out the start of the term info - buf := make([]byte, binary.MaxVarintLen64) + buf := bufMaxVarintLen64 n := binary.PutUvarint(buf, freqOffset) _, err = w.Write(buf[:n]) if err != nil { @@ -295,7 +301,7 @@ func persistMergedRest(segments []*Segment, drops []*roaring.Bitmap, vellumData := vellumBuf.Bytes() // write out the length of the vellum data - buf := make([]byte, binary.MaxVarintLen64) + buf := bufMaxVarintLen64 // write out the number of chunks n := binary.PutUvarint(buf, uint64(len(vellumData))) _, err = w.Write(buf[:n]) From 67e0e5973b8cba53e7150adf29bfe54ed71db258 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Wed, 20 Dec 2017 15:18:22 -0800 Subject: [PATCH 101/728] scorch mergeStoredAndRemap() memory reuse In mergeStoredAndRemap(), instead of allocating new hashmaps for each document, this commit reuses some arrays that are indexed by fieldId. --- index/scorch/segment/zap/merge.go | 90 ++++++++++++++++--------------- 1 file changed, 47 insertions(+), 43 deletions(-) diff --git a/index/scorch/segment/zap/merge.go b/index/scorch/segment/zap/merge.go index 500830d03..5d845bbd1 100644 --- a/index/scorch/segment/zap/merge.go +++ b/index/scorch/segment/zap/merge.go @@ -333,6 +333,10 @@ func mergeStoredAndRemap(segments []*Segment, drops []*roaring.Bitmap, var metaBuf bytes.Buffer var data, compressed []byte + vals := make([][][]byte, len(fieldsInv)) + typs := make([][]byte, len(fieldsInv)) + poss := make([][][]uint64, len(fieldsInv)) + docNumOffsets := make([]uint64, newSegDocCount) // for each segment @@ -353,11 +357,13 @@ func mergeStoredAndRemap(segments []*Segment, drops []*roaring.Bitmap, } else { segNewDocNums = append(segNewDocNums, uint64(newDocNum)) // collect all the data - vals := make(map[uint16][][]byte) - typs := make(map[uint16][]byte) - poss := make(map[uint16][][]uint64) + for i := 0; i < len(fieldsInv); i++ { + vals[i] = vals[i][:0] + typs[i] = typs[i][:0] + poss[i] = poss[i][:0] + } err := segment.VisitDocument(docNum, func(field string, typ 
byte, value []byte, pos []uint64) bool { - fieldID := fieldsMap[field] + fieldID := int(fieldsMap[field]) vals[fieldID] = append(vals[fieldID], value) typs[fieldID] = append(typs[fieldID], typ) poss[fieldID] = append(poss[fieldID], pos) @@ -370,51 +376,49 @@ func mergeStoredAndRemap(segments []*Segment, drops []*roaring.Bitmap, // now walk the fields in order for fieldID := range fieldsInv { - if storedFieldValues, ok := vals[uint16(fieldID)]; ok { + storedFieldValues := vals[int(fieldID)] - // has stored values for this field - num := len(storedFieldValues) + // has stored values for this field + num := len(storedFieldValues) - // process each value - for i := 0; i < num; i++ { - // encode field - _, err2 := metaEncoder.PutU64(uint64(fieldID)) - if err2 != nil { - return 0, nil, err2 - } - // encode type - _, err2 = metaEncoder.PutU64(uint64(typs[uint16(fieldID)][i])) - if err2 != nil { - return 0, nil, err2 - } - // encode start offset - _, err2 = metaEncoder.PutU64(uint64(curr)) - if err2 != nil { - return 0, nil, err2 - } - // end len - _, err2 = metaEncoder.PutU64(uint64(len(storedFieldValues[i]))) - if err2 != nil { - return 0, nil, err2 - } - // encode number of array pos - _, err2 = metaEncoder.PutU64(uint64(len(poss[uint16(fieldID)][i]))) + // process each value + for i := 0; i < num; i++ { + // encode field + _, err2 := metaEncoder.PutU64(uint64(fieldID)) + if err2 != nil { + return 0, nil, err2 + } + // encode type + _, err2 = metaEncoder.PutU64(uint64(typs[int(fieldID)][i])) + if err2 != nil { + return 0, nil, err2 + } + // encode start offset + _, err2 = metaEncoder.PutU64(uint64(curr)) + if err2 != nil { + return 0, nil, err2 + } + // end len + _, err2 = metaEncoder.PutU64(uint64(len(storedFieldValues[i]))) + if err2 != nil { + return 0, nil, err2 + } + // encode number of array pos + _, err2 = metaEncoder.PutU64(uint64(len(poss[int(fieldID)][i]))) + if err2 != nil { + return 0, nil, err2 + } + // encode all array positions + for j := 0; j < 
len(poss[int(fieldID)][i]); j++ { + _, err2 = metaEncoder.PutU64(poss[int(fieldID)][i][j]) if err2 != nil { return 0, nil, err2 } - // encode all array positions - for j := 0; j < len(poss[uint16(fieldID)][i]); j++ { - _, err2 = metaEncoder.PutU64(poss[uint16(fieldID)][i][j]) - if err2 != nil { - return 0, nil, err2 - } - } - // append data - data = append(data, storedFieldValues[i]...) - // update curr - curr += len(storedFieldValues[i]) - } + // append data + data = append(data, storedFieldValues[i]...) + // update curr + curr += len(storedFieldValues[i]) } } From b3e41335e15c0327063313ea4229dc9fe134d175 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Thu, 21 Dec 2017 07:36:03 -0800 Subject: [PATCH 102/728] scorch compared to upsidedown/bolt using templated, generated searches This is somewhat like a simple, unit-test'ish version of testrunner's random query generator, where this does not have a dependency on an external elasticsearch server, and instead depends on functional correctness when comparing to upsidedown/bolt. --- test/versus_test.go | 435 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 435 insertions(+) create mode 100644 test/versus_test.go diff --git a/test/versus_test.go b/test/versus_test.go new file mode 100644 index 000000000..16abce078 --- /dev/null +++ b/test/versus_test.go @@ -0,0 +1,435 @@ +// Copyright (c) 2014 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package test + +import ( + "bytes" + "encoding/json" + "fmt" + "math" + "math/rand" + "os" + "reflect" + "strconv" + "strings" + "testing" + "text/template" + + "github.com/blevesearch/bleve" + "github.com/blevesearch/bleve/index/scorch" + "github.com/blevesearch/bleve/index/store/boltdb" + "github.com/blevesearch/bleve/index/upsidedown" + "github.com/blevesearch/bleve/mapping" + "github.com/blevesearch/bleve/search" +) + +// Tests scorch indexer versus upsidedown/bolt indexer against various +// templated queries. Example usage from the bleve top-level directory... +// +// go test -v -run TestScorchVersusUpsideDownBolt ./test +// VERBOSE=1 FOCUS=Trista go test -v -run TestScorchVersusUpsideDownBolt ./test +// +func TestScorchVersusUpsideDownBolt(t *testing.T) { + (&VersusTest{ + t: t, + NumDocs: 10000, + MaxWordsPerDoc: 20, + NumWords: 100, + BatchSize: 100, + NumAttemptsPerSearch: 1000, + }).run(scorch.Name, boltdb.Name, upsidedown.Name, boltdb.Name, nil) +} + +func TestScorchVersusUpsideDownBoltSmallMNSAM(t *testing.T) { + (&VersusTest{ + t: t, + Focus: "must-not-same-as-must", + NumDocs: 5, + MaxWordsPerDoc: 2, + NumWords: 1, + BatchSize: 1, + NumAttemptsPerSearch: 1, + }).run(scorch.Name, boltdb.Name, upsidedown.Name, boltdb.Name, nil) +} + +// ------------------------------------------------------- + +// Templates used to compare search results in the "versus" tests. 
+var searchTemplates = []string{ + `{ + "about": "expected to return zero hits", + "query": { + "query": "title:notARealTitle" + } + }`, + `{ + "about": "try straight word()'s", + "query": { + "query": "body:{{word}}" + } + }`, + `{ + "about": "conjuncts on same term", + "query": { + "conjuncts": [ + { "field": "body", "term": "{{word}}", "boost": 1.0 }, + { "field": "body", "term": "{{word}}", "boost": 1.0 } + ] + } + }`, + `{ + "about": "disjuncts on same term", + "query": { + "disjuncts": [ + { "field": "body", "term": "{{word}}", "boost": 1.0 }, + { "field": "body", "term": "{{word}}", "boost": 1.0 } + ] + } + }`, + `{ + "about": "never-matching-title-conjuncts", + "query": { + "conjuncts": [ + {"field": "body", "match": "{{word}}"}, + {"field": "body", "match": "{{word}}"}, + {"field": "title", "match": "notAnActualTitle"} + ] + } + }`, + `{ + "about": "never-matching-title-disjuncts", + "query": { + "disjuncts": [ + {"field": "body", "match": "{{word}}"}, + {"field": "body", "match": "{{word}}"}, + {"field": "title", "match": "notAnActualTitle"} + ] + } + }`, + `{ + "about": "must-not-never-matches", + "query": { + "must_not": {"disjuncts": [ + {"field": "title", "match": "notAnActualTitle"} + ]}, + "should": {"disjuncts": [ + {"field": "body", "match": "{{word}}"} + ]} + } + }`, + `{ + "about": "must-not-only -- FAILS!!!", + "query": { + "must_not": {"disjuncts": [ + {"field": "body", "term": "{{word}}"} + ]} + } + }`, + `{ + "about": "must-not-same-as-must -- FAILS!!!", + "query": { + "must_not": {"disjuncts": [ + {"field": "body", "match": "{{word}}"} + ]}, + "must": {"conjuncts": [ + {"field": "body", "match": "{{word}}"} + ]} + } + }`, + `{ + "about": "must-not-same-as-should -- FAILS!!!", + "query": { + "must_not": {"disjuncts": [ + {"field": "body", "match": "{{word}}"} + ]}, + "should": {"disjuncts": [ + {"field": "body", "match": "{{word}}"} + ]} + } + }`, + `{ + "about": "inspired by testrunner RQG issue -- FAILS!!!", + "query": { + "must_not": 
{"disjuncts": [ + {"field": "title", "match": "Trista Allen"}, + {"field": "body", "match": "{{word}}"} + ]}, + "should": {"disjuncts": [ + {"field": "title", "match": "Kallie Safiya Amara"}, + {"field": "body", "match": "{{word}}"} + ]} + } + }`, +} + +// ------------------------------------------------------- + +type VersusTest struct { + t *testing.T + + // Use environment variable VERBOSE= that's > 0 for more + // verbose output. + Verbose int + + // Allow user to focus on particular search templates, where + // where the search template must contain the Focus string. + Focus string + + NumDocs int // Number of docs to insert. + MaxWordsPerDoc int // Max number words in each doc's Body field. + NumWords int // Total number of words in the dictionary. + BatchSize int // Batch size when inserting docs. + NumAttemptsPerSearch int // For each search template, number of searches to try. + + // The Bodies is an array with length NumDocs, where each entry + // is the words in a doc's Body field. + Bodies [][]string + + CurAttempt int + TotAttempts int +} + +// ------------------------------------------------------- + +func testVersusSearches(vt *VersusTest, idxA, idxB bleve.Index) { + t := vt.t + + funcMap := template.FuncMap{ + "word": func() string { + return vt.genWord(vt.CurAttempt % vt.NumWords) + }, + } + + // Optionally allow call to focus on a particular search templates, + // where the search template must contain the vt.Focus string. 
+ if vt.Focus == "" { + vt.Focus = os.Getenv("FOCUS") + } + + for i, searchTemplate := range searchTemplates { + if vt.Focus != "" && !strings.Contains(searchTemplate, vt.Focus) { + continue + } + + tmpl, err := template.New("search").Funcs(funcMap).Parse(searchTemplate) + if err != nil { + t.Fatalf("could not parse search template: %s, err: %v", searchTemplate, err) + } + + for j := 0; j < vt.NumAttemptsPerSearch; j++ { + vt.CurAttempt = j + + var buf bytes.Buffer + err = tmpl.Execute(&buf, vt) + if err != nil { + t.Fatalf("could not execute search template: %s, err: %v", searchTemplate, err) + } + + bufBytes := buf.Bytes() + + if vt.Verbose > 0 { + fmt.Printf(" %s\n", bufBytes) + } + + var search bleve.SearchRequest + err = json.Unmarshal(bufBytes, &search) + if err != nil { + t.Fatalf("could not unmarshal search: %s, err: %v", bufBytes, err) + } + + search.Size = vt.NumDocs * 10 // Crank up limit to get all results. + + searchA := search + searchB := search + + resA, errA := idxA.Search(&searchA) + resB, errB := idxB.Search(&searchB) + if errA != errB { + t.Errorf("search: (%d) %s,\n err mismatch, errA: %v, errB: %v", + i, bufBytes, errA, errB) + } + + // Scores might have float64 vs float32 wobbles, so truncate precision. + resA.MaxScore = math.Trunc(resA.MaxScore*1000.0) / 1000.0 + resB.MaxScore = math.Trunc(resB.MaxScore*1000.0) / 1000.0 + + // Timings may be different between A & B, so force equality. + resA.Took = resB.Took + + // Hits might have different ordering since some indexers + // (like upsidedown) have a natural secondary sort on id + // while others (like scorch) don't. So, we compare by + // putting the hits from A & B into maps. 
+ hitsA := hitsById(resA) + hitsB := hitsById(resB) + if !reflect.DeepEqual(hitsA, hitsB) { + t.Errorf("search: (%d) %s,\n res hits mismatch,\n len(hitsA): %d,\n len(hitsB): %d", + i, bufBytes, len(hitsA), len(hitsB)) + t.Errorf("\n hitsA: %#v,\n hitsB: %#v", + hitsA, hitsB) + for id, hitA := range hitsA { + hitB := hitsB[id] + if !reflect.DeepEqual(hitA, hitB) { + t.Errorf("\n hitA: %#v,\n hitB: %#v", hitA, hitB) + idx, _ := strconv.Atoi(id) + t.Errorf("\n body: %s", strings.Join(vt.Bodies[idx], " ")) + } + } + } + + resA.Hits = nil + resB.Hits = nil + + if !reflect.DeepEqual(resA, resB) { + resAj, _ := json.Marshal(resA) + resBj, _ := json.Marshal(resB) + t.Errorf("search: (%d) %s,\n res mismatch,\n resA: %s,\n resB: %s", + i, bufBytes, resAj, resBj) + } + + if vt.Verbose > 0 { + fmt.Printf(" Total: (%t) %d\n", resA.Total == resB.Total, resA.Total) + } + + vt.TotAttempts++ + } + } +} + +// Organizes the hits into a map keyed by id. +func hitsById(res *bleve.SearchResult) map[string]*search.DocumentMatch { + rv := make(map[string]*search.DocumentMatch, len(res.Hits)) + + for _, hit := range res.Hits { + // Clear out or truncate precision of hit fields that might be + // different across different indexer implementations. 
+ hit.Index = "" + hit.Score = math.Trunc(hit.Score*1000.0) / 1000.0 + hit.IndexInternalID = nil + hit.HitNumber = 0 + + rv[hit.ID] = hit + } + + return rv +} + +// ------------------------------------------------------- + +func (vt *VersusTest) run(indexTypeA, kvStoreA, indexTypeB, kvStoreB string, + cb func(versusTest *VersusTest, idxA, idxB bleve.Index)) { + if cb == nil { + cb = testVersusSearches + } + + if vt.Verbose <= 0 { + vt.Verbose, _ = strconv.Atoi(os.Getenv("VERBOSE")) + } + + dirA := "/tmp/bleve-versus-test-a" + dirB := "/tmp/bleve-versus-test-b" + + defer os.RemoveAll(dirA) + defer os.RemoveAll(dirB) + + os.RemoveAll(dirA) + os.RemoveAll(dirB) + + imA := vt.makeIndexMapping() + imB := vt.makeIndexMapping() + + kvConfigA := map[string]interface{}{} + kvConfigB := map[string]interface{}{} + + idxA, err := bleve.NewUsing(dirA, imA, indexTypeA, kvStoreA, kvConfigA) + if err != nil || idxA == nil { + vt.t.Fatalf("new using err: %v", err) + } + defer idxA.Close() + + idxB, err := bleve.NewUsing(dirB, imB, indexTypeB, kvStoreB, kvConfigB) + if err != nil || idxB == nil { + vt.t.Fatalf("new using err: %v", err) + } + defer idxB.Close() + + rand.Seed(0) + + vt.Bodies = vt.genBodies() + + vt.insertBodies(idxA) + vt.insertBodies(idxB) + + cb(vt, idxA, idxB) +} + +// ------------------------------------------------------- + +func (vt *VersusTest) makeIndexMapping() mapping.IndexMapping { + standardFM := bleve.NewTextFieldMapping() + standardFM.Store = false + standardFM.IncludeInAll = false + standardFM.IncludeTermVectors = true + standardFM.Analyzer = "standard" + + dm := bleve.NewDocumentMapping() + dm.AddFieldMappingsAt("title", standardFM) + dm.AddFieldMappingsAt("body", standardFM) + + im := bleve.NewIndexMapping() + im.DefaultMapping = dm + im.DefaultAnalyzer = "standard" + + return im +} + +func (vt *VersusTest) insertBodies(idx bleve.Index) { + batch := idx.NewBatch() + for i, bodyWords := range vt.Bodies { + title := fmt.Sprintf("%d", i) + body := 
strings.Join(bodyWords, " ") + batch.Index(title, map[string]interface{}{"title": title, "body": body}) + if i%vt.BatchSize == 0 { + err := idx.Batch(batch) + if err != nil { + vt.t.Fatalf("batch err: %v", err) + } + batch.Reset() + } + } + err := idx.Batch(batch) + if err != nil { + vt.t.Fatalf("last batch err: %v", err) + } +} + +func (vt *VersusTest) genBodies() (rv [][]string) { + for i := 0; i < vt.NumDocs; i++ { + rv = append(rv, vt.genBody()) + } + return rv +} + +func (vt *VersusTest) genBody() (rv []string) { + m := rand.Intn(vt.MaxWordsPerDoc) + for j := 0; j < m; j++ { + rv = append(rv, vt.genWord(rand.Intn(vt.NumWords))) + } + return rv +} + +func (vt *VersusTest) genWord(i int) string { + return fmt.Sprintf("%x", i) +} From a884f38bf6993b6fb49897bfe8521d4624cf72a3 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Thu, 21 Dec 2017 16:44:31 -0800 Subject: [PATCH 103/728] scorch docInternalToNumber returns 0 on error --- index/scorch/snapshot_index.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/index/scorch/snapshot_index.go b/index/scorch/snapshot_index.go index bb6a8be6b..6089a771d 100644 --- a/index/scorch/snapshot_index.go +++ b/index/scorch/snapshot_index.go @@ -381,7 +381,7 @@ func docInternalToNumber(in index.IndexInternalID) (uint64, error) { var res uint64 err := binary.Read(bytes.NewReader(in), binary.BigEndian, &res) if err != nil { - return res, err + return 0, err } return res, nil } From 33687260ca978ba506f0c92a83e669465d6e5950 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Thu, 21 Dec 2017 16:45:36 -0800 Subject: [PATCH 104/728] children of conjunct/disjunct's are not necessarily termSearchers Rename termSearcher loop variable to searcher, as the child searchers of a conjunction/disjunction searcher aren't necessarily termSearchers. 
--- search/searcher/search_conjunction.go | 16 ++++++++-------- search/searcher/search_disjunction.go | 16 ++++++++-------- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/search/searcher/search_conjunction.go b/search/searcher/search_conjunction.go index 9ab0e7fa4..d7a873ffb 100644 --- a/search/searcher/search_conjunction.go +++ b/search/searcher/search_conjunction.go @@ -57,25 +57,25 @@ func NewConjunctionSearcher(indexReader index.IndexReader, qsearchers []search.S func (s *ConjunctionSearcher) computeQueryNorm() { // first calculate sum of squared weights sumOfSquaredWeights := 0.0 - for _, termSearcher := range s.searchers { - sumOfSquaredWeights += termSearcher.Weight() + for _, searcher := range s.searchers { + sumOfSquaredWeights += searcher.Weight() } // now compute query norm from this s.queryNorm = 1.0 / math.Sqrt(sumOfSquaredWeights) // finally tell all the downstream searchers the norm - for _, termSearcher := range s.searchers { - termSearcher.SetQueryNorm(s.queryNorm) + for _, searcher := range s.searchers { + searcher.SetQueryNorm(s.queryNorm) } } func (s *ConjunctionSearcher) initSearchers(ctx *search.SearchContext) error { var err error // get all searchers pointing at their first match - for i, termSearcher := range s.searchers { + for i, searcher := range s.searchers { if s.currs[i] != nil { ctx.DocumentMatchPool.Put(s.currs[i]) } - s.currs[i], err = termSearcher.Next(ctx) + s.currs[i], err = searcher.Next(ctx) if err != nil { return err } @@ -160,11 +160,11 @@ OUTER: // we know all the searchers are pointing at the same thing // so they all need to be bumped - for i, termSearcher := range s.searchers { + for i, searcher := range s.searchers { if s.currs[i] != rv { ctx.DocumentMatchPool.Put(s.currs[i]) } - s.currs[i], err = termSearcher.Next(ctx) + s.currs[i], err = searcher.Next(ctx) if err != nil { return nil, err } diff --git a/search/searcher/search_disjunction.go b/search/searcher/search_disjunction.go index 
96bd54474..07df9a326 100644 --- a/search/searcher/search_disjunction.go +++ b/search/searcher/search_disjunction.go @@ -93,25 +93,25 @@ func newDisjunctionSearcher(indexReader index.IndexReader, func (s *DisjunctionSearcher) computeQueryNorm() { // first calculate sum of squared weights sumOfSquaredWeights := 0.0 - for _, termSearcher := range s.searchers { - sumOfSquaredWeights += termSearcher.Weight() + for _, searcher := range s.searchers { + sumOfSquaredWeights += searcher.Weight() } // now compute query norm from this s.queryNorm = 1.0 / math.Sqrt(sumOfSquaredWeights) // finally tell all the downstream searchers the norm - for _, termSearcher := range s.searchers { - termSearcher.SetQueryNorm(s.queryNorm) + for _, searcher := range s.searchers { + searcher.SetQueryNorm(s.queryNorm) } } func (s *DisjunctionSearcher) initSearchers(ctx *search.SearchContext) error { var err error // get all searchers pointing at their first match - for i, termSearcher := range s.searchers { + for i, searcher := range s.searchers { if s.currs[i] != nil { ctx.DocumentMatchPool.Put(s.currs[i]) } - s.currs[i], err = termSearcher.Next(ctx) + s.currs[i], err = searcher.Next(ctx) if err != nil { return err } @@ -221,11 +221,11 @@ func (s *DisjunctionSearcher) Advance(ctx *search.SearchContext, } // get all searchers pointing at their first match var err error - for i, termSearcher := range s.searchers { + for i, searcher := range s.searchers { if s.currs[i] != nil { ctx.DocumentMatchPool.Put(s.currs[i]) } - s.currs[i], err = termSearcher.Advance(ctx, ID) + s.currs[i], err = searcher.Advance(ctx, ID) if err != nil { return nil, err } From 93c787ca099657dbb727d28d9ca2b04ae430e1e4 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Thu, 21 Dec 2017 16:49:12 -0800 Subject: [PATCH 105/728] scorch versus_test.go passes errcheck --- test/versus_test.go | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/test/versus_test.go b/test/versus_test.go index 
16abce078..9b6325c78 100644 --- a/test/versus_test.go +++ b/test/versus_test.go @@ -341,11 +341,13 @@ func (vt *VersusTest) run(indexTypeA, kvStoreA, indexTypeB, kvStoreB string, dirA := "/tmp/bleve-versus-test-a" dirB := "/tmp/bleve-versus-test-b" - defer os.RemoveAll(dirA) - defer os.RemoveAll(dirB) + defer func() { + _ = os.RemoveAll(dirA) + _ = os.RemoveAll(dirB) + }() - os.RemoveAll(dirA) - os.RemoveAll(dirB) + _ = os.RemoveAll(dirA) + _ = os.RemoveAll(dirB) imA := vt.makeIndexMapping() imB := vt.makeIndexMapping() @@ -357,13 +359,13 @@ func (vt *VersusTest) run(indexTypeA, kvStoreA, indexTypeB, kvStoreB string, if err != nil || idxA == nil { vt.t.Fatalf("new using err: %v", err) } - defer idxA.Close() + defer func() { _ = idxA.Close() }() idxB, err := bleve.NewUsing(dirB, imB, indexTypeB, kvStoreB, kvConfigB) if err != nil || idxB == nil { vt.t.Fatalf("new using err: %v", err) } - defer idxB.Close() + defer func() { _ = idxB.Close() }() rand.Seed(0) @@ -400,9 +402,12 @@ func (vt *VersusTest) insertBodies(idx bleve.Index) { for i, bodyWords := range vt.Bodies { title := fmt.Sprintf("%d", i) body := strings.Join(bodyWords, " ") - batch.Index(title, map[string]interface{}{"title": title, "body": body}) + err := batch.Index(title, map[string]interface{}{"title": title, "body": body}) + if err != nil { + vt.t.Fatalf("batch.Index err: %v", err) + } if i%vt.BatchSize == 0 { - err := idx.Batch(batch) + err = idx.Batch(batch) if err != nil { vt.t.Fatalf("batch err: %v", err) } From d425a3be8693778ca3d8cc6bd3f810bc7a7603bb Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Thu, 21 Dec 2017 17:49:55 -0800 Subject: [PATCH 106/728] scorch fix disjunction searcher Advance() Found with "versus" test (TestScorchVersusUpsideDownBoltSmallMNSAM), which had a boolean query with a MustNot that was the same as the Must parameters. This replicates a situation found by Aruna/Mihir/testrunner/RQG (MB-27291). 
Example: "query": { "must_not": {"disjuncts": [ {"field": "body", "match": "hello"} ]}, "must": {"conjuncts": [ {"field": "body", "match": "hello"} ]} } The nested searchers along the MustNot pathway would end up looking roughly like... booleanSearcher MustNot => disjunctionSearcher => disjunctionSearcher => termSearcher On the first Next() call by the collector, the two disjunction searchers would run through their respective Next() method processing, which includes their initSearcher() processing on the first time. This has the effect of driving the leaf termSearcher through two Next() invocations. That is, if there were 3 docs (doc-1, doc-2, doc-3), the leaf termSearcher would at this point have moved to point to doc-3, while the topmost MustNot would have received doc-1. Next, the booleanSearcher's Must searcher would produce doc-2, so the booleanSearcher would try to Advance() the MustNot searcher to doc-2. But, in scorch, the leafmost termSearcher had already gotten past doc-2 and would return its doc-3. In upsidedown, in contrast, the leaf termSearcher would then drive the KVStore iterator with a Seek(doc-2), and the KVStore iterator would perform a backwards seek to reach doc-2. In scorch, however, backwards iteration seeking isn't supported. So, this fix checks the state of the disjunction searcher to see if we already have the necessary state so that we don't have to perform actual Advance()'es on the underlying searchers. This not only fixes the behavior w.r.t. scorch, but also can have an effect of potentially making upsidedown slightly faster as we're avoiding some backwards KVStore iterator seeks. 
--- search/searcher/search_disjunction.go | 3 +++ test/versus_test.go | 14 +++++++------- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/search/searcher/search_disjunction.go b/search/searcher/search_disjunction.go index 07df9a326..b6910ddb6 100644 --- a/search/searcher/search_disjunction.go +++ b/search/searcher/search_disjunction.go @@ -223,6 +223,9 @@ func (s *DisjunctionSearcher) Advance(ctx *search.SearchContext, var err error for i, searcher := range s.searchers { if s.currs[i] != nil { + if s.currs[i].IndexInternalID.Compare(ID) >= 0 { + continue + } ctx.DocumentMatchPool.Put(s.currs[i]) } s.currs[i], err = searcher.Advance(ctx, ID) diff --git a/test/versus_test.go b/test/versus_test.go index 9b6325c78..de4123ca1 100644 --- a/test/versus_test.go +++ b/test/versus_test.go @@ -44,11 +44,11 @@ import ( func TestScorchVersusUpsideDownBolt(t *testing.T) { (&VersusTest{ t: t, - NumDocs: 10000, + NumDocs: 1000, MaxWordsPerDoc: 20, - NumWords: 100, - BatchSize: 100, - NumAttemptsPerSearch: 1000, + NumWords: 10, + BatchSize: 10, + NumAttemptsPerSearch: 100, }).run(scorch.Name, boltdb.Name, upsidedown.Name, boltdb.Name, nil) } @@ -138,7 +138,7 @@ var searchTemplates = []string{ } }`, `{ - "about": "must-not-same-as-must -- FAILS!!!", + "about": "must-not-same-as-must -- see: MB-27291", "query": { "must_not": {"disjuncts": [ {"field": "body", "match": "{{word}}"} @@ -149,7 +149,7 @@ var searchTemplates = []string{ } }`, `{ - "about": "must-not-same-as-should -- FAILS!!!", + "about": "must-not-same-as-should", "query": { "must_not": {"disjuncts": [ {"field": "body", "match": "{{word}}"} @@ -160,7 +160,7 @@ var searchTemplates = []string{ } }`, `{ - "about": "inspired by testrunner RQG issue -- FAILS!!!", + "about": "inspired by testrunner RQG issue -- see: MB-27291", "query": { "must_not": {"disjuncts": [ {"field": "title", "match": "Trista Allen"}, From c7a342bc7d69414ea651f3b4ea7a0ec1d1586b49 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Fri, 22 Dec 
2017 10:28:26 -0800 Subject: [PATCH 107/728] scorch conjuncts match phrase test passes The conjunction searcher Advance() method now checks if its curr doc-matches suffices before advancing them. --- index/scorch/snapshot_index.go | 1 + index/scorch/snapshot_index_tfr.go | 15 ++--- search/searcher/search_conjunction.go | 3 + search/searcher/search_phrase.go | 6 ++ test/versus_test.go | 83 ++++++++++++++++++++++----- 5 files changed, 88 insertions(+), 20 deletions(-) diff --git a/index/scorch/snapshot_index.go b/index/scorch/snapshot_index.go index 6089a771d..5f08a496f 100644 --- a/index/scorch/snapshot_index.go +++ b/index/scorch/snapshot_index.go @@ -343,6 +343,7 @@ func (i *IndexSnapshot) TermFieldReader(term []byte, field string, includeFreq, rv := &IndexSnapshotTermFieldReader{ term: term, + field: field, snapshot: i, postings: make([]segment.PostingsList, len(i.segment)), iterators: make([]segment.PostingsIterator, len(i.segment)), diff --git a/index/scorch/snapshot_index_tfr.go b/index/scorch/snapshot_index_tfr.go index 25cc0bd07..497b83dd7 100644 --- a/index/scorch/snapshot_index_tfr.go +++ b/index/scorch/snapshot_index_tfr.go @@ -23,6 +23,7 @@ import ( type IndexSnapshotTermFieldReader struct { term []byte + field string snapshot *IndexSnapshot postings []segment.PostingsList iterators []segment.PostingsIterator @@ -84,15 +85,15 @@ func (i *IndexSnapshotTermFieldReader) postingToTermFieldDoc(next segment.Postin } func (i *IndexSnapshotTermFieldReader) Advance(ID index.IndexInternalID, preAlloced *index.TermFieldDoc) (*index.TermFieldDoc, error) { - // first make sure we aren't already pointing at the right thing, (due to way searchers work) + // FIXME do something better + // for now, if we need to seek backwards, then restart from the beginning if i.currPosting != nil && bytes.Compare(i.currID, ID) >= 0 { - rv := preAlloced - if rv == nil { - rv = &index.TermFieldDoc{} + i2, err := i.snapshot.TermFieldReader(i.term, i.field, + i.includeFreq, i.includeNorm, 
i.includeTermVectors) + if err != nil { + return nil, err } - rv.ID = i.currID - i.postingToTermFieldDoc(i.currPosting, rv) - return rv, nil + *i = *(i2.(*IndexSnapshotTermFieldReader)) } // FIXME do something better next, err := i.Next(preAlloced) diff --git a/search/searcher/search_conjunction.go b/search/searcher/search_conjunction.go index d7a873ffb..73fba19cd 100644 --- a/search/searcher/search_conjunction.go +++ b/search/searcher/search_conjunction.go @@ -184,6 +184,9 @@ func (s *ConjunctionSearcher) Advance(ctx *search.SearchContext, ID index.IndexI } } for i := range s.searchers { + if s.currs[i] != nil && s.currs[i].IndexInternalID.Compare(ID) >= 0 { + continue + } err := s.advanceChild(ctx, i, ID) if err != nil { return nil, err diff --git a/search/searcher/search_phrase.go b/search/searcher/search_phrase.go index 6ff592ef5..552dfabef 100644 --- a/search/searcher/search_phrase.go +++ b/search/searcher/search_phrase.go @@ -313,6 +313,12 @@ func (s *PhraseSearcher) Advance(ctx *search.SearchContext, ID index.IndexIntern return nil, err } } + if s.currMust != nil { + if s.currMust.IndexInternalID.Compare(ID) >= 0 { + return s.Next(ctx) + } + ctx.DocumentMatchPool.Put(s.currMust) + } var err error s.currMust, err = s.mustSearcher.Advance(ctx, ID) if err != nil { diff --git a/test/versus_test.go b/test/versus_test.go index de4123ca1..70463a93c 100644 --- a/test/versus_test.go +++ b/test/versus_test.go @@ -41,7 +41,7 @@ import ( // go test -v -run TestScorchVersusUpsideDownBolt ./test // VERBOSE=1 FOCUS=Trista go test -v -run TestScorchVersusUpsideDownBolt ./test // -func TestScorchVersusUpsideDownBolt(t *testing.T) { +func TestScorchVersusUpsideDownBoltAll(t *testing.T) { (&VersusTest{ t: t, NumDocs: 1000, @@ -49,7 +49,7 @@ func TestScorchVersusUpsideDownBolt(t *testing.T) { NumWords: 10, BatchSize: 10, NumAttemptsPerSearch: 100, - }).run(scorch.Name, boltdb.Name, upsidedown.Name, boltdb.Name, nil) + }).run(scorch.Name, boltdb.Name, upsidedown.Name, 
boltdb.Name, nil, nil) } func TestScorchVersusUpsideDownBoltSmallMNSAM(t *testing.T) { @@ -61,13 +61,25 @@ func TestScorchVersusUpsideDownBoltSmallMNSAM(t *testing.T) { NumWords: 1, BatchSize: 1, NumAttemptsPerSearch: 1, - }).run(scorch.Name, boltdb.Name, upsidedown.Name, boltdb.Name, nil) + }).run(scorch.Name, boltdb.Name, upsidedown.Name, boltdb.Name, nil, nil) +} + +func TestScorchVersusUpsideDownBoltSmallCMP11(t *testing.T) { + (&VersusTest{ + t: t, + Focus: "conjuncts-match-phrase-1-1", + NumDocs: 30, + MaxWordsPerDoc: 8, + NumWords: 2, + BatchSize: 1, + NumAttemptsPerSearch: 1, + }).run(scorch.Name, boltdb.Name, upsidedown.Name, boltdb.Name, nil, nil) } // ------------------------------------------------------- // Templates used to compare search results in the "versus" tests. -var searchTemplates = []string{ +var testVersusSearchTemplates = []string{ `{ "about": "expected to return zero hits", "query": { @@ -130,7 +142,7 @@ var searchTemplates = []string{ } }`, `{ - "about": "must-not-only -- FAILS!!!", + "about": "must-not-only", "query": { "must_not": {"disjuncts": [ {"field": "body", "term": "{{word}}"} @@ -172,6 +184,24 @@ var searchTemplates = []string{ ]} } }`, + `{ + "about": "conjuncts-match-phrase-1-1 inspired by testrunner RQG issue -- see: MB-27291", + "query": { + "conjuncts": [ + {"field": "body", "match": "{{bodyWord 0}}"}, + {"field": "body", "match_phrase": "{{bodyWord 1}} {{bodyWord 1}}"} + ] + } + }`, + `{ + "about": "conjuncts-match-phrase-1-2 inspired by testrunner RQG issue -- see: MB-27291 -- FAILS!!", + "query": { + "conjuncts": [ + {"field": "body", "match": "{{bodyWord 0}}"}, + {"field": "body", "match_phrase": "{{bodyWord 1}} {{bodyWord 2}}"} + ] + } + }`, } // ------------------------------------------------------- @@ -203,13 +233,25 @@ type VersusTest struct { // ------------------------------------------------------- -func testVersusSearches(vt *VersusTest, idxA, idxB bleve.Index) { +func testVersusSearches(vt *VersusTest, 
searchTemplates []string, idxA, idxB bleve.Index) { t := vt.t funcMap := template.FuncMap{ + // Returns a word. The word may or may not be in any + // document's body. "word": func() string { return vt.genWord(vt.CurAttempt % vt.NumWords) }, + // Picks a document and returns the i'th word in that + // document's body. You can use this in searches to + // definitely find at least one document. + "bodyWord": func(i int) string { + body := vt.Bodies[vt.CurAttempt%len(vt.Bodies)] + if len(body) <= 0 { + return "" + } + return body[i%len(body)] + }, } // Optionally allow call to focus on a particular search templates, @@ -275,16 +317,24 @@ func testVersusSearches(vt *VersusTest, idxA, idxB bleve.Index) { hitsA := hitsById(resA) hitsB := hitsById(resB) if !reflect.DeepEqual(hitsA, hitsB) { - t.Errorf("search: (%d) %s,\n res hits mismatch,\n len(hitsA): %d,\n len(hitsB): %d", + t.Errorf("=========\nsearch: (%d) %s,\n res hits mismatch,\n len(hitsA): %d,\n len(hitsB): %d", i, bufBytes, len(hitsA), len(hitsB)) t.Errorf("\n hitsA: %#v,\n hitsB: %#v", hitsA, hitsB) for id, hitA := range hitsA { hitB := hitsB[id] if !reflect.DeepEqual(hitA, hitB) { - t.Errorf("\n hitA: %#v,\n hitB: %#v", hitA, hitB) + t.Errorf("\n driving from hitsA\n hitA: %#v,\n hitB: %#v", hitA, hitB) idx, _ := strconv.Atoi(id) - t.Errorf("\n body: %s", strings.Join(vt.Bodies[idx], " ")) + t.Errorf("\n doc: %d, body: %s", idx, strings.Join(vt.Bodies[idx], " ")) + } + } + for id, hitB := range hitsB { + hitA := hitsA[id] + if !reflect.DeepEqual(hitA, hitB) { + t.Errorf("\n driving from hitsB\n hitA: %#v,\n hitB: %#v", hitA, hitB) + idx, _ := strconv.Atoi(id) + t.Errorf("\n doc: %d, body: %s", idx, strings.Join(vt.Bodies[idx], " ")) } } } @@ -295,7 +345,7 @@ func testVersusSearches(vt *VersusTest, idxA, idxB bleve.Index) { if !reflect.DeepEqual(resA, resB) { resAj, _ := json.Marshal(resA) resBj, _ := json.Marshal(resB) - t.Errorf("search: (%d) %s,\n res mismatch,\n resA: %s,\n resB: %s", + t.Errorf("search: 
(%d) %s,\n res mismatch,\n resA: %s,\n resB: %s", i, bufBytes, resAj, resBj) } @@ -329,11 +379,16 @@ func hitsById(res *bleve.SearchResult) map[string]*search.DocumentMatch { // ------------------------------------------------------- func (vt *VersusTest) run(indexTypeA, kvStoreA, indexTypeB, kvStoreB string, - cb func(versusTest *VersusTest, idxA, idxB bleve.Index)) { + cb func(versusTest *VersusTest, searchTemplates []string, idxA, idxB bleve.Index), + searchTemplates []string) { if cb == nil { cb = testVersusSearches } + if searchTemplates == nil { + searchTemplates = testVersusSearchTemplates + } + if vt.Verbose <= 0 { vt.Verbose, _ = strconv.Atoi(os.Getenv("VERBOSE")) } @@ -369,12 +424,14 @@ func (vt *VersusTest) run(indexTypeA, kvStoreA, indexTypeB, kvStoreB string, rand.Seed(0) - vt.Bodies = vt.genBodies() + if vt.Bodies == nil { + vt.Bodies = vt.genBodies() + } vt.insertBodies(idxA) vt.insertBodies(idxB) - cb(vt, idxA, idxB) + cb(vt, searchTemplates, idxA, idxB) } // ------------------------------------------------------- From dcabc267a06339af6827792fe18858e0b95bdbfb Mon Sep 17 00:00:00 2001 From: abhinavdangeti Date: Tue, 26 Dec 2017 10:37:42 -0700 Subject: [PATCH 108/728] Wait for rollback'ed snapshot to persist --- index/scorch/introducer.go | 9 +++++++-- index/scorch/snapshot_rollback.go | 15 +++++++++++++- index/scorch/snapshot_rollback_test.go | 27 +++++++++++++------------- 3 files changed, 35 insertions(+), 16 deletions(-) diff --git a/index/scorch/introducer.go b/index/scorch/introducer.go index 41ce4ca1f..c1f9321d5 100644 --- a/index/scorch/introducer.go +++ b/index/scorch/introducer.go @@ -38,8 +38,9 @@ type epochWatcher struct { } type snapshotReversion struct { - snapshot *IndexSnapshot - applied chan error + snapshot *IndexSnapshot + applied chan error + persisted chan error } func (s *Scorch) mainLoop() { @@ -285,6 +286,10 @@ func (s *Scorch) revertToSnapshot(revertTo *snapshotReversion) error { segmentSnapshot.segment.AddRef() } + if 
revertTo.persisted != nil { + s.rootPersisted = append(s.rootPersisted, revertTo.persisted) + } + // swap in new snapshot rootPrev := s.root s.root = newSnapshot diff --git a/index/scorch/snapshot_rollback.go b/index/scorch/snapshot_rollback.go index e31308a20..d4b1f2eb8 100644 --- a/index/scorch/snapshot_rollback.go +++ b/index/scorch/snapshot_rollback.go @@ -95,8 +95,21 @@ func (s *Scorch) SnapshotRevert(revertTo *IndexSnapshot) error { applied: make(chan error), } + if !s.unsafeBatch { + revert.persisted = make(chan error) + } + s.revertToSnapshots <- revert // block until this IndexSnapshot is applied - return <-revert.applied + err := <-revert.applied + if err != nil { + return err + } + + if revert.persisted != nil { + err = <-revert.persisted + } + + return err } diff --git a/index/scorch/snapshot_rollback_test.go b/index/scorch/snapshot_rollback_test.go index 3054c9bb8..879d01685 100644 --- a/index/scorch/snapshot_rollback_test.go +++ b/index/scorch/snapshot_rollback_test.go @@ -92,22 +92,23 @@ func TestIndexRollback(t *testing.T) { t.Error(err) } - err = sh.SnapshotRevert(prev) - if err != nil { - t.Error(err) - } - - newRoot := sh.root - if newRoot != nil && prev != nil { - if newRoot.epoch <= prev.epoch { - t.Errorf("Unexpected epoch, %v <= %v", newRoot.epoch, prev.epoch) + if prev != nil { + err = sh.SnapshotRevert(prev) + if err != nil { + t.Error(err) } - } else { - if prev == nil { - t.Errorf("The last persisted snapshot before the revert was nil!") + + newRoot, err := sh.PreviousPersistedSnapshot(nil) + if err != nil { + t.Error(err) } + if newRoot == nil { - t.Errorf("The new root has been set to nil?") + t.Errorf("Failed to retrieve latest persisted snapshot") + } + + if newRoot.epoch <= prev.epoch { + t.Errorf("Unexpected epoch, %v <= %v", newRoot.epoch, prev.epoch) } } } From 272da43c165e780945708785738167e093a91bb6 Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Wed, 27 Dec 2017 10:20:45 -0800 Subject: [PATCH 109/728] phrase searcher don't 
allow advance after end --- search/searcher/search_phrase.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/search/searcher/search_phrase.go b/search/searcher/search_phrase.go index 552dfabef..6237cecfd 100644 --- a/search/searcher/search_phrase.go +++ b/search/searcher/search_phrase.go @@ -319,6 +319,9 @@ func (s *PhraseSearcher) Advance(ctx *search.SearchContext, ID index.IndexIntern } ctx.DocumentMatchPool.Put(s.currMust) } + if s.currMust == nil { + return nil, nil + } var err error s.currMust, err = s.mustSearcher.Advance(ctx, ID) if err != nil { From 76f827f469197577d8c6b79f2238fce6608b5da4 Mon Sep 17 00:00:00 2001 From: Sreekanth Sivasankaran Date: Thu, 28 Dec 2017 12:05:33 +0530 Subject: [PATCH 110/728] docValue persist changes docValues are persisted along with the index, in a columnar fashion per field with variable sized chunking for quick look up. -naive chunk level caching is added per field -data part inside a chunk is snappy compressed -metaHeader inside the chunk index the dv values inside the uncompressed data part -all the fields are docValue persisted in this iteration --- index/scorch/scorch.go | 8 + index/scorch/segment/mem/build.go | 4 + index/scorch/segment/mem/segment.go | 7 +- index/scorch/segment/zap/build.go | 110 ++++++++++- index/scorch/segment/zap/contentcoder.go | 180 ++++++++++++++++++ index/scorch/segment/zap/contentcoder_test.go | 75 ++++++++ index/scorch/segment/zap/docvalues.go | 176 +++++++++++++++++ index/scorch/segment/zap/merge.go | 96 +++++++--- index/scorch/segment/zap/segment.go | 44 ++++- index/scorch/segment/zap/write.go | 11 +- index/scorch/snapshot_index.go | 40 ++-- 11 files changed, 699 insertions(+), 52 deletions(-) create mode 100644 index/scorch/segment/zap/contentcoder.go create mode 100644 index/scorch/segment/zap/contentcoder_test.go create mode 100644 index/scorch/segment/zap/docvalues.go diff --git a/index/scorch/scorch.go b/index/scorch/scorch.go index d72a8f886..61381dd57 100644 --- 
a/index/scorch/scorch.go +++ b/index/scorch/scorch.go @@ -37,6 +37,14 @@ const Name = "scorch" const Version uint8 = 1 +// UnInvertIndex is implemented by various scorch index implementations +// to provide the un inverting of the postings or other indexed values. +type UnInvertIndex interface { + // apparently need better namings here.. + VisitDocumentFieldTerms(localDocNum uint64, fields []string, + visitor index.DocumentFieldTermVisitor) error +} + type Scorch struct { readOnly bool version uint8 diff --git a/index/scorch/segment/mem/build.go b/index/scorch/segment/mem/build.go index e111ce4f7..2709d2f06 100644 --- a/index/scorch/segment/mem/build.go +++ b/index/scorch/segment/mem/build.go @@ -116,6 +116,10 @@ func (s *Segment) processDocument(result *index.AnalysisResult) { if field.Options().IsStored() { storeField(docNum, fieldID, encodeFieldType(field), field.Value(), field.ArrayPositions()) } + // TODO with mapping changes for dv + //if field.Options().IncludeDocValues() { + s.DocValueFields[fieldID] = true + //} } // now that its been rolled up into docMap, walk that diff --git a/index/scorch/segment/mem/segment.go b/index/scorch/segment/mem/segment.go index 75ff50cc0..cd22616e8 100644 --- a/index/scorch/segment/mem/segment.go +++ b/index/scorch/segment/mem/segment.go @@ -87,12 +87,17 @@ type Segment struct { // stored field array positions // docNum -> field id -> slice of array positions (each is []uint64) StoredPos []map[uint16][][]uint64 + + // for marking the docValue override status + // field id -> status + DocValueFields map[uint16]bool } // New builds a new empty Segment func New() *Segment { return &Segment{ - FieldsMap: map[string]uint16{}, + FieldsMap: map[string]uint16{}, + DocValueFields: map[uint16]bool{}, } } diff --git a/index/scorch/segment/zap/build.go b/index/scorch/segment/zap/build.go index 8bd00601e..5047acc0e 100644 --- a/index/scorch/segment/zap/build.go +++ b/index/scorch/segment/zap/build.go @@ -20,6 +20,7 @@ import ( 
"encoding/binary" "math" "os" + "sort" "github.com/Smerity/govarint" "github.com/blevesearch/bleve/index/scorch/segment/mem" @@ -48,6 +49,7 @@ func PersistSegment(memSegment *mem.Segment, path string, chunkFactor uint32) (e var storedIndexOffset uint64 var dictLocs []uint64 + var docValueOffset uint64 if len(memSegment.Stored) > 0 { storedIndexOffset, err = persistStored(memSegment, cr) @@ -78,6 +80,11 @@ func PersistSegment(memSegment *mem.Segment, path string, chunkFactor uint32) (e return err } + docValueOffset, err = persistFieldDocValues(cr, chunkFactor, memSegment) + if err != nil { + return err + } + } else { dictLocs = make([]uint64, len(memSegment.FieldsInv)) } @@ -89,7 +96,7 @@ func PersistSegment(memSegment *mem.Segment, path string, chunkFactor uint32) (e } err = persistFooter(uint64(len(memSegment.Stored)), storedIndexOffset, - fieldIndexStart, chunkFactor, cr) + fieldIndexStart, docValueOffset, chunkFactor, cr) if err != nil { return err } @@ -419,3 +426,104 @@ func persistDictionary(memSegment *mem.Segment, w *CountHashWriter, postingsLocs return rv, nil } + +type docIDRange []uint64 + +func (a docIDRange) Len() int { return len(a) } +func (a docIDRange) Swap(i, j int) { a[i], a[j] = a[j], a[i] } +func (a docIDRange) Less(i, j int) bool { return a[i] < a[j] } + +func persistDocValues(memSegment *mem.Segment, w *CountHashWriter, + chunkFactor uint32) (map[uint16]uint64, error) { + fieldChunkOffsets := make(map[uint16]uint64, len(memSegment.DocValueFields)) + fdvEncoder := newChunkedContentCoder(uint64(chunkFactor), uint64(len(memSegment.Stored)-1)) + + for fieldID := range memSegment.DocValueFields { + field := memSegment.FieldsInv[fieldID] + docTermMap := make(map[uint64][]byte, 0) + dict, err := memSegment.Dictionary(field) + if err != nil { + return fieldChunkOffsets, err + } + + dictItr := dict.Iterator() + next, err := dictItr.Next() + for err == nil && next != nil { + postings, err1 := dict.PostingsList(next.Term, nil) + if err1 != nil { + 
return fieldChunkOffsets, err + } + + postingsItr := postings.Iterator() + nextPosting, err2 := postingsItr.Next() + for err2 == nil && nextPosting != nil { + docNum := nextPosting.Number() + docTermMap[docNum] = append(docTermMap[docNum], []byte(next.Term)...) + docTermMap[docNum] = append(docTermMap[docNum], termSeparator) + nextPosting, err2 = postingsItr.Next() + } + if err2 != nil { + return fieldChunkOffsets, err2 + } + + next, err = dictItr.Next() + } + + if err != nil { + return fieldChunkOffsets, err + } + // sort wrt to docIDs + var docNumbers docIDRange + for k := range docTermMap { + docNumbers = append(docNumbers, k) + } + sort.Sort(docNumbers) + + for _, docNum := range docNumbers { + err = fdvEncoder.Add(docNum, docTermMap[docNum]) + if err != nil { + return fieldChunkOffsets, err + } + } + + fieldChunkOffsets[fieldID] = uint64(w.Count()) + fdvEncoder.Close() + // persist the doc value details for this field + _, err = fdvEncoder.Write(w) + if err != nil { + return fieldChunkOffsets, err + } + // resetting encoder for the next field + fdvEncoder.Reset() + } + + return fieldChunkOffsets, nil +} + +func persistFieldDocValues(w *CountHashWriter, chunkFactor uint32, + memSegment *mem.Segment) (uint64, error) { + + fieldDvOffsets, err := persistDocValues(memSegment, w, chunkFactor) + if err != nil { + return math.MaxUint64, err + } + + fieldDocValuesOffset := uint64(w.Count()) + buf := make([]byte, binary.MaxVarintLen64) + offset := uint64(math.MaxUint64) + ok := true + for fieldID := range memSegment.FieldsInv { + // if the field isn't configured for docValue, then mark + // the offset accordingly + if offset, ok = fieldDvOffsets[uint16(fieldID)]; !ok { + offset = math.MaxUint64 + } + n := binary.PutUvarint(buf, uint64(offset)) + _, err := w.Write(buf[:n]) + if err != nil { + return math.MaxUint64, err + } + } + + return fieldDocValuesOffset, nil +} diff --git a/index/scorch/segment/zap/contentcoder.go b/index/scorch/segment/zap/contentcoder.go new file 
mode 100644 index 000000000..cf05b6759 --- /dev/null +++ b/index/scorch/segment/zap/contentcoder.go @@ -0,0 +1,180 @@ +// Copyright (c) 2017 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package zap + +import ( + "bytes" + "encoding/binary" + "io" + + "github.com/golang/snappy" +) + +var termSeparator byte = 0xff +var termSeparatorSplitSlice = []byte{termSeparator} + +type chunkedContentCoder struct { + final []byte + chunkSize uint64 + currChunk uint64 + chunkLens []uint64 + chunkMetaBuf bytes.Buffer + chunkBuf bytes.Buffer + + chunkMeta []metaData +} + +// metaData represents the data information inside a +// chunk. +type metaData struct { + docID uint64 // docid of the data inside the chunk + docDvLoc uint64 // starting offset for a given docid + docDvLen uint64 // length of data inside the chunk for the given docid +} + +// newChunkedContentCoder returns a new chunk content coder which +// packs data into chunks based on the provided chunkSize +func newChunkedContentCoder(chunkSize uint64, maxDocNum uint64) *chunkedContentCoder { + total := maxDocNum/chunkSize + 1 + rv := &chunkedContentCoder{ + chunkSize: chunkSize, + chunkLens: make([]uint64, total), + chunkMeta: []metaData{}, + } + + return rv +} + +// Reset lets you reuse this chunked content coder. Buffers are reset +// and re used. You cannot change the chunk size. 
+func (c *chunkedContentCoder) Reset() { + c.currChunk = 0 + c.final = c.final[:0] + c.chunkBuf.Reset() + c.chunkMetaBuf.Reset() + for i := range c.chunkLens { + c.chunkLens[i] = 0 + } + c.chunkMeta = []metaData{} +} + +// Close indicates you are done calling Add() this allows +// the final chunk to be encoded. +func (c *chunkedContentCoder) Close() { + c.flushContents() +} + +func (c *chunkedContentCoder) flushContents() error { + // flush the contents, with meta information at first + buf := make([]byte, binary.MaxVarintLen64) + n := binary.PutUvarint(buf, uint64(len(c.chunkMeta))) + _, err := c.chunkMetaBuf.Write(buf[:n]) + if err != nil { + return err + } + + // write out the metaData slice + for _, meta := range c.chunkMeta { + n := binary.PutUvarint(buf, meta.docID) + _, err = c.chunkMetaBuf.Write(buf[:n]) + if err != nil { + return err + } + + n = binary.PutUvarint(buf, meta.docDvLoc) + _, err = c.chunkMetaBuf.Write(buf[:n]) + if err != nil { + return err + } + + n = binary.PutUvarint(buf, meta.docDvLen) + _, err = c.chunkMetaBuf.Write(buf[:n]) + if err != nil { + return err + } + + } + + // write the metadata to final data + metaData := c.chunkMetaBuf.Bytes() + c.final = append(c.final, c.chunkMetaBuf.Bytes()...) + // write the compressed data to the final data + compressedData := snappy.Encode(nil, c.chunkBuf.Bytes()) + c.final = append(c.final, compressedData...) + //c.chunkLens = append(c.chunkLens, uint64(len(compressedData)+len(metaData))) + c.chunkLens[c.currChunk] = uint64(len(compressedData) + len(metaData)) + return nil +} + +// Add encodes the provided byte slice into the correct chunk for the provided +// doc num. You MUST call Add() with increasing docNums. 
+func (c *chunkedContentCoder) Add(docNum uint64, vals []byte) error { + chunk := docNum / c.chunkSize + if chunk != c.currChunk { + // flush out the previous chunk details + err := c.flushContents() + if err != nil { + return err + } + // clearing the chunk specific meta for next chunk + c.chunkBuf.Reset() + c.chunkMetaBuf.Reset() + c.chunkMeta = []metaData{} + c.currChunk = chunk + } + + // mark the starting offset for this doc + dvOffset := c.chunkBuf.Len() + dvSize, err := c.chunkBuf.Write(vals) + if err != nil { + return err + } + + c.chunkMeta = append(c.chunkMeta, metaData{ + docID: docNum, + docDvLoc: uint64(dvOffset), + docDvLen: uint64(dvSize), + }) + return nil +} + +// Write commits all the encoded chunked contents to the provided writer. +func (c *chunkedContentCoder) Write(w io.Writer) (int, error) { + var tw int + buf := make([]byte, binary.MaxVarintLen64) + // write out the number of chunks + n := binary.PutUvarint(buf, uint64(len(c.chunkLens))) + nw, err := w.Write(buf[:n]) + tw += nw + if err != nil { + return tw, err + } + // write out the chunk lens + for _, chunkLen := range c.chunkLens { + n := binary.PutUvarint(buf, uint64(chunkLen)) + nw, err = w.Write(buf[:n]) + tw += nw + if err != nil { + return tw, err + } + } + // write out the data + nw, err = w.Write(c.final) + tw += nw + if err != nil { + return tw, err + } + return tw, nil +} diff --git a/index/scorch/segment/zap/contentcoder_test.go b/index/scorch/segment/zap/contentcoder_test.go new file mode 100644 index 000000000..4ae4d8121 --- /dev/null +++ b/index/scorch/segment/zap/contentcoder_test.go @@ -0,0 +1,75 @@ +// Copyright (c) 2017 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package zap + +import ( + "bytes" + "reflect" + "testing" +) + +func TestChunkContentCoder(t *testing.T) { + + tests := []struct { + maxDocNum uint64 + chunkSize uint64 + docNums []uint64 + vals [][]byte + expected string + }{ + { + maxDocNum: 0, + chunkSize: 1, + docNums: []uint64{0}, + vals: [][]byte{[]byte("bleve")}, + // 1 chunk, chunk-0 length 11(b), value + expected: string([]byte{0x1, 0xb, 0x1, 0x0, 0x0, 0x05, 0x05, 0x10, 0x62, 0x6c, 0x65, 0x76, 0x65}), + }, + { + maxDocNum: 1, + chunkSize: 1, + docNums: []uint64{0, 1}, + vals: [][]byte{ + []byte("upside"), + []byte("scorch"), + }, + + expected: string([]byte{0x02, 0x0c, 0x0c, 0x01, 0x00, 0x00, 0x06, 0x06, 0x14, + 0x75, 0x70, 0x73, 0x69, 0x64, 0x65, 0x01, 0x01, 0x00, 0x06, 0x06, + 0x14, 0x73, 0x63, 0x6f, 0x72, 0x63, 0x68}), + }, + } + + for _, test := range tests { + + cic := newChunkedContentCoder(test.chunkSize, test.maxDocNum) + for i, docNum := range test.docNums { + err := cic.Add(docNum, test.vals[i]) + if err != nil { + t.Fatalf("error adding to intcoder: %v", err) + } + } + cic.Close() + var actual bytes.Buffer + _, err := cic.Write(&actual) + if err != nil { + t.Fatalf("error writing: %v", err) + } + + if !reflect.DeepEqual(test.expected, string(actual.Bytes())) { + t.Errorf("got % s, expected % s", string(actual.Bytes()), test.expected) + } + } +} diff --git a/index/scorch/segment/zap/docvalues.go b/index/scorch/segment/zap/docvalues.go new file mode 100644 index 000000000..c8dad8fdc --- /dev/null +++ b/index/scorch/segment/zap/docvalues.go @@ -0,0 +1,176 @@ +// Copyright (c) 
2017 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package zap + +import ( + "bytes" + "encoding/binary" + "fmt" + "math" + "sort" + + "github.com/blevesearch/bleve/index" + "github.com/golang/snappy" +) + +type docValueIterator struct { + field string + curChunkNum uint64 + numChunks uint64 + chunkLens []uint64 + dvDataLoc uint64 + curChunkHeader []metaData + curChunkData []byte // compressed data cache +} + +func (di *docValueIterator) fieldName() string { + return di.field +} + +func (di *docValueIterator) curChunkNumber() uint64 { + return di.curChunkNum +} + +func (s *Segment) loadFieldDocValueIterator(field string, + fieldDvLoc uint64) (*docValueIterator, error) { + // get the docValue offset for the given fields + if fieldDvLoc == math.MaxUint64 { + return nil, fmt.Errorf("loadFieldDocValueConfigs: "+ + "no docValues found for field: %s", field) + } + + // read the number of chunks, chunk lengths + var offset uint64 + numChunks, read := binary.Uvarint(s.mm[fieldDvLoc : fieldDvLoc+binary.MaxVarintLen64]) + if read <= 0 { + return nil, fmt.Errorf("failed to read the field "+ + "doc values for field %s", field) + } + offset += uint64(read) + + fdvIter := &docValueIterator{ + curChunkNum: math.MaxUint64, + field: field, + chunkLens: make([]uint64, int(numChunks)), + } + for i := 0; i < int(numChunks); i++ { + fdvIter.chunkLens[i], read = binary.Uvarint(s.mm[fieldDvLoc+offset : fieldDvLoc+offset+binary.MaxVarintLen64]) + offset += 
uint64(read) + } + + fdvIter.dvDataLoc = fieldDvLoc + offset + return fdvIter, nil +} + +func (di *docValueIterator) loadDvChunk(chunkNumber, + localDocNum uint64, s *Segment) error { + // advance to the chunk where the docValues + // reside for the given docID + destChunkDataLoc := di.dvDataLoc + for i := 0; i < int(chunkNumber); i++ { + destChunkDataLoc += di.chunkLens[i] + } + + curChunkSize := di.chunkLens[chunkNumber] + // read the number of docs reside in the chunk + numDocs, read := binary.Uvarint(s.mm[destChunkDataLoc : destChunkDataLoc+binary.MaxVarintLen64]) + if read <= 0 { + return fmt.Errorf("failed to read the chunk") + } + chunkMetaLoc := destChunkDataLoc + uint64(read) + + offset := uint64(0) + di.curChunkHeader = make([]metaData, int(numDocs)) + for i := 0; i < int(numDocs); i++ { + di.curChunkHeader[i].docID, read = binary.Uvarint(s.mm[chunkMetaLoc+offset : chunkMetaLoc+offset+binary.MaxVarintLen64]) + offset += uint64(read) + di.curChunkHeader[i].docDvLoc, read = binary.Uvarint(s.mm[chunkMetaLoc+offset : chunkMetaLoc+offset+binary.MaxVarintLen64]) + offset += uint64(read) + di.curChunkHeader[i].docDvLen, read = binary.Uvarint(s.mm[chunkMetaLoc+offset : chunkMetaLoc+offset+binary.MaxVarintLen64]) + offset += uint64(read) + } + + compressedDataLoc := chunkMetaLoc + offset + dataLength := destChunkDataLoc + curChunkSize - compressedDataLoc + di.curChunkData = s.mm[compressedDataLoc : compressedDataLoc+dataLength] + di.curChunkNum = chunkNumber + return nil +} + +func (di *docValueIterator) visitDocValues(docID uint64, + visitor index.DocumentFieldTermVisitor) error { + // binary search the term locations for the docID + start, length := di.getDocValueLocs(docID) + if start == math.MaxUint64 || length == math.MaxUint64 { + return nil + } + // uncompress the already loaded data + uncompressed, err := snappy.Decode(nil, di.curChunkData) + if err != nil { + return err + } + + // pick the terms for the given docID + uncompressed = uncompressed[start : 
start+length] + for { + i := bytes.Index(uncompressed, termSeparatorSplitSlice) + if i < 0 { + break + } + + visitor(di.field, uncompressed[0:i]) + uncompressed = uncompressed[i+1:] + } + + return nil +} + +func (di *docValueIterator) getDocValueLocs(docID uint64) (uint64, uint64) { + i := sort.Search(len(di.curChunkHeader), func(i int) bool { + return di.curChunkHeader[i].docID >= docID + }) + if i < len(di.curChunkHeader) && di.curChunkHeader[i].docID == docID { + return di.curChunkHeader[i].docDvLoc, di.curChunkHeader[i].docDvLen + } + return math.MaxUint64, math.MaxUint64 +} + +// VisitDocumentFieldTerms is an implementation of the UnInvertIndex interface +func (s *Segment) VisitDocumentFieldTerms(localDocNum uint64, fields []string, + visitor index.DocumentFieldTermVisitor) error { + fieldID := uint16(0) + ok := true + for _, field := range fields { + if fieldID, ok = s.fieldsMap[field]; !ok { + continue + } + // find the chunkNumber where the docValues are stored + docInChunk := localDocNum / uint64(s.chunkFactor) + + if dvIter, exists := s.fieldDvIterMap[fieldID-1]; exists && + dvIter != nil { + // check if the chunk is already loaded + if docInChunk != dvIter.curChunkNumber() { + err := dvIter.loadDvChunk(docInChunk, localDocNum, s) + if err != nil { + continue + } + } + + dvIter.visitDocValues(localDocNum, visitor) + } + } + return nil +} diff --git a/index/scorch/segment/zap/merge.go b/index/scorch/segment/zap/merge.go index 5d845bbd1..36959c62e 100644 --- a/index/scorch/segment/zap/merge.go +++ b/index/scorch/segment/zap/merge.go @@ -21,6 +21,7 @@ import ( "fmt" "math" "os" + "sort" "github.com/RoaringBitmap/roaring" "github.com/Smerity/govarint" @@ -52,7 +53,7 @@ func Merge(segments []*Segment, drops []*roaring.Bitmap, path string, newSegDocCount := computeNewDocCount(segments, drops) var newDocNums [][]uint64 - var storedIndexOffset uint64 + var storedIndexOffset, fieldDvLocsOffset uint64 var dictLocs []uint64 if newSegDocCount > 0 { storedIndexOffset, 
newDocNums, err = mergeStoredAndRemap(segments, drops, @@ -61,7 +62,7 @@ func Merge(segments []*Segment, drops []*roaring.Bitmap, path string, return nil, err } - dictLocs, err = persistMergedRest(segments, drops, fieldsInv, fieldsMap, + dictLocs, fieldDvLocsOffset, err = persistMergedRest(segments, drops, fieldsInv, fieldsMap, newDocNums, newSegDocCount, chunkFactor, cr) if err != nil { return nil, err @@ -77,7 +78,7 @@ func Merge(segments []*Segment, drops []*roaring.Bitmap, path string, } err = persistFooter(newSegDocCount, storedIndexOffset, - fieldsIndexOffset, chunkFactor, cr) + fieldsIndexOffset, fieldDvLocsOffset, chunkFactor, cr) if err != nil { return nil, err } @@ -126,12 +127,14 @@ func computeNewDocCount(segments []*Segment, drops []*roaring.Bitmap) uint64 { func persistMergedRest(segments []*Segment, drops []*roaring.Bitmap, fieldsInv []string, fieldsMap map[string]uint16, newDocNums [][]uint64, newSegDocCount uint64, chunkFactor uint32, - w *CountHashWriter) ([]uint64, error) { + w *CountHashWriter) ([]uint64, uint64, error) { var bufMaxVarintLen64 []byte = make([]byte, binary.MaxVarintLen64) var bufLoc []uint64 - rv := make([]uint64, len(fieldsInv)) + rv1 := make([]uint64, len(fieldsInv)) + fieldDvLocs := make([]uint64, len(fieldsInv)) + fieldDvLocsOffset := uint64(math.MaxUint64) var vellumBuf bytes.Buffer // for each field @@ -141,23 +144,23 @@ func persistMergedRest(segments []*Segment, drops []*roaring.Bitmap, } newVellum, err := vellum.New(&vellumBuf, nil) if err != nil { - return nil, err + return nil, fieldDvLocsOffset, err } - // collect FTS iterators from all segments for this field + // collect FST iterators from all segments for this field var dicts []*Dictionary var itrs []vellum.Iterator for _, segment := range segments { dict, err2 := segment.dictionary(fieldName) if err2 != nil { - return nil, err2 + return nil, fieldDvLocsOffset, err2 } dicts = append(dicts, dict) if dict != nil && dict.fst != nil { itr, err2 := 
dict.fst.Iterator(nil, nil) if err2 != nil && err2 != vellum.ErrIteratorDone { - return nil, err2 + return nil, fieldDvLocsOffset, err2 } if itr != nil { itrs = append(itrs, itr) @@ -173,6 +176,8 @@ func persistMergedRest(segments []*Segment, drops []*roaring.Bitmap, tfEncoder := newChunkedIntCoder(uint64(chunkFactor), newSegDocCount-1) locEncoder := newChunkedIntCoder(uint64(chunkFactor), newSegDocCount-1) + fdvEncoder := newChunkedContentCoder(uint64(chunkFactor), newSegDocCount-1) + docTermMap := make(map[uint64][]byte, 0) for err == nil { term, _ := mergeItr.Current() @@ -189,7 +194,7 @@ func persistMergedRest(segments []*Segment, drops []*roaring.Bitmap, } postings, err2 := dict.postingsList(string(term), drops[dictI]) if err2 != nil { - return nil, err2 + return nil, fieldDvLocsOffset, err2 } postItr := postings.Iterator() @@ -197,7 +202,7 @@ func persistMergedRest(segments []*Segment, drops []*roaring.Bitmap, for next != nil && err2 == nil { hitNewDocNum := newDocNums[dictI][next.Number()] if hitNewDocNum == docDropped { - return nil, fmt.Errorf("see hit with dropped doc num") + return nil, fieldDvLocsOffset, fmt.Errorf("see hit with dropped doc num") } newRoaring.Add(uint32(hitNewDocNum)) // encode norm bits @@ -205,7 +210,7 @@ func persistMergedRest(segments []*Segment, drops []*roaring.Bitmap, normBits := math.Float32bits(float32(norm)) err3 := tfEncoder.Add(hitNewDocNum, next.Frequency(), uint64(normBits)) if err3 != nil { - return nil, err3 + return nil, fieldDvLocsOffset, err3 } locs := next.Locations() if len(locs) > 0 { @@ -223,14 +228,17 @@ func persistMergedRest(segments []*Segment, drops []*roaring.Bitmap, args = append(args, loc.ArrayPositions()...) err = locEncoder.Add(hitNewDocNum, args...) if err != nil { - return nil, err + return nil, fieldDvLocsOffset, err } } } + + docTermMap[hitNewDocNum] = append(docTermMap[hitNewDocNum], []byte(term)...) 
+ docTermMap[hitNewDocNum] = append(docTermMap[hitNewDocNum], termSeparator) next, err2 = postItr.Next() } if err != nil { - return nil, err + return nil, fieldDvLocsOffset, err } } @@ -242,17 +250,17 @@ func persistMergedRest(segments []*Segment, drops []*roaring.Bitmap, freqOffset := uint64(w.Count()) _, err = tfEncoder.Write(w) if err != nil { - return nil, err + return nil, fieldDvLocsOffset, err } locOffset := uint64(w.Count()) _, err = locEncoder.Write(w) if err != nil { - return nil, err + return nil, fieldDvLocsOffset, err } postingLocOffset := uint64(w.Count()) _, err = writeRoaringWithLen(newRoaringLocs, w) if err != nil { - return nil, err + return nil, fieldDvLocsOffset, err } postingOffset := uint64(w.Count()) // write out the start of the term info @@ -260,43 +268,43 @@ func persistMergedRest(segments []*Segment, drops []*roaring.Bitmap, n := binary.PutUvarint(buf, freqOffset) _, err = w.Write(buf[:n]) if err != nil { - return nil, err + return nil, fieldDvLocsOffset, err } // write out the start of the loc info n = binary.PutUvarint(buf, locOffset) _, err = w.Write(buf[:n]) if err != nil { - return nil, err + return nil, fieldDvLocsOffset, err } // write out the start of the loc posting list n = binary.PutUvarint(buf, postingLocOffset) _, err = w.Write(buf[:n]) if err != nil { - return nil, err + return nil, fieldDvLocsOffset, err } _, err = writeRoaringWithLen(newRoaring, w) if err != nil { - return nil, err + return nil, fieldDvLocsOffset, err } err = newVellum.Insert(term, postingOffset) if err != nil { - return nil, err + return nil, fieldDvLocsOffset, err } } err = mergeItr.Next() } if err != nil && err != vellum.ErrIteratorDone { - return nil, err + return nil, fieldDvLocsOffset, err } dictOffset := uint64(w.Count()) err = newVellum.Close() if err != nil { - return nil, err + return nil, fieldDvLocsOffset, err } vellumData := vellumBuf.Bytes() @@ -306,19 +314,51 @@ func persistMergedRest(segments []*Segment, drops []*roaring.Bitmap, n := 
binary.PutUvarint(buf, uint64(len(vellumData))) _, err = w.Write(buf[:n]) if err != nil { - return nil, err + return nil, fieldDvLocsOffset, err } // write this vellum to disk _, err = w.Write(vellumData) if err != nil { - return nil, err + return nil, fieldDvLocsOffset, err + } + + rv1[fieldID] = dictOffset + + // update teh doc value + var docNumbers docIDRange + for k := range docTermMap { + docNumbers = append(docNumbers, k) } + sort.Sort(docNumbers) - rv[fieldID] = dictOffset + for _, docNum := range docNumbers { + err = fdvEncoder.Add(docNum, docTermMap[docNum]) + if err != nil { + return nil, fieldDvLocsOffset, err + } + } + // get the field doc value offset + fieldDvLocs[fieldID] = uint64(w.Count()) + fdvEncoder.Close() + // persist the doc value details for this field + _, err = fdvEncoder.Write(w) + if err != nil { + return nil, fieldDvLocsOffset, err + } + } + + fieldDvLocsOffset = uint64(w.Count()) + buf := make([]byte, binary.MaxVarintLen64) + for _, offset := range fieldDvLocs { + n := binary.PutUvarint(buf, uint64(offset)) + _, err := w.Write(buf[:n]) + if err != nil { + return nil, math.MaxUint64, err + } } - return rv, nil + return rv1, fieldDvLocsOffset, nil } const docDropped = math.MaxUint64 diff --git a/index/scorch/segment/zap/segment.go b/index/scorch/segment/zap/segment.go index e78ac392f..c5d5aed22 100644 --- a/index/scorch/segment/zap/segment.go +++ b/index/scorch/segment/zap/segment.go @@ -19,6 +19,7 @@ import ( "encoding/binary" "fmt" "io" + "math" "os" "sync" @@ -44,11 +45,12 @@ func Open(path string) (segment.Segment, error) { } rv := &Segment{ - f: f, - mm: mm, - path: path, - fieldsMap: make(map[string]uint16), - refs: 1, + f: f, + mm: mm, + path: path, + fieldsMap: make(map[string]uint16), + fieldDvIterMap: make(map[uint16]*docValueIterator), + refs: 1, } err = rv.loadConfig() @@ -63,6 +65,12 @@ func Open(path string) (segment.Segment, error) { return nil, err } + err = rv.loadDvIterators() + if err != nil { + _ = rv.Close() + 
return nil, err + } + return rv, nil } @@ -82,6 +90,9 @@ type Segment struct { fieldsInv []string fieldsOffsets []uint64 + docValueOffset uint64 + fieldDvIterMap map[uint16]*docValueIterator // naive chunk cache per field + m sync.Mutex // Protects the fields that follow. refs int64 } @@ -112,7 +123,11 @@ func (s *Segment) loadConfig() error { } chunkOffset := verOffset - 4 s.chunkFactor = binary.BigEndian.Uint32(s.mm[chunkOffset : chunkOffset+4]) - fieldsOffset := chunkOffset - 8 + + docValueOffset := chunkOffset - 8 + s.docValueOffset = binary.BigEndian.Uint64(s.mm[docValueOffset : docValueOffset+8]) + fieldsOffset := docValueOffset - 8 + s.fieldsIndexOffset = binary.BigEndian.Uint64(s.mm[fieldsOffset : fieldsOffset+8]) storedOffset := fieldsOffset - 8 s.storedIndexOffset = binary.BigEndian.Uint64(s.mm[storedOffset : storedOffset+8]) @@ -355,3 +370,20 @@ func (s *Segment) DictAddr(field string) (uint64, error) { return s.fieldsOffsets[fieldID-1], nil } + +func (s *Segment) loadDvIterators() error { + if s.docValueOffset == math.MaxUint64 { + return nil + } + + var read uint64 + for fieldID, field := range s.fieldsInv { + fieldLoc, n := binary.Uvarint(s.mm[s.docValueOffset+read : s.docValueOffset+read+binary.MaxVarintLen64]) + if n <= 0 { + return fmt.Errorf("loadDvIterators: failed to read the docvalue offsets for field %d", fieldID) + } + s.fieldDvIterMap[uint16(fieldID)], _ = s.loadFieldDocValueIterator(field, fieldLoc) + read += uint64(n) + } + return nil +} diff --git a/index/scorch/segment/zap/write.go b/index/scorch/segment/zap/write.go index a831ef6ae..cfb7e46e9 100644 --- a/index/scorch/segment/zap/write.go +++ b/index/scorch/segment/zap/write.go @@ -86,10 +86,10 @@ func persistFields(fieldsInv []string, w *CountHashWriter, dictLocs []uint64) (u } // FooterSize is the size of the footer record in bytes -// crc + ver + chunk + field offset + stored offset + num docs -const FooterSize = 4 + 4 + 4 + 8 + 8 + 8 +// crc + ver + chunk + field offset + stored 
offset + num docs + docValueOffset +const FooterSize = 4 + 4 + 4 + 8 + 8 + 8 + 8 -func persistFooter(numDocs, storedIndexOffset, fieldIndexOffset uint64, +func persistFooter(numDocs, storedIndexOffset, fieldIndexOffset, docValueOffset uint64, chunkFactor uint32, w *CountHashWriter) error { // write out the number of docs err := binary.Write(w, binary.BigEndian, numDocs) @@ -106,6 +106,11 @@ func persistFooter(numDocs, storedIndexOffset, fieldIndexOffset uint64, if err != nil { return err } + // write out the fieldDocValue location + err = binary.Write(w, binary.BigEndian, docValueOffset) + if err != nil { + return err + } // write out 32-bit chunk factor err = binary.Write(w, binary.BigEndian, chunkFactor) if err != nil { diff --git a/index/scorch/snapshot_index.go b/index/scorch/snapshot_index.go index 5f08a496f..2b2654b58 100644 --- a/index/scorch/snapshot_index.go +++ b/index/scorch/snapshot_index.go @@ -22,6 +22,9 @@ import ( "sort" "sync" + "github.com/blevesearch/bleve/index/scorch/segment/mem" + "github.com/blevesearch/bleve/index/scorch/segment/zap" + "github.com/RoaringBitmap/roaring" "github.com/blevesearch/bleve/document" "github.com/blevesearch/bleve/index" @@ -401,24 +404,35 @@ func (i *IndexSnapshot) DocumentVisitFieldTerms(id index.IndexInternalID, ss := i.segment[segmentIndex] - err = ss.cachedDocs.prepareFields(fields, ss) - if err != nil { - return err - } + switch seg := ss.segment.(type) { + case *mem.Segment: + err = ss.cachedDocs.prepareFields(fields, ss) + if err != nil { + return err + } - for _, field := range fields { - if cachedFieldDocs, exists := ss.cachedDocs.cache[field]; exists { - if tlist, exists := cachedFieldDocs.docs[localDocNum]; exists { - for { - i := bytes.Index(tlist, TermSeparatorSplitSlice) - if i < 0 { - break + for _, field := range fields { + if cachedFieldDocs, exists := ss.cachedDocs.cache[field]; exists { + if tlist, exists := cachedFieldDocs.docs[localDocNum]; exists { + for { + i := bytes.Index(tlist, 
TermSeparatorSplitSlice) + if i < 0 { + break + } + visitor(field, tlist[0:i]) + tlist = tlist[i+1:] } - visitor(field, tlist[0:i]) - tlist = tlist[i+1:] } } } + + case *zap.Segment: + if zaps, ok := ss.segment.(UnInvertIndex); ok { + return zaps.VisitDocumentFieldTerms(localDocNum, fields, visitor) + } + + default: + return fmt.Errorf("snapshot_index: DocumentVisitFieldTerms, unknown segment type: %T", seg) } return nil From 0272451093c3013be4a0df701e69d4f1a83936db Mon Sep 17 00:00:00 2001 From: Sreekanth Sivasankaran Date: Thu, 28 Dec 2017 13:05:25 +0530 Subject: [PATCH 111/728] adding checks for robustness --- index/scorch/segment/zap/contentcoder.go | 5 +++-- index/scorch/segment/zap/docvalues.go | 8 ++++++-- index/scorch/segment/zap/segment.go | 2 +- 3 files changed, 10 insertions(+), 5 deletions(-) diff --git a/index/scorch/segment/zap/contentcoder.go b/index/scorch/segment/zap/contentcoder.go index cf05b6759..e13e108e6 100644 --- a/index/scorch/segment/zap/contentcoder.go +++ b/index/scorch/segment/zap/contentcoder.go @@ -46,7 +46,8 @@ type metaData struct { // newChunkedContentCoder returns a new chunk content coder which // packs data into chunks based on the provided chunkSize -func newChunkedContentCoder(chunkSize uint64, maxDocNum uint64) *chunkedContentCoder { +func newChunkedContentCoder(chunkSize uint64, + maxDocNum uint64) *chunkedContentCoder { total := maxDocNum/chunkSize + 1 rv := &chunkedContentCoder{ chunkSize: chunkSize, @@ -113,7 +114,7 @@ func (c *chunkedContentCoder) flushContents() error { // write the compressed data to the final data compressedData := snappy.Encode(nil, c.chunkBuf.Bytes()) c.final = append(c.final, compressedData...) 
- //c.chunkLens = append(c.chunkLens, uint64(len(compressedData)+len(metaData))) + c.chunkLens[c.currChunk] = uint64(len(compressedData) + len(metaData)) return nil } diff --git a/index/scorch/segment/zap/docvalues.go b/index/scorch/segment/zap/docvalues.go index c8dad8fdc..353cbb650 100644 --- a/index/scorch/segment/zap/docvalues.go +++ b/index/scorch/segment/zap/docvalues.go @@ -52,7 +52,7 @@ func (s *Segment) loadFieldDocValueIterator(field string, } // read the number of chunks, chunk lengths - var offset uint64 + var offset, clen uint64 numChunks, read := binary.Uvarint(s.mm[fieldDvLoc : fieldDvLoc+binary.MaxVarintLen64]) if read <= 0 { return nil, fmt.Errorf("failed to read the field "+ @@ -66,7 +66,11 @@ func (s *Segment) loadFieldDocValueIterator(field string, chunkLens: make([]uint64, int(numChunks)), } for i := 0; i < int(numChunks); i++ { - fdvIter.chunkLens[i], read = binary.Uvarint(s.mm[fieldDvLoc+offset : fieldDvLoc+offset+binary.MaxVarintLen64]) + clen, read = binary.Uvarint(s.mm[fieldDvLoc+offset : fieldDvLoc+offset+binary.MaxVarintLen64]) + if read <= 0 { + return nil, fmt.Errorf("corrupted chunk length during segment load") + } + fdvIter.chunkLens[i] = clen offset += uint64(read) } diff --git a/index/scorch/segment/zap/segment.go b/index/scorch/segment/zap/segment.go index c5d5aed22..5a39f48fb 100644 --- a/index/scorch/segment/zap/segment.go +++ b/index/scorch/segment/zap/segment.go @@ -372,7 +372,7 @@ func (s *Segment) DictAddr(field string) (uint64, error) { } func (s *Segment) loadDvIterators() error { - if s.docValueOffset == math.MaxUint64 { + if s.docValueOffset == math.MaxUint64 || s.docValueOffset == 0 { return nil } From 8abac42796f95b8e9f83d2963b540bf5fac4050a Mon Sep 17 00:00:00 2001 From: Sreekanth Sivasankaran Date: Thu, 28 Dec 2017 13:23:57 +0530 Subject: [PATCH 112/728] errCheck fixes --- index/scorch/segment/zap/contentcoder.go | 2 +- index/scorch/segment/zap/docvalues.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff 
--git a/index/scorch/segment/zap/contentcoder.go b/index/scorch/segment/zap/contentcoder.go index e13e108e6..978500e92 100644 --- a/index/scorch/segment/zap/contentcoder.go +++ b/index/scorch/segment/zap/contentcoder.go @@ -74,7 +74,7 @@ func (c *chunkedContentCoder) Reset() { // Close indicates you are done calling Add() this allows // the final chunk to be encoded. func (c *chunkedContentCoder) Close() { - c.flushContents() + _ = c.flushContents() } func (c *chunkedContentCoder) flushContents() error { diff --git a/index/scorch/segment/zap/docvalues.go b/index/scorch/segment/zap/docvalues.go index 353cbb650..9f1e9942f 100644 --- a/index/scorch/segment/zap/docvalues.go +++ b/index/scorch/segment/zap/docvalues.go @@ -173,7 +173,7 @@ func (s *Segment) VisitDocumentFieldTerms(localDocNum uint64, fields []string, } } - dvIter.visitDocValues(localDocNum, visitor) + _ = dvIter.visitDocValues(localDocNum, visitor) } } return nil From becd4677cd60e510dccdc13240e4d87575a3e220 Mon Sep 17 00:00:00 2001 From: abhinavdangeti Date: Wed, 27 Dec 2017 17:06:31 -0700 Subject: [PATCH 113/728] Adding num_items_introduced, num_items_persisted stats + Adding new entries to the stats struct of scorch. + These stats are atomically incremented upon every segment introduction, and upon successful persistence. 
--- index/scorch/introducer.go | 10 ++++++++-- index/scorch/persister.go | 4 ++++ index/scorch/stats.go | 4 ++++ 3 files changed, 16 insertions(+), 2 deletions(-) diff --git a/index/scorch/introducer.go b/index/scorch/introducer.go index c1f9321d5..715325626 100644 --- a/index/scorch/introducer.go +++ b/index/scorch/introducer.go @@ -16,6 +16,7 @@ package scorch import ( "fmt" + "sync/atomic" "github.com/RoaringBitmap/roaring" "github.com/blevesearch/bleve/index/scorch/segment" @@ -142,12 +143,17 @@ func (s *Scorch) introduceSegment(next *segmentIntroduction) error { } // append new segment, if any, to end of the new index snapshot if next.data != nil { - newSnapshot.segment = append(newSnapshot.segment, &SegmentSnapshot{ + newSegmentSnapshot := &SegmentSnapshot{ id: next.id, segment: next.data, // take ownership of next.data's ref-count cachedDocs: &cachedDocs{cache: nil}, - }) + } + newSnapshot.segment = append(newSnapshot.segment, newSegmentSnapshot) newSnapshot.offsets = append(newSnapshot.offsets, running) + + // increment numItemsIntroduced which tracks the number of items + // queued for persistence. 
+ atomic.AddUint64(&s.stats.numItemsIntroduced, newSegmentSnapshot.Count()) } // copy old values for key, oldVal := range s.root.internal { diff --git a/index/scorch/persister.go b/index/scorch/persister.go index 4ad3df80d..52df1ff7a 100644 --- a/index/scorch/persister.go +++ b/index/scorch/persister.go @@ -24,6 +24,7 @@ import ( "sort" "strconv" "strings" + "sync/atomic" "github.com/RoaringBitmap/roaring" "github.com/blevesearch/bleve/index/scorch/segment" @@ -75,6 +76,7 @@ OUTER: _ = ourSnapshot.DecRef() continue OUTER } + lastPersistedEpoch = ourSnapshot.epoch for _, notifyCh := range notifyChs { close(notifyCh) @@ -243,6 +245,8 @@ func (s *Scorch) persistSnapshot(snapshot *IndexSnapshot) error { cachedDocs: segmentSnapshot.cachedDocs, } newIndexSnapshot.segment[i] = newSegmentSnapshot + // update items persisted incase of a new segment snapshot + atomic.AddUint64(&s.stats.numItemsPersisted, newSegmentSnapshot.Count()) } else { newIndexSnapshot.segment[i] = s.root.segment[i] newIndexSnapshot.segment[i].segment.AddRef() diff --git a/index/scorch/stats.go b/index/scorch/stats.go index 13668480d..807e02107 100644 --- a/index/scorch/stats.go +++ b/index/scorch/stats.go @@ -22,6 +22,8 @@ import ( // Stats tracks statistics about the index type Stats struct { analysisTime, indexTime uint64 + numItemsIntroduced uint64 + numItemsPersisted uint64 } // FIXME wire up these other stats again @@ -36,6 +38,8 @@ func (s *Stats) statsMap() map[string]interface{} { // m["term_searchers_started"] = atomic.LoadUint64(&i.termSearchersStarted) // m["term_searchers_finished"] = atomic.LoadUint64(&i.termSearchersFinished) // m["num_plain_text_bytes_indexed"] = atomic.LoadUint64(&i.numPlainTextBytesIndexed) + m["num_items_introduced"] = atomic.LoadUint64(&s.numItemsIntroduced) + m["num_items_persisted"] = atomic.LoadUint64(&s.numItemsPersisted) return m } From 4bede84fd0fba8750232e8be403a56d08348905c Mon Sep 17 00:00:00 2001 From: abhinavdangeti Date: Wed, 20 Dec 2017 17:25:21 -0800 
Subject: [PATCH 114/728] Wiring up missing stats for scorch - updates, deletes, batches, errors - term_searchers_started, term_searchers_finished - num_plain_test_bytes_indexed --- index/scorch/scorch.go | 15 +++++++++++++-- index/scorch/snapshot_index.go | 2 ++ index/scorch/snapshot_index_tfr.go | 4 ++++ index/scorch/stats.go | 25 ++++++++++++++----------- 4 files changed, 33 insertions(+), 13 deletions(-) diff --git a/index/scorch/scorch.go b/index/scorch/scorch.go index d72a8f886..9d2b9049b 100644 --- a/index/scorch/scorch.go +++ b/index/scorch/scorch.go @@ -192,6 +192,7 @@ func (s *Scorch) Batch(batch *index.Batch) error { resultChan := make(chan *index.AnalysisResult, len(batch.IndexOps)) var numUpdates uint64 + var numDeletes uint64 var numPlainTextBytes uint64 var ids []string for docID, doc := range batch.IndexOps { @@ -200,6 +201,8 @@ func (s *Scorch) Batch(batch *index.Batch) error { doc.AddField(document.NewTextFieldCustom("_id", nil, []byte(doc.ID), document.IndexField|document.StoreField, nil)) numUpdates++ numPlainTextBytes += doc.NumPlainTextBytes() + } else { + numDeletes++ } ids = append(ids, docID) } @@ -234,8 +237,16 @@ func (s *Scorch) Batch(batch *index.Batch) error { } err := s.prepareSegment(newSegment, ids, batch.InternalOps) - if err != nil && newSegment != nil { - _ = newSegment.Close() + if err != nil { + if newSegment != nil { + _ = newSegment.Close() + } + atomic.AddUint64(&s.stats.errors, 1) + } else { + atomic.AddUint64(&s.stats.updates, numUpdates) + atomic.AddUint64(&s.stats.deletes, numDeletes) + atomic.AddUint64(&s.stats.batches, 1) + atomic.AddUint64(&s.stats.numPlainTextBytesIndexed, numPlainTextBytes) } return err } diff --git a/index/scorch/snapshot_index.go b/index/scorch/snapshot_index.go index 5f08a496f..9da84278c 100644 --- a/index/scorch/snapshot_index.go +++ b/index/scorch/snapshot_index.go @@ -21,6 +21,7 @@ import ( "fmt" "sort" "sync" + "sync/atomic" "github.com/RoaringBitmap/roaring" 
"github.com/blevesearch/bleve/document" @@ -363,6 +364,7 @@ func (i *IndexSnapshot) TermFieldReader(term []byte, field string, includeFreq, rv.postings[i] = pl rv.iterators[i] = pl.Iterator() } + atomic.AddUint64(&i.parent.stats.termSearchersStarted, uint64(1)) return rv, nil } diff --git a/index/scorch/snapshot_index_tfr.go b/index/scorch/snapshot_index_tfr.go index 497b83dd7..87fd0d14f 100644 --- a/index/scorch/snapshot_index_tfr.go +++ b/index/scorch/snapshot_index_tfr.go @@ -16,6 +16,7 @@ package scorch import ( "bytes" + "sync/atomic" "github.com/blevesearch/bleve/index" "github.com/blevesearch/bleve/index/scorch/segment" @@ -124,5 +125,8 @@ func (i *IndexSnapshotTermFieldReader) Count() uint64 { } func (i *IndexSnapshotTermFieldReader) Close() error { + if i.snapshot != nil { + atomic.AddUint64(&i.snapshot.parent.stats.termSearchersFinished, uint64(1)) + } return nil } diff --git a/index/scorch/stats.go b/index/scorch/stats.go index 807e02107..abd054c81 100644 --- a/index/scorch/stats.go +++ b/index/scorch/stats.go @@ -21,23 +21,26 @@ import ( // Stats tracks statistics about the index type Stats struct { - analysisTime, indexTime uint64 - numItemsIntroduced uint64 - numItemsPersisted uint64 + updates, deletes, batches, errors uint64 + analysisTime, indexTime uint64 + termSearchersStarted uint64 + termSearchersFinished uint64 + numPlainTextBytesIndexed uint64 + numItemsIntroduced uint64 + numItemsPersisted uint64 } -// FIXME wire up these other stats again func (s *Stats) statsMap() map[string]interface{} { m := map[string]interface{}{} - // m["updates"] = atomic.LoadUint64(&i.updates) - // m["deletes"] = atomic.LoadUint64(&i.deletes) - // m["batches"] = atomic.LoadUint64(&i.batches) - // m["errors"] = atomic.LoadUint64(&i.errors) + m["updates"] = atomic.LoadUint64(&s.updates) + m["deletes"] = atomic.LoadUint64(&s.deletes) + m["batches"] = atomic.LoadUint64(&s.batches) + m["errors"] = atomic.LoadUint64(&s.errors) m["analysis_time"] = 
atomic.LoadUint64(&s.analysisTime) m["index_time"] = atomic.LoadUint64(&s.indexTime) - // m["term_searchers_started"] = atomic.LoadUint64(&i.termSearchersStarted) - // m["term_searchers_finished"] = atomic.LoadUint64(&i.termSearchersFinished) - // m["num_plain_text_bytes_indexed"] = atomic.LoadUint64(&i.numPlainTextBytesIndexed) + m["term_searchers_started"] = atomic.LoadUint64(&s.termSearchersStarted) + m["term_searchers_finished"] = atomic.LoadUint64(&s.termSearchersFinished) + m["num_plain_text_bytes_indexed"] = atomic.LoadUint64(&s.numPlainTextBytesIndexed) m["num_items_introduced"] = atomic.LoadUint64(&s.numItemsIntroduced) m["num_items_persisted"] = atomic.LoadUint64(&s.numItemsPersisted) From c8df014c0ca1b5296968029ddad39e2a905437a0 Mon Sep 17 00:00:00 2001 From: Sreekanth Sivasankaran Date: Fri, 29 Dec 2017 21:39:29 +0530 Subject: [PATCH 115/728] Updated readme, zap version, added new docvalue cmd, fixed the footer and fields cmd, interface name updated --- index/scorch/segment/mem/build.go | 2 +- index/scorch/segment/mem/segment.go | 9 +- index/scorch/segment/segment.go | 8 + index/scorch/segment/zap/README.md | 18 +- index/scorch/segment/zap/build.go | 33 +-- .../segment/zap/cmd/zap/cmd/docvalue.go | 224 ++++++++++++++++++ .../scorch/segment/zap/cmd/zap/cmd/fields.go | 2 +- .../scorch/segment/zap/cmd/zap/cmd/footer.go | 1 + index/scorch/segment/zap/contentcoder.go | 52 ++-- index/scorch/segment/zap/contentcoder_test.go | 2 +- index/scorch/segment/zap/docvalues.go | 16 +- index/scorch/segment/zap/merge.go | 55 +++-- index/scorch/segment/zap/segment.go | 10 +- index/scorch/snapshot_index.go | 45 ++-- 14 files changed, 360 insertions(+), 117 deletions(-) create mode 100644 index/scorch/segment/zap/cmd/zap/cmd/docvalue.go diff --git a/index/scorch/segment/mem/build.go b/index/scorch/segment/mem/build.go index 2709d2f06..246c35680 100644 --- a/index/scorch/segment/mem/build.go +++ b/index/scorch/segment/mem/build.go @@ -118,7 +118,7 @@ func (s *Segment) 
processDocument(result *index.AnalysisResult) { } // TODO with mapping changes for dv //if field.Options().IncludeDocValues() { - s.DocValueFields[fieldID] = true + s.DocValueFields = append(s.DocValueFields, fieldID) //} } diff --git a/index/scorch/segment/mem/segment.go b/index/scorch/segment/mem/segment.go index cd22616e8..705447c22 100644 --- a/index/scorch/segment/mem/segment.go +++ b/index/scorch/segment/mem/segment.go @@ -88,16 +88,15 @@ type Segment struct { // docNum -> field id -> slice of array positions (each is []uint64) StoredPos []map[uint16][][]uint64 - // for marking the docValue override status - // field id -> status - DocValueFields map[uint16]bool + // for storing the docValue persisted fields + // field id + DocValueFields []uint16 } // New builds a new empty Segment func New() *Segment { return &Segment{ - FieldsMap: map[string]uint16{}, - DocValueFields: map[uint16]bool{}, + FieldsMap: map[string]uint16{}, } } diff --git a/index/scorch/segment/segment.go b/index/scorch/segment/segment.go index 2c91c0ef8..73225c70a 100644 --- a/index/scorch/segment/segment.go +++ b/index/scorch/segment/segment.go @@ -87,3 +87,11 @@ type Location interface { Pos() uint64 ArrayPositions() []uint64 } + +// DocumentFieldTermVisitable is implemented by various scorch segment +// implementations to provide the un inverting of the postings +// or other indexed values. 
+type DocumentFieldTermVisitable interface { + VisitDocumentFieldTerms(localDocNum uint64, fields []string, + visitor index.DocumentFieldTermVisitor) error +} diff --git a/index/scorch/segment/zap/README.md b/index/scorch/segment/zap/README.md index b7a1b9e67..179adceaf 100644 --- a/index/scorch/segment/zap/README.md +++ b/index/scorch/segment/zap/README.md @@ -8,7 +8,7 @@ Current usage: - crc-32 bytes and version are in fixed position at end of the file - reading remainder of footer could be version specific - remainder of footer gives us: - - 2 important offsets (fields index and stored data index) + - 3 important offsets (docValue , fields index and stored data index) - 2 important values (number of docs and chunk factor) - field data is processed once and memoized onto the heap so that we never have to go back to disk for it - access to stored data by doc number means first navigating to the stored data index, then accessing a fixed position offset into that slice, which gives us the actual address of the data. the first bytes of that section tell us the size of data so that we know where it ends. @@ -140,12 +140,28 @@ If you know the doc number you're interested in, this format lets you jump to th NOTE: currently we don't know or record the length of this fields index. Instead we rely on the fact that we know it immediately precedes a footer of known size. 
+## fields DocValue + +- for each field + - preparation phase: + - produce a slice containing multiple consecutive chunks, where each chunk is composed of a meta section followed by compressed columnar field data + - produce a slice remembering the length of each chunk + - file writing phase: + - remember the start position of this first field DocValue offset in the footer + - write out number of chunks that follow (varint uint64) + - write out length of each chunk (each a varint uint64) + - write out the byte slice containing all the chunk data + +NOTE: currently the meta header inside each chunk gives clue to the location offsets and size of the data pertaining to a given docID and any +read operation leverage that meta information to extract the document specific data from the file. + ## footer - file writing phase - write number of docs (big endian uint64) - write stored field index location (big endian uint64) - write field index location (big endian uint64) + - write field docValue location (big endian uint64) - write out chunk factor (big endian uint32) - write out version (big endian uint32) - write out file CRC of everything preceding this (big endian uint32) diff --git a/index/scorch/segment/zap/build.go b/index/scorch/segment/zap/build.go index 5047acc0e..96d536964 100644 --- a/index/scorch/segment/zap/build.go +++ b/index/scorch/segment/zap/build.go @@ -28,7 +28,9 @@ import ( "github.com/golang/snappy" ) -const version uint32 = 1 +const version uint32 = 2 + +const fieldNotUninverted = math.MaxUint64 // PersistSegment takes the in-memory segment and persists it to the specified // path in the zap file format. 
@@ -435,15 +437,15 @@ func (a docIDRange) Less(i, j int) bool { return a[i] < a[j] } func persistDocValues(memSegment *mem.Segment, w *CountHashWriter, chunkFactor uint32) (map[uint16]uint64, error) { - fieldChunkOffsets := make(map[uint16]uint64, len(memSegment.DocValueFields)) + fieldChunkOffsets := make(map[uint16]uint64, len(memSegment.FieldsInv)) fdvEncoder := newChunkedContentCoder(uint64(chunkFactor), uint64(len(memSegment.Stored)-1)) - for fieldID := range memSegment.DocValueFields { + for _, fieldID := range memSegment.DocValueFields { field := memSegment.FieldsInv[fieldID] docTermMap := make(map[uint64][]byte, 0) dict, err := memSegment.Dictionary(field) if err != nil { - return fieldChunkOffsets, err + return nil, err } dictItr := dict.Iterator() @@ -451,7 +453,7 @@ func persistDocValues(memSegment *mem.Segment, w *CountHashWriter, for err == nil && next != nil { postings, err1 := dict.PostingsList(next.Term, nil) if err1 != nil { - return fieldChunkOffsets, err + return nil, err } postingsItr := postings.Iterator() @@ -463,14 +465,14 @@ func persistDocValues(memSegment *mem.Segment, w *CountHashWriter, nextPosting, err2 = postingsItr.Next() } if err2 != nil { - return fieldChunkOffsets, err2 + return nil, err2 } next, err = dictItr.Next() } if err != nil { - return fieldChunkOffsets, err + return nil, err } // sort wrt to docIDs var docNumbers docIDRange @@ -482,16 +484,19 @@ func persistDocValues(memSegment *mem.Segment, w *CountHashWriter, for _, docNum := range docNumbers { err = fdvEncoder.Add(docNum, docTermMap[docNum]) if err != nil { - return fieldChunkOffsets, err + return nil, err } } fieldChunkOffsets[fieldID] = uint64(w.Count()) - fdvEncoder.Close() + err = fdvEncoder.Close() + if err != nil { + return nil, err + } // persist the doc value details for this field _, err = fdvEncoder.Write(w) if err != nil { - return fieldChunkOffsets, err + return nil, err } // resetting encoder for the next field fdvEncoder.Reset() @@ -505,23 +510,23 @@ func 
persistFieldDocValues(w *CountHashWriter, chunkFactor uint32, fieldDvOffsets, err := persistDocValues(memSegment, w, chunkFactor) if err != nil { - return math.MaxUint64, err + return 0, err } fieldDocValuesOffset := uint64(w.Count()) buf := make([]byte, binary.MaxVarintLen64) - offset := uint64(math.MaxUint64) + offset := uint64(0) ok := true for fieldID := range memSegment.FieldsInv { // if the field isn't configured for docValue, then mark // the offset accordingly if offset, ok = fieldDvOffsets[uint16(fieldID)]; !ok { - offset = math.MaxUint64 + offset = fieldNotUninverted } n := binary.PutUvarint(buf, uint64(offset)) _, err := w.Write(buf[:n]) if err != nil { - return math.MaxUint64, err + return 0, err } } diff --git a/index/scorch/segment/zap/cmd/zap/cmd/docvalue.go b/index/scorch/segment/zap/cmd/zap/cmd/docvalue.go new file mode 100644 index 000000000..f20243ee2 --- /dev/null +++ b/index/scorch/segment/zap/cmd/zap/cmd/docvalue.go @@ -0,0 +1,224 @@ +// Copyright (c) 2017 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package cmd + +import ( + "bytes" + "encoding/binary" + "fmt" + "log" + "math" + "sort" + "strconv" + + "github.com/blevesearch/bleve/index/scorch/segment/zap" + "github.com/golang/snappy" + "github.com/spf13/cobra" +) + +// docvalueCmd represents the docvalue command +var docvalueCmd = &cobra.Command{ + Use: "docvalue [path] optional optional", + Short: "docvalue prints the docvalue details by field, and docNum", + Long: `The docvalue command lets you explore the docValues in order of field and by doc number.`, + RunE: func(cmd *cobra.Command, args []string) error { + if len(args) < 1 { + return fmt.Errorf("must specify index file path") + } + + data := segment.Data() + crcOffset := len(data) - 4 + verOffset := crcOffset - 4 + chunkOffset := verOffset - 4 + fieldsOffset := chunkOffset - 16 + fieldsIndexOffset := binary.BigEndian.Uint64(data[fieldsOffset : fieldsOffset+8]) + fieldsIndexEnd := uint64(len(data) - zap.FooterSize) + + // iterate through fields index + var fieldInv []string + var id, read, fieldLoc uint64 + var nread int + for fieldsIndexOffset+(8*id) < fieldsIndexEnd { + addr := binary.BigEndian.Uint64(data[fieldsIndexOffset+(8*id) : fieldsIndexOffset+(8*id)+8]) + var n uint64 + _, read := binary.Uvarint(data[addr+n : fieldsIndexEnd]) + n += uint64(read) + + var nameLen uint64 + nameLen, read = binary.Uvarint(data[addr+n : fieldsIndexEnd]) + n += uint64(read) + + name := string(data[addr+n : addr+n+nameLen]) + + id++ + fieldInv = append(fieldInv, name) + } + + dvLoc := segment.DocValueOffset() + fieldDvLoc := uint64(0) + var fieldName string + var fieldID uint16 + + // if no fields are specified then print the docValue offsets for all fields set + for id, field := range fieldInv { + fieldLoc, nread = binary.Uvarint(data[dvLoc+read : dvLoc+read+binary.MaxVarintLen64]) + if nread <= 0 { + return fmt.Errorf("loadDvIterators: failed to read the docvalue offsets for field %d", fieldID) + } + read += uint64(nread) + if len(args) == 1 { + // if no field 
args are given, then print out the dv locations for all fields + fmt.Printf("fieldID: %d '%s' docvalue at %d (%x)\n", id, field, fieldLoc, fieldLoc) + continue + } + + if field != args[1] { + continue + } else { + fieldDvLoc = fieldLoc + fieldName = field + fieldID = uint16(id) + } + + } + + // done with the fields dv locs printing for the given zap file + if len(args) == 1 { + return nil + } + + if fieldName == "" || fieldDvLoc == 0 { + return fmt.Errorf("no field found for given field arg: %s", args[1]) + } + + // read the number of chunks + var offset, clen, numChunks uint64 + numChunks, nread = binary.Uvarint(data[fieldDvLoc : fieldDvLoc+binary.MaxVarintLen64]) + if nread <= 0 { + return fmt.Errorf("failed to read the field "+ + "doc values for field %s", fieldName) + } + offset += uint64(nread) + + if len(args) == 2 { + fmt.Printf("number of chunks: %d\n", numChunks) + } + + // read the length of chunks + chunkLens := make([]uint64, numChunks) + for i := 0; i < int(numChunks); i++ { + clen, nread = binary.Uvarint(data[fieldDvLoc+offset : fieldDvLoc+offset+binary.MaxVarintLen64]) + if nread <= 0 { + return fmt.Errorf("corrupted chunk length for chunk number: %d", i) + } + + chunkLens[i] = clen + offset += uint64(nread) + if len(args) == 2 { + fmt.Printf("chunk: %d size: %d \n", i, clen) + } + } + + if len(args) == 2 { + return nil + } + + localDocNum, err := strconv.Atoi(args[2]) + if err != nil { + return fmt.Errorf("unable to parse doc number: %v", err) + } + + if localDocNum >= int(segment.NumDocs()) { + return fmt.Errorf("invalid doc number %d (valid 0 - %d)", localDocNum, segment.NumDocs()-1) + } + + // find the chunkNumber where the docValues are stored + docInChunk := uint64(localDocNum) / uint64(segment.ChunkFactor()) + + if numChunks < docInChunk { + return fmt.Errorf("no chunk exists for chunk number: %d for docID: %d", docInChunk, localDocNum) + } + + destChunkDataLoc := fieldDvLoc + offset + for i := 0; i < int(docInChunk); i++ { + destChunkDataLoc 
+= chunkLens[i] + } + curChunkSize := chunkLens[docInChunk] + + // read the number of docs reside in the chunk + numDocs := uint64(0) + numDocs, nread = binary.Uvarint(data[destChunkDataLoc : destChunkDataLoc+binary.MaxVarintLen64]) + if nread <= 0 { + return fmt.Errorf("failed to read the target chunk: %d", docInChunk) + } + chunkMetaLoc := destChunkDataLoc + uint64(nread) + + offset = uint64(0) + curChunkHeader := make([]zap.MetaData, int(numDocs)) + for i := 0; i < int(numDocs); i++ { + curChunkHeader[i].DocID, nread = binary.Uvarint(data[chunkMetaLoc+offset : chunkMetaLoc+offset+binary.MaxVarintLen64]) + offset += uint64(nread) + curChunkHeader[i].DocDvLoc, nread = binary.Uvarint(data[chunkMetaLoc+offset : chunkMetaLoc+offset+binary.MaxVarintLen64]) + offset += uint64(nread) + curChunkHeader[i].DocDvLen, nread = binary.Uvarint(data[chunkMetaLoc+offset : chunkMetaLoc+offset+binary.MaxVarintLen64]) + offset += uint64(nread) + } + + compressedDataLoc := chunkMetaLoc + offset + dataLength := destChunkDataLoc + curChunkSize - compressedDataLoc + curChunkData := data[compressedDataLoc : compressedDataLoc+dataLength] + + start, length := getDocValueLocs(uint64(localDocNum), curChunkHeader) + if start == math.MaxUint64 || length == math.MaxUint64 { + return nil + } + // uncompress the already loaded data + uncompressed, err := snappy.Decode(nil, curChunkData) + if err != nil { + log.Printf("snappy err %+v ", err) + return err + } + + var termSeparator byte = 0xff + var termSeparatorSplitSlice = []byte{termSeparator} + // pick the terms for the given docID + uncompressed = uncompressed[start : start+length] + for { + i := bytes.Index(uncompressed, termSeparatorSplitSlice) + if i < 0 { + break + } + + fmt.Printf(" %s ", uncompressed[0:i]) + uncompressed = uncompressed[i+1:] + } + fmt.Printf(" \n ") + return nil + }, +} + +func getDocValueLocs(docID uint64, metaHeader []zap.MetaData) (uint64, uint64) { + i := sort.Search(len(metaHeader), func(i int) bool { + return 
metaHeader[i].DocID >= docID + }) + if i < len(metaHeader) && metaHeader[i].DocID == docID { + return metaHeader[i].DocDvLoc, metaHeader[i].DocDvLen + } + return math.MaxUint64, math.MaxUint64 +} + +func init() { + RootCmd.AddCommand(docvalueCmd) +} diff --git a/index/scorch/segment/zap/cmd/zap/cmd/fields.go b/index/scorch/segment/zap/cmd/zap/cmd/fields.go index 98cdf9d73..cfc40974b 100644 --- a/index/scorch/segment/zap/cmd/zap/cmd/fields.go +++ b/index/scorch/segment/zap/cmd/zap/cmd/fields.go @@ -34,7 +34,7 @@ var fieldsCmd = &cobra.Command{ crcOffset := len(data) - 4 verOffset := crcOffset - 4 chunkOffset := verOffset - 4 - fieldsOffset := chunkOffset - 8 + fieldsOffset := chunkOffset - 16 fieldsIndexOffset := binary.BigEndian.Uint64(data[fieldsOffset : fieldsOffset+8]) fieldsIndexEnd := uint64(len(data) - zap.FooterSize) diff --git a/index/scorch/segment/zap/cmd/zap/cmd/footer.go b/index/scorch/segment/zap/cmd/zap/cmd/footer.go index 177f4e71b..0460360fc 100644 --- a/index/scorch/segment/zap/cmd/zap/cmd/footer.go +++ b/index/scorch/segment/zap/cmd/zap/cmd/footer.go @@ -33,6 +33,7 @@ var footerCmd = &cobra.Command{ fmt.Printf("Chunk Factor: %d\n", segment.ChunkFactor()) fmt.Printf("Fields Idx: %d (%#x)\n", segment.FieldsIndexOffset(), segment.FieldsIndexOffset()) fmt.Printf("Stored Idx: %d (%#x)\n", segment.StoredIndexOffset(), segment.StoredIndexOffset()) + fmt.Printf("DocValue Idx: %d (%#x)\n", segment.DocValueOffset(), segment.DocValueOffset()) fmt.Printf("Num Docs: %d\n", segment.NumDocs()) return nil }, diff --git a/index/scorch/segment/zap/contentcoder.go b/index/scorch/segment/zap/contentcoder.go index 978500e92..6887076ac 100644 --- a/index/scorch/segment/zap/contentcoder.go +++ b/index/scorch/segment/zap/contentcoder.go @@ -15,6 +15,7 @@ package zap import ( + "bufio" "bytes" "encoding/binary" "io" @@ -33,15 +34,15 @@ type chunkedContentCoder struct { chunkMetaBuf bytes.Buffer chunkBuf bytes.Buffer - chunkMeta []metaData + chunkMeta []MetaData } -// 
metaData represents the data information inside a +// MetaData represents the data information inside a // chunk. -type metaData struct { - docID uint64 // docid of the data inside the chunk - docDvLoc uint64 // starting offset for a given docid - docDvLen uint64 // length of data inside the chunk for the given docid +type MetaData struct { + DocID uint64 // docid of the data inside the chunk + DocDvLoc uint64 // starting offset for a given docid + DocDvLen uint64 // length of data inside the chunk for the given docid } // newChunkedContentCoder returns a new chunk content coder which @@ -52,7 +53,7 @@ func newChunkedContentCoder(chunkSize uint64, rv := &chunkedContentCoder{ chunkSize: chunkSize, chunkLens: make([]uint64, total), - chunkMeta: []metaData{}, + chunkMeta: []MetaData{}, } return rv @@ -68,13 +69,13 @@ func (c *chunkedContentCoder) Reset() { for i := range c.chunkLens { c.chunkLens[i] = 0 } - c.chunkMeta = []metaData{} + c.chunkMeta = []MetaData{} } // Close indicates you are done calling Add() this allows // the final chunk to be encoded. 
-func (c *chunkedContentCoder) Close() { - _ = c.flushContents() +func (c *chunkedContentCoder) Close() error { + return c.flushContents() } func (c *chunkedContentCoder) flushContents() error { @@ -86,26 +87,17 @@ func (c *chunkedContentCoder) flushContents() error { return err } + w := bufio.NewWriter(&c.chunkMetaBuf) // write out the metaData slice for _, meta := range c.chunkMeta { - n := binary.PutUvarint(buf, meta.docID) - _, err = c.chunkMetaBuf.Write(buf[:n]) + _, err := writeUvarints(w, meta.DocID, meta.DocDvLoc, meta.DocDvLen) if err != nil { return err } - - n = binary.PutUvarint(buf, meta.docDvLoc) - _, err = c.chunkMetaBuf.Write(buf[:n]) - if err != nil { - return err - } - - n = binary.PutUvarint(buf, meta.docDvLen) - _, err = c.chunkMetaBuf.Write(buf[:n]) - if err != nil { - return err - } - + } + err = w.Flush() + if err != nil { + return err } // write the metadata to final data @@ -132,7 +124,7 @@ func (c *chunkedContentCoder) Add(docNum uint64, vals []byte) error { // clearing the chunk specific meta for next chunk c.chunkBuf.Reset() c.chunkMetaBuf.Reset() - c.chunkMeta = []metaData{} + c.chunkMeta = []MetaData{} c.currChunk = chunk } @@ -143,10 +135,10 @@ func (c *chunkedContentCoder) Add(docNum uint64, vals []byte) error { return err } - c.chunkMeta = append(c.chunkMeta, metaData{ - docID: docNum, - docDvLoc: uint64(dvOffset), - docDvLen: uint64(dvSize), + c.chunkMeta = append(c.chunkMeta, MetaData{ + DocID: docNum, + DocDvLoc: uint64(dvOffset), + DocDvLen: uint64(dvSize), }) return nil } diff --git a/index/scorch/segment/zap/contentcoder_test.go b/index/scorch/segment/zap/contentcoder_test.go index 4ae4d8121..0e45b783e 100644 --- a/index/scorch/segment/zap/contentcoder_test.go +++ b/index/scorch/segment/zap/contentcoder_test.go @@ -61,7 +61,7 @@ func TestChunkContentCoder(t *testing.T) { t.Fatalf("error adding to intcoder: %v", err) } } - cic.Close() + _ = cic.Close() var actual bytes.Buffer _, err := cic.Write(&actual) if err != nil { diff 
--git a/index/scorch/segment/zap/docvalues.go b/index/scorch/segment/zap/docvalues.go index 9f1e9942f..d77c63233 100644 --- a/index/scorch/segment/zap/docvalues.go +++ b/index/scorch/segment/zap/docvalues.go @@ -31,7 +31,7 @@ type docValueIterator struct { numChunks uint64 chunkLens []uint64 dvDataLoc uint64 - curChunkHeader []metaData + curChunkHeader []MetaData curChunkData []byte // compressed data cache } @@ -96,13 +96,13 @@ func (di *docValueIterator) loadDvChunk(chunkNumber, chunkMetaLoc := destChunkDataLoc + uint64(read) offset := uint64(0) - di.curChunkHeader = make([]metaData, int(numDocs)) + di.curChunkHeader = make([]MetaData, int(numDocs)) for i := 0; i < int(numDocs); i++ { - di.curChunkHeader[i].docID, read = binary.Uvarint(s.mm[chunkMetaLoc+offset : chunkMetaLoc+offset+binary.MaxVarintLen64]) + di.curChunkHeader[i].DocID, read = binary.Uvarint(s.mm[chunkMetaLoc+offset : chunkMetaLoc+offset+binary.MaxVarintLen64]) offset += uint64(read) - di.curChunkHeader[i].docDvLoc, read = binary.Uvarint(s.mm[chunkMetaLoc+offset : chunkMetaLoc+offset+binary.MaxVarintLen64]) + di.curChunkHeader[i].DocDvLoc, read = binary.Uvarint(s.mm[chunkMetaLoc+offset : chunkMetaLoc+offset+binary.MaxVarintLen64]) offset += uint64(read) - di.curChunkHeader[i].docDvLen, read = binary.Uvarint(s.mm[chunkMetaLoc+offset : chunkMetaLoc+offset+binary.MaxVarintLen64]) + di.curChunkHeader[i].DocDvLen, read = binary.Uvarint(s.mm[chunkMetaLoc+offset : chunkMetaLoc+offset+binary.MaxVarintLen64]) offset += uint64(read) } @@ -143,10 +143,10 @@ func (di *docValueIterator) visitDocValues(docID uint64, func (di *docValueIterator) getDocValueLocs(docID uint64) (uint64, uint64) { i := sort.Search(len(di.curChunkHeader), func(i int) bool { - return di.curChunkHeader[i].docID >= docID + return di.curChunkHeader[i].DocID >= docID }) - if i < len(di.curChunkHeader) && di.curChunkHeader[i].docID == docID { - return di.curChunkHeader[i].docDvLoc, di.curChunkHeader[i].docDvLen + if i < 
len(di.curChunkHeader) && di.curChunkHeader[i].DocID == docID { + return di.curChunkHeader[i].DocDvLoc, di.curChunkHeader[i].DocDvLen } return math.MaxUint64, math.MaxUint64 } diff --git a/index/scorch/segment/zap/merge.go b/index/scorch/segment/zap/merge.go index 36959c62e..aeb81a6e2 100644 --- a/index/scorch/segment/zap/merge.go +++ b/index/scorch/segment/zap/merge.go @@ -134,7 +134,7 @@ func persistMergedRest(segments []*Segment, drops []*roaring.Bitmap, rv1 := make([]uint64, len(fieldsInv)) fieldDvLocs := make([]uint64, len(fieldsInv)) - fieldDvLocsOffset := uint64(math.MaxUint64) + fieldDvLocsOffset := uint64(fieldNotUninverted) var vellumBuf bytes.Buffer // for each field @@ -144,7 +144,7 @@ func persistMergedRest(segments []*Segment, drops []*roaring.Bitmap, } newVellum, err := vellum.New(&vellumBuf, nil) if err != nil { - return nil, fieldDvLocsOffset, err + return nil, 0, err } // collect FST iterators from all segments for this field @@ -153,14 +153,14 @@ func persistMergedRest(segments []*Segment, drops []*roaring.Bitmap, for _, segment := range segments { dict, err2 := segment.dictionary(fieldName) if err2 != nil { - return nil, fieldDvLocsOffset, err2 + return nil, 0, err2 } dicts = append(dicts, dict) if dict != nil && dict.fst != nil { itr, err2 := dict.fst.Iterator(nil, nil) if err2 != nil && err2 != vellum.ErrIteratorDone { - return nil, fieldDvLocsOffset, err2 + return nil, 0, err2 } if itr != nil { itrs = append(itrs, itr) @@ -194,7 +194,7 @@ func persistMergedRest(segments []*Segment, drops []*roaring.Bitmap, } postings, err2 := dict.postingsList(string(term), drops[dictI]) if err2 != nil { - return nil, fieldDvLocsOffset, err2 + return nil, 0, err2 } postItr := postings.Iterator() @@ -202,7 +202,7 @@ func persistMergedRest(segments []*Segment, drops []*roaring.Bitmap, for next != nil && err2 == nil { hitNewDocNum := newDocNums[dictI][next.Number()] if hitNewDocNum == docDropped { - return nil, fieldDvLocsOffset, fmt.Errorf("see hit with dropped 
doc num") + return nil, 0, fmt.Errorf("see hit with dropped doc num") } newRoaring.Add(uint32(hitNewDocNum)) // encode norm bits @@ -210,7 +210,7 @@ func persistMergedRest(segments []*Segment, drops []*roaring.Bitmap, normBits := math.Float32bits(float32(norm)) err3 := tfEncoder.Add(hitNewDocNum, next.Frequency(), uint64(normBits)) if err3 != nil { - return nil, fieldDvLocsOffset, err3 + return nil, 0, err3 } locs := next.Locations() if len(locs) > 0 { @@ -228,7 +228,7 @@ func persistMergedRest(segments []*Segment, drops []*roaring.Bitmap, args = append(args, loc.ArrayPositions()...) err = locEncoder.Add(hitNewDocNum, args...) if err != nil { - return nil, fieldDvLocsOffset, err + return nil, 0, err } } } @@ -238,7 +238,7 @@ func persistMergedRest(segments []*Segment, drops []*roaring.Bitmap, next, err2 = postItr.Next() } if err != nil { - return nil, fieldDvLocsOffset, err + return nil, 0, err } } @@ -250,17 +250,17 @@ func persistMergedRest(segments []*Segment, drops []*roaring.Bitmap, freqOffset := uint64(w.Count()) _, err = tfEncoder.Write(w) if err != nil { - return nil, fieldDvLocsOffset, err + return nil, 0, err } locOffset := uint64(w.Count()) _, err = locEncoder.Write(w) if err != nil { - return nil, fieldDvLocsOffset, err + return nil, 0, err } postingLocOffset := uint64(w.Count()) _, err = writeRoaringWithLen(newRoaringLocs, w) if err != nil { - return nil, fieldDvLocsOffset, err + return nil, 0, err } postingOffset := uint64(w.Count()) // write out the start of the term info @@ -268,43 +268,43 @@ func persistMergedRest(segments []*Segment, drops []*roaring.Bitmap, n := binary.PutUvarint(buf, freqOffset) _, err = w.Write(buf[:n]) if err != nil { - return nil, fieldDvLocsOffset, err + return nil, 0, err } // write out the start of the loc info n = binary.PutUvarint(buf, locOffset) _, err = w.Write(buf[:n]) if err != nil { - return nil, fieldDvLocsOffset, err + return nil, 0, err } // write out the start of the loc posting list n = binary.PutUvarint(buf, 
postingLocOffset) _, err = w.Write(buf[:n]) if err != nil { - return nil, fieldDvLocsOffset, err + return nil, 0, err } _, err = writeRoaringWithLen(newRoaring, w) if err != nil { - return nil, fieldDvLocsOffset, err + return nil, 0, err } err = newVellum.Insert(term, postingOffset) if err != nil { - return nil, fieldDvLocsOffset, err + return nil, 0, err } } err = mergeItr.Next() } if err != nil && err != vellum.ErrIteratorDone { - return nil, fieldDvLocsOffset, err + return nil, 0, err } dictOffset := uint64(w.Count()) err = newVellum.Close() if err != nil { - return nil, fieldDvLocsOffset, err + return nil, 0, err } vellumData := vellumBuf.Bytes() @@ -314,18 +314,18 @@ func persistMergedRest(segments []*Segment, drops []*roaring.Bitmap, n := binary.PutUvarint(buf, uint64(len(vellumData))) _, err = w.Write(buf[:n]) if err != nil { - return nil, fieldDvLocsOffset, err + return nil, 0, err } // write this vellum to disk _, err = w.Write(vellumData) if err != nil { - return nil, fieldDvLocsOffset, err + return nil, 0, err } rv1[fieldID] = dictOffset - // update teh doc value + // update the doc value var docNumbers docIDRange for k := range docTermMap { docNumbers = append(docNumbers, k) @@ -335,16 +335,19 @@ func persistMergedRest(segments []*Segment, drops []*roaring.Bitmap, for _, docNum := range docNumbers { err = fdvEncoder.Add(docNum, docTermMap[docNum]) if err != nil { - return nil, fieldDvLocsOffset, err + return nil, 0, err } } // get the field doc value offset fieldDvLocs[fieldID] = uint64(w.Count()) - fdvEncoder.Close() + err = fdvEncoder.Close() + if err != nil { + return nil, 0, err + } // persist the doc value details for this field _, err = fdvEncoder.Write(w) if err != nil { - return nil, fieldDvLocsOffset, err + return nil, 0, err } } @@ -354,7 +357,7 @@ func persistMergedRest(segments []*Segment, drops []*roaring.Bitmap, n := binary.PutUvarint(buf, uint64(offset)) _, err := w.Write(buf[:n]) if err != nil { - return nil, math.MaxUint64, err + return 
nil, 0, err } } diff --git a/index/scorch/segment/zap/segment.go b/index/scorch/segment/zap/segment.go index 5a39f48fb..4e1da0acd 100644 --- a/index/scorch/segment/zap/segment.go +++ b/index/scorch/segment/zap/segment.go @@ -19,7 +19,6 @@ import ( "encoding/binary" "fmt" "io" - "math" "os" "sync" @@ -349,11 +348,16 @@ func (s *Segment) FieldsIndexOffset() uint64 { return s.fieldsIndexOffset } -// StoredIndexOffset returns the stored value index offset in the file foooter +// StoredIndexOffset returns the stored value index offset in the file footer func (s *Segment) StoredIndexOffset() uint64 { return s.storedIndexOffset } +// DocValueOffset returns the docValue offset in the file footer +func (s *Segment) DocValueOffset() uint64 { + return s.docValueOffset +} + // NumDocs returns the number of documents in the file footer func (s *Segment) NumDocs() uint64 { return s.numDocs @@ -372,7 +376,7 @@ func (s *Segment) DictAddr(field string) (uint64, error) { } func (s *Segment) loadDvIterators() error { - if s.docValueOffset == math.MaxUint64 || s.docValueOffset == 0 { + if s.docValueOffset == fieldNotUninverted || s.docValueOffset == 0 { return nil } diff --git a/index/scorch/snapshot_index.go b/index/scorch/snapshot_index.go index 2b2654b58..530bb66f7 100644 --- a/index/scorch/snapshot_index.go +++ b/index/scorch/snapshot_index.go @@ -22,9 +22,6 @@ import ( "sort" "sync" - "github.com/blevesearch/bleve/index/scorch/segment/mem" - "github.com/blevesearch/bleve/index/scorch/segment/zap" - "github.com/RoaringBitmap/roaring" "github.com/blevesearch/bleve/document" "github.com/blevesearch/bleve/index" @@ -404,35 +401,29 @@ func (i *IndexSnapshot) DocumentVisitFieldTerms(id index.IndexInternalID, ss := i.segment[segmentIndex] - switch seg := ss.segment.(type) { - case *mem.Segment: - err = ss.cachedDocs.prepareFields(fields, ss) - if err != nil { - return err - } + if zaps, ok := ss.segment.(segment.DocumentFieldTermVisitable); ok { + return 
zaps.VisitDocumentFieldTerms(localDocNum, fields, visitor) + } - for _, field := range fields { - if cachedFieldDocs, exists := ss.cachedDocs.cache[field]; exists { - if tlist, exists := cachedFieldDocs.docs[localDocNum]; exists { - for { - i := bytes.Index(tlist, TermSeparatorSplitSlice) - if i < 0 { - break - } - visitor(field, tlist[0:i]) - tlist = tlist[i+1:] + // else fallback to the in memory fieldCache + err = ss.cachedDocs.prepareFields(fields, ss) + if err != nil { + return err + } + + for _, field := range fields { + if cachedFieldDocs, exists := ss.cachedDocs.cache[field]; exists { + if tlist, exists := cachedFieldDocs.docs[localDocNum]; exists { + for { + i := bytes.Index(tlist, TermSeparatorSplitSlice) + if i < 0 { + break } + visitor(field, tlist[0:i]) + tlist = tlist[i+1:] } } } - - case *zap.Segment: - if zaps, ok := ss.segment.(UnInvertIndex); ok { - return zaps.VisitDocumentFieldTerms(localDocNum, fields, visitor) - } - - default: - return fmt.Errorf("snapshot_index: DocumentVisitFieldTerms, unknown segment type: %T", seg) } return nil From 055d3e12df33f44dbc0c9c1ab77eb705486326bf Mon Sep 17 00:00:00 2001 From: abhinavdangeti Date: Tue, 26 Dec 2017 19:11:14 -0700 Subject: [PATCH 116/728] Adding onEvent callback support for scorch Event types: - EventKindCloseStart - EventKindClose - EventKindMergerProgress - EventKindPersisterProgress - EventKindBatchIntroductionStart - EventKindBatchIntroduction --- index/scorch/merge.go | 10 ++++++ index/scorch/persister.go | 6 ++++ index/scorch/scorch.go | 65 +++++++++++++++++++++++++++++++++++++-- 3 files changed, 78 insertions(+), 3 deletions(-) diff --git a/index/scorch/merge.go b/index/scorch/merge.go index 35469d591..0c166df7a 100644 --- a/index/scorch/merge.go +++ b/index/scorch/merge.go @@ -19,6 +19,7 @@ import ( "log" "os" "sync/atomic" + "time" "github.com/RoaringBitmap/roaring" "github.com/blevesearch/bleve/index/scorch/mergeplan" @@ -42,6 +43,8 @@ OUTER: s.rootLock.RUnlock() if ourSnapshot.epoch != 
lastEpochMergePlanned { + startTime := time.Now() + // lets get started err := s.planMergeAtSnapshot(ourSnapshot) if err != nil { @@ -50,6 +53,9 @@ OUTER: continue OUTER } lastEpochMergePlanned = ourSnapshot.epoch + + s.fireEvent(EventKindMergerProgress, time.Since(startTime)) + } _ = ourSnapshot.DecRef() @@ -71,6 +77,8 @@ OUTER: s.rootLock.RUnlock() if ourSnapshot.epoch != lastEpochMergePlanned { + startTime := time.Now() + // lets get started err := s.planMergeAtSnapshot(ourSnapshot) if err != nil { @@ -78,6 +86,8 @@ OUTER: continue OUTER } lastEpochMergePlanned = ourSnapshot.epoch + + s.fireEvent(EventKindMergerProgress, time.Since(startTime)) } _ = ourSnapshot.DecRef() diff --git a/index/scorch/persister.go b/index/scorch/persister.go index 52df1ff7a..b54b20134 100644 --- a/index/scorch/persister.go +++ b/index/scorch/persister.go @@ -25,6 +25,7 @@ import ( "strconv" "strings" "sync/atomic" + "time" "github.com/RoaringBitmap/roaring" "github.com/blevesearch/bleve/index/scorch/segment" @@ -64,6 +65,8 @@ OUTER: s.rootLock.Unlock() if ourSnapshot != nil { + startTime := time.Now() + err := s.persistSnapshot(ourSnapshot) for _, ch := range ourPersisted { if err != nil { @@ -90,6 +93,9 @@ OUTER: changed = true } s.rootLock.RUnlock() + + s.fireEvent(EventKindPersisterProgress, time.Since(startTime)) + if changed { continue OUTER } diff --git a/index/scorch/scorch.go b/index/scorch/scorch.go index 9d2b9049b..2745bda74 100644 --- a/index/scorch/scorch.go +++ b/index/scorch/scorch.go @@ -64,9 +64,44 @@ type Scorch struct { eligibleForRemoval []uint64 // Index snapshot epochs that are safe to GC. ineligibleForRemoval map[string]bool // Filenames that should not be GC'ed yet. + + onEvent func(event Event) +} + +// Event represents the information provided in an OnEvent() callback. 
+type Event struct { + Kind EventKind + Scorch *Scorch + Duration time.Duration } -func NewScorch(storeName string, config map[string]interface{}, analysisQueue *index.AnalysisQueue) (index.Index, error) { +// EventKind represents an event code for OnEvent() callbacks. +type EventKind int + +// EventKindCloseStart is fired when a Scorch.Close() has begun. +var EventKindCloseStart = EventKind(1) + +// EventKindClose is fired when a scorch index has been fully closed. +var EventKindClose = EventKind(2) + +// EventKindMergerProgress is fired when the merger has completed a +// round of merge processing. +var EventKindMergerProgress = EventKind(3) + +// EventKindPersisterProgress is fired when the persister has completed +// a round of persistence processing. +var EventKindPersisterProgress = EventKind(4) + +// EventKindBatchIntroductionStart is fired when Batch() is invoked which +// introduces a new segment. +var EventKindBatchIntroductionStart = EventKind(5) + +// EventKindBatchIntroduction is fired when Batch() completes. 
+var EventKindBatchIntroduction = EventKind(6) + +func NewScorch(storeName string, + config map[string]interface{}, + analysisQueue *index.AnalysisQueue) (index.Index, error) { rv := &Scorch{ version: Version, config: config, @@ -88,6 +123,16 @@ func NewScorch(storeName string, config map[string]interface{}, analysisQueue *i return rv, nil } +func (s *Scorch) SetEventCallback(f func(Event)) { + s.onEvent = f +} + +func (s *Scorch) fireEvent(kind EventKind, dur time.Duration) { + if s.onEvent != nil { + s.onEvent(Event{Kind: kind, Scorch: s, Duration: dur}) + } +} + func (s *Scorch) Open() error { var ok bool s.path, ok = s.config["path"].(string) @@ -155,6 +200,13 @@ func (s *Scorch) Open() error { } func (s *Scorch) Close() (err error) { + startTime := time.Now() + defer func() { + s.fireEvent(EventKindClose, time.Since(startTime)) + }() + + s.fireEvent(EventKindCloseStart, 0) + // signal to async tasks we want to close close(s.closeCh) // wait for them to close @@ -187,7 +239,11 @@ func (s *Scorch) Delete(id string) error { // Batch applices a batch of changes to the index atomically func (s *Scorch) Batch(batch *index.Batch) error { - analysisStart := time.Now() + start := time.Now() + + defer func() { + s.fireEvent(EventKindBatchIntroduction, time.Since(start)) + }() resultChan := make(chan *index.AnalysisResult, len(batch.IndexOps)) @@ -229,7 +285,10 @@ func (s *Scorch) Batch(batch *index.Batch) error { } close(resultChan) - atomic.AddUint64(&s.stats.analysisTime, uint64(time.Since(analysisStart))) + atomic.AddUint64(&s.stats.analysisTime, uint64(time.Since(start))) + + // notify handlers that we're about to introduce a segment + s.fireEvent(EventKindBatchIntroductionStart, 0) var newSegment segment.Segment if len(analysisResults) > 0 { From 5c26f5a86d39619fc54dd4fc64378a229d1272a5 Mon Sep 17 00:00:00 2001 From: abhinavdangeti Date: Thu, 28 Dec 2017 18:48:38 -0700 Subject: [PATCH 117/728] Tracking memory consumption for a scorch index + Track memory usage at a 
segment level + Add a new scorch API: MemoryUsed() - Aggregate the memory consumption across segments when API is invoked. + TODO: - Revisit the second iteration if it can be gotten rid off, and the size accounted for during the first run while building an in-mem segment. - Accounting for pointer and slice overhead. --- index/scorch/scorch.go | 15 ++++++ index/scorch/segment/mem/build.go | 3 ++ index/scorch/segment/mem/segment.go | 68 ++++++++++++++++++++++++ index/scorch/segment/mem/segment_test.go | 4 ++ index/scorch/segment/segment.go | 2 + index/scorch/segment/zap/segment.go | 25 +++++++++ index/scorch/snapshot_segment.go | 15 ++++++ 7 files changed, 132 insertions(+) diff --git a/index/scorch/scorch.go b/index/scorch/scorch.go index 9d2b9049b..926fa0b64 100644 --- a/index/scorch/scorch.go +++ b/index/scorch/scorch.go @@ -361,6 +361,21 @@ func (s *Scorch) AddEligibleForRemoval(epoch uint64) { s.rootLock.Unlock() } +func (s *Scorch) MemoryUsed() uint64 { + var memUsed uint64 + s.rootLock.RLock() + for _, segmentSnapshot := range s.root.segment { + memUsed += 8 /* size of id -> uint64 */ + + segmentSnapshot.segment.SizeInBytes() + if segmentSnapshot.deleted != nil { + memUsed += segmentSnapshot.deleted.GetSizeInBytes() + } + memUsed += segmentSnapshot.cachedDocs.sizeInBytes() + } + s.rootLock.RUnlock() + return memUsed +} + func (s *Scorch) markIneligibleForRemoval(filename string) { s.rootLock.Lock() s.ineligibleForRemoval[filename] = true diff --git a/index/scorch/segment/mem/build.go b/index/scorch/segment/mem/build.go index e111ce4f7..bfcbe2fd0 100644 --- a/index/scorch/segment/mem/build.go +++ b/index/scorch/segment/mem/build.go @@ -41,6 +41,9 @@ func NewFromAnalyzedDocs(results []*index.AnalysisResult) *Segment { sort.Strings(dict) } + // compute memory usage of segment + s.updateSizeInBytes() + // professional debugging // // log.Printf("fields: %v\n", s.FieldsMap) diff --git a/index/scorch/segment/mem/segment.go b/index/scorch/segment/mem/segment.go index 
75ff50cc0..94ab137b3 100644 --- a/index/scorch/segment/mem/segment.go +++ b/index/scorch/segment/mem/segment.go @@ -87,6 +87,10 @@ type Segment struct { // stored field array positions // docNum -> field id -> slice of array positions (each is []uint64) StoredPos []map[uint16][][]uint64 + + // footprint of the segment, updated when analyzed document mutations + // are added into the segment + sizeInBytes uint64 } // New builds a new empty Segment @@ -96,6 +100,70 @@ func New() *Segment { } } +func (s *Segment) updateSizeInBytes() { + var sizeInBytes uint64 + + for k, _ := range s.FieldsMap { + sizeInBytes += uint64(len(k)*2 /* FieldsMap + FieldsInv */ + + 2 /* size of uint16 */) + } + + for _, entry := range s.Dicts { + for k, _ := range entry { + sizeInBytes += uint64(len(k)*2 /* Dicts + DictKeys */ + + 8 /* size of uint64 */) + } + } + + for i := 0; i < len(s.Postings); i++ { + sizeInBytes += s.Postings[i].GetSizeInBytes() + s.PostingsLocs[i].GetSizeInBytes() + } + + for i := 0; i < len(s.Freqs); i++ { + sizeInBytes += uint64(len(s.Freqs[i])*8 /* size of uint64 */ + + len(s.Norms[i])*4 /* size of float32 */) + } + + for i := 0; i < len(s.Locfields); i++ { + sizeInBytes += uint64(len(s.Locfields[i])*2 /* size of uint16 */ + + len(s.Locstarts[i])*8 /* size of uint64 */ + + len(s.Locends[i])*8 /* size of uint64 */ + + len(s.Locpos[i])*8 /* size of uint64 */) + + for j := 0; j < len(s.Locarraypos[i]); j++ { + sizeInBytes += uint64(len(s.Locarraypos[i][j]) * 8 /* size of uint64 */) + } + } + + for i := 0; i < len(s.Stored); i++ { + for _, v := range s.Stored[i] { + sizeInBytes += uint64(2 /* size of uint16 */) + for _, arr := range v { + sizeInBytes += uint64(len(arr)) + } + } + + for _, v := range s.StoredTypes[i] { + sizeInBytes += uint64(2 /* size of uint16 */ + len(v)) + } + + for _, v := range s.StoredPos[i] { + sizeInBytes += uint64(2 /* size of uint16 */) + for _, arr := range v { + sizeInBytes += uint64(len(arr) * 8 /* size of uint64 */) + } + } + } + + 
sizeInBytes += uint64(8 /* size of sizeInBytes -> uint64*/) + + s.sizeInBytes = sizeInBytes +} + +func (s *Segment) SizeInBytes() uint64 { + return s.sizeInBytes +} + func (s *Segment) AddRef() { } diff --git a/index/scorch/segment/mem/segment_test.go b/index/scorch/segment/mem/segment_test.go index 7eb691476..5e3818c24 100644 --- a/index/scorch/segment/mem/segment_test.go +++ b/index/scorch/segment/mem/segment_test.go @@ -169,6 +169,10 @@ func TestSingle(t *testing.T) { t.Fatalf("segment nil, not expected") } + if segment.SizeInBytes() <= 0 { + t.Fatalf("segment size not updated") + } + expectFields := map[string]struct{}{ "_id": struct{}{}, "_all": struct{}{}, diff --git a/index/scorch/segment/segment.go b/index/scorch/segment/segment.go index 2c91c0ef8..cad9ae8f3 100644 --- a/index/scorch/segment/segment.go +++ b/index/scorch/segment/segment.go @@ -36,6 +36,8 @@ type Segment interface { Close() error + SizeInBytes() uint64 + AddRef() DecRef() error } diff --git a/index/scorch/segment/zap/segment.go b/index/scorch/segment/zap/segment.go index e78ac392f..d38b81987 100644 --- a/index/scorch/segment/zap/segment.go +++ b/index/scorch/segment/zap/segment.go @@ -86,6 +86,31 @@ type Segment struct { refs int64 } +func (s *Segment) SizeInBytes() uint64 { + // 4 /* size of crc -> uint32 */ + + // 4 /* size of version -> uint32 */ + + // 4 /* size of chunkFactor -> uint32 */ + + // 8 /* size of numDocs -> uint64 */ + + // 8 /* size of storedIndexOffset -> uint64 */ + + // 8 /* size of fieldsIndexOffset -> uint64 */ + sizeOfUints := 36 + + sizeInBytes := len(s.mm) + len(s.path) + sizeOfUints + + for k, _ := range s.fieldsMap { + sizeInBytes += len(k) + 2 /* size of uint16 */ + } + + for _, entry := range s.fieldsInv { + sizeInBytes += len(entry) + } + + sizeInBytes += len(s.fieldsOffsets) * 8 /* size of uint64 */ + sizeInBytes += 8 /* size of refs -> int64 */ + + return uint64(sizeInBytes) +} + func (s *Segment) AddRef() { s.m.Lock() s.refs++ diff --git 
a/index/scorch/snapshot_segment.go b/index/scorch/snapshot_segment.go index f2bcfb065..b3b8d8284 100644 --- a/index/scorch/snapshot_segment.go +++ b/index/scorch/snapshot_segment.go @@ -249,3 +249,18 @@ func (c *cachedDocs) prepareFields(wantedFields []string, ss *SegmentSnapshot) e c.m.Unlock() return nil } + +func (c *cachedDocs) sizeInBytes() uint64 { + sizeInBytes := 0 + c.m.Lock() + for k, v := range c.cache { // cachedFieldDocs + sizeInBytes += len(k) + if v != nil { + for _, entry := range v.docs { // docs + sizeInBytes += 8 /* size of uint64 */ + len(entry) + } + } + } + c.m.Unlock() + return uint64(sizeInBytes) +} From 448201243aed7202bb4a95efc70ad101fb7b99ca Mon Sep 17 00:00:00 2001 From: Sreekanth Sivasankaran Date: Sat, 30 Dec 2017 16:54:06 +0530 Subject: [PATCH 118/728] removed redundant buf writer, and checks --- index/scorch/segment/zap/build.go | 2 +- index/scorch/segment/zap/contentcoder.go | 8 +------- index/scorch/segment/zap/docvalues.go | 2 +- index/scorch/segment/zap/merge.go | 3 ++- index/scorch/segment/zap/segment.go | 2 +- 5 files changed, 6 insertions(+), 11 deletions(-) diff --git a/index/scorch/segment/zap/build.go b/index/scorch/segment/zap/build.go index 96d536964..f5a92562d 100644 --- a/index/scorch/segment/zap/build.go +++ b/index/scorch/segment/zap/build.go @@ -51,7 +51,7 @@ func PersistSegment(memSegment *mem.Segment, path string, chunkFactor uint32) (e var storedIndexOffset uint64 var dictLocs []uint64 - var docValueOffset uint64 + docValueOffset := uint64(fieldNotUninverted) if len(memSegment.Stored) > 0 { storedIndexOffset, err = persistStored(memSegment, cr) diff --git a/index/scorch/segment/zap/contentcoder.go b/index/scorch/segment/zap/contentcoder.go index 6887076ac..b03940497 100644 --- a/index/scorch/segment/zap/contentcoder.go +++ b/index/scorch/segment/zap/contentcoder.go @@ -15,7 +15,6 @@ package zap import ( - "bufio" "bytes" "encoding/binary" "io" @@ -87,18 +86,13 @@ func (c *chunkedContentCoder) flushContents() error 
{ return err } - w := bufio.NewWriter(&c.chunkMetaBuf) // write out the metaData slice for _, meta := range c.chunkMeta { - _, err := writeUvarints(w, meta.DocID, meta.DocDvLoc, meta.DocDvLen) + _, err := writeUvarints(&c.chunkMetaBuf, meta.DocID, meta.DocDvLoc, meta.DocDvLen) if err != nil { return err } } - err = w.Flush() - if err != nil { - return err - } // write the metadata to final data metaData := c.chunkMetaBuf.Bytes() diff --git a/index/scorch/segment/zap/docvalues.go b/index/scorch/segment/zap/docvalues.go index d77c63233..3a75f29d2 100644 --- a/index/scorch/segment/zap/docvalues.go +++ b/index/scorch/segment/zap/docvalues.go @@ -46,7 +46,7 @@ func (di *docValueIterator) curChunkNumber() uint64 { func (s *Segment) loadFieldDocValueIterator(field string, fieldDvLoc uint64) (*docValueIterator, error) { // get the docValue offset for the given fields - if fieldDvLoc == math.MaxUint64 { + if fieldDvLoc == fieldNotUninverted { return nil, fmt.Errorf("loadFieldDocValueConfigs: "+ "no docValues found for field: %s", field) } diff --git a/index/scorch/segment/zap/merge.go b/index/scorch/segment/zap/merge.go index aeb81a6e2..16ec848b2 100644 --- a/index/scorch/segment/zap/merge.go +++ b/index/scorch/segment/zap/merge.go @@ -53,7 +53,8 @@ func Merge(segments []*Segment, drops []*roaring.Bitmap, path string, newSegDocCount := computeNewDocCount(segments, drops) var newDocNums [][]uint64 - var storedIndexOffset, fieldDvLocsOffset uint64 + var storedIndexOffset uint64 + fieldDvLocsOffset := uint64(fieldNotUninverted) var dictLocs []uint64 if newSegDocCount > 0 { storedIndexOffset, newDocNums, err = mergeStoredAndRemap(segments, drops, diff --git a/index/scorch/segment/zap/segment.go b/index/scorch/segment/zap/segment.go index a0963d31e..93b7466c8 100644 --- a/index/scorch/segment/zap/segment.go +++ b/index/scorch/segment/zap/segment.go @@ -401,7 +401,7 @@ func (s *Segment) DictAddr(field string) (uint64, error) { } func (s *Segment) loadDvIterators() error { - if 
s.docValueOffset == fieldNotUninverted || s.docValueOffset == 0 { + if s.docValueOffset == fieldNotUninverted { return nil } From 1a59a1bb99adce5ff36a4f02c78a15e53499b273 Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Tue, 2 Jan 2018 16:09:55 -0500 Subject: [PATCH 119/728] attempt to fix core reference counting issues Observed problem: Persisted index state (in root bolt) would contain index snapshots which pointed to index files that did not exist. Debugging this uncovered two main problems: 1. At the end of persisting a snapshot, the persister creates a new index snapshot with the SAME epoch as the current root, only it replaces in-memory segments with the new disk based ones. This is problematic because reference counting an index segment triggers "eligible for deletion". And eligible for deletion is keyed by epoch. So having two separate instances going by the same epoch is problematic. Specifically, one of them gets to 0 before the other, and we wrongly conclude it's eligible for deletion, when in fact the "other" instance with same epoch is actually still in use. To address this problem, we have modified the behavior of the persister. Now, upon completion of persistence, ONLY if new files were actually created do we proceed to introduce a new snapshot. AND, this new snapshot now gets it's own brand new epoch. BOTH of these are important because since the persister now also introduces a new epoch, it will see this epoch again in the future AND be expected to persist it. That is OK (mostly harmless), but we cannot allow it to form a loop. Checking that new files were actually introduced is what short-circuits the potential loop. The new epoch introduced by the persister, if seen again will not have any new segments that actually need persisting to disk, and the cycle is stopped. 2. The implementation of NumSnapshotsToKeep, and related code to deleted old snapshots from the root bolt also contains problems. 
Specifically, the determination of which snapshots to keep vs delete did not consider which ones were actually persisted. So, let's say you had set NumSnapshotsToKeep to 3, if the introducer gets 3 snapshots ahead of the persister, what can happen is that the three snapshots we choose to keep are all in memory. We now wrongly delete all of the snapshots from the root bolt. But it gets worse, in this instant of time, we now have files on disk that nothing in the root bolt points to, so we also go ahead and delete those files. Those files were still being referenced by the in-memory snapshots. But, now even if they get persisted to disk, they simply have references to non-existent files. Opening up one of these indexes results in lost data (often everything). To address this problem, we made a large change to the way this section of code operates. First, we now start with a list of all epochs actually persisted in the root bolt. Second, we set aside NumSnapshotsToKeep of these snapshots to keep. Third, anything else in the eligibleForRemoval list will be deleted. I suspect this code is slower and less elegant, but I think it is more correct. Also, previously NumSnapshotsToKeep defaulted to 0, I have now defaulted it to 1, which feels like saner out-of-the-box behavior (though it's debatable if the original intent was perhaps instead for "extra" snapshots to keep, but with the variable named as it is, 1 makes more sense to me) Other minor changes included in this change: - Location of 'nextSnapshotEpoch', 'eligibleForRemoval', and 'ineligibleForRemoval' members of Scorch struct were moved into the paragraph with 'rootLock' to clarify that you must hold the lock to access it. - TestBatchRaceBug260 was updated to properly Close() the index, which otherwise leads to occasional test failures.
--- index/scorch/persister.go | 151 ++++++++++++++++++++++++-------------- index/scorch/scorch.go | 27 ++++--- index_test.go | 6 ++ 3 files changed, 116 insertions(+), 68 deletions(-) diff --git a/index/scorch/persister.go b/index/scorch/persister.go index b54b20134..acf241ebf 100644 --- a/index/scorch/persister.go +++ b/index/scorch/persister.go @@ -21,7 +21,6 @@ import ( "log" "os" "path/filepath" - "sort" "strconv" "strings" "sync/atomic" @@ -218,59 +217,63 @@ func (s *Scorch) persistSnapshot(snapshot *IndexSnapshot) error { } } - // now try to open all the new snapshots - newSegments := make(map[uint64]segment.Segment) - for segmentID, path := range newSegmentPaths { - newSegments[segmentID], err = zap.Open(path) - if err != nil { - for _, s := range newSegments { - if s != nil { - _ = s.Close() // cleanup segments that were successfully opened + // only alter the root if we actually persisted a segment + // (sometimes its just a new snapshot, possibly with new internal values) + if len(newSegmentPaths) > 0 { + // now try to open all the new snapshots + newSegments := make(map[uint64]segment.Segment) + for segmentID, path := range newSegmentPaths { + newSegments[segmentID], err = zap.Open(path) + if err != nil { + for _, s := range newSegments { + if s != nil { + _ = s.Close() // cleanup segments that were successfully opened + } } + return fmt.Errorf("error opening new segment at %s, %v", path, err) } - return fmt.Errorf("error opening new segment at %s, %v", path, err) } - } - s.rootLock.Lock() - newIndexSnapshot := &IndexSnapshot{ - parent: s, - epoch: s.root.epoch, - segment: make([]*SegmentSnapshot, len(s.root.segment)), - offsets: make([]uint64, len(s.root.offsets)), - internal: make(map[string][]byte, len(s.root.internal)), - refs: 1, - } - for i, segmentSnapshot := range s.root.segment { - // see if this segment has been replaced - if replacement, ok := newSegments[segmentSnapshot.id]; ok { - newSegmentSnapshot := &SegmentSnapshot{ - id: 
segmentSnapshot.id, - segment: replacement, - deleted: segmentSnapshot.deleted, - cachedDocs: segmentSnapshot.cachedDocs, + s.rootLock.Lock() + newIndexSnapshot := &IndexSnapshot{ + parent: s, + epoch: s.nextSnapshotEpoch, + segment: make([]*SegmentSnapshot, len(s.root.segment)), + offsets: make([]uint64, len(s.root.offsets)), + internal: make(map[string][]byte, len(s.root.internal)), + refs: 1, + } + s.nextSnapshotEpoch++ + for i, segmentSnapshot := range s.root.segment { + // see if this segment has been replaced + if replacement, ok := newSegments[segmentSnapshot.id]; ok { + newSegmentSnapshot := &SegmentSnapshot{ + id: segmentSnapshot.id, + segment: replacement, + deleted: segmentSnapshot.deleted, + cachedDocs: segmentSnapshot.cachedDocs, + } + newIndexSnapshot.segment[i] = newSegmentSnapshot + // update items persisted incase of a new segment snapshot + atomic.AddUint64(&s.stats.numItemsPersisted, newSegmentSnapshot.Count()) + } else { + newIndexSnapshot.segment[i] = s.root.segment[i] + newIndexSnapshot.segment[i].segment.AddRef() } - newIndexSnapshot.segment[i] = newSegmentSnapshot - // update items persisted incase of a new segment snapshot - atomic.AddUint64(&s.stats.numItemsPersisted, newSegmentSnapshot.Count()) - } else { - newIndexSnapshot.segment[i] = s.root.segment[i] - newIndexSnapshot.segment[i].segment.AddRef() + newIndexSnapshot.offsets[i] = s.root.offsets[i] + } + for k, v := range s.root.internal { + newIndexSnapshot.internal[k] = v + } + for _, filename := range filenames { + delete(s.ineligibleForRemoval, filename) + } + rootPrev := s.root + s.root = newIndexSnapshot + s.rootLock.Unlock() + if rootPrev != nil { + _ = rootPrev.DecRef() } - newIndexSnapshot.offsets[i] = s.root.offsets[i] - } - for k, v := range s.root.internal { - newIndexSnapshot.internal[k] = v - } - for _, filename := range filenames { - delete(s.ineligibleForRemoval, filename) - } - rootPrev := s.root - s.root = newIndexSnapshot - s.rootLock.Unlock() - - if rootPrev != nil { 
- _ = rootPrev.DecRef() } return nil @@ -435,19 +438,39 @@ func (s *Scorch) removeOldData() { // NumSnapshotsToKeep represents how many recent, old snapshots to // keep around per Scorch instance. Useful for apps that require // rollback'ability. -var NumSnapshotsToKeep int +var NumSnapshotsToKeep = 1 // Removes enough snapshots from the rootBolt so that the // s.eligibleForRemoval stays under the NumSnapshotsToKeep policy. func (s *Scorch) removeOldBoltSnapshots() (numRemoved int, err error) { - var epochsToRemove []uint64 + persistedEpochs, err := s.rootBoltSnapshotEpochs() + if err != nil { + return 0, err + } + if len(persistedEpochs) <= NumSnapshotsToKeep { + // we need to keep everything + return 0, nil + } + + // make a map of epochs to protect from deletion + protectedEpochs := make(map[uint64]struct{}, NumSnapshotsToKeep) + for _, epoch := range persistedEpochs[0:NumSnapshotsToKeep] { + protectedEpochs[epoch] = struct{}{} + } + + var epochsToRemove []uint64 + var newEligible []uint64 s.rootLock.Lock() - if len(s.eligibleForRemoval) > NumSnapshotsToKeep { - sort.Sort(uint64Descending(s.eligibleForRemoval)) - epochsToRemove = append([]uint64(nil), s.eligibleForRemoval[NumSnapshotsToKeep:]...) // Copy. 
- s.eligibleForRemoval = s.eligibleForRemoval[0:NumSnapshotsToKeep] + for _, epoch := range s.eligibleForRemoval { + if _, ok := protectedEpochs[epoch]; ok { + // protected + newEligible = append(newEligible, epoch) + } else { + epochsToRemove = append(epochsToRemove, epoch) + } } + s.eligibleForRemoval = newEligible s.rootLock.Unlock() if len(epochsToRemove) <= 0 { @@ -542,6 +565,26 @@ func (s *Scorch) removeOldZapFiles() error { return nil } +func (s *Scorch) rootBoltSnapshotEpochs() ([]uint64, error) { + var rv []uint64 + err := s.rootBolt.View(func(tx *bolt.Tx) error { + snapshots := tx.Bucket(boltSnapshotsBucket) + if snapshots == nil { + return nil + } + sc := snapshots.Cursor() + for sk, _ := sc.Last(); sk != nil; sk, _ = sc.Prev() { + _, snapshotEpoch, err := segment.DecodeUvarintAscending(sk) + if err != nil { + continue + } + rv = append(rv, snapshotEpoch) + } + return nil + }) + return rv, err +} + // Returns the *.zap file names that are listed in the rootBolt. func (s *Scorch) loadZapFileNames() (map[string]struct{}, error) { rv := map[string]struct{}{} diff --git a/index/scorch/scorch.go b/index/scorch/scorch.go index 94cbb3e6e..e03cdeee3 100644 --- a/index/scorch/scorch.go +++ b/index/scorch/scorch.go @@ -38,20 +38,22 @@ const Name = "scorch" const Version uint8 = 1 type Scorch struct { - readOnly bool - version uint8 - config map[string]interface{} - analysisQueue *index.AnalysisQueue - stats *Stats - nextSegmentID uint64 - nextSnapshotEpoch uint64 - path string + readOnly bool + version uint8 + config map[string]interface{} + analysisQueue *index.AnalysisQueue + stats *Stats + nextSegmentID uint64 + path string unsafeBatch bool - rootLock sync.RWMutex - root *IndexSnapshot // holds 1 ref-count on the root - rootPersisted []chan error // closed when root is persisted + rootLock sync.RWMutex + root *IndexSnapshot // holds 1 ref-count on the root + rootPersisted []chan error // closed when root is persisted + nextSnapshotEpoch uint64 + 
eligibleForRemoval []uint64 // Index snapshot epochs that are safe to GC. + ineligibleForRemoval map[string]bool // Filenames that should not be GC'ed yet. closeCh chan struct{} introductions chan *segmentIntroduction @@ -62,9 +64,6 @@ type Scorch struct { rootBolt *bolt.DB asyncTasks sync.WaitGroup - eligibleForRemoval []uint64 // Index snapshot epochs that are safe to GC. - ineligibleForRemoval map[string]bool // Filenames that should not be GC'ed yet. - onEvent func(event Event) } diff --git a/index_test.go b/index_test.go index 03ef71929..762e3838e 100644 --- a/index_test.go +++ b/index_test.go @@ -1565,6 +1565,12 @@ func TestBatchRaceBug260(t *testing.T) { if err != nil { t.Fatal(err) } + defer func() { + err := i.Close() + if err != nil { + t.Fatal(err) + } + }() b := i.NewBatch() err = b.Index("1", 1) if err != nil { From f42ecb0ac727393c59d0fec264be1d7feb996a48 Mon Sep 17 00:00:00 2001 From: Sreekanth Sivasankaran Date: Thu, 4 Jan 2018 13:58:51 +0530 Subject: [PATCH 120/728] docvalue "zap-path" cmd to print out the dv disk sizes --- .../segment/zap/cmd/zap/cmd/docvalue.go | 60 +++++++++++++++++-- 1 file changed, 55 insertions(+), 5 deletions(-) diff --git a/index/scorch/segment/zap/cmd/zap/cmd/docvalue.go b/index/scorch/segment/zap/cmd/zap/cmd/docvalue.go index f20243ee2..ee15bac35 100644 --- a/index/scorch/segment/zap/cmd/zap/cmd/docvalue.go +++ b/index/scorch/segment/zap/cmd/zap/cmd/docvalue.go @@ -67,20 +67,50 @@ var docvalueCmd = &cobra.Command{ } dvLoc := segment.DocValueOffset() - fieldDvLoc := uint64(0) + fieldDvLoc, total, fdvread := uint64(0), uint64(0), int(0) + var fieldName string var fieldID uint16 // if no fields are specified then print the docValue offsets for all fields set for id, field := range fieldInv { - fieldLoc, nread = binary.Uvarint(data[dvLoc+read : dvLoc+read+binary.MaxVarintLen64]) - if nread <= 0 { + fieldLoc, fdvread = binary.Uvarint(data[dvLoc+read : dvLoc+read+binary.MaxVarintLen64]) + if fdvread <= 0 { return 
fmt.Errorf("loadDvIterators: failed to read the docvalue offsets for field %d", fieldID) } - read += uint64(nread) + read += uint64(fdvread) + if fieldLoc == math.MaxUint64 { + fmt.Printf("fieldID: %d '%s' docvalue at %d (%x) not persisted \n", id, field, fieldLoc, fieldLoc) + continue + } + + var offset, clen, numChunks uint64 + numChunks, nread = binary.Uvarint(data[fieldLoc : fieldLoc+binary.MaxVarintLen64]) + if nread <= 0 { + return fmt.Errorf("failed to read the field "+ + "doc values for field %s", fieldName) + } + offset += uint64(nread) + + // read the length of chunks + totalSize := uint64(0) + chunkLens := make([]uint64, numChunks) + for i := 0; i < int(numChunks); i++ { + clen, nread = binary.Uvarint(data[fieldLoc+offset : fieldLoc+offset+binary.MaxVarintLen64]) + if nread <= 0 { + return fmt.Errorf("corrupted chunk length for chunk number: %d", i) + } + + chunkLens[i] = clen + totalSize += clen + offset += uint64(nread) + } + + total += totalSize if len(args) == 1 { // if no field args are given, then print out the dv locations for all fields - fmt.Printf("fieldID: %d '%s' docvalue at %d (%x)\n", id, field, fieldLoc, fieldLoc) + mbsize := float64(totalSize) / (1024 * 1024) + fmt.Printf("fieldID: %d '%s' docvalue at %d (%x) numChunks %d diskSize %.3f MB\n", id, field, fieldLoc, fieldLoc, numChunks, mbsize) continue } @@ -94,6 +124,9 @@ var docvalueCmd = &cobra.Command{ } + mbsize := float64(total) / (1024 * 1024) + fmt.Printf("Total Doc Values Size on Disk: %.3f MB\n", mbsize) + // done with the fields dv locs printing for the given zap file if len(args) == 1 { return nil @@ -129,6 +162,12 @@ var docvalueCmd = &cobra.Command{ if len(args) == 2 { fmt.Printf("chunk: %d size: %d \n", i, clen) } + /* + TODO => dump all chunk headers?? 
+ if len(args) == 3 && args[2] == ">" { + dumpChunkDocIDs(data, ) + + }*/ } if len(args) == 2 { @@ -182,6 +221,8 @@ var docvalueCmd = &cobra.Command{ start, length := getDocValueLocs(uint64(localDocNum), curChunkHeader) if start == math.MaxUint64 || length == math.MaxUint64 { + fmt.Printf("no field values found for docID %d\n", localDocNum) + fmt.Printf("Try docIDs present in chunk: %s\n", assortDocID(curChunkHeader)) return nil } // uncompress the already loaded data @@ -219,6 +260,15 @@ func getDocValueLocs(docID uint64, metaHeader []zap.MetaData) (uint64, uint64) { return math.MaxUint64, math.MaxUint64 } +func assortDocID(metaHeader []zap.MetaData) string { + docIDs := "" + for _, meta := range metaHeader { + id := fmt.Sprintf("%d", meta.DocID) + docIDs += id + ", " + } + return docIDs +} + func init() { RootCmd.AddCommand(docvalueCmd) } From 71a726bbf6862f156022fb895565ceaac252c277 Mon Sep 17 00:00:00 2001 From: Sreekanth Sivasankaran Date: Thu, 4 Jan 2018 15:34:55 +0530 Subject: [PATCH 121/728] perf issue was due to duplicate fieldIDs getting inserted to the list of dv enabled fields list - DocValueFields in mem segment. Moved back to the original type `DocValueFields map[uint16]bool` for easy look up to check whether the fieldID is configured for dv storage. 
--- index/scorch/segment/mem/build.go | 2 +- index/scorch/segment/mem/segment.go | 8 ++++---- index/scorch/segment/zap/build.go | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/index/scorch/segment/mem/build.go b/index/scorch/segment/mem/build.go index 27a297d2f..72ea08eb8 100644 --- a/index/scorch/segment/mem/build.go +++ b/index/scorch/segment/mem/build.go @@ -121,7 +121,7 @@ func (s *Segment) processDocument(result *index.AnalysisResult) { } // TODO with mapping changes for dv //if field.Options().IncludeDocValues() { - s.DocValueFields = append(s.DocValueFields, fieldID) + s.DocValueFields[fieldID] = true //} } diff --git a/index/scorch/segment/mem/segment.go b/index/scorch/segment/mem/segment.go index 3ba7df7e9..5ef3e1f34 100644 --- a/index/scorch/segment/mem/segment.go +++ b/index/scorch/segment/mem/segment.go @@ -89,9 +89,8 @@ type Segment struct { StoredPos []map[uint16][][]uint64 // for storing the docValue persisted fields - // field id - DocValueFields []uint16 - + DocValueFields map[uint16]bool + // footprint of the segment, updated when analyzed document mutations // are added into the segment sizeInBytes uint64 @@ -100,7 +99,8 @@ type Segment struct { // New builds a new empty Segment func New() *Segment { return &Segment{ - FieldsMap: map[string]uint16{}, + FieldsMap: map[string]uint16{}, + DocValueFields: map[uint16]bool{}, } } diff --git a/index/scorch/segment/zap/build.go b/index/scorch/segment/zap/build.go index f5a92562d..c7f73769e 100644 --- a/index/scorch/segment/zap/build.go +++ b/index/scorch/segment/zap/build.go @@ -440,7 +440,7 @@ func persistDocValues(memSegment *mem.Segment, w *CountHashWriter, fieldChunkOffsets := make(map[uint16]uint64, len(memSegment.FieldsInv)) fdvEncoder := newChunkedContentCoder(uint64(chunkFactor), uint64(len(memSegment.Stored)-1)) - for _, fieldID := range memSegment.DocValueFields { + for fieldID := range memSegment.DocValueFields { field := memSegment.FieldsInv[fieldID] docTermMap := 
make(map[uint64][]byte, 0) dict, err := memSegment.Dictionary(field) From 111f0d07214d7d6bd773893f1ce535ea83f25ff7 Mon Sep 17 00:00:00 2001 From: abhinavdangeti Date: Wed, 3 Jan 2018 15:26:21 -0800 Subject: [PATCH 122/728] Updated Rollback APIs New APIs: + RollbackPoints() - Retrieves the available list of rollback points: epoch+meta. - The application will need to check with the meta to decide on the rollback point. + Rollback() - API requires a rollback point identified by the first API. - Atomically & Durably rolls back the index to specified point, provided the specified rollback point is still available. + Unit test: TestIndexRollback - Writes a batch. - Sets the rollback point. - Writes second batch. - Rollback to previously decided point. - Ensure that data is as is before the second batch. --- index/scorch/introducer.go | 6 +- index/scorch/snapshot_rollback.go | 172 +++++++++++++++++-------- index/scorch/snapshot_rollback_test.go | 128 ++++++++++++------ 3 files changed, 215 insertions(+), 91 deletions(-) diff --git a/index/scorch/introducer.go b/index/scorch/introducer.go index 715325626..0b9c48537 100644 --- a/index/scorch/introducer.go +++ b/index/scorch/introducer.go @@ -289,7 +289,11 @@ func (s *Scorch) revertToSnapshot(revertTo *snapshotReversion) error { deleted: segmentSnapshot.deleted, cachedDocs: segmentSnapshot.cachedDocs, } - segmentSnapshot.segment.AddRef() + newSnapshot.segment[i].segment.AddRef() + + // remove segment from ineligibleForRemoval map + filename := zapFileName(segmentSnapshot.id) + delete(s.ineligibleForRemoval, filename) } if revertTo.persisted != nil { diff --git a/index/scorch/snapshot_rollback.go b/index/scorch/snapshot_rollback.go index d4b1f2eb8..43c3ba9f1 100644 --- a/index/scorch/snapshot_rollback.go +++ b/index/scorch/snapshot_rollback.go @@ -15,98 +15,164 @@ package scorch import ( - "bytes" + "fmt" "log" "github.com/blevesearch/bleve/index/scorch/segment" + "github.com/boltdb/bolt" ) -// PreviousPersistedSnapshot 
returns the next older, previous -// IndexSnapshot based on the provided IndexSnapshot. If the provided -// argument is nil, the most recently persisted IndexSnapshot is returned. -// This API allows the application to walk backwards into the history -// of a store to previous points in time. A nil return value indicates -// that no previous snapshots are available. -func (s *Scorch) PreviousPersistedSnapshot(is *IndexSnapshot) (*IndexSnapshot, error) { +type RollbackPoint struct { + epoch uint64 + meta map[string][]byte +} + +func (r *RollbackPoint) GetInternal(key []byte) []byte { + return r.meta[string(key)] +} + +// RollbackPoints returns an array of rollback points available +// for the application to make a decision on where to rollback +// to. A nil return value indicates that there are no available +// rollback points. +func (s *Scorch) RollbackPoints() ([]*RollbackPoint, error) { if s.rootBolt == nil { - return nil, nil + return nil, fmt.Errorf("RollbackPoints: root is nil") } - // start a read-only transaction + // start a read-only bolt transaction tx, err := s.rootBolt.Begin(false) if err != nil { - return nil, err + return nil, fmt.Errorf("RollbackPoints: failed to start" + + " read-only transaction") } - // Read-only bolt transactions to be rolled back. 
+ // read-only bolt transactions to be rolled back defer func() { _ = tx.Rollback() }() snapshots := tx.Bucket(boltSnapshotsBucket) if snapshots == nil { - return nil, nil + return nil, fmt.Errorf("RollbackPoints: no snapshots available") } - pos := []byte(nil) + rollbackPoints := []*RollbackPoint{} - if is != nil { - pos = segment.EncodeUvarintAscending(nil, is.epoch) - } - - c := snapshots.Cursor() - for k, _ := c.Last(); k != nil; k, _ = c.Prev() { - if pos == nil || bytes.Compare(k, pos) < 0 { - _, snapshotEpoch, err := segment.DecodeUvarintAscending(k) - if err != nil { - log.Printf("PreviousPersistedSnapshot:"+ - " unable to parse segment epoch %x, continuing", k) - continue - } + c1 := snapshots.Cursor() + for k, _ := c1.Last(); k != nil; k, _ = c1.Prev() { + _, snapshotEpoch, err := segment.DecodeUvarintAscending(k) + if err != nil { + log.Printf("RollbackPoints:"+ + " unable to parse segment epoch %x, continuing", k) + continue + } - snapshot := snapshots.Bucket(k) - if snapshot == nil { - log.Printf("PreviousPersistedSnapshot:"+ - " snapshot key, but bucket missing %x, continuing", k) - continue - } + snapshot := snapshots.Bucket(k) + if snapshot == nil { + log.Printf("RollbackPoints:"+ + " snapshot key, but bucket missing %x, continuing", k) + continue + } - indexSnapshot, err := s.loadSnapshot(snapshot) - if err != nil { - log.Printf("PreviousPersistedSnapshot:"+ - " unable to load snapshot, %v, continuing", err) - continue + meta := map[string][]byte{} + c2 := snapshot.Cursor() + for j, _ := c2.First(); j != nil; j, _ = c2.Next() { + if j[0] == boltInternalKey[0] { + internalBucket := snapshot.Bucket(j) + err = internalBucket.ForEach(func(key []byte, val []byte) error { + copiedVal := append([]byte(nil), val...) 
+ meta[string(key)] = copiedVal + return nil + }) + if err != nil { + break + } } + } - indexSnapshot.epoch = snapshotEpoch - return indexSnapshot, nil + if err != nil { + log.Printf("RollbackPoints:"+ + " failed in fetching internal data: %v", err) + continue } + + rollbackPoints = append(rollbackPoints, &RollbackPoint{ + epoch: snapshotEpoch, + meta: meta, + }) } - return nil, nil + return rollbackPoints, nil } -// SnapshotRevert atomically brings the store back to the point in time -// as represented by the revertTo IndexSnapshot. SnapshotRevert() should -// only be passed an IndexSnapshot that came from the same store. -func (s *Scorch) SnapshotRevert(revertTo *IndexSnapshot) error { - revert := &snapshotReversion{ - snapshot: revertTo, - applied: make(chan error), +// Rollback atomically and durably (if unsafeBatch is unset) brings +// the store back to the point in time as represented by the +// RollbackPoint. Rollback() should only be passed a RollbackPoint +// that came from the same store using the RollbackPoints() API. 
+func (s *Scorch) Rollback(to *RollbackPoint) error { + if to == nil { + return fmt.Errorf("Rollback: RollbackPoint is nil") } - if !s.unsafeBatch { - revert.persisted = make(chan error) + if s.rootBolt == nil { + return fmt.Errorf("Rollback: root is nil") } - s.revertToSnapshots <- revert + revert := &snapshotReversion{} + + s.rootLock.Lock() + + err := s.rootBolt.View(func(tx *bolt.Tx) error { + snapshots := tx.Bucket(boltSnapshotsBucket) + if snapshots == nil { + return fmt.Errorf("Rollback: no snapshots available") + } + + pos := segment.EncodeUvarintAscending(nil, to.epoch) + + snapshot := snapshots.Bucket(pos) + if snapshot == nil { + return fmt.Errorf("Rollback: snapshot not found") + } + + indexSnapshot, err := s.loadSnapshot(snapshot) + if err != nil { + return fmt.Errorf("Rollback: unable to load snapshot: %v", err) + } + + // add segments referenced by loaded index snapshot to the + // ineligibleForRemoval map + for _, segSnap := range indexSnapshot.segment { + filename := zapFileName(segSnap.id) + s.ineligibleForRemoval[filename] = true + } + + revert.snapshot = indexSnapshot + revert.applied = make(chan error) + + if !s.unsafeBatch { + revert.persisted = make(chan error) + } + + return nil + }) + + s.rootLock.Unlock() - // block until this IndexSnapshot is applied - err := <-revert.applied if err != nil { return err } + // introduce the reversion + s.revertToSnapshots <- revert + + // block until this snapshot is applied + err = <-revert.applied + if err != nil { + return fmt.Errorf("Rollback: failed with err: %v", err) + } + if revert.persisted != nil { err = <-revert.persisted } diff --git a/index/scorch/snapshot_rollback_test.go b/index/scorch/snapshot_rollback_test.go index 879d01685..9816a51e6 100644 --- a/index/scorch/snapshot_rollback_test.go +++ b/index/scorch/snapshot_rollback_test.go @@ -45,70 +45,124 @@ func TestIndexRollback(t *testing.T) { } }() - // create 2 docs + // create a batch, insert 2 new documents + batch := index.NewBatch() doc 
:= document.NewDocument("1") doc.AddField(document.NewTextField("name", []uint64{}, []byte("test1"))) - err = idx.Update(doc) - if err != nil { - t.Error(err) - } - + batch.Update(doc) doc = document.NewDocument("2") doc.AddField(document.NewTextField("name", []uint64{}, []byte("test2"))) - err = idx.Update(doc) + batch.Update(doc) + + err = idx.Batch(batch) if err != nil { - t.Error(err) + t.Fatal(err) } - // create a batch, insert new doc, update existing doc, delete existing doc - batch := index.NewBatch() + sh, ok := idx.(*Scorch) + if !ok { + t.Fatalf("Not a scorch index?") + } + + // fetch rollback points available as of here + rollbackPoints, err := sh.RollbackPoints() + if err != nil || len(rollbackPoints) == 0 { + t.Fatal(err, len(rollbackPoints)) + } + + // set this as a rollback point for the future + rollbackPoint := rollbackPoints[0] + + // create another batch, insert 2 new documents, and delete an existing one + batch = index.NewBatch() doc = document.NewDocument("3") doc.AddField(document.NewTextField("name", []uint64{}, []byte("test3"))) batch.Update(doc) - doc = document.NewDocument("2") - doc.AddField(document.NewTextField("name", []uint64{}, []byte("test2updated"))) + doc = document.NewDocument("4") + doc.AddField(document.NewTextField("name", []uint64{}, []byte("test4"))) batch.Update(doc) batch.Delete("1") err = idx.Batch(batch) if err != nil { - t.Error(err) + t.Fatal(err) } - sh, ok := idx.(*Scorch) - if !ok { - t.Errorf("Not a scorch index?") + reader, err := idx.Reader() + if err != nil { + t.Fatal(err) + } + + docCount, err := reader.DocCount() + if err != nil { + t.Fatal(err) } - // Get Last persisted snapshot - ss, err := sh.PreviousPersistedSnapshot(nil) + // expect docs 2, 3, 4 + if docCount != 3 { + t.Fatalf("unexpected doc count: %v", docCount) + } + ret, err := reader.Document("1") + if err != nil || ret != nil { + t.Fatal(ret, err) + } + ret, err = reader.Document("2") + if err != nil || ret == nil { + t.Fatal(ret, err) + } + ret, 
err = reader.Document("3") + if err != nil || ret == nil { + t.Fatal(ret, err) + } + ret, err = reader.Document("4") + if err != nil || ret == nil { + t.Fatal(ret, err) + } + + err = reader.Close() if err != nil { - t.Error(err) + t.Fatal(err) } - // Retrieve the snapshot earlier - prev, err := sh.PreviousPersistedSnapshot(ss) + // rollback to the selected rollback point + err = sh.Rollback(rollbackPoint) if err != nil { - t.Error(err) + t.Fatal(err) } - if prev != nil { - err = sh.SnapshotRevert(prev) - if err != nil { - t.Error(err) - } + reader, err = idx.Reader() + if err != nil { + t.Fatal(err) + } - newRoot, err := sh.PreviousPersistedSnapshot(nil) - if err != nil { - t.Error(err) - } + docCount, err = reader.DocCount() + if err != nil { + t.Fatal(err) + } - if newRoot == nil { - t.Errorf("Failed to retrieve latest persisted snapshot") - } + // expect only docs 1, 2 + if docCount != 2 { + t.Fatalf("unexpected doc count: %v", docCount) + } + ret, err = reader.Document("1") + if err != nil || ret == nil { + t.Fatal(ret, err) + } + ret, err = reader.Document("2") + if err != nil || ret == nil { + t.Fatal(ret, err) + } + ret, err = reader.Document("3") + if err != nil || ret != nil { + t.Fatal(ret, err) + } + ret, err = reader.Document("4") + if err != nil || ret != nil { + t.Fatal(ret, err) + } - if newRoot.epoch <= prev.epoch { - t.Errorf("Unexpected epoch, %v <= %v", newRoot.epoch, prev.epoch) - } + err = reader.Close() + if err != nil { + t.Fatal(err) } } From c691cd2bb55c81fdd3634a9b23fa0b895fc7b3ce Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Fri, 5 Jan 2018 10:17:18 -0500 Subject: [PATCH 123/728] refactor scorch/zap command-line tools under bleve zap command-line tool added to main bleve command-line tool this required physical relocation due to the vendoring used only on the bleve command-line tool (unforseen limitation) a new scorch command-line tool has also been introduced and for the same reasons it is physically store under the top-level bleve 
command-line tool as well --- cmd/bleve/cmd/scorch.go | 25 +++++++ cmd/bleve/cmd/scorch/info.go | 59 ++++++++++++++++ cmd/bleve/cmd/scorch/root.go | 70 +++++++++++++++++++ cmd/bleve/cmd/scorch/snapshot.go | 46 ++++++++++++ .../cmd/zap/main.go => cmd/bleve/cmd/zap.go | 10 +-- .../cmd/zap/cmd => cmd/bleve/cmd/zap}/dict.go | 2 +- .../zap/cmd => cmd/bleve/cmd/zap}/docvalue.go | 2 +- .../zap/cmd => cmd/bleve/cmd/zap}/explore.go | 2 +- .../zap/cmd => cmd/bleve/cmd/zap}/fields.go | 2 +- .../zap/cmd => cmd/bleve/cmd/zap}/footer.go | 2 +- .../cmd/zap/cmd => cmd/bleve/cmd/zap}/root.go | 2 +- .../zap/cmd => cmd/bleve/cmd/zap}/stored.go | 2 +- index/scorch/persister.go | 4 +- index/scorch/segment/zap/cmd/zap/README.md | 3 - 14 files changed, 215 insertions(+), 16 deletions(-) create mode 100644 cmd/bleve/cmd/scorch.go create mode 100644 cmd/bleve/cmd/scorch/info.go create mode 100644 cmd/bleve/cmd/scorch/root.go create mode 100644 cmd/bleve/cmd/scorch/snapshot.go rename index/scorch/segment/zap/cmd/zap/main.go => cmd/bleve/cmd/zap.go (79%) rename {index/scorch/segment/zap/cmd/zap/cmd => cmd/bleve/cmd/zap}/dict.go (99%) rename {index/scorch/segment/zap/cmd/zap/cmd => cmd/bleve/cmd/zap}/docvalue.go (99%) rename {index/scorch/segment/zap/cmd/zap/cmd => cmd/bleve/cmd/zap}/explore.go (99%) rename {index/scorch/segment/zap/cmd/zap/cmd => cmd/bleve/cmd/zap}/fields.go (99%) rename {index/scorch/segment/zap/cmd/zap/cmd => cmd/bleve/cmd/zap}/footer.go (99%) rename {index/scorch/segment/zap/cmd/zap/cmd => cmd/bleve/cmd/zap}/root.go (99%) rename {index/scorch/segment/zap/cmd/zap/cmd => cmd/bleve/cmd/zap}/stored.go (99%) delete mode 100644 index/scorch/segment/zap/cmd/zap/README.md diff --git a/cmd/bleve/cmd/scorch.go b/cmd/bleve/cmd/scorch.go new file mode 100644 index 000000000..781db7031 --- /dev/null +++ b/cmd/bleve/cmd/scorch.go @@ -0,0 +1,25 @@ +// Copyright (c) 2017 Couchbase, Inc. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package cmd + +import ( + "github.com/blevesearch/bleve/cmd/bleve/cmd/scorch" +) + +// make scorch command-line tool a bleve sub-command + +func init() { + RootCmd.AddCommand(scorch.RootCmd) +} diff --git a/cmd/bleve/cmd/scorch/info.go b/cmd/bleve/cmd/scorch/info.go new file mode 100644 index 000000000..2b4674f06 --- /dev/null +++ b/cmd/bleve/cmd/scorch/info.go @@ -0,0 +1,59 @@ +// Copyright (c) 2017 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package scorch + +import ( + "fmt" + + "github.com/spf13/cobra" +) + +// dictCmd represents the dict command +var infoCmd = &cobra.Command{ + Use: "info", + Short: "info prints basic info about the index", + Long: `The info command prints basic info about the index.`, + RunE: func(cmd *cobra.Command, args []string) error { + + reader, err := index.Reader() + if err != nil { + return err + } + + count, err := reader.DocCount() + if err != nil { + return err + } + + fmt.Printf("count: %d\n", count) + + // var numSnapshots int + // var rootSnapshot uint64 + // index.VisitBoltSnapshots(func(snapshotEpoch uint64) error { + // if rootSnapshot == 0 { + // rootSnapshot = snapshotEpoch + // } + // numSnapshots++ + // return nil + // }) + // fmt.Printf("has %d snapshot(s), root: %d\n", numSnapshots, rootSnapshot) + + return nil + }, +} + +func init() { + RootCmd.AddCommand(infoCmd) +} diff --git a/cmd/bleve/cmd/scorch/root.go b/cmd/bleve/cmd/scorch/root.go new file mode 100644 index 000000000..b27992edc --- /dev/null +++ b/cmd/bleve/cmd/scorch/root.go @@ -0,0 +1,70 @@ +// Copyright (c) 2017 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package scorch + +import ( + "fmt" + "os" + + "github.com/blevesearch/bleve/index/scorch" + "github.com/spf13/cobra" +) + +var index *scorch.Scorch + +// RootCmd represents the base command when called without any subcommands +var RootCmd = &cobra.Command{ + Use: "scorch", + Short: "command-line tool to interact with a scorch index", + Long: `Scorch is a command-line tool to interact with a scorch index.`, + PersistentPreRunE: func(cmd *cobra.Command, args []string) error { + + if len(args) < 1 { + return fmt.Errorf("must specify path to scorch index") + } + + readOnly := true + config := map[string]interface{}{ + "read_only": readOnly, + "path": args[0], + } + + idx, err := scorch.NewScorch(scorch.Name, config, nil) + if err != nil { + return err + } + + err = idx.Open() + if err != nil { + return fmt.Errorf("error opening: %v", err) + } + + index = idx.(*scorch.Scorch) + + return nil + }, + PersistentPostRunE: func(cmd *cobra.Command, args []string) error { + return nil + }, +} + +// Execute adds all child commands to the root command sets flags appropriately. +// This is called by main.main(). It only needs to happen once to the rootCmd. +func Execute() { + if err := RootCmd.Execute(); err != nil { + fmt.Println(err) + os.Exit(-1) + } +} diff --git a/cmd/bleve/cmd/scorch/snapshot.go b/cmd/bleve/cmd/scorch/snapshot.go new file mode 100644 index 000000000..1c51c056c --- /dev/null +++ b/cmd/bleve/cmd/scorch/snapshot.go @@ -0,0 +1,46 @@ +// Copyright (c) 2017 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +package scorch + +import ( + "fmt" + + "github.com/spf13/cobra" +) + +// snapshotsCmd represents the snapshots command +var snapshotCmd = &cobra.Command{ + Use: "snapshot", + Short: "info prints details about the snapshots in the index", + Long: `The snapshot command prints details about the snapshots in the index.`, + RunE: func(cmd *cobra.Command, args []string) error { + + if len(args) < 2 { + snapshotEpochs, err := index.RootBoltSnapshotEpochs() + if err != nil { + return err + } + for _, snapshotEpoch := range snapshotEpochs { + fmt.Printf("%d\n", snapshotEpoch) + } + } + + return nil + }, +} + +func init() { + RootCmd.AddCommand(snapshotCmd) +} diff --git a/index/scorch/segment/zap/cmd/zap/main.go b/cmd/bleve/cmd/zap.go similarity index 79% rename from index/scorch/segment/zap/cmd/zap/main.go rename to cmd/bleve/cmd/zap.go index 23c500a33..b84d5f2b3 100644 --- a/index/scorch/segment/zap/cmd/zap/main.go +++ b/cmd/bleve/cmd/zap.go @@ -12,12 +12,14 @@ // See the License for the specific language governing permissions and // limitations under the License. -package main +package cmd import ( - "github.com/blevesearch/bleve/index/scorch/segment/zap/cmd/zap/cmd" + "github.com/blevesearch/bleve/cmd/bleve/cmd/zap" ) -func main() { - cmd.Execute() +// make zap command-line tool a bleve sub-command + +func init() { + RootCmd.AddCommand(zap.RootCmd) } diff --git a/index/scorch/segment/zap/cmd/zap/cmd/dict.go b/cmd/bleve/cmd/zap/dict.go similarity index 99% rename from index/scorch/segment/zap/cmd/zap/cmd/dict.go rename to cmd/bleve/cmd/zap/dict.go index fa8b3277e..3e2727195 100644 --- a/index/scorch/segment/zap/cmd/zap/cmd/dict.go +++ b/cmd/bleve/cmd/zap/dict.go @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-package cmd +package zap import ( "encoding/binary" diff --git a/index/scorch/segment/zap/cmd/zap/cmd/docvalue.go b/cmd/bleve/cmd/zap/docvalue.go similarity index 99% rename from index/scorch/segment/zap/cmd/zap/cmd/docvalue.go rename to cmd/bleve/cmd/zap/docvalue.go index ee15bac35..165829fdf 100644 --- a/index/scorch/segment/zap/cmd/zap/cmd/docvalue.go +++ b/cmd/bleve/cmd/zap/docvalue.go @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -package cmd +package zap import ( "bytes" diff --git a/index/scorch/segment/zap/cmd/zap/cmd/explore.go b/cmd/bleve/cmd/zap/explore.go similarity index 99% rename from index/scorch/segment/zap/cmd/zap/cmd/explore.go rename to cmd/bleve/cmd/zap/explore.go index 012a829fe..de05c63e7 100644 --- a/index/scorch/segment/zap/cmd/zap/cmd/explore.go +++ b/cmd/bleve/cmd/zap/explore.go @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -package cmd +package zap import ( "encoding/binary" diff --git a/index/scorch/segment/zap/cmd/zap/cmd/fields.go b/cmd/bleve/cmd/zap/fields.go similarity index 99% rename from index/scorch/segment/zap/cmd/zap/cmd/fields.go rename to cmd/bleve/cmd/zap/fields.go index cfc40974b..cf8cc3d86 100644 --- a/index/scorch/segment/zap/cmd/zap/cmd/fields.go +++ b/cmd/bleve/cmd/zap/fields.go @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-package cmd +package zap import ( "encoding/binary" diff --git a/index/scorch/segment/zap/cmd/zap/cmd/footer.go b/cmd/bleve/cmd/zap/footer.go similarity index 99% rename from index/scorch/segment/zap/cmd/zap/cmd/footer.go rename to cmd/bleve/cmd/zap/footer.go index 0460360fc..96078ded6 100644 --- a/index/scorch/segment/zap/cmd/zap/cmd/footer.go +++ b/cmd/bleve/cmd/zap/footer.go @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -package cmd +package zap import ( "fmt" diff --git a/index/scorch/segment/zap/cmd/zap/cmd/root.go b/cmd/bleve/cmd/zap/root.go similarity index 99% rename from index/scorch/segment/zap/cmd/zap/cmd/root.go rename to cmd/bleve/cmd/zap/root.go index f969bbf13..ee2b62602 100644 --- a/index/scorch/segment/zap/cmd/zap/cmd/root.go +++ b/cmd/bleve/cmd/zap/root.go @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -package cmd +package zap import ( "fmt" diff --git a/index/scorch/segment/zap/cmd/zap/cmd/stored.go b/cmd/bleve/cmd/zap/stored.go similarity index 99% rename from index/scorch/segment/zap/cmd/zap/cmd/stored.go rename to cmd/bleve/cmd/zap/stored.go index 64e42c7e6..ba1143cb1 100644 --- a/index/scorch/segment/zap/cmd/zap/cmd/stored.go +++ b/cmd/bleve/cmd/zap/stored.go @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -package cmd +package zap import ( "encoding/binary" diff --git a/index/scorch/persister.go b/index/scorch/persister.go index acf241ebf..e0614151e 100644 --- a/index/scorch/persister.go +++ b/index/scorch/persister.go @@ -443,7 +443,7 @@ var NumSnapshotsToKeep = 1 // Removes enough snapshots from the rootBolt so that the // s.eligibleForRemoval stays under the NumSnapshotsToKeep policy. 
func (s *Scorch) removeOldBoltSnapshots() (numRemoved int, err error) { - persistedEpochs, err := s.rootBoltSnapshotEpochs() + persistedEpochs, err := s.RootBoltSnapshotEpochs() if err != nil { return 0, err } @@ -565,7 +565,7 @@ func (s *Scorch) removeOldZapFiles() error { return nil } -func (s *Scorch) rootBoltSnapshotEpochs() ([]uint64, error) { +func (s *Scorch) RootBoltSnapshotEpochs() ([]uint64, error) { var rv []uint64 err := s.rootBolt.View(func(tx *bolt.Tx) error { snapshots := tx.Bucket(boltSnapshotsBucket) diff --git a/index/scorch/segment/zap/cmd/zap/README.md b/index/scorch/segment/zap/cmd/zap/README.md deleted file mode 100644 index 99f55d365..000000000 --- a/index/scorch/segment/zap/cmd/zap/README.md +++ /dev/null @@ -1,3 +0,0 @@ -# zap command line utility - -Kind of a hack just put together quickly to let me debug some issues. From 57a075afdb5bdd2debe8b228512e50d99e1a352c Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Fri, 5 Jan 2018 11:50:07 -0500 Subject: [PATCH 124/728] improving command-line tool for scorch --- cmd/bleve/cmd/scorch/ascii.go | 59 ++++++++++++++++++++++++++++++ cmd/bleve/cmd/scorch/deleted.go | 55 ++++++++++++++++++++++++++++ cmd/bleve/cmd/scorch/internal.go | 61 ++++++++++++++++++++++++++++++++ cmd/bleve/cmd/scorch/snapshot.go | 20 ++++++++++- index/scorch/persister.go | 22 ++++++++++++ index/scorch/snapshot_index.go | 8 +++++ index/scorch/snapshot_segment.go | 8 +++++ 7 files changed, 232 insertions(+), 1 deletion(-) create mode 100644 cmd/bleve/cmd/scorch/ascii.go create mode 100644 cmd/bleve/cmd/scorch/deleted.go create mode 100644 cmd/bleve/cmd/scorch/internal.go diff --git a/cmd/bleve/cmd/scorch/ascii.go b/cmd/bleve/cmd/scorch/ascii.go new file mode 100644 index 000000000..7b36b5b9c --- /dev/null +++ b/cmd/bleve/cmd/scorch/ascii.go @@ -0,0 +1,59 @@ +// Copyright (c) 2017 Couchbase, Inc. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package scorch + +import ( + "fmt" + "strconv" + + "github.com/blevesearch/bleve/index/scorch/mergeplan" + "github.com/spf13/cobra" +) + +// asciiCmd represents the snapshots command +var asciiCmd = &cobra.Command{ + Use: "ascii", + Short: "ascii prints details an ascii representation of the snapshots in the index", + Long: `The ascii command prints an ascii representation of the snapshots in the index.`, + RunE: func(cmd *cobra.Command, args []string) error { + + if len(args) < 2 { + return fmt.Errorf("snapshot epoch required") + } else if len(args) < 3 { + snapshotEpoch, err := strconv.ParseUint(args[1], 10, 64) + if err != nil { + return err + } + snapshot, err := index.LoadSnapshot(snapshotEpoch) + if err != nil { + return err + } + segments := snapshot.Segments() + var mergePlanSegments []mergeplan.Segment + for _, v := range segments { + mergePlanSegments = append(mergePlanSegments, v) + } + + str := mergeplan.ToBarChart(args[1], 25, mergePlanSegments, nil) + fmt.Printf("%s\n", str) + } + + return nil + }, +} + +func init() { + RootCmd.AddCommand(asciiCmd) +} diff --git a/cmd/bleve/cmd/scorch/deleted.go b/cmd/bleve/cmd/scorch/deleted.go new file mode 100644 index 000000000..cb2a9245c --- /dev/null +++ b/cmd/bleve/cmd/scorch/deleted.go @@ -0,0 +1,55 @@ +// Copyright (c) 2017 Couchbase, Inc. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package scorch + +import ( + "fmt" + "strconv" + + "github.com/spf13/cobra" +) + +// deletedCmd represents the deleted command +var deletedCmd = &cobra.Command{ + Use: "deleted", + Short: "deleted prints the deleted bitmap for segments in the index snapshot", + Long: `The delete command prints the deleted bitmap for segments in the index snapshot.`, + RunE: func(cmd *cobra.Command, args []string) error { + + if len(args) < 2 { + return fmt.Errorf("snapshot epoch required") + } else if len(args) < 3 { + snapshotEpoch, err := strconv.ParseUint(args[1], 10, 64) + if err != nil { + return err + } + snapshot, err := index.LoadSnapshot(snapshotEpoch) + if err != nil { + return err + } + segments := snapshot.Segments() + for i, segmentSnap := range segments { + deleted := segmentSnap.Deleted() + fmt.Printf("%d %v\n", i, deleted) + } + } + + return nil + }, +} + +func init() { + RootCmd.AddCommand(deletedCmd) +} diff --git a/cmd/bleve/cmd/scorch/internal.go b/cmd/bleve/cmd/scorch/internal.go new file mode 100644 index 000000000..027e90282 --- /dev/null +++ b/cmd/bleve/cmd/scorch/internal.go @@ -0,0 +1,61 @@ +// Copyright (c) 2017 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package scorch + +import ( + "fmt" + "strconv" + + "github.com/spf13/cobra" +) + +var ascii bool + +// internalCmd represents the snapshots command +var internalCmd = &cobra.Command{ + Use: "internal", + Short: "internal prints the internal k/v pairs in a snapshot", + Long: `The internal command prints the internal k/v pairs in a snapshot.`, + RunE: func(cmd *cobra.Command, args []string) error { + + if len(args) < 2 { + return fmt.Errorf("snapshot epoch required") + } else if len(args) < 3 { + snapshotEpoch, err := strconv.ParseUint(args[1], 10, 64) + if err != nil { + return err + } + snapshot, err := index.LoadSnapshot(snapshotEpoch) + if err != nil { + return err + } + internal := snapshot.Internal() + for k, v := range internal { + if ascii { + fmt.Printf("%s %s\n", k, string(v)) + } else { + fmt.Printf("%x %x\n", k, v) + } + } + } + + return nil + }, +} + +func init() { + RootCmd.AddCommand(internalCmd) + internalCmd.Flags().BoolVarP(&ascii, "ascii", "a", false, "print key/value in ascii") +} diff --git a/cmd/bleve/cmd/scorch/snapshot.go b/cmd/bleve/cmd/scorch/snapshot.go index 1c51c056c..bb035ce59 100644 --- a/cmd/bleve/cmd/scorch/snapshot.go +++ b/cmd/bleve/cmd/scorch/snapshot.go @@ -16,11 +16,13 @@ package scorch import ( "fmt" + "strconv" + "github.com/blevesearch/bleve/index/scorch/segment/zap" "github.com/spf13/cobra" ) -// snapshotsCmd represents the snapshots command +// snapshotCmd represents the snapshot command var snapshotCmd = &cobra.Command{ Use: "snapshot", Short: "info prints details about the snapshots in the index", @@ 
-35,6 +37,22 @@ var snapshotCmd = &cobra.Command{ for _, snapshotEpoch := range snapshotEpochs { fmt.Printf("%d\n", snapshotEpoch) } + } else if len(args) < 3 { + snapshotEpoch, err := strconv.ParseUint(args[1], 10, 64) + if err != nil { + return err + } + snapshot, err := index.LoadSnapshot(snapshotEpoch) + if err != nil { + return err + } + segments := snapshot.Segments() + for i, segmentSnap := range segments { + segment := segmentSnap.Segment() + if segment, ok := segment.(*zap.Segment); ok { + fmt.Printf("%d %s\n", i, segment.Path()) + } + } } return nil diff --git a/index/scorch/persister.go b/index/scorch/persister.go index e0614151e..401086543 100644 --- a/index/scorch/persister.go +++ b/index/scorch/persister.go @@ -340,6 +340,28 @@ func (s *Scorch) loadFromBolt() error { }) } +// LoadSnapshot loads the segment with the specified epoch +// NOTE: this is currently ONLY intended to be used by the command-line tool +func (s *Scorch) LoadSnapshot(epoch uint64) (rv *IndexSnapshot, err error) { + err = s.rootBolt.View(func(tx *bolt.Tx) error { + snapshots := tx.Bucket(boltSnapshotsBucket) + if snapshots == nil { + return nil + } + snapshotKey := segment.EncodeUvarintAscending(nil, epoch) + snapshot := snapshots.Bucket(snapshotKey) + if snapshot == nil { + return nil + } + rv, err = s.loadSnapshot(snapshot) + return err + }) + if err != nil { + return nil, err + } + return rv, nil +} + func (s *Scorch) loadSnapshot(snapshot *bolt.Bucket) (*IndexSnapshot, error) { rv := &IndexSnapshot{ diff --git a/index/scorch/snapshot_index.go b/index/scorch/snapshot_index.go index 074d4c41f..a9b105a0d 100644 --- a/index/scorch/snapshot_index.go +++ b/index/scorch/snapshot_index.go @@ -51,6 +51,14 @@ type IndexSnapshot struct { refs int64 } +func (i *IndexSnapshot) Segments() []*SegmentSnapshot { + return i.segment +} + +func (i *IndexSnapshot) Internal() map[string][]byte { + return i.internal +} + func (i *IndexSnapshot) AddRef() { i.m.Lock() i.refs++ diff --git 
a/index/scorch/snapshot_segment.go b/index/scorch/snapshot_segment.go index b3b8d8284..0c111c505 100644 --- a/index/scorch/snapshot_segment.go +++ b/index/scorch/snapshot_segment.go @@ -56,6 +56,14 @@ type SegmentSnapshot struct { cachedDocs *cachedDocs } +func (s *SegmentSnapshot) Segment() segment.Segment { + return s.segment +} + +func (s *SegmentSnapshot) Deleted() *roaring.Bitmap { + return s.deleted +} + func (s *SegmentSnapshot) Id() uint64 { return s.id } From 62374796050991a1ae019c431128e46987b2a5ec Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Fri, 5 Jan 2018 13:32:47 -0500 Subject: [PATCH 125/728] fix race condition in setting up event callbacks previous approach used SetEventCallback method which allowed you to change the callback, unfotunately that also included times after the goroutines were started and potentially firing the callback. checking lock on this would be too expensive, so instead we go for an approach that allows callbacks to be registered by name during process init(), then upon opening up an index a string config key 'eventCallbackName' is used to look up the appropriate callback function. also, since this string config name is serializable, it fits into the existing bleve index metadata without any new issues. --- index/scorch/event.go | 52 +++++++++++++++++++++++++++ index/scorch/event_test.go | 73 ++++++++++++++++++++++++++++++++++++++ index/scorch/scorch.go | 41 +++------------------ 3 files changed, 130 insertions(+), 36 deletions(-) create mode 100644 index/scorch/event.go create mode 100644 index/scorch/event_test.go diff --git a/index/scorch/event.go b/index/scorch/event.go new file mode 100644 index 000000000..ef3439a8b --- /dev/null +++ b/index/scorch/event.go @@ -0,0 +1,52 @@ +// Copyright (c) 2018 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package scorch + +import "time" + +// RegistryEventCallbacks should be treated as read-only after +// process init()'ialization. +var RegistryEventCallbacks = map[string]func(Event){} + +// Event represents the information provided in an OnEvent() callback. +type Event struct { + Kind EventKind + Scorch *Scorch + Duration time.Duration +} + +// EventKind represents an event code for OnEvent() callbacks. +type EventKind int + +// EventKindCloseStart is fired when a Scorch.Close() has begun. +var EventKindCloseStart = EventKind(1) + +// EventKindClose is fired when a scorch index has been fully closed. +var EventKindClose = EventKind(2) + +// EventKindMergerProgress is fired when the merger has completed a +// round of merge processing. +var EventKindMergerProgress = EventKind(3) + +// EventKindPersisterProgress is fired when the persister has completed +// a round of persistence processing. +var EventKindPersisterProgress = EventKind(4) + +// EventKindBatchIntroductionStart is fired when Batch() is invoked which +// introduces a new segment. +var EventKindBatchIntroductionStart = EventKind(5) + +// EventKindBatchIntroduction is fired when Batch() completes. +var EventKindBatchIntroduction = EventKind(6) diff --git a/index/scorch/event_test.go b/index/scorch/event_test.go new file mode 100644 index 000000000..92b49d20d --- /dev/null +++ b/index/scorch/event_test.go @@ -0,0 +1,73 @@ +// Copyright (c) 2018 Couchbase, Inc. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package scorch + +import ( + "testing" + + "github.com/blevesearch/bleve/document" + "github.com/blevesearch/bleve/index" +) + +func TestEventBatchIntroductionStart(t *testing.T) { + defer func() { + err := DestroyTest() + if err != nil { + t.Fatal(err) + } + }() + + var count int + RegistryEventCallbacks["test"] = func(e Event) { + if e.Kind == EventKindBatchIntroductionStart { + count++ + } + } + + ourConfig := make(map[string]interface{}, len(testConfig)) + for k, v := range testConfig { + ourConfig[k] = v + } + ourConfig["eventCallbackName"] = "test" + + analysisQueue := index.NewAnalysisQueue(1) + idx, err := NewScorch(Name, ourConfig, analysisQueue) + if err != nil { + t.Fatal(err) + } + + err = idx.Open() + if err != nil { + t.Fatalf("error opening index: %v", err) + } + + doc := document.NewDocument("1") + doc.AddField(document.NewTextField("name", []uint64{}, []byte("test"))) + err = idx.Update(doc) + if err != nil { + t.Errorf("Error updating index: %v", err) + } + + defer func() { + err := idx.Close() + if err != nil { + t.Fatal(err) + } + }() + + if count != 1 { + t.Fatalf("expected to see 1 batch introduction event event, saw %d", count) + } +} diff --git a/index/scorch/scorch.go b/index/scorch/scorch.go index c6b417a28..df4f5de5f 100644 --- a/index/scorch/scorch.go +++ b/index/scorch/scorch.go @@ -1,4 +1,4 @@ -// Copyright (c) 2017 Couchbase, Inc. +// Copyright (c) 2018 Couchbase, Inc. 
// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -75,37 +75,6 @@ type Scorch struct { onEvent func(event Event) } -// Event represents the information provided in an OnEvent() callback. -type Event struct { - Kind EventKind - Scorch *Scorch - Duration time.Duration -} - -// EventKind represents an event code for OnEvent() callbacks. -type EventKind int - -// EventKindCLoseStart is fired when a Scorch.Close() has begun. -var EventKindCloseStart = EventKind(1) - -// EventKindClose is fired when a scorch index has been fully closed. -var EventKindClose = EventKind(2) - -// EventKindMergerProgress is fired when the merger has completed a -// round of merge processing. -var EventKindMergerProgress = EventKind(3) - -// EventKindPersisterProgress is fired when the persister has completed -// a round of persistence processing. -var EventKindPersisterProgress = EventKind(4) - -// EventKindBatchIntroductionStart is fired when Batch() is invoked which -// introduces a new segment. -var EventKindBatchIntroductionStart = EventKind(5) - -// EventKindBatchIntroduction is fired when Batch() completes. 
-var EventKindBatchIntroduction = EventKind(6) - func NewScorch(storeName string, config map[string]interface{}, analysisQueue *index.AnalysisQueue) (index.Index, error) { @@ -127,13 +96,13 @@ func NewScorch(storeName string, if ok { rv.unsafeBatch = ub } + ecbName, ok := config["eventCallbackName"].(string) + if ok { + rv.onEvent = RegistryEventCallbacks[ecbName] + } return rv, nil } -func (s *Scorch) SetEventCallback(f func(Event)) { - s.onEvent = f -} - func (s *Scorch) fireEvent(kind EventKind, dur time.Duration) { if s.onEvent != nil { s.onEvent(Event{Kind: kind, Scorch: s, Duration: dur}) From e756c7acf0caef33e3d2d2f9c1050de746ec8e23 Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Fri, 5 Jan 2018 15:29:01 -0500 Subject: [PATCH 126/728] add initial support for async error callback --- index/scorch/event.go | 4 ++++ index/scorch/merge.go | 3 +-- index/scorch/persister.go | 6 +++--- index/scorch/scorch.go | 13 ++++++++++++- 4 files changed, 20 insertions(+), 6 deletions(-) diff --git a/index/scorch/event.go b/index/scorch/event.go index ef3439a8b..dd79d6d06 100644 --- a/index/scorch/event.go +++ b/index/scorch/event.go @@ -16,6 +16,10 @@ package scorch import "time" +// RegistryAsyncErrorCallbacks should be treated as read-only after +// process init()'ialization. +var RegistryAsyncErrorCallbacks = map[string]func(error){} + // RegistryEventCallbacks should be treated as read-only after // process init()'ialization. 
var RegistryEventCallbacks = map[string]func(Event){} diff --git a/index/scorch/merge.go b/index/scorch/merge.go index 0c166df7a..78c27ddb1 100644 --- a/index/scorch/merge.go +++ b/index/scorch/merge.go @@ -16,7 +16,6 @@ package scorch import ( "fmt" - "log" "os" "sync/atomic" "time" @@ -48,7 +47,7 @@ OUTER: // lets get started err := s.planMergeAtSnapshot(ourSnapshot) if err != nil { - log.Printf("merging err: %v", err) + s.fireAsyncError(fmt.Errorf("merging err: %v", err)) _ = ourSnapshot.DecRef() continue OUTER } diff --git a/index/scorch/persister.go b/index/scorch/persister.go index 401086543..658e57aee 100644 --- a/index/scorch/persister.go +++ b/index/scorch/persister.go @@ -74,7 +74,7 @@ OUTER: close(ch) } if err != nil { - log.Printf("got err persisting snapshot: %v", err) + s.fireAsyncError(fmt.Errorf("got err persisting snapshot: %v", err)) _ = ourSnapshot.DecRef() continue OUTER } @@ -446,13 +446,13 @@ func (p uint64Descending) Swap(i, j int) { p[i], p[j] = p[j], p[i] } func (s *Scorch) removeOldData() { removed, err := s.removeOldBoltSnapshots() if err != nil { - log.Printf("got err removing old bolt snapshots: %v", err) + s.fireAsyncError(fmt.Errorf("got err removing old bolt snapshots: %v", err)) } if removed > 0 { err = s.removeOldZapFiles() if err != nil { - log.Printf("got err removing old zap files: %v", err) + s.fireAsyncError(fmt.Errorf("got err removing old zap files: %v", err)) } } } diff --git a/index/scorch/scorch.go b/index/scorch/scorch.go index df4f5de5f..5d0cfb7c9 100644 --- a/index/scorch/scorch.go +++ b/index/scorch/scorch.go @@ -72,7 +72,8 @@ type Scorch struct { rootBolt *bolt.DB asyncTasks sync.WaitGroup - onEvent func(event Event) + onEvent func(event Event) + onAsyncError func(err error) } func NewScorch(storeName string, @@ -100,6 +101,10 @@ func NewScorch(storeName string, if ok { rv.onEvent = RegistryEventCallbacks[ecbName] } + aecbName, ok := config["asyncErrorCallbackName"].(string) + if ok { + rv.onAsyncError = 
RegistryAsyncErrorCallbacks[aecbName] + } return rv, nil } @@ -109,6 +114,12 @@ func (s *Scorch) fireEvent(kind EventKind, dur time.Duration) { } } +func (s *Scorch) fireAsyncError(err error) { + if s.onAsyncError != nil { + s.onAsyncError(err) + } +} + func (s *Scorch) Open() error { var ok bool s.path, ok = s.config["path"].(string) From 94b0367e47fe54650ffbe56b51c073baa4b0e298 Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Fri, 5 Jan 2018 16:53:16 -0500 Subject: [PATCH 127/728] switch back to upsidedown as default index before merge to master --- config.go | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/config.go b/config.go index c1475db74..482efb408 100644 --- a/config.go +++ b/config.go @@ -21,10 +21,13 @@ import ( "time" "github.com/blevesearch/bleve/index" - "github.com/blevesearch/bleve/index/scorch" "github.com/blevesearch/bleve/index/store/gtreap" + "github.com/blevesearch/bleve/index/upsidedown" "github.com/blevesearch/bleve/registry" "github.com/blevesearch/bleve/search/highlight/highlighter/html" + + // force import of scorch so its accessible by default + _ "github.com/blevesearch/bleve/index/scorch" ) var bleveExpVar = expvar.NewMap("bleve") @@ -69,7 +72,7 @@ func init() { Config.DefaultMemKVStore = gtreap.Name // default index - Config.DefaultIndexType = scorch.Name + Config.DefaultIndexType = upsidedown.Name bootDuration := time.Since(bootStart) bleveExpVar.Add("bootDuration", int64(bootDuration)) From 637fad78a55a5d207d6a74d6e585330d15706451 Mon Sep 17 00:00:00 2001 From: Tom Kralidis Date: Sat, 6 Jan 2018 21:04:03 -0500 Subject: [PATCH 128/728] fix minor typo --- cmd/bleve/cmd/index.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmd/bleve/cmd/index.go b/cmd/bleve/cmd/index.go index 63ad5f3bf..c8925b49b 100644 --- a/cmd/bleve/cmd/index.go +++ b/cmd/bleve/cmd/index.go @@ -111,7 +111,7 @@ func getAllFiles(args []string, rv chan file) { func init() { RootCmd.AddCommand(indexCmd) - 
indexCmd.Flags().BoolVarP(&keepDir, "keepDir", "d", false, "Keep the directory in the dodcument id, defaults false.") + indexCmd.Flags().BoolVarP(&keepDir, "keepDir", "d", false, "Keep the directory in the document id, defaults false.") indexCmd.Flags().BoolVarP(&keepExt, "keepExt", "x", false, "Keep the extension in the document id, defaults false.") indexCmd.Flags().BoolVarP(&parseJSON, "json", "j", true, "Parse the contents as JSON, defaults true.") } From 1788a0380385d48665a82d5c215db48251697a45 Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Sat, 6 Jan 2018 21:09:53 -0500 Subject: [PATCH 129/728] remove junk from end of scorch readme --- index/scorch/README.md | 143 ----------------------------------------- 1 file changed, 143 deletions(-) diff --git a/index/scorch/README.md b/index/scorch/README.md index 690e7d3d5..861335a1b 100644 --- a/index/scorch/README.md +++ b/index/scorch/README.md @@ -365,146 +365,3 @@ A few simple principles have been identified. - Segments with all items deleted/obsoleted can be dropped. Merging of a segment should be able to proceed even if that segment is held by an ongoing snapshot, it should only delay the removal of it. - - -## TODO - -- need reference counting on the segments, to know when we can safely remove? - -- how well will bitmaps perform when large and possibly mmap'd? 
- - ------ -thinking out loud on storage - -- fields - - field name - field id -- term dictionary - - field id - FST (values postings ids) -- postings - - posting id - postings list -- freqs - - posting id - freqs list -- norms - - posting id - norms list -- stored - - docNum - - field id - field values - - - ----- - -race dialog with steve: - -state: 2, 4, 8 - -- introducing new segment X - - deleted bitmasks, 2, 4, 8 - -- merger, merge 4 and 8 - new segment Y - - -- merger wins - - state: 2, 9 - - introducer: need to recompute bitmask for 9, could lose again and keep losing race - -- introducer wins - - state: 2, 4, 8, X - 2-X, 4-X, 8-X, nil - - merger finishes: new segment Y, is not valid, need to be recomputed - - -### Bolt Segment Proposal - -Bucket - -"f" field storage - - Key Val - field name field id (var uint16) - - // TODO field location bits - -"d" term dictionary storage - Key Val - field id (var uint16) Vellum FST (mapping term to posting id uint64) - - -"p" postings list storage - Key Val - posting id (var uint64) Roaring Bitmap Serialization (doc numbers) - see FromBuffer - - -"x" chunked data storage - Key Val - chunk id (var uint64) sub-bucket - - Key Val - posting id (var uint64) sub-bucket - - - ALL Compressed Integer Encoding []uint64 - Key Val - "f" freqs 1 value per hit - "n" norms 1 value per hit - "i" fields values per hit - "s" start values per hit - "e" end values per hit - "p" pos values per hit - "a" array pos - entries - each entry is count - followed by uint64 - -"s" stored field data - Key Val - doc num (var uint64) sub-bucket - - Key Val - "m" mossy-like meta packed - - 16 bits - field id - 8 bits - field type - 2? bits - array pos length - - X bits - offset - X bits - length - - "d" raw []byte data (possibly compressed, need segment level config?) 
- - "a" array position info, packed slice uint64 - - - - - -Notes: - -It is assumed that each IndexReader (snapshot) starts a new Bolt TX (read-only) immediately, and holds it up until it is no longer needed. This allows us to use (unsafely) the raw bytes coming out of BoltDB as return values. Bolt guarantees they will be safe for the duration of the transaction (which we arrange to be the life of the index snapshot). - -Only physically store the fields in one direction, even though at runtime we need both. Upon opening the index, we can read in all the k/v pairs in the "f" bucket. We use the unsafe package to create a []string inverted mapping pointing to the underlying []byte in the BoltDB values. - -The term dictionary is stored opaquely as Vellum FST for each field. When accessing these keys, the []byte return to us is mmap'd by bolt under the hood. We then pass this to vellum using its []byte API, which then operates on it without ever forcing whole thing into memory unless needed. - -We do not need to persist the dictkeys slice since it is only there to support the dictionary iterator prefix/range searches, which are supported directly by the FST. - -Theory of operation of chunked storage is as follows. The postings list iterators only allow starting at the beginning, and have no "advance" capability. In the memory version, this means we always know the Nth hit in the postings list is the Nth entry in some other densely packed slice. However, while OK when everything is in RAM, this is not as suitable for a structure on disk, where wading through detailed info of records you don't care about is too expensive. Instead, we assume some fixed chunking, say 1024. All detailed info for document number N can be found inside of chunk N/1024. Now, the Advance operation still has to Next it's way through the posting list. But, now when it reaches a hit, it knows the chunk index as well as the hit index inside that chunk. 
Further, we push the chunk offsets to the top of the bolt structure, under the theory that we're likely to access data inside a chunk at the same time. For example, you're likely to access the frequency and norm values for a document hit together, so by organizing by chunk first, we increase the likelihood that this info is nearby on disk. - -The "f" and "n" sub-buckets inside a posting have 1 entry for each hit. (you must next-next-next within the chunk) - -The "i", "s", "e", "p", sub-buckets have entries for each hit. (you must have read and know the freq) - -The "a" sub-bucket has groupings, where each grouping starts with a count, followed by entries. - -For example, lets say hit docNum 27 has freq of 2. The first location for the hit has array positions (0, 1) length 2, and the second location for the hit has array positions (1, 3, 2) length 3. The entries in the slice for this hit look like: - -2 0 1 3 1 3 2 -^ ^ -| next entry, number of ints to follow for it -number of ints to follow for this entry From 4c256f56692f10e903bc1582ff7c7db5dbe354d1 Mon Sep 17 00:00:00 2001 From: Sreekanth Sivasankaran Date: Mon, 8 Jan 2018 10:58:33 +0530 Subject: [PATCH 130/728] DocValue Config, new API Changes -VisitableDocValueFields API for persisted DV field list -making dv configs overridable at field level -enabling on the fly/runtime un inverting of doc values -few UT updates --- document/field_boolean.go | 2 +- document/field_datetime.go | 2 +- document/field_numeric.go | 2 +- document/field_text.go | 2 +- document/indexing_options.go | 11 +++ document/indexing_options_test.go | 24 +++++++ index/scorch/scorch.go | 8 --- index/scorch/scorch_test.go | 69 +++++++++++++++++++ index/scorch/segment/mem/build.go | 8 +-- index/scorch/segment/segment.go | 9 ++- index/scorch/segment/zap/build_test.go | 87 ++++++++++++++++++++++++ index/scorch/segment/zap/docvalues.go | 22 +++++- index/scorch/segment/zap/segment_test.go | 83 ++++++++++++++++++++++ index/scorch/snapshot_index.go | 71 
+++++++++++++++++-- index/scorch/snapshot_segment.go | 45 ------------ mapping/field.go | 22 ++++++ mapping/index.go | 8 +++ mapping/mapping_test.go | 3 +- 18 files changed, 409 insertions(+), 69 deletions(-) diff --git a/document/field_boolean.go b/document/field_boolean.go index 668b431a1..c226374c0 100644 --- a/document/field_boolean.go +++ b/document/field_boolean.go @@ -20,7 +20,7 @@ import ( "github.com/blevesearch/bleve/analysis" ) -const DefaultBooleanIndexingOptions = StoreField | IndexField +const DefaultBooleanIndexingOptions = StoreField | IndexField | DocValues type BooleanField struct { name string diff --git a/document/field_datetime.go b/document/field_datetime.go index 6783d53d0..1db068c87 100644 --- a/document/field_datetime.go +++ b/document/field_datetime.go @@ -23,7 +23,7 @@ import ( "github.com/blevesearch/bleve/numeric" ) -const DefaultDateTimeIndexingOptions = StoreField | IndexField +const DefaultDateTimeIndexingOptions = StoreField | IndexField | DocValues const DefaultDateTimePrecisionStep uint = 4 var MinTimeRepresentable = time.Unix(0, math.MinInt64) diff --git a/document/field_numeric.go b/document/field_numeric.go index 7faae2bbb..e32993c88 100644 --- a/document/field_numeric.go +++ b/document/field_numeric.go @@ -21,7 +21,7 @@ import ( "github.com/blevesearch/bleve/numeric" ) -const DefaultNumericIndexingOptions = StoreField | IndexField +const DefaultNumericIndexingOptions = StoreField | IndexField | DocValues const DefaultPrecisionStep uint = 4 diff --git a/document/field_text.go b/document/field_text.go index 37873d36e..5f7a3ab64 100644 --- a/document/field_text.go +++ b/document/field_text.go @@ -20,7 +20,7 @@ import ( "github.com/blevesearch/bleve/analysis" ) -const DefaultTextIndexingOptions = IndexField +const DefaultTextIndexingOptions = IndexField | DocValues type TextField struct { name string diff --git a/document/indexing_options.go b/document/indexing_options.go index 5d562c1de..44498a8e9 100644 --- 
a/document/indexing_options.go +++ b/document/indexing_options.go @@ -20,6 +20,7 @@ const ( IndexField IndexingOptions = 1 << iota StoreField IncludeTermVectors + DocValues ) func (o IndexingOptions) IsIndexed() bool { @@ -34,6 +35,10 @@ func (o IndexingOptions) IncludeTermVectors() bool { return o&IncludeTermVectors != 0 } +func (o IndexingOptions) IncludeDocValues() bool { + return o&DocValues != 0 +} + func (o IndexingOptions) String() string { rv := "" if o.IsIndexed() { @@ -51,5 +56,11 @@ func (o IndexingOptions) String() string { } rv += "TV" } + if o.IncludeDocValues() { + if rv != "" { + rv += ", " + } + rv += "DV" + } return rv } diff --git a/document/indexing_options_test.go b/document/indexing_options_test.go index f6c6c996f..d88c41070 100644 --- a/document/indexing_options_test.go +++ b/document/indexing_options_test.go @@ -24,36 +24,56 @@ func TestIndexingOptions(t *testing.T) { isIndexed bool isStored bool includeTermVectors bool + docValues bool }{ { options: IndexField | StoreField | IncludeTermVectors, isIndexed: true, isStored: true, includeTermVectors: true, + docValues: false, }, { options: IndexField | IncludeTermVectors, isIndexed: true, isStored: false, includeTermVectors: true, + docValues: false, }, { options: StoreField | IncludeTermVectors, isIndexed: false, isStored: true, includeTermVectors: true, + docValues: false, }, { options: IndexField, isIndexed: true, isStored: false, includeTermVectors: false, + docValues: false, }, { options: StoreField, isIndexed: false, isStored: true, includeTermVectors: false, + docValues: false, + }, + { + options: DocValues, + isIndexed: false, + isStored: false, + includeTermVectors: false, + docValues: true, + }, + { + options: IndexField | StoreField | IncludeTermVectors | DocValues, + isIndexed: true, + isStored: true, + includeTermVectors: true, + docValues: true, }, } @@ -70,5 +90,9 @@ func TestIndexingOptions(t *testing.T) { if actuallyIncludeTermVectors != test.includeTermVectors { 
t.Errorf("expected includeTermVectors to be %v, got %v for %d", test.includeTermVectors, actuallyIncludeTermVectors, test.options) } + actuallyDocValues := test.options.IncludeDocValues() + if actuallyDocValues != test.docValues { + t.Errorf("expected docValue to be %v, got %v for %d", test.docValues, actuallyDocValues, test.options) + } } } diff --git a/index/scorch/scorch.go b/index/scorch/scorch.go index 5d0cfb7c9..57a01d7cb 100644 --- a/index/scorch/scorch.go +++ b/index/scorch/scorch.go @@ -37,14 +37,6 @@ const Name = "scorch" const Version uint8 = 1 -// UnInvertIndex is implemented by various scorch index implementations -// to provide the un inverting of the postings or other indexed values. -type UnInvertIndex interface { - // apparently need better namings here.. - VisitDocumentFieldTerms(localDocNum uint64, fields []string, - visitor index.DocumentFieldTermVisitor) error -} - type Scorch struct { readOnly bool version uint8 diff --git a/index/scorch/scorch_test.go b/index/scorch/scorch_test.go index a9df3e9e2..6e8ecb0cf 100644 --- a/index/scorch/scorch_test.go +++ b/index/scorch/scorch_test.go @@ -1638,3 +1638,72 @@ func TestIndexDocumentVisitFieldTermsWithMultipleDocs(t *testing.T) { } } + +func TestIndexDocumentVisitFieldTermsWithMultipleFieldOptions(t *testing.T) { + defer func() { + err := DestroyTest() + if err != nil { + t.Fatal(err) + } + }() + + analysisQueue := index.NewAnalysisQueue(1) + idx, err := NewScorch(Name, testConfig, analysisQueue) + if err != nil { + t.Fatal(err) + } + err = idx.Open() + if err != nil { + t.Fatalf("error opening index: %v", err) + } + defer func() { + err := idx.Close() + if err != nil { + t.Fatal(err) + } + }() + + // mix of field options, this exercises the run time/ on the fly un inverting of + // doc values for custom options enabled field like designation, dept. 
+ options := document.IndexField | document.StoreField | document.IncludeTermVectors + doc := document.NewDocument("1") + doc.AddField(document.NewTextField("name", []uint64{}, []byte("test"))) // default doc value persisted + doc.AddField(document.NewTextField("title", []uint64{}, []byte("mister"))) // default doc value persisted + doc.AddField(document.NewTextFieldWithIndexingOptions("designation", []uint64{}, []byte("engineer"), options)) + doc.AddField(document.NewTextFieldWithIndexingOptions("dept", []uint64{}, []byte("bleve"), options)) + + err = idx.Update(doc) + if err != nil { + t.Errorf("Error updating index: %v", err) + } + + indexReader, err := idx.Reader() + if err != nil { + t.Error(err) + } + + fieldTerms := make(index.FieldTerms) + docNumber, err := indexReader.InternalID("1") + if err != nil { + t.Fatal(err) + } + err = indexReader.DocumentVisitFieldTerms(docNumber, []string{"name", "designation", "dept"}, func(field string, term []byte) { + fieldTerms[field] = append(fieldTerms[field], string(term)) + }) + if err != nil { + t.Error(err) + } + expectedFieldTerms := index.FieldTerms{ + "name": []string{"test"}, + "designation": []string{"engineer"}, + "dept": []string{"bleve"}, + } + if !reflect.DeepEqual(fieldTerms, expectedFieldTerms) { + t.Errorf("expected field terms: %#v, got: %#v", expectedFieldTerms, fieldTerms) + } + err = indexReader.Close() + if err != nil { + t.Fatal(err) + } + +} diff --git a/index/scorch/segment/mem/build.go b/index/scorch/segment/mem/build.go index 72ea08eb8..cd11fb401 100644 --- a/index/scorch/segment/mem/build.go +++ b/index/scorch/segment/mem/build.go @@ -119,10 +119,10 @@ func (s *Segment) processDocument(result *index.AnalysisResult) { if field.Options().IsStored() { storeField(docNum, fieldID, encodeFieldType(field), field.Value(), field.ArrayPositions()) } - // TODO with mapping changes for dv - //if field.Options().IncludeDocValues() { - s.DocValueFields[fieldID] = true - //} + + if 
field.Options().IncludeDocValues() { + s.DocValueFields[fieldID] = true + } } // now that its been rolled up into docMap, walk that diff --git a/index/scorch/segment/segment.go b/index/scorch/segment/segment.go index 9102e0f4e..858ac3590 100644 --- a/index/scorch/segment/segment.go +++ b/index/scorch/segment/segment.go @@ -91,9 +91,14 @@ type Location interface { } // DocumentFieldTermVisitable is implemented by various scorch segment -// implementations to provide the un inverting of the postings -// or other indexed values. +// implementations with persistence for the un inverting of the +// postings or other indexed values. type DocumentFieldTermVisitable interface { VisitDocumentFieldTerms(localDocNum uint64, fields []string, visitor index.DocumentFieldTermVisitor) error + + // VisitableDocValueFields implementation should return + // the list of fields which are document value persisted and + // therefore visitable by the above VisitDocumentFieldTerms method. + VisitableDocValueFields() ([]string, error) } diff --git a/index/scorch/segment/zap/build_test.go b/index/scorch/segment/zap/build_test.go index da0e12ba6..9063980b7 100644 --- a/index/scorch/segment/zap/build_test.go +++ b/index/scorch/segment/zap/build_test.go @@ -286,3 +286,90 @@ func buildMemSegmentMulti() *mem.Segment { return segment } + +func buildMemSegmentWithDefaultFieldMapping() (*mem.Segment, []string) { + + doc := &document.Document{ + ID: "a", + Fields: []document.Field{ + document.NewTextField("_id", nil, []byte("a")), + document.NewTextField("name", nil, []byte("wow")), + document.NewTextField("desc", nil, []byte("some thing")), + document.NewTextField("tag", []uint64{0}, []byte("cold")), + }, + CompositeFields: []*document.CompositeField{ + document.NewCompositeField("_all", true, nil, []string{"_id"}), + }, + } + + var fields []string + fields = append(fields, "_id") + fields = append(fields, "name") + fields = append(fields, "desc") + fields = append(fields, "tag") + + // forge 
analyzed docs + results := []*index.AnalysisResult{ + &index.AnalysisResult{ + Document: doc, + Analyzed: []analysis.TokenFrequencies{ + analysis.TokenFrequency(analysis.TokenStream{ + &analysis.Token{ + Start: 0, + End: 1, + Position: 1, + Term: []byte("a"), + }, + }, nil, false), + analysis.TokenFrequency(analysis.TokenStream{ + &analysis.Token{ + Start: 0, + End: 3, + Position: 1, + Term: []byte("wow"), + }, + }, nil, true), + analysis.TokenFrequency(analysis.TokenStream{ + &analysis.Token{ + Start: 0, + End: 4, + Position: 1, + Term: []byte("some"), + }, + &analysis.Token{ + Start: 5, + End: 10, + Position: 2, + Term: []byte("thing"), + }, + }, nil, true), + analysis.TokenFrequency(analysis.TokenStream{ + &analysis.Token{ + Start: 0, + End: 4, + Position: 1, + Term: []byte("cold"), + }, + }, []uint64{0}, true), + }, + Length: []int{ + 1, + 1, + 2, + 1, + 1, + }, + }, + } + + // fix up composite fields + for _, ar := range results { + for i, f := range ar.Document.Fields { + for _, cf := range ar.Document.CompositeFields { + cf.Compose(f.Name(), ar.Length[i], ar.Analyzed[i]) + } + } + } + + return mem.NewFromAnalyzedDocs(results), fields +} diff --git a/index/scorch/segment/zap/docvalues.go b/index/scorch/segment/zap/docvalues.go index 3a75f29d2..3774afefb 100644 --- a/index/scorch/segment/zap/docvalues.go +++ b/index/scorch/segment/zap/docvalues.go @@ -151,7 +151,8 @@ func (di *docValueIterator) getDocValueLocs(docID uint64) (uint64, uint64) { return math.MaxUint64, math.MaxUint64 } -// VisitDocumentFieldTerms is an implementation of the UnInvertIndex interface +// VisitDocumentFieldTerms is an implementation of the +// DocumentFieldTermVisitable interface func (s *Segment) VisitDocumentFieldTerms(localDocNum uint64, fields []string, visitor index.DocumentFieldTermVisitor) error { fieldID := uint16(0) @@ -178,3 +179,22 @@ func (s *Segment) VisitDocumentFieldTerms(localDocNum uint64, fields []string, } return nil } + +// VisitableDocValueFields returns the list 
of fields with +// persisted doc value terms ready to be visitable using the +// VisitDocumentFieldTerms method. +func (s *Segment) VisitableDocValueFields() ([]string, error) { + if len(s.fieldsInv) == 0 { + return nil, nil + } + + var rv []string + for fieldID, field := range s.fieldsInv { + if dvIter, ok := s.fieldDvIterMap[uint16(fieldID)]; ok && + dvIter != nil { + rv = append(rv, field) + } + } + + return rv, nil +} diff --git a/index/scorch/segment/zap/segment_test.go b/index/scorch/segment/zap/segment_test.go index d4241c1d9..fb49f72e5 100644 --- a/index/scorch/segment/zap/segment_test.go +++ b/index/scorch/segment/zap/segment_test.go @@ -19,6 +19,9 @@ import ( "os" "reflect" "testing" + + "github.com/blevesearch/bleve/index" + "github.com/blevesearch/bleve/index/scorch/segment" ) func TestOpen(t *testing.T) { @@ -515,3 +518,83 @@ func TestOpenMultiWithTwoChunks(t *testing.T) { t.Errorf("expected count to be 1, got %d", count) } } + +func TestSegmentVisitableDocValueFieldsList(t *testing.T) { + _ = os.RemoveAll("/tmp/scorch.zap") + + memSegment := buildMemSegmentMulti() + err := PersistSegment(memSegment, "/tmp/scorch.zap", 1) + if err != nil { + t.Fatalf("error persisting segment: %v", err) + } + + seg, err := Open("/tmp/scorch.zap") + if err != nil { + t.Fatalf("error opening segment: %v", err) + } + + cerr := seg.Close() + if cerr != nil { + t.Fatalf("error closing segment: %v", err) + } + + if zaps, ok := seg.(segment.DocumentFieldTermVisitable); ok { + fields, err := zaps.VisitableDocValueFields() + if err != nil { + t.Fatalf("segment VisitableDocValueFields err: %v", err) + } + // no persisted doc value fields + if len(fields) != 0 { + t.Errorf("expected no persisted fields for doc values, got: %#v", fields) + } + } + + _ = os.RemoveAll("/tmp/scorch.zap") + + memSegment, expectedFields := buildMemSegmentWithDefaultFieldMapping() + err = PersistSegment(memSegment, "/tmp/scorch.zap", 1) + if err != nil { + t.Fatalf("error persisting segment: %v", err) + 
} + + seg, err = Open("/tmp/scorch.zap") + if err != nil { + t.Fatalf("error opening segment: %v", err) + } + defer func() { + cerr := seg.Close() + if cerr != nil { + t.Fatalf("error closing segment: %v", err) + } + }() + + if zaps, ok := seg.(segment.DocumentFieldTermVisitable); ok { + fields, err := zaps.VisitableDocValueFields() + if err != nil { + t.Fatalf("segment VisitableDocValueFields err: %v", err) + } + + if !reflect.DeepEqual(fields, expectedFields) { + t.Errorf("expected field terms: %#v, got: %#v", expectedFields, fields) + } + + fieldTerms := make(index.FieldTerms) + err = zaps.VisitDocumentFieldTerms(0, fields, func(field string, term []byte) { + fieldTerms[field] = append(fieldTerms[field], string(term)) + }) + if err != nil { + t.Error(err) + } + + expectedFieldTerms := index.FieldTerms{ + "name": []string{"wow"}, + "desc": []string{"some", "thing"}, + "tag": []string{"cold"}, + "_id": []string{"a"}, + } + if !reflect.DeepEqual(fieldTerms, expectedFieldTerms) { + t.Errorf("expected field terms: %#v, got: %#v", expectedFieldTerms, fieldTerms) + } + + } +} diff --git a/index/scorch/snapshot_index.go b/index/scorch/snapshot_index.go index a9b105a0d..bb9975768 100644 --- a/index/scorch/snapshot_index.go +++ b/index/scorch/snapshot_index.go @@ -412,15 +412,64 @@ func (i *IndexSnapshot) DocumentVisitFieldTerms(id index.IndexInternalID, ss := i.segment[segmentIndex] if zaps, ok := ss.segment.(segment.DocumentFieldTermVisitable); ok { - return zaps.VisitDocumentFieldTerms(localDocNum, fields, visitor) + // get the list of doc value persisted fields + pFields, err := zaps.VisitableDocValueFields() + if err != nil { + return err + } + // assort the fields for which terms look up have to + // be performed runtime + dvPendingFields := extractDvPendingFields(fields, pFields) + if len(dvPendingFields) == 0 { + // all fields are doc value persisted + return zaps.VisitDocumentFieldTerms(localDocNum, fields, visitor) + } + + // concurrently trigger the runtime doc 
value preparations for + // pending fields as well as the visit of the persisted doc values + errCh := make(chan error, 1) + + go func() { + defer close(errCh) + err := ss.cachedDocs.prepareFields(fields, ss) + if err != nil { + errCh <- err + } + }() + + // visit the persisted dv while the cache preparation is in progress + err = zaps.VisitDocumentFieldTerms(localDocNum, fields, visitor) + if err != nil { + return err + } + + // err out if fieldCache preparation failed + err = <-errCh + if err != nil { + return err + } + + visitDocumentFieldCacheTerms(localDocNum, dvPendingFields, ss, visitor) + return nil } - // else fallback to the in memory fieldCache - err = ss.cachedDocs.prepareFields(fields, ss) + return prepareCacheVisitDocumentFieldTerms(localDocNum, fields, ss, visitor) +} + +func prepareCacheVisitDocumentFieldTerms(localDocNum uint64, fields []string, + ss *SegmentSnapshot, visitor index.DocumentFieldTermVisitor) error { + err := ss.cachedDocs.prepareFields(fields, ss) if err != nil { return err } + visitDocumentFieldCacheTerms(localDocNum, fields, ss, visitor) + return nil +} + +func visitDocumentFieldCacheTerms(localDocNum uint64, fields []string, + ss *SegmentSnapshot, visitor index.DocumentFieldTermVisitor) { + for _, field := range fields { if cachedFieldDocs, exists := ss.cachedDocs.cache[field]; exists { if tlist, exists := cachedFieldDocs.docs[localDocNum]; exists { @@ -436,5 +485,19 @@ func (i *IndexSnapshot) DocumentVisitFieldTerms(id index.IndexInternalID, } } - return nil +} + +func extractDvPendingFields(requestedFields, persistedFields []string) []string { + removeMap := map[string]struct{}{} + for _, str := range persistedFields { + removeMap[str] = struct{}{} + } + + rv := make([]string, 0, len(requestedFields)) + for _, s := range requestedFields { + if _, ok := removeMap[s]; !ok { + rv = append(rv, s) + } + } + return rv } diff --git a/index/scorch/snapshot_segment.go b/index/scorch/snapshot_segment.go index 0c111c505..5e64cb1f2 100644 
--- a/index/scorch/snapshot_segment.go +++ b/index/scorch/snapshot_segment.go @@ -18,7 +18,6 @@ import ( "sync" "github.com/RoaringBitmap/roaring" - "github.com/blevesearch/bleve/index" "github.com/blevesearch/bleve/index/scorch/segment" ) @@ -84,50 +83,6 @@ func (s *SegmentSnapshot) VisitDocument(num uint64, visitor segment.DocumentFiel return s.segment.VisitDocument(num, visitor) } -func (s *SegmentSnapshot) DocumentVisitFieldTerms(num uint64, fields []string, - visitor index.DocumentFieldTermVisitor) error { - collection := make(map[string][][]byte) - // collect field indexed values - for _, field := range fields { - dict, err := s.Dictionary(field) - if err != nil { - return err - } - dictItr := dict.Iterator() - var next *index.DictEntry - next, err = dictItr.Next() - for next != nil && err == nil { - postings, err2 := dict.PostingsList(next.Term, nil) - if err2 != nil { - return err2 - } - postingsItr := postings.Iterator() - nextPosting, err2 := postingsItr.Next() - for err2 == nil && nextPosting != nil && nextPosting.Number() <= num { - if nextPosting.Number() == num { - // got what we're looking for - collection[field] = append(collection[field], []byte(next.Term)) - } - nextPosting, err = postingsItr.Next() - } - if err2 != nil { - return err - } - next, err = dictItr.Next() - } - if err != nil { - return err - } - } - // invoke callback - for field, values := range collection { - for _, value := range values { - visitor(field, value) - } - } - return nil -} - func (s *SegmentSnapshot) Count() uint64 { rv := s.segment.Count() diff --git a/mapping/field.go b/mapping/field.go index 9f1928ca5..898ee9d79 100644 --- a/mapping/field.go +++ b/mapping/field.go @@ -28,6 +28,7 @@ import ( var ( IndexDynamic = true StoreDynamic = true + DocValues = true // TODO revisit default? 
) // A FieldMapping describes how a specific item @@ -54,6 +55,10 @@ type FieldMapping struct { IncludeTermVectors bool `json:"include_term_vectors,omitempty"` IncludeInAll bool `json:"include_in_all,omitempty"` DateFormat string `json:"date_format,omitempty"` + + // DocValues, if true makes the index uninverting possible for this field + // It is useful for faceting and sorting queries. + DocValues bool `json:"docvalues,omitempty"` } // NewTextFieldMapping returns a default field mapping for text @@ -64,6 +69,7 @@ func NewTextFieldMapping() *FieldMapping { Index: true, IncludeTermVectors: true, IncludeInAll: true, + DocValues: true, } } @@ -71,6 +77,7 @@ func newTextFieldMappingDynamic(im *IndexMappingImpl) *FieldMapping { rv := NewTextFieldMapping() rv.Store = im.StoreDynamic rv.Index = im.IndexDynamic + rv.DocValues = im.DocValues return rv } @@ -81,6 +88,7 @@ func NewNumericFieldMapping() *FieldMapping { Store: true, Index: true, IncludeInAll: true, + DocValues: true, } } @@ -88,6 +96,7 @@ func newNumericFieldMappingDynamic(im *IndexMappingImpl) *FieldMapping { rv := NewNumericFieldMapping() rv.Store = im.StoreDynamic rv.Index = im.IndexDynamic + rv.DocValues = im.DocValues return rv } @@ -98,6 +107,7 @@ func NewDateTimeFieldMapping() *FieldMapping { Store: true, Index: true, IncludeInAll: true, + DocValues: true, } } @@ -105,6 +115,7 @@ func newDateTimeFieldMappingDynamic(im *IndexMappingImpl) *FieldMapping { rv := NewDateTimeFieldMapping() rv.Store = im.StoreDynamic rv.Index = im.IndexDynamic + rv.DocValues = im.DocValues return rv } @@ -115,6 +126,7 @@ func NewBooleanFieldMapping() *FieldMapping { Store: true, Index: true, IncludeInAll: true, + DocValues: true, } } @@ -122,6 +134,7 @@ func newBooleanFieldMappingDynamic(im *IndexMappingImpl) *FieldMapping { rv := NewBooleanFieldMapping() rv.Store = im.StoreDynamic rv.Index = im.IndexDynamic + rv.DocValues = im.DocValues return rv } @@ -132,6 +145,7 @@ func NewGeoPointFieldMapping() *FieldMapping { Store: 
true, Index: true, IncludeInAll: true, + DocValues: true, } } @@ -147,6 +161,9 @@ func (fm *FieldMapping) Options() document.IndexingOptions { if fm.IncludeTermVectors { rv |= document.IncludeTermVectors } + if fm.DocValues { + rv |= document.DocValues + } return rv } @@ -308,6 +325,11 @@ func (fm *FieldMapping) UnmarshalJSON(data []byte) error { if err != nil { return err } + case "docvalues": + err := json.Unmarshal(v, &fm.DocValues) + if err != nil { + return err + } default: invalidKeys = append(invalidKeys, k) } diff --git a/mapping/index.go b/mapping/index.go index cefa59803..99ed6353e 100644 --- a/mapping/index.go +++ b/mapping/index.go @@ -50,6 +50,7 @@ type IndexMappingImpl struct { DefaultField string `json:"default_field"` StoreDynamic bool `json:"store_dynamic"` IndexDynamic bool `json:"index_dynamic"` + DocValues bool `json:"docvalues,omitempty"` CustomAnalysis *customAnalysis `json:"analysis,omitempty"` cache *registry.Cache } @@ -154,6 +155,7 @@ func NewIndexMapping() *IndexMappingImpl { DefaultField: defaultField, IndexDynamic: IndexDynamic, StoreDynamic: StoreDynamic, + DocValues: DocValues, CustomAnalysis: newCustomAnalysis(), cache: registry.NewCache(), } @@ -217,6 +219,7 @@ func (im *IndexMappingImpl) UnmarshalJSON(data []byte) error { im.TypeMapping = make(map[string]*DocumentMapping) im.StoreDynamic = StoreDynamic im.IndexDynamic = IndexDynamic + im.DocValues = DocValues var invalidKeys []string for k, v := range tmp { @@ -271,6 +274,11 @@ func (im *IndexMappingImpl) UnmarshalJSON(data []byte) error { if err != nil { return err } + case "docvalues": + err := json.Unmarshal(v, &im.DocValues) + if err != nil { + return err + } default: invalidKeys = append(invalidKeys, k) } diff --git a/mapping/mapping_test.go b/mapping/mapping_test.go index 735aef057..5d7527e0a 100644 --- a/mapping/mapping_test.go +++ b/mapping/mapping_test.go @@ -40,7 +40,8 @@ var mappingSource = []byte(`{ "store": true, "index": true, "include_term_vectors": true, - 
"include_in_all": true + "include_in_all": true, + "docvalues": true } ] } From 43bfcc00c9704785d66dbd3dd261e47d4bf27dbb Mon Sep 17 00:00:00 2001 From: abhinavdangeti Date: Mon, 8 Jan 2018 16:54:01 -0800 Subject: [PATCH 131/728] Do not account mmap'ed part of zap segments in MemoryUsed This API is designed to only emit the dirty "unpersisted" bytes only. This does not included the mmap'ed part in the zap segments (disk). --- index/scorch/scorch.go | 14 ++++++++------ index/scorch/segment/zap/segment.go | 3 ++- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/index/scorch/scorch.go b/index/scorch/scorch.go index 5d0cfb7c9..c2e4b48b4 100644 --- a/index/scorch/scorch.go +++ b/index/scorch/scorch.go @@ -410,13 +410,15 @@ func (s *Scorch) AddEligibleForRemoval(epoch uint64) { func (s *Scorch) MemoryUsed() uint64 { var memUsed uint64 s.rootLock.RLock() - for _, segmentSnapshot := range s.root.segment { - memUsed += 8 /* size of id -> uint64 */ + - segmentSnapshot.segment.SizeInBytes() - if segmentSnapshot.deleted != nil { - memUsed += segmentSnapshot.deleted.GetSizeInBytes() + if s.root != nil { + for _, segmentSnapshot := range s.root.segment { + memUsed += 8 /* size of id -> uint64 */ + + segmentSnapshot.segment.SizeInBytes() + if segmentSnapshot.deleted != nil { + memUsed += segmentSnapshot.deleted.GetSizeInBytes() + } + memUsed += segmentSnapshot.cachedDocs.sizeInBytes() } - memUsed += segmentSnapshot.cachedDocs.sizeInBytes() } s.rootLock.RUnlock() return memUsed diff --git a/index/scorch/segment/zap/segment.go b/index/scorch/segment/zap/segment.go index 93b7466c8..9f9910366 100644 --- a/index/scorch/segment/zap/segment.go +++ b/index/scorch/segment/zap/segment.go @@ -105,7 +105,8 @@ func (s *Segment) SizeInBytes() uint64 { // 8 /* size of fieldsIndexOffset -> uint64 */ sizeOfUints := 36 - sizeInBytes := len(s.mm) + len(s.path) + sizeOfUints + // Do not include the mmap'ed part + sizeInBytes := len(s.path) + sizeOfUints for k, _ := range s.fieldsMap { 
sizeInBytes += len(k) + 2 /* size of uint16 */ From 53aef2104e8b1d91961d151e056d7ebc8b2f8bb2 Mon Sep 17 00:00:00 2001 From: Sreekanth Sivasankaran Date: Wed, 10 Jan 2018 22:00:26 +0530 Subject: [PATCH 132/728] fixing err handling in UTs, name changes --- index/scorch/segment/zap/docvalues.go | 5 ----- index/scorch/segment/zap/segment_test.go | 18 +++++++++--------- mapping/field.go | 14 +++++++------- mapping/index.go | 10 +++++----- 4 files changed, 21 insertions(+), 26 deletions(-) diff --git a/index/scorch/segment/zap/docvalues.go b/index/scorch/segment/zap/docvalues.go index 3774afefb..cdb16ccb9 100644 --- a/index/scorch/segment/zap/docvalues.go +++ b/index/scorch/segment/zap/docvalues.go @@ -184,10 +184,6 @@ func (s *Segment) VisitDocumentFieldTerms(localDocNum uint64, fields []string, // persisted doc value terms ready to be visitable using the // VisitDocumentFieldTerms method. func (s *Segment) VisitableDocValueFields() ([]string, error) { - if len(s.fieldsInv) == 0 { - return nil, nil - } - var rv []string for fieldID, field := range s.fieldsInv { if dvIter, ok := s.fieldDvIterMap[uint16(fieldID)]; ok && @@ -195,6 +191,5 @@ func (s *Segment) VisitableDocValueFields() ([]string, error) { rv = append(rv, field) } } - return rv, nil } diff --git a/index/scorch/segment/zap/segment_test.go b/index/scorch/segment/zap/segment_test.go index fb49f72e5..704f9e72e 100644 --- a/index/scorch/segment/zap/segment_test.go +++ b/index/scorch/segment/zap/segment_test.go @@ -40,7 +40,7 @@ func TestOpen(t *testing.T) { defer func() { cerr := segment.Close() if cerr != nil { - t.Fatalf("error closing segment: %v", err) + t.Fatalf("error closing segment: %v", cerr) } }() @@ -340,7 +340,7 @@ func TestOpenMulti(t *testing.T) { defer func() { cerr := segment.Close() if cerr != nil { - t.Fatalf("error closing segment: %v", err) + t.Fatalf("error closing segment: %v", cerr) } }() @@ -440,7 +440,7 @@ func TestOpenMultiWithTwoChunks(t *testing.T) { defer func() { cerr := 
segment.Close() if cerr != nil { - t.Fatalf("error closing segment: %v", err) + t.Fatalf("error closing segment: %v", cerr) } }() @@ -533,11 +533,6 @@ func TestSegmentVisitableDocValueFieldsList(t *testing.T) { t.Fatalf("error opening segment: %v", err) } - cerr := seg.Close() - if cerr != nil { - t.Fatalf("error closing segment: %v", err) - } - if zaps, ok := seg.(segment.DocumentFieldTermVisitable); ok { fields, err := zaps.VisitableDocValueFields() if err != nil { @@ -549,6 +544,10 @@ func TestSegmentVisitableDocValueFieldsList(t *testing.T) { } } + err = seg.Close() + if err != nil { + t.Fatalf("error closing segment: %v", err) + } _ = os.RemoveAll("/tmp/scorch.zap") memSegment, expectedFields := buildMemSegmentWithDefaultFieldMapping() @@ -561,10 +560,11 @@ func TestSegmentVisitableDocValueFieldsList(t *testing.T) { if err != nil { t.Fatalf("error opening segment: %v", err) } + defer func() { cerr := seg.Close() if cerr != nil { - t.Fatalf("error closing segment: %v", err) + t.Fatalf("error closing segment: %v", cerr) } }() diff --git a/mapping/field.go b/mapping/field.go index 898ee9d79..278faa1a9 100644 --- a/mapping/field.go +++ b/mapping/field.go @@ -26,9 +26,9 @@ import ( // control the default behavior for dynamic fields (those not explicitly mapped) var ( - IndexDynamic = true - StoreDynamic = true - DocValues = true // TODO revisit default? + IndexDynamic = true + StoreDynamic = true + DocValuesDynamic = true // TODO revisit default? 
) // A FieldMapping describes how a specific item @@ -77,7 +77,7 @@ func newTextFieldMappingDynamic(im *IndexMappingImpl) *FieldMapping { rv := NewTextFieldMapping() rv.Store = im.StoreDynamic rv.Index = im.IndexDynamic - rv.DocValues = im.DocValues + rv.DocValues = im.DocValuesDynamic return rv } @@ -96,7 +96,7 @@ func newNumericFieldMappingDynamic(im *IndexMappingImpl) *FieldMapping { rv := NewNumericFieldMapping() rv.Store = im.StoreDynamic rv.Index = im.IndexDynamic - rv.DocValues = im.DocValues + rv.DocValues = im.DocValuesDynamic return rv } @@ -115,7 +115,7 @@ func newDateTimeFieldMappingDynamic(im *IndexMappingImpl) *FieldMapping { rv := NewDateTimeFieldMapping() rv.Store = im.StoreDynamic rv.Index = im.IndexDynamic - rv.DocValues = im.DocValues + rv.DocValues = im.DocValuesDynamic return rv } @@ -134,7 +134,7 @@ func newBooleanFieldMappingDynamic(im *IndexMappingImpl) *FieldMapping { rv := NewBooleanFieldMapping() rv.Store = im.StoreDynamic rv.Index = im.IndexDynamic - rv.DocValues = im.DocValues + rv.DocValues = im.DocValuesDynamic return rv } diff --git a/mapping/index.go b/mapping/index.go index 99ed6353e..fc5d12a73 100644 --- a/mapping/index.go +++ b/mapping/index.go @@ -50,7 +50,7 @@ type IndexMappingImpl struct { DefaultField string `json:"default_field"` StoreDynamic bool `json:"store_dynamic"` IndexDynamic bool `json:"index_dynamic"` - DocValues bool `json:"docvalues,omitempty"` + DocValuesDynamic bool `json:"docvalues_dynamic,omitempty"` CustomAnalysis *customAnalysis `json:"analysis,omitempty"` cache *registry.Cache } @@ -155,7 +155,7 @@ func NewIndexMapping() *IndexMappingImpl { DefaultField: defaultField, IndexDynamic: IndexDynamic, StoreDynamic: StoreDynamic, - DocValues: DocValues, + DocValuesDynamic: DocValuesDynamic, CustomAnalysis: newCustomAnalysis(), cache: registry.NewCache(), } @@ -219,7 +219,7 @@ func (im *IndexMappingImpl) UnmarshalJSON(data []byte) error { im.TypeMapping = make(map[string]*DocumentMapping) im.StoreDynamic = 
StoreDynamic im.IndexDynamic = IndexDynamic - im.DocValues = DocValues + im.DocValuesDynamic = DocValuesDynamic var invalidKeys []string for k, v := range tmp { @@ -274,8 +274,8 @@ func (im *IndexMappingImpl) UnmarshalJSON(data []byte) error { if err != nil { return err } - case "docvalues": - err := json.Unmarshal(v, &im.DocValues) + case "docvalues_dynamic": + err := json.Unmarshal(v, &im.DocValuesDynamic) if err != nil { return err } From a9532e510ab7de665b7d131abb77eac400699cf3 Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Wed, 10 Jan 2018 15:15:31 -0500 Subject: [PATCH 133/728] refactor slightly to use our new hosted snowball stemmers rather than having each package include it directly inside of bleve, we have decide to host them all in one repo https://github.com/blevesearch/snowballstem this makes the easier for the rest of the community to use outside of bleve contexts --- analysis/lang/ru/analyzer_ru.go | 2 +- analysis/lang/ru/analyzer_ru_test.go | 54 +- analysis/lang/ru/snowball/stem_Unicode.go | 737 ---------------------- analysis/lang/ru/stemmer_ru.go | 11 +- analysis/lang/ru/stemmer_ru_test.go | 2 +- analysis/lang/ru/stop_filter_ru.go | 2 +- analysis/lang/ru/stop_words_ru.go | 469 +++++++------- vendor/manifest | 8 + 8 files changed, 309 insertions(+), 976 deletions(-) delete mode 100644 analysis/lang/ru/snowball/stem_Unicode.go diff --git a/analysis/lang/ru/analyzer_ru.go b/analysis/lang/ru/analyzer_ru.go index 3b3404037..d1b7688c0 100644 --- a/analysis/lang/ru/analyzer_ru.go +++ b/analysis/lang/ru/analyzer_ru.go @@ -1,4 +1,4 @@ -// Copyright (c) 2014 Couchbase, Inc. +// Copyright (c) 2018 Couchbase, Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
diff --git a/analysis/lang/ru/analyzer_ru_test.go b/analysis/lang/ru/analyzer_ru_test.go index a7ffef414..6cda4a586 100644 --- a/analysis/lang/ru/analyzer_ru_test.go +++ b/analysis/lang/ru/analyzer_ru_test.go @@ -1,4 +1,4 @@ -// Copyright (c) 2014 Couchbase, Inc. +// Copyright (c) 2018 Couchbase, Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -49,6 +49,58 @@ func TestRussianAnalyzer(t *testing.T) { input: []byte("как"), output: analysis.TokenStream{}, }, + // digits safe + { + input: []byte("text 1000"), + output: analysis.TokenStream{ + &analysis.Token{ + Term: []byte("text"), + }, + &analysis.Token{ + Term: []byte("1000"), + }, + }, + }, + { + input: []byte("Вместе с тем о силе электромагнитной энергии имели представление еще"), + output: analysis.TokenStream{ + &analysis.Token{ + Term: []byte("вмест"), + }, + &analysis.Token{ + Term: []byte("сил"), + }, + &analysis.Token{ + Term: []byte("электромагнитн"), + }, + &analysis.Token{ + Term: []byte("энерг"), + }, + &analysis.Token{ + Term: []byte("имел"), + }, + &analysis.Token{ + Term: []byte("представлен"), + }, + }, + }, + { + input: []byte("Но знание это хранилось в тайне"), + output: analysis.TokenStream{ + &analysis.Token{ + Term: []byte("знан"), + }, + &analysis.Token{ + Term: []byte("эт"), + }, + &analysis.Token{ + Term: []byte("хран"), + }, + &analysis.Token{ + Term: []byte("тайн"), + }, + }, + }, } cache := registry.NewCache() diff --git a/analysis/lang/ru/snowball/stem_Unicode.go b/analysis/lang/ru/snowball/stem_Unicode.go deleted file mode 100644 index dfd2f2eb0..000000000 --- a/analysis/lang/ru/snowball/stem_Unicode.go +++ /dev/null @@ -1,737 +0,0 @@ -//! This file was generated automatically by the Snowball to Go compiler -//! 
http://snowballstem.org/ - -package snowball - -import ( - snowballRuntime "github.com/snowballstem/snowball/go" -) - -var A_0 = []*snowballRuntime.Among{ - &snowballRuntime.Among{Str: "\u0432\u0448\u0438\u0441\u044C", A: -1, B: 1, F: nil}, - &snowballRuntime.Among{Str: "\u044B\u0432\u0448\u0438\u0441\u044C", A: 0, B: 2, F: nil}, - &snowballRuntime.Among{Str: "\u0438\u0432\u0448\u0438\u0441\u044C", A: 0, B: 2, F: nil}, - &snowballRuntime.Among{Str: "\u0432", A: -1, B: 1, F: nil}, - &snowballRuntime.Among{Str: "\u044B\u0432", A: 3, B: 2, F: nil}, - &snowballRuntime.Among{Str: "\u0438\u0432", A: 3, B: 2, F: nil}, - &snowballRuntime.Among{Str: "\u0432\u0448\u0438", A: -1, B: 1, F: nil}, - &snowballRuntime.Among{Str: "\u044B\u0432\u0448\u0438", A: 6, B: 2, F: nil}, - &snowballRuntime.Among{Str: "\u0438\u0432\u0448\u0438", A: 6, B: 2, F: nil}, -} - -var A_1 = []*snowballRuntime.Among{ - &snowballRuntime.Among{Str: "\u0435\u043C\u0443", A: -1, B: 1, F: nil}, - &snowballRuntime.Among{Str: "\u043E\u043C\u0443", A: -1, B: 1, F: nil}, - &snowballRuntime.Among{Str: "\u044B\u0445", A: -1, B: 1, F: nil}, - &snowballRuntime.Among{Str: "\u0438\u0445", A: -1, B: 1, F: nil}, - &snowballRuntime.Among{Str: "\u0443\u044E", A: -1, B: 1, F: nil}, - &snowballRuntime.Among{Str: "\u044E\u044E", A: -1, B: 1, F: nil}, - &snowballRuntime.Among{Str: "\u0435\u044E", A: -1, B: 1, F: nil}, - &snowballRuntime.Among{Str: "\u043E\u044E", A: -1, B: 1, F: nil}, - &snowballRuntime.Among{Str: "\u044F\u044F", A: -1, B: 1, F: nil}, - &snowballRuntime.Among{Str: "\u0430\u044F", A: -1, B: 1, F: nil}, - &snowballRuntime.Among{Str: "\u044B\u0435", A: -1, B: 1, F: nil}, - &snowballRuntime.Among{Str: "\u0435\u0435", A: -1, B: 1, F: nil}, - &snowballRuntime.Among{Str: "\u0438\u0435", A: -1, B: 1, F: nil}, - &snowballRuntime.Among{Str: "\u043E\u0435", A: -1, B: 1, F: nil}, - &snowballRuntime.Among{Str: "\u044B\u043C\u0438", A: -1, B: 1, F: nil}, - &snowballRuntime.Among{Str: "\u0438\u043C\u0438", A: -1, B: 1, F: 
nil}, - &snowballRuntime.Among{Str: "\u044B\u0439", A: -1, B: 1, F: nil}, - &snowballRuntime.Among{Str: "\u0435\u0439", A: -1, B: 1, F: nil}, - &snowballRuntime.Among{Str: "\u0438\u0439", A: -1, B: 1, F: nil}, - &snowballRuntime.Among{Str: "\u043E\u0439", A: -1, B: 1, F: nil}, - &snowballRuntime.Among{Str: "\u044B\u043C", A: -1, B: 1, F: nil}, - &snowballRuntime.Among{Str: "\u0435\u043C", A: -1, B: 1, F: nil}, - &snowballRuntime.Among{Str: "\u0438\u043C", A: -1, B: 1, F: nil}, - &snowballRuntime.Among{Str: "\u043E\u043C", A: -1, B: 1, F: nil}, - &snowballRuntime.Among{Str: "\u0435\u0433\u043E", A: -1, B: 1, F: nil}, - &snowballRuntime.Among{Str: "\u043E\u0433\u043E", A: -1, B: 1, F: nil}, -} - -var A_2 = []*snowballRuntime.Among{ - &snowballRuntime.Among{Str: "\u0432\u0448", A: -1, B: 1, F: nil}, - &snowballRuntime.Among{Str: "\u044B\u0432\u0448", A: 0, B: 2, F: nil}, - &snowballRuntime.Among{Str: "\u0438\u0432\u0448", A: 0, B: 2, F: nil}, - &snowballRuntime.Among{Str: "\u0449", A: -1, B: 1, F: nil}, - &snowballRuntime.Among{Str: "\u044E\u0449", A: 3, B: 1, F: nil}, - &snowballRuntime.Among{Str: "\u0443\u044E\u0449", A: 4, B: 2, F: nil}, - &snowballRuntime.Among{Str: "\u0435\u043C", A: -1, B: 1, F: nil}, - &snowballRuntime.Among{Str: "\u043D\u043D", A: -1, B: 1, F: nil}, -} - -var A_3 = []*snowballRuntime.Among{ - &snowballRuntime.Among{Str: "\u0441\u044C", A: -1, B: 1, F: nil}, - &snowballRuntime.Among{Str: "\u0441\u044F", A: -1, B: 1, F: nil}, -} - -var A_4 = []*snowballRuntime.Among{ - &snowballRuntime.Among{Str: "\u044B\u0442", A: -1, B: 2, F: nil}, - &snowballRuntime.Among{Str: "\u044E\u0442", A: -1, B: 1, F: nil}, - &snowballRuntime.Among{Str: "\u0443\u044E\u0442", A: 1, B: 2, F: nil}, - &snowballRuntime.Among{Str: "\u044F\u0442", A: -1, B: 2, F: nil}, - &snowballRuntime.Among{Str: "\u0435\u0442", A: -1, B: 1, F: nil}, - &snowballRuntime.Among{Str: "\u0443\u0435\u0442", A: 4, B: 2, F: nil}, - &snowballRuntime.Among{Str: "\u0438\u0442", A: -1, B: 2, F: nil}, - 
&snowballRuntime.Among{Str: "\u043D\u044B", A: -1, B: 1, F: nil}, - &snowballRuntime.Among{Str: "\u0435\u043D\u044B", A: 7, B: 2, F: nil}, - &snowballRuntime.Among{Str: "\u0442\u044C", A: -1, B: 1, F: nil}, - &snowballRuntime.Among{Str: "\u044B\u0442\u044C", A: 9, B: 2, F: nil}, - &snowballRuntime.Among{Str: "\u0438\u0442\u044C", A: 9, B: 2, F: nil}, - &snowballRuntime.Among{Str: "\u0435\u0448\u044C", A: -1, B: 1, F: nil}, - &snowballRuntime.Among{Str: "\u0438\u0448\u044C", A: -1, B: 2, F: nil}, - &snowballRuntime.Among{Str: "\u044E", A: -1, B: 2, F: nil}, - &snowballRuntime.Among{Str: "\u0443\u044E", A: 14, B: 2, F: nil}, - &snowballRuntime.Among{Str: "\u043B\u0430", A: -1, B: 1, F: nil}, - &snowballRuntime.Among{Str: "\u044B\u043B\u0430", A: 16, B: 2, F: nil}, - &snowballRuntime.Among{Str: "\u0438\u043B\u0430", A: 16, B: 2, F: nil}, - &snowballRuntime.Among{Str: "\u043D\u0430", A: -1, B: 1, F: nil}, - &snowballRuntime.Among{Str: "\u0435\u043D\u0430", A: 19, B: 2, F: nil}, - &snowballRuntime.Among{Str: "\u0435\u0442\u0435", A: -1, B: 1, F: nil}, - &snowballRuntime.Among{Str: "\u0438\u0442\u0435", A: -1, B: 2, F: nil}, - &snowballRuntime.Among{Str: "\u0439\u0442\u0435", A: -1, B: 1, F: nil}, - &snowballRuntime.Among{Str: "\u0443\u0439\u0442\u0435", A: 23, B: 2, F: nil}, - &snowballRuntime.Among{Str: "\u0435\u0439\u0442\u0435", A: 23, B: 2, F: nil}, - &snowballRuntime.Among{Str: "\u043B\u0438", A: -1, B: 1, F: nil}, - &snowballRuntime.Among{Str: "\u044B\u043B\u0438", A: 26, B: 2, F: nil}, - &snowballRuntime.Among{Str: "\u0438\u043B\u0438", A: 26, B: 2, F: nil}, - &snowballRuntime.Among{Str: "\u0439", A: -1, B: 1, F: nil}, - &snowballRuntime.Among{Str: "\u0443\u0439", A: 29, B: 2, F: nil}, - &snowballRuntime.Among{Str: "\u0435\u0439", A: 29, B: 2, F: nil}, - &snowballRuntime.Among{Str: "\u043B", A: -1, B: 1, F: nil}, - &snowballRuntime.Among{Str: "\u044B\u043B", A: 32, B: 2, F: nil}, - &snowballRuntime.Among{Str: "\u0438\u043B", A: 32, B: 2, F: nil}, - 
&snowballRuntime.Among{Str: "\u044B\u043C", A: -1, B: 2, F: nil}, - &snowballRuntime.Among{Str: "\u0435\u043C", A: -1, B: 1, F: nil}, - &snowballRuntime.Among{Str: "\u0438\u043C", A: -1, B: 2, F: nil}, - &snowballRuntime.Among{Str: "\u043D", A: -1, B: 1, F: nil}, - &snowballRuntime.Among{Str: "\u0435\u043D", A: 38, B: 2, F: nil}, - &snowballRuntime.Among{Str: "\u043B\u043E", A: -1, B: 1, F: nil}, - &snowballRuntime.Among{Str: "\u044B\u043B\u043E", A: 40, B: 2, F: nil}, - &snowballRuntime.Among{Str: "\u0438\u043B\u043E", A: 40, B: 2, F: nil}, - &snowballRuntime.Among{Str: "\u043D\u043E", A: -1, B: 1, F: nil}, - &snowballRuntime.Among{Str: "\u0435\u043D\u043E", A: 43, B: 2, F: nil}, - &snowballRuntime.Among{Str: "\u043D\u043D\u043E", A: 43, B: 1, F: nil}, -} - -var A_5 = []*snowballRuntime.Among{ - &snowballRuntime.Among{Str: "\u0443", A: -1, B: 1, F: nil}, - &snowballRuntime.Among{Str: "\u044F\u0445", A: -1, B: 1, F: nil}, - &snowballRuntime.Among{Str: "\u0438\u044F\u0445", A: 1, B: 1, F: nil}, - &snowballRuntime.Among{Str: "\u0430\u0445", A: -1, B: 1, F: nil}, - &snowballRuntime.Among{Str: "\u044B", A: -1, B: 1, F: nil}, - &snowballRuntime.Among{Str: "\u044C", A: -1, B: 1, F: nil}, - &snowballRuntime.Among{Str: "\u044E", A: -1, B: 1, F: nil}, - &snowballRuntime.Among{Str: "\u044C\u044E", A: 6, B: 1, F: nil}, - &snowballRuntime.Among{Str: "\u0438\u044E", A: 6, B: 1, F: nil}, - &snowballRuntime.Among{Str: "\u044F", A: -1, B: 1, F: nil}, - &snowballRuntime.Among{Str: "\u044C\u044F", A: 9, B: 1, F: nil}, - &snowballRuntime.Among{Str: "\u0438\u044F", A: 9, B: 1, F: nil}, - &snowballRuntime.Among{Str: "\u0430", A: -1, B: 1, F: nil}, - &snowballRuntime.Among{Str: "\u0435\u0432", A: -1, B: 1, F: nil}, - &snowballRuntime.Among{Str: "\u043E\u0432", A: -1, B: 1, F: nil}, - &snowballRuntime.Among{Str: "\u0435", A: -1, B: 1, F: nil}, - &snowballRuntime.Among{Str: "\u044C\u0435", A: 15, B: 1, F: nil}, - &snowballRuntime.Among{Str: "\u0438\u0435", A: 15, B: 1, F: nil}, - 
&snowballRuntime.Among{Str: "\u0438", A: -1, B: 1, F: nil}, - &snowballRuntime.Among{Str: "\u0435\u0438", A: 18, B: 1, F: nil}, - &snowballRuntime.Among{Str: "\u0438\u0438", A: 18, B: 1, F: nil}, - &snowballRuntime.Among{Str: "\u044F\u043C\u0438", A: 18, B: 1, F: nil}, - &snowballRuntime.Among{Str: "\u0438\u044F\u043C\u0438", A: 21, B: 1, F: nil}, - &snowballRuntime.Among{Str: "\u0430\u043C\u0438", A: 18, B: 1, F: nil}, - &snowballRuntime.Among{Str: "\u0439", A: -1, B: 1, F: nil}, - &snowballRuntime.Among{Str: "\u0435\u0439", A: 24, B: 1, F: nil}, - &snowballRuntime.Among{Str: "\u0438\u0435\u0439", A: 25, B: 1, F: nil}, - &snowballRuntime.Among{Str: "\u0438\u0439", A: 24, B: 1, F: nil}, - &snowballRuntime.Among{Str: "\u043E\u0439", A: 24, B: 1, F: nil}, - &snowballRuntime.Among{Str: "\u044F\u043C", A: -1, B: 1, F: nil}, - &snowballRuntime.Among{Str: "\u0438\u044F\u043C", A: 29, B: 1, F: nil}, - &snowballRuntime.Among{Str: "\u0430\u043C", A: -1, B: 1, F: nil}, - &snowballRuntime.Among{Str: "\u0435\u043C", A: -1, B: 1, F: nil}, - &snowballRuntime.Among{Str: "\u0438\u0435\u043C", A: 32, B: 1, F: nil}, - &snowballRuntime.Among{Str: "\u043E\u043C", A: -1, B: 1, F: nil}, - &snowballRuntime.Among{Str: "\u043E", A: -1, B: 1, F: nil}, -} - -var A_6 = []*snowballRuntime.Among{ - &snowballRuntime.Among{Str: "\u043E\u0441\u0442", A: -1, B: 1, F: nil}, - &snowballRuntime.Among{Str: "\u043E\u0441\u0442\u044C", A: -1, B: 1, F: nil}, -} - -var A_7 = []*snowballRuntime.Among{ - &snowballRuntime.Among{Str: "\u0435\u0439\u0448", A: -1, B: 1, F: nil}, - &snowballRuntime.Among{Str: "\u044C", A: -1, B: 3, F: nil}, - &snowballRuntime.Among{Str: "\u0435\u0439\u0448\u0435", A: -1, B: 1, F: nil}, - &snowballRuntime.Among{Str: "\u043D", A: -1, B: 2, F: nil}, -} - -var G_v = []byte{33, 65, 8, 232} - -type Context struct { - i_p2 int - i_pV int -} - -func r_mark_regions(env *snowballRuntime.Env, ctx interface{}) bool { - context := ctx.(*Context) - _ = context - // (, line 57 - context.i_pV = 
env.Limit - context.i_p2 = env.Limit - // do, line 61 - var v_1 = env.Cursor -lab0: - for { - // (, line 61 - // gopast, line 62 - golab1: - for { - lab2: - for { - if !env.InGrouping(G_v, 1072, 1103) { - break lab2 - } - break golab1 - } - if env.Cursor >= env.Limit { - break lab0 - } - env.NextChar() - } - // setmark pV, line 62 - context.i_pV = env.Cursor - // gopast, line 62 - golab3: - for { - lab4: - for { - if !env.OutGrouping(G_v, 1072, 1103) { - break lab4 - } - break golab3 - } - if env.Cursor >= env.Limit { - break lab0 - } - env.NextChar() - } - // gopast, line 63 - golab5: - for { - lab6: - for { - if !env.InGrouping(G_v, 1072, 1103) { - break lab6 - } - break golab5 - } - if env.Cursor >= env.Limit { - break lab0 - } - env.NextChar() - } - // gopast, line 63 - golab7: - for { - lab8: - for { - if !env.OutGrouping(G_v, 1072, 1103) { - break lab8 - } - break golab7 - } - if env.Cursor >= env.Limit { - break lab0 - } - env.NextChar() - } - // setmark p2, line 63 - context.i_p2 = env.Cursor - break lab0 - } - env.Cursor = v_1 - return true -} - -func r_R2(env *snowballRuntime.Env, ctx interface{}) bool { - context := ctx.(*Context) - _ = context - if !(context.i_p2 <= env.Cursor) { - return false - } - return true -} - -func r_perfective_gerund(env *snowballRuntime.Env, ctx interface{}) bool { - context := ctx.(*Context) - _ = context - var among_var int32 - // (, line 71 - // [, line 72 - env.Ket = env.Cursor - // substring, line 72 - among_var = env.FindAmongB(A_0, context) - if among_var == 0 { - return false - } - // ], line 72 - env.Bra = env.Cursor - if among_var == 0 { - return false - } else if among_var == 1 { - // (, line 76 - // or, line 76 - lab0: - for { - var v_1 = env.Limit - env.Cursor - lab1: - for { - // literal, line 76 - if !env.EqSB("\u0430") { - break lab1 - } - break lab0 - } - env.Cursor = env.Limit - v_1 - // literal, line 76 - if !env.EqSB("\u044F") { - return false - } - break lab0 - } - // delete, line 76 - if !env.SliceDel() { 
- return false - } - } else if among_var == 2 { - // (, line 83 - // delete, line 83 - if !env.SliceDel() { - return false - } - } - return true -} - -func r_adjective(env *snowballRuntime.Env, ctx interface{}) bool { - context := ctx.(*Context) - _ = context - var among_var int32 - // (, line 87 - // [, line 88 - env.Ket = env.Cursor - // substring, line 88 - among_var = env.FindAmongB(A_1, context) - if among_var == 0 { - return false - } - // ], line 88 - env.Bra = env.Cursor - if among_var == 0 { - return false - } else if among_var == 1 { - // (, line 97 - // delete, line 97 - if !env.SliceDel() { - return false - } - } - return true -} - -func r_adjectival(env *snowballRuntime.Env, ctx interface{}) bool { - context := ctx.(*Context) - _ = context - var among_var int32 - // (, line 101 - // call adjective, line 102 - if !r_adjective(env, context) { - return false - } - // try, line 109 - var v_1 = env.Limit - env.Cursor -lab0: - for { - // (, line 109 - // [, line 110 - env.Ket = env.Cursor - // substring, line 110 - among_var = env.FindAmongB(A_2, context) - if among_var == 0 { - env.Cursor = env.Limit - v_1 - break lab0 - } - // ], line 110 - env.Bra = env.Cursor - if among_var == 0 { - env.Cursor = env.Limit - v_1 - break lab0 - } else if among_var == 1 { - // (, line 115 - // or, line 115 - lab1: - for { - var v_2 = env.Limit - env.Cursor - lab2: - for { - // literal, line 115 - if !env.EqSB("\u0430") { - break lab2 - } - break lab1 - } - env.Cursor = env.Limit - v_2 - // literal, line 115 - if !env.EqSB("\u044F") { - env.Cursor = env.Limit - v_1 - break lab0 - } - break lab1 - } - // delete, line 115 - if !env.SliceDel() { - return false - } - } else if among_var == 2 { - // (, line 122 - // delete, line 122 - if !env.SliceDel() { - return false - } - } - break lab0 - } - return true -} - -func r_reflexive(env *snowballRuntime.Env, ctx interface{}) bool { - context := ctx.(*Context) - _ = context - var among_var int32 - // (, line 128 - // [, line 129 - 
env.Ket = env.Cursor - // substring, line 129 - among_var = env.FindAmongB(A_3, context) - if among_var == 0 { - return false - } - // ], line 129 - env.Bra = env.Cursor - if among_var == 0 { - return false - } else if among_var == 1 { - // (, line 132 - // delete, line 132 - if !env.SliceDel() { - return false - } - } - return true -} - -func r_verb(env *snowballRuntime.Env, ctx interface{}) bool { - context := ctx.(*Context) - _ = context - var among_var int32 - // (, line 136 - // [, line 137 - env.Ket = env.Cursor - // substring, line 137 - among_var = env.FindAmongB(A_4, context) - if among_var == 0 { - return false - } - // ], line 137 - env.Bra = env.Cursor - if among_var == 0 { - return false - } else if among_var == 1 { - // (, line 143 - // or, line 143 - lab0: - for { - var v_1 = env.Limit - env.Cursor - lab1: - for { - // literal, line 143 - if !env.EqSB("\u0430") { - break lab1 - } - break lab0 - } - env.Cursor = env.Limit - v_1 - // literal, line 143 - if !env.EqSB("\u044F") { - return false - } - break lab0 - } - // delete, line 143 - if !env.SliceDel() { - return false - } - } else if among_var == 2 { - // (, line 151 - // delete, line 151 - if !env.SliceDel() { - return false - } - } - return true -} - -func r_noun(env *snowballRuntime.Env, ctx interface{}) bool { - context := ctx.(*Context) - _ = context - var among_var int32 - // (, line 159 - // [, line 160 - env.Ket = env.Cursor - // substring, line 160 - among_var = env.FindAmongB(A_5, context) - if among_var == 0 { - return false - } - // ], line 160 - env.Bra = env.Cursor - if among_var == 0 { - return false - } else if among_var == 1 { - // (, line 167 - // delete, line 167 - if !env.SliceDel() { - return false - } - } - return true -} - -func r_derivational(env *snowballRuntime.Env, ctx interface{}) bool { - context := ctx.(*Context) - _ = context - var among_var int32 - // (, line 175 - // [, line 176 - env.Ket = env.Cursor - // substring, line 176 - among_var = env.FindAmongB(A_6, 
context) - if among_var == 0 { - return false - } - // ], line 176 - env.Bra = env.Cursor - // call R2, line 176 - if !r_R2(env, context) { - return false - } - if among_var == 0 { - return false - } else if among_var == 1 { - // (, line 179 - // delete, line 179 - if !env.SliceDel() { - return false - } - } - return true -} - -func r_tidy_up(env *snowballRuntime.Env, ctx interface{}) bool { - context := ctx.(*Context) - _ = context - var among_var int32 - // (, line 183 - // [, line 184 - env.Ket = env.Cursor - // substring, line 184 - among_var = env.FindAmongB(A_7, context) - if among_var == 0 { - return false - } - // ], line 184 - env.Bra = env.Cursor - if among_var == 0 { - return false - } else if among_var == 1 { - // (, line 188 - // delete, line 188 - if !env.SliceDel() { - return false - } - // [, line 189 - env.Ket = env.Cursor - // literal, line 189 - if !env.EqSB("\u043D") { - return false - } - // ], line 189 - env.Bra = env.Cursor - // literal, line 189 - if !env.EqSB("\u043D") { - return false - } - // delete, line 189 - if !env.SliceDel() { - return false - } - } else if among_var == 2 { - // (, line 192 - // literal, line 192 - if !env.EqSB("\u043D") { - return false - } - // delete, line 192 - if !env.SliceDel() { - return false - } - } else if among_var == 3 { - // (, line 194 - // delete, line 194 - if !env.SliceDel() { - return false - } - } - return true -} - -func Stem(env *snowballRuntime.Env) bool { - var context = &Context{ - i_p2: 0, - i_pV: 0, - } - _ = context - // (, line 199 - // do, line 201 - var v_1 = env.Cursor -lab0: - for { - // call mark_regions, line 201 - if !r_mark_regions(env, context) { - break lab0 - } - break lab0 - } - env.Cursor = v_1 - // backwards, line 202 - env.LimitBackward = env.Cursor - env.Cursor = env.Limit - // setlimit, line 202 - var v_2 = env.Limit - env.Cursor - // tomark, line 202 - if env.Cursor < context.i_pV { - return false - } - env.Cursor = context.i_pV - var v_3 = env.LimitBackward - 
env.LimitBackward = env.Cursor - env.Cursor = env.Limit - v_2 - // (, line 202 - // do, line 203 - var v_4 = env.Limit - env.Cursor -lab1: - for { - // (, line 203 - // or, line 204 - lab2: - for { - var v_5 = env.Limit - env.Cursor - lab3: - for { - // call perfective_gerund, line 204 - if !r_perfective_gerund(env, context) { - break lab3 - } - break lab2 - } - env.Cursor = env.Limit - v_5 - // (, line 205 - // try, line 205 - var v_6 = env.Limit - env.Cursor - lab4: - for { - // call reflexive, line 205 - if !r_reflexive(env, context) { - env.Cursor = env.Limit - v_6 - break lab4 - } - break lab4 - } - // or, line 206 - lab5: - for { - var v_7 = env.Limit - env.Cursor - lab6: - for { - // call adjectival, line 206 - if !r_adjectival(env, context) { - break lab6 - } - break lab5 - } - env.Cursor = env.Limit - v_7 - lab7: - for { - // call verb, line 206 - if !r_verb(env, context) { - break lab7 - } - break lab5 - } - env.Cursor = env.Limit - v_7 - // call noun, line 206 - if !r_noun(env, context) { - break lab1 - } - break lab5 - } - break lab2 - } - break lab1 - } - env.Cursor = env.Limit - v_4 - // try, line 209 - var v_8 = env.Limit - env.Cursor -lab8: - for { - // (, line 209 - // [, line 209 - env.Ket = env.Cursor - // literal, line 209 - if !env.EqSB("\u0438") { - env.Cursor = env.Limit - v_8 - break lab8 - } - // ], line 209 - env.Bra = env.Cursor - // delete, line 209 - if !env.SliceDel() { - return false - } - break lab8 - } - // do, line 212 - var v_9 = env.Limit - env.Cursor -lab9: - for { - // call derivational, line 212 - if !r_derivational(env, context) { - break lab9 - } - break lab9 - } - env.Cursor = env.Limit - v_9 - // do, line 213 - var v_10 = env.Limit - env.Cursor -lab10: - for { - // call tidy_up, line 213 - if !r_tidy_up(env, context) { - break lab10 - } - break lab10 - } - env.Cursor = env.Limit - v_10 - env.LimitBackward = v_3 - env.Cursor = env.LimitBackward - return true -} diff --git a/analysis/lang/ru/stemmer_ru.go 
b/analysis/lang/ru/stemmer_ru.go index 6da9095b0..47a90456b 100644 --- a/analysis/lang/ru/stemmer_ru.go +++ b/analysis/lang/ru/stemmer_ru.go @@ -1,4 +1,4 @@ -// Copyright (c) 2014 Couchbase, Inc. +// Copyright (c) 2018 Couchbase, Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -16,10 +16,10 @@ package ru import ( "github.com/blevesearch/bleve/analysis" - "github.com/blevesearch/bleve/analysis/lang/ru/snowball" "github.com/blevesearch/bleve/registry" - snowballRuntime "github.com/snowballstem/snowball/go" + "github.com/blevesearch/snowballstem" + "github.com/blevesearch/snowballstem/russian" ) const SnowballStemmerName = "stemmer_ru_snowball" @@ -33,9 +33,8 @@ func NewRussianStemmerFilter() *RussianStemmerFilter { func (s *RussianStemmerFilter) Filter(input analysis.TokenStream) analysis.TokenStream { for _, token := range input { - - env := snowballRuntime.NewEnv(string(token.Term)) - snowball.Stem(env) + env := snowballstem.NewEnv(string(token.Term)) + russian.Stem(env) token.Term = []byte(env.Current()) } return input diff --git a/analysis/lang/ru/stemmer_ru_test.go b/analysis/lang/ru/stemmer_ru_test.go index 1795497ff..39949fcb0 100644 --- a/analysis/lang/ru/stemmer_ru_test.go +++ b/analysis/lang/ru/stemmer_ru_test.go @@ -1,4 +1,4 @@ -// Copyright (c) 2015 Couchbase, Inc. +// Copyright (c) 2018 Couchbase, Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/analysis/lang/ru/stop_filter_ru.go b/analysis/lang/ru/stop_filter_ru.go index 5679420a1..326fb9d56 100644 --- a/analysis/lang/ru/stop_filter_ru.go +++ b/analysis/lang/ru/stop_filter_ru.go @@ -1,4 +1,4 @@ -// Copyright (c) 2014 Couchbase, Inc. +// Copyright (c) 2018 Couchbase, Inc. 
// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/analysis/lang/ru/stop_words_ru.go b/analysis/lang/ru/stop_words_ru.go index 60bec0236..0129f48c4 100644 --- a/analysis/lang/ru/stop_words_ru.go +++ b/analysis/lang/ru/stop_words_ru.go @@ -7,242 +7,253 @@ import ( const StopName = "stop_ru" -var RussianStopWords = []byte(` | From http://snowball.tartarus.org/algorithms/russian/stop.txt +// this content was obtained from: +// lucene-4.7.2/analysis/common/src/resources/org/apache/lucene/analysis/snowball/ +// ` was changed to ' to allow for literal string +var RussianStopWords = []byte(` | From svn.tartarus.org/snowball/trunk/website/algorithms/russian/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + | + | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" - | a russian stop word list. comments begin with vertical bar. each stop - | word is at the start of a line. + | a russian stop word list. comments begin with vertical bar. each stop + | word is at the start of a line. - | this is a ranked list (commonest to rarest) of stopwords derived from - | a large text sample. + | this is a ranked list (commonest to rarest) of stopwords derived from + | a large text sample. - | letter 'ё' is translated to 'е'. + | letter 'ё' is translated to 'е'. 
- и | and - в | in/into - во | alternative form - не | not - что | what/that - он | he - на | on/onto - я | i - с | from - со | alternative form - как | how - а | milder form of 'no' (but) - то | conjunction and form of 'that' - все | all - она | she - так | so, thus - его | him - но | but - да | yes/and - ты | thou - к | towards, by - у | around, chez - же | intensifier particle - вы | you - за | beyond, behind - бы | conditional/subj. particle - по | up to, along - только | only - ее | her - мне | to me - было | it was - вот | here is/are, particle - от | away from - меня | me - еще | still, yet, more - нет | no, there isnt/arent - о | about - из | out of - ему | to him - теперь | now - когда | when - даже | even - ну | so, well - вдруг | suddenly - ли | interrogative particle - если | if - уже | already, but homonym of 'narrower' - или | or - ни | neither - быть | to be - был | he was - него | prepositional form of его - до | up to - вас | you accusative - нибудь | indef. suffix preceded by hyphen - опять | again - уж | already, but homonym of 'adder' - вам | to you - сказал | he said - ведь | particle 'after all' - там | there - потом | then - себя | oneself - ничего | nothing - ей | to her - может | usually with 'быть' as 'maybe' - они | they - тут | here - где | where - есть | there is/are - надо | got to, must - ней | prepositional form of ей - для | for - мы | we - тебя | thee - их | them, their - чем | than - была | she was - сам | self - чтоб | in order to - без | without - будто | as if - человек | man, person, one - чего | genitive form of 'what' - раз | once - тоже | also - себе | to oneself - под | beneath - жизнь | life - будет | will be - ж | short form of intensifer particle 'же' - тогда | then - кто | who - этот | this - говорил | was saying - того | genitive form of 'that' - потому | for that reason - этого | genitive form of 'this' - какой | which - совсем | altogether - ним | prepositional form of 'его', 'они' - здесь | here - этом | 
prepositional form of 'этот' - один | one - почти | almost - мой | my - тем | instrumental/dative plural of 'тот', 'то' - чтобы | full form of 'in order that' - нее | her (acc.) - кажется | it seems - сейчас | now - были | they were - куда | where to - зачем | why - сказать | to say - всех | all (acc., gen. preposn. plural) - никогда | never - сегодня | today - можно | possible, one can - при | by - наконец | finally - два | two - об | alternative form of 'о', about - другой | another - хоть | even - после | after - над | above - больше | more - тот | that one (masc.) - через | across, in - эти | these - нас | us - про | about - всего | in all, only, of all - них | prepositional form of 'они' (they) - какая | which, feminine - много | lots - разве | interrogative particle - сказала | she said - три | three - эту | this, acc. fem. sing. - моя | my, feminine - впрочем | moreover, besides - хорошо | good - свою | ones own, acc. fem. sing. - этой | oblique form of 'эта', fem. 'this' - перед | in front of - иногда | sometimes - лучше | better - чуть | a little - том | preposn. form of 'that one' - нельзя | one must not - такой | such a one - им | to them - более | more - всегда | always - конечно | of course - всю | acc. fem. sing of 'all' - между | between +и | and +в | in/into +во | alternative form +не | not +что | what/that +он | he +на | on/onto +я | i +с | from +со | alternative form +как | how +а | milder form of 'no' (but) +то | conjunction and form of 'that' +все | all +она | she +так | so, thus +его | him +но | but +да | yes/and +ты | thou +к | towards, by +у | around, chez +же | intensifier particle +вы | you +за | beyond, behind +бы | conditional/subj. 
particle +по | up to, along +только | only +ее | her +мне | to me +было | it was +вот | here is/are, particle +от | away from +меня | me +еще | still, yet, more +нет | no, there isnt/arent +о | about +из | out of +ему | to him +теперь | now +когда | when +даже | even +ну | so, well +вдруг | suddenly +ли | interrogative particle +если | if +уже | already, but homonym of 'narrower' +или | or +ни | neither +быть | to be +был | he was +него | prepositional form of его +до | up to +вас | you accusative +нибудь | indef. suffix preceded by hyphen +опять | again +уж | already, but homonym of 'adder' +вам | to you +сказал | he said +ведь | particle 'after all' +там | there +потом | then +себя | oneself +ничего | nothing +ей | to her +может | usually with 'быть' as 'maybe' +они | they +тут | here +где | where +есть | there is/are +надо | got to, must +ней | prepositional form of ей +для | for +мы | we +тебя | thee +их | them, their +чем | than +была | she was +сам | self +чтоб | in order to +без | without +будто | as if +человек | man, person, one +чего | genitive form of 'what' +раз | once +тоже | also +себе | to oneself +под | beneath +жизнь | life +будет | will be +ж | short form of intensifer particle 'же' +тогда | then +кто | who +этот | this +говорил | was saying +того | genitive form of 'that' +потому | for that reason +этого | genitive form of 'this' +какой | which +совсем | altogether +ним | prepositional form of 'его', 'они' +здесь | here +этом | prepositional form of 'этот' +один | one +почти | almost +мой | my +тем | instrumental/dative plural of 'тот', 'то' +чтобы | full form of 'in order that' +нее | her (acc.) +кажется | it seems +сейчас | now +были | they were +куда | where to +зачем | why +сказать | to say +всех | all (acc., gen. preposn. 
plural) +никогда | never +сегодня | today +можно | possible, one can +при | by +наконец | finally +два | two +об | alternative form of 'о', about +другой | another +хоть | even +после | after +над | above +больше | more +тот | that one (masc.) +через | across, in +эти | these +нас | us +про | about +всего | in all, only, of all +них | prepositional form of 'они' (they) +какая | which, feminine +много | lots +разве | interrogative particle +сказала | she said +три | three +эту | this, acc. fem. sing. +моя | my, feminine +впрочем | moreover, besides +хорошо | good +свою | ones own, acc. fem. sing. +этой | oblique form of 'эта', fem. 'this' +перед | in front of +иногда | sometimes +лучше | better +чуть | a little +том | preposn. form of 'that one' +нельзя | one must not +такой | such a one +им | to them +более | more +всегда | always +конечно | of course +всю | acc. fem. sing of 'all' +между | between - | b: some paradigms - | - | personal pronouns - | - | я меня мне мной [мною] - | ты тебя тебе тобой [тобою] - | он его ему им [него, нему, ним] - | она ее эи ею [нее, нэи, нею] - | оно его ему им [него, нему, ним] - | - | мы нас нам нами - | вы вас вам вами - | они их им ими [них, ним, ними] - | - | себя себе собой [собою] - | - | demonstrative pronouns: этот (this), тот (that) - | - | этот эта это эти - | этого эты это эти - | этого этой этого этих - | этому этой этому этим - | этим этой этим [этою] этими - | этом этой этом этих - | - | тот та то те - | того ту то те - | того той того тех - | тому той тому тем - | тем той тем [тою] теми - | том той том тех - | - | determinative pronouns - | - | (a) весь (all) - | - | весь вся все все - | всего всю все все - | всего всей всего всех - | всему всей всему всем - | всем всей всем [всею] всеми - | всем всей всем всех - | - | (b) сам (himself etc) - | - | сам сама само сами - | самого саму само самих - | самого самой самого самих - | самому самой самому самим - | самим самой самим [самою] самими - | самом самой самом самих - 
| - | stems of verbs 'to be', 'to have', 'to do' and modal - | - | быть бы буд быв есть суть - | име - | дел - | мог мож мочь - | уме - | хоч хот - | долж - | можн - | нужн - | нельзя + | b: some paradigms + | + | personal pronouns + | + | я меня мне мной [мною] + | ты тебя тебе тобой [тобою] + | он его ему им [него, нему, ним] + | она ее эи ею [нее, нэи, нею] + | оно его ему им [него, нему, ним] + | + | мы нас нам нами + | вы вас вам вами + | они их им ими [них, ним, ними] + | + | себя себе собой [собою] + | + | demonstrative pronouns: этот (this), тот (that) + | + | этот эта это эти + | этого эты это эти + | этого этой этого этих + | этому этой этому этим + | этим этой этим [этою] этими + | этом этой этом этих + | + | тот та то те + | того ту то те + | того той того тех + | тому той тому тем + | тем той тем [тою] теми + | том той том тех + | + | determinative pronouns + | + | (a) весь (all) + | + | весь вся все все + | всего всю все все + | всего всей всего всех + | всему всей всему всем + | всем всей всем [всею] всеми + | всем всей всем всех + | + | (b) сам (himself etc) + | + | сам сама само сами + | самого саму само самих + | самого самой самого самих + | самому самой самому самим + | самим самой самим [самою] самими + | самом самой самом самих + | + | stems of verbs 'to be', 'to have', 'to do' and modal + | + | быть бы буд быв есть суть + | име + | дел + | мог мож мочь + | уме + | хоч хот + | долж + | можн + | нужн + | нельзя + `) func TokenMapConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenMap, error) { diff --git a/vendor/manifest b/vendor/manifest index 60c5fbb5e..641f276e5 100644 --- a/vendor/manifest +++ b/vendor/manifest @@ -17,6 +17,14 @@ "branch": "master", "notests": true }, + { + "importpath": "github.com/blevesearch/snowballstem", + "repository": "https://github.com/blevesearch/snowballstem", + "vcs": "", + "revision": "26b06a2c243d4f8ca5db3486f94409dd5b2a7467", + "branch": "master", + "notests": true + }, { 
"importpath": "github.com/boltdb/bolt", "repository": "https://github.com/boltdb/bolt", From 09a61a7a3821dc044f5e81b5dadef698df9340e7 Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Wed, 10 Jan 2018 16:00:29 -0500 Subject: [PATCH 134/728] add analyzers for several languages Having pure Go snowball stemmers allows us to add support for many languages into the core of bleve. Specifically we just added: Russian, Danish, Finnish, Hungarian, Dutch, Norwegian, Romanian, Swedish, Turkish --- analysis/lang/da/analyzer_da.go | 56 ++++++ analysis/lang/da/analyzer_da_test.go | 71 ++++++++ analysis/lang/da/stemmer_da.go | 49 +++++ analysis/lang/da/stop_filter_da.go | 33 ++++ analysis/lang/da/stop_words_da.go | 134 ++++++++++++++ analysis/lang/fi/analyzer_fi.go | 57 ++++++ analysis/lang/fi/analyzer_fi_test.go | 70 ++++++++ analysis/lang/fi/stemmer_fi.go | 49 +++++ analysis/lang/fi/stop_filter_fi.go | 33 ++++ analysis/lang/fi/stop_words_fi.go | 121 +++++++++++++ analysis/lang/hu/analyzer_hu.go | 57 ++++++ analysis/lang/hu/analyzer_hu_test.go | 70 ++++++++ analysis/lang/hu/stemmer_hu.go | 49 +++++ analysis/lang/hu/stop_filter_hu.go | 33 ++++ analysis/lang/hu/stop_words_hu.go | 235 ++++++++++++++++++++++++ analysis/lang/nl/analyzer_nl.go | 57 ++++++ analysis/lang/nl/analyzer_nl_test.go | 70 ++++++++ analysis/lang/nl/stemmer_nl.go | 49 +++++ analysis/lang/nl/stop_filter_nl.go | 33 ++++ analysis/lang/nl/stop_words_nl.go | 143 +++++++++++++++ analysis/lang/no/analyzer_no.go | 57 ++++++ analysis/lang/no/analyzer_no_test.go | 70 ++++++++ analysis/lang/no/stemmer_no.go | 49 +++++ analysis/lang/no/stop_filter_no.go | 33 ++++ analysis/lang/no/stop_words_no.go | 218 +++++++++++++++++++++++ analysis/lang/ro/analyzer_ro.go | 57 ++++++ analysis/lang/ro/analyzer_ro_test.go | 70 ++++++++ analysis/lang/ro/stemmer_ro.go | 49 +++++ analysis/lang/ro/stop_filter_ro.go | 33 ++++ analysis/lang/ro/stop_words_ro.go | 257 +++++++++++++++++++++++++++ analysis/lang/sv/analyzer_sv.go | 57 ++++++ 
analysis/lang/sv/analyzer_sv_test.go | 70 ++++++++ analysis/lang/sv/stemmer_sv.go | 49 +++++ analysis/lang/sv/stop_filter_sv.go | 33 ++++ analysis/lang/sv/stop_words_sv.go | 157 ++++++++++++++++ analysis/lang/tr/analyzer_tr.go | 63 +++++++ analysis/lang/tr/analyzer_tr_test.go | 90 ++++++++++ analysis/lang/tr/stemmer_tr.go | 49 +++++ analysis/lang/tr/stop_filter_tr.go | 33 ++++ analysis/lang/tr/stop_words_tr.go | 236 ++++++++++++++++++++++++ 40 files changed, 3199 insertions(+) create mode 100644 analysis/lang/da/analyzer_da.go create mode 100644 analysis/lang/da/analyzer_da_test.go create mode 100644 analysis/lang/da/stemmer_da.go create mode 100644 analysis/lang/da/stop_filter_da.go create mode 100644 analysis/lang/da/stop_words_da.go create mode 100644 analysis/lang/fi/analyzer_fi.go create mode 100644 analysis/lang/fi/analyzer_fi_test.go create mode 100644 analysis/lang/fi/stemmer_fi.go create mode 100644 analysis/lang/fi/stop_filter_fi.go create mode 100644 analysis/lang/fi/stop_words_fi.go create mode 100644 analysis/lang/hu/analyzer_hu.go create mode 100644 analysis/lang/hu/analyzer_hu_test.go create mode 100644 analysis/lang/hu/stemmer_hu.go create mode 100644 analysis/lang/hu/stop_filter_hu.go create mode 100644 analysis/lang/hu/stop_words_hu.go create mode 100644 analysis/lang/nl/analyzer_nl.go create mode 100644 analysis/lang/nl/analyzer_nl_test.go create mode 100644 analysis/lang/nl/stemmer_nl.go create mode 100644 analysis/lang/nl/stop_filter_nl.go create mode 100644 analysis/lang/nl/stop_words_nl.go create mode 100644 analysis/lang/no/analyzer_no.go create mode 100644 analysis/lang/no/analyzer_no_test.go create mode 100644 analysis/lang/no/stemmer_no.go create mode 100644 analysis/lang/no/stop_filter_no.go create mode 100644 analysis/lang/no/stop_words_no.go create mode 100644 analysis/lang/ro/analyzer_ro.go create mode 100644 analysis/lang/ro/analyzer_ro_test.go create mode 100644 analysis/lang/ro/stemmer_ro.go create mode 100644 
analysis/lang/ro/stop_filter_ro.go create mode 100644 analysis/lang/ro/stop_words_ro.go create mode 100644 analysis/lang/sv/analyzer_sv.go create mode 100644 analysis/lang/sv/analyzer_sv_test.go create mode 100644 analysis/lang/sv/stemmer_sv.go create mode 100644 analysis/lang/sv/stop_filter_sv.go create mode 100644 analysis/lang/sv/stop_words_sv.go create mode 100644 analysis/lang/tr/analyzer_tr.go create mode 100644 analysis/lang/tr/analyzer_tr_test.go create mode 100644 analysis/lang/tr/stemmer_tr.go create mode 100644 analysis/lang/tr/stop_filter_tr.go create mode 100644 analysis/lang/tr/stop_words_tr.go diff --git a/analysis/lang/da/analyzer_da.go b/analysis/lang/da/analyzer_da.go new file mode 100644 index 000000000..dca141779 --- /dev/null +++ b/analysis/lang/da/analyzer_da.go @@ -0,0 +1,56 @@ +// Copyright (c) 2018 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package da + +import ( + "github.com/blevesearch/bleve/analysis" + "github.com/blevesearch/bleve/analysis/token/lowercase" + "github.com/blevesearch/bleve/analysis/tokenizer/unicode" + "github.com/blevesearch/bleve/registry" +) + +const AnalyzerName = "da" + +func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (*analysis.Analyzer, error) { + unicodeTokenizer, err := cache.TokenizerNamed(unicode.Name) + if err != nil { + return nil, err + } + toLowerFilter, err := cache.TokenFilterNamed(lowercase.Name) + if err != nil { + return nil, err + } + stopDaFilter, err := cache.TokenFilterNamed(StopName) + if err != nil { + return nil, err + } + stemmerDaFilter, err := cache.TokenFilterNamed(SnowballStemmerName) + if err != nil { + return nil, err + } + rv := analysis.Analyzer{ + Tokenizer: unicodeTokenizer, + TokenFilters: []analysis.TokenFilter{ + toLowerFilter, + stopDaFilter, + stemmerDaFilter, + }, + } + return &rv, nil +} + +func init() { + registry.RegisterAnalyzer(AnalyzerName, AnalyzerConstructor) +} diff --git a/analysis/lang/da/analyzer_da_test.go b/analysis/lang/da/analyzer_da_test.go new file mode 100644 index 000000000..d6a1c51a4 --- /dev/null +++ b/analysis/lang/da/analyzer_da_test.go @@ -0,0 +1,71 @@ +// Copyright (c) 2018 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package da + +import ( + "reflect" + "testing" + + "github.com/blevesearch/bleve/analysis" + "github.com/blevesearch/bleve/registry" +) + +func TestDanishAnalyzer(t *testing.T) { + tests := []struct { + input []byte + output analysis.TokenStream + }{ + // stemming + { + input: []byte("undersøg"), + output: analysis.TokenStream{ + &analysis.Token{ + Term: []byte("undersøg"), + Position: 1, + Start: 0, + End: 9, + }, + }, + }, + { + input: []byte("undersøgelse"), + output: analysis.TokenStream{ + &analysis.Token{ + Term: []byte("undersøg"), + Position: 1, + Start: 0, + End: 13, + }, + }, + }, + // stop word + { + input: []byte("på"), + output: analysis.TokenStream{}, + }, + } + + cache := registry.NewCache() + analyzer, err := cache.AnalyzerNamed(AnalyzerName) + if err != nil { + t.Fatal(err) + } + for _, test := range tests { + actual := analyzer.Analyze(test.input) + if !reflect.DeepEqual(actual, test.output) { + t.Errorf("expected %v, got %v", test.output, actual) + } + } +} diff --git a/analysis/lang/da/stemmer_da.go b/analysis/lang/da/stemmer_da.go new file mode 100644 index 000000000..e40e623ab --- /dev/null +++ b/analysis/lang/da/stemmer_da.go @@ -0,0 +1,49 @@ +// Copyright (c) 2018 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package da + +import ( + "github.com/blevesearch/bleve/analysis" + "github.com/blevesearch/bleve/registry" + + "github.com/blevesearch/snowballstem" + "github.com/blevesearch/snowballstem/danish" +) + +const SnowballStemmerName = "stemmer_da_snowball" + +type DanishStemmerFilter struct { +} + +func NewDanishStemmerFilter() *DanishStemmerFilter { + return &DanishStemmerFilter{} +} + +func (s *DanishStemmerFilter) Filter(input analysis.TokenStream) analysis.TokenStream { + for _, token := range input { + env := snowballstem.NewEnv(string(token.Term)) + danish.Stem(env) + token.Term = []byte(env.Current()) + } + return input +} + +func DanishStemmerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) { + return NewDanishStemmerFilter(), nil +} + +func init() { + registry.RegisterTokenFilter(SnowballStemmerName, DanishStemmerFilterConstructor) +} diff --git a/analysis/lang/da/stop_filter_da.go b/analysis/lang/da/stop_filter_da.go new file mode 100644 index 000000000..a146d0b43 --- /dev/null +++ b/analysis/lang/da/stop_filter_da.go @@ -0,0 +1,33 @@ +// Copyright (c) 2018 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package da + +import ( + "github.com/blevesearch/bleve/analysis" + "github.com/blevesearch/bleve/analysis/token/stop" + "github.com/blevesearch/bleve/registry" +) + +func StopTokenFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) { + tokenMap, err := cache.TokenMapNamed(StopName) + if err != nil { + return nil, err + } + return stop.NewStopTokensFilter(tokenMap), nil +} + +func init() { + registry.RegisterTokenFilter(StopName, StopTokenFilterConstructor) +} diff --git a/analysis/lang/da/stop_words_da.go b/analysis/lang/da/stop_words_da.go new file mode 100644 index 000000000..63a407a0c --- /dev/null +++ b/analysis/lang/da/stop_words_da.go @@ -0,0 +1,134 @@ +package da + +import ( + "github.com/blevesearch/bleve/analysis" + "github.com/blevesearch/bleve/registry" +) + +const StopName = "stop_da" + +// this content was obtained from: +// lucene-4.7.2/analysis/common/src/resources/org/apache/lucene/analysis/snowball/ +// ` was changed to ' to allow for literal string + +var DanishStopWords = []byte(` | From svn.tartarus.org/snowball/trunk/website/algorithms/danish/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + | + | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" + + | A Danish stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + + | This is a ranked list (commonest to rarest) of stopwords derived from + | a large text sample. + + +og | and +i | in +jeg | I +det | that (dem. pronoun)/it (pers. pronoun) +at | that (in front of a sentence)/to (with infinitive) +en | a/an +den | it (pers. pronoun)/that (dem. 
pronoun) +til | to/at/for/until/against/by/of/into, more +er | present tense of "to be" +som | who, as +på | on/upon/in/on/at/to/after/of/with/for, on +de | they +med | with/by/in, along +han | he +af | of/by/from/off/for/in/with/on, off +for | at/for/to/from/by/of/ago, in front/before, because +ikke | not +der | who/which, there/those +var | past tense of "to be" +mig | me/myself +sig | oneself/himself/herself/itself/themselves +men | but +et | a/an/one, one (number), someone/somebody/one +har | present tense of "to have" +om | round/about/for/in/a, about/around/down, if +vi | we +min | my +havde | past tense of "to have" +ham | him +hun | she +nu | now +over | over/above/across/by/beyond/past/on/about, over/past +da | then, when/as/since +fra | from/off/since, off, since +du | you +ud | out +sin | his/her/its/one's +dem | them +os | us/ourselves +op | up +man | you/one +hans | his +hvor | where +eller | or +hvad | what +skal | must/shall etc. +selv | myself/youself/herself/ourselves etc., even +her | here +alle | all/everyone/everybody etc. 
+vil | will (verb) +blev | past tense of "to stay/to remain/to get/to become" +kunne | could +ind | in +når | when +være | present tense of "to be" +dog | however/yet/after all +noget | something +ville | would +jo | you know/you see (adv), yes +deres | their/theirs +efter | after/behind/according to/for/by/from, later/afterwards +ned | down +skulle | should +denne | this +end | than +dette | this +mit | my/mine +også | also +under | under/beneath/below/during, below/underneath +have | have +dig | you +anden | other +hende | her +mine | my +alt | everything +meget | much/very, plenty of +sit | his, her, its, one's +sine | his, her, its, one's +vor | our +mod | against +disse | these +hvis | if +din | your/yours +nogle | some +hos | by/at +blive | be/become +mange | many +ad | by/through +bliver | present tense of "to be/to become" +hendes | her/hers +været | be +thi | for (conj) +jer | you +sådan | such, like this/like that +`) + +func TokenMapConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenMap, error) { + rv := analysis.NewTokenMap() + err := rv.LoadBytes(DanishStopWords) + return rv, err +} + +func init() { + registry.RegisterTokenMap(StopName, TokenMapConstructor) +} diff --git a/analysis/lang/fi/analyzer_fi.go b/analysis/lang/fi/analyzer_fi.go new file mode 100644 index 000000000..9482e6b36 --- /dev/null +++ b/analysis/lang/fi/analyzer_fi.go @@ -0,0 +1,57 @@ +// Copyright (c) 2018 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +package fi + +import ( + "github.com/blevesearch/bleve/analysis" + "github.com/blevesearch/bleve/registry" + + "github.com/blevesearch/bleve/analysis/token/lowercase" + "github.com/blevesearch/bleve/analysis/tokenizer/unicode" +) + +const AnalyzerName = "fi" + +func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (*analysis.Analyzer, error) { + unicodeTokenizer, err := cache.TokenizerNamed(unicode.Name) + if err != nil { + return nil, err + } + toLowerFilter, err := cache.TokenFilterNamed(lowercase.Name) + if err != nil { + return nil, err + } + stopFiFilter, err := cache.TokenFilterNamed(StopName) + if err != nil { + return nil, err + } + stemmerFiFilter, err := cache.TokenFilterNamed(SnowballStemmerName) + if err != nil { + return nil, err + } + rv := analysis.Analyzer{ + Tokenizer: unicodeTokenizer, + TokenFilters: []analysis.TokenFilter{ + toLowerFilter, + stopFiFilter, + stemmerFiFilter, + }, + } + return &rv, nil +} + +func init() { + registry.RegisterAnalyzer(AnalyzerName, AnalyzerConstructor) +} diff --git a/analysis/lang/fi/analyzer_fi_test.go b/analysis/lang/fi/analyzer_fi_test.go new file mode 100644 index 000000000..035e7fdb5 --- /dev/null +++ b/analysis/lang/fi/analyzer_fi_test.go @@ -0,0 +1,70 @@ +// Copyright (c) 2018 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package fi + +import ( + "reflect" + "testing" + + "github.com/blevesearch/bleve/analysis" + "github.com/blevesearch/bleve/registry" +) + +func TestFinishAnalyzer(t *testing.T) { + tests := []struct { + input []byte + output analysis.TokenStream + }{ + // stemming + { + input: []byte("edeltäjiinsä"), + output: analysis.TokenStream{ + &analysis.Token{ + Term: []byte("edeltäj"), + }, + }, + }, + { + input: []byte("edeltäjistään"), + output: analysis.TokenStream{ + &analysis.Token{ + Term: []byte("edeltäj"), + }, + }, + }, + // stop word + { + input: []byte("olla"), + output: analysis.TokenStream{}, + }, + } + + cache := registry.NewCache() + analyzer, err := cache.AnalyzerNamed(AnalyzerName) + if err != nil { + t.Fatal(err) + } + for _, test := range tests { + actual := analyzer.Analyze(test.input) + if len(actual) != len(test.output) { + t.Fatalf("expected length: %d, got %d", len(test.output), len(actual)) + } + for i, tok := range actual { + if !reflect.DeepEqual(tok.Term, test.output[i].Term) { + t.Errorf("expected term %s (% x) got %s (% x)", test.output[i].Term, test.output[i].Term, tok.Term, tok.Term) + } + } + } +} diff --git a/analysis/lang/fi/stemmer_fi.go b/analysis/lang/fi/stemmer_fi.go new file mode 100644 index 000000000..14a6a1cbc --- /dev/null +++ b/analysis/lang/fi/stemmer_fi.go @@ -0,0 +1,49 @@ +// Copyright (c) 2018 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package fi + +import ( + "github.com/blevesearch/bleve/analysis" + "github.com/blevesearch/bleve/registry" + + "github.com/blevesearch/snowballstem" + "github.com/blevesearch/snowballstem/finnish" +) + +const SnowballStemmerName = "stemmer_fi_snowball" + +type FinnishStemmerFilter struct { +} + +func NewFinnishStemmerFilter() *FinnishStemmerFilter { + return &FinnishStemmerFilter{} +} + +func (s *FinnishStemmerFilter) Filter(input analysis.TokenStream) analysis.TokenStream { + for _, token := range input { + env := snowballstem.NewEnv(string(token.Term)) + finnish.Stem(env) + token.Term = []byte(env.Current()) + } + return input +} + +func FinnishStemmerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) { + return NewFinnishStemmerFilter(), nil +} + +func init() { + registry.RegisterTokenFilter(SnowballStemmerName, FinnishStemmerFilterConstructor) +} diff --git a/analysis/lang/fi/stop_filter_fi.go b/analysis/lang/fi/stop_filter_fi.go new file mode 100644 index 000000000..f3576a2be --- /dev/null +++ b/analysis/lang/fi/stop_filter_fi.go @@ -0,0 +1,33 @@ +// Copyright (c) 2018 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package fi + +import ( + "github.com/blevesearch/bleve/analysis" + "github.com/blevesearch/bleve/analysis/token/stop" + "github.com/blevesearch/bleve/registry" +) + +func StopTokenFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) { + tokenMap, err := cache.TokenMapNamed(StopName) + if err != nil { + return nil, err + } + return stop.NewStopTokensFilter(tokenMap), nil +} + +func init() { + registry.RegisterTokenFilter(StopName, StopTokenFilterConstructor) +} diff --git a/analysis/lang/fi/stop_words_fi.go b/analysis/lang/fi/stop_words_fi.go new file mode 100644 index 000000000..7cf0c9c13 --- /dev/null +++ b/analysis/lang/fi/stop_words_fi.go @@ -0,0 +1,121 @@ +package fi + +import ( + "github.com/blevesearch/bleve/analysis" + "github.com/blevesearch/bleve/registry" +) + +const StopName = "stop_fi" + +// this content was obtained from: +// lucene-4.7.2/analysis/common/src/resources/org/apache/lucene/analysis/snowball/ +// ` was changed to ' to allow for literal string + +var FinnishStopWords = []byte(` | From svn.tartarus.org/snowball/trunk/website/algorithms/finnish/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. 
+ | + | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" + +| forms of BE + +olla +olen +olet +on +olemme +olette +ovat +ole | negative form + +oli +olisi +olisit +olisin +olisimme +olisitte +olisivat +olit +olin +olimme +olitte +olivat +ollut +olleet + +en | negation +et +ei +emme +ette +eivät + +|Nom Gen Acc Part Iness Elat Illat Adess Ablat Allat Ess Trans +minä minun minut minua minussa minusta minuun minulla minulta minulle | I +sinä sinun sinut sinua sinussa sinusta sinuun sinulla sinulta sinulle | you +hän hänen hänet häntä hänessä hänestä häneen hänellä häneltä hänelle | he she +me meidän meidät meitä meissä meistä meihin meillä meiltä meille | we +te teidän teidät teitä teissä teistä teihin teillä teiltä teille | you +he heidän heidät heitä heissä heistä heihin heillä heiltä heille | they + +tämä tämän tätä tässä tästä tähän tallä tältä tälle tänä täksi | this +tuo tuon tuotä tuossa tuosta tuohon tuolla tuolta tuolle tuona tuoksi | that +se sen sitä siinä siitä siihen sillä siltä sille sinä siksi | it +nämä näiden näitä näissä näistä näihin näillä näiltä näille näinä näiksi | these +nuo noiden noita noissa noista noihin noilla noilta noille noina noiksi | those +ne niiden niitä niissä niistä niihin niillä niiltä niille niinä niiksi | they + +kuka kenen kenet ketä kenessä kenestä keneen kenellä keneltä kenelle kenenä keneksi| who +ketkä keiden ketkä keitä keissä keistä keihin keillä keiltä keille keinä keiksi | (pl) +mikä minkä minkä mitä missä mistä mihin millä miltä mille minä miksi | which what +mitkä | (pl) + +joka jonka jota jossa josta johon jolla jolta jolle jona joksi | who which +jotka joiden joita joissa joista joihin joilla joilta joille joina joiksi | (pl) + +| conjunctions + +että | that +ja | and +jos | if +koska | because +kuin | than +mutta | but +niin | so +sekä | and +sillä | for +tai | or +vaan | but +vai | or +vaikka | although + + +| prepositions + +kanssa | with +mukaan | according to +noin | about 
+poikki | across +yli | over, across + +| other + +kun | when +niin | so +nyt | now +itse | self + +`) + +func TokenMapConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenMap, error) { + rv := analysis.NewTokenMap() + err := rv.LoadBytes(FinnishStopWords) + return rv, err +} + +func init() { + registry.RegisterTokenMap(StopName, TokenMapConstructor) +} diff --git a/analysis/lang/hu/analyzer_hu.go b/analysis/lang/hu/analyzer_hu.go new file mode 100644 index 000000000..6797a91e4 --- /dev/null +++ b/analysis/lang/hu/analyzer_hu.go @@ -0,0 +1,57 @@ +// Copyright (c) 2018 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package hu + +import ( + "github.com/blevesearch/bleve/analysis" + "github.com/blevesearch/bleve/registry" + + "github.com/blevesearch/bleve/analysis/token/lowercase" + "github.com/blevesearch/bleve/analysis/tokenizer/unicode" +) + +const AnalyzerName = "hu" + +func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (*analysis.Analyzer, error) { + unicodeTokenizer, err := cache.TokenizerNamed(unicode.Name) + if err != nil { + return nil, err + } + toLowerFilter, err := cache.TokenFilterNamed(lowercase.Name) + if err != nil { + return nil, err + } + stopHuFilter, err := cache.TokenFilterNamed(StopName) + if err != nil { + return nil, err + } + stemmerHuFilter, err := cache.TokenFilterNamed(SnowballStemmerName) + if err != nil { + return nil, err + } + rv := analysis.Analyzer{ + Tokenizer: unicodeTokenizer, + TokenFilters: []analysis.TokenFilter{ + toLowerFilter, + stopHuFilter, + stemmerHuFilter, + }, + } + return &rv, nil +} + +func init() { + registry.RegisterAnalyzer(AnalyzerName, AnalyzerConstructor) +} diff --git a/analysis/lang/hu/analyzer_hu_test.go b/analysis/lang/hu/analyzer_hu_test.go new file mode 100644 index 000000000..4a14dff68 --- /dev/null +++ b/analysis/lang/hu/analyzer_hu_test.go @@ -0,0 +1,70 @@ +// Copyright (c) 2018 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package hu + +import ( + "reflect" + "testing" + + "github.com/blevesearch/bleve/analysis" + "github.com/blevesearch/bleve/registry" +) + +func TestHungarianAnalyzer(t *testing.T) { + tests := []struct { + input []byte + output analysis.TokenStream + }{ + // stemming + { + input: []byte("babakocsi"), + output: analysis.TokenStream{ + &analysis.Token{ + Term: []byte("babakocs"), + }, + }, + }, + { + input: []byte("babakocsijáért"), + output: analysis.TokenStream{ + &analysis.Token{ + Term: []byte("babakocs"), + }, + }, + }, + // stop word + { + input: []byte("által"), + output: analysis.TokenStream{}, + }, + } + + cache := registry.NewCache() + analyzer, err := cache.AnalyzerNamed(AnalyzerName) + if err != nil { + t.Fatal(err) + } + for _, test := range tests { + actual := analyzer.Analyze(test.input) + if len(actual) != len(test.output) { + t.Fatalf("expected length: %d, got %d", len(test.output), len(actual)) + } + for i, tok := range actual { + if !reflect.DeepEqual(tok.Term, test.output[i].Term) { + t.Errorf("expected term %s (% x) got %s (% x)", test.output[i].Term, test.output[i].Term, tok.Term, tok.Term) + } + } + } +} diff --git a/analysis/lang/hu/stemmer_hu.go b/analysis/lang/hu/stemmer_hu.go new file mode 100644 index 000000000..b380818a0 --- /dev/null +++ b/analysis/lang/hu/stemmer_hu.go @@ -0,0 +1,49 @@ +// Copyright (c) 2018 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package hu + +import ( + "github.com/blevesearch/bleve/analysis" + "github.com/blevesearch/bleve/registry" + + "github.com/blevesearch/snowballstem" + "github.com/blevesearch/snowballstem/hungarian" +) + +const SnowballStemmerName = "stemmer_hu_snowball" + +type HungarianStemmerFilter struct { +} + +func NewHungarianStemmerFilter() *HungarianStemmerFilter { + return &HungarianStemmerFilter{} +} + +func (s *HungarianStemmerFilter) Filter(input analysis.TokenStream) analysis.TokenStream { + for _, token := range input { + env := snowballstem.NewEnv(string(token.Term)) + hungarian.Stem(env) + token.Term = []byte(env.Current()) + } + return input +} + +func HungarianStemmerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) { + return NewHungarianStemmerFilter(), nil +} + +func init() { + registry.RegisterTokenFilter(SnowballStemmerName, HungarianStemmerFilterConstructor) +} diff --git a/analysis/lang/hu/stop_filter_hu.go b/analysis/lang/hu/stop_filter_hu.go new file mode 100644 index 000000000..a83fd4ccb --- /dev/null +++ b/analysis/lang/hu/stop_filter_hu.go @@ -0,0 +1,33 @@ +// Copyright (c) 2018 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package hu + +import ( + "github.com/blevesearch/bleve/analysis" + "github.com/blevesearch/bleve/analysis/token/stop" + "github.com/blevesearch/bleve/registry" +) + +func StopTokenFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) { + tokenMap, err := cache.TokenMapNamed(StopName) + if err != nil { + return nil, err + } + return stop.NewStopTokensFilter(tokenMap), nil +} + +func init() { + registry.RegisterTokenFilter(StopName, StopTokenFilterConstructor) +} diff --git a/analysis/lang/hu/stop_words_hu.go b/analysis/lang/hu/stop_words_hu.go new file mode 100644 index 000000000..fe45d55ea --- /dev/null +++ b/analysis/lang/hu/stop_words_hu.go @@ -0,0 +1,235 @@ +package hu + +import ( + "github.com/blevesearch/bleve/analysis" + "github.com/blevesearch/bleve/registry" +) + +const StopName = "stop_hu" + +// this content was obtained from: +// lucene-4.7.2/analysis/common/src/resources/org/apache/lucene/analysis/snowball/ +// ` was changed to ' to allow for literal string + +var HungarianStopWords = []byte(` | From svn.tartarus.org/snowball/trunk/website/algorithms/hungarian/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. 
+ | + | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" + +| Hungarian stop word list +| prepared by Anna Tordai + +a +ahogy +ahol +aki +akik +akkor +alatt +által +általában +amely +amelyek +amelyekben +amelyeket +amelyet +amelynek +ami +amit +amolyan +amíg +amikor +át +abban +ahhoz +annak +arra +arról +az +azok +azon +azt +azzal +azért +aztán +azután +azonban +bár +be +belül +benne +cikk +cikkek +cikkeket +csak +de +e +eddig +egész +egy +egyes +egyetlen +egyéb +egyik +egyre +ekkor +el +elég +ellen +elő +először +előtt +első +én +éppen +ebben +ehhez +emilyen +ennek +erre +ez +ezt +ezek +ezen +ezzel +ezért +és +fel +felé +hanem +hiszen +hogy +hogyan +igen +így +illetve +ill. +ill +ilyen +ilyenkor +ison +ismét +itt +jó +jól +jobban +kell +kellett +keresztül +keressünk +ki +kívül +között +közül +legalább +lehet +lehetett +legyen +lenne +lenni +lesz +lett +maga +magát +majd +majd +már +más +másik +meg +még +mellett +mert +mely +melyek +mi +mit +míg +miért +milyen +mikor +minden +mindent +mindenki +mindig +mint +mintha +mivel +most +nagy +nagyobb +nagyon +ne +néha +nekem +neki +nem +néhány +nélkül +nincs +olyan +ott +össze +ő +ők +őket +pedig +persze +rá +s +saját +sem +semmi +sok +sokat +sokkal +számára +szemben +szerint +szinte +talán +tehát +teljes +tovább +továbbá +több +úgy +ugyanis +új +újabb +újra +után +utána +utolsó +vagy +vagyis +valaki +valami +valamint +való +vagyok +van +vannak +volt +voltam +voltak +voltunk +vissza +vele +viszont +volna +`) + +func TokenMapConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenMap, error) { + rv := analysis.NewTokenMap() + err := rv.LoadBytes(HungarianStopWords) + return rv, err +} + +func init() { + registry.RegisterTokenMap(StopName, TokenMapConstructor) +} diff --git a/analysis/lang/nl/analyzer_nl.go b/analysis/lang/nl/analyzer_nl.go new file mode 100644 index 000000000..69853a9e1 --- /dev/null +++ b/analysis/lang/nl/analyzer_nl.go @@ -0,0 +1,57 @@ +// 
Copyright (c) 2018 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package nl + +import ( + "github.com/blevesearch/bleve/analysis" + "github.com/blevesearch/bleve/registry" + + "github.com/blevesearch/bleve/analysis/token/lowercase" + "github.com/blevesearch/bleve/analysis/tokenizer/unicode" +) + +const AnalyzerName = "nl" + +func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (*analysis.Analyzer, error) { + unicodeTokenizer, err := cache.TokenizerNamed(unicode.Name) + if err != nil { + return nil, err + } + toLowerFilter, err := cache.TokenFilterNamed(lowercase.Name) + if err != nil { + return nil, err + } + stopNlFilter, err := cache.TokenFilterNamed(StopName) + if err != nil { + return nil, err + } + stemmerNlFilter, err := cache.TokenFilterNamed(SnowballStemmerName) + if err != nil { + return nil, err + } + rv := analysis.Analyzer{ + Tokenizer: unicodeTokenizer, + TokenFilters: []analysis.TokenFilter{ + toLowerFilter, + stopNlFilter, + stemmerNlFilter, + }, + } + return &rv, nil +} + +func init() { + registry.RegisterAnalyzer(AnalyzerName, AnalyzerConstructor) +} diff --git a/analysis/lang/nl/analyzer_nl_test.go b/analysis/lang/nl/analyzer_nl_test.go new file mode 100644 index 000000000..21e851c33 --- /dev/null +++ b/analysis/lang/nl/analyzer_nl_test.go @@ -0,0 +1,70 @@ +// Copyright (c) 2018 Couchbase, Inc. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package nl + +import ( + "reflect" + "testing" + + "github.com/blevesearch/bleve/analysis" + "github.com/blevesearch/bleve/registry" +) + +func TestDutchAnalyzer(t *testing.T) { + tests := []struct { + input []byte + output analysis.TokenStream + }{ + // stemming + { + input: []byte("lichamelijk"), + output: analysis.TokenStream{ + &analysis.Token{ + Term: []byte("licham"), + }, + }, + }, + { + input: []byte("lichamelijke"), + output: analysis.TokenStream{ + &analysis.Token{ + Term: []byte("licham"), + }, + }, + }, + // stop word + { + input: []byte("van"), + output: analysis.TokenStream{}, + }, + } + + cache := registry.NewCache() + analyzer, err := cache.AnalyzerNamed(AnalyzerName) + if err != nil { + t.Fatal(err) + } + for _, test := range tests { + actual := analyzer.Analyze(test.input) + if len(actual) != len(test.output) { + t.Fatalf("expected length: %d, got %d", len(test.output), len(actual)) + } + for i, tok := range actual { + if !reflect.DeepEqual(tok.Term, test.output[i].Term) { + t.Errorf("expected term %s (% x) got %s (% x)", test.output[i].Term, test.output[i].Term, tok.Term, tok.Term) + } + } + } +} diff --git a/analysis/lang/nl/stemmer_nl.go b/analysis/lang/nl/stemmer_nl.go new file mode 100644 index 000000000..049d92160 --- /dev/null +++ b/analysis/lang/nl/stemmer_nl.go @@ -0,0 +1,49 @@ +// Copyright (c) 2018 Couchbase, Inc. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package nl + +import ( + "github.com/blevesearch/bleve/analysis" + "github.com/blevesearch/bleve/registry" + + "github.com/blevesearch/snowballstem" + "github.com/blevesearch/snowballstem/dutch" +) + +const SnowballStemmerName = "stemmer_nl_snowball" + +type DutchStemmerFilter struct { +} + +func NewDutchStemmerFilter() *DutchStemmerFilter { + return &DutchStemmerFilter{} +} + +func (s *DutchStemmerFilter) Filter(input analysis.TokenStream) analysis.TokenStream { + for _, token := range input { + env := snowballstem.NewEnv(string(token.Term)) + dutch.Stem(env) + token.Term = []byte(env.Current()) + } + return input +} + +func DutchStemmerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) { + return NewDutchStemmerFilter(), nil +} + +func init() { + registry.RegisterTokenFilter(SnowballStemmerName, DutchStemmerFilterConstructor) +} diff --git a/analysis/lang/nl/stop_filter_nl.go b/analysis/lang/nl/stop_filter_nl.go new file mode 100644 index 000000000..218f0f42c --- /dev/null +++ b/analysis/lang/nl/stop_filter_nl.go @@ -0,0 +1,33 @@ +// Copyright (c) 2018 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package nl + +import ( + "github.com/blevesearch/bleve/analysis" + "github.com/blevesearch/bleve/analysis/token/stop" + "github.com/blevesearch/bleve/registry" +) + +func StopTokenFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) { + tokenMap, err := cache.TokenMapNamed(StopName) + if err != nil { + return nil, err + } + return stop.NewStopTokensFilter(tokenMap), nil +} + +func init() { + registry.RegisterTokenFilter(StopName, StopTokenFilterConstructor) +} diff --git a/analysis/lang/nl/stop_words_nl.go b/analysis/lang/nl/stop_words_nl.go new file mode 100644 index 000000000..4adae1002 --- /dev/null +++ b/analysis/lang/nl/stop_words_nl.go @@ -0,0 +1,143 @@ +package nl + +import ( + "github.com/blevesearch/bleve/analysis" + "github.com/blevesearch/bleve/registry" +) + +const StopName = "stop_nl" + +// this content was obtained from: +// lucene-4.7.2/analysis/common/src/resources/org/apache/lucene/analysis/snowball/ +// ` was changed to ' to allow for literal string + +var DutchStopWords = []byte(` | From svn.tartarus.org/snowball/trunk/website/algorithms/dutch/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + | + | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" + + | A Dutch stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. 
+ + | This is a ranked list (commonest to rarest) of stopwords derived from + | a large sample of Dutch text. + + | Dutch stop words frequently exhibit homonym clashes. These are indicated + | clearly below. + +de | the +en | and +van | of, from +ik | I, the ego +te | (1) chez, at etc, (2) to, (3) too +dat | that, which +die | that, those, who, which +in | in, inside +een | a, an, one +hij | he +het | the, it +niet | not, nothing, naught +zijn | (1) to be, being, (2) his, one's, its +is | is +was | (1) was, past tense of all persons sing. of 'zijn' (to be) (2) wax, (3) the washing, (4) rise of river +op | on, upon, at, in, up, used up +aan | on, upon, to (as dative) +met | with, by +als | like, such as, when +voor | (1) before, in front of, (2) furrow +had | had, past tense all persons sing. of 'hebben' (have) +er | there +maar | but, only +om | round, about, for etc +hem | him +dan | then +zou | should/would, past tense all persons sing. of 'zullen' +of | or, whether, if +wat | what, something, anything +mijn | possessive and noun 'mine' +men | people, 'one' +dit | this +zo | so, thus, in this way +door | through by +over | over, across +ze | she, her, they, them +zich | oneself +bij | (1) a bee, (2) by, near, at +ook | also, too +tot | till, until +je | you +mij | me +uit | out of, from +der | Old Dutch form of 'van der' still found in surnames +daar | (1) there, (2) because +haar | (1) her, their, them, (2) hair +naar | (1) unpleasant, unwell etc, (2) towards, (3) as +heb | present first person sing. of 'to have' +hoe | how, why +heeft | present third person sing. of 'to have' +hebben | 'to have' and various parts thereof +deze | this +u | you +want | (1) for, (2) mitten, (3) rigging +nog | yet, still +zal | 'shall', first and third person sing. 
of verb 'zullen' (will) +me | me +zij | she, they +nu | now +ge | 'thou', still used in Belgium and south Netherlands +geen | none +omdat | because +iets | something, somewhat +worden | to become, grow, get +toch | yet, still +al | all, every, each +waren | (1) 'were' (2) to wander, (3) wares, (3) +veel | much, many +meer | (1) more, (2) lake +doen | to do, to make +toen | then, when +moet | noun 'spot/mote' and present form of 'to must' +ben | (1) am, (2) 'are' in interrogative second person singular of 'to be' +zonder | without +kan | noun 'can' and present form of 'to be able' +hun | their, them +dus | so, consequently +alles | all, everything, anything +onder | under, beneath +ja | yes, of course +eens | once, one day +hier | here +wie | who +werd | imperfect third person sing. of 'become' +altijd | always +doch | yet, but etc +wordt | present third person sing. of 'become' +wezen | (1) to be, (2) 'been' as in 'been fishing', (3) orphans +kunnen | to be able +ons | us/our +zelf | self +tegen | against, towards, at +na | after, near +reeds | already +wil | (1) present tense of 'want', (2) 'will', noun, (3) fender +kon | could; past tense of 'to be able' +niets | nothing +uw | your +iemand | somebody +geweest | been; past participle of 'be' +andere | other +`) + +func TokenMapConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenMap, error) { + rv := analysis.NewTokenMap() + err := rv.LoadBytes(DutchStopWords) + return rv, err +} + +func init() { + registry.RegisterTokenMap(StopName, TokenMapConstructor) +} diff --git a/analysis/lang/no/analyzer_no.go b/analysis/lang/no/analyzer_no.go new file mode 100644 index 000000000..57d749eac --- /dev/null +++ b/analysis/lang/no/analyzer_no.go @@ -0,0 +1,57 @@ +// Copyright (c) 2018 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package no + +import ( + "github.com/blevesearch/bleve/analysis" + "github.com/blevesearch/bleve/registry" + + "github.com/blevesearch/bleve/analysis/token/lowercase" + "github.com/blevesearch/bleve/analysis/tokenizer/unicode" +) + +const AnalyzerName = "no" + +func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (*analysis.Analyzer, error) { + unicodeTokenizer, err := cache.TokenizerNamed(unicode.Name) + if err != nil { + return nil, err + } + toLowerFilter, err := cache.TokenFilterNamed(lowercase.Name) + if err != nil { + return nil, err + } + stopNoFilter, err := cache.TokenFilterNamed(StopName) + if err != nil { + return nil, err + } + stemmerNoFilter, err := cache.TokenFilterNamed(SnowballStemmerName) + if err != nil { + return nil, err + } + rv := analysis.Analyzer{ + Tokenizer: unicodeTokenizer, + TokenFilters: []analysis.TokenFilter{ + toLowerFilter, + stopNoFilter, + stemmerNoFilter, + }, + } + return &rv, nil +} + +func init() { + registry.RegisterAnalyzer(AnalyzerName, AnalyzerConstructor) +} diff --git a/analysis/lang/no/analyzer_no_test.go b/analysis/lang/no/analyzer_no_test.go new file mode 100644 index 000000000..c73f5f731 --- /dev/null +++ b/analysis/lang/no/analyzer_no_test.go @@ -0,0 +1,70 @@ +// Copyright (c) 2018 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package no + +import ( + "reflect" + "testing" + + "github.com/blevesearch/bleve/analysis" + "github.com/blevesearch/bleve/registry" +) + +func TestNorwegianAnalyzer(t *testing.T) { + tests := []struct { + input []byte + output analysis.TokenStream + }{ + // stemming + { + input: []byte("havnedistriktene"), + output: analysis.TokenStream{ + &analysis.Token{ + Term: []byte("havnedistrikt"), + }, + }, + }, + { + input: []byte("havnedistrikter"), + output: analysis.TokenStream{ + &analysis.Token{ + Term: []byte("havnedistrikt"), + }, + }, + }, + // stop word + { + input: []byte("det"), + output: analysis.TokenStream{}, + }, + } + + cache := registry.NewCache() + analyzer, err := cache.AnalyzerNamed(AnalyzerName) + if err != nil { + t.Fatal(err) + } + for _, test := range tests { + actual := analyzer.Analyze(test.input) + if len(actual) != len(test.output) { + t.Fatalf("expected length: %d, got %d", len(test.output), len(actual)) + } + for i, tok := range actual { + if !reflect.DeepEqual(tok.Term, test.output[i].Term) { + t.Errorf("expected term %s (% x) got %s (% x)", test.output[i].Term, test.output[i].Term, tok.Term, tok.Term) + } + } + } +} diff --git a/analysis/lang/no/stemmer_no.go b/analysis/lang/no/stemmer_no.go new file mode 100644 index 000000000..e61e02477 --- /dev/null +++ b/analysis/lang/no/stemmer_no.go @@ -0,0 +1,49 @@ +// Copyright (c) 2018 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package no + +import ( + "github.com/blevesearch/bleve/analysis" + "github.com/blevesearch/bleve/registry" + + "github.com/blevesearch/snowballstem" + "github.com/blevesearch/snowballstem/norwegian" +) + +const SnowballStemmerName = "stemmer_no_snowball" + +type NorwegianStemmerFilter struct { +} + +func NewNorwegianStemmerFilter() *NorwegianStemmerFilter { + return &NorwegianStemmerFilter{} +} + +func (s *NorwegianStemmerFilter) Filter(input analysis.TokenStream) analysis.TokenStream { + for _, token := range input { + env := snowballstem.NewEnv(string(token.Term)) + norwegian.Stem(env) + token.Term = []byte(env.Current()) + } + return input +} + +func NorwegianStemmerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) { + return NewNorwegianStemmerFilter(), nil +} + +func init() { + registry.RegisterTokenFilter(SnowballStemmerName, NorwegianStemmerFilterConstructor) +} diff --git a/analysis/lang/no/stop_filter_no.go b/analysis/lang/no/stop_filter_no.go new file mode 100644 index 000000000..093688fa7 --- /dev/null +++ b/analysis/lang/no/stop_filter_no.go @@ -0,0 +1,33 @@ +// Copyright (c) 2018 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package no + +import ( + "github.com/blevesearch/bleve/analysis" + "github.com/blevesearch/bleve/analysis/token/stop" + "github.com/blevesearch/bleve/registry" +) + +func StopTokenFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) { + tokenMap, err := cache.TokenMapNamed(StopName) + if err != nil { + return nil, err + } + return stop.NewStopTokensFilter(tokenMap), nil +} + +func init() { + registry.RegisterTokenFilter(StopName, StopTokenFilterConstructor) +} diff --git a/analysis/lang/no/stop_words_no.go b/analysis/lang/no/stop_words_no.go new file mode 100644 index 000000000..bfca34846 --- /dev/null +++ b/analysis/lang/no/stop_words_no.go @@ -0,0 +1,218 @@ +package no + +import ( + "github.com/blevesearch/bleve/analysis" + "github.com/blevesearch/bleve/registry" +) + +const StopName = "stop_no" + +// this content was obtained from: +// lucene-4.7.2/analysis/common/src/resources/org/apache/lucene/analysis/snowball/ +// ` was changed to ' to allow for literal string + +var NorwegianStopWords = []byte(` | From svn.tartarus.org/snowball/trunk/website/algorithms/norwegian/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + | + | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" + + | A Norwegian stop word list. Comments begin with vertical bar. 
Each stop + | word is at the start of a line. + + | This stop word list is for the dominant bokmål dialect. Words unique + | to nynorsk are marked *. + + | Revised by Jan Bruusgaard , Jan 2005 + +og | and +i | in +jeg | I +det | it/this/that +at | to (w. inf.) +en | a/an +et | a/an +den | it/this/that +til | to +er | is/am/are +som | who/that +på | on +de | they / you(formal) +med | with +han | he +av | of +ikke | not +ikkje | not * +der | there +så | so +var | was/were +meg | me +seg | you +men | but +ett | one +har | have +om | about +vi | we +min | my +mitt | my +ha | have +hadde | had +hun | she +nå | now +over | over +da | when/as +ved | by/know +fra | from +du | you +ut | out +sin | your +dem | them +oss | us +opp | up +man | you/one +kan | can +hans | his +hvor | where +eller | or +hva | what +skal | shall/must +selv | self (reflective) +sjøl | self (reflective) +her | here +alle | all +vil | will +bli | become +ble | became +blei | became * +blitt | have become +kunne | could +inn | in +når | when +være | be +kom | come +noen | some +noe | some +ville | would +dere | you +som | who/which/that +deres | their/theirs +kun | only/just +ja | yes +etter | after +ned | down +skulle | should +denne | this +for | for/because +deg | you +si | hers/his +sine | hers/his +sitt | hers/his +mot | against +å | to +meget | much +hvorfor | why +dette | this +disse | these/those +uten | without +hvordan | how +ingen | none +din | your +ditt | your +blir | become +samme | same +hvilken | which +hvilke | which (plural) +sånn | such a +inni | inside/within +mellom | between +vår | our +hver | each +hvem | who +vors | us/ours +hvis | whose +både | both +bare | only/just +enn | than +fordi | as/because +før | before +mange | many +også | also +slik | just +vært | been +være | to be +båe | both * +begge | both +siden | since +dykk | your * +dykkar | yours * +dei | they * +deira | them * +deires | theirs * +deim | them * +di | your (fem.) 
* +då | as/when * +eg | I * +ein | a/an * +eit | a/an * +eitt | a/an * +elles | or * +honom | he * +hjå | at * +ho | she * +hoe | she * +henne | her +hennar | her/hers +hennes | hers +hoss | how * +hossen | how * +ikkje | not * +ingi | noone * +inkje | noone * +korleis | how * +korso | how * +kva | what/which * +kvar | where * +kvarhelst | where * +kven | who/whom * +kvi | why * +kvifor | why * +me | we * +medan | while * +mi | my * +mine | my * +mykje | much * +no | now * +nokon | some (masc./neut.) * +noka | some (fem.) * +nokor | some * +noko | some * +nokre | some * +si | his/hers * +sia | since * +sidan | since * +so | so * +somt | some * +somme | some * +um | about* +upp | up * +vere | be * +vore | was * +verte | become * +vort | become * +varte | became * +vart | became * + +`) + +func TokenMapConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenMap, error) { + rv := analysis.NewTokenMap() + err := rv.LoadBytes(NorwegianStopWords) + return rv, err +} + +func init() { + registry.RegisterTokenMap(StopName, TokenMapConstructor) +} diff --git a/analysis/lang/ro/analyzer_ro.go b/analysis/lang/ro/analyzer_ro.go new file mode 100644 index 000000000..e29388155 --- /dev/null +++ b/analysis/lang/ro/analyzer_ro.go @@ -0,0 +1,57 @@ +// Copyright (c) 2018 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package ro + +import ( + "github.com/blevesearch/bleve/analysis" + "github.com/blevesearch/bleve/registry" + + "github.com/blevesearch/bleve/analysis/token/lowercase" + "github.com/blevesearch/bleve/analysis/tokenizer/unicode" +) + +const AnalyzerName = "ro" + +func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (*analysis.Analyzer, error) { + unicodeTokenizer, err := cache.TokenizerNamed(unicode.Name) + if err != nil { + return nil, err + } + toLowerFilter, err := cache.TokenFilterNamed(lowercase.Name) + if err != nil { + return nil, err + } + stopRoFilter, err := cache.TokenFilterNamed(StopName) + if err != nil { + return nil, err + } + stemmerRoFilter, err := cache.TokenFilterNamed(SnowballStemmerName) + if err != nil { + return nil, err + } + rv := analysis.Analyzer{ + Tokenizer: unicodeTokenizer, + TokenFilters: []analysis.TokenFilter{ + toLowerFilter, + stopRoFilter, + stemmerRoFilter, + }, + } + return &rv, nil +} + +func init() { + registry.RegisterAnalyzer(AnalyzerName, AnalyzerConstructor) +} diff --git a/analysis/lang/ro/analyzer_ro_test.go b/analysis/lang/ro/analyzer_ro_test.go new file mode 100644 index 000000000..ee8b88f80 --- /dev/null +++ b/analysis/lang/ro/analyzer_ro_test.go @@ -0,0 +1,70 @@ +// Copyright (c) 2018 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package ro + +import ( + "reflect" + "testing" + + "github.com/blevesearch/bleve/analysis" + "github.com/blevesearch/bleve/registry" +) + +func TestRomanianAnalyzer(t *testing.T) { + tests := []struct { + input []byte + output analysis.TokenStream + }{ + // stemming + { + input: []byte("absenţa"), + output: analysis.TokenStream{ + &analysis.Token{ + Term: []byte("absenţ"), + }, + }, + }, + { + input: []byte("absenţi"), + output: analysis.TokenStream{ + &analysis.Token{ + Term: []byte("absenţ"), + }, + }, + }, + // stop word + { + input: []byte("îl"), + output: analysis.TokenStream{}, + }, + } + + cache := registry.NewCache() + analyzer, err := cache.AnalyzerNamed(AnalyzerName) + if err != nil { + t.Fatal(err) + } + for _, test := range tests { + actual := analyzer.Analyze(test.input) + if len(actual) != len(test.output) { + t.Fatalf("expected length: %d, got %d", len(test.output), len(actual)) + } + for i, tok := range actual { + if !reflect.DeepEqual(tok.Term, test.output[i].Term) { + t.Errorf("expected term %s (% x) got %s (% x)", test.output[i].Term, test.output[i].Term, tok.Term, tok.Term) + } + } + } +} diff --git a/analysis/lang/ro/stemmer_ro.go b/analysis/lang/ro/stemmer_ro.go new file mode 100644 index 000000000..3966215ff --- /dev/null +++ b/analysis/lang/ro/stemmer_ro.go @@ -0,0 +1,49 @@ +// Copyright (c) 2018 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package ro + +import ( + "github.com/blevesearch/bleve/analysis" + "github.com/blevesearch/bleve/registry" + + "github.com/blevesearch/snowballstem" + "github.com/blevesearch/snowballstem/romanian" +) + +const SnowballStemmerName = "stemmer_ro_snowball" + +type RomanianStemmerFilter struct { +} + +func NewRomanianStemmerFilter() *RomanianStemmerFilter { + return &RomanianStemmerFilter{} +} + +func (s *RomanianStemmerFilter) Filter(input analysis.TokenStream) analysis.TokenStream { + for _, token := range input { + env := snowballstem.NewEnv(string(token.Term)) + romanian.Stem(env) + token.Term = []byte(env.Current()) + } + return input +} + +func RomanianStemmerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) { + return NewRomanianStemmerFilter(), nil +} + +func init() { + registry.RegisterTokenFilter(SnowballStemmerName, RomanianStemmerFilterConstructor) +} diff --git a/analysis/lang/ro/stop_filter_ro.go b/analysis/lang/ro/stop_filter_ro.go new file mode 100644 index 000000000..a2f7f6dd9 --- /dev/null +++ b/analysis/lang/ro/stop_filter_ro.go @@ -0,0 +1,33 @@ +// Copyright (c) 2018 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package ro + +import ( + "github.com/blevesearch/bleve/analysis" + "github.com/blevesearch/bleve/analysis/token/stop" + "github.com/blevesearch/bleve/registry" +) + +func StopTokenFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) { + tokenMap, err := cache.TokenMapNamed(StopName) + if err != nil { + return nil, err + } + return stop.NewStopTokensFilter(tokenMap), nil +} + +func init() { + registry.RegisterTokenFilter(StopName, StopTokenFilterConstructor) +} diff --git a/analysis/lang/ro/stop_words_ro.go b/analysis/lang/ro/stop_words_ro.go new file mode 100644 index 000000000..e7d62d414 --- /dev/null +++ b/analysis/lang/ro/stop_words_ro.go @@ -0,0 +1,257 @@ +package ro + +import ( + "github.com/blevesearch/bleve/analysis" + "github.com/blevesearch/bleve/registry" +) + +const StopName = "stop_ro" + +// this content was obtained from: +// lucene-4.7.2/analysis/common/src/resources/org/apache/lucene/analysis/ +// ` was changed to ' to allow for literal string + +var RomanianStopWords = []byte(`# This file was created by Jacques Savoy and is distributed under the BSD license. +# See http://members.unine.ch/jacques.savoy/clef/index.html. 
+# Also see http://www.opensource.org/licenses/bsd-license.html +acea +aceasta +această +aceea +acei +aceia +acel +acela +acele +acelea +acest +acesta +aceste +acestea +aceşti +aceştia +acolo +acum +ai +aia +aibă +aici +al +ăla +ale +alea +ălea +altceva +altcineva +am +ar +are +aş +aşadar +asemenea +asta +ăsta +astăzi +astea +ăstea +ăştia +asupra +aţi +au +avea +avem +aveţi +azi +bine +bucur +bună +ca +că +căci +când +care +cărei +căror +cărui +cât +câte +câţi +către +câtva +ce +cel +ceva +chiar +cînd +cine +cineva +cît +cîte +cîţi +cîtva +contra +cu +cum +cumva +curând +curînd +da +dă +dacă +dar +datorită +de +deci +deja +deoarece +departe +deşi +din +dinaintea +dintr +dintre +drept +după +ea +ei +el +ele +eram +este +eşti +eu +face +fără +fi +fie +fiecare +fii +fim +fiţi +iar +ieri +îi +îl +îmi +împotriva +în +înainte +înaintea +încât +încît +încotro +între +întrucât +întrucît +îţi +la +lângă +le +li +lîngă +lor +lui +mă +mâine +mea +mei +mele +mereu +meu +mi +mine +mult +multă +mulţi +ne +nicăieri +nici +nimeni +nişte +noastră +noastre +noi +noştri +nostru +nu +ori +oricând +oricare +oricât +orice +oricînd +oricine +oricît +oricum +oriunde +până +pe +pentru +peste +pînă +poate +pot +prea +prima +primul +prin +printr +sa +să +săi +sale +sau +său +se +şi +sînt +sîntem +sînteţi +spre +sub +sunt +suntem +sunteţi +ta +tăi +tale +tău +te +ţi +ţie +tine +toată +toate +tot +toţi +totuşi +tu +un +una +unde +undeva +unei +unele +uneori +unor +vă +vi +voastră +voastre +voi +voştri +vostru +vouă +vreo +vreun +`) + +func TokenMapConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenMap, error) { + rv := analysis.NewTokenMap() + err := rv.LoadBytes(RomanianStopWords) + return rv, err +} + +func init() { + registry.RegisterTokenMap(StopName, TokenMapConstructor) +} diff --git a/analysis/lang/sv/analyzer_sv.go b/analysis/lang/sv/analyzer_sv.go new file mode 100644 index 000000000..f650158d4 --- /dev/null +++ b/analysis/lang/sv/analyzer_sv.go @@ -0,0 
+1,57 @@ +// Copyright (c) 2018 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package sv + +import ( + "github.com/blevesearch/bleve/analysis" + "github.com/blevesearch/bleve/registry" + + "github.com/blevesearch/bleve/analysis/token/lowercase" + "github.com/blevesearch/bleve/analysis/tokenizer/unicode" +) + +const AnalyzerName = "sv" + +func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (*analysis.Analyzer, error) { + unicodeTokenizer, err := cache.TokenizerNamed(unicode.Name) + if err != nil { + return nil, err + } + toLowerFilter, err := cache.TokenFilterNamed(lowercase.Name) + if err != nil { + return nil, err + } + stopSvFilter, err := cache.TokenFilterNamed(StopName) + if err != nil { + return nil, err + } + stemmerSvFilter, err := cache.TokenFilterNamed(SnowballStemmerName) + if err != nil { + return nil, err + } + rv := analysis.Analyzer{ + Tokenizer: unicodeTokenizer, + TokenFilters: []analysis.TokenFilter{ + toLowerFilter, + stopSvFilter, + stemmerSvFilter, + }, + } + return &rv, nil +} + +func init() { + registry.RegisterAnalyzer(AnalyzerName, AnalyzerConstructor) +} diff --git a/analysis/lang/sv/analyzer_sv_test.go b/analysis/lang/sv/analyzer_sv_test.go new file mode 100644 index 000000000..2d358b63e --- /dev/null +++ b/analysis/lang/sv/analyzer_sv_test.go @@ -0,0 +1,70 @@ +// Copyright (c) 2018 Couchbase, Inc. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package sv + +import ( + "reflect" + "testing" + + "github.com/blevesearch/bleve/analysis" + "github.com/blevesearch/bleve/registry" +) + +func TestSwedishAnalyzer(t *testing.T) { + tests := []struct { + input []byte + output analysis.TokenStream + }{ + // stemming + { + input: []byte("jaktkarlarne"), + output: analysis.TokenStream{ + &analysis.Token{ + Term: []byte("jaktkarl"), + }, + }, + }, + { + input: []byte("jaktkarlens"), + output: analysis.TokenStream{ + &analysis.Token{ + Term: []byte("jaktkarl"), + }, + }, + }, + // stop word + { + input: []byte("och"), + output: analysis.TokenStream{}, + }, + } + + cache := registry.NewCache() + analyzer, err := cache.AnalyzerNamed(AnalyzerName) + if err != nil { + t.Fatal(err) + } + for _, test := range tests { + actual := analyzer.Analyze(test.input) + if len(actual) != len(test.output) { + t.Fatalf("expected length: %d, got %d", len(test.output), len(actual)) + } + for i, tok := range actual { + if !reflect.DeepEqual(tok.Term, test.output[i].Term) { + t.Errorf("expected term %s (% x) got %s (% x)", test.output[i].Term, test.output[i].Term, tok.Term, tok.Term) + } + } + } +} diff --git a/analysis/lang/sv/stemmer_sv.go b/analysis/lang/sv/stemmer_sv.go new file mode 100644 index 000000000..247f11bb2 --- /dev/null +++ b/analysis/lang/sv/stemmer_sv.go @@ -0,0 +1,49 @@ +// Copyright (c) 2018 Couchbase, Inc. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package sv + +import ( + "github.com/blevesearch/bleve/analysis" + "github.com/blevesearch/bleve/registry" + + "github.com/blevesearch/snowballstem" + "github.com/blevesearch/snowballstem/swedish" +) + +const SnowballStemmerName = "stemmer_sv_snowball" + +type SwedishStemmerFilter struct { +} + +func NewSwedishStemmerFilter() *SwedishStemmerFilter { + return &SwedishStemmerFilter{} +} + +func (s *SwedishStemmerFilter) Filter(input analysis.TokenStream) analysis.TokenStream { + for _, token := range input { + env := snowballstem.NewEnv(string(token.Term)) + swedish.Stem(env) + token.Term = []byte(env.Current()) + } + return input +} + +func SwedishStemmerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) { + return NewSwedishStemmerFilter(), nil +} + +func init() { + registry.RegisterTokenFilter(SnowballStemmerName, SwedishStemmerFilterConstructor) +} diff --git a/analysis/lang/sv/stop_filter_sv.go b/analysis/lang/sv/stop_filter_sv.go new file mode 100644 index 000000000..46a533d17 --- /dev/null +++ b/analysis/lang/sv/stop_filter_sv.go @@ -0,0 +1,33 @@ +// Copyright (c) 2018 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package sv + +import ( + "github.com/blevesearch/bleve/analysis" + "github.com/blevesearch/bleve/analysis/token/stop" + "github.com/blevesearch/bleve/registry" +) + +func StopTokenFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) { + tokenMap, err := cache.TokenMapNamed(StopName) + if err != nil { + return nil, err + } + return stop.NewStopTokensFilter(tokenMap), nil +} + +func init() { + registry.RegisterTokenFilter(StopName, StopTokenFilterConstructor) +} diff --git a/analysis/lang/sv/stop_words_sv.go b/analysis/lang/sv/stop_words_sv.go new file mode 100644 index 000000000..b4022fd90 --- /dev/null +++ b/analysis/lang/sv/stop_words_sv.go @@ -0,0 +1,157 @@ +package sv + +import ( + "github.com/blevesearch/bleve/analysis" + "github.com/blevesearch/bleve/registry" +) + +const StopName = "stop_sv" + +// this content was obtained from: +// lucene-4.7.2/analysis/common/src/resources/org/apache/lucene/analysis/snowball/ +// ` was changed to ' to allow for literal string + +var SwedishStopWords = []byte(` | From svn.tartarus.org/snowball/trunk/website/algorithms/swedish/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + | + | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" + + | A Swedish stop word list. Comments begin with vertical bar. 
Each stop + | word is at the start of a line. + + | This is a ranked list (commonest to rarest) of stopwords derived from + | a large text sample. + + | Swedish stop words occasionally exhibit homonym clashes. For example + | så = so, but also seed. These are indicated clearly below. + +och | and +det | it, this/that +att | to (with infinitive) +i | in, at +en | a +jag | I +hon | she +som | who, that +han | he +på | on +den | it, this/that +med | with +var | where, each +sig | him(self) etc +för | for +så | so (also: seed) +till | to +är | is +men | but +ett | a +om | if; around, about +hade | had +de | they, these/those +av | of +icke | not, no +mig | me +du | you +henne | her +då | then, when +sin | his +nu | now +har | have +inte | inte någon = no one +hans | his +honom | him +skulle | 'sake' +hennes | her +där | there +min | my +man | one (pronoun) +ej | nor +vid | at, by, on (also: vast) +kunde | could +något | some etc +från | from, off +ut | out +när | when +efter | after, behind +upp | up +vi | we +dem | them +vara | be +vad | what +över | over +än | than +dig | you +kan | can +sina | his +här | here +ha | have +mot | towards +alla | all +under | under (also: wonder) +någon | some etc +eller | or (else) +allt | all +mycket | much +sedan | since +ju | why +denna | this/that +själv | myself, yourself etc +detta | this/that +åt | to +utan | without +varit | was +hur | how +ingen | no +mitt | my +ni | you +bli | to be, become +blev | from bli +oss | us +din | thy +dessa | these/those +några | some etc +deras | their +blir | from bli +mina | my +samma | (the) same +vilken | who, that +er | you, your +sådan | such a +vår | our +blivit | from bli +dess | its +inom | within +mellan | between +sådant | such a +varför | why +varje | each +vilka | who, that +ditt | thy +vem | who +vilket | who, that +sitta | his +sådana | such a +vart | each +dina | thy +vars | whose +vårt | our +våra | our +ert | your +era | your +vilkas | whose + +`) + +func 
TokenMapConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenMap, error) { + rv := analysis.NewTokenMap() + err := rv.LoadBytes(SwedishStopWords) + return rv, err +} + +func init() { + registry.RegisterTokenMap(StopName, TokenMapConstructor) +} diff --git a/analysis/lang/tr/analyzer_tr.go b/analysis/lang/tr/analyzer_tr.go new file mode 100644 index 000000000..d52a1d5cc --- /dev/null +++ b/analysis/lang/tr/analyzer_tr.go @@ -0,0 +1,63 @@ +// Copyright (c) 2018 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package tr + +import ( + "github.com/blevesearch/bleve/analysis" + "github.com/blevesearch/bleve/registry" + + "github.com/blevesearch/bleve/analysis/token/apostrophe" + "github.com/blevesearch/bleve/analysis/token/lowercase" + "github.com/blevesearch/bleve/analysis/tokenizer/unicode" +) + +const AnalyzerName = "tr" + +func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (*analysis.Analyzer, error) { + unicodeTokenizer, err := cache.TokenizerNamed(unicode.Name) + if err != nil { + return nil, err + } + aposFilter, err := cache.TokenFilterNamed(apostrophe.Name) + if err != nil { + return nil, err + } + toLowerFilter, err := cache.TokenFilterNamed(lowercase.Name) + if err != nil { + return nil, err + } + stopTrFilter, err := cache.TokenFilterNamed(StopName) + if err != nil { + return nil, err + } + stemmerTrFilter, err := cache.TokenFilterNamed(SnowballStemmerName) + if err != nil { + return nil, err + } + rv := analysis.Analyzer{ + Tokenizer: unicodeTokenizer, + TokenFilters: []analysis.TokenFilter{ + aposFilter, + toLowerFilter, + stopTrFilter, + stemmerTrFilter, + }, + } + return &rv, nil +} + +func init() { + registry.RegisterAnalyzer(AnalyzerName, AnalyzerConstructor) +} diff --git a/analysis/lang/tr/analyzer_tr_test.go b/analysis/lang/tr/analyzer_tr_test.go new file mode 100644 index 000000000..fe8980938 --- /dev/null +++ b/analysis/lang/tr/analyzer_tr_test.go @@ -0,0 +1,90 @@ +// Copyright (c) 2018 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +package tr + +import ( + "reflect" + "testing" + + "github.com/blevesearch/bleve/analysis" + "github.com/blevesearch/bleve/registry" +) + +func TestTurkishAnalyzer(t *testing.T) { + tests := []struct { + input []byte + output analysis.TokenStream + }{ + // stemming + { + input: []byte("ağacı"), + output: analysis.TokenStream{ + &analysis.Token{ + Term: []byte("ağaç"), + }, + }, + }, + { + input: []byte("ağaç"), + output: analysis.TokenStream{ + &analysis.Token{ + Term: []byte("ağaç"), + }, + }, + }, + // stop word + { + input: []byte("dolayı"), + output: analysis.TokenStream{}, + }, + // apostrophes + { + input: []byte("Kıbrıs'ta"), + output: analysis.TokenStream{ + &analysis.Token{ + Term: []byte("kıbrıs"), + }, + }, + }, + { + input: []byte("Van Gölü'ne"), + output: analysis.TokenStream{ + &analysis.Token{ + Term: []byte("van"), + }, + &analysis.Token{ + Term: []byte("göl"), + }, + }, + }, + } + + cache := registry.NewCache() + analyzer, err := cache.AnalyzerNamed(AnalyzerName) + if err != nil { + t.Fatal(err) + } + for _, test := range tests { + actual := analyzer.Analyze(test.input) + if len(actual) != len(test.output) { + t.Fatalf("expected length: %d, got %d", len(test.output), len(actual)) + } + for i, tok := range actual { + if !reflect.DeepEqual(tok.Term, test.output[i].Term) { + t.Errorf("expected term %s (% x) got %s (% x)", test.output[i].Term, test.output[i].Term, tok.Term, tok.Term) + } + } + } +} diff --git a/analysis/lang/tr/stemmer_tr.go b/analysis/lang/tr/stemmer_tr.go new file mode 100644 index 000000000..ba3034e1a --- /dev/null +++ b/analysis/lang/tr/stemmer_tr.go @@ -0,0 +1,49 @@ +// Copyright (c) 2018 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package tr + +import ( + "github.com/blevesearch/bleve/analysis" + "github.com/blevesearch/bleve/registry" + + "github.com/blevesearch/snowballstem" + "github.com/blevesearch/snowballstem/turkish" +) + +const SnowballStemmerName = "stemmer_tr_snowball" + +type TurkishStemmerFilter struct { +} + +func NewTurkishStemmerFilter() *TurkishStemmerFilter { + return &TurkishStemmerFilter{} +} + +func (s *TurkishStemmerFilter) Filter(input analysis.TokenStream) analysis.TokenStream { + for _, token := range input { + env := snowballstem.NewEnv(string(token.Term)) + turkish.Stem(env) + token.Term = []byte(env.Current()) + } + return input +} + +func TurkishStemmerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) { + return NewTurkishStemmerFilter(), nil +} + +func init() { + registry.RegisterTokenFilter(SnowballStemmerName, TurkishStemmerFilterConstructor) +} diff --git a/analysis/lang/tr/stop_filter_tr.go b/analysis/lang/tr/stop_filter_tr.go new file mode 100644 index 000000000..5b616eb9c --- /dev/null +++ b/analysis/lang/tr/stop_filter_tr.go @@ -0,0 +1,33 @@ +// Copyright (c) 2018 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package tr + +import ( + "github.com/blevesearch/bleve/analysis" + "github.com/blevesearch/bleve/analysis/token/stop" + "github.com/blevesearch/bleve/registry" +) + +func StopTokenFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) { + tokenMap, err := cache.TokenMapNamed(StopName) + if err != nil { + return nil, err + } + return stop.NewStopTokensFilter(tokenMap), nil +} + +func init() { + registry.RegisterTokenFilter(StopName, StopTokenFilterConstructor) +} diff --git a/analysis/lang/tr/stop_words_tr.go b/analysis/lang/tr/stop_words_tr.go new file mode 100644 index 000000000..f96fb07ed --- /dev/null +++ b/analysis/lang/tr/stop_words_tr.go @@ -0,0 +1,236 @@ +package tr + +import ( + "github.com/blevesearch/bleve/analysis" + "github.com/blevesearch/bleve/registry" +) + +const StopName = "stop_tr" + +// this content was obtained from: +// lucene-4.7.2/analysis/common/src/resources/org/apache/lucene/analysis/snowball/ +// ` was changed to ' to allow for literal string + +var TurkishStopWords = []byte(`# Turkish stopwords from LUCENE-559 +# merged with the list from "Information Retrieval on Turkish Texts" +# (http://www.users.muohio.edu/canf/papers/JASIST2008offPrint.pdf) +acaba +altmış +altı +ama +ancak +arada +aslında +ayrıca +bana +bazı +belki +ben +benden +beni +benim +beri +beş +bile +bin +bir +birçok +biri +birkaç +birkez +birşey +birşeyi +biz +bize +bizden +bizi +bizim +böyle +böylece +bu +buna +bunda +bundan +bunlar +bunları +bunların +bunu +bunun +burada +çok +çünkü +da +daha +dahi +de 
+defa +değil +diğer +diye +doksan +dokuz +dolayı +dolayısıyla +dört +edecek +eden +ederek +edilecek +ediliyor +edilmesi +ediyor +eğer +elli +en +etmesi +etti +ettiği +ettiğini +gibi +göre +halen +hangi +hatta +hem +henüz +hep +hepsi +her +herhangi +herkesin +hiç +hiçbir +için +iki +ile +ilgili +ise +işte +itibaren +itibariyle +kadar +karşın +katrilyon +kendi +kendilerine +kendini +kendisi +kendisine +kendisini +kez +ki +kim +kimden +kime +kimi +kimse +kırk +milyar +milyon +mu +mü +mı +nasıl +ne +neden +nedenle +nerde +nerede +nereye +niye +niçin +o +olan +olarak +oldu +olduğu +olduğunu +olduklarını +olmadı +olmadığı +olmak +olması +olmayan +olmaz +olsa +olsun +olup +olur +olursa +oluyor +on +ona +ondan +onlar +onlardan +onları +onların +onu +onun +otuz +oysa +öyle +pek +rağmen +sadece +sanki +sekiz +seksen +sen +senden +seni +senin +siz +sizden +sizi +sizin +şey +şeyden +şeyi +şeyler +şöyle +şu +şuna +şunda +şundan +şunları +şunu +tarafından +trilyon +tüm +üç +üzere +var +vardı +ve +veya +ya +yani +yapacak +yapılan +yapılması +yapıyor +yapmak +yaptı +yaptığı +yaptığını +yaptıkları +yedi +yerine +yetmiş +yine +yirmi +yoksa +yüz +zaten +`) + +func TokenMapConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenMap, error) { + rv := analysis.NewTokenMap() + err := rv.LoadBytes(TurkishStopWords) + return rv, err +} + +func init() { + registry.RegisterTokenMap(StopName, TokenMapConstructor) +} From 3afc5458e00f75c677b83f537e31efa1b9ca5152 Mon Sep 17 00:00:00 2001 From: Sreekanth Sivasankaran Date: Thu, 11 Jan 2018 14:44:05 +0530 Subject: [PATCH 135/728] MB-27498 - date range facet query panics Initialise the facet results map in case of an empty partial hits with a multi node cluster --- search.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/search.go b/search.go index c2ebafbb5..a57a11cb2 100644 --- a/search.go +++ b/search.go @@ -481,5 +481,8 @@ func (sr *SearchResult) Merge(other *SearchResult) { if other.MaxScore > sr.MaxScore { 
sr.MaxScore = other.MaxScore } + if len(sr.Facets) == 0 { + sr.Facets = make(search.FacetResults) + } sr.Facets.Merge(other.Facets) } From 039a4df33be99e8caea443e70d9d96aefcd4ec26 Mon Sep 17 00:00:00 2001 From: Sreekanth Sivasankaran Date: Thu, 11 Jan 2018 15:09:27 +0530 Subject: [PATCH 136/728] initialize only with an imminent merge --- search.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/search.go b/search.go index a57a11cb2..5d12b7a25 100644 --- a/search.go +++ b/search.go @@ -481,7 +481,7 @@ func (sr *SearchResult) Merge(other *SearchResult) { if other.MaxScore > sr.MaxScore { sr.MaxScore = other.MaxScore } - if len(sr.Facets) == 0 { + if len(sr.Facets) == 0 && len(other.Facets) != 0 { sr.Facets = make(search.FacetResults) } sr.Facets.Merge(other.Facets) From 4d71e901e866e06c006302d8e2b5bc71b7ce0793 Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Thu, 11 Jan 2018 11:00:18 -0500 Subject: [PATCH 137/728] make new analyzers available to consumers of the config pkg many tools and applications using bleve use the config pkg to include support for many languages out of the box by forcing import of optional packages. 
--- config/config.go | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/config/config.go b/config/config.go index c4c5e9153..ad0bdcb9a 100644 --- a/config/config.go +++ b/config/config.go @@ -75,19 +75,30 @@ import ( _ "github.com/blevesearch/bleve/analysis/lang/cjk" _ "github.com/blevesearch/bleve/analysis/lang/ckb" _ "github.com/blevesearch/bleve/analysis/lang/cs" + _ "github.com/blevesearch/bleve/analysis/lang/da" + _ "github.com/blevesearch/bleve/analysis/lang/de" _ "github.com/blevesearch/bleve/analysis/lang/el" _ "github.com/blevesearch/bleve/analysis/lang/en" + _ "github.com/blevesearch/bleve/analysis/lang/es" _ "github.com/blevesearch/bleve/analysis/lang/eu" _ "github.com/blevesearch/bleve/analysis/lang/fa" + _ "github.com/blevesearch/bleve/analysis/lang/fi" _ "github.com/blevesearch/bleve/analysis/lang/fr" _ "github.com/blevesearch/bleve/analysis/lang/ga" _ "github.com/blevesearch/bleve/analysis/lang/gl" _ "github.com/blevesearch/bleve/analysis/lang/hi" + _ "github.com/blevesearch/bleve/analysis/lang/hu" _ "github.com/blevesearch/bleve/analysis/lang/hy" _ "github.com/blevesearch/bleve/analysis/lang/id" _ "github.com/blevesearch/bleve/analysis/lang/in" _ "github.com/blevesearch/bleve/analysis/lang/it" + _ "github.com/blevesearch/bleve/analysis/lang/nl" + _ "github.com/blevesearch/bleve/analysis/lang/no" _ "github.com/blevesearch/bleve/analysis/lang/pt" + _ "github.com/blevesearch/bleve/analysis/lang/ro" + _ "github.com/blevesearch/bleve/analysis/lang/ru" + _ "github.com/blevesearch/bleve/analysis/lang/sv" + _ "github.com/blevesearch/bleve/analysis/lang/tr" // kv stores _ "github.com/blevesearch/bleve/index/store/boltdb" From d777d7c3652942a752147f747089d13151b4bd5a Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Mon, 15 Jan 2018 11:06:44 -0800 Subject: [PATCH 138/728] scorch mem segment comments consistency --- index/scorch/segment/mem/build.go | 2 +- index/scorch/segment/mem/segment.go | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) 
diff --git a/index/scorch/segment/mem/build.go b/index/scorch/segment/mem/build.go index cd11fb401..1dc0a788c 100644 --- a/index/scorch/segment/mem/build.go +++ b/index/scorch/segment/mem/build.go @@ -24,7 +24,7 @@ import ( "github.com/blevesearch/bleve/index" ) -// NewFromAnalyzedDocs places the analyzed document mutations into this segment +// NewFromAnalyzedDocs places the analyzed document mutations into a new segment func NewFromAnalyzedDocs(results []*index.AnalysisResult) *Segment { s := New() diff --git a/index/scorch/segment/mem/segment.go b/index/scorch/segment/mem/segment.go index 5ef3e1f34..40c071f60 100644 --- a/index/scorch/segment/mem/segment.go +++ b/index/scorch/segment/mem/segment.go @@ -46,7 +46,7 @@ type Segment struct { FieldsInv []string // term dictionary - // field id -> term -> posting id + 1 + // field id -> term -> postings list id + 1 Dicts []map[string]uint64 // term dictionary keys @@ -54,7 +54,7 @@ type Segment struct { DictKeys [][]string // Postings list - // Postings list id -> Postings bitmap + // postings list id -> Postings bitmap Postings []*roaring.Bitmap // Postings List has locations From e7bd6026eb239f7e8b452b02958b8d37b68bc0ad Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Mon, 15 Jan 2018 11:52:18 -0800 Subject: [PATCH 139/728] scorch mem segment preallocs docMap/fieldLens with capacity The first time through, startNumFields should be 0, where there ought to be more optimization assuming later docs have similar fields as the first doc. 
--- index/scorch/segment/mem/build.go | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/index/scorch/segment/mem/build.go b/index/scorch/segment/mem/build.go index 1dc0a788c..29c41d5de 100644 --- a/index/scorch/segment/mem/build.go +++ b/index/scorch/segment/mem/build.go @@ -83,9 +83,12 @@ func NewFromAnalyzedDocs(results []*index.AnalysisResult) *Segment { } func (s *Segment) processDocument(result *index.AnalysisResult) { + startNumFields := len(s.FieldsMap) + // used to collate information across fields - docMap := map[uint16]analysis.TokenFrequencies{} - fieldLens := map[uint16]int{} + docMap := make(map[uint16]analysis.TokenFrequencies, startNumFields) + fieldLens := make(map[uint16]int, startNumFields) + docNum := uint64(s.addDocument()) processField := func(field uint16, name string, l int, tf analysis.TokenFrequencies) { From 917c47079122dc59c79ec5e131811ca9dfdd48c3 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Mon, 15 Jan 2018 11:54:46 -0800 Subject: [PATCH 140/728] scorch mem segment VisitDocument() accesses StoredTypes/Pos outside of loop --- index/scorch/segment/mem/segment.go | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/index/scorch/segment/mem/segment.go b/index/scorch/segment/mem/segment.go index 40c071f60..3c400b531 100644 --- a/index/scorch/segment/mem/segment.go +++ b/index/scorch/segment/mem/segment.go @@ -188,9 +188,11 @@ func (s *Segment) VisitDocument(num uint64, visitor segment.DocumentFieldValueVi return nil } docFields := s.Stored[int(num)] + st := s.StoredTypes[int(num)] + sp := s.StoredPos[int(num)] for field, values := range docFields { for i, value := range values { - keepGoing := visitor(s.FieldsInv[field], s.StoredTypes[int(num)][field][i], value, s.StoredPos[int(num)][field][i]) + keepGoing := visitor(s.FieldsInv[field], st[field][i], value, sp[field][i]) if !keepGoing { return nil } From a4110d325c2ff0b790d509893328f07be6234cf5 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Mon, 15 Jan 
2018 16:37:06 -0800 Subject: [PATCH 141/728] scorch mem segment preallocates slices that are key'ed by postingId The scorch mem segment build phase uses the append() idiom to populate various slices that are keyed by postings list id's. These slices include... * Postings * PostingsLocs * Freqs * Norms * Locfields * Locstarts * Locends * Locpos * Locarraypos This change introduces an initialization step that preallocates those slices up-front, by assigning postings list id's to terms up-front. This change also has an additional effect of simplifying the processDocument() logic to no longer have to worry about a first-time initialization case, removing some duplicate'ish code. --- index/scorch/segment/mem/build.go | 138 +++++++++++++++--------------- 1 file changed, 67 insertions(+), 71 deletions(-) diff --git a/index/scorch/segment/mem/build.go b/index/scorch/segment/mem/build.go index 29c41d5de..8f080338b 100644 --- a/index/scorch/segment/mem/build.go +++ b/index/scorch/segment/mem/build.go @@ -31,6 +31,9 @@ func NewFromAnalyzedDocs(results []*index.AnalysisResult) *Segment { // ensure that _id field get fieldID 0 s.getOrDefineField("_id") + // fill Dicts/DictKeys and preallocate memory + s.initializeDict(results) + // walk each doc for _, result := range results { s.processDocument(result) @@ -82,12 +85,58 @@ func NewFromAnalyzedDocs(results []*index.AnalysisResult) *Segment { return s } -func (s *Segment) processDocument(result *index.AnalysisResult) { - startNumFields := len(s.FieldsMap) +// fill Dicts/DictKeys and preallocate memory for postings +func (s *Segment) initializeDict(results []*index.AnalysisResult) { + var numPostings int + + processField := func(fieldID uint16, tf analysis.TokenFrequencies) { + for term, _ := range tf { + _, exists := s.Dicts[fieldID][term] + if !exists { + numPostings++ + s.Dicts[fieldID][term] = uint64(numPostings) + s.DictKeys[fieldID] = append(s.DictKeys[fieldID], term) + } + } + } + + for _, result := range results { + // 
walk each composite field + for _, field := range result.Document.CompositeFields { + fieldID := uint16(s.getOrDefineField(field.Name())) + _, tf := field.Analyze() + processField(fieldID, tf) + } + + // walk each field + for i, field := range result.Document.Fields { + fieldID := uint16(s.getOrDefineField(field.Name())) + tf := result.Analyzed[i] + processField(fieldID, tf) + } + } + s.Postings = make([]*roaring.Bitmap, numPostings) + for i := 0; i < numPostings; i++ { + s.Postings[i] = roaring.New() + } + s.PostingsLocs = make([]*roaring.Bitmap, numPostings) + for i := 0; i < numPostings; i++ { + s.PostingsLocs[i] = roaring.New() + } + s.Freqs = make([][]uint64, numPostings) + s.Norms = make([][]float32, numPostings) + s.Locfields = make([][]uint16, numPostings) + s.Locstarts = make([][]uint64, numPostings) + s.Locends = make([][]uint64, numPostings) + s.Locpos = make([][]uint64, numPostings) + s.Locarraypos = make([][][]uint64, numPostings) +} + +func (s *Segment) processDocument(result *index.AnalysisResult) { // used to collate information across fields - docMap := make(map[uint16]analysis.TokenFrequencies, startNumFields) - fieldLens := make(map[uint16]int, startNumFields) + docMap := make(map[uint16]analysis.TokenFrequencies, len(s.FieldsMap)) + fieldLens := make(map[uint16]int, len(s.FieldsMap)) docNum := uint64(s.addDocument()) @@ -132,80 +181,27 @@ func (s *Segment) processDocument(result *index.AnalysisResult) { for fieldID, tokenFrequencies := range docMap { for term, tokenFreq := range tokenFrequencies { fieldTermPostings := s.Dicts[fieldID][term] - - // FIXME this if/else block has duplicate code that has resulted in - // bugs fixed/missed more than once, need to refactor - if fieldTermPostings == 0 { - // need to build new posting - bs := roaring.New() - bs.AddInt(int(docNum)) - - newPostingID := uint64(len(s.Postings) + 1) - // add this new bitset to the postings slice - s.Postings = append(s.Postings, bs) - - locationBS := roaring.New() - 
s.PostingsLocs = append(s.PostingsLocs, locationBS) - // add this to the details slice - s.Freqs = append(s.Freqs, []uint64{uint64(tokenFreq.Frequency())}) - s.Norms = append(s.Norms, []float32{float32(1.0 / math.Sqrt(float64(fieldLens[fieldID])))}) - // add to locations - var locfields []uint16 - var locstarts []uint64 - var locends []uint64 - var locpos []uint64 - var locarraypos [][]uint64 - if len(tokenFreq.Locations) > 0 { - locationBS.AddInt(int(docNum)) - } - for _, loc := range tokenFreq.Locations { - var locf = fieldID - if loc.Field != "" { - locf = uint16(s.getOrDefineField(loc.Field)) - } - locfields = append(locfields, locf) - locstarts = append(locstarts, uint64(loc.Start)) - locends = append(locends, uint64(loc.End)) - locpos = append(locpos, uint64(loc.Position)) - if len(loc.ArrayPositions) > 0 { - locarraypos = append(locarraypos, loc.ArrayPositions) - } else { - locarraypos = append(locarraypos, nil) - } - } - s.Locfields = append(s.Locfields, locfields) - s.Locstarts = append(s.Locstarts, locstarts) - s.Locends = append(s.Locends, locends) - s.Locpos = append(s.Locpos, locpos) - s.Locarraypos = append(s.Locarraypos, locarraypos) - // record it - s.Dicts[fieldID][term] = newPostingID - // this term was new for this field, add it to dictKeys - s.DictKeys[fieldID] = append(s.DictKeys[fieldID], term) - } else { - // posting already started for this field/term - // the actual offset is - 1, because 0 is zero value - bs := s.Postings[fieldTermPostings-1] - bs.AddInt(int(docNum)) - locationBS := s.PostingsLocs[fieldTermPostings-1] - s.Freqs[fieldTermPostings-1] = append(s.Freqs[fieldTermPostings-1], uint64(tokenFreq.Frequency())) - s.Norms[fieldTermPostings-1] = append(s.Norms[fieldTermPostings-1], float32(1.0/math.Sqrt(float64(fieldLens[fieldID])))) - if len(tokenFreq.Locations) > 0 { - locationBS.AddInt(int(docNum)) - } + pid := fieldTermPostings-1 + bs := s.Postings[pid] + bs.AddInt(int(docNum)) + s.Freqs[pid] = append(s.Freqs[pid], 
uint64(tokenFreq.Frequency())) + s.Norms[pid] = append(s.Norms[pid], float32(1.0/math.Sqrt(float64(fieldLens[fieldID])))) + locationBS := s.PostingsLocs[pid] + if len(tokenFreq.Locations) > 0 { + locationBS.AddInt(int(docNum)) for _, loc := range tokenFreq.Locations { var locf = fieldID if loc.Field != "" { locf = uint16(s.getOrDefineField(loc.Field)) } - s.Locfields[fieldTermPostings-1] = append(s.Locfields[fieldTermPostings-1], locf) - s.Locstarts[fieldTermPostings-1] = append(s.Locstarts[fieldTermPostings-1], uint64(loc.Start)) - s.Locends[fieldTermPostings-1] = append(s.Locends[fieldTermPostings-1], uint64(loc.End)) - s.Locpos[fieldTermPostings-1] = append(s.Locpos[fieldTermPostings-1], uint64(loc.Position)) + s.Locfields[pid] = append(s.Locfields[pid], locf) + s.Locstarts[pid] = append(s.Locstarts[pid], uint64(loc.Start)) + s.Locends[pid] = append(s.Locends[pid], uint64(loc.End)) + s.Locpos[pid] = append(s.Locpos[pid], uint64(loc.Position)) if len(loc.ArrayPositions) > 0 { - s.Locarraypos[fieldTermPostings-1] = append(s.Locarraypos[fieldTermPostings-1], loc.ArrayPositions) + s.Locarraypos[pid] = append(s.Locarraypos[pid], loc.ArrayPositions) } else { - s.Locarraypos[fieldTermPostings-1] = append(s.Locarraypos[fieldTermPostings-1], nil) + s.Locarraypos[pid] = append(s.Locarraypos[pid], nil) } } } From a84bd122d2c6f18dc16dbd44a2ef8a5b0f184d34 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Mon, 15 Jan 2018 17:04:27 -0800 Subject: [PATCH 142/728] scorch mem segment preallocates sub-slices via # terms This change tracks the number of terms per posting list to preallocate the sub-slices for the Freqs & Norms. 
--- index/scorch/segment/mem/build.go | 54 +++++++++++++++++++++---------- 1 file changed, 37 insertions(+), 17 deletions(-) diff --git a/index/scorch/segment/mem/build.go b/index/scorch/segment/mem/build.go index 8f080338b..eaf368336 100644 --- a/index/scorch/segment/mem/build.go +++ b/index/scorch/segment/mem/build.go @@ -87,17 +87,26 @@ func NewFromAnalyzedDocs(results []*index.AnalysisResult) *Segment { // fill Dicts/DictKeys and preallocate memory for postings func (s *Segment) initializeDict(results []*index.AnalysisResult) { - var numPostings int + var numPostingsLists int + + numTermsPerPostingsList := make([]int, 0, 64) + + var numTokenFrequencies int processField := func(fieldID uint16, tf analysis.TokenFrequencies) { for term, _ := range tf { - _, exists := s.Dicts[fieldID][term] + pidPlus1, exists := s.Dicts[fieldID][term] if !exists { - numPostings++ - s.Dicts[fieldID][term] = uint64(numPostings) + numPostingsLists++ + pidPlus1 = uint64(numPostingsLists) + s.Dicts[fieldID][term] = pidPlus1 s.DictKeys[fieldID] = append(s.DictKeys[fieldID], term) + numTermsPerPostingsList = append(numTermsPerPostingsList, 0) } + pid := pidPlus1 - 1 + numTermsPerPostingsList[pid]++ } + numTokenFrequencies += len(tf) } for _, result := range results { @@ -116,21 +125,33 @@ func (s *Segment) initializeDict(results []*index.AnalysisResult) { } } - s.Postings = make([]*roaring.Bitmap, numPostings) - for i := 0; i < numPostings; i++ { + s.Postings = make([]*roaring.Bitmap, numPostingsLists) + for i := 0; i < numPostingsLists; i++ { s.Postings[i] = roaring.New() } - s.PostingsLocs = make([]*roaring.Bitmap, numPostings) - for i := 0; i < numPostings; i++ { + s.PostingsLocs = make([]*roaring.Bitmap, numPostingsLists) + for i := 0; i < numPostingsLists; i++ { s.PostingsLocs[i] = roaring.New() } - s.Freqs = make([][]uint64, numPostings) - s.Norms = make([][]float32, numPostings) - s.Locfields = make([][]uint16, numPostings) - s.Locstarts = make([][]uint64, numPostings) - s.Locends 
= make([][]uint64, numPostings) - s.Locpos = make([][]uint64, numPostings) - s.Locarraypos = make([][][]uint64, numPostings) + + s.Freqs = make([][]uint64, numPostingsLists) + s.Norms = make([][]float32, numPostingsLists) + s.Locfields = make([][]uint16, numPostingsLists) + s.Locstarts = make([][]uint64, numPostingsLists) + s.Locends = make([][]uint64, numPostingsLists) + s.Locpos = make([][]uint64, numPostingsLists) + s.Locarraypos = make([][][]uint64, numPostingsLists) + + uint64Backing := make([]uint64, numTokenFrequencies) + float32Backing := make([]float32, numTokenFrequencies) + + for i, numTerms := range numTermsPerPostingsList { + s.Freqs[i] = uint64Backing[0:0] + uint64Backing = uint64Backing[numTerms:] + + s.Norms[i] = float32Backing[0:0] + float32Backing = float32Backing[numTerms:] + } } func (s *Segment) processDocument(result *index.AnalysisResult) { @@ -180,8 +201,7 @@ func (s *Segment) processDocument(result *index.AnalysisResult) { // now that its been rolled up into docMap, walk that for fieldID, tokenFrequencies := range docMap { for term, tokenFreq := range tokenFrequencies { - fieldTermPostings := s.Dicts[fieldID][term] - pid := fieldTermPostings-1 + pid := s.Dicts[fieldID][term]-1 bs := s.Postings[pid] bs.AddInt(int(docNum)) s.Freqs[pid] = append(s.Freqs[pid], uint64(tokenFreq.Frequency())) From 0f19b542a3f91fe5f55b8f38b5e6f7af864be644 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Mon, 15 Jan 2018 18:40:28 -0800 Subject: [PATCH 143/728] scorch mem segment prealloc's Locfields/starts/ends/pos/arraypos This change preallocates more of the backing arrays for Locfields, Locstarts, Locends, Locpos, Locaaraypos sub-slices of a scorch mem segment. On small bleve-blast tests (50K wiki docs) on a dev macbook, scorch indexing throughput seems to improve from 15MB/sec to 20MB/sec after the recent series of preallocation changes. 
--- index/scorch/segment/mem/build.go | 53 +++++++++++++++++++++++-------- 1 file changed, 40 insertions(+), 13 deletions(-) diff --git a/index/scorch/segment/mem/build.go b/index/scorch/segment/mem/build.go index eaf368336..14cb1cbc4 100644 --- a/index/scorch/segment/mem/build.go +++ b/index/scorch/segment/mem/build.go @@ -89,12 +89,14 @@ func NewFromAnalyzedDocs(results []*index.AnalysisResult) *Segment { func (s *Segment) initializeDict(results []*index.AnalysisResult) { var numPostingsLists int - numTermsPerPostingsList := make([]int, 0, 64) + numTermsPerPostingsList := make([]int, 0, 64) // Keyed by postings list id. + numLocsPerPostingsList := make([]int, 0, 64) // Keyed by postings list id. var numTokenFrequencies int + var numLocs int - processField := func(fieldID uint16, tf analysis.TokenFrequencies) { - for term, _ := range tf { + processField := func(fieldID uint16, tfs analysis.TokenFrequencies) { + for term, tf := range tfs { pidPlus1, exists := s.Dicts[fieldID][term] if !exists { numPostingsLists++ @@ -102,11 +104,14 @@ func (s *Segment) initializeDict(results []*index.AnalysisResult) { s.Dicts[fieldID][term] = pidPlus1 s.DictKeys[fieldID] = append(s.DictKeys[fieldID], term) numTermsPerPostingsList = append(numTermsPerPostingsList, 0) + numLocsPerPostingsList = append(numLocsPerPostingsList, 0) } pid := pidPlus1 - 1 - numTermsPerPostingsList[pid]++ + numTermsPerPostingsList[pid] += 1 + numLocsPerPostingsList[pid] += len(tf.Locations) + numLocs += len(tf.Locations) } - numTokenFrequencies += len(tf) + numTokenFrequencies += len(tfs) } for _, result := range results { @@ -136,21 +141,43 @@ func (s *Segment) initializeDict(results []*index.AnalysisResult) { s.Freqs = make([][]uint64, numPostingsLists) s.Norms = make([][]float32, numPostingsLists) + + uint64Backing := make([]uint64, numTokenFrequencies) + float32Backing := make([]float32, numTokenFrequencies) + + for pid, numTerms := range numTermsPerPostingsList { + s.Freqs[pid] = uint64Backing[0:0] + 
uint64Backing = uint64Backing[numTerms:] + + s.Norms[pid] = float32Backing[0:0] + float32Backing = float32Backing[numTerms:] + } + s.Locfields = make([][]uint16, numPostingsLists) s.Locstarts = make([][]uint64, numPostingsLists) s.Locends = make([][]uint64, numPostingsLists) s.Locpos = make([][]uint64, numPostingsLists) s.Locarraypos = make([][][]uint64, numPostingsLists) - uint64Backing := make([]uint64, numTokenFrequencies) - float32Backing := make([]float32, numTokenFrequencies) + uint16Backing := make([]uint16, numLocs) // For Locfields. + uint64Backing = make([]uint64, numLocs*3) // For Locstarts, Locends, Locpos. + auint64Backing := make([][]uint64, numLocs) // For Locarraypos. - for i, numTerms := range numTermsPerPostingsList { - s.Freqs[i] = uint64Backing[0:0] - uint64Backing = uint64Backing[numTerms:] + for pid, numLocs := range numLocsPerPostingsList { + s.Locfields[pid] = uint16Backing[0:0] + uint16Backing = uint16Backing[numLocs:] - s.Norms[i] = float32Backing[0:0] - float32Backing = float32Backing[numTerms:] + s.Locstarts[pid] = uint64Backing[0:0] + uint64Backing = uint64Backing[numLocs:] + + s.Locends[pid] = uint64Backing[0:0] + uint64Backing = uint64Backing[numLocs:] + + s.Locpos[pid] = uint64Backing[0:0] + uint64Backing = uint64Backing[numLocs:] + + s.Locarraypos[pid] = auint64Backing[0:0] + auint64Backing = auint64Backing[numLocs:] } } @@ -201,7 +228,7 @@ func (s *Segment) processDocument(result *index.AnalysisResult) { // now that its been rolled up into docMap, walk that for fieldID, tokenFrequencies := range docMap { for term, tokenFreq := range tokenFrequencies { - pid := s.Dicts[fieldID][term]-1 + pid := s.Dicts[fieldID][term] - 1 bs := s.Postings[pid] bs.AddInt(int(docNum)) s.Freqs[pid] = append(s.Freqs[pid], uint64(tokenFreq.Frequency())) From d682c85a7b599b3d8053caa4b2937860c1febf14 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Mon, 15 Jan 2018 19:17:39 -0800 Subject: [PATCH 144/728] scorch mem segments uses backing array trick even more 
This change invokes make() only once per distinct type to allocate the large, contiguous backing arrays for the mem segment. --- index/scorch/segment/mem/build.go | 40 +++++++++++++++++++------------ 1 file changed, 25 insertions(+), 15 deletions(-) diff --git a/index/scorch/segment/mem/build.go b/index/scorch/segment/mem/build.go index 14cb1cbc4..554de8906 100644 --- a/index/scorch/segment/mem/build.go +++ b/index/scorch/segment/mem/build.go @@ -93,7 +93,7 @@ func (s *Segment) initializeDict(results []*index.AnalysisResult) { numLocsPerPostingsList := make([]int, 0, 64) // Keyed by postings list id. var numTokenFrequencies int - var numLocs int + var totLocs int processField := func(fieldID uint16, tfs analysis.TokenFrequencies) { for term, tf := range tfs { @@ -109,7 +109,7 @@ func (s *Segment) initializeDict(results []*index.AnalysisResult) { pid := pidPlus1 - 1 numTermsPerPostingsList[pid] += 1 numLocsPerPostingsList[pid] += len(tf.Locations) - numLocs += len(tf.Locations) + totLocs += len(tf.Locations) } numTokenFrequencies += len(tfs) } @@ -139,12 +139,32 @@ func (s *Segment) initializeDict(results []*index.AnalysisResult) { s.PostingsLocs[i] = roaring.New() } - s.Freqs = make([][]uint64, numPostingsLists) + // Preallocate big, contiguous backing arrays. + auint64Backing := make([][]uint64, numPostingsLists*4+totLocs) // For Freqs, Locstarts, Locends, Locpos, sub-Locarraypos. + uint64Backing := make([]uint64, numTokenFrequencies+totLocs*3) // For sub-Freqs, sub-Locstarts, sub-Locends, sub-Locpos. + float32Backing := make([]float32, numTokenFrequencies) // For sub-Norms. + uint16Backing := make([]uint16, totLocs) // For sub-Locfields. + + // Point top-level slices to the backing arrays. 
+ s.Freqs = auint64Backing[0:numPostingsLists] + auint64Backing = auint64Backing[numPostingsLists:] + s.Norms = make([][]float32, numPostingsLists) - uint64Backing := make([]uint64, numTokenFrequencies) - float32Backing := make([]float32, numTokenFrequencies) + s.Locfields = make([][]uint16, numPostingsLists) + + s.Locstarts = auint64Backing[0:numPostingsLists] + auint64Backing = auint64Backing[numPostingsLists:] + + s.Locends = auint64Backing[0:numPostingsLists] + auint64Backing = auint64Backing[numPostingsLists:] + + s.Locpos = auint64Backing[0:numPostingsLists] + auint64Backing = auint64Backing[numPostingsLists:] + s.Locarraypos = make([][][]uint64, numPostingsLists) + + // Point sub-slices to the backing arrays. for pid, numTerms := range numTermsPerPostingsList { s.Freqs[pid] = uint64Backing[0:0] uint64Backing = uint64Backing[numTerms:] @@ -153,16 +173,6 @@ func (s *Segment) initializeDict(results []*index.AnalysisResult) { float32Backing = float32Backing[numTerms:] } - s.Locfields = make([][]uint16, numPostingsLists) - s.Locstarts = make([][]uint64, numPostingsLists) - s.Locends = make([][]uint64, numPostingsLists) - s.Locpos = make([][]uint64, numPostingsLists) - s.Locarraypos = make([][][]uint64, numPostingsLists) - - uint16Backing := make([]uint16, numLocs) // For Locfields. - uint64Backing = make([]uint64, numLocs*3) // For Locstarts, Locends, Locpos. - auint64Backing := make([][]uint64, numLocs) // For Locarraypos. 
- for pid, numLocs := range numLocsPerPostingsList { s.Locfields[pid] = uint16Backing[0:0] uint16Backing = uint16Backing[numLocs:] From d14b290235caff80127fe490fb3068e53269fbe1 Mon Sep 17 00:00:00 2001 From: Ethan Koenig Date: Mon, 15 Jan 2018 22:23:41 -0800 Subject: [PATCH 145/728] Fix coverage badge in README --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index fa11f906d..7c1a7c7c4 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # ![bleve](docs/bleve.png) bleve -[![Build Status](https://travis-ci.org/blevesearch/bleve.svg?branch=master)](https://travis-ci.org/blevesearch/bleve) [![Coverage Status](https://coveralls.io/repos/blevesearch/bleve/badge.png?branch=master)](https://coveralls.io/r/blevesearch/bleve?branch=master) [![GoDoc](https://godoc.org/github.com/blevesearch/bleve?status.svg)](https://godoc.org/github.com/blevesearch/bleve) +[![Build Status](https://travis-ci.org/blevesearch/bleve.svg?branch=master)](https://travis-ci.org/blevesearch/bleve) [![Coverage Status](https://coveralls.io/repos/github/blevesearch/bleve/badge.svg?branch=master)](https://coveralls.io/github/blevesearch/bleve?branch=master) [![GoDoc](https://godoc.org/github.com/blevesearch/bleve?status.svg)](https://godoc.org/github.com/blevesearch/bleve) [![Join the chat at https://gitter.im/blevesearch/bleve](https://badges.gitter.im/Join%20Chat.svg)](https://gitter.im/blevesearch/bleve?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge) [![codebeat](https://codebeat.co/badges/38a7cbc9-9cf5-41c0-a315-0746178230f4)](https://codebeat.co/projects/github-com-blevesearch-bleve) [![Go Report Card](https://goreportcard.com/badge/blevesearch/bleve)](https://goreportcard.com/report/blevesearch/bleve) From 71d6d1691b992a25f4511f4ec86c25cf476e3ff8 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Mon, 15 Jan 2018 22:43:08 -0800 Subject: [PATCH 146/728] scorch zap optimizations of inner loops and easy preallocs --- 
index/scorch/segment/zap/build.go | 54 ++++++++++++++++++++----------- 1 file changed, 35 insertions(+), 19 deletions(-) diff --git a/index/scorch/segment/zap/build.go b/index/scorch/segment/zap/build.go index c7f73769e..1b16b5e35 100644 --- a/index/scorch/segment/zap/build.go +++ b/index/scorch/segment/zap/build.go @@ -140,12 +140,18 @@ func persistStored(memSegment *mem.Segment, w *CountHashWriter) (uint64, error) metaEncoder := govarint.NewU64Base128Encoder(&metaBuf) + st := memSegment.StoredTypes[docNum] + sp := memSegment.StoredPos[docNum] + // encode fields in order for fieldID := range memSegment.FieldsInv { if storedFieldValues, ok := storedValues[uint16(fieldID)]; ok { // has stored values for this field num := len(storedFieldValues) + stf := st[uint16(fieldID)] + spf := sp[uint16(fieldID)] + // process each value for i := 0; i < num; i++ { // encode field @@ -154,7 +160,7 @@ func persistStored(memSegment *mem.Segment, w *CountHashWriter) (uint64, error) return 0, err2 } // encode type - _, err2 = metaEncoder.PutU64(uint64(memSegment.StoredTypes[docNum][uint16(fieldID)][i])) + _, err2 = metaEncoder.PutU64(uint64(stf[i])) if err2 != nil { return 0, err2 } @@ -169,13 +175,13 @@ func persistStored(memSegment *mem.Segment, w *CountHashWriter) (uint64, error) return 0, err2 } // encode number of array pos - _, err2 = metaEncoder.PutU64(uint64(len(memSegment.StoredPos[docNum][uint16(fieldID)][i]))) + _, err2 = metaEncoder.PutU64(uint64(len(spf[i]))) if err2 != nil { return 0, err2 } // encode all array positions - for j := 0; j < len(memSegment.StoredPos[docNum][uint16(fieldID)][i]); j++ { - _, err2 = metaEncoder.PutU64(memSegment.StoredPos[docNum][uint16(fieldID)][i][j]) + for _, pos := range spf[i] { + _, err2 = metaEncoder.PutU64(pos) if err2 != nil { return 0, err2 } @@ -235,6 +241,8 @@ func persistPostingDetails(memSegment *mem.Segment, w *CountHashWriter, chunkFac if postingID != 0 { tfEncoder.Reset() } + freqs := memSegment.Freqs[postingID] + norms := 
memSegment.Norms[postingID] postingsListItr := memSegment.Postings[postingID].Iterator() var offset int for postingsListItr.HasNext() { @@ -242,13 +250,13 @@ func persistPostingDetails(memSegment *mem.Segment, w *CountHashWriter, chunkFac docNum := uint64(postingsListItr.Next()) // put freq - err := tfEncoder.Add(docNum, memSegment.Freqs[postingID][offset]) + err := tfEncoder.Add(docNum, freqs[offset]) if err != nil { return nil, nil, err } // put norm - norm := memSegment.Norms[postingID][offset] + norm := norms[offset] normBits := math.Float32bits(norm) err = tfEncoder.Add(docNum, uint64(normBits)) if err != nil { @@ -275,40 +283,46 @@ func persistPostingDetails(memSegment *mem.Segment, w *CountHashWriter, chunkFac if postingID != 0 { locEncoder.Reset() } + freqs := memSegment.Freqs[postingID] + locfields := memSegment.Locfields[postingID] + locpos := memSegment.Locpos[postingID] + locstarts := memSegment.Locstarts[postingID] + locends := memSegment.Locends[postingID] + locarraypos := memSegment.Locarraypos[postingID] postingsListItr := memSegment.Postings[postingID].Iterator() var offset int var locOffset int for postingsListItr.HasNext() { docNum := uint64(postingsListItr.Next()) - for i := 0; i < int(memSegment.Freqs[postingID][offset]); i++ { - if len(memSegment.Locfields[postingID]) > 0 { + for i := 0; i < int(freqs[offset]); i++ { + if len(locfields) > 0 { // put field - err := locEncoder.Add(docNum, uint64(memSegment.Locfields[postingID][locOffset])) + err := locEncoder.Add(docNum, uint64(locfields[locOffset])) if err != nil { return nil, nil, err } // put pos - err = locEncoder.Add(docNum, memSegment.Locpos[postingID][locOffset]) + err = locEncoder.Add(docNum, locpos[locOffset]) if err != nil { return nil, nil, err } // put start - err = locEncoder.Add(docNum, memSegment.Locstarts[postingID][locOffset]) + err = locEncoder.Add(docNum, locstarts[locOffset]) if err != nil { return nil, nil, err } // put end - err = locEncoder.Add(docNum, 
memSegment.Locends[postingID][locOffset]) + err = locEncoder.Add(docNum, locends[locOffset]) if err != nil { return nil, nil, err } // put array positions - num := len(memSegment.Locarraypos[postingID][locOffset]) + num := len(locarraypos[locOffset]) // put the number of array positions to follow err = locEncoder.Add(docNum, uint64(num)) @@ -317,8 +331,8 @@ func persistPostingDetails(memSegment *mem.Segment, w *CountHashWriter, chunkFac } // put each array position - for j := 0; j < num; j++ { - err = locEncoder.Add(docNum, memSegment.Locarraypos[postingID][locOffset][j]) + for _, pos := range locarraypos[locOffset] { + err = locEncoder.Add(docNum, pos) if err != nil { return nil, nil, err } @@ -341,6 +355,7 @@ func persistPostingDetails(memSegment *mem.Segment, w *CountHashWriter, chunkFac } func persistPostingsLocs(memSegment *mem.Segment, w *CountHashWriter) (rv []uint64, err error) { + rv = make([]uint64, 0, len(memSegment.PostingsLocs)) for postingID := range memSegment.PostingsLocs { // record where we start this posting loc rv = append(rv, uint64(w.Count())) @@ -355,6 +370,7 @@ func persistPostingsLocs(memSegment *mem.Segment, w *CountHashWriter) (rv []uint func persistPostingsLists(memSegment *mem.Segment, w *CountHashWriter, postingsListLocs, freqOffsets, locOffsets []uint64) (rv []uint64, err error) { + rv = make([]uint64, 0, len(memSegment.Postings)) for postingID := range memSegment.Postings { // record where we start this posting list rv = append(rv, uint64(w.Count())) @@ -376,7 +392,7 @@ func persistPostingsLists(memSegment *mem.Segment, w *CountHashWriter, } func persistDictionary(memSegment *mem.Segment, w *CountHashWriter, postingsLocs []uint64) ([]uint64, error) { - var rv []uint64 + rv := make([]uint64, 0, len(memSegment.DictKeys)) var buffer bytes.Buffer for fieldID, fieldTerms := range memSegment.DictKeys { @@ -392,10 +408,10 @@ func persistDictionary(memSegment *mem.Segment, w *CountHashWriter, postingsLocs dict := memSegment.Dicts[fieldID] // 
now walk the dictionary in order of fieldTerms (already sorted) - for i := range fieldTerms { - postingID := dict[fieldTerms[i]] - 1 + for _, fieldTerm := range fieldTerms { + postingID := dict[fieldTerm] - 1 postingsAddr := postingsLocs[postingID] - err = builder.Insert([]byte(fieldTerms[i]), postingsAddr) + err = builder.Insert([]byte(fieldTerm), postingsAddr) if err != nil { return nil, err } From 012d436dd7db71913fc3221e832f21229c18cf0e Mon Sep 17 00:00:00 2001 From: Ethan Koenig Date: Mon, 15 Jan 2018 22:02:18 -0800 Subject: [PATCH 147/728] Add UniqueTerm token filter --- analysis/token/unique/unique.go | 53 ++++++++++++++++++ analysis/token/unique/unique_test.go | 84 ++++++++++++++++++++++++++++ 2 files changed, 137 insertions(+) create mode 100644 analysis/token/unique/unique.go create mode 100644 analysis/token/unique/unique_test.go diff --git a/analysis/token/unique/unique.go b/analysis/token/unique/unique.go new file mode 100644 index 000000000..f0d96c504 --- /dev/null +++ b/analysis/token/unique/unique.go @@ -0,0 +1,53 @@ +// Copyright (c) 2018 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package unique + +import ( + "github.com/blevesearch/bleve/analysis" + "github.com/blevesearch/bleve/registry" +) + +const Name = "unique" + +// UniqueTermFilter retains only the tokens which mark the first occurence of +// a term. Tokens whose term appears in a preceding token are dropped. 
+type UniqueTermFilter struct{} + +func NewUniqueTermFilter() *UniqueTermFilter { + return &UniqueTermFilter{} +} + +func (f *UniqueTermFilter) Filter(input analysis.TokenStream) analysis.TokenStream { + encounteredTerms := make(map[string]struct{}, len(input)/4) + j := 0 + for _, token := range input { + term := string(token.Term) + if _, ok := encounteredTerms[term]; ok { + continue + } + encounteredTerms[term] = struct{}{} + input[j] = token + j++ + } + return input[:j] +} + +func UniqueTermFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) { + return NewUniqueTermFilter(), nil +} + +func init() { + registry.RegisterTokenFilter(Name, UniqueTermFilterConstructor) +} diff --git a/analysis/token/unique/unique_test.go b/analysis/token/unique/unique_test.go new file mode 100644 index 000000000..216d8f1fa --- /dev/null +++ b/analysis/token/unique/unique_test.go @@ -0,0 +1,84 @@ +// Copyright (c) 2018 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package unique + +import ( + "reflect" + "testing" + + "github.com/blevesearch/bleve/analysis" +) + +func TestUniqueTermFilter(t *testing.T) { + var tests = []struct { + input analysis.TokenStream + // expected indices of input which should be included in the output. We + // use indices instead of another TokenStream, since position/start/end + // should be preserved. 
+ expectedIndices []int + }{ + { + input: tokenStream(), + expectedIndices: []int{}, + }, + { + input: tokenStream("a"), + expectedIndices: []int{0}, + }, + { + input: tokenStream("each", "term", "in", "this", "sentence", "is", "unique"), + expectedIndices: []int{0, 1, 2, 3, 4, 5, 6}, + }, + { + input: tokenStream("Lui", "è", "alto", "e", "lei", "è", "bassa"), + expectedIndices: []int{0, 1, 2, 3, 4, 6}, + }, + { + input: tokenStream("a", "a", "A", "a", "a", "A"), + expectedIndices: []int{0, 2}, + }, + } + uniqueTermFilter := NewUniqueTermFilter() + for _, test := range tests { + expected := subStream(test.input, test.expectedIndices) + actual := uniqueTermFilter.Filter(test.input) + if !reflect.DeepEqual(actual, expected) { + t.Errorf("expected %s \n\n got %s", expected, actual) + } + } +} + +func tokenStream(termStrs ...string) analysis.TokenStream { + tokenStream := make([]*analysis.Token, len(termStrs)) + index := 0 + for i, termStr := range termStrs { + tokenStream[i] = &analysis.Token{ + Term: []byte(termStr), + Position: i + 1, + Start: index, + End: index + len(termStr), + } + index += len(termStr) + } + return analysis.TokenStream(tokenStream) +} + +func subStream(stream analysis.TokenStream, indices []int) analysis.TokenStream { + result := make(analysis.TokenStream, len(indices)) + for i, index := range indices { + result[i] = stream[index] + } + return result +} From 1176c73a9cf3d5272b962575f8c682c0ff3a34dc Mon Sep 17 00:00:00 2001 From: abhinavdangeti Date: Fri, 12 Jan 2018 12:11:11 -0800 Subject: [PATCH 148/728] Include overhead from data structures in segment's SizeInBytes + Account for all the overhead incurred from the data structures within mem.Segment and zap.Segment. - SizeOfMap = 8 - SizeOfPointer = 8 - SizeOfSlice = 24 - SizeOfString = 16 + Include overhead from certain new fields as well. 
--- index/scorch/segment/mem/segment.go | 50 ++++++++++++++++++++++----- index/scorch/segment/segment.go | 6 ++++ index/scorch/segment/zap/docvalues.go | 18 ++++++++++ index/scorch/segment/zap/segment.go | 39 +++++++++++++++------ 4 files changed, 93 insertions(+), 20 deletions(-) diff --git a/index/scorch/segment/mem/segment.go b/index/scorch/segment/mem/segment.go index 3c400b531..baa4811a4 100644 --- a/index/scorch/segment/mem/segment.go +++ b/index/scorch/segment/mem/segment.go @@ -107,27 +107,41 @@ func New() *Segment { func (s *Segment) updateSizeInBytes() { var sizeInBytes uint64 + // FieldsMap, FieldsInv for k, _ := range s.FieldsMap { - sizeInBytes += uint64(len(k)*2 /* FieldsMap + FieldsInv */ + + sizeInBytes += uint64((len(k)+int(segment.SizeOfString))*2 + 2 /* size of uint16 */) } + // overhead from the data structures + sizeInBytes += (segment.SizeOfMap + segment.SizeOfSlice) + // Dicts, DictKeys for _, entry := range s.Dicts { for k, _ := range entry { - sizeInBytes += uint64(len(k)*2 /* Dicts + DictKeys */ + + sizeInBytes += uint64((len(k)+int(segment.SizeOfString))*2 + 8 /* size of uint64 */) } + // overhead from the data structures + sizeInBytes += (segment.SizeOfMap + segment.SizeOfSlice) } + sizeInBytes += (segment.SizeOfSlice * 2) + // Postings, PostingsLocs for i := 0; i < len(s.Postings); i++ { - sizeInBytes += s.Postings[i].GetSizeInBytes() + s.PostingsLocs[i].GetSizeInBytes() + sizeInBytes += (s.Postings[i].GetSizeInBytes() + segment.SizeOfPointer) + + (s.PostingsLocs[i].GetSizeInBytes() + segment.SizeOfPointer) } + sizeInBytes += (segment.SizeOfSlice * 2) + // Freqs, Norms for i := 0; i < len(s.Freqs); i++ { sizeInBytes += uint64(len(s.Freqs[i])*8 /* size of uint64 */ + - len(s.Norms[i])*4 /* size of float32 */) + len(s.Norms[i])*4 /* size of float32 */) + + (segment.SizeOfSlice * 2) } + sizeInBytes += (segment.SizeOfSlice * 2) + // Location data for i := 0; i < len(s.Locfields); i++ { sizeInBytes += uint64(len(s.Locfields[i])*2 /* size of 
uint16 */ + len(s.Locstarts[i])*8 /* size of uint64 */ + @@ -135,31 +149,49 @@ func (s *Segment) updateSizeInBytes() { len(s.Locpos[i])*8 /* size of uint64 */) for j := 0; j < len(s.Locarraypos[i]); j++ { - sizeInBytes += uint64(len(s.Locarraypos[i][j]) * 8 /* size of uint64 */) + sizeInBytes += uint64(len(s.Locarraypos[i][j])*8 /* size of uint64 */) + + segment.SizeOfSlice } + + sizeInBytes += (segment.SizeOfSlice * 5) } + sizeInBytes += (segment.SizeOfSlice * 5) + // Stored data for i := 0; i < len(s.Stored); i++ { for _, v := range s.Stored[i] { sizeInBytes += uint64(2 /* size of uint16 */) for _, arr := range v { - sizeInBytes += uint64(len(arr)) + sizeInBytes += uint64(len(arr)) + segment.SizeOfSlice } + sizeInBytes += segment.SizeOfSlice } for _, v := range s.StoredTypes[i] { - sizeInBytes += uint64(2 /* size of uint16 */ + len(v)) + sizeInBytes += uint64(2 /* size of uint16 */ +len(v)) + segment.SizeOfSlice } for _, v := range s.StoredPos[i] { sizeInBytes += uint64(2 /* size of uint16 */) for _, arr := range v { - sizeInBytes += uint64(len(arr) * 8 /* size of uint64 */) + sizeInBytes += uint64(len(arr)*8 /* size of uint64 */) + + segment.SizeOfSlice } + sizeInBytes += segment.SizeOfSlice } + + // overhead from map(s) within Stored, StoredTypes, StoredPos + sizeInBytes += (segment.SizeOfMap * 3) } + // overhead from data structures: Stored, StoredTypes, StoredPos + sizeInBytes += (segment.SizeOfSlice * 3) + + // DocValueFields + sizeInBytes += uint64(len(s.DocValueFields)*3 /* size of uint16 + bool */) + + segment.SizeOfMap - sizeInBytes += uint64(8 /* size of sizeInBytes -> uint64*/) + // SizeInBytes + sizeInBytes += uint64(8) s.sizeInBytes = sizeInBytes } diff --git a/index/scorch/segment/segment.go b/index/scorch/segment/segment.go index 858ac3590..d5435ab96 100644 --- a/index/scorch/segment/segment.go +++ b/index/scorch/segment/segment.go @@ -19,6 +19,12 @@ import ( "github.com/blevesearch/bleve/index" ) +// Overhead from go data structures when deployed 
on a 64-bit system. +const SizeOfMap uint64 = 8 +const SizeOfPointer uint64 = 8 +const SizeOfSlice uint64 = 24 +const SizeOfString uint64 = 16 + // DocumentFieldValueVisitor defines a callback to be visited for each // stored field value. The return value determines if the visitor // should keep going. Returning true continues visiting, false stops. diff --git a/index/scorch/segment/zap/docvalues.go b/index/scorch/segment/zap/docvalues.go index cdb16ccb9..e37ecc74e 100644 --- a/index/scorch/segment/zap/docvalues.go +++ b/index/scorch/segment/zap/docvalues.go @@ -22,6 +22,7 @@ import ( "sort" "github.com/blevesearch/bleve/index" + "github.com/blevesearch/bleve/index/scorch/segment" "github.com/golang/snappy" ) @@ -35,6 +36,23 @@ type docValueIterator struct { curChunkData []byte // compressed data cache } +func (di *docValueIterator) sizeInBytes() uint64 { + // curChunkNum, numChunks, dvDataLoc --> uint64 + sizeInBytes := 24 + + // field + sizeInBytes += (len(di.field) + int(segment.SizeOfString)) + + // chunkLens, curChunkHeader + sizeInBytes += len(di.chunkLens)*8 + + len(di.curChunkHeader)*24 + + int(segment.SizeOfSlice*2) /* overhead from slices */ + + // curChunkData is mmap'ed, not included + + return uint64(sizeInBytes) +} + func (di *docValueIterator) fieldName() string { return di.field } diff --git a/index/scorch/segment/zap/segment.go b/index/scorch/segment/zap/segment.go index 9f9910366..18d4ea56c 100644 --- a/index/scorch/segment/zap/segment.go +++ b/index/scorch/segment/zap/segment.go @@ -97,27 +97,44 @@ type Segment struct { } func (s *Segment) SizeInBytes() uint64 { - // 4 /* size of crc -> uint32 */ + - // 4 /* size of version -> uint32 */ + - // 4 /* size of chunkFactor -> uint32 */ + - // 8 /* size of numDocs -> uint64 */ + - // 8 /* size of storedIndexOffset -> uint64 */ + + // 8 /* size of file pointer */ + // 4 /* size of crc -> uint32 */ + // 4 /* size of version -> uint32 */ + // 4 /* size of chunkFactor -> uint32 */ + // 8 /* size of numDocs 
-> uint64 */ + // 8 /* size of storedIndexOffset -> uint64 */ // 8 /* size of fieldsIndexOffset -> uint64 */ - sizeOfUints := 36 + // 8 /* size of docValueOffset -> uint64 */ + sizeOfUints := 52 // Do not include the mmap'ed part - sizeInBytes := len(s.path) + sizeOfUints + sizeInBytes := (len(s.path) + int(segment.SizeOfString)) + sizeOfUints + // fieldsMap for k, _ := range s.fieldsMap { - sizeInBytes += len(k) + 2 /* size of uint16 */ + sizeInBytes += (len(k) + int(segment.SizeOfString)) + 2 /* size of uint16 */ } + sizeInBytes += int(segment.SizeOfMap) /* overhead from map */ + // fieldsInv, fieldsOffsets for _, entry := range s.fieldsInv { - sizeInBytes += len(entry) + sizeInBytes += (len(entry) + int(segment.SizeOfString)) } + sizeInBytes += len(s.fieldsOffsets) * 8 /* size of uint64 */ + sizeInBytes += int(segment.SizeOfSlice) * 2 /* overhead from slices */ + + // fieldDvIterMap + sizeInBytes += len(s.fieldDvIterMap) * + int(segment.SizeOfPointer+2 /* size of uint16 */) + for _, entry := range s.fieldDvIterMap { + if entry != nil { + sizeInBytes += int(entry.sizeInBytes()) + } + } + sizeInBytes += int(segment.SizeOfMap) - sizeInBytes += len(s.fieldsOffsets) * 8 /* size of uint64 */ - sizeInBytes += 8 /* size of refs -> int64 */ + // mutex, refs -> int64 + sizeInBytes += 16 return uint64(sizeInBytes) } From 47f1c66889db55545776d2cbe6d0c137a0556ccf Mon Sep 17 00:00:00 2001 From: Sreekanth Sivasankaran Date: Fri, 19 Jan 2018 11:47:28 +0530 Subject: [PATCH 149/728] adding UT --- search.go | 6 +++-- search_test.go | 73 ++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 77 insertions(+), 2 deletions(-) diff --git a/search.go b/search.go index 5d12b7a25..46d849c1b 100644 --- a/search.go +++ b/search.go @@ -481,8 +481,10 @@ func (sr *SearchResult) Merge(other *SearchResult) { if other.MaxScore > sr.MaxScore { sr.MaxScore = other.MaxScore } - if len(sr.Facets) == 0 && len(other.Facets) != 0 { - sr.Facets = make(search.FacetResults) + if sr.Facets == 
nil && len(other.Facets) != 0 { + sr.Facets = other.Facets + return } + sr.Facets.Merge(other.Facets) } diff --git a/search_test.go b/search_test.go index 7f5018950..242494132 100644 --- a/search_test.go +++ b/search_test.go @@ -326,3 +326,76 @@ func TestFacetNumericDateRangeRequests(t *testing.T) { } } + +func TestSearchResultFacetsMerge(t *testing.T) { + lowmed := "2010-01-01" + medhi := "2011-01-01" + hihigher := "2012-01-01" + + fr := &search.FacetResult{ + Field: "birthday", + Total: 100, + Missing: 25, + Other: 25, + DateRanges: []*search.DateRangeFacet{ + { + Name: "low", + End: &lowmed, + Count: 25, + }, + { + Name: "med", + Count: 24, + Start: &lowmed, + End: &medhi, + }, + { + Name: "hi", + Count: 1, + Start: &medhi, + End: &hihigher, + }, + }, + } + frs := search.FacetResults{ + "birthdays": fr, + } + + l := &SearchResult{ + Status: &SearchStatus{ + Total: 10, + Successful: 1, + Errors: make(map[string]error), + }, + Total: 10, + MaxScore: 1, + } + + r := &SearchResult{ + Status: &SearchStatus{ + Total: 1, + Successful: 1, + Errors: make(map[string]error), + }, + Total: 1, + MaxScore: 2, + Facets: frs, + } + + expected := &SearchResult{ + Status: &SearchStatus{ + Total: 11, + Successful: 2, + Errors: make(map[string]error), + }, + Total: 11, + MaxScore: 2, + Facets: frs, + } + + l.Merge(r) + + if !reflect.DeepEqual(l, expected) { + t.Errorf("expected %#v, got %#v", expected, l) + } +} From 34fd77709f6e58c9005d59d56b74844277f6f0b6 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Sat, 20 Jan 2018 17:14:07 -0800 Subject: [PATCH 150/728] scorch unlocks in introduceSegment's DocNumbers() error codepath --- index/scorch/introducer.go | 1 + 1 file changed, 1 insertion(+) diff --git a/index/scorch/introducer.go b/index/scorch/introducer.go index 0b9c48537..4499fa41b 100644 --- a/index/scorch/introducer.go +++ b/index/scorch/introducer.go @@ -117,6 +117,7 @@ func (s *Scorch) introduceSegment(next *segmentIntroduction) error { var err error delta, err = 
s.root.segment[i].segment.DocNumbers(next.ids) if err != nil { + s.rootLock.Unlock() next.applied <- fmt.Errorf("error computing doc numbers: %v", err) close(next.applied) _ = newSnapshot.DecRef() From 567d756c27e0733ee4f32ab2eedd2ba985048ac7 Mon Sep 17 00:00:00 2001 From: abhinavdangeti Date: Wed, 24 Jan 2018 14:10:14 -0800 Subject: [PATCH 151/728] Add support for certain disk stats + num_bytes_used_disk + num_files_on_disk --- index/scorch/scorch.go | 5 +++-- index/scorch/stats.go | 30 +++++++++++++++++++++++++++--- 2 files changed, 30 insertions(+), 5 deletions(-) diff --git a/index/scorch/scorch.go b/index/scorch/scorch.go index 99d6dcd5c..b77c69239 100644 --- a/index/scorch/scorch.go +++ b/index/scorch/scorch.go @@ -75,11 +75,11 @@ func NewScorch(storeName string, version: Version, config: config, analysisQueue: analysisQueue, - stats: &Stats{}, nextSnapshotEpoch: 1, closeCh: make(chan struct{}), ineligibleForRemoval: map[string]bool{}, } + rv.stats = &Stats{i: rv} rv.root = &IndexSnapshot{parent: rv, refs: 1} ro, ok := config["read_only"].(bool) if ok { @@ -359,7 +359,8 @@ func (s *Scorch) Stats() json.Marshaler { return s.stats } func (s *Scorch) StatsMap() map[string]interface{} { - return s.stats.statsMap() + m, _ := s.stats.statsMap() + return m } func (s *Scorch) Analyze(d *document.Document) *index.AnalysisResult { diff --git a/index/scorch/stats.go b/index/scorch/stats.go index abd054c81..c44a977bf 100644 --- a/index/scorch/stats.go +++ b/index/scorch/stats.go @@ -16,6 +16,7 @@ package scorch import ( "encoding/json" + "io/ioutil" "sync/atomic" ) @@ -28,9 +29,10 @@ type Stats struct { numPlainTextBytesIndexed uint64 numItemsIntroduced uint64 numItemsPersisted uint64 + i *Scorch } -func (s *Stats) statsMap() map[string]interface{} { +func (s *Stats) statsMap() (map[string]interface{}, error) { m := map[string]interface{}{} m["updates"] = atomic.LoadUint64(&s.updates) m["deletes"] = atomic.LoadUint64(&s.deletes) @@ -44,11 +46,33 @@ func (s *Stats) 
statsMap() map[string]interface{} { m["num_items_introduced"] = atomic.LoadUint64(&s.numItemsIntroduced) m["num_items_persisted"] = atomic.LoadUint64(&s.numItemsPersisted) - return m + if s.i.path != "" { + finfos, err := ioutil.ReadDir(s.i.path) + if err != nil { + return nil, err + } + + var numFilesOnDisk, numBytesUsedDisk uint64 + + for _, finfo := range finfos { + if !finfo.IsDir() { + numBytesUsedDisk += uint64(finfo.Size()) + numFilesOnDisk++ + } + } + + m["num_bytes_used_disk"] = numBytesUsedDisk + m["num_files_on_disk"] = numFilesOnDisk + } + + return m, nil } // MarshalJSON implements json.Marshaler func (s *Stats) MarshalJSON() ([]byte, error) { - m := s.statsMap() + m, err := s.statsMap() + if err != nil { + return nil, err + } return json.Marshal(m) } From dc62324e021e5772d45bdcb764aa825082ac8509 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Wed, 17 Jan 2018 11:29:32 -0800 Subject: [PATCH 152/728] scorch zap miscellaneous typos --- index/scorch/segment/zap/build.go | 2 +- index/scorch/segment/zap/docvalues.go | 2 +- index/scorch/segment/zap/segment.go | 6 ++++-- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/index/scorch/segment/zap/build.go b/index/scorch/segment/zap/build.go index 1b16b5e35..1928bdf3a 100644 --- a/index/scorch/segment/zap/build.go +++ b/index/scorch/segment/zap/build.go @@ -43,7 +43,7 @@ func PersistSegment(memSegment *mem.Segment, path string, chunkFactor uint32) (e return err } - // bufer the output + // buffer the output br := bufio.NewWriter(f) // wrap it for counting (tracking offsets) diff --git a/index/scorch/segment/zap/docvalues.go b/index/scorch/segment/zap/docvalues.go index e37ecc74e..11b3b99d4 100644 --- a/index/scorch/segment/zap/docvalues.go +++ b/index/scorch/segment/zap/docvalues.go @@ -65,7 +65,7 @@ func (s *Segment) loadFieldDocValueIterator(field string, fieldDvLoc uint64) (*docValueIterator, error) { // get the docValue offset for the given fields if fieldDvLoc == fieldNotUninverted { - return 
nil, fmt.Errorf("loadFieldDocValueConfigs: "+ + return nil, fmt.Errorf("loadFieldDocValueIterator: "+ "no docValues found for field: %s", field) } diff --git a/index/scorch/segment/zap/segment.go b/index/scorch/segment/zap/segment.go index 18d4ea56c..60cc034b9 100644 --- a/index/scorch/segment/zap/segment.go +++ b/index/scorch/segment/zap/segment.go @@ -168,11 +168,13 @@ func (s *Segment) loadConfig() error { docValueOffset := chunkOffset - 8 s.docValueOffset = binary.BigEndian.Uint64(s.mm[docValueOffset : docValueOffset+8]) - fieldsOffset := docValueOffset - 8 + fieldsOffset := docValueOffset - 8 s.fieldsIndexOffset = binary.BigEndian.Uint64(s.mm[fieldsOffset : fieldsOffset+8]) + storedOffset := fieldsOffset - 8 s.storedIndexOffset = binary.BigEndian.Uint64(s.mm[storedOffset : storedOffset+8]) + docNumOffset := storedOffset - 8 s.numDocs = binary.BigEndian.Uint64(s.mm[docNumOffset : docNumOffset+8]) return nil @@ -181,7 +183,7 @@ func (s *Segment) loadConfig() error { func (s *Segment) loadFields() error { // NOTE for now we assume the fields index immediately preceeds the footer - // if this changes, need to adjust accordingly (or store epxlicit length) + // if this changes, need to adjust accordingly (or store explicit length) fieldsIndexEnd := uint64(len(s.mm) - FooterSize) // iterate through fields index From 5a035dc9aa4a7660efe56ac3e8a7e64ff27172c1 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Wed, 17 Jan 2018 18:46:57 -0800 Subject: [PATCH 153/728] scorch zap in-memory segment representation (SegmentBase) The zap SegmentBase struct is a refactoring of the zap Segment into the subset of fields that are needed for read-only ops, without any persistence related info. This allows us to use zap's optimized data encoding as scorch's in-memory segments. The zap Segment struct now embeds a zap SegmentBase struct, and layers on persistence. Both the zap Segment and zap SegmentBase implement scorch's Segment interface. 
--- index/scorch/persister.go | 7 +- index/scorch/scorch.go | 10 +- index/scorch/scorch_test.go | 2 +- index/scorch/segment/mem/build.go | 8 +- index/scorch/segment/mem/segment.go | 31 ++-- index/scorch/segment/zap/build.go | 199 +++++++++++++++++++------- index/scorch/segment/zap/count.go | 18 +-- index/scorch/segment/zap/dict.go | 22 +-- index/scorch/segment/zap/docvalues.go | 26 ++-- index/scorch/segment/zap/merge.go | 5 +- index/scorch/segment/zap/posting.go | 20 +-- index/scorch/segment/zap/read.go | 12 +- index/scorch/segment/zap/segment.go | 166 +++++++++++---------- index/scorch/segment/zap/write.go | 18 +-- 14 files changed, 333 insertions(+), 211 deletions(-) diff --git a/index/scorch/persister.go b/index/scorch/persister.go index 658e57aee..cdcee37c2 100644 --- a/index/scorch/persister.go +++ b/index/scorch/persister.go @@ -28,11 +28,12 @@ import ( "github.com/RoaringBitmap/roaring" "github.com/blevesearch/bleve/index/scorch/segment" - "github.com/blevesearch/bleve/index/scorch/segment/mem" "github.com/blevesearch/bleve/index/scorch/segment/zap" "github.com/boltdb/bolt" ) +var DefaultChunkFactor uint32 = 1024 + type notificationChan chan struct{} func (s *Scorch) persisterLoop() { @@ -178,11 +179,11 @@ func (s *Scorch) persistSnapshot(snapshot *IndexSnapshot) error { return err2 } switch seg := segmentSnapshot.segment.(type) { - case *mem.Segment: + case *zap.SegmentBase: // need to persist this to disk filename := zapFileName(segmentSnapshot.id) path := s.path + string(os.PathSeparator) + filename - err2 := zap.PersistSegment(seg, path, 1024) + err2 := zap.PersistSegmentBase(seg, path) if err2 != nil { return fmt.Errorf("error persisting segment: %v", err2) } diff --git a/index/scorch/scorch.go b/index/scorch/scorch.go index 99d6dcd5c..69328d9b8 100644 --- a/index/scorch/scorch.go +++ b/index/scorch/scorch.go @@ -28,6 +28,7 @@ import ( "github.com/blevesearch/bleve/index" "github.com/blevesearch/bleve/index/scorch/segment" 
"github.com/blevesearch/bleve/index/scorch/segment/mem" + "github.com/blevesearch/bleve/index/scorch/segment/zap" "github.com/blevesearch/bleve/index/store" "github.com/blevesearch/bleve/registry" "github.com/boltdb/bolt" @@ -217,7 +218,7 @@ func (s *Scorch) Delete(id string) error { } // Batch applices a batch of changes to the index atomically -func (s *Scorch) Batch(batch *index.Batch) error { +func (s *Scorch) Batch(batch *index.Batch) (err error) { start := time.Now() defer func() { @@ -271,10 +272,13 @@ func (s *Scorch) Batch(batch *index.Batch) error { var newSegment segment.Segment if len(analysisResults) > 0 { - newSegment = mem.NewFromAnalyzedDocs(analysisResults) + newSegment, err = zap.NewSegmentBase(mem.NewFromAnalyzedDocs(analysisResults), DefaultChunkFactor) + if err != nil { + return err + } } - err := s.prepareSegment(newSegment, ids, batch.InternalOps) + err = s.prepareSegment(newSegment, ids, batch.InternalOps) if err != nil { if newSegment != nil { _ = newSegment.Close() diff --git a/index/scorch/scorch_test.go b/index/scorch/scorch_test.go index 6e8ecb0cf..87e9bdb21 100644 --- a/index/scorch/scorch_test.go +++ b/index/scorch/scorch_test.go @@ -1395,7 +1395,7 @@ func TestConcurrentUpdate(t *testing.T) { // do some concurrent updates var wg sync.WaitGroup - for i := 0; i < 10; i++ { + for i := 0; i < 100; i++ { wg.Add(1) go func(i int) { doc := document.NewDocument("1") diff --git a/index/scorch/segment/mem/build.go b/index/scorch/segment/mem/build.go index 554de8906..d3344ce30 100644 --- a/index/scorch/segment/mem/build.go +++ b/index/scorch/segment/mem/build.go @@ -267,15 +267,15 @@ func (s *Segment) processDocument(result *index.AnalysisResult) { } func (s *Segment) getOrDefineField(name string) int { - fieldID, ok := s.FieldsMap[name] + fieldIDPlus1, ok := s.FieldsMap[name] if !ok { - fieldID = uint16(len(s.FieldsInv) + 1) - s.FieldsMap[name] = fieldID + fieldIDPlus1 = uint16(len(s.FieldsInv) + 1) + s.FieldsMap[name] = fieldIDPlus1 
s.FieldsInv = append(s.FieldsInv, name) s.Dicts = append(s.Dicts, make(map[string]uint64)) s.DictKeys = append(s.DictKeys, make([]string, 0)) } - return int(fieldID - 1) + return int(fieldIDPlus1 - 1) } func (s *Segment) addDocument() int { diff --git a/index/scorch/segment/mem/segment.go b/index/scorch/segment/mem/segment.go index baa4811a4..04bdb368a 100644 --- a/index/scorch/segment/mem/segment.go +++ b/index/scorch/segment/mem/segment.go @@ -40,35 +40,38 @@ const idFieldID uint16 = 0 // Segment is an in memory implementation of scorch.Segment type Segment struct { - // FieldsMap name -> id+1 + // FieldsMap adds 1 to field id to avoid zero value issues + // name -> field id + 1 FieldsMap map[string]uint16 - // fields id -> name + + // FieldsInv is the inverse of FieldsMap + // field id -> name FieldsInv []string - // term dictionary + // Term dictionaries for each field // field id -> term -> postings list id + 1 Dicts []map[string]uint64 - // term dictionary keys - // field id -> []dictionary keys + // Terms for each field, where terms are sorted ascending + // field id -> []term DictKeys [][]string // Postings list - // postings list id -> Postings bitmap + // postings list id -> bitmap by docNum Postings []*roaring.Bitmap - // Postings List has locations + // Postings list has locations PostingsLocs []*roaring.Bitmap - // term frequencies + // Term frequencies // postings list id -> Freqs (one for each hit in bitmap) Freqs [][]uint64 - // field Norms + // Field norms // postings list id -> Norms (one for each hit in bitmap) Norms [][]float32 - // field/start/end/pos/locarraypos + // Field/start/end/pos/locarraypos // postings list id -> start/end/pos/locarraypos (one for each freq) Locfields [][]uint16 Locstarts [][]uint64 @@ -80,18 +83,18 @@ type Segment struct { // docNum -> field id -> slice of values (each value []byte) Stored []map[uint16][][]byte - // stored field types + // Stored field types // docNum -> field id -> slice of types (each type byte) 
StoredTypes []map[uint16][]byte - // stored field array positions + // Stored field array positions // docNum -> field id -> slice of array positions (each is []uint64) StoredPos []map[uint16][][]uint64 - // for storing the docValue persisted fields + // For storing the docValue persisted fields DocValueFields map[uint16]bool - // footprint of the segment, updated when analyzed document mutations + // Footprint of the segment, updated when analyzed document mutations // are added into the segment sizeInBytes uint64 } diff --git a/index/scorch/segment/zap/build.go b/index/scorch/segment/zap/build.go index 1928bdf3a..c9361cbd4 100644 --- a/index/scorch/segment/zap/build.go +++ b/index/scorch/segment/zap/build.go @@ -32,10 +32,8 @@ const version uint32 = 2 const fieldNotUninverted = math.MaxUint64 -// PersistSegment takes the in-memory segment and persists it to the specified -// path in the zap file format. -func PersistSegment(memSegment *mem.Segment, path string, chunkFactor uint32) (err error) { - +// PersistSegmentBase persists SegmentBase in the zap file format. 
+func PersistSegmentBase(sb *SegmentBase, path string) error { flag := os.O_RDWR | os.O_CREATE f, err := os.OpenFile(path, flag, 0600) @@ -43,84 +41,151 @@ func PersistSegment(memSegment *mem.Segment, path string, chunkFactor uint32) (e return err } - // buffer the output + cleanup := func() { + _ = f.Close() + _ = os.Remove(path) + } + br := bufio.NewWriter(f) - // wrap it for counting (tracking offsets) - cr := NewCountHashWriter(br) + _, err = br.Write(sb.mem) + if err != nil { + cleanup() + return err + } - var storedIndexOffset uint64 - var dictLocs []uint64 - docValueOffset := uint64(fieldNotUninverted) - if len(memSegment.Stored) > 0 { + err = persistFooter(sb.numDocs, sb.storedIndexOffset, sb.fieldsIndexOffset, sb.docValueOffset, + sb.chunkFactor, sb.memCRC, br) + if err != nil { + cleanup() + return err + } - storedIndexOffset, err = persistStored(memSegment, cr) - if err != nil { - return err - } + err = br.Flush() + if err != nil { + cleanup() + return err + } - var freqOffsets, locOffsets []uint64 - freqOffsets, locOffsets, err = persistPostingDetails(memSegment, cr, chunkFactor) - if err != nil { - return err - } + err = f.Sync() + if err != nil { + cleanup() + return err + } - var postingsListLocs []uint64 - postingsListLocs, err = persistPostingsLocs(memSegment, cr) - if err != nil { - return err - } + err = f.Close() + if err != nil { + cleanup() + return err + } - var postingsLocs []uint64 - postingsLocs, err = persistPostingsLists(memSegment, cr, postingsListLocs, freqOffsets, locOffsets) - if err != nil { - return err - } + return nil +} - dictLocs, err = persistDictionary(memSegment, cr, postingsLocs) - if err != nil { - return err - } +// PersistSegment takes the in-memory segment and persists it to +// the specified path in the zap file format. 
+func PersistSegment(memSegment *mem.Segment, path string, chunkFactor uint32) error { + flag := os.O_RDWR | os.O_CREATE - docValueOffset, err = persistFieldDocValues(cr, chunkFactor, memSegment) - if err != nil { - return err - } + f, err := os.OpenFile(path, flag, 0600) + if err != nil { + return err + } - } else { - dictLocs = make([]uint64, len(memSegment.FieldsInv)) + cleanup := func() { + _ = f.Close() + _ = os.Remove(path) } - var fieldIndexStart uint64 - fieldIndexStart, err = persistFields(memSegment.FieldsInv, cr, dictLocs) + // buffer the output + br := bufio.NewWriter(f) + + // wrap it for counting (tracking offsets) + cr := NewCountHashWriter(br) + + numDocs, storedIndexOffset, fieldsIndexOffset, docValueOffset, _, err := + persistBase(memSegment, cr, chunkFactor) if err != nil { + cleanup() return err } - err = persistFooter(uint64(len(memSegment.Stored)), storedIndexOffset, - fieldIndexStart, docValueOffset, chunkFactor, cr) + err = persistFooter(numDocs, storedIndexOffset, fieldsIndexOffset, docValueOffset, + chunkFactor, cr.Sum32(), cr) if err != nil { + cleanup() return err } err = br.Flush() if err != nil { + cleanup() return err } err = f.Sync() if err != nil { + cleanup() return err } err = f.Close() if err != nil { + cleanup() return err } return nil } +func persistBase(memSegment *mem.Segment, cr *CountHashWriter, chunkFactor uint32) ( + numDocs, storedIndexOffset, fieldsIndexOffset, docValueOffset uint64, + dictLocs []uint64, err error) { + docValueOffset = uint64(fieldNotUninverted) + + if len(memSegment.Stored) > 0 { + storedIndexOffset, err = persistStored(memSegment, cr) + if err != nil { + return 0, 0, 0, 0, nil, err + } + + freqOffsets, locOffsets, err := persistPostingDetails(memSegment, cr, chunkFactor) + if err != nil { + return 0, 0, 0, 0, nil, err + } + + postingsListLocs, err := persistPostingsLocs(memSegment, cr) + if err != nil { + return 0, 0, 0, 0, nil, err + } + + postingsLocs, err := persistPostingsLists(memSegment, cr, 
postingsListLocs, freqOffsets, locOffsets) + if err != nil { + return 0, 0, 0, 0, nil, err + } + + dictLocs, err = persistDictionary(memSegment, cr, postingsLocs) + if err != nil { + return 0, 0, 0, 0, nil, err + } + + docValueOffset, err = persistFieldDocValues(memSegment, cr, chunkFactor) + if err != nil { + return 0, 0, 0, 0, nil, err + } + } else { + dictLocs = make([]uint64, len(memSegment.FieldsInv)) + } + + fieldsIndexOffset, err = persistFields(memSegment.FieldsInv, cr, dictLocs) + if err != nil { + return 0, 0, 0, 0, nil, err + } + + return uint64(len(memSegment.Stored)), storedIndexOffset, fieldsIndexOffset, docValueOffset, + dictLocs, nil +} + func persistStored(memSegment *mem.Segment, w *CountHashWriter) (uint64, error) { var curr int @@ -394,6 +459,8 @@ func persistPostingsLists(memSegment *mem.Segment, w *CountHashWriter, func persistDictionary(memSegment *mem.Segment, w *CountHashWriter, postingsLocs []uint64) ([]uint64, error) { rv := make([]uint64, 0, len(memSegment.DictKeys)) + varintBuf := make([]byte, binary.MaxVarintLen64) + var buffer bytes.Buffer for fieldID, fieldTerms := range memSegment.DictKeys { if fieldID != 0 { @@ -427,10 +494,8 @@ func persistDictionary(memSegment *mem.Segment, w *CountHashWriter, postingsLocs vellumData := buffer.Bytes() // write out the length of the vellum data - buf := make([]byte, binary.MaxVarintLen64) - // write out the number of chunks - n := binary.PutUvarint(buf, uint64(len(vellumData))) - _, err = w.Write(buf[:n]) + n := binary.PutUvarint(varintBuf, uint64(len(vellumData))) + _, err = w.Write(varintBuf[:n]) if err != nil { return nil, err } @@ -521,9 +586,8 @@ func persistDocValues(memSegment *mem.Segment, w *CountHashWriter, return fieldChunkOffsets, nil } -func persistFieldDocValues(w *CountHashWriter, chunkFactor uint32, - memSegment *mem.Segment) (uint64, error) { - +func persistFieldDocValues(memSegment *mem.Segment, w *CountHashWriter, + chunkFactor uint32) (uint64, error) { fieldDvOffsets, err := 
persistDocValues(memSegment, w, chunkFactor) if err != nil { return 0, err @@ -548,3 +612,36 @@ func persistFieldDocValues(w *CountHashWriter, chunkFactor uint32, return fieldDocValuesOffset, nil } + +func NewSegmentBase(memSegment *mem.Segment, chunkFactor uint32) (*SegmentBase, error) { + var br bytes.Buffer + + cr := NewCountHashWriter(&br) + + numDocs, storedIndexOffset, fieldsIndexOffset, docValueOffset, dictLocs, err := + persistBase(memSegment, cr, chunkFactor) + if err != nil { + return nil, err + } + + sb := &SegmentBase{ + mem: br.Bytes(), + memCRC: cr.Sum32(), + chunkFactor: chunkFactor, + fieldsMap: memSegment.FieldsMap, + fieldsInv: memSegment.FieldsInv, + numDocs: numDocs, + storedIndexOffset: storedIndexOffset, + fieldsIndexOffset: fieldsIndexOffset, + docValueOffset: docValueOffset, + dictLocs: dictLocs, + fieldDvIterMap: make(map[uint16]*docValueIterator), + } + + err = sb.loadDvIterators() + if err != nil { + return nil, err + } + + return sb, nil +} diff --git a/index/scorch/segment/zap/count.go b/index/scorch/segment/zap/count.go index 2f0b92de2..d75e83c03 100644 --- a/index/scorch/segment/zap/count.go +++ b/index/scorch/segment/zap/count.go @@ -15,32 +15,28 @@ package zap import ( - "hash" "hash/crc32" "io" ) // CountHashWriter is a wrapper around a Writer which counts the number of -// bytes which have been written +// bytes which have been written and computes a crc32 hash type CountHashWriter struct { - w io.Writer - h hash.Hash32 - n int + w io.Writer + crc uint32 + n int } // NewCountHashWriter returns a CountHashWriter which wraps the provided Writer func NewCountHashWriter(w io.Writer) *CountHashWriter { - return &CountHashWriter{ - w: w, - h: crc32.NewIEEE(), - } + return &CountHashWriter{w: w} } // Write writes the provided bytes to the wrapped writer and counts the bytes func (c *CountHashWriter) Write(b []byte) (int, error) { n, err := c.w.Write(b) + c.crc = crc32.Update(c.crc, crc32.IEEETable, b[:n]) c.n += n - _, _ = c.h.Write(b) 
return n, err } @@ -51,5 +47,5 @@ func (c *CountHashWriter) Count() int { // Sum32 returns the CRC-32 hash of the content written to this writer func (c *CountHashWriter) Sum32() uint32 { - return c.h.Sum32() + return c.crc } diff --git a/index/scorch/segment/zap/dict.go b/index/scorch/segment/zap/dict.go index 3221d0616..284bc1898 100644 --- a/index/scorch/segment/zap/dict.go +++ b/index/scorch/segment/zap/dict.go @@ -27,7 +27,7 @@ import ( // Dictionary is the zap representation of the term dictionary type Dictionary struct { - segment *Segment + sb *SegmentBase field string fieldID uint16 fst *vellum.FST @@ -40,9 +40,9 @@ func (d *Dictionary) PostingsList(term string, except *roaring.Bitmap) (segment. func (d *Dictionary) postingsList(term string, except *roaring.Bitmap) (*PostingsList, error) { rv := &PostingsList{ - dictionary: d, - term: term, - except: except, + sb: d.sb, + term: term, + except: except, } if d.fst != nil { @@ -56,19 +56,19 @@ func (d *Dictionary) postingsList(term string, except *roaring.Bitmap) (*Posting var n uint64 var read int - rv.freqOffset, read = binary.Uvarint(d.segment.mm[postingsOffset+n : postingsOffset+binary.MaxVarintLen64]) + rv.freqOffset, read = binary.Uvarint(d.sb.mem[postingsOffset+n : postingsOffset+binary.MaxVarintLen64]) n += uint64(read) - rv.locOffset, read = binary.Uvarint(d.segment.mm[postingsOffset+n : postingsOffset+n+binary.MaxVarintLen64]) + rv.locOffset, read = binary.Uvarint(d.sb.mem[postingsOffset+n : postingsOffset+n+binary.MaxVarintLen64]) n += uint64(read) var locBitmapOffset uint64 - locBitmapOffset, read = binary.Uvarint(d.segment.mm[postingsOffset+n : postingsOffset+n+binary.MaxVarintLen64]) + locBitmapOffset, read = binary.Uvarint(d.sb.mem[postingsOffset+n : postingsOffset+n+binary.MaxVarintLen64]) n += uint64(read) // go ahead and load loc bitmap var locBitmapLen uint64 - locBitmapLen, read = binary.Uvarint(d.segment.mm[locBitmapOffset : locBitmapOffset+binary.MaxVarintLen64]) - locRoaringBytes := 
d.segment.mm[locBitmapOffset+uint64(read) : locBitmapOffset+uint64(read)+locBitmapLen] + locBitmapLen, read = binary.Uvarint(d.sb.mem[locBitmapOffset : locBitmapOffset+binary.MaxVarintLen64]) + locRoaringBytes := d.sb.mem[locBitmapOffset+uint64(read) : locBitmapOffset+uint64(read)+locBitmapLen] rv.locBitmap = roaring.NewBitmap() _, err := rv.locBitmap.FromBuffer(locRoaringBytes) if err != nil { @@ -76,10 +76,10 @@ func (d *Dictionary) postingsList(term string, except *roaring.Bitmap) (*Posting } var postingsLen uint64 - postingsLen, read = binary.Uvarint(d.segment.mm[postingsOffset+n : postingsOffset+n+binary.MaxVarintLen64]) + postingsLen, read = binary.Uvarint(d.sb.mem[postingsOffset+n : postingsOffset+n+binary.MaxVarintLen64]) n += uint64(read) - roaringBytes := d.segment.mm[postingsOffset+n : postingsOffset+n+postingsLen] + roaringBytes := d.sb.mem[postingsOffset+n : postingsOffset+n+postingsLen] bitmap := roaring.NewBitmap() _, err = bitmap.FromBuffer(roaringBytes) diff --git a/index/scorch/segment/zap/docvalues.go b/index/scorch/segment/zap/docvalues.go index 11b3b99d4..fb5b348a5 100644 --- a/index/scorch/segment/zap/docvalues.go +++ b/index/scorch/segment/zap/docvalues.go @@ -61,7 +61,7 @@ func (di *docValueIterator) curChunkNumber() uint64 { return di.curChunkNum } -func (s *Segment) loadFieldDocValueIterator(field string, +func (s *SegmentBase) loadFieldDocValueIterator(field string, fieldDvLoc uint64) (*docValueIterator, error) { // get the docValue offset for the given fields if fieldDvLoc == fieldNotUninverted { @@ -71,7 +71,7 @@ func (s *Segment) loadFieldDocValueIterator(field string, // read the number of chunks, chunk lengths var offset, clen uint64 - numChunks, read := binary.Uvarint(s.mm[fieldDvLoc : fieldDvLoc+binary.MaxVarintLen64]) + numChunks, read := binary.Uvarint(s.mem[fieldDvLoc : fieldDvLoc+binary.MaxVarintLen64]) if read <= 0 { return nil, fmt.Errorf("failed to read the field "+ "doc values for field %s", field) @@ -84,7 +84,7 @@ func (s 
*Segment) loadFieldDocValueIterator(field string, chunkLens: make([]uint64, int(numChunks)), } for i := 0; i < int(numChunks); i++ { - clen, read = binary.Uvarint(s.mm[fieldDvLoc+offset : fieldDvLoc+offset+binary.MaxVarintLen64]) + clen, read = binary.Uvarint(s.mem[fieldDvLoc+offset : fieldDvLoc+offset+binary.MaxVarintLen64]) if read <= 0 { return nil, fmt.Errorf("corrupted chunk length during segment load") } @@ -97,7 +97,7 @@ func (s *Segment) loadFieldDocValueIterator(field string, } func (di *docValueIterator) loadDvChunk(chunkNumber, - localDocNum uint64, s *Segment) error { + localDocNum uint64, s *SegmentBase) error { // advance to the chunk where the docValues // reside for the given docID destChunkDataLoc := di.dvDataLoc @@ -107,7 +107,7 @@ func (di *docValueIterator) loadDvChunk(chunkNumber, curChunkSize := di.chunkLens[chunkNumber] // read the number of docs reside in the chunk - numDocs, read := binary.Uvarint(s.mm[destChunkDataLoc : destChunkDataLoc+binary.MaxVarintLen64]) + numDocs, read := binary.Uvarint(s.mem[destChunkDataLoc : destChunkDataLoc+binary.MaxVarintLen64]) if read <= 0 { return fmt.Errorf("failed to read the chunk") } @@ -116,17 +116,17 @@ func (di *docValueIterator) loadDvChunk(chunkNumber, offset := uint64(0) di.curChunkHeader = make([]MetaData, int(numDocs)) for i := 0; i < int(numDocs); i++ { - di.curChunkHeader[i].DocID, read = binary.Uvarint(s.mm[chunkMetaLoc+offset : chunkMetaLoc+offset+binary.MaxVarintLen64]) + di.curChunkHeader[i].DocID, read = binary.Uvarint(s.mem[chunkMetaLoc+offset : chunkMetaLoc+offset+binary.MaxVarintLen64]) offset += uint64(read) - di.curChunkHeader[i].DocDvLoc, read = binary.Uvarint(s.mm[chunkMetaLoc+offset : chunkMetaLoc+offset+binary.MaxVarintLen64]) + di.curChunkHeader[i].DocDvLoc, read = binary.Uvarint(s.mem[chunkMetaLoc+offset : chunkMetaLoc+offset+binary.MaxVarintLen64]) offset += uint64(read) - di.curChunkHeader[i].DocDvLen, read = binary.Uvarint(s.mm[chunkMetaLoc+offset : 
chunkMetaLoc+offset+binary.MaxVarintLen64]) + di.curChunkHeader[i].DocDvLen, read = binary.Uvarint(s.mem[chunkMetaLoc+offset : chunkMetaLoc+offset+binary.MaxVarintLen64]) offset += uint64(read) } compressedDataLoc := chunkMetaLoc + offset dataLength := destChunkDataLoc + curChunkSize - compressedDataLoc - di.curChunkData = s.mm[compressedDataLoc : compressedDataLoc+dataLength] + di.curChunkData = s.mem[compressedDataLoc : compressedDataLoc+dataLength] di.curChunkNum = chunkNumber return nil } @@ -171,18 +171,18 @@ func (di *docValueIterator) getDocValueLocs(docID uint64) (uint64, uint64) { // VisitDocumentFieldTerms is an implementation of the // DocumentFieldTermVisitable interface -func (s *Segment) VisitDocumentFieldTerms(localDocNum uint64, fields []string, +func (s *SegmentBase) VisitDocumentFieldTerms(localDocNum uint64, fields []string, visitor index.DocumentFieldTermVisitor) error { - fieldID := uint16(0) + fieldIDPlus1 := uint16(0) ok := true for _, field := range fields { - if fieldID, ok = s.fieldsMap[field]; !ok { + if fieldIDPlus1, ok = s.fieldsMap[field]; !ok { continue } // find the chunkNumber where the docValues are stored docInChunk := localDocNum / uint64(s.chunkFactor) - if dvIter, exists := s.fieldDvIterMap[fieldID-1]; exists && + if dvIter, exists := s.fieldDvIterMap[fieldIDPlus1-1]; exists && dvIter != nil { // check if the chunk is already loaded if docInChunk != dvIter.curChunkNumber() { diff --git a/index/scorch/segment/zap/merge.go b/index/scorch/segment/zap/merge.go index 16ec848b2..07b9bafcc 100644 --- a/index/scorch/segment/zap/merge.go +++ b/index/scorch/segment/zap/merge.go @@ -72,14 +72,13 @@ func Merge(segments []*Segment, drops []*roaring.Bitmap, path string, dictLocs = make([]uint64, len(fieldsInv)) } - var fieldsIndexOffset uint64 - fieldsIndexOffset, err = persistFields(fieldsInv, cr, dictLocs) + fieldsIndexOffset, err := persistFields(fieldsInv, cr, dictLocs) if err != nil { return nil, err } err = 
persistFooter(newSegDocCount, storedIndexOffset, - fieldsIndexOffset, fieldDvLocsOffset, chunkFactor, cr) + fieldsIndexOffset, fieldDvLocsOffset, chunkFactor, cr.Sum32(), cr) if err != nil { return nil, err } diff --git a/index/scorch/segment/zap/posting.go b/index/scorch/segment/zap/posting.go index 1b7a0a587..95f350130 100644 --- a/index/scorch/segment/zap/posting.go +++ b/index/scorch/segment/zap/posting.go @@ -27,7 +27,7 @@ import ( // PostingsList is an in-memory represenation of a postings list type PostingsList struct { - dictionary *Dictionary + sb *SegmentBase term string postingsOffset uint64 freqOffset uint64 @@ -48,11 +48,11 @@ func (p *PostingsList) Iterator() segment.PostingsIterator { var n uint64 var read int var numFreqChunks uint64 - numFreqChunks, read = binary.Uvarint(p.dictionary.segment.mm[p.freqOffset+n : p.freqOffset+n+binary.MaxVarintLen64]) + numFreqChunks, read = binary.Uvarint(p.sb.mem[p.freqOffset+n : p.freqOffset+n+binary.MaxVarintLen64]) n += uint64(read) rv.freqChunkLens = make([]uint64, int(numFreqChunks)) for i := 0; i < int(numFreqChunks); i++ { - rv.freqChunkLens[i], read = binary.Uvarint(p.dictionary.segment.mm[p.freqOffset+n : p.freqOffset+n+binary.MaxVarintLen64]) + rv.freqChunkLens[i], read = binary.Uvarint(p.sb.mem[p.freqOffset+n : p.freqOffset+n+binary.MaxVarintLen64]) n += uint64(read) } rv.freqChunkStart = p.freqOffset + n @@ -60,11 +60,11 @@ func (p *PostingsList) Iterator() segment.PostingsIterator { // prepare the loc chunk details n = 0 var numLocChunks uint64 - numLocChunks, read = binary.Uvarint(p.dictionary.segment.mm[p.locOffset+n : p.locOffset+n+binary.MaxVarintLen64]) + numLocChunks, read = binary.Uvarint(p.sb.mem[p.locOffset+n : p.locOffset+n+binary.MaxVarintLen64]) n += uint64(read) rv.locChunkLens = make([]uint64, int(numLocChunks)) for i := 0; i < int(numLocChunks); i++ { - rv.locChunkLens[i], read = binary.Uvarint(p.dictionary.segment.mm[p.locOffset+n : p.locOffset+n+binary.MaxVarintLen64]) + 
rv.locChunkLens[i], read = binary.Uvarint(p.sb.mem[p.locOffset+n : p.locOffset+n+binary.MaxVarintLen64]) n += uint64(read) } rv.locChunkStart = p.locOffset + n @@ -133,7 +133,7 @@ func (i *PostingsIterator) loadChunk(chunk int) error { start += i.freqChunkLens[j] } end := start + i.freqChunkLens[chunk] - i.currChunkFreqNorm = i.postings.dictionary.segment.mm[start:end] + i.currChunkFreqNorm = i.postings.sb.mem[start:end] i.freqNormDecoder = govarint.NewU64Base128Decoder(bytes.NewReader(i.currChunkFreqNorm)) start = i.locChunkStart @@ -141,7 +141,7 @@ func (i *PostingsIterator) loadChunk(chunk int) error { start += i.locChunkLens[j] } end = start + i.locChunkLens[chunk] - i.currChunkLoc = i.postings.dictionary.segment.mm[start:end] + i.currChunkLoc = i.postings.sb.mem[start:end] i.locDecoder = govarint.NewU64Base128Decoder(bytes.NewReader(i.currChunkLoc)) i.currChunk = uint32(chunk) return nil @@ -192,7 +192,7 @@ func (i *PostingsIterator) readLocation(l *Location) error { // group these together for less branching if l != nil { - l.field = i.postings.dictionary.segment.fieldsInv[fieldID] + l.field = i.postings.sb.fieldsInv[fieldID] l.pos = pos l.start = start l.end = end @@ -221,9 +221,9 @@ func (i *PostingsIterator) Next() (segment.Posting, error) { return nil, nil } n := i.actual.Next() - nChunk := n / i.postings.dictionary.segment.chunkFactor + nChunk := n / i.postings.sb.chunkFactor allN := i.all.Next() - allNChunk := allN / i.postings.dictionary.segment.chunkFactor + allNChunk := allN / i.postings.sb.chunkFactor // n is the next actual hit (excluding some postings) // allN is the next hit in the full postings diff --git a/index/scorch/segment/zap/read.go b/index/scorch/segment/zap/read.go index c9b3e7720..0c5b9e17f 100644 --- a/index/scorch/segment/zap/read.go +++ b/index/scorch/segment/zap/read.go @@ -16,16 +16,16 @@ package zap import "encoding/binary" -func (s *Segment) getStoredMetaAndCompressed(docNum uint64) ([]byte, []byte) { +func (s *SegmentBase) 
getDocStoredMetaAndCompressed(docNum uint64) ([]byte, []byte) { docStoredStartAddr := s.storedIndexOffset + (8 * docNum) - docStoredStart := binary.BigEndian.Uint64(s.mm[docStoredStartAddr : docStoredStartAddr+8]) + docStoredStart := binary.BigEndian.Uint64(s.mem[docStoredStartAddr : docStoredStartAddr+8]) var n uint64 - metaLen, read := binary.Uvarint(s.mm[docStoredStart : docStoredStart+binary.MaxVarintLen64]) + metaLen, read := binary.Uvarint(s.mem[docStoredStart : docStoredStart+binary.MaxVarintLen64]) n += uint64(read) var dataLen uint64 - dataLen, read = binary.Uvarint(s.mm[docStoredStart+n : docStoredStart+n+binary.MaxVarintLen64]) + dataLen, read = binary.Uvarint(s.mem[docStoredStart+n : docStoredStart+n+binary.MaxVarintLen64]) n += uint64(read) - meta := s.mm[docStoredStart+n : docStoredStart+n+metaLen] - data := s.mm[docStoredStart+n+metaLen : docStoredStart+n+metaLen+dataLen] + meta := s.mem[docStoredStart+n : docStoredStart+n+metaLen] + data := s.mem[docStoredStart+n+metaLen : docStoredStart+n+metaLen+dataLen] return meta, data } diff --git a/index/scorch/segment/zap/segment.go b/index/scorch/segment/zap/segment.go index 60cc034b9..df71f1d62 100644 --- a/index/scorch/segment/zap/segment.go +++ b/index/scorch/segment/zap/segment.go @@ -44,12 +44,15 @@ func Open(path string) (segment.Segment, error) { } rv := &Segment{ - f: f, - mm: mm, - path: path, - fieldsMap: make(map[string]uint16), - fieldDvIterMap: make(map[uint16]*docValueIterator), - refs: 1, + SegmentBase: SegmentBase{ + mem: mm[0 : len(mm)-FooterSize], + fieldsMap: make(map[string]uint16), + fieldDvIterMap: make(map[uint16]*docValueIterator), + }, + f: f, + mm: mm, + path: path, + refs: 1, } err = rv.loadConfig() @@ -73,24 +76,36 @@ func Open(path string) (segment.Segment, error) { return rv, nil } -// Segment implements the segment.Segment inteface over top the zap file format -type Segment struct { - f *os.File - mm mmap.MMap - path string - crc uint32 - version uint32 +// SegmentBase is a 
memory only, read-only implementation of the +// segment.Segment interface, using zap's data representation. +type SegmentBase struct { + mem []byte + memCRC uint32 chunkFactor uint32 + fieldsMap map[string]uint16 // fieldName -> fieldID+1 + fieldsInv []string // fieldID -> fieldName numDocs uint64 storedIndexOffset uint64 fieldsIndexOffset uint64 + docValueOffset uint64 + dictLocs []uint64 + fieldDvIterMap map[uint16]*docValueIterator // naive chunk cache per field +} - fieldsMap map[string]uint16 - fieldsInv []string - fieldsOffsets []uint64 +func (sb *SegmentBase) AddRef() {} +func (sb *SegmentBase) DecRef() (err error) { return nil } +func (sb *SegmentBase) Close() (err error) { return nil } + +// Segment implements a persisted segment.Segment interface, by +// embedding an mmap()'ed SegmentBase. +type Segment struct { + SegmentBase - docValueOffset uint64 - fieldDvIterMap map[uint16]*docValueIterator // naive chunk cache per field + f *os.File + mm mmap.MMap + path string + version uint32 + crc uint32 m sync.Mutex // Protects the fields that follow. 
refs int64 @@ -98,17 +113,29 @@ type Segment struct { func (s *Segment) SizeInBytes() uint64 { // 8 /* size of file pointer */ - // 4 /* size of crc -> uint32 */ // 4 /* size of version -> uint32 */ + // 4 /* size of crc -> uint32 */ + sizeOfUints := 16 + + sizeInBytes := (len(s.path) + int(segment.SizeOfString)) + sizeOfUints + + // mutex, refs -> int64 + sizeInBytes += 16 + + // do not include the mmap'ed part + return uint64(sizeInBytes) + s.SegmentBase.SizeInBytes() - uint64(len(s.mem)) +} + +func (s *SegmentBase) SizeInBytes() uint64 { + // 4 /* size of memCRC -> uint32 */ // 4 /* size of chunkFactor -> uint32 */ // 8 /* size of numDocs -> uint64 */ // 8 /* size of storedIndexOffset -> uint64 */ // 8 /* size of fieldsIndexOffset -> uint64 */ // 8 /* size of docValueOffset -> uint64 */ - sizeOfUints := 52 + sizeInBytes := 40 - // Do not include the mmap'ed part - sizeInBytes := (len(s.path) + int(segment.SizeOfString)) + sizeOfUints + sizeInBytes += len(s.mem) + int(segment.SizeOfSlice) // fieldsMap for k, _ := range s.fieldsMap { @@ -116,12 +143,12 @@ func (s *Segment) SizeInBytes() uint64 { } sizeInBytes += int(segment.SizeOfMap) /* overhead from map */ - // fieldsInv, fieldsOffsets + // fieldsInv, dictLocs for _, entry := range s.fieldsInv { sizeInBytes += (len(entry) + int(segment.SizeOfString)) } - sizeInBytes += len(s.fieldsOffsets) * 8 /* size of uint64 */ - sizeInBytes += int(segment.SizeOfSlice) * 2 /* overhead from slices */ + sizeInBytes += len(s.dictLocs) * 8 /* size of uint64 */ + sizeInBytes += int(segment.SizeOfSlice) * 3 /* overhead from slices */ // fieldDvIterMap sizeInBytes += len(s.fieldDvIterMap) * @@ -133,9 +160,6 @@ func (s *Segment) SizeInBytes() uint64 { } sizeInBytes += int(segment.SizeOfMap) - // mutex, refs -> int64 - sizeInBytes += 16 - return uint64(sizeInBytes) } @@ -158,49 +182,50 @@ func (s *Segment) DecRef() (err error) { func (s *Segment) loadConfig() error { crcOffset := len(s.mm) - 4 s.crc = 
binary.BigEndian.Uint32(s.mm[crcOffset : crcOffset+4]) + verOffset := crcOffset - 4 s.version = binary.BigEndian.Uint32(s.mm[verOffset : verOffset+4]) if s.version != version { return fmt.Errorf("unsupported version %d", s.version) } + chunkOffset := verOffset - 4 s.chunkFactor = binary.BigEndian.Uint32(s.mm[chunkOffset : chunkOffset+4]) docValueOffset := chunkOffset - 8 s.docValueOffset = binary.BigEndian.Uint64(s.mm[docValueOffset : docValueOffset+8]) - fieldsOffset := docValueOffset - 8 - s.fieldsIndexOffset = binary.BigEndian.Uint64(s.mm[fieldsOffset : fieldsOffset+8]) + fieldsIndexOffset := docValueOffset - 8 + s.fieldsIndexOffset = binary.BigEndian.Uint64(s.mm[fieldsIndexOffset : fieldsIndexOffset+8]) - storedOffset := fieldsOffset - 8 - s.storedIndexOffset = binary.BigEndian.Uint64(s.mm[storedOffset : storedOffset+8]) + storedIndexOffset := fieldsIndexOffset - 8 + s.storedIndexOffset = binary.BigEndian.Uint64(s.mm[storedIndexOffset : storedIndexOffset+8]) - docNumOffset := storedOffset - 8 - s.numDocs = binary.BigEndian.Uint64(s.mm[docNumOffset : docNumOffset+8]) + numDocsOffset := storedIndexOffset - 8 + s.numDocs = binary.BigEndian.Uint64(s.mm[numDocsOffset : numDocsOffset+8]) return nil - } -func (s *Segment) loadFields() error { - // NOTE for now we assume the fields index immediately preceeds the footer - // if this changes, need to adjust accordingly (or store explicit length) - fieldsIndexEnd := uint64(len(s.mm) - FooterSize) +func (s *SegmentBase) loadFields() error { + // NOTE for now we assume the fields index immediately preceeds + // the footer, and if this changes, need to adjust accordingly (or + // store explicit length), where s.mem was sliced from s.mm in Open(). 
+ fieldsIndexEnd := uint64(len(s.mem)) // iterate through fields index var fieldID uint64 for s.fieldsIndexOffset+(8*fieldID) < fieldsIndexEnd { - addr := binary.BigEndian.Uint64(s.mm[s.fieldsIndexOffset+(8*fieldID) : s.fieldsIndexOffset+(8*fieldID)+8]) - var n uint64 + addr := binary.BigEndian.Uint64(s.mem[s.fieldsIndexOffset+(8*fieldID) : s.fieldsIndexOffset+(8*fieldID)+8]) - dictLoc, read := binary.Uvarint(s.mm[addr+n : fieldsIndexEnd]) - n += uint64(read) - s.fieldsOffsets = append(s.fieldsOffsets, dictLoc) + dictLoc, read := binary.Uvarint(s.mem[addr:fieldsIndexEnd]) + n := uint64(read) + s.dictLocs = append(s.dictLocs, dictLoc) var nameLen uint64 - nameLen, read = binary.Uvarint(s.mm[addr+n : fieldsIndexEnd]) + nameLen, read = binary.Uvarint(s.mem[addr+n : fieldsIndexEnd]) n += uint64(read) - name := string(s.mm[addr+n : addr+n+nameLen]) + name := string(s.mem[addr+n : addr+n+nameLen]) s.fieldsInv = append(s.fieldsInv, name) s.fieldsMap[name] = uint16(fieldID + 1) @@ -210,7 +235,7 @@ func (s *Segment) loadFields() error { } // Dictionary returns the term dictionary for the specified field -func (s *Segment) Dictionary(field string) (segment.TermDictionary, error) { +func (s *SegmentBase) Dictionary(field string) (segment.TermDictionary, error) { dict, err := s.dictionary(field) if err == nil && dict == nil { return &segment.EmptyDictionary{}, nil @@ -218,21 +243,20 @@ func (s *Segment) Dictionary(field string) (segment.TermDictionary, error) { return dict, err } -func (s *Segment) dictionary(field string) (rv *Dictionary, err error) { - rv = &Dictionary{ - segment: s, - field: field, - } - - rv.fieldID = s.fieldsMap[field] - if rv.fieldID > 0 { - rv.fieldID = rv.fieldID - 1 +func (sb *SegmentBase) dictionary(field string) (rv *Dictionary, err error) { + fieldIDPlus1 := sb.fieldsMap[field] + if fieldIDPlus1 > 0 { + rv = &Dictionary{ + sb: sb, + field: field, + fieldID: fieldIDPlus1 - 1, + } - dictStart := s.fieldsOffsets[rv.fieldID] + dictStart := 
sb.dictLocs[rv.fieldID] if dictStart > 0 { // read the length of the vellum data - vellumLen, read := binary.Uvarint(s.mm[dictStart : dictStart+binary.MaxVarintLen64]) - fstBytes := s.mm[dictStart+uint64(read) : dictStart+uint64(read)+vellumLen] + vellumLen, read := binary.Uvarint(sb.mem[dictStart : dictStart+binary.MaxVarintLen64]) + fstBytes := sb.mem[dictStart+uint64(read) : dictStart+uint64(read)+vellumLen] if fstBytes != nil { rv.fst, err = vellum.Load(fstBytes) if err != nil { @@ -240,9 +264,6 @@ func (s *Segment) dictionary(field string) (rv *Dictionary, err error) { } } } - - } else { - return nil, nil } return rv, nil @@ -250,10 +271,10 @@ func (s *Segment) dictionary(field string) (rv *Dictionary, err error) { // VisitDocument invokes the DocFieldValueVistor for each stored field // for the specified doc number -func (s *Segment) VisitDocument(num uint64, visitor segment.DocumentFieldValueVisitor) error { +func (s *SegmentBase) VisitDocument(num uint64, visitor segment.DocumentFieldValueVisitor) error { // first make sure this is a valid number in this segment if num < s.numDocs { - meta, compressed := s.getStoredMetaAndCompressed(num) + meta, compressed := s.getDocStoredMetaAndCompressed(num) uncompressed, err := snappy.Decode(nil, compressed) if err != nil { return err @@ -307,13 +328,13 @@ func (s *Segment) VisitDocument(num uint64, visitor segment.DocumentFieldValueVi } // Count returns the number of documents in this segment. 
-func (s *Segment) Count() uint64 { +func (s *SegmentBase) Count() uint64 { return s.numDocs } // DocNumbers returns a bitset corresponding to the doc numbers of all the // provided _id strings -func (s *Segment) DocNumbers(ids []string) (*roaring.Bitmap, error) { +func (s *SegmentBase) DocNumbers(ids []string) (*roaring.Bitmap, error) { rv := roaring.New() if len(s.fieldsMap) > 0 { @@ -337,7 +358,7 @@ func (s *Segment) DocNumbers(ids []string) (*roaring.Bitmap, error) { } // Fields returns the field names used in this segment -func (s *Segment) Fields() []string { +func (s *SegmentBase) Fields() []string { return s.fieldsInv } @@ -411,23 +432,22 @@ func (s *Segment) NumDocs() uint64 { // DictAddr is a helper function to compute the file offset where the // dictionary is stored for the specified field. func (s *Segment) DictAddr(field string) (uint64, error) { - var fieldID uint16 - var ok bool - if fieldID, ok = s.fieldsMap[field]; !ok { + fieldIDPlus1, ok := s.fieldsMap[field] + if !ok { return 0, fmt.Errorf("no such field '%s'", field) } - return s.fieldsOffsets[fieldID-1], nil + return s.dictLocs[fieldIDPlus1-1], nil } -func (s *Segment) loadDvIterators() error { +func (s *SegmentBase) loadDvIterators() error { if s.docValueOffset == fieldNotUninverted { return nil } var read uint64 for fieldID, field := range s.fieldsInv { - fieldLoc, n := binary.Uvarint(s.mm[s.docValueOffset+read : s.docValueOffset+read+binary.MaxVarintLen64]) + fieldLoc, n := binary.Uvarint(s.mem[s.docValueOffset+read : s.docValueOffset+read+binary.MaxVarintLen64]) if n <= 0 { return fmt.Errorf("loadDvIterators: failed to read the docvalue offsets for field %d", fieldID) } diff --git a/index/scorch/segment/zap/write.go b/index/scorch/segment/zap/write.go index cfb7e46e9..246710743 100644 --- a/index/scorch/segment/zap/write.go +++ b/index/scorch/segment/zap/write.go @@ -53,12 +53,11 @@ func writeRoaringWithLen(r *roaring.Bitmap, w io.Writer) (int, error) { func persistFields(fieldsInv 
[]string, w *CountHashWriter, dictLocs []uint64) (uint64, error) { var rv uint64 + var fieldsOffsets []uint64 - var fieldStarts []uint64 for fieldID, fieldName := range fieldsInv { - // record start of this field - fieldStarts = append(fieldStarts, uint64(w.Count())) + fieldsOffsets = append(fieldsOffsets, uint64(w.Count())) // write out the dict location and field name length _, err := writeUvarints(w, dictLocs[fieldID], uint64(len(fieldName))) @@ -76,7 +75,7 @@ func persistFields(fieldsInv []string, w *CountHashWriter, dictLocs []uint64) (u // now write out the fields index rv = uint64(w.Count()) for fieldID := range fieldsInv { - err := binary.Write(w, binary.BigEndian, fieldStarts[fieldID]) + err := binary.Write(w, binary.BigEndian, fieldsOffsets[fieldID]) if err != nil { return 0, err } @@ -89,8 +88,11 @@ func persistFields(fieldsInv []string, w *CountHashWriter, dictLocs []uint64) (u // crc + ver + chunk + field offset + stored offset + num docs + docValueOffset const FooterSize = 4 + 4 + 4 + 8 + 8 + 8 + 8 -func persistFooter(numDocs, storedIndexOffset, fieldIndexOffset, docValueOffset uint64, - chunkFactor uint32, w *CountHashWriter) error { +func persistFooter(numDocs, storedIndexOffset, fieldsIndexOffset, docValueOffset uint64, + chunkFactor uint32, crcBeforeFooter uint32, writerIn io.Writer) error { + w := NewCountHashWriter(writerIn) + w.crc = crcBeforeFooter + // write out the number of docs err := binary.Write(w, binary.BigEndian, numDocs) if err != nil { @@ -102,7 +104,7 @@ func persistFooter(numDocs, storedIndexOffset, fieldIndexOffset, docValueOffset return err } // write out the field index location - err = binary.Write(w, binary.BigEndian, fieldIndexOffset) + err = binary.Write(w, binary.BigEndian, fieldsIndexOffset) if err != nil { return err } @@ -122,7 +124,7 @@ func persistFooter(numDocs, storedIndexOffset, fieldIndexOffset, docValueOffset return err } // write out CRC-32 of everything upto but not including this CRC - err = binary.Write(w, 
binary.BigEndian, w.Sum32()) + err = binary.Write(w, binary.BigEndian, w.crc) if err != nil { return err } From 37121c3b4927a497b62d69d752520a2b4a395e29 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Sun, 21 Jan 2018 10:53:58 -0800 Subject: [PATCH 154/728] scorch zap writeRoaringWithLen optimized with reused bufs --- index/scorch/segment/zap/build.go | 8 ++++++-- index/scorch/segment/zap/merge.go | 5 +++-- index/scorch/segment/zap/write.go | 19 +++++++++---------- 3 files changed, 18 insertions(+), 14 deletions(-) diff --git a/index/scorch/segment/zap/build.go b/index/scorch/segment/zap/build.go index c9361cbd4..769c07958 100644 --- a/index/scorch/segment/zap/build.go +++ b/index/scorch/segment/zap/build.go @@ -421,11 +421,13 @@ func persistPostingDetails(memSegment *mem.Segment, w *CountHashWriter, chunkFac func persistPostingsLocs(memSegment *mem.Segment, w *CountHashWriter) (rv []uint64, err error) { rv = make([]uint64, 0, len(memSegment.PostingsLocs)) + var reuseBuf bytes.Buffer + reuseBufVarint := make([]byte, binary.MaxVarintLen64) for postingID := range memSegment.PostingsLocs { // record where we start this posting loc rv = append(rv, uint64(w.Count())) // write out the length and bitmap - _, err = writeRoaringWithLen(memSegment.PostingsLocs[postingID], w) + _, err = writeRoaringWithLen(memSegment.PostingsLocs[postingID], w, &reuseBuf, reuseBufVarint) if err != nil { return nil, err } @@ -436,6 +438,8 @@ func persistPostingsLocs(memSegment *mem.Segment, w *CountHashWriter) (rv []uint func persistPostingsLists(memSegment *mem.Segment, w *CountHashWriter, postingsListLocs, freqOffsets, locOffsets []uint64) (rv []uint64, err error) { rv = make([]uint64, 0, len(memSegment.Postings)) + var reuseBuf bytes.Buffer + reuseBufVarint := make([]byte, binary.MaxVarintLen64) for postingID := range memSegment.Postings { // record where we start this posting list rv = append(rv, uint64(w.Count())) @@ -448,7 +452,7 @@ func persistPostingsLists(memSegment *mem.Segment, w 
*CountHashWriter, } // write out the length and bitmap - _, err = writeRoaringWithLen(memSegment.Postings[postingID], w) + _, err = writeRoaringWithLen(memSegment.Postings[postingID], w, &reuseBuf, reuseBufVarint) if err != nil { return nil, err } diff --git a/index/scorch/segment/zap/merge.go b/index/scorch/segment/zap/merge.go index 07b9bafcc..f8ca142f1 100644 --- a/index/scorch/segment/zap/merge.go +++ b/index/scorch/segment/zap/merge.go @@ -129,6 +129,7 @@ func persistMergedRest(segments []*Segment, drops []*roaring.Bitmap, newSegDocCount uint64, chunkFactor uint32, w *CountHashWriter) ([]uint64, uint64, error) { + var bufReuse bytes.Buffer var bufMaxVarintLen64 []byte = make([]byte, binary.MaxVarintLen64) var bufLoc []uint64 @@ -258,7 +259,7 @@ func persistMergedRest(segments []*Segment, drops []*roaring.Bitmap, return nil, 0, err } postingLocOffset := uint64(w.Count()) - _, err = writeRoaringWithLen(newRoaringLocs, w) + _, err = writeRoaringWithLen(newRoaringLocs, w, &bufReuse, bufMaxVarintLen64) if err != nil { return nil, 0, err } @@ -284,7 +285,7 @@ func persistMergedRest(segments []*Segment, drops []*roaring.Bitmap, if err != nil { return nil, 0, err } - _, err = writeRoaringWithLen(newRoaring, w) + _, err = writeRoaringWithLen(newRoaring, w, &bufReuse, bufMaxVarintLen64) if err != nil { return nil, 0, err } diff --git a/index/scorch/segment/zap/write.go b/index/scorch/segment/zap/write.go index 246710743..c5316a99f 100644 --- a/index/scorch/segment/zap/write.go +++ b/index/scorch/segment/zap/write.go @@ -23,31 +23,30 @@ import ( ) // writes out the length of the roaring bitmap in bytes as varint -// then writs out the roaring bitmap itself -func writeRoaringWithLen(r *roaring.Bitmap, w io.Writer) (int, error) { - var buffer bytes.Buffer +// then writes out the roaring bitmap itself +func writeRoaringWithLen(r *roaring.Bitmap, w io.Writer, + reuseBuf *bytes.Buffer, reuseBufVarint []byte) (int, error) { + reuseBuf.Reset() + // write out postings list to 
memory so we know the len - postingsListLen, err := r.WriteTo(&buffer) + postingsListLen, err := r.WriteTo(reuseBuf) if err != nil { return 0, err } var tw int // write out the length of this postings list - buf := make([]byte, binary.MaxVarintLen64) - n := binary.PutUvarint(buf, uint64(postingsListLen)) - nw, err := w.Write(buf[:n]) + n := binary.PutUvarint(reuseBufVarint, uint64(postingsListLen)) + nw, err := w.Write(reuseBufVarint[:n]) tw += nw if err != nil { return tw, err } - // write out the postings list itself - nw, err = w.Write(buffer.Bytes()) + nw, err = w.Write(reuseBuf.Bytes()) tw += nw if err != nil { return tw, err } - return tw, nil } From 603425c2c585c103a69691fa0d2d399bc2ff282a Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Wed, 24 Jan 2018 08:39:50 -0800 Subject: [PATCH 155/728] scorch zap mergerLoop missing fireAsyncError case --- index/scorch/merge.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/index/scorch/merge.go b/index/scorch/merge.go index 78c27ddb1..cdf0b40b2 100644 --- a/index/scorch/merge.go +++ b/index/scorch/merge.go @@ -54,7 +54,6 @@ OUTER: lastEpochMergePlanned = ourSnapshot.epoch s.fireEvent(EventKindMergerProgress, time.Since(startTime)) - } _ = ourSnapshot.DecRef() @@ -81,6 +80,7 @@ OUTER: // lets get started err := s.planMergeAtSnapshot(ourSnapshot) if err != nil { + s.fireAsyncError(fmt.Errorf("merging err: %v", err)) _ = ourSnapshot.DecRef() continue OUTER } From 29d526a7c22f91a8c45adfe9c8e5141a373fed2d Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Wed, 24 Jan 2018 09:13:16 -0800 Subject: [PATCH 156/728] scorch zap merge uses DefaultChunkFactor --- index/scorch/merge.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/index/scorch/merge.go b/index/scorch/merge.go index cdf0b40b2..5ded29b5a 100644 --- a/index/scorch/merge.go +++ b/index/scorch/merge.go @@ -141,7 +141,7 @@ func (s *Scorch) planMergeAtSnapshot(ourSnapshot *IndexSnapshot) error { filename := zapFileName(newSegmentID) 
s.markIneligibleForRemoval(filename) path := s.path + string(os.PathSeparator) + filename - newDocNums, err := zap.Merge(segmentsToMerge, docsToDrop, path, 1024) + newDocNums, err := zap.Merge(segmentsToMerge, docsToDrop, path, DefaultChunkFactor) if err != nil { s.unmarkIneligibleForRemoval(filename) return fmt.Errorf("merging failed: %v", err) From d389e2bb40dc3a9c2feabb7e1703e6b48c35f1c9 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Wed, 24 Jan 2018 09:22:10 -0800 Subject: [PATCH 157/728] scorch zap merge file cleanup on error, and some minor prealloc's --- index/scorch/segment/zap/merge.go | 29 +++++++++++++++++++++-------- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/index/scorch/segment/zap/merge.go b/index/scorch/segment/zap/merge.go index f8ca142f1..d19f3938d 100644 --- a/index/scorch/segment/zap/merge.go +++ b/index/scorch/segment/zap/merge.go @@ -29,10 +29,10 @@ import ( "github.com/golang/snappy" ) -// Merge takes a slice of zap segments, bit masks describing which documents -// from the may be dropped, and creates a new segment containing the remaining -// data. This new segment is built at the specified path, with the provided -// chunkFactor. +// Merge takes a slice of zap segments and bit masks describing which +// documents may be dropped, and creates a new segment containing the +// remaining data. This new segment is built at the specified path, +// with the provided chunkFactor. 
func Merge(segments []*Segment, drops []*roaring.Bitmap, path string, chunkFactor uint32) ([][]uint64, error) { flag := os.O_RDWR | os.O_CREATE @@ -42,6 +42,11 @@ func Merge(segments []*Segment, drops []*roaring.Bitmap, path string, return nil, err } + cleanup := func() { + _ = f.Close() + _ = os.Remove(path) + } + // buffer the output br := bufio.NewWriter(f) @@ -50,22 +55,25 @@ func Merge(segments []*Segment, drops []*roaring.Bitmap, path string, fieldsInv := mergeFields(segments) fieldsMap := mapFields(fieldsInv) - newSegDocCount := computeNewDocCount(segments, drops) var newDocNums [][]uint64 var storedIndexOffset uint64 fieldDvLocsOffset := uint64(fieldNotUninverted) var dictLocs []uint64 + + newSegDocCount := computeNewDocCount(segments, drops) if newSegDocCount > 0 { storedIndexOffset, newDocNums, err = mergeStoredAndRemap(segments, drops, fieldsMap, fieldsInv, newSegDocCount, cr) if err != nil { + cleanup() return nil, err } dictLocs, fieldDvLocsOffset, err = persistMergedRest(segments, drops, fieldsInv, fieldsMap, newDocNums, newSegDocCount, chunkFactor, cr) if err != nil { + cleanup() return nil, err } } else { @@ -74,27 +82,32 @@ func Merge(segments []*Segment, drops []*roaring.Bitmap, path string, fieldsIndexOffset, err := persistFields(fieldsInv, cr, dictLocs) if err != nil { + cleanup() return nil, err } err = persistFooter(newSegDocCount, storedIndexOffset, fieldsIndexOffset, fieldDvLocsOffset, chunkFactor, cr.Sum32(), cr) if err != nil { + cleanup() return nil, err } err = br.Flush() if err != nil { + cleanup() return nil, err } err = f.Sync() if err != nil { + cleanup() return nil, err } err = f.Close() if err != nil { + cleanup() return nil, err } @@ -103,7 +116,7 @@ func Merge(segments []*Segment, drops []*roaring.Bitmap, path string, // mapFields takes the fieldsInv list and builds the map func mapFields(fields []string) map[string]uint16 { - rv := make(map[string]uint16) + rv := make(map[string]uint16, len(fields)) for i, fieldName := range 
fields { rv[fieldName] = uint16(i) } @@ -327,7 +340,7 @@ func persistMergedRest(segments []*Segment, drops []*roaring.Bitmap, rv1[fieldID] = dictOffset // update the doc value - var docNumbers docIDRange + docNumbers := make(docIDRange, 0, len(docTermMap)) for k := range docTermMap { docNumbers = append(docNumbers, k) } @@ -353,7 +366,7 @@ func persistMergedRest(segments []*Segment, drops []*roaring.Bitmap, } fieldDvLocsOffset = uint64(w.Count()) - buf := make([]byte, binary.MaxVarintLen64) + buf := bufMaxVarintLen64 for _, offset := range fieldDvLocs { n := binary.PutUvarint(buf, uint64(offset)) _, err := w.Write(buf[:n]) From 6a17ff48c71f265da517e1db209e889338f9bb96 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Sat, 27 Jan 2018 09:37:57 -0800 Subject: [PATCH 158/728] scorch zap removed uneeded []byte cast of term --- index/scorch/segment/zap/merge.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/index/scorch/segment/zap/merge.go b/index/scorch/segment/zap/merge.go index d19f3938d..1acbbf5d7 100644 --- a/index/scorch/segment/zap/merge.go +++ b/index/scorch/segment/zap/merge.go @@ -247,7 +247,7 @@ func persistMergedRest(segments []*Segment, drops []*roaring.Bitmap, } } - docTermMap[hitNewDocNum] = append(docTermMap[hitNewDocNum], []byte(term)...) + docTermMap[hitNewDocNum] = append(docTermMap[hitNewDocNum], term...) docTermMap[hitNewDocNum] = append(docTermMap[hitNewDocNum], termSeparator) next, err2 = postItr.Next() } From 10dd5489c21a47a6a3138c25a531925961000acf Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Sat, 27 Jan 2018 09:42:56 -0800 Subject: [PATCH 159/728] scorch zap Dict.postingsList() takes []byte for more mem control This allows callers that already have a []byte term to avoid string'ification garbage. 
--- index/scorch/segment/zap/dict.go | 7 +++---- index/scorch/segment/zap/merge.go | 2 +- index/scorch/segment/zap/posting.go | 2 +- index/scorch/segment/zap/segment.go | 2 +- 4 files changed, 6 insertions(+), 7 deletions(-) diff --git a/index/scorch/segment/zap/dict.go b/index/scorch/segment/zap/dict.go index 284bc1898..bb6fd9478 100644 --- a/index/scorch/segment/zap/dict.go +++ b/index/scorch/segment/zap/dict.go @@ -35,10 +35,10 @@ type Dictionary struct { // PostingsList returns the postings list for the specified term func (d *Dictionary) PostingsList(term string, except *roaring.Bitmap) (segment.PostingsList, error) { - return d.postingsList(term, except) + return d.postingsList([]byte(term), except) } -func (d *Dictionary) postingsList(term string, except *roaring.Bitmap) (*PostingsList, error) { +func (d *Dictionary) postingsList(term []byte, except *roaring.Bitmap) (*PostingsList, error) { rv := &PostingsList{ sb: d.sb, term: term, @@ -46,7 +46,7 @@ func (d *Dictionary) postingsList(term string, except *roaring.Bitmap) (*Posting } if d.fst != nil { - postingsOffset, exists, err := d.fst.Get([]byte(term)) + postingsOffset, exists, err := d.fst.Get(term) if err != nil { return nil, fmt.Errorf("vellum err: %v", err) } @@ -96,7 +96,6 @@ func (d *Dictionary) postingsList(term string, except *roaring.Bitmap) (*Posting // Iterator returns an iterator for this dictionary func (d *Dictionary) Iterator() segment.DictionaryIterator { - rv := &DictionaryIterator{ d: d, } diff --git a/index/scorch/segment/zap/merge.go b/index/scorch/segment/zap/merge.go index 1acbbf5d7..c1a1a38aa 100644 --- a/index/scorch/segment/zap/merge.go +++ b/index/scorch/segment/zap/merge.go @@ -206,7 +206,7 @@ func persistMergedRest(segments []*Segment, drops []*roaring.Bitmap, if dict == nil { continue } - postings, err2 := dict.postingsList(string(term), drops[dictI]) + postings, err2 := dict.postingsList(term, drops[dictI]) if err2 != nil { return nil, 0, err2 } diff --git 
a/index/scorch/segment/zap/posting.go b/index/scorch/segment/zap/posting.go index 95f350130..e8533a12a 100644 --- a/index/scorch/segment/zap/posting.go +++ b/index/scorch/segment/zap/posting.go @@ -28,7 +28,7 @@ import ( // PostingsList is an in-memory represenation of a postings list type PostingsList struct { sb *SegmentBase - term string + term []byte postingsOffset uint64 freqOffset uint64 locOffset uint64 diff --git a/index/scorch/segment/zap/segment.go b/index/scorch/segment/zap/segment.go index df71f1d62..94268cace 100644 --- a/index/scorch/segment/zap/segment.go +++ b/index/scorch/segment/zap/segment.go @@ -344,7 +344,7 @@ func (s *SegmentBase) DocNumbers(ids []string) (*roaring.Bitmap, error) { } for _, id := range ids { - postings, err := idDict.postingsList(id, nil) + postings, err := idDict.postingsList([]byte(id), nil) if err != nil { return nil, err } From 9038d75c98e8597bfd0d0c5e992b198e3709b89f Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Sat, 27 Jan 2018 09:56:56 -0800 Subject: [PATCH 160/728] scorch zap allocate govarint.U64Base128Encoder just once Instead of allocating a govarint.U64Base128Encoder in the inner loop, allocate it just once on the outside, as it appears that it's just a thin wrapper around binary.PutUvarint(). 
--- index/scorch/segment/zap/merge.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/index/scorch/segment/zap/merge.go b/index/scorch/segment/zap/merge.go index c1a1a38aa..f7e4be3d8 100644 --- a/index/scorch/segment/zap/merge.go +++ b/index/scorch/segment/zap/merge.go @@ -390,6 +390,8 @@ func mergeStoredAndRemap(segments []*Segment, drops []*roaring.Bitmap, var metaBuf bytes.Buffer var data, compressed []byte + metaEncoder := govarint.NewU64Base128Encoder(&metaBuf) + vals := make([][][]byte, len(fieldsInv)) typs := make([][]byte, len(fieldsInv)) poss := make([][][]uint64, len(fieldsInv)) @@ -407,8 +409,6 @@ func mergeStoredAndRemap(segments []*Segment, drops []*roaring.Bitmap, compressed = compressed[:0] curr = 0 - metaEncoder := govarint.NewU64Base128Encoder(&metaBuf) - if drops[segI] != nil && drops[segI].Contains(uint32(docNum)) { segNewDocNums = append(segNewDocNums, docDropped) } else { From 3030d4edb5d64dba92d2a1be291588f266005f48 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Sat, 27 Jan 2018 10:05:12 -0800 Subject: [PATCH 161/728] scorch zap merge preallocs segNewDocNums capacity --- index/scorch/segment/zap/merge.go | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/index/scorch/segment/zap/merge.go b/index/scorch/segment/zap/merge.go index f7e4be3d8..2c290bba5 100644 --- a/index/scorch/segment/zap/merge.go +++ b/index/scorch/segment/zap/merge.go @@ -366,6 +366,7 @@ func persistMergedRest(segments []*Segment, drops []*roaring.Bitmap, } fieldDvLocsOffset = uint64(w.Count()) + buf := bufMaxVarintLen64 for _, offset := range fieldDvLocs { n := binary.PutUvarint(buf, uint64(offset)) @@ -400,14 +401,14 @@ func mergeStoredAndRemap(segments []*Segment, drops []*roaring.Bitmap, // for each segment for segI, segment := range segments { - var segNewDocNums []uint64 + segNewDocNums := make([]uint64, 0, segment.numDocs) // for each doc num for docNum := uint64(0); docNum < segment.numDocs; docNum++ { + curr = 0 
metaBuf.Reset() data = data[:0] compressed = compressed[:0] - curr = 0 if drops[segI] != nil && drops[segI].Contains(uint32(docNum)) { segNewDocNums = append(segNewDocNums, docDropped) @@ -481,7 +482,9 @@ func mergeStoredAndRemap(segments []*Segment, drops []*roaring.Bitmap, metaEncoder.Close() metaBytes := metaBuf.Bytes() + compressed = snappy.Encode(compressed, data) + // record where we're about to start writing docNumOffsets[newDocNum] = uint64(w.Count()) @@ -510,6 +513,7 @@ func mergeStoredAndRemap(segments []*Segment, drops []*roaring.Bitmap, // return value is the start of the stored index offset := uint64(w.Count()) + // now write out the stored doc index for docNum := range docNumOffsets { err := binary.Write(w, binary.BigEndian, docNumOffsets[docNum]) @@ -524,13 +528,13 @@ func mergeStoredAndRemap(segments []*Segment, drops []*roaring.Bitmap, // mergeFields builds a unified list of fields used across all the input segments func mergeFields(segments []*Segment) []string { fieldsMap := map[string]struct{}{} - for _, segment := range segments { fields := segment.Fields() for _, field := range fields { fieldsMap[field] = struct{}{} } } + rv := make([]string, 0, len(fieldsMap)) // ensure _id stays first rv = append(rv, "_id") @@ -539,6 +543,5 @@ func mergeFields(segments []*Segment) []string { rv = append(rv, k) } } - return rv } From 56cdb68f35624c8075d1268b68bc788406de4461 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Sat, 27 Jan 2018 10:18:40 -0800 Subject: [PATCH 162/728] scorch zap merge checks err2 not err Also, optimize the appending of the termSeparator so that the docTermMap is accessed and updated just once. 
--- index/scorch/segment/zap/merge.go | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/index/scorch/segment/zap/merge.go b/index/scorch/segment/zap/merge.go index 2c290bba5..5e7dae1f8 100644 --- a/index/scorch/segment/zap/merge.go +++ b/index/scorch/segment/zap/merge.go @@ -146,7 +146,7 @@ func persistMergedRest(segments []*Segment, drops []*roaring.Bitmap, var bufMaxVarintLen64 []byte = make([]byte, binary.MaxVarintLen64) var bufLoc []uint64 - rv1 := make([]uint64, len(fieldsInv)) + rv := make([]uint64, len(fieldsInv)) fieldDvLocs := make([]uint64, len(fieldsInv)) fieldDvLocsOffset := uint64(fieldNotUninverted) @@ -191,12 +191,14 @@ func persistMergedRest(segments []*Segment, drops []*roaring.Bitmap, tfEncoder := newChunkedIntCoder(uint64(chunkFactor), newSegDocCount-1) locEncoder := newChunkedIntCoder(uint64(chunkFactor), newSegDocCount-1) fdvEncoder := newChunkedContentCoder(uint64(chunkFactor), newSegDocCount-1) + docTermMap := make(map[uint64][]byte, 0) for err == nil { term, _ := mergeItr.Current() newRoaring := roaring.NewBitmap() newRoaringLocs := roaring.NewBitmap() + tfEncoder.Reset() locEncoder.Reset() @@ -222,9 +224,9 @@ func persistMergedRest(segments []*Segment, drops []*roaring.Bitmap, // encode norm bits norm := next.Norm() normBits := math.Float32bits(float32(norm)) - err3 := tfEncoder.Add(hitNewDocNum, next.Frequency(), uint64(normBits)) - if err3 != nil { - return nil, 0, err3 + err = tfEncoder.Add(hitNewDocNum, next.Frequency(), uint64(normBits)) + if err != nil { + return nil, 0, err } locs := next.Locations() if len(locs) > 0 { @@ -247,15 +249,16 @@ func persistMergedRest(segments []*Segment, drops []*roaring.Bitmap, } } - docTermMap[hitNewDocNum] = append(docTermMap[hitNewDocNum], term...) 
- docTermMap[hitNewDocNum] = append(docTermMap[hitNewDocNum], termSeparator) + docTermMap[hitNewDocNum] = + append(append(docTermMap[hitNewDocNum], term...), termSeparator) + next, err2 = postItr.Next() } - if err != nil { - return nil, 0, err + if err2 != nil { + return nil, 0, err2 } - } + tfEncoder.Close() locEncoder.Close() @@ -337,7 +340,7 @@ func persistMergedRest(segments []*Segment, drops []*roaring.Bitmap, return nil, 0, err } - rv1[fieldID] = dictOffset + rv[fieldID] = dictOffset // update the doc value docNumbers := make(docIDRange, 0, len(docTermMap)) @@ -376,7 +379,7 @@ func persistMergedRest(segments []*Segment, drops []*roaring.Bitmap, } } - return rv1, fieldDvLocsOffset, nil + return rv, fieldDvLocsOffset, nil } const docDropped = math.MaxUint64 From 916bbf41257935f949cdbd80f95f327fbea9596b Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Sat, 27 Jan 2018 10:33:38 -0800 Subject: [PATCH 163/728] scorch zap merge prealloc's docTermMap capacity --- index/scorch/segment/zap/merge.go | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/index/scorch/segment/zap/merge.go b/index/scorch/segment/zap/merge.go index 5e7dae1f8..4f0333bb4 100644 --- a/index/scorch/segment/zap/merge.go +++ b/index/scorch/segment/zap/merge.go @@ -151,6 +151,7 @@ func persistMergedRest(segments []*Segment, drops []*roaring.Bitmap, fieldDvLocsOffset := uint64(fieldNotUninverted) var vellumBuf bytes.Buffer + // for each field for fieldID, fieldName := range fieldsInv { if fieldID != 0 { @@ -190,9 +191,9 @@ func persistMergedRest(segments []*Segment, drops []*roaring.Bitmap, tfEncoder := newChunkedIntCoder(uint64(chunkFactor), newSegDocCount-1) locEncoder := newChunkedIntCoder(uint64(chunkFactor), newSegDocCount-1) - fdvEncoder := newChunkedContentCoder(uint64(chunkFactor), newSegDocCount-1) - docTermMap := make(map[uint64][]byte, 0) + docTermMap := make(map[uint64][]byte, newSegDocCount) + for err == nil { term, _ := mergeItr.Current() @@ -319,6 
+320,7 @@ func persistMergedRest(segments []*Segment, drops []*roaring.Bitmap, } dictOffset := uint64(w.Count()) + err = newVellum.Close() if err != nil { return nil, 0, err @@ -326,10 +328,8 @@ func persistMergedRest(segments []*Segment, drops []*roaring.Bitmap, vellumData := vellumBuf.Bytes() // write out the length of the vellum data - buf := bufMaxVarintLen64 - // write out the number of chunks - n := binary.PutUvarint(buf, uint64(len(vellumData))) - _, err = w.Write(buf[:n]) + n := binary.PutUvarint(bufMaxVarintLen64, uint64(len(vellumData))) + _, err = w.Write(bufMaxVarintLen64[:n]) if err != nil { return nil, 0, err } @@ -342,25 +342,28 @@ func persistMergedRest(segments []*Segment, drops []*roaring.Bitmap, rv[fieldID] = dictOffset - // update the doc value + // update the doc nums docNumbers := make(docIDRange, 0, len(docTermMap)) for k := range docTermMap { docNumbers = append(docNumbers, k) } sort.Sort(docNumbers) + fdvEncoder := newChunkedContentCoder(uint64(chunkFactor), newSegDocCount-1) for _, docNum := range docNumbers { err = fdvEncoder.Add(docNum, docTermMap[docNum]) if err != nil { return nil, 0, err } } - // get the field doc value offset - fieldDvLocs[fieldID] = uint64(w.Count()) err = fdvEncoder.Close() if err != nil { return nil, 0, err } + + // get the field doc value offset + fieldDvLocs[fieldID] = uint64(w.Count()) + // persist the doc value details for this field _, err = fdvEncoder.Write(w) if err != nil { From 6985db13a0f626de144ff5ae5f0cbee183d47ee2 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Sat, 27 Jan 2018 10:39:02 -0800 Subject: [PATCH 164/728] scorch zap merge reuses docNumbers array --- index/scorch/segment/zap/merge.go | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/index/scorch/segment/zap/merge.go b/index/scorch/segment/zap/merge.go index 4f0333bb4..9156675de 100644 --- a/index/scorch/segment/zap/merge.go +++ b/index/scorch/segment/zap/merge.go @@ -150,6 +150,8 @@ func persistMergedRest(segments 
[]*Segment, drops []*roaring.Bitmap, fieldDvLocs := make([]uint64, len(fieldsInv)) fieldDvLocsOffset := uint64(fieldNotUninverted) + var docNumbers docIDRange + var vellumBuf bytes.Buffer // for each field @@ -343,7 +345,10 @@ func persistMergedRest(segments []*Segment, drops []*roaring.Bitmap, rv[fieldID] = dictOffset // update the doc nums - docNumbers := make(docIDRange, 0, len(docTermMap)) + if cap(docNumbers) < len(docTermMap) { + docNumbers = make(docIDRange, 0, len(docTermMap)) + } + docNumbers = docNumbers[:0] for k := range docTermMap { docNumbers = append(docNumbers, k) } From 0041664bc412ed5d93110de5c6648c573557e82a Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Sat, 27 Jan 2018 10:45:23 -0800 Subject: [PATCH 165/728] scorch zap merge computeNewDocCount() optimize 1 variable --- index/scorch/segment/zap/merge.go | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/index/scorch/segment/zap/merge.go b/index/scorch/segment/zap/merge.go index 9156675de..a239740cf 100644 --- a/index/scorch/segment/zap/merge.go +++ b/index/scorch/segment/zap/merge.go @@ -126,15 +126,14 @@ func mapFields(fields []string) map[string]uint16 { // computeNewDocCount determines how many documents will be in the newly // merged segment when obsoleted docs are dropped func computeNewDocCount(segments []*Segment, drops []*roaring.Bitmap) uint64 { - var newSegDocCount uint64 + var newDocCount uint64 for segI, segment := range segments { - segIAfterDrop := segment.NumDocs() + newDocCount += segment.NumDocs() if drops[segI] != nil { - segIAfterDrop -= drops[segI].GetCardinality() + newDocCount -= drops[segI].GetCardinality() } - newSegDocCount += segIAfterDrop } - return newSegDocCount + return newDocCount } func persistMergedRest(segments []*Segment, drops []*roaring.Bitmap, From 8dd17a3b2035cd7a82b7b629e06ca821d36a0688 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Sat, 27 Jan 2018 10:55:29 -0800 Subject: [PATCH 166/728] scorch zap mergeStoredAndRemap uses continue 
for less indentation --- index/scorch/segment/zap/merge.go | 171 +++++++++++++++--------------- 1 file changed, 86 insertions(+), 85 deletions(-) diff --git a/index/scorch/segment/zap/merge.go b/index/scorch/segment/zap/merge.go index a239740cf..db99dc607 100644 --- a/index/scorch/segment/zap/merge.go +++ b/index/scorch/segment/zap/merge.go @@ -394,7 +394,7 @@ const docDropped = math.MaxUint64 func mergeStoredAndRemap(segments []*Segment, drops []*roaring.Bitmap, fieldsMap map[string]uint16, fieldsInv []string, newSegDocCount uint64, w *CountHashWriter) (uint64, [][]uint64, error) { - var rv [][]uint64 + var rv [][]uint64 // The remapped or newDocNums for each segment. var newDocNum int var curr int @@ -415,109 +415,110 @@ func mergeStoredAndRemap(segments []*Segment, drops []*roaring.Bitmap, // for each doc num for docNum := uint64(0); docNum < segment.numDocs; docNum++ { + if drops[segI] != nil && drops[segI].Contains(uint32(docNum)) { + segNewDocNums = append(segNewDocNums, docDropped) + continue + } + + segNewDocNums = append(segNewDocNums, uint64(newDocNum)) + curr = 0 metaBuf.Reset() data = data[:0] compressed = compressed[:0] - if drops[segI] != nil && drops[segI].Contains(uint32(docNum)) { - segNewDocNums = append(segNewDocNums, docDropped) - } else { - segNewDocNums = append(segNewDocNums, uint64(newDocNum)) - // collect all the data - for i := 0; i < len(fieldsInv); i++ { - vals[i] = vals[i][:0] - typs[i] = typs[i][:0] - poss[i] = poss[i][:0] - } - err := segment.VisitDocument(docNum, func(field string, typ byte, value []byte, pos []uint64) bool { - fieldID := int(fieldsMap[field]) - vals[fieldID] = append(vals[fieldID], value) - typs[fieldID] = append(typs[fieldID], typ) - poss[fieldID] = append(poss[fieldID], pos) - return true - }) - if err != nil { - return 0, nil, err - } - - // now walk the fields in order - for fieldID := range fieldsInv { + // collect all the data + for i := 0; i < len(fieldsInv); i++ { + vals[i] = vals[i][:0] + typs[i] = 
typs[i][:0] + poss[i] = poss[i][:0] + } + err := segment.VisitDocument(docNum, func(field string, typ byte, value []byte, pos []uint64) bool { + fieldID := int(fieldsMap[field]) + vals[fieldID] = append(vals[fieldID], value) + typs[fieldID] = append(typs[fieldID], typ) + poss[fieldID] = append(poss[fieldID], pos) + return true + }) + if err != nil { + return 0, nil, err + } - storedFieldValues := vals[int(fieldID)] + // now walk the fields in order + for fieldID := range fieldsInv { + storedFieldValues := vals[int(fieldID)] - // has stored values for this field - num := len(storedFieldValues) + // has stored values for this field + num := len(storedFieldValues) - // process each value - for i := 0; i < num; i++ { - // encode field - _, err2 := metaEncoder.PutU64(uint64(fieldID)) - if err2 != nil { - return 0, nil, err2 - } - // encode type - _, err2 = metaEncoder.PutU64(uint64(typs[int(fieldID)][i])) - if err2 != nil { - return 0, nil, err2 - } - // encode start offset - _, err2 = metaEncoder.PutU64(uint64(curr)) - if err2 != nil { - return 0, nil, err2 - } - // end len - _, err2 = metaEncoder.PutU64(uint64(len(storedFieldValues[i]))) - if err2 != nil { - return 0, nil, err2 - } - // encode number of array pos - _, err2 = metaEncoder.PutU64(uint64(len(poss[int(fieldID)][i]))) + // process each value + for i := 0; i < num; i++ { + // encode field + _, err2 := metaEncoder.PutU64(uint64(fieldID)) + if err2 != nil { + return 0, nil, err2 + } + // encode type + _, err2 = metaEncoder.PutU64(uint64(typs[int(fieldID)][i])) + if err2 != nil { + return 0, nil, err2 + } + // encode start offset + _, err2 = metaEncoder.PutU64(uint64(curr)) + if err2 != nil { + return 0, nil, err2 + } + // end len + _, err2 = metaEncoder.PutU64(uint64(len(storedFieldValues[i]))) + if err2 != nil { + return 0, nil, err2 + } + // encode number of array pos + _, err2 = metaEncoder.PutU64(uint64(len(poss[int(fieldID)][i]))) + if err2 != nil { + return 0, nil, err2 + } + // encode all array 
positions + for j := 0; j < len(poss[int(fieldID)][i]); j++ { + _, err2 = metaEncoder.PutU64(poss[int(fieldID)][i][j]) if err2 != nil { return 0, nil, err2 } - // encode all array positions - for j := 0; j < len(poss[int(fieldID)][i]); j++ { - _, err2 = metaEncoder.PutU64(poss[int(fieldID)][i][j]) - if err2 != nil { - return 0, nil, err2 - } - } - // append data - data = append(data, storedFieldValues[i]...) - // update curr - curr += len(storedFieldValues[i]) } + // append data + data = append(data, storedFieldValues[i]...) + // update curr + curr += len(storedFieldValues[i]) } + } - metaEncoder.Close() - metaBytes := metaBuf.Bytes() - - compressed = snappy.Encode(compressed, data) + metaEncoder.Close() + metaBytes := metaBuf.Bytes() - // record where we're about to start writing - docNumOffsets[newDocNum] = uint64(w.Count()) + compressed = snappy.Encode(compressed, data) - // write out the meta len and compressed data len - _, err = writeUvarints(w, - uint64(len(metaBytes)), uint64(len(compressed))) - if err != nil { - return 0, nil, err - } - // now write the meta - _, err = w.Write(metaBytes) - if err != nil { - return 0, nil, err - } - // now write the compressed data - _, err = w.Write(compressed) - if err != nil { - return 0, nil, err - } + // record where we're about to start writing + docNumOffsets[newDocNum] = uint64(w.Count()) - newDocNum++ + // write out the meta len and compressed data len + _, err = writeUvarints(w, uint64(len(metaBytes)), uint64(len(compressed))) + if err != nil { + return 0, nil, err } + // now write the meta + _, err = w.Write(metaBytes) + if err != nil { + return 0, nil, err + } + // now write the compressed data + _, err = w.Write(compressed) + if err != nil { + return 0, nil, err + } + + newDocNum++ } + rv = append(rv, segNewDocNums) } From 745575a6c13b327bc636c91d5281d05512993403 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Sat, 27 Jan 2018 11:02:53 -0800 Subject: [PATCH 167/728] scorch zap mergeStoredAndRemap uses array 
indexing, not append() Since we have right array size preallocated, we don't need the extra capacity checking of append(). --- index/scorch/segment/zap/merge.go | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/index/scorch/segment/zap/merge.go b/index/scorch/segment/zap/merge.go index db99dc607..8c06f2fed 100644 --- a/index/scorch/segment/zap/merge.go +++ b/index/scorch/segment/zap/merge.go @@ -395,7 +395,8 @@ func mergeStoredAndRemap(segments []*Segment, drops []*roaring.Bitmap, fieldsMap map[string]uint16, fieldsInv []string, newSegDocCount uint64, w *CountHashWriter) (uint64, [][]uint64, error) { var rv [][]uint64 // The remapped or newDocNums for each segment. - var newDocNum int + + var newDocNum uint64 var curr int var metaBuf bytes.Buffer @@ -411,16 +412,17 @@ func mergeStoredAndRemap(segments []*Segment, drops []*roaring.Bitmap, // for each segment for segI, segment := range segments { - segNewDocNums := make([]uint64, 0, segment.numDocs) + segNewDocNums := make([]uint64, segment.numDocs) // for each doc num for docNum := uint64(0); docNum < segment.numDocs; docNum++ { + // TODO: roaring's API limits docNums to 32-bits? if drops[segI] != nil && drops[segI].Contains(uint32(docNum)) { - segNewDocNums = append(segNewDocNums, docDropped) + segNewDocNums[docNum] = docDropped continue } - segNewDocNums = append(segNewDocNums, uint64(newDocNum)) + segNewDocNums[docNum] = newDocNum curr = 0 metaBuf.Reset() From a444c25ddf2101071f27fc1acacf90354023ada7 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Sat, 27 Jan 2018 13:23:33 -0800 Subject: [PATCH 168/728] scorch zap merge uses array for docTermMap with no sorting Instead of sorting docNum keys from a hashmap, this change instead iterates from docNum 0 to N and uses an array instead of hashmap. The array is also reused across outer loop iterations. This optimizes for when there's a lot of structural similarity between docs, where many/most docs have the same fields. i.e., beers, breweries. 
If every doc has completely different fields, then this change might produce worse behavior compared to the previous sparse hashmap approach. --- index/scorch/segment/zap/merge.go | 37 +++++++++++++++++-------------- 1 file changed, 20 insertions(+), 17 deletions(-) diff --git a/index/scorch/segment/zap/merge.go b/index/scorch/segment/zap/merge.go index 8c06f2fed..ed9c7f98b 100644 --- a/index/scorch/segment/zap/merge.go +++ b/index/scorch/segment/zap/merge.go @@ -21,7 +21,6 @@ import ( "fmt" "math" "os" - "sort" "github.com/RoaringBitmap/roaring" "github.com/Smerity/govarint" @@ -149,7 +148,11 @@ func persistMergedRest(segments []*Segment, drops []*roaring.Bitmap, fieldDvLocs := make([]uint64, len(fieldsInv)) fieldDvLocsOffset := uint64(fieldNotUninverted) - var docNumbers docIDRange + // docTermMap is keyed by docNum, where the array impl provides + // better memory usage behavior than a sparse-friendlier hashmap + // for when docs have much structural similarity (i.e., every doc + // has a given field) + var docTermMap [][]byte var vellumBuf bytes.Buffer @@ -193,7 +196,14 @@ func persistMergedRest(segments []*Segment, drops []*roaring.Bitmap, tfEncoder := newChunkedIntCoder(uint64(chunkFactor), newSegDocCount-1) locEncoder := newChunkedIntCoder(uint64(chunkFactor), newSegDocCount-1) - docTermMap := make(map[uint64][]byte, newSegDocCount) + if uint64(cap(docTermMap)) < newSegDocCount { + docTermMap = make([][]byte, newSegDocCount) + } else { + docTermMap = docTermMap[0:newSegDocCount] + for docNum := range docTermMap { // reset the docTermMap + docTermMap[docNum] = docTermMap[docNum][:0] + } + } for err == nil { term, _ := mergeItr.Current() @@ -343,21 +353,14 @@ func persistMergedRest(segments []*Segment, drops []*roaring.Bitmap, rv[fieldID] = dictOffset - // update the doc nums - if cap(docNumbers) < len(docTermMap) { - docNumbers = make(docIDRange, 0, len(docTermMap)) - } - docNumbers = docNumbers[:0] - for k := range docTermMap { - docNumbers = 
append(docNumbers, k) - } - sort.Sort(docNumbers) - + // update the field doc values fdvEncoder := newChunkedContentCoder(uint64(chunkFactor), newSegDocCount-1) - for _, docNum := range docNumbers { - err = fdvEncoder.Add(docNum, docTermMap[docNum]) - if err != nil { - return nil, 0, err + for docNum, docTerms := range docTermMap { + if len(docTerms) > 0 { + err = fdvEncoder.Add(uint64(docNum), docTerms) + if err != nil { + return nil, 0, err + } } } err = fdvEncoder.Close() From 634cfa05606be02b4e532ff449f2c0d9b8d2aac1 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Sat, 27 Jan 2018 14:38:04 -0800 Subject: [PATCH 169/728] scorch zap chunkedIntCoder optimization to prealloc some final buf --- index/scorch/segment/zap/build.go | 5 +---- index/scorch/segment/zap/intcoder.go | 1 + index/scorch/segment/zap/merge.go | 12 ++++++------ 3 files changed, 8 insertions(+), 10 deletions(-) diff --git a/index/scorch/segment/zap/build.go b/index/scorch/segment/zap/build.go index 769c07958..58f9faeaf 100644 --- a/index/scorch/segment/zap/build.go +++ b/index/scorch/segment/zap/build.go @@ -368,7 +368,6 @@ func persistPostingDetails(memSegment *mem.Segment, w *CountHashWriter, chunkFac } // put pos - err = locEncoder.Add(docNum, locpos[locOffset]) if err != nil { return nil, nil, err @@ -386,10 +385,8 @@ func persistPostingDetails(memSegment *mem.Segment, w *CountHashWriter, chunkFac return nil, nil, err } - // put array positions - num := len(locarraypos[locOffset]) - // put the number of array positions to follow + num := len(locarraypos[locOffset]) err = locEncoder.Add(docNum, uint64(num)) if err != nil { return nil, nil, err diff --git a/index/scorch/segment/zap/intcoder.go b/index/scorch/segment/zap/intcoder.go index 7e268bcf3..e9f295023 100644 --- a/index/scorch/segment/zap/intcoder.go +++ b/index/scorch/segment/zap/intcoder.go @@ -41,6 +41,7 @@ func newChunkedIntCoder(chunkSize uint64, maxDocNum uint64) *chunkedIntCoder { chunkSize: chunkSize, maxDocNum: maxDocNum, 
chunkLens: make([]uint64, total), + final: make([]byte, 0, 64), } rv.encoder = govarint.NewU64Base128Encoder(&rv.chunkBuf) diff --git a/index/scorch/segment/zap/merge.go b/index/scorch/segment/zap/merge.go index ed9c7f98b..cc348d720 100644 --- a/index/scorch/segment/zap/merge.go +++ b/index/scorch/segment/zap/merge.go @@ -247,12 +247,12 @@ func persistMergedRest(segments []*Segment, drops []*roaring.Bitmap, if cap(bufLoc) < 5+len(loc.ArrayPositions()) { bufLoc = make([]uint64, 0, 5+len(loc.ArrayPositions())) } - args := bufLoc[0:0] - args = append(args, uint64(fieldsMap[loc.Field()])) - args = append(args, loc.Pos()) - args = append(args, loc.Start()) - args = append(args, loc.End()) - args = append(args, uint64(len(loc.ArrayPositions()))) + args := bufLoc[0:5] + args[0] = uint64(fieldsMap[loc.Field()]) + args[1] = loc.Pos() + args[2] = loc.Start() + args[3] = loc.End() + args[4] = uint64(len(loc.ArrayPositions())) args = append(args, loc.ArrayPositions()...) err = locEncoder.Add(hitNewDocNum, args...) if err != nil { From 6451c8c37f30ee83e9cb45bb948ee8ada1df55ba Mon Sep 17 00:00:00 2001 From: abhinavdangeti Date: Mon, 29 Jan 2018 17:09:18 -0800 Subject: [PATCH 170/728] MB-26396: Handling documents with geopoints in slice format + The issue lies with parsing documents containing a geopoint in slice format - which wasn't handled. + Unit test that verifies the fix. 
--- mapping/document.go | 2 +- mapping/mapping_test.go | 66 +++++++++++++++++++++++++++++------------ 2 files changed, 48 insertions(+), 20 deletions(-) diff --git a/mapping/document.go b/mapping/document.go index d4c9a8f9d..6ec0c66bb 100644 --- a/mapping/document.go +++ b/mapping/document.go @@ -504,7 +504,7 @@ func (dm *DocumentMapping) processProperty(property interface{}, path []string, } dm.walkDocument(property, path, indexes, context) } - case reflect.Map: + case reflect.Map, reflect.Slice: if subDocMapping != nil { for _, fieldMapping := range subDocMapping.Fields { if fieldMapping.Type == "geopoint" { diff --git a/mapping/mapping_test.go b/mapping/mapping_test.go index 5d7527e0a..1a7709049 100644 --- a/mapping/mapping_test.go +++ b/mapping/mapping_test.go @@ -869,37 +869,65 @@ func TestMappingForGeo(t *testing.T) { mapping := NewIndexMapping() mapping.DefaultMapping = thingMapping - x := struct { + geopoints := []interface{}{} + + // geopoint as a struct + geopoints = append(geopoints, struct { Name string `json:"name"` Location *Location `json:"location"` }{ - Name: "marty", + Name: "struct", Location: &Location{ Lon: -180, Lat: -90, }, - } + }) - doc := document.NewDocument("1") - err := mapping.MapDocument(doc, x) - if err != nil { - t.Fatal(err) - } + // geopoint as a map + geopoints = append(geopoints, struct { + Name string `json:"name"` + Location map[string]interface{} `json:"location"` + }{ + Name: "map", + Location: map[string]interface{}{ + "lon": -180, + "lat": -90, + }, + }) - var foundGeo bool - for _, f := range doc.Fields { - if f.Name() == "location" { - foundGeo = true - got := f.Value() - expect := []byte(numeric.MustNewPrefixCodedInt64(0, 0)) - if !reflect.DeepEqual(got, expect) { - t.Errorf("expected geo value: %v, got %v", expect, got) + // geopoint as a slice + geopoints = append(geopoints, struct { + Name string `json:"name"` + Location []interface{} `json:"location"` + }{ + Name: "slice", + Location: []interface{}{ + -180, -90, + 
}, + }) + + for i, geopoint := range geopoints { + doc := document.NewDocument(string(i)) + err := mapping.MapDocument(doc, geopoint) + if err != nil { + t.Fatal(err) + } + + var foundGeo bool + for _, f := range doc.Fields { + if f.Name() == "location" { + foundGeo = true + got := f.Value() + expect := []byte(numeric.MustNewPrefixCodedInt64(0, 0)) + if !reflect.DeepEqual(got, expect) { + t.Errorf("expected geo value: %v, got %v", expect, got) + } } } - } - if !foundGeo { - t.Errorf("expected to find geo point, did not") + if !foundGeo { + t.Errorf("expected to find geo point, did not") + } } } From 684ee3c0e759314a4208c1ace210519a16c0f88a Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Tue, 30 Jan 2018 10:27:07 -0800 Subject: [PATCH 171/728] scorch zap DictIterator term count fixed and more merge unit tests The zap DictionaryIterator Next() was incorrectly returning the postingsList offset as the term count. As part of this, refactored out a PostingsList.read() helper method. Also added more merge unit test scenarios, including merging a segment for a few rounds to see if there are differences before/after merging. 
--- index/scorch/segment/zap/dict.go | 47 +---- index/scorch/segment/zap/merge_test.go | 272 ++++++++++++++++++++++++- index/scorch/segment/zap/posting.go | 43 ++++ 3 files changed, 318 insertions(+), 44 deletions(-) diff --git a/index/scorch/segment/zap/dict.go b/index/scorch/segment/zap/dict.go index bb6fd9478..0f5145fba 100644 --- a/index/scorch/segment/zap/dict.go +++ b/index/scorch/segment/zap/dict.go @@ -15,7 +15,6 @@ package zap import ( - "encoding/binary" "fmt" "github.com/RoaringBitmap/roaring" @@ -51,43 +50,10 @@ func (d *Dictionary) postingsList(term []byte, except *roaring.Bitmap) (*Posting return nil, fmt.Errorf("vellum err: %v", err) } if exists { - rv.postingsOffset = postingsOffset - // read the location of the freq/norm details - var n uint64 - var read int - - rv.freqOffset, read = binary.Uvarint(d.sb.mem[postingsOffset+n : postingsOffset+binary.MaxVarintLen64]) - n += uint64(read) - rv.locOffset, read = binary.Uvarint(d.sb.mem[postingsOffset+n : postingsOffset+n+binary.MaxVarintLen64]) - n += uint64(read) - - var locBitmapOffset uint64 - locBitmapOffset, read = binary.Uvarint(d.sb.mem[postingsOffset+n : postingsOffset+n+binary.MaxVarintLen64]) - n += uint64(read) - - // go ahead and load loc bitmap - var locBitmapLen uint64 - locBitmapLen, read = binary.Uvarint(d.sb.mem[locBitmapOffset : locBitmapOffset+binary.MaxVarintLen64]) - locRoaringBytes := d.sb.mem[locBitmapOffset+uint64(read) : locBitmapOffset+uint64(read)+locBitmapLen] - rv.locBitmap = roaring.NewBitmap() - _, err := rv.locBitmap.FromBuffer(locRoaringBytes) + err = rv.read(postingsOffset, d) if err != nil { - return nil, fmt.Errorf("error loading roaring bitmap of locations with hits: %v", err) + return nil, err } - - var postingsLen uint64 - postingsLen, read = binary.Uvarint(d.sb.mem[postingsOffset+n : postingsOffset+n+binary.MaxVarintLen64]) - n += uint64(read) - - roaringBytes := d.sb.mem[postingsOffset+n : postingsOffset+n+postingsLen] - - bitmap := roaring.NewBitmap() - _, err = 
bitmap.FromBuffer(roaringBytes) - if err != nil { - return nil, fmt.Errorf("error loading roaring bitmap: %v", err) - } - - rv.postings = bitmap } } @@ -160,6 +126,7 @@ type DictionaryIterator struct { d *Dictionary itr vellum.Iterator err error + tmp PostingsList } // Next returns the next entry in the dictionary @@ -169,10 +136,14 @@ func (i *DictionaryIterator) Next() (*index.DictEntry, error) { } else if i.err != nil { return nil, i.err } - term, count := i.itr.Current() + term, postingsOffset := i.itr.Current() + i.err = i.tmp.read(postingsOffset, i.d) + if i.err != nil { + return nil, i.err + } rv := &index.DictEntry{ Term: string(term), - Count: count, + Count: i.tmp.Count(), } i.err = i.itr.Next() return rv, nil diff --git a/index/scorch/segment/zap/merge_test.go b/index/scorch/segment/zap/merge_test.go index 1e0110418..13807d8eb 100644 --- a/index/scorch/segment/zap/merge_test.go +++ b/index/scorch/segment/zap/merge_test.go @@ -15,7 +15,11 @@ package zap import ( + "fmt" "os" + "reflect" + "sort" + "strings" "testing" "github.com/RoaringBitmap/roaring" @@ -72,9 +76,251 @@ func TestMerge(t *testing.T) { if err != nil { t.Fatal(err) } + + segm, err := Open("/tmp/scorch3.zap") + if err != nil { + t.Fatalf("error opening merged segment: %v", err) + } + seg3 := segm.(*Segment) + defer func() { + cerr := seg3.Close() + if cerr != nil { + t.Fatalf("error closing segment: %v", err) + } + }() + + if seg3.Path() != "/tmp/scorch3.zap" { + t.Fatalf("wrong path") + } + if seg3.Count() != 4 { + t.Fatalf("wrong count") + } + if len(seg3.Fields()) != 5 { + t.Fatalf("wrong # fields: %#v\n", seg3.Fields()) + } + + testMergeWithSelf(t, seg3, 4) +} + +func testMergeWithSelf(t *testing.T, segCur *Segment, expectedCount uint64) { + // trying merging the segment with itself for a few rounds + var diffs []string + + for i := 0; i < 10; i++ { + fname := fmt.Sprintf("scorch-self-%d.zap", i) + + _ = os.RemoveAll("/tmp/" + fname) + + segsToMerge := make([]*Segment, 1) + 
segsToMerge[0] = segCur + + _, err := Merge(segsToMerge, []*roaring.Bitmap{nil, nil}, "/tmp/"+fname, 1024) + if err != nil { + t.Fatal(err) + } + + segm, err := Open("/tmp/" + fname) + if err != nil { + t.Fatalf("error opening merged segment: %v", err) + } + segNew := segm.(*Segment) + defer func(s *Segment) { + cerr := s.Close() + if cerr != nil { + t.Fatalf("error closing segment: %v", err) + } + }(segNew) + + if segNew.Count() != expectedCount { + t.Fatalf("wrong count") + } + if len(segNew.Fields()) != 5 { + t.Fatalf("wrong # fields: %#v\n", segNew.Fields()) + } + + diff := compareSegments(segCur, segNew) + if diff != "" { + diffs = append(diffs, fname+" is different than previous:\n"+diff) + } + + segCur = segNew + } + + if len(diffs) > 0 { + t.Errorf("mismatches after repeated self-merging: %v", strings.Join(diffs, "\n")) + } +} + +func compareSegments(a, b *Segment) string { + var rv []string + + if a.Count() != b.Count() { + return "counts" + } + + afields := append([]string(nil), a.Fields()...) + bfields := append([]string(nil), b.Fields()...) 
+ sort.Strings(afields) + sort.Strings(bfields) + if !reflect.DeepEqual(afields, bfields) { + return "fields" + } + + for _, fieldName := range afields { + adict, err := a.Dictionary(fieldName) + if err != nil { + return fmt.Sprintf("adict err: %v", err) + } + bdict, err := b.Dictionary(fieldName) + if err != nil { + return fmt.Sprintf("bdict err: %v", err) + } + + if adict.(*Dictionary).fst.Len() != bdict.(*Dictionary).fst.Len() { + rv = append(rv, fmt.Sprintf("field %s, dict fst Len()'s different: %v %v", + fieldName, adict.(*Dictionary).fst.Len(), bdict.(*Dictionary).fst.Len())) + } + + aitr := adict.Iterator() + bitr := bdict.Iterator() + for { + anext, aerr := aitr.Next() + bnext, berr := bitr.Next() + if aerr != berr { + rv = append(rv, fmt.Sprintf("field %s, dict iterator Next() errors different: %v %v", + fieldName, aerr, berr)) + break + } + if !reflect.DeepEqual(anext, bnext) { + rv = append(rv, fmt.Sprintf("field %s, dict iterator Next() results different: %#v %#v", + fieldName, anext, bnext)) + // keep going to try to see more diff details at the postingsList level + } + if aerr != nil || anext == nil || + berr != nil || bnext == nil { + break + } + + for _, next := range []*index.DictEntry{anext, bnext} { + if next == nil { + continue + } + + aplist, aerr := adict.(*Dictionary).postingsList([]byte(next.Term), nil) + bplist, berr := bdict.(*Dictionary).postingsList([]byte(next.Term), nil) + if aerr != berr { + rv = append(rv, fmt.Sprintf("field %s, term: %s, postingsList() errors different: %v %v", + fieldName, next.Term, aerr, berr)) + } + + if (aplist != nil) != (bplist != nil) { + rv = append(rv, fmt.Sprintf("field %s, term: %s, postingsList() results different: %v %v", + fieldName, next.Term, aplist, bplist)) + break + } + + if aerr != nil || aplist == nil || + berr != nil || bplist == nil { + break + } + + if aplist.Count() != bplist.Count() { + rv = append(rv, fmt.Sprintf("field %s, term: %s, postingsList().Count()'s different: %v %v", + 
fieldName, next.Term, aplist.Count(), bplist.Count())) + } + + apitr := aplist.Iterator() + bpitr := bplist.Iterator() + if (apitr != nil) != (bpitr != nil) { + rv = append(rv, fmt.Sprintf("field %s, term: %s, postingsList.Iterator() results different: %v %v", + fieldName, next.Term, apitr, bpitr)) + break + } + + for { + apitrn, aerr := apitr.Next() + bpitrn, aerr := bpitr.Next() + if aerr != berr { + rv = append(rv, fmt.Sprintf("field %s, term: %s, postingsListIterator Next() errors different: %v %v", + fieldName, next.Term, aerr, berr)) + } + + if (apitrn != nil) != (bpitrn != nil) { + rv = append(rv, fmt.Sprintf("field %s, term: %s, postingsListIterator Next() results different: %v %v", + fieldName, next.Term, apitrn, bpitrn)) + break + } + + if aerr != nil || apitrn == nil || + berr != nil || bpitrn == nil { + break + } + + if apitrn.Number() != bpitrn.Number() { + rv = append(rv, fmt.Sprintf("field %s, term: %s, postingsListIterator Next() Number()'s different: %v %v", + fieldName, next.Term, apitrn.Number(), bpitrn.Number())) + } + + if apitrn.Frequency() != bpitrn.Frequency() { + rv = append(rv, fmt.Sprintf("field %s, term: %s, postingsListIterator Next() Frequency()'s different: %v %v", + fieldName, next.Term, apitrn.Frequency(), bpitrn.Frequency())) + } + + if apitrn.Norm() != bpitrn.Norm() { + rv = append(rv, fmt.Sprintf("field %s, term: %s, postingsListIterator Next() Norm()'s different: %v %v", + fieldName, next.Term, apitrn.Norm(), bpitrn.Norm())) + } + + if len(apitrn.Locations()) != len(bpitrn.Locations()) { + rv = append(rv, fmt.Sprintf("field %s, term: %s, postingsListIterator Next() Locations() len's different: %v %v", + fieldName, next.Term, len(apitrn.Locations()), len(bpitrn.Locations()))) + } + + for loci, aloc := range apitrn.Locations() { + bloc := bpitrn.Locations()[loci] + + if (aloc != nil) != (bloc != nil) { + rv = append(rv, fmt.Sprintf("field %s, term: %s, postingsListIterator Next() loc different: %v %v", + fieldName, next.Term, 
aloc, bloc)) + break + } + + if aloc.Field() != bloc.Field() || + aloc.Start() != bloc.Start() || + aloc.End() != bloc.End() || + aloc.Pos() != bloc.Pos() || + !reflect.DeepEqual(aloc.ArrayPositions(), bloc.ArrayPositions()) { + rv = append(rv, fmt.Sprintf("field %s, term: %s, postingsListIterator Next() loc details different: %v %v", + fieldName, next.Term, aloc, bloc)) + } + } + } + } + } + } + + return strings.Join(rv, "\n") } func TestMergeAndDrop(t *testing.T) { + docsToDrop := make([]*roaring.Bitmap, 2) + docsToDrop[0] = roaring.NewBitmap() + docsToDrop[0].AddInt(1) + docsToDrop[1] = roaring.NewBitmap() + docsToDrop[1].AddInt(1) + testMergeAndDrop(t, docsToDrop) +} + +func TestMergeAndDropAllFromOneSegment(t *testing.T) { + docsToDrop := make([]*roaring.Bitmap, 2) + docsToDrop[0] = roaring.NewBitmap() + docsToDrop[0].AddInt(0) + docsToDrop[0].AddInt(1) + docsToDrop[1] = roaring.NewBitmap() + testMergeAndDrop(t, docsToDrop) +} + +func testMergeAndDrop(t *testing.T, docsToDrop []*roaring.Bitmap) { _ = os.RemoveAll("/tmp/scorch.zap") _ = os.RemoveAll("/tmp/scorch2.zap") _ = os.RemoveAll("/tmp/scorch3.zap") @@ -117,16 +363,30 @@ func TestMergeAndDrop(t *testing.T) { segsToMerge[0] = segment.(*Segment) segsToMerge[1] = segment2.(*Segment) - docsToDrop := make([]*roaring.Bitmap, 2) - docsToDrop[0] = roaring.NewBitmap() - docsToDrop[0].AddInt(1) - docsToDrop[1] = roaring.NewBitmap() - docsToDrop[1].AddInt(1) - _, err = Merge(segsToMerge, docsToDrop, "/tmp/scorch3.zap", 1024) if err != nil { t.Fatal(err) } + + segm, err := Open("/tmp/scorch3.zap") + if err != nil { + t.Fatalf("error opening merged segment: %v", err) + } + defer func() { + cerr := segm.Close() + if cerr != nil { + t.Fatalf("error closing segment: %v", err) + } + }() + + if segm.Count() != 2 { + t.Fatalf("wrong count, got: %d", segm.Count()) + } + if len(segm.Fields()) != 5 { + t.Fatalf("wrong # fields: %#v\n", segm.Fields()) + } + + testMergeWithSelf(t, segm.(*Segment), 2) } func 
buildMemSegmentMulti2() *mem.Segment { diff --git a/index/scorch/segment/zap/posting.go b/index/scorch/segment/zap/posting.go index e8533a12a..67e08d1ae 100644 --- a/index/scorch/segment/zap/posting.go +++ b/index/scorch/segment/zap/posting.go @@ -98,6 +98,49 @@ func (p *PostingsList) Count() uint64 { return 0 } +func (rv *PostingsList) read(postingsOffset uint64, d *Dictionary) error { + rv.postingsOffset = postingsOffset + + // read the location of the freq/norm details + var n uint64 + var read int + + rv.freqOffset, read = binary.Uvarint(d.sb.mem[postingsOffset+n : postingsOffset+binary.MaxVarintLen64]) + n += uint64(read) + + rv.locOffset, read = binary.Uvarint(d.sb.mem[postingsOffset+n : postingsOffset+n+binary.MaxVarintLen64]) + n += uint64(read) + + var locBitmapOffset uint64 + locBitmapOffset, read = binary.Uvarint(d.sb.mem[postingsOffset+n : postingsOffset+n+binary.MaxVarintLen64]) + n += uint64(read) + + var locBitmapLen uint64 + locBitmapLen, read = binary.Uvarint(d.sb.mem[locBitmapOffset : locBitmapOffset+binary.MaxVarintLen64]) + + locRoaringBytes := d.sb.mem[locBitmapOffset+uint64(read) : locBitmapOffset+uint64(read)+locBitmapLen] + + rv.locBitmap = roaring.NewBitmap() + _, err := rv.locBitmap.FromBuffer(locRoaringBytes) + if err != nil { + return fmt.Errorf("error loading roaring bitmap of locations with hits: %v", err) + } + + var postingsLen uint64 + postingsLen, read = binary.Uvarint(d.sb.mem[postingsOffset+n : postingsOffset+n+binary.MaxVarintLen64]) + n += uint64(read) + + roaringBytes := d.sb.mem[postingsOffset+n : postingsOffset+n+postingsLen] + + rv.postings = roaring.NewBitmap() + _, err = rv.postings.FromBuffer(roaringBytes) + if err != nil { + return fmt.Errorf("error loading roaring bitmap: %v", err) + } + + return nil +} + // PostingsIterator provides a way to iterate through the postings list type PostingsIterator struct { postings *PostingsList From 4dd64b68fa31081cfdca75a1fe4ab8c3f5cc8759 Mon Sep 17 00:00:00 2001 From: Steve Yen 
Date: Tue, 30 Jan 2018 22:14:10 -0800 Subject: [PATCH 172/728] scorch zap TestMergeWithEmptySegment(s) --- index/scorch/segment/zap/merge_test.go | 104 +++++++++++++++++++++++++ 1 file changed, 104 insertions(+) diff --git a/index/scorch/segment/zap/merge_test.go b/index/scorch/segment/zap/merge_test.go index 13807d8eb..3318c57dd 100644 --- a/index/scorch/segment/zap/merge_test.go +++ b/index/scorch/segment/zap/merge_test.go @@ -102,6 +102,110 @@ func TestMerge(t *testing.T) { testMergeWithSelf(t, seg3, 4) } +func TestMergeWithEmptySegment(t *testing.T) { + testMergeWithEmptySegments(t, true, 1) +} + +func TestMergeWithEmptySegments(t *testing.T) { + testMergeWithEmptySegments(t, true, 5) +} + +func TestMergeWithEmptySegmentFirst(t *testing.T) { + testMergeWithEmptySegments(t, false, 1) +} + +func TestMergeWithEmptySegmentsFirst(t *testing.T) { + testMergeWithEmptySegments(t, false, 5) +} + +func testMergeWithEmptySegments(t *testing.T, before bool, numEmptySegments int) { + _ = os.RemoveAll("/tmp/scorch.zap") + + memSegment := buildMemSegmentMulti() + err := PersistSegment(memSegment, "/tmp/scorch.zap", 1024) + if err != nil { + t.Fatal(err) + } + segment, err := Open("/tmp/scorch.zap") + if err != nil { + t.Fatalf("error opening segment: %v", err) + } + defer func() { + cerr := segment.Close() + if cerr != nil { + t.Fatalf("error closing segment: %v", err) + } + }() + + var segsToMerge []*Segment + + if before { + segsToMerge = append(segsToMerge, segment.(*Segment)) + } + + for i := 0; i < numEmptySegments; i++ { + fname := fmt.Sprintf("scorch-empty-%d.zap", i) + + _ = os.RemoveAll("/tmp/" + fname) + + emptySegment := mem.NewFromAnalyzedDocs([]*index.AnalysisResult{}) + err = PersistSegment(emptySegment, "/tmp/"+fname, 1024) + if err != nil { + t.Fatal(err) + } + + emptyFileSegment, err := Open("/tmp/" + fname) + if err != nil { + t.Fatalf("error opening segment: %v", err) + } + defer func(emptyFileSegment *Segment) { + cerr := emptyFileSegment.Close() + if cerr 
!= nil { + t.Fatalf("error closing segment: %v", err) + } + }(emptyFileSegment.(*Segment)) + + segsToMerge = append(segsToMerge, emptyFileSegment.(*Segment)) + } + + if !before { + segsToMerge = append(segsToMerge, segment.(*Segment)) + } + + _ = os.RemoveAll("/tmp/scorch3.zap") + + drops := make([]*roaring.Bitmap, len(segsToMerge)) + + _, err = Merge(segsToMerge, drops, "/tmp/scorch3.zap", 1024) + if err != nil { + t.Fatal(err) + } + + segm, err := Open("/tmp/scorch3.zap") + if err != nil { + t.Fatalf("error opening merged segment: %v", err) + } + segCur := segm.(*Segment) + defer func() { + cerr := segCur.Close() + if cerr != nil { + t.Fatalf("error closing segment: %v", err) + } + }() + + if segCur.Path() != "/tmp/scorch3.zap" { + t.Fatalf("wrong path") + } + if segCur.Count() != 2 { + t.Fatalf("wrong count, numEmptySegments: %d, got count: %d", numEmptySegments, segCur.Count()) + } + if len(segCur.Fields()) != 5 { + t.Fatalf("wrong # fields: %#v\n", segCur.Fields()) + } + + testMergeWithSelf(t, segCur, 2) +} + func testMergeWithSelf(t *testing.T, segCur *Segment, expectedCount uint64) { // trying merging the segment with itself for a few rounds var diffs []string From 93b037cdbb63014054d615c088b062ff8d3052fd Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Wed, 31 Jan 2018 11:40:21 -0800 Subject: [PATCH 173/728] scorch zap TestMergeWithUpdates() --- index/scorch/segment/zap/merge_test.go | 126 +++++++++++++++++++++---- 1 file changed, 109 insertions(+), 17 deletions(-) diff --git a/index/scorch/segment/zap/merge_test.go b/index/scorch/segment/zap/merge_test.go index 3318c57dd..4453cfcd9 100644 --- a/index/scorch/segment/zap/merge_test.go +++ b/index/scorch/segment/zap/merge_test.go @@ -427,20 +427,12 @@ func TestMergeAndDropAllFromOneSegment(t *testing.T) { func testMergeAndDrop(t *testing.T, docsToDrop []*roaring.Bitmap) { _ = os.RemoveAll("/tmp/scorch.zap") _ = os.RemoveAll("/tmp/scorch2.zap") - _ = os.RemoveAll("/tmp/scorch3.zap") memSegment := 
buildMemSegmentMulti() err := PersistSegment(memSegment, "/tmp/scorch.zap", 1024) if err != nil { t.Fatal(err) } - - memSegment2 := buildMemSegmentMulti2() - err = PersistSegment(memSegment2, "/tmp/scorch2.zap", 1024) - if err != nil { - t.Fatal(err) - } - segment, err := Open("/tmp/scorch.zap") if err != nil { t.Fatalf("error opening segment: %v", err) @@ -452,6 +444,12 @@ func testMergeAndDrop(t *testing.T, docsToDrop []*roaring.Bitmap) { } }() + memSegment2 := buildMemSegmentMulti2() + err = PersistSegment(memSegment2, "/tmp/scorch2.zap", 1024) + if err != nil { + t.Fatal(err) + } + segment2, err := Open("/tmp/scorch2.zap") if err != nil { t.Fatalf("error opening segment: %v", err) @@ -467,12 +465,103 @@ func testMergeAndDrop(t *testing.T, docsToDrop []*roaring.Bitmap) { segsToMerge[0] = segment.(*Segment) segsToMerge[1] = segment2.(*Segment) - _, err = Merge(segsToMerge, docsToDrop, "/tmp/scorch3.zap", 1024) + testMergeAndDropSegments(t, segsToMerge, docsToDrop, 2) +} + +func TestMergeWithUpdates(t *testing.T) { + segmentDocIds := [][]string{ + []string{"a", "b"}, + []string{"b", "c"}, // doc "b" updated + } + + docsToDrop := make([]*roaring.Bitmap, 2) + docsToDrop[0] = roaring.NewBitmap() + docsToDrop[0].AddInt(1) // doc "b" updated + docsToDrop[1] = roaring.NewBitmap() + + testMergeWithUpdates(t, segmentDocIds, docsToDrop, 3) +} + +func TestMergeWithUpdatesOnManySegments(t *testing.T) { + segmentDocIds := [][]string{ + []string{"a", "b"}, + []string{"b", "c"}, // doc "b" updated + []string{"c", "d"}, // doc "c" updated + []string{"d", "e"}, // doc "d" updated + } + + docsToDrop := make([]*roaring.Bitmap, 4) + docsToDrop[0] = roaring.NewBitmap() + docsToDrop[0].AddInt(1) // doc "b" updated + docsToDrop[1] = roaring.NewBitmap() + docsToDrop[1].AddInt(1) // doc "c" updated + docsToDrop[2] = roaring.NewBitmap() + docsToDrop[2].AddInt(1) // doc "d" updated + docsToDrop[3] = roaring.NewBitmap() + + testMergeWithUpdates(t, segmentDocIds, docsToDrop, 5) +} + +func 
TestMergeWithUpdatesOnOneDoc(t *testing.T) { + segmentDocIds := [][]string{ + []string{"a", "b"}, + []string{"a", "c"}, // doc "a" updated + []string{"a", "d"}, // doc "a" updated + []string{"a", "e"}, // doc "a" updated + } + + docsToDrop := make([]*roaring.Bitmap, 4) + docsToDrop[0] = roaring.NewBitmap() + docsToDrop[0].AddInt(0) // doc "a" updated + docsToDrop[1] = roaring.NewBitmap() + docsToDrop[1].AddInt(0) // doc "a" updated + docsToDrop[2] = roaring.NewBitmap() + docsToDrop[2].AddInt(0) // doc "a" updated + docsToDrop[3] = roaring.NewBitmap() + + testMergeWithUpdates(t, segmentDocIds, docsToDrop, 5) +} + +func testMergeWithUpdates(t *testing.T, segmentDocIds [][]string, docsToDrop []*roaring.Bitmap, expectedNumDocs uint64) { + var segsToMerge []*Segment + + // convert segmentDocIds to segsToMerge + for i, docIds := range segmentDocIds { + fname := fmt.Sprintf("scorch%d.zap", i) + + _ = os.RemoveAll("/tmp/" + fname) + + memSegment := buildMemSegmentMultiHelper(docIds) + err := PersistSegment(memSegment, "/tmp/"+fname, 1024) + if err != nil { + t.Fatal(err) + } + segment, err := Open("/tmp/" + fname) + if err != nil { + t.Fatalf("error opening segment: %v", err) + } + defer func(segment *Segment) { + cerr := segment.Close() + if cerr != nil { + t.Fatalf("error closing segment: %v", err) + } + }(segment.(*Segment)) + + segsToMerge = append(segsToMerge, segment.(*Segment)) + } + + testMergeAndDropSegments(t, segsToMerge, docsToDrop, expectedNumDocs) +} + +func testMergeAndDropSegments(t *testing.T, segsToMerge []*Segment, docsToDrop []*roaring.Bitmap, expectedNumDocs uint64) { + _ = os.RemoveAll("/tmp/scorch-merged.zap") + + _, err := Merge(segsToMerge, docsToDrop, "/tmp/scorch-merged.zap", 1024) if err != nil { t.Fatal(err) } - segm, err := Open("/tmp/scorch3.zap") + segm, err := Open("/tmp/scorch-merged.zap") if err != nil { t.Fatalf("error opening merged segment: %v", err) } @@ -483,22 +572,25 @@ func testMergeAndDrop(t *testing.T, docsToDrop 
[]*roaring.Bitmap) { } }() - if segm.Count() != 2 { - t.Fatalf("wrong count, got: %d", segm.Count()) + if segm.Count() != expectedNumDocs { + t.Fatalf("wrong count, got: %d, wanted: %d", segm.Count(), expectedNumDocs) } if len(segm.Fields()) != 5 { t.Fatalf("wrong # fields: %#v\n", segm.Fields()) } - testMergeWithSelf(t, segm.(*Segment), 2) + testMergeWithSelf(t, segm.(*Segment), expectedNumDocs) } func buildMemSegmentMulti2() *mem.Segment { + return buildMemSegmentMultiHelper([]string{"c", "d"}) +} +func buildMemSegmentMultiHelper(docIds []string) *mem.Segment { doc := &document.Document{ ID: "c", Fields: []document.Field{ - document.NewTextFieldCustom("_id", nil, []byte("c"), document.IndexField|document.StoreField, nil), + document.NewTextFieldCustom("_id", nil, []byte(docIds[0]), document.IndexField|document.StoreField, nil), document.NewTextFieldCustom("name", nil, []byte("mat"), document.IndexField|document.StoreField|document.IncludeTermVectors, nil), document.NewTextFieldCustom("desc", nil, []byte("some thing"), document.IndexField|document.StoreField|document.IncludeTermVectors, nil), document.NewTextFieldCustom("tag", []uint64{0}, []byte("cold"), document.IndexField|document.StoreField|document.IncludeTermVectors, nil), @@ -512,7 +604,7 @@ func buildMemSegmentMulti2() *mem.Segment { doc2 := &document.Document{ ID: "d", Fields: []document.Field{ - document.NewTextFieldCustom("_id", nil, []byte("d"), document.IndexField|document.StoreField, nil), + document.NewTextFieldCustom("_id", nil, []byte(docIds[1]), document.IndexField|document.StoreField, nil), document.NewTextFieldCustom("name", nil, []byte("joa"), document.IndexField|document.StoreField|document.IncludeTermVectors, nil), document.NewTextFieldCustom("desc", nil, []byte("some thing"), document.IndexField|document.StoreField|document.IncludeTermVectors, nil), document.NewTextFieldCustom("tag", []uint64{0}, []byte("cold"), document.IndexField|document.StoreField|document.IncludeTermVectors, nil), @@ 
-533,7 +625,7 @@ func buildMemSegmentMulti2() *mem.Segment { Start: 0, End: 1, Position: 1, - Term: []byte("c"), + Term: []byte(docIds[0]), }, }, nil, false), analysis.TokenFrequency(analysis.TokenStream{ @@ -591,7 +683,7 @@ func buildMemSegmentMulti2() *mem.Segment { Start: 0, End: 1, Position: 1, - Term: []byte("d"), + Term: []byte(docIds[1]), }, }, nil, false), analysis.TokenFrequency(analysis.TokenStream{ From 714f5321e0dec8e1ff3b48ea0f7411775de9dcc7 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Wed, 31 Jan 2018 14:46:28 -0800 Subject: [PATCH 174/728] scorch zap merge storedFieldVals inner loop optimization --- index/scorch/segment/zap/merge.go | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/index/scorch/segment/zap/merge.go b/index/scorch/segment/zap/merge.go index cc348d720..1afe99f49 100644 --- a/index/scorch/segment/zap/merge.go +++ b/index/scorch/segment/zap/merge.go @@ -456,6 +456,9 @@ func mergeStoredAndRemap(segments []*Segment, drops []*roaring.Bitmap, // has stored values for this field num := len(storedFieldValues) + stf := typs[int(fieldID)] + spf := poss[int(fieldID)] + // process each value for i := 0; i < num; i++ { // encode field @@ -464,7 +467,7 @@ func mergeStoredAndRemap(segments []*Segment, drops []*roaring.Bitmap, return 0, nil, err2 } // encode type - _, err2 = metaEncoder.PutU64(uint64(typs[int(fieldID)][i])) + _, err2 = metaEncoder.PutU64(uint64(stf[i])) if err2 != nil { return 0, nil, err2 } @@ -479,13 +482,13 @@ func mergeStoredAndRemap(segments []*Segment, drops []*roaring.Bitmap, return 0, nil, err2 } // encode number of array pos - _, err2 = metaEncoder.PutU64(uint64(len(poss[int(fieldID)][i]))) + _, err2 = metaEncoder.PutU64(uint64(len(spf[i]))) if err2 != nil { return 0, nil, err2 } // encode all array positions - for j := 0; j < len(poss[int(fieldID)][i]); j++ { - _, err2 = metaEncoder.PutU64(poss[int(fieldID)][i][j]) + for _, pos := range spf[i] { + _, err2 = metaEncoder.PutU64(pos) if err2 != nil 
{ return 0, nil, err2 } From 678c412157db7c91b9c99a4e43fde441e7ba522f Mon Sep 17 00:00:00 2001 From: Sreekanth Sivasankaran Date: Fri, 2 Feb 2018 14:44:02 +0530 Subject: [PATCH 175/728] unblock the files for clean up, esp for merged new segment files --- index/scorch/persister.go | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/index/scorch/persister.go b/index/scorch/persister.go index cdcee37c2..552a25e58 100644 --- a/index/scorch/persister.go +++ b/index/scorch/persister.go @@ -266,9 +266,7 @@ func (s *Scorch) persistSnapshot(snapshot *IndexSnapshot) error { for k, v := range s.root.internal { newIndexSnapshot.internal[k] = v } - for _, filename := range filenames { - delete(s.ineligibleForRemoval, filename) - } + rootPrev := s.root s.root = newIndexSnapshot s.rootLock.Unlock() @@ -276,6 +274,12 @@ func (s *Scorch) persistSnapshot(snapshot *IndexSnapshot) error { _ = rootPrev.DecRef() } } + // unlock the files for clean up + s.rootLock.Lock() + for _, filename := range filenames { + delete(s.ineligibleForRemoval, filename) + } + s.rootLock.Unlock() return nil } From 9636209ae51fb46b004763ff65facfb3a043a015 Mon Sep 17 00:00:00 2001 From: Sreekanth Sivasankaran Date: Mon, 5 Feb 2018 20:49:30 +0530 Subject: [PATCH 176/728] Update persister.go comment updated --- index/scorch/persister.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/index/scorch/persister.go b/index/scorch/persister.go index 552a25e58..d656c4d9f 100644 --- a/index/scorch/persister.go +++ b/index/scorch/persister.go @@ -274,7 +274,7 @@ func (s *Scorch) persistSnapshot(snapshot *IndexSnapshot) error { _ = rootPrev.DecRef() } } - // unlock the files for clean up + // allow files to become eligible for removal s.rootLock.Lock() for _, filename := range filenames { delete(s.ineligibleForRemoval, filename) From eb21bf83154344c183df51c35f67159df1f2383b Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Wed, 31 Jan 2018 15:08:31 -0800 Subject: [PATCH 177/728] 
scorch zap merge & build share persistStoredFieldValues() Refactored out a helper func, persistStoredFieldValues(), that both the persistence and merge codepaths now share. --- index/scorch/segment/zap/build.go | 100 ++++++++++++++++-------------- index/scorch/segment/zap/merge.go | 46 ++------------ 2 files changed, 59 insertions(+), 87 deletions(-) diff --git a/index/scorch/segment/zap/build.go b/index/scorch/segment/zap/build.go index 58f9faeaf..e6625528a 100644 --- a/index/scorch/segment/zap/build.go +++ b/index/scorch/segment/zap/build.go @@ -187,79 +187,42 @@ func persistBase(memSegment *mem.Segment, cr *CountHashWriter, chunkFactor uint3 } func persistStored(memSegment *mem.Segment, w *CountHashWriter) (uint64, error) { - var curr int var metaBuf bytes.Buffer var data, compressed []byte + metaEncoder := govarint.NewU64Base128Encoder(&metaBuf) + docNumOffsets := make(map[int]uint64, len(memSegment.Stored)) for docNum, storedValues := range memSegment.Stored { if docNum != 0 { // reset buffer if necessary + curr = 0 metaBuf.Reset() data = data[:0] compressed = compressed[:0] - curr = 0 } - metaEncoder := govarint.NewU64Base128Encoder(&metaBuf) - st := memSegment.StoredTypes[docNum] sp := memSegment.StoredPos[docNum] // encode fields in order for fieldID := range memSegment.FieldsInv { if storedFieldValues, ok := storedValues[uint16(fieldID)]; ok { - // has stored values for this field - num := len(storedFieldValues) - stf := st[uint16(fieldID)] spf := sp[uint16(fieldID)] - // process each value - for i := 0; i < num; i++ { - // encode field - _, err2 := metaEncoder.PutU64(uint64(fieldID)) - if err2 != nil { - return 0, err2 - } - // encode type - _, err2 = metaEncoder.PutU64(uint64(stf[i])) - if err2 != nil { - return 0, err2 - } - // encode start offset - _, err2 = metaEncoder.PutU64(uint64(curr)) - if err2 != nil { - return 0, err2 - } - // end len - _, err2 = metaEncoder.PutU64(uint64(len(storedFieldValues[i]))) - if err2 != nil { - return 0, err2 - } - // 
encode number of array pos - _, err2 = metaEncoder.PutU64(uint64(len(spf[i]))) - if err2 != nil { - return 0, err2 - } - // encode all array positions - for _, pos := range spf[i] { - _, err2 = metaEncoder.PutU64(pos) - if err2 != nil { - return 0, err2 - } - } - // append data - data = append(data, storedFieldValues[i]...) - // update curr - curr += len(storedFieldValues[i]) + var err2 error + curr, data, err2 = persistStoredFieldValues(fieldID, + storedFieldValues, stf, spf, curr, metaEncoder, data) + if err2 != nil { + return 0, err2 } } } - metaEncoder.Close() + metaEncoder.Close() metaBytes := metaBuf.Bytes() // compress the data @@ -299,6 +262,51 @@ func persistStored(memSegment *mem.Segment, w *CountHashWriter) (uint64, error) return rv, nil } +func persistStoredFieldValues(fieldID int, + storedFieldValues [][]byte, stf []byte, spf [][]uint64, + curr int, metaEncoder *govarint.Base128Encoder, data []byte) ( + int, []byte, error) { + for i := 0; i < len(storedFieldValues); i++ { + // encode field + _, err := metaEncoder.PutU64(uint64(fieldID)) + if err != nil { + return 0, nil, err + } + // encode type + _, err = metaEncoder.PutU64(uint64(stf[i])) + if err != nil { + return 0, nil, err + } + // encode start offset + _, err = metaEncoder.PutU64(uint64(curr)) + if err != nil { + return 0, nil, err + } + // end len + _, err = metaEncoder.PutU64(uint64(len(storedFieldValues[i]))) + if err != nil { + return 0, nil, err + } + // encode number of array pos + _, err = metaEncoder.PutU64(uint64(len(spf[i]))) + if err != nil { + return 0, nil, err + } + // encode all array positions + for _, pos := range spf[i] { + _, err = metaEncoder.PutU64(pos) + if err != nil { + return 0, nil, err + } + } + + data = append(data, storedFieldValues[i]...) 
+ curr += len(storedFieldValues[i]) + } + + return curr, data, nil +} + func persistPostingDetails(memSegment *mem.Segment, w *CountHashWriter, chunkFactor uint32) ([]uint64, []uint64, error) { var freqOffsets, locOfffsets []uint64 tfEncoder := newChunkedIntCoder(uint64(chunkFactor), uint64(len(memSegment.Stored)-1)) diff --git a/index/scorch/segment/zap/merge.go b/index/scorch/segment/zap/merge.go index 1afe99f49..8fdb07afc 100644 --- a/index/scorch/segment/zap/merge.go +++ b/index/scorch/segment/zap/merge.go @@ -453,50 +453,14 @@ func mergeStoredAndRemap(segments []*Segment, drops []*roaring.Bitmap, for fieldID := range fieldsInv { storedFieldValues := vals[int(fieldID)] - // has stored values for this field - num := len(storedFieldValues) - stf := typs[int(fieldID)] spf := poss[int(fieldID)] - // process each value - for i := 0; i < num; i++ { - // encode field - _, err2 := metaEncoder.PutU64(uint64(fieldID)) - if err2 != nil { - return 0, nil, err2 - } - // encode type - _, err2 = metaEncoder.PutU64(uint64(stf[i])) - if err2 != nil { - return 0, nil, err2 - } - // encode start offset - _, err2 = metaEncoder.PutU64(uint64(curr)) - if err2 != nil { - return 0, nil, err2 - } - // end len - _, err2 = metaEncoder.PutU64(uint64(len(storedFieldValues[i]))) - if err2 != nil { - return 0, nil, err2 - } - // encode number of array pos - _, err2 = metaEncoder.PutU64(uint64(len(spf[i]))) - if err2 != nil { - return 0, nil, err2 - } - // encode all array positions - for _, pos := range spf[i] { - _, err2 = metaEncoder.PutU64(pos) - if err2 != nil { - return 0, nil, err2 - } - } - // append data - data = append(data, storedFieldValues[i]...) 
- // update curr - curr += len(storedFieldValues[i]) + var err2 error + curr, data, err2 = persistStoredFieldValues(fieldID, + storedFieldValues, stf, spf, curr, metaEncoder, data) + if err2 != nil { + return 0, nil, err2 } } From 65786557584031cb74749681939f673245842b42 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Wed, 31 Jan 2018 15:48:03 -0800 Subject: [PATCH 178/728] scorch zap refactored out mergeToWriter() func This is a step towards supporting in-memory zap segment merging. --- index/scorch/segment/zap/merge.go | 71 ++++++++++++++++++------------- 1 file changed, 41 insertions(+), 30 deletions(-) diff --git a/index/scorch/segment/zap/merge.go b/index/scorch/segment/zap/merge.go index 8fdb07afc..327446c5c 100644 --- a/index/scorch/segment/zap/merge.go +++ b/index/scorch/segment/zap/merge.go @@ -52,41 +52,15 @@ func Merge(segments []*Segment, drops []*roaring.Bitmap, path string, // wrap it for counting (tracking offsets) cr := NewCountHashWriter(br) - fieldsInv := mergeFields(segments) - fieldsMap := mapFields(fieldsInv) - - var newDocNums [][]uint64 - var storedIndexOffset uint64 - fieldDvLocsOffset := uint64(fieldNotUninverted) - var dictLocs []uint64 - - newSegDocCount := computeNewDocCount(segments, drops) - if newSegDocCount > 0 { - storedIndexOffset, newDocNums, err = mergeStoredAndRemap(segments, drops, - fieldsMap, fieldsInv, newSegDocCount, cr) - if err != nil { - cleanup() - return nil, err - } - - dictLocs, fieldDvLocsOffset, err = persistMergedRest(segments, drops, fieldsInv, fieldsMap, - newDocNums, newSegDocCount, chunkFactor, cr) - if err != nil { - cleanup() - return nil, err - } - } else { - dictLocs = make([]uint64, len(fieldsInv)) - } - - fieldsIndexOffset, err := persistFields(fieldsInv, cr, dictLocs) + newDocNums, numDocs, storedIndexOffset, fieldsIndexOffset, docValueOffset, err := + mergeToWriter(segments, drops, chunkFactor, cr) if err != nil { cleanup() return nil, err } - err = persistFooter(newSegDocCount, storedIndexOffset, - 
fieldsIndexOffset, fieldDvLocsOffset, chunkFactor, cr.Sum32(), cr) + err = persistFooter(numDocs, storedIndexOffset, fieldsIndexOffset, + docValueOffset, chunkFactor, cr.Sum32(), cr) if err != nil { cleanup() return nil, err @@ -113,6 +87,43 @@ func Merge(segments []*Segment, drops []*roaring.Bitmap, path string, return newDocNums, nil } +func mergeToWriter(segments []*Segment, drops []*roaring.Bitmap, + chunkFactor uint32, cr *CountHashWriter) ( + newDocNums [][]uint64, + numDocs, storedIndexOffset, fieldsIndexOffset, docValueOffset uint64, + err error) { + docValueOffset = uint64(fieldNotUninverted) + + var dictLocs []uint64 + + fieldsInv := mergeFields(segments) + fieldsMap := mapFields(fieldsInv) + + numDocs = computeNewDocCount(segments, drops) + if numDocs > 0 { + storedIndexOffset, newDocNums, err = mergeStoredAndRemap(segments, drops, + fieldsMap, fieldsInv, numDocs, cr) + if err != nil { + return nil, 0, 0, 0, 0, err + } + + dictLocs, docValueOffset, err = persistMergedRest(segments, drops, fieldsInv, fieldsMap, + newDocNums, numDocs, chunkFactor, cr) + if err != nil { + return nil, 0, 0, 0, 0, err + } + } else { + dictLocs = make([]uint64, len(fieldsInv)) + } + + fieldsIndexOffset, err = persistFields(fieldsInv, cr, dictLocs) + if err != nil { + return nil, 0, 0, 0, 0, err + } + + return newDocNums, numDocs, storedIndexOffset, fieldsIndexOffset, docValueOffset, nil +} + // mapFields takes the fieldsInv list and builds the map func mapFields(fields []string) map[string]uint16 { rv := make(map[string]uint16, len(fields)) From 3da191852de9b0aa860532de7ec39256008d8252 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Thu, 1 Feb 2018 16:59:59 -0800 Subject: [PATCH 179/728] scorch zap tighten up prepareSegment()'s lock area --- index/scorch/scorch.go | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/index/scorch/scorch.go b/index/scorch/scorch.go index 311077653..08fffa25e 100644 --- a/index/scorch/scorch.go +++ 
b/index/scorch/scorch.go @@ -310,17 +310,21 @@ func (s *Scorch) prepareSegment(newSegment segment.Segment, ids []string, introduction.persisted = make(chan error, 1) } - // get read lock, to optimistically prepare obsoleted info + // optimistically prepare obsoletes outside of rootLock s.rootLock.RLock() - for _, seg := range s.root.segment { + root := s.root + root.AddRef() + s.rootLock.RUnlock() + + for _, seg := range root.segment { delta, err := seg.segment.DocNumbers(ids) if err != nil { - s.rootLock.RUnlock() return err } introduction.obsoletes[seg.id] = delta } - s.rootLock.RUnlock() + + _ = root.DecRef() s.introductions <- introduction From c09e2a08cadda4973bd062baf9a19fcc07b86a5e Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Sat, 3 Feb 2018 10:51:24 -0800 Subject: [PATCH 180/728] scorch zap chunkedContentCoder reuses chunk metadata slice memory And, renamed the chunk MetaData.DocID field to DocNum for naming correctness, where much of this commit is the mechanical effect of that rename. --- cmd/bleve/cmd/zap/docvalue.go | 27 ++++++++++++------------ index/scorch/segment/zap/build.go | 2 +- index/scorch/segment/zap/contentcoder.go | 12 +++++------ index/scorch/segment/zap/docvalues.go | 18 ++++++++-------- 4 files changed, 29 insertions(+), 30 deletions(-) diff --git a/cmd/bleve/cmd/zap/docvalue.go b/cmd/bleve/cmd/zap/docvalue.go index 165829fdf..743974955 100644 --- a/cmd/bleve/cmd/zap/docvalue.go +++ b/cmd/bleve/cmd/zap/docvalue.go @@ -165,7 +165,7 @@ var docvalueCmd = &cobra.Command{ /* TODO => dump all chunk headers?? 
if len(args) == 3 && args[2] == ">" { - dumpChunkDocIDs(data, ) + dumpChunkDocNums(data, ) }*/ } @@ -187,7 +187,7 @@ var docvalueCmd = &cobra.Command{ docInChunk := uint64(localDocNum) / uint64(segment.ChunkFactor()) if numChunks < docInChunk { - return fmt.Errorf("no chunk exists for chunk number: %d for docID: %d", docInChunk, localDocNum) + return fmt.Errorf("no chunk exists for chunk number: %d for localDocNum: %d", docInChunk, localDocNum) } destChunkDataLoc := fieldDvLoc + offset @@ -207,7 +207,7 @@ var docvalueCmd = &cobra.Command{ offset = uint64(0) curChunkHeader := make([]zap.MetaData, int(numDocs)) for i := 0; i < int(numDocs); i++ { - curChunkHeader[i].DocID, nread = binary.Uvarint(data[chunkMetaLoc+offset : chunkMetaLoc+offset+binary.MaxVarintLen64]) + curChunkHeader[i].DocNum, nread = binary.Uvarint(data[chunkMetaLoc+offset : chunkMetaLoc+offset+binary.MaxVarintLen64]) offset += uint64(nread) curChunkHeader[i].DocDvLoc, nread = binary.Uvarint(data[chunkMetaLoc+offset : chunkMetaLoc+offset+binary.MaxVarintLen64]) offset += uint64(nread) @@ -221,8 +221,8 @@ var docvalueCmd = &cobra.Command{ start, length := getDocValueLocs(uint64(localDocNum), curChunkHeader) if start == math.MaxUint64 || length == math.MaxUint64 { - fmt.Printf("no field values found for docID %d\n", localDocNum) - fmt.Printf("Try docIDs present in chunk: %s\n", assortDocID(curChunkHeader)) + fmt.Printf("no field values found for localDocNum: %d\n", localDocNum) + fmt.Printf("Try docNums present in chunk: %s\n", metaDataDocNums(curChunkHeader)) return nil } // uncompress the already loaded data @@ -234,7 +234,7 @@ var docvalueCmd = &cobra.Command{ var termSeparator byte = 0xff var termSeparatorSplitSlice = []byte{termSeparator} - // pick the terms for the given docID + // pick the terms for the given docNum uncompressed = uncompressed[start : start+length] for { i := bytes.Index(uncompressed, termSeparatorSplitSlice) @@ -250,23 +250,22 @@ var docvalueCmd = &cobra.Command{ }, } -func 
getDocValueLocs(docID uint64, metaHeader []zap.MetaData) (uint64, uint64) { +func getDocValueLocs(docNum uint64, metaHeader []zap.MetaData) (uint64, uint64) { i := sort.Search(len(metaHeader), func(i int) bool { - return metaHeader[i].DocID >= docID + return metaHeader[i].DocNum >= docNum }) - if i < len(metaHeader) && metaHeader[i].DocID == docID { + if i < len(metaHeader) && metaHeader[i].DocNum == docNum { return metaHeader[i].DocDvLoc, metaHeader[i].DocDvLen } return math.MaxUint64, math.MaxUint64 } -func assortDocID(metaHeader []zap.MetaData) string { - docIDs := "" +func metaDataDocNums(metaHeader []zap.MetaData) string { + docNums := "" for _, meta := range metaHeader { - id := fmt.Sprintf("%d", meta.DocID) - docIDs += id + ", " + docNums += fmt.Sprintf("%d", meta.DocNum) + ", " } - return docIDs + return docNums } func init() { diff --git a/index/scorch/segment/zap/build.go b/index/scorch/segment/zap/build.go index e6625528a..60d168e66 100644 --- a/index/scorch/segment/zap/build.go +++ b/index/scorch/segment/zap/build.go @@ -588,7 +588,7 @@ func persistDocValues(memSegment *mem.Segment, w *CountHashWriter, if err != nil { return nil, err } - // resetting encoder for the next field + // reseting encoder for the next field fdvEncoder.Reset() } diff --git a/index/scorch/segment/zap/contentcoder.go b/index/scorch/segment/zap/contentcoder.go index b03940497..83457146e 100644 --- a/index/scorch/segment/zap/contentcoder.go +++ b/index/scorch/segment/zap/contentcoder.go @@ -39,7 +39,7 @@ type chunkedContentCoder struct { // MetaData represents the data information inside a // chunk. 
type MetaData struct { - DocID uint64 // docid of the data inside the chunk + DocNum uint64 // docNum of the data inside the chunk DocDvLoc uint64 // starting offset for a given docid DocDvLen uint64 // length of data inside the chunk for the given docid } @@ -52,7 +52,7 @@ func newChunkedContentCoder(chunkSize uint64, rv := &chunkedContentCoder{ chunkSize: chunkSize, chunkLens: make([]uint64, total), - chunkMeta: []MetaData{}, + chunkMeta: make([]MetaData, 0, total), } return rv @@ -68,7 +68,7 @@ func (c *chunkedContentCoder) Reset() { for i := range c.chunkLens { c.chunkLens[i] = 0 } - c.chunkMeta = []MetaData{} + c.chunkMeta = c.chunkMeta[:0] } // Close indicates you are done calling Add() this allows @@ -88,7 +88,7 @@ func (c *chunkedContentCoder) flushContents() error { // write out the metaData slice for _, meta := range c.chunkMeta { - _, err := writeUvarints(&c.chunkMetaBuf, meta.DocID, meta.DocDvLoc, meta.DocDvLen) + _, err := writeUvarints(&c.chunkMetaBuf, meta.DocNum, meta.DocDvLoc, meta.DocDvLen) if err != nil { return err } @@ -118,7 +118,7 @@ func (c *chunkedContentCoder) Add(docNum uint64, vals []byte) error { // clearing the chunk specific meta for next chunk c.chunkBuf.Reset() c.chunkMetaBuf.Reset() - c.chunkMeta = []MetaData{} + c.chunkMeta = c.chunkMeta[:0] c.currChunk = chunk } @@ -130,7 +130,7 @@ func (c *chunkedContentCoder) Add(docNum uint64, vals []byte) error { } c.chunkMeta = append(c.chunkMeta, MetaData{ - DocID: docNum, + DocNum: docNum, DocDvLoc: uint64(dvOffset), DocDvLen: uint64(dvSize), }) diff --git a/index/scorch/segment/zap/docvalues.go b/index/scorch/segment/zap/docvalues.go index fb5b348a5..0514bd307 100644 --- a/index/scorch/segment/zap/docvalues.go +++ b/index/scorch/segment/zap/docvalues.go @@ -99,7 +99,7 @@ func (s *SegmentBase) loadFieldDocValueIterator(field string, func (di *docValueIterator) loadDvChunk(chunkNumber, localDocNum uint64, s *SegmentBase) error { // advance to the chunk where the docValues - // reside for 
the given docID + // reside for the given docNum destChunkDataLoc := di.dvDataLoc for i := 0; i < int(chunkNumber); i++ { destChunkDataLoc += di.chunkLens[i] @@ -116,7 +116,7 @@ func (di *docValueIterator) loadDvChunk(chunkNumber, offset := uint64(0) di.curChunkHeader = make([]MetaData, int(numDocs)) for i := 0; i < int(numDocs); i++ { - di.curChunkHeader[i].DocID, read = binary.Uvarint(s.mem[chunkMetaLoc+offset : chunkMetaLoc+offset+binary.MaxVarintLen64]) + di.curChunkHeader[i].DocNum, read = binary.Uvarint(s.mem[chunkMetaLoc+offset : chunkMetaLoc+offset+binary.MaxVarintLen64]) offset += uint64(read) di.curChunkHeader[i].DocDvLoc, read = binary.Uvarint(s.mem[chunkMetaLoc+offset : chunkMetaLoc+offset+binary.MaxVarintLen64]) offset += uint64(read) @@ -131,10 +131,10 @@ func (di *docValueIterator) loadDvChunk(chunkNumber, return nil } -func (di *docValueIterator) visitDocValues(docID uint64, +func (di *docValueIterator) visitDocValues(docNum uint64, visitor index.DocumentFieldTermVisitor) error { - // binary search the term locations for the docID - start, length := di.getDocValueLocs(docID) + // binary search the term locations for the docNum + start, length := di.getDocValueLocs(docNum) if start == math.MaxUint64 || length == math.MaxUint64 { return nil } @@ -144,7 +144,7 @@ func (di *docValueIterator) visitDocValues(docID uint64, return err } - // pick the terms for the given docID + // pick the terms for the given docNum uncompressed = uncompressed[start : start+length] for { i := bytes.Index(uncompressed, termSeparatorSplitSlice) @@ -159,11 +159,11 @@ func (di *docValueIterator) visitDocValues(docID uint64, return nil } -func (di *docValueIterator) getDocValueLocs(docID uint64) (uint64, uint64) { +func (di *docValueIterator) getDocValueLocs(docNum uint64) (uint64, uint64) { i := sort.Search(len(di.curChunkHeader), func(i int) bool { - return di.curChunkHeader[i].DocID >= docID + return di.curChunkHeader[i].DocNum >= docNum }) - if i < len(di.curChunkHeader) && 
di.curChunkHeader[i].DocID == docID { + if i < len(di.curChunkHeader) && di.curChunkHeader[i].DocNum == docNum { return di.curChunkHeader[i].DocDvLoc, di.curChunkHeader[i].DocDvLen } return math.MaxUint64, math.MaxUint64 From fdb240f5f99cb3301c2adaaa8acfde7c17835e46 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Mon, 5 Feb 2018 10:02:44 -0800 Subject: [PATCH 181/728] more zap merge-planner CalcBudget tests at larger sizes Helps provide a sense of how # of segments grows as # of documents grows. Ex: 1B docs => budget of 54 segments. --- index/scorch/mergeplan/merge_plan_test.go | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/index/scorch/mergeplan/merge_plan_test.go b/index/scorch/mergeplan/merge_plan_test.go index 4db8eb1e8..9a2c779c8 100644 --- a/index/scorch/mergeplan/merge_plan_test.go +++ b/index/scorch/mergeplan/merge_plan_test.go @@ -188,6 +188,18 @@ func TestCalcBudget(t *testing.T) { }, 7, }, + {1000, 2000, DefaultMergePlanOptions, + 1}, + {5000, 2000, DefaultMergePlanOptions, + 3}, + {10000, 2000, DefaultMergePlanOptions, + 5}, + {30000, 2000, DefaultMergePlanOptions, + 11}, + {1000000, 2000, DefaultMergePlanOptions, + 24}, + {1000000000, 2000, DefaultMergePlanOptions, + 54}, } for testi, test := range tests { From a280ba7cf8f9db0247e03bb56109d795a95a7ee3 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Mon, 5 Feb 2018 12:12:12 -0800 Subject: [PATCH 182/728] scorch zap TestIndexRollback fixes The TestIndexRollback unit test was failing more often than ever (perhaps raciness?), so this commit tries to remove avenues of raciness in the test... - The Scorch.Open() method is refactored into an Scorch.openBolt() helper method in order to allow unit tests to control which background goroutines are started. - TestIndexRollback() doesn't start the merger goroutine, to simulate a really slow merger that never gets around to merging old segments. 
- TestIndexRollback() creates a long-lived reader after the first batch, so that the first index snapshot isn't removed due to the long-lived reader's ref-count. - TestIndexRollback() temporarily bumps NumSnapshotsToKeep to a large number so the persister isn't tempted to removeOldData() that we're trying to rollback to. --- index/scorch/scorch.go | 30 ++++++++++----- index/scorch/snapshot_rollback_test.go | 51 ++++++++++++++++++++++---- 2 files changed, 63 insertions(+), 18 deletions(-) diff --git a/index/scorch/scorch.go b/index/scorch/scorch.go index 311077653..4881ff5cb 100644 --- a/index/scorch/scorch.go +++ b/index/scorch/scorch.go @@ -114,6 +114,25 @@ func (s *Scorch) fireAsyncError(err error) { } func (s *Scorch) Open() error { + err := s.openBolt() + if err != nil { + return err + } + + s.asyncTasks.Add(1) + go s.mainLoop() + + if !s.readOnly && s.path != "" { + s.asyncTasks.Add(1) + go s.persisterLoop() + s.asyncTasks.Add(1) + go s.mergerLoop() + } + + return nil +} + +func (s *Scorch) openBolt() error { var ok bool s.path, ok = s.config["path"].(string) if !ok { @@ -136,6 +155,7 @@ func (s *Scorch) Open() error { } } } + rootBoltPath := s.path + string(os.PathSeparator) + "root.bolt" var err error if s.path != "" { @@ -166,16 +186,6 @@ func (s *Scorch) Open() error { } } - s.asyncTasks.Add(1) - go s.mainLoop() - - if !s.readOnly && s.path != "" { - s.asyncTasks.Add(1) - go s.persisterLoop() - s.asyncTasks.Add(1) - go s.mergerLoop() - } - return nil } diff --git a/index/scorch/snapshot_rollback_test.go b/index/scorch/snapshot_rollback_test.go index 9816a51e6..42d908243 100644 --- a/index/scorch/snapshot_rollback_test.go +++ b/index/scorch/snapshot_rollback_test.go @@ -22,7 +22,12 @@ import ( ) func TestIndexRollback(t *testing.T) { + numSnapshotsToKeepOrig := NumSnapshotsToKeep + NumSnapshotsToKeep = 1000 + defer func() { + NumSnapshotsToKeep = numSnapshotsToKeepOrig + err := DestroyTest() if err != nil { t.Fatal(err) @@ -34,10 +39,6 @@ func 
TestIndexRollback(t *testing.T) { if err != nil { t.Fatal(err) } - err = idx.Open() - if err != nil { - t.Fatalf("error opening index: %v", err) - } defer func() { err := idx.Close() if err != nil { @@ -45,6 +46,22 @@ func TestIndexRollback(t *testing.T) { } }() + sh, ok := idx.(*Scorch) + if !ok { + t.Fatalf("Not a scorch index?") + } + + err = sh.openBolt() + if err != nil { + t.Fatalf("error opening index: %v", err) + } + + // start background goroutines except for the merger, which + // simulates a super slow merger + sh.asyncTasks.Add(2) + go sh.mainLoop() + go sh.persisterLoop() + // create a batch, insert 2 new documents batch := index.NewBatch() doc := document.NewDocument("1") @@ -59,14 +76,17 @@ func TestIndexRollback(t *testing.T) { t.Fatal(err) } - sh, ok := idx.(*Scorch) - if !ok { - t.Fatalf("Not a scorch index?") + readerSlow, err := idx.Reader() // keep snapshot around so it's not cleaned up + if err != nil { + t.Fatal(err) } + defer func() { + _ = readerSlow.Close() + }() // fetch rollback points available as of here rollbackPoints, err := sh.RollbackPoints() - if err != nil || len(rollbackPoints) == 0 { + if err != nil || len(rollbackPoints) != 1 { t.Fatal(err, len(rollbackPoints)) } @@ -88,6 +108,21 @@ func TestIndexRollback(t *testing.T) { t.Fatal(err) } + rollbackPointsB, err := sh.RollbackPoints() + if err != nil || len(rollbackPointsB) != 3 { + t.Fatal(err, len(rollbackPointsB)) + } + + found := false + for _, p := range rollbackPointsB { + if rollbackPoint.epoch == p.epoch { + found = true + } + } + if !found { + t.Fatalf("expected rollbackPoint epoch to still be available") + } + reader, err := idx.Reader() if err != nil { t.Fatal(err) From 07274c036d48a1a583d7416bdaeba4981fbdfe4c Mon Sep 17 00:00:00 2001 From: Sreekanth Sivasankaran Date: Tue, 6 Feb 2018 13:48:16 +0530 Subject: [PATCH 183/728] tuning the edge for merge-task execution loop Adjusting the merge task creation loop to accommodate the newly merged segments so that the eventual 
merge results/ number of segments stay within the calculated budget. --- index/scorch/mergeplan/merge_plan.go | 4 ++-- index/scorch/mergeplan/merge_plan_test.go | 23 +++++++++++++++++++++-- 2 files changed, 23 insertions(+), 4 deletions(-) diff --git a/index/scorch/mergeplan/merge_plan.go b/index/scorch/mergeplan/merge_plan.go index 0afc3ce5c..62f643f43 100644 --- a/index/scorch/mergeplan/merge_plan.go +++ b/index/scorch/mergeplan/merge_plan.go @@ -186,13 +186,13 @@ func plan(segmentsIn []Segment, o *MergePlanOptions) (*MergePlan, error) { // While we’re over budget, keep looping, which might produce // another MergeTask. - for len(eligibles) > budgetNumSegments { + for len(eligibles) > 0 && (len(eligibles)+len(rv.Tasks)) > budgetNumSegments { // Track a current best roster as we examine and score // potential rosters of merges. var bestRoster []Segment var bestRosterScore float64 // Lower score is better. - for startIdx := 0; startIdx < len(eligibles)-o.SegmentsPerMergeTask; startIdx++ { + for startIdx := 0; startIdx < len(eligibles); startIdx++ { var roster []Segment var rosterLiveSize int64 diff --git a/index/scorch/mergeplan/merge_plan_test.go b/index/scorch/mergeplan/merge_plan_test.go index 4db8eb1e8..03fd40f74 100644 --- a/index/scorch/mergeplan/merge_plan_test.go +++ b/index/scorch/mergeplan/merge_plan_test.go @@ -73,7 +73,16 @@ func TestSimplePlan(t *testing.T) { segs[2], }, nil, - &MergePlan{}, + &MergePlan{ + Tasks: []*MergeTask{ + &MergeTask{ + Segments: []Segment{ + segs[2], + segs[1], + }, + }, + }, + }, nil, }, {"3 segments", @@ -83,7 +92,17 @@ func TestSimplePlan(t *testing.T) { segs[9], }, nil, - &MergePlan{}, + &MergePlan{ + Tasks: []*MergeTask{ + &MergeTask{ + Segments: []Segment{ + segs[9], + segs[2], + segs[1], + }, + }, + }, + }, nil, }, {"many segments", From 0dfd73d6cc2c2b3f5e1830201741aecbd4b0f3bd Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Tue, 6 Feb 2018 17:10:41 -0800 Subject: [PATCH 184/728] scorch zap mergeStoredAndRemap loop 
optimization This change avoids an array/slice access in a loop body. --- index/scorch/segment/zap/merge.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/index/scorch/segment/zap/merge.go b/index/scorch/segment/zap/merge.go index 327446c5c..319d81348 100644 --- a/index/scorch/segment/zap/merge.go +++ b/index/scorch/segment/zap/merge.go @@ -506,17 +506,17 @@ func mergeStoredAndRemap(segments []*Segment, drops []*roaring.Bitmap, } // return value is the start of the stored index - offset := uint64(w.Count()) + storedIndexOffset := uint64(w.Count()) // now write out the stored doc index - for docNum := range docNumOffsets { - err := binary.Write(w, binary.BigEndian, docNumOffsets[docNum]) + for _, docNumOffset := range docNumOffsets { + err := binary.Write(w, binary.BigEndian, docNumOffset) if err != nil { return 0, nil, err } } - return offset, rv, nil + return storedIndexOffset, rv, nil } // mergeFields builds a unified list of fields used across all the input segments From 03c8b2b7ec66795dc9d83365a435d0463202c9e4 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Wed, 7 Feb 2018 14:16:58 -0800 Subject: [PATCH 185/728] scorch mem segment optimizes DictEntry's across Next() calls This change optimizes the scorch/mem DictionaryIterator by reusing a DictEntry struct across multiple Next() calls. This follows the same optimization trick and Next() semantics as upsidedown's FieldDict implementation. 
--- index/scorch/segment/mem/dict.go | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/index/scorch/segment/mem/dict.go b/index/scorch/segment/mem/dict.go index 939c287e9..cf92ef71f 100644 --- a/index/scorch/segment/mem/dict.go +++ b/index/scorch/segment/mem/dict.go @@ -76,6 +76,8 @@ type DictionaryIterator struct { prefix string end string offset int + + dictEntry index.DictEntry // reused across Next()'s } // Next returns the next entry in the dictionary @@ -95,8 +97,7 @@ func (d *DictionaryIterator) Next() (*index.DictEntry, error) { d.offset++ postingID := d.d.segment.Dicts[d.d.fieldID][next] - return &index.DictEntry{ - Term: next, - Count: d.d.segment.Postings[postingID-1].GetCardinality(), - }, nil + d.dictEntry.Term = next + d.dictEntry.Count = d.d.segment.Postings[postingID-1].GetCardinality() + return &d.dictEntry, nil } From 8c2520d55cb1fca6c01659386785720dfd061577 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Wed, 7 Feb 2018 14:29:51 -0800 Subject: [PATCH 186/728] scorch zap optimize via postingsList reuse pprof graphs were showing many postingsList allocations during merging, so this change optimizes by reusing postingList memory in the merging loops. 
--- index/scorch/segment/zap/dict.go | 15 +++++++++------ index/scorch/segment/zap/merge.go | 5 ++++- index/scorch/segment/zap/merge_test.go | 4 ++-- index/scorch/segment/zap/segment.go | 3 ++- 4 files changed, 17 insertions(+), 10 deletions(-) diff --git a/index/scorch/segment/zap/dict.go b/index/scorch/segment/zap/dict.go index 0f5145fba..137c35d7a 100644 --- a/index/scorch/segment/zap/dict.go +++ b/index/scorch/segment/zap/dict.go @@ -34,15 +34,18 @@ type Dictionary struct { // PostingsList returns the postings list for the specified term func (d *Dictionary) PostingsList(term string, except *roaring.Bitmap) (segment.PostingsList, error) { - return d.postingsList([]byte(term), except) + return d.postingsList([]byte(term), except, nil) } -func (d *Dictionary) postingsList(term []byte, except *roaring.Bitmap) (*PostingsList, error) { - rv := &PostingsList{ - sb: d.sb, - term: term, - except: except, +func (d *Dictionary) postingsList(term []byte, except *roaring.Bitmap, rv *PostingsList) (*PostingsList, error) { + if rv == nil { + rv = &PostingsList{} + } else { + *rv = PostingsList{} // clear the struct } + rv.sb = d.sb + rv.term = term + rv.except = except if d.fst != nil { postingsOffset, exists, err := d.fst.Get(term) diff --git a/index/scorch/segment/zap/merge.go b/index/scorch/segment/zap/merge.go index 319d81348..1e75439a7 100644 --- a/index/scorch/segment/zap/merge.go +++ b/index/scorch/segment/zap/merge.go @@ -155,6 +155,8 @@ func persistMergedRest(segments []*Segment, drops []*roaring.Bitmap, var bufMaxVarintLen64 []byte = make([]byte, binary.MaxVarintLen64) var bufLoc []uint64 + var postings *PostingsList + rv := make([]uint64, len(fieldsInv)) fieldDvLocs := make([]uint64, len(fieldsInv)) fieldDvLocsOffset := uint64(fieldNotUninverted) @@ -231,7 +233,8 @@ func persistMergedRest(segments []*Segment, drops []*roaring.Bitmap, if dict == nil { continue } - postings, err2 := dict.postingsList(term, drops[dictI]) + var err2 error + postings, err2 = 
dict.postingsList(term, drops[dictI], postings) if err2 != nil { return nil, 0, err2 } diff --git a/index/scorch/segment/zap/merge_test.go b/index/scorch/segment/zap/merge_test.go index 4453cfcd9..323fffed4 100644 --- a/index/scorch/segment/zap/merge_test.go +++ b/index/scorch/segment/zap/merge_test.go @@ -310,8 +310,8 @@ func compareSegments(a, b *Segment) string { continue } - aplist, aerr := adict.(*Dictionary).postingsList([]byte(next.Term), nil) - bplist, berr := bdict.(*Dictionary).postingsList([]byte(next.Term), nil) + aplist, aerr := adict.(*Dictionary).postingsList([]byte(next.Term), nil, nil) + bplist, berr := bdict.(*Dictionary).postingsList([]byte(next.Term), nil, nil) if aerr != berr { rv = append(rv, fmt.Sprintf("field %s, term: %s, postingsList() errors different: %v %v", fieldName, next.Term, aerr, berr)) diff --git a/index/scorch/segment/zap/segment.go b/index/scorch/segment/zap/segment.go index 94268cace..40c0af274 100644 --- a/index/scorch/segment/zap/segment.go +++ b/index/scorch/segment/zap/segment.go @@ -343,8 +343,9 @@ func (s *SegmentBase) DocNumbers(ids []string) (*roaring.Bitmap, error) { return nil, err } + var postings *PostingsList for _, id := range ids { - postings, err := idDict.postingsList([]byte(id), nil) + postings, err = idDict.postingsList([]byte(id), nil, postings) if err != nil { return nil, err } From a83ee0f364b9dbadac97f9124563196381bcb2fa Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Wed, 7 Feb 2018 14:38:10 -0800 Subject: [PATCH 187/728] scorch zap.MergeToWriter() takes SegmentBases instead of Segments This change turns zap.MergeToWriter() into a public func, so that it's now directly callable from outside packages (such as from scorch's top-level merger or persister). And, MergerToWriter() now takes input of SegmentBases instead of Segments, so that it can now work on either in-memory zap segments or file-based zap segments. This is yet another stepping stone towards in-memory merging of zap segments. 
--- index/scorch/segment/zap/merge.go | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/index/scorch/segment/zap/merge.go b/index/scorch/segment/zap/merge.go index 1e75439a7..db03c998d 100644 --- a/index/scorch/segment/zap/merge.go +++ b/index/scorch/segment/zap/merge.go @@ -46,6 +46,11 @@ func Merge(segments []*Segment, drops []*roaring.Bitmap, path string, _ = os.Remove(path) } + segmentBases := make([]*SegmentBase, len(segments)) + for segmenti, segment := range segments { + segmentBases[segmenti] = &segment.SegmentBase + } + // buffer the output br := bufio.NewWriter(f) @@ -53,7 +58,7 @@ func Merge(segments []*Segment, drops []*roaring.Bitmap, path string, cr := NewCountHashWriter(br) newDocNums, numDocs, storedIndexOffset, fieldsIndexOffset, docValueOffset, err := - mergeToWriter(segments, drops, chunkFactor, cr) + MergeToWriter(segmentBases, drops, chunkFactor, cr) if err != nil { cleanup() return nil, err @@ -87,7 +92,7 @@ func Merge(segments []*Segment, drops []*roaring.Bitmap, path string, return newDocNums, nil } -func mergeToWriter(segments []*Segment, drops []*roaring.Bitmap, +func MergeToWriter(segments []*SegmentBase, drops []*roaring.Bitmap, chunkFactor uint32, cr *CountHashWriter) ( newDocNums [][]uint64, numDocs, storedIndexOffset, fieldsIndexOffset, docValueOffset uint64, @@ -135,10 +140,10 @@ func mapFields(fields []string) map[string]uint16 { // computeNewDocCount determines how many documents will be in the newly // merged segment when obsoleted docs are dropped -func computeNewDocCount(segments []*Segment, drops []*roaring.Bitmap) uint64 { +func computeNewDocCount(segments []*SegmentBase, drops []*roaring.Bitmap) uint64 { var newDocCount uint64 for segI, segment := range segments { - newDocCount += segment.NumDocs() + newDocCount += segment.numDocs if drops[segI] != nil { newDocCount -= drops[segI].GetCardinality() } @@ -146,7 +151,7 @@ func computeNewDocCount(segments []*Segment, drops []*roaring.Bitmap) 
uint64 { return newDocCount } -func persistMergedRest(segments []*Segment, drops []*roaring.Bitmap, +func persistMergedRest(segments []*SegmentBase, drops []*roaring.Bitmap, fieldsInv []string, fieldsMap map[string]uint16, newDocNums [][]uint64, newSegDocCount uint64, chunkFactor uint32, w *CountHashWriter) ([]uint64, uint64, error) { @@ -408,7 +413,7 @@ func persistMergedRest(segments []*Segment, drops []*roaring.Bitmap, const docDropped = math.MaxUint64 -func mergeStoredAndRemap(segments []*Segment, drops []*roaring.Bitmap, +func mergeStoredAndRemap(segments []*SegmentBase, drops []*roaring.Bitmap, fieldsMap map[string]uint16, fieldsInv []string, newSegDocCount uint64, w *CountHashWriter) (uint64, [][]uint64, error) { var rv [][]uint64 // The remapped or newDocNums for each segment. @@ -523,7 +528,7 @@ func mergeStoredAndRemap(segments []*Segment, drops []*roaring.Bitmap, } // mergeFields builds a unified list of fields used across all the input segments -func mergeFields(segments []*Segment) []string { +func mergeFields(segments []*SegmentBase) []string { fieldsMap := map[string]struct{}{} for _, segment := range segments { fields := segment.Fields() From feecce1eb26302e8127a7556fd5082b682a43127 Mon Sep 17 00:00:00 2001 From: Sreekanth Sivasankaran Date: Mon, 5 Feb 2018 16:37:43 +0530 Subject: [PATCH 188/728] fix for merger persister handshake stalemate The slow merger was lagging behind the fast persister to a persister notify send-loop while the persister awaits for any new introductions from introducer totally blocking the merger This fix along with the deleted files eligibilty flipping makes the file count to around 6 to 11 files per shard for both travel and beer samples --- index/scorch/merge.go | 36 ++++++++---------------------------- index/scorch/persister.go | 33 +++++++++++++++++++++++++++------ index/scorch/scorch.go | 4 ++-- 3 files changed, 37 insertions(+), 36 deletions(-) diff --git a/index/scorch/merge.go b/index/scorch/merge.go index 
5ded29b5a..41abe0655 100644 --- a/index/scorch/merge.go +++ b/index/scorch/merge.go @@ -58,44 +58,24 @@ OUTER: _ = ourSnapshot.DecRef() // tell the persister we're waiting for changes - // first make a notification chan - notifyUs := make(notificationChan) + // first make a epochWatcher chan + ew := &epochWatcher{ + epoch: lastEpochMergePlanned, + notifyCh: make(notificationChan, 1), + } // give it to the persister select { case <-s.closeCh: break OUTER - case s.persisterNotifier <- notifyUs: + case s.persisterNotifier <- ew: } - // check again - s.rootLock.RLock() - ourSnapshot = s.root - ourSnapshot.AddRef() - s.rootLock.RUnlock() - - if ourSnapshot.epoch != lastEpochMergePlanned { - startTime := time.Now() - - // lets get started - err := s.planMergeAtSnapshot(ourSnapshot) - if err != nil { - s.fireAsyncError(fmt.Errorf("merging err: %v", err)) - _ = ourSnapshot.DecRef() - continue OUTER - } - lastEpochMergePlanned = ourSnapshot.epoch - - s.fireEvent(EventKindMergerProgress, time.Since(startTime)) - } - _ = ourSnapshot.DecRef() - - // now wait for it (but also detect close) + // now wait for persister (but also detect close) select { case <-s.closeCh: break OUTER - case <-notifyUs: - // woken up, next loop should pick up work + case <-ew.notifyCh: } } } diff --git a/index/scorch/persister.go b/index/scorch/persister.go index cdcee37c2..c9d91dc53 100644 --- a/index/scorch/persister.go +++ b/index/scorch/persister.go @@ -39,15 +39,29 @@ type notificationChan chan struct{} func (s *Scorch) persisterLoop() { defer s.asyncTasks.Done() - var notifyChs []notificationChan + var persistWatchers []*epochWatcher var lastPersistedEpoch uint64 + + notifyWatchers := func() { + var watchersNext []*epochWatcher + for _, w := range persistWatchers { + if w.epoch < lastPersistedEpoch { + close(w.notifyCh) + } else { + watchersNext = append(watchersNext, w) + } + } + persistWatchers = watchersNext + } + OUTER: for { select { case <-s.closeCh: break OUTER - case notifyCh := 
<-s.persisterNotifier: - notifyChs = append(notifyChs, notifyCh) + case ew := <-s.persisterNotifier: + persistWatchers = append(persistWatchers, ew) + notifyWatchers() default: } @@ -81,10 +95,11 @@ OUTER: } lastPersistedEpoch = ourSnapshot.epoch - for _, notifyCh := range notifyChs { - close(notifyCh) + for _, ew := range persistWatchers { + close(ew.notifyCh) } - notifyChs = nil + + persistWatchers = nil _ = ourSnapshot.DecRef() changed := false @@ -120,6 +135,12 @@ OUTER: break OUTER case <-w.notifyCh: // woken up, next loop should pick up work + continue OUTER + case ew := <-s.persisterNotifier: + // if the watchers are already caught up then let them wait, + // else let them continue to do the catch up + persistWatchers = append(persistWatchers, ew) + notifyWatchers() } } } diff --git a/index/scorch/scorch.go b/index/scorch/scorch.go index 311077653..b4e06fc25 100644 --- a/index/scorch/scorch.go +++ b/index/scorch/scorch.go @@ -61,7 +61,7 @@ type Scorch struct { merges chan *segmentMerge introducerNotifier chan *epochWatcher revertToSnapshots chan *snapshotReversion - persisterNotifier chan notificationChan + persisterNotifier chan *epochWatcher rootBolt *bolt.DB asyncTasks sync.WaitGroup @@ -156,7 +156,7 @@ func (s *Scorch) Open() error { s.merges = make(chan *segmentMerge) s.introducerNotifier = make(chan *epochWatcher, 1) s.revertToSnapshots = make(chan *snapshotReversion) - s.persisterNotifier = make(chan notificationChan) + s.persisterNotifier = make(chan *epochWatcher, 1) if !s.readOnly && s.path != "" { err := s.removeOldZapFiles() // Before persister or merger create any new files. From ffdeb8055efd2e69167caf4aadd19e2cdd6e4e27 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Mon, 5 Feb 2018 15:42:47 -0800 Subject: [PATCH 189/728] scorch sorts fields by name to assign fieldID's This is a stepping stone to allow easier future comparisons of field maps and potential merge optimizations. 
In bleve-blast tests on a 2015 macbook (50K wikipedia docs, 8 indexers, batch size 100, ssd), this does not seem to have a distinct effect on indexing throughput. --- index/scorch/segment/mem/build.go | 15 +++++++++++++++ index/scorch/segment/zap/merge.go | 4 ++++ index/scorch/segment/zap/segment_test.go | 2 ++ 3 files changed, 21 insertions(+) diff --git a/index/scorch/segment/mem/build.go b/index/scorch/segment/mem/build.go index d3344ce30..57d60dc89 100644 --- a/index/scorch/segment/mem/build.go +++ b/index/scorch/segment/mem/build.go @@ -95,6 +95,21 @@ func (s *Segment) initializeDict(results []*index.AnalysisResult) { var numTokenFrequencies int var totLocs int + // initial scan for all fieldID's to sort them + for _, result := range results { + for _, field := range result.Document.CompositeFields { + s.getOrDefineField(field.Name()) + } + for _, field := range result.Document.Fields { + s.getOrDefineField(field.Name()) + } + } + sort.Strings(s.FieldsInv[1:]) // keep _id as first field + s.FieldsMap = make(map[string]uint16, len(s.FieldsInv)) + for fieldID, fieldName := range s.FieldsInv { + s.FieldsMap[fieldName] = uint16(fieldID + 1) + } + processField := func(fieldID uint16, tfs analysis.TokenFrequencies) { for term, tf := range tfs { pidPlus1, exists := s.Dicts[fieldID][term] diff --git a/index/scorch/segment/zap/merge.go b/index/scorch/segment/zap/merge.go index db03c998d..53b1ffe54 100644 --- a/index/scorch/segment/zap/merge.go +++ b/index/scorch/segment/zap/merge.go @@ -21,6 +21,7 @@ import ( "fmt" "math" "os" + "sort" "github.com/RoaringBitmap/roaring" "github.com/Smerity/govarint" @@ -545,5 +546,8 @@ func mergeFields(segments []*SegmentBase) []string { rv = append(rv, k) } } + + sort.Strings(rv[1:]) // leave _id as first + return rv } diff --git a/index/scorch/segment/zap/segment_test.go b/index/scorch/segment/zap/segment_test.go index 704f9e72e..9ce354ce3 100644 --- a/index/scorch/segment/zap/segment_test.go +++ 
b/index/scorch/segment/zap/segment_test.go @@ -18,6 +18,7 @@ import ( "math" "os" "reflect" + "sort" "testing" "github.com/blevesearch/bleve/index" @@ -574,6 +575,7 @@ func TestSegmentVisitableDocValueFieldsList(t *testing.T) { t.Fatalf("segment VisitableDocValueFields err: %v", err) } + sort.Strings(expectedFields[1:]) // keep _id as first field if !reflect.DeepEqual(fields, expectedFields) { t.Errorf("expected field terms: %#v, got: %#v", expectedFields, fields) } From 822457542efe74b6b6b228ef52db2f56b8f3aea2 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Mon, 5 Feb 2018 16:03:17 -0800 Subject: [PATCH 190/728] scorch zap VERSION bump: check whether fields are the same at merge COMPATIBILITY NOTE: scorch zap version bumped in this commit. The version bump is because mergeFields() now computes whether fields are the same across segments and it relies on the previous commit where fieldID's are assigned in field name sorted order (albeit with _id field always having fieldID of 0). Potential future commits might rely on this info that "fields are the same across segments" for more optimizations, etc. 
--- index/scorch/segment/zap/build.go | 2 +- index/scorch/segment/zap/merge.go | 27 ++++++++++++++++++++------- 2 files changed, 21 insertions(+), 8 deletions(-) diff --git a/index/scorch/segment/zap/build.go b/index/scorch/segment/zap/build.go index 60d168e66..b3bbbab52 100644 --- a/index/scorch/segment/zap/build.go +++ b/index/scorch/segment/zap/build.go @@ -28,7 +28,7 @@ import ( "github.com/golang/snappy" ) -const version uint32 = 2 +const version uint32 = 3 const fieldNotUninverted = math.MaxUint64 diff --git a/index/scorch/segment/zap/merge.go b/index/scorch/segment/zap/merge.go index 53b1ffe54..bef2a087e 100644 --- a/index/scorch/segment/zap/merge.go +++ b/index/scorch/segment/zap/merge.go @@ -102,13 +102,13 @@ func MergeToWriter(segments []*SegmentBase, drops []*roaring.Bitmap, var dictLocs []uint64 - fieldsInv := mergeFields(segments) + fieldsSame, fieldsInv := mergeFields(segments) fieldsMap := mapFields(fieldsInv) numDocs = computeNewDocCount(segments, drops) if numDocs > 0 { storedIndexOffset, newDocNums, err = mergeStoredAndRemap(segments, drops, - fieldsMap, fieldsInv, numDocs, cr) + fieldsMap, fieldsInv, fieldsSame, numDocs, cr) if err != nil { return nil, 0, 0, 0, 0, err } @@ -415,7 +415,7 @@ func persistMergedRest(segments []*SegmentBase, drops []*roaring.Bitmap, const docDropped = math.MaxUint64 func mergeStoredAndRemap(segments []*SegmentBase, drops []*roaring.Bitmap, - fieldsMap map[string]uint16, fieldsInv []string, newSegDocCount uint64, + fieldsMap map[string]uint16, fieldsInv []string, fieldsSame bool, newSegDocCount uint64, w *CountHashWriter) (uint64, [][]uint64, error) { var rv [][]uint64 // The remapped or newDocNums for each segment. 
@@ -528,13 +528,26 @@ func mergeStoredAndRemap(segments []*SegmentBase, drops []*roaring.Bitmap, return storedIndexOffset, rv, nil } -// mergeFields builds a unified list of fields used across all the input segments -func mergeFields(segments []*SegmentBase) []string { +// mergeFields builds a unified list of fields used across all the +// input segments, and computes whether the fields are the same across +// segments (which depends on fields to be sorted in the same way +// across segments) +func mergeFields(segments []*SegmentBase) (bool, []string) { + fieldsSame := true + + var segment0Fields []string + if len(segments) > 0 { + segment0Fields = segments[0].Fields() + } + fieldsMap := map[string]struct{}{} for _, segment := range segments { fields := segment.Fields() - for _, field := range fields { + for fieldi, field := range fields { fieldsMap[field] = struct{}{} + if len(segment0Fields) != len(fields) || segment0Fields[fieldi] != field { + fieldsSame = false + } } } @@ -549,5 +562,5 @@ func mergeFields(segments []*SegmentBase) []string { sort.Strings(rv[1:]) // leave _id as first - return rv + return fieldsSame, rv } From 0b50a20caccf5c4fae2ab295978af21af8d4c792 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Mon, 5 Feb 2018 17:53:47 -0800 Subject: [PATCH 191/728] scorch zap move docDropped const to earlier in file --- index/scorch/segment/zap/merge.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/index/scorch/segment/zap/merge.go b/index/scorch/segment/zap/merge.go index bef2a087e..dbb33110f 100644 --- a/index/scorch/segment/zap/merge.go +++ b/index/scorch/segment/zap/merge.go @@ -29,6 +29,8 @@ import ( "github.com/golang/snappy" ) +const docDropped = math.MaxUint64 // sentinel docNum to represent a deleted doc + // Merge takes a slice of zap segments and bit masks describing which // documents may be dropped, and creates a new segment containing the // remaining data. 
This new segment is built at the specified path, @@ -412,8 +414,6 @@ func persistMergedRest(segments []*SegmentBase, drops []*roaring.Bitmap, return rv, fieldDvLocsOffset, nil } -const docDropped = math.MaxUint64 - func mergeStoredAndRemap(segments []*SegmentBase, drops []*roaring.Bitmap, fieldsMap map[string]uint16, fieldsInv []string, fieldsSame bool, newSegDocCount uint64, w *CountHashWriter) (uint64, [][]uint64, error) { From ed4826b189404618f33b57f6add214e86b0353e4 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Tue, 6 Feb 2018 09:12:48 -0800 Subject: [PATCH 192/728] scorch zap merge optimization to byte-copy storedDocs The optimization to byte-copy all the storedDocs for a given segment during merging kicks in when the fields are the same across all segments and when there are no deletions for that given segment. This can happen, for example, during data loading or insert-only scenarios. As part of this commit, the Segment.copyStoredDocs() method was added, which uses a single Write() call to copy all the stored docs bytes of a segment to a writer in one shot. And, getDocStoredMetaAndCompressed() was refactored into a related helper function, getDocStoredOffsets(), which provides the storedDocs metadata (offsets & lengths) for a doc. 
--- index/scorch/segment/zap/merge.go | 53 ++++++++++++++++++++++++++ index/scorch/segment/zap/merge_test.go | 34 +++++++++++++++++ index/scorch/segment/zap/read.go | 28 ++++++++++---- 3 files changed, 107 insertions(+), 8 deletions(-) diff --git a/index/scorch/segment/zap/merge.go b/index/scorch/segment/zap/merge.go index dbb33110f..b1eed28bc 100644 --- a/index/scorch/segment/zap/merge.go +++ b/index/scorch/segment/zap/merge.go @@ -437,6 +437,24 @@ func mergeStoredAndRemap(segments []*SegmentBase, drops []*roaring.Bitmap, for segI, segment := range segments { segNewDocNums := make([]uint64, segment.numDocs) + // optimize when the field mapping is the same across all + // segments and there are no deletions, via byte-copying + // of stored docs bytes directly to the writer + if fieldsSame && (drops[segI] == nil || drops[segI].GetCardinality() == 0) { + err := segment.copyStoredDocs(newDocNum, docNumOffsets, w) + if err != nil { + return 0, nil, err + } + + for i := uint64(0); i < segment.numDocs; i++ { + segNewDocNums[i] = newDocNum + newDocNum++ + } + rv = append(rv, segNewDocNums) + + continue + } + // for each doc num for docNum := uint64(0); docNum < segment.numDocs; docNum++ { // TODO: roaring's API limits docNums to 32-bits? @@ -528,6 +546,41 @@ func mergeStoredAndRemap(segments []*SegmentBase, drops []*roaring.Bitmap, return storedIndexOffset, rv, nil } +// copyStoredDocs writes out a segment's stored doc info, optimized by +// using a single Write() call for the entire set of bytes. The +// newDocNumOffsets is filled with the new offsets for each doc. 
+func (s *SegmentBase) copyStoredDocs(newDocNum uint64, newDocNumOffsets []uint64, + w *CountHashWriter) error { + if s.numDocs <= 0 { + return nil + } + + indexOffset0, storedOffset0, _, _, _ := + s.getDocStoredOffsets(0) // the segment's first doc + + indexOffsetN, storedOffsetN, readN, metaLenN, dataLenN := + s.getDocStoredOffsets(s.numDocs - 1) // the segment's last doc + + storedOffset0New := uint64(w.Count()) + + storedBytes := s.mem[storedOffset0 : storedOffsetN+readN+metaLenN+dataLenN] + _, err := w.Write(storedBytes) + if err != nil { + return err + } + + // remap the storedOffset's for the docs into new offsets relative + // to storedOffset0New, filling the given docNumOffsetsOut array + for indexOffset := indexOffset0; indexOffset <= indexOffsetN; indexOffset += 8 { + storedOffset := binary.BigEndian.Uint64(s.mem[indexOffset : indexOffset+8]) + storedOffsetNew := storedOffset - storedOffset0 + storedOffset0New + newDocNumOffsets[newDocNum] = storedOffsetNew + newDocNum += 1 + } + + return nil +} + // mergeFields builds a unified list of fields used across all the // input segments, and computes whether the fields are the same across // segments (which depends on fields to be sorted in the same way diff --git a/index/scorch/segment/zap/merge_test.go b/index/scorch/segment/zap/merge_test.go index 323fffed4..bb09f8314 100644 --- a/index/scorch/segment/zap/merge_test.go +++ b/index/scorch/segment/zap/merge_test.go @@ -398,6 +398,40 @@ func compareSegments(a, b *Segment) string { fieldName, next.Term, aloc, bloc)) } } + + if fieldName == "_id" { + docId := next.Term + docNumA := apitrn.Number() + docNumB := bpitrn.Number() + afields := map[string]interface{}{} + err = a.VisitDocument(apitrn.Number(), + func(field string, typ byte, value []byte, pos []uint64) bool { + afields[field+"-typ"] = typ + afields[field+"-value"] = value + afields[field+"-pos"] = pos + return true + }) + if err != nil { + rv = append(rv, fmt.Sprintf("a.VisitDocument err: %v", err)) + } 
+ bfields := map[string]interface{}{} + err = b.VisitDocument(bpitrn.Number(), + func(field string, typ byte, value []byte, pos []uint64) bool { + bfields[field+"-typ"] = typ + bfields[field+"-value"] = value + bfields[field+"-pos"] = pos + return true + }) + if err != nil { + rv = append(rv, fmt.Sprintf("b.VisitDocument err: %v", err)) + } + if !reflect.DeepEqual(afields, bfields) { + rv = append(rv, fmt.Sprintf("afields != bfields,"+ + " id: %s, docNumA: %d, docNumB: %d,"+ + " afields: %#v, bfields: %#v", + docId, docNumA, docNumB, afields, bfields)) + } + } } } } diff --git a/index/scorch/segment/zap/read.go b/index/scorch/segment/zap/read.go index 0c5b9e17f..e47d4c6ab 100644 --- a/index/scorch/segment/zap/read.go +++ b/index/scorch/segment/zap/read.go @@ -17,15 +17,27 @@ package zap import "encoding/binary" func (s *SegmentBase) getDocStoredMetaAndCompressed(docNum uint64) ([]byte, []byte) { - docStoredStartAddr := s.storedIndexOffset + (8 * docNum) - docStoredStart := binary.BigEndian.Uint64(s.mem[docStoredStartAddr : docStoredStartAddr+8]) + _, storedOffset, n, metaLen, dataLen := s.getDocStoredOffsets(docNum) + + meta := s.mem[storedOffset+n : storedOffset+n+metaLen] + data := s.mem[storedOffset+n+metaLen : storedOffset+n+metaLen+dataLen] + + return meta, data +} + +func (s *SegmentBase) getDocStoredOffsets(docNum uint64) ( + uint64, uint64, uint64, uint64, uint64) { + indexOffset := s.storedIndexOffset + (8 * docNum) + + storedOffset := binary.BigEndian.Uint64(s.mem[indexOffset : indexOffset+8]) + var n uint64 - metaLen, read := binary.Uvarint(s.mem[docStoredStart : docStoredStart+binary.MaxVarintLen64]) + + metaLen, read := binary.Uvarint(s.mem[storedOffset : storedOffset+binary.MaxVarintLen64]) n += uint64(read) - var dataLen uint64 - dataLen, read = binary.Uvarint(s.mem[docStoredStart+n : docStoredStart+n+binary.MaxVarintLen64]) + + dataLen, read := binary.Uvarint(s.mem[storedOffset+n : storedOffset+n+binary.MaxVarintLen64]) n += uint64(read) - meta := 
s.mem[docStoredStart+n : docStoredStart+n+metaLen] - data := s.mem[docStoredStart+n+metaLen : docStoredStart+n+metaLen+dataLen] - return meta, data + + return indexOffset, storedOffset, n, metaLen, dataLen } From 99852accb04dfdb650e5d622416bc451651239a6 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Thu, 8 Feb 2018 10:12:20 -0800 Subject: [PATCH 193/728] scorch RollbackPoints() no error at start & fix TestIndexRollback When a scorch is just opened and is "empty", RollbackPoints() no longer considers that an error situation. Also, this commit makes the TestIndexRollback unit tests is a bit more forgiving to races, as we were seeing failures sometimes in travis-CI environments (TestIndexRollback was passing fine on my dev macbook). The theory is the double-looping in the persisterLoop would sometimes be racy, leading to 1 or 2 rollback points. --- index/scorch/snapshot_rollback.go | 9 ++++----- index/scorch/snapshot_rollback_test.go | 24 ++++++++++++++++++------ 2 files changed, 22 insertions(+), 11 deletions(-) diff --git a/index/scorch/snapshot_rollback.go b/index/scorch/snapshot_rollback.go index 43c3ba9f1..c265b4c31 100644 --- a/index/scorch/snapshot_rollback.go +++ b/index/scorch/snapshot_rollback.go @@ -31,10 +31,9 @@ func (r *RollbackPoint) GetInternal(key []byte) []byte { return r.meta[string(key)] } -// RollbackPoints returns an array of rollback points available -// for the application to make a decision on where to rollback -// to. A nil return value indicates that there are no available -// rollback points. +// RollbackPoints returns an array of rollback points available for +// the application to rollback to, with more recent rollback points +// (higher epochs) coming first. 
func (s *Scorch) RollbackPoints() ([]*RollbackPoint, error) { if s.rootBolt == nil { return nil, fmt.Errorf("RollbackPoints: root is nil") @@ -54,7 +53,7 @@ func (s *Scorch) RollbackPoints() ([]*RollbackPoint, error) { snapshots := tx.Bucket(boltSnapshotsBucket) if snapshots == nil { - return nil, fmt.Errorf("RollbackPoints: no snapshots available") + return nil, nil } rollbackPoints := []*RollbackPoint{} diff --git a/index/scorch/snapshot_rollback_test.go b/index/scorch/snapshot_rollback_test.go index 42d908243..0065a746d 100644 --- a/index/scorch/snapshot_rollback_test.go +++ b/index/scorch/snapshot_rollback_test.go @@ -62,6 +62,15 @@ func TestIndexRollback(t *testing.T) { go sh.mainLoop() go sh.persisterLoop() + // should have no rollback points initially + rollbackPoints, err := sh.RollbackPoints() + if err != nil { + t.Fatalf("expected no err, got: %v, %d", err, len(rollbackPoints)) + } + if len(rollbackPoints) != 0 { + t.Fatalf("expected no rollbackPoints, got %d", len(rollbackPoints)) + } + // create a batch, insert 2 new documents batch := index.NewBatch() doc := document.NewDocument("1") @@ -84,10 +93,13 @@ func TestIndexRollback(t *testing.T) { _ = readerSlow.Close() }() - // fetch rollback points available as of here - rollbackPoints, err := sh.RollbackPoints() - if err != nil || len(rollbackPoints) != 1 { - t.Fatal(err, len(rollbackPoints)) + // fetch rollback points after first batch + rollbackPoints, err = sh.RollbackPoints() + if err != nil { + t.Fatalf("expected no err, got: %v, %d", err, len(rollbackPoints)) + } + if len(rollbackPoints) == 0 { + t.Fatalf("expected some rollbackPoints, got none") } // set this as a rollback point for the future @@ -109,8 +121,8 @@ func TestIndexRollback(t *testing.T) { } rollbackPointsB, err := sh.RollbackPoints() - if err != nil || len(rollbackPointsB) != 3 { - t.Fatal(err, len(rollbackPointsB)) + if err != nil || len(rollbackPointsB) <= len(rollbackPoints) { + t.Fatalf("expected no err, got: %v, %d", err, 
len(rollbackPointsB)) } found := false From d0644fec129f08c58f6df3a298e0a314f590a6d0 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Thu, 8 Feb 2018 12:17:19 -0800 Subject: [PATCH 194/728] scorch persistSnapshot comments update See also: https://github.com/blevesearch/bleve/issues/763 --- index/scorch/persister.go | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/index/scorch/persister.go b/index/scorch/persister.go index 388c14055..7b1cd837e 100644 --- a/index/scorch/persister.go +++ b/index/scorch/persister.go @@ -239,8 +239,13 @@ func (s *Scorch) persistSnapshot(snapshot *IndexSnapshot) error { } } - // only alter the root if we actually persisted a segment - // (sometimes its just a new snapshot, possibly with new internal values) + // we need to swap in a new root only when we've persisted 1 or + // more segments -- whereby the new root would have 1-for-1 + // replacements of in-memory segments with file-based segments + // + // other cases like updates to internal values only, and/or when + // there are only deletions, are already covered and persisted by + // the newly populated boltdb snapshotBucket above if len(newSegmentPaths) > 0 { // now try to open all the new snapshots newSegments := make(map[uint64]segment.Segment) From 91ac0d011aaa7a7edbcd252c66443e6b1540baa1 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Thu, 8 Feb 2018 13:25:16 -0800 Subject: [PATCH 195/728] scorch uses segment.id to encode boltdb sub-bucket key fixes #764 --- index/scorch/persister.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/index/scorch/persister.go b/index/scorch/persister.go index 7b1cd837e..61a266adb 100644 --- a/index/scorch/persister.go +++ b/index/scorch/persister.go @@ -193,8 +193,8 @@ func (s *Scorch) persistSnapshot(snapshot *IndexSnapshot) error { newSegmentPaths := make(map[uint64]string) // first ensure that each segment in this snapshot has been persisted - for i, segmentSnapshot := range snapshot.segment { - 
snapshotSegmentKey := segment.EncodeUvarintAscending(nil, uint64(i)) + for _, segmentSnapshot := range snapshot.segment { + snapshotSegmentKey := segment.EncodeUvarintAscending(nil, segmentSnapshot.id) snapshotSegmentBucket, err2 := snapshotBucket.CreateBucketIfNotExists(snapshotSegmentKey) if err2 != nil { return err2 From dee6a2b1c64802c6a84a830f4a149bf25fbf91c4 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Thu, 8 Feb 2018 12:33:19 -0800 Subject: [PATCH 196/728] scorch persistSnapshot() consistently uses err to commit vs abort Some codepaths in persistSnapshot() were saving errors into an err2 local variable, which might lead incorrectly to commit during an error situation rather than abort. --- index/scorch/persister.go | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/index/scorch/persister.go b/index/scorch/persister.go index 61a266adb..07f38b81e 100644 --- a/index/scorch/persister.go +++ b/index/scorch/persister.go @@ -196,17 +196,17 @@ func (s *Scorch) persistSnapshot(snapshot *IndexSnapshot) error { for _, segmentSnapshot := range snapshot.segment { snapshotSegmentKey := segment.EncodeUvarintAscending(nil, segmentSnapshot.id) snapshotSegmentBucket, err2 := snapshotBucket.CreateBucketIfNotExists(snapshotSegmentKey) - if err2 != nil { - return err2 + if err != nil { + return err } switch seg := segmentSnapshot.segment.(type) { case *zap.SegmentBase: // need to persist this to disk filename := zapFileName(segmentSnapshot.id) path := s.path + string(os.PathSeparator) + filename - err2 := zap.PersistSegmentBase(seg, path) - if err2 != nil { - return fmt.Errorf("error persisting segment: %v", err2) + err = zap.PersistSegmentBase(seg, path) + if err != nil { + return fmt.Errorf("error persisting segment: %v", err) } newSegmentPaths[segmentSnapshot.id] = path err = snapshotSegmentBucket.Put(boltPathKey, []byte(filename)) From 83272a9629509d40e8db4fe382e7b95e1fd3dcff Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Thu, 8 Feb 2018 12:47:07 
-0800 Subject: [PATCH 197/728] scorch persistSnapshot() err handling & propagation --- index/scorch/persister.go | 30 +++++++++++++++++------------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/index/scorch/persister.go b/index/scorch/persister.go index 07f38b81e..dab753d7f 100644 --- a/index/scorch/persister.go +++ b/index/scorch/persister.go @@ -145,23 +145,15 @@ OUTER: } } -func (s *Scorch) persistSnapshot(snapshot *IndexSnapshot) error { +func (s *Scorch) persistSnapshot(snapshot *IndexSnapshot) (err error) { // start a write transaction tx, err := s.rootBolt.Begin(true) if err != nil { return err } - // defer fsync of the rootbolt + // defer rollback on error defer func() { - if err == nil { - err = s.rootBolt.Sync() - } - }() - // defer commit/rollback transaction - defer func() { - if err == nil { - err = tx.Commit() - } else { + if err != nil { _ = tx.Rollback() } }() @@ -195,7 +187,7 @@ func (s *Scorch) persistSnapshot(snapshot *IndexSnapshot) error { // first ensure that each segment in this snapshot has been persisted for _, segmentSnapshot := range snapshot.segment { snapshotSegmentKey := segment.EncodeUvarintAscending(nil, segmentSnapshot.id) - snapshotSegmentBucket, err2 := snapshotBucket.CreateBucketIfNotExists(snapshotSegmentKey) + snapshotSegmentBucket, err := snapshotBucket.CreateBucketIfNotExists(snapshotSegmentKey) if err != nil { return err } @@ -300,7 +292,19 @@ func (s *Scorch) persistSnapshot(snapshot *IndexSnapshot) error { _ = rootPrev.DecRef() } } - // allow files to become eligible for removal + + err = tx.Commit() + if err != nil { + return err + } + + err = s.rootBolt.Sync() + if err != nil { + return err + } + + // allow files to become eligible for removal after commit, such + // as file segments from snapshots that came from the merger s.rootLock.Lock() for _, filename := range filenames { delete(s.ineligibleForRemoval, filename) From 6f5f90cd41720665b04e36805605638fef3120f9 Mon Sep 17 00:00:00 2001 From: Steve Yen 
Date: Wed, 7 Feb 2018 16:54:58 -0800 Subject: [PATCH 198/728] scorch zap segment cleanup handling for some edge cases Two cases in this commit... If we're shutting down, the merger might not have handed off its latest merged segment to the introducer yet, so the merger still owns the segment and needs to Close() that segment itself. In persistSnapshot(), there migth be cases where the persister might not be able to swap in its newly persisted segments -- so, the persistSnapshot() needs to Close() those segments itself. --- index/scorch/merge.go | 1 + index/scorch/persister.go | 15 ++++++++++----- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/index/scorch/merge.go b/index/scorch/merge.go index 41abe0655..fb4e80d20 100644 --- a/index/scorch/merge.go +++ b/index/scorch/merge.go @@ -146,6 +146,7 @@ func (s *Scorch) planMergeAtSnapshot(ourSnapshot *IndexSnapshot) error { // give it to the introducer select { case <-s.closeCh: + _ = segment.Close() return nil case s.merges <- sm: } diff --git a/index/scorch/persister.go b/index/scorch/persister.go index dab753d7f..83909a880 100644 --- a/index/scorch/persister.go +++ b/index/scorch/persister.go @@ -241,14 +241,18 @@ func (s *Scorch) persistSnapshot(snapshot *IndexSnapshot) (err error) { if len(newSegmentPaths) > 0 { // now try to open all the new snapshots newSegments := make(map[uint64]segment.Segment) + defer func() { + for _, s := range newSegments { + if s != nil { + // cleanup segments that were opened but not + // swapped into the new root + _ = s.Close() + } + } + }() for segmentID, path := range newSegmentPaths { newSegments[segmentID], err = zap.Open(path) if err != nil { - for _, s := range newSegments { - if s != nil { - _ = s.Close() // cleanup segments that were successfully opened - } - } return fmt.Errorf("error opening new segment at %s, %v", path, err) } } @@ -273,6 +277,7 @@ func (s *Scorch) persistSnapshot(snapshot *IndexSnapshot) (err error) { cachedDocs: segmentSnapshot.cachedDocs, } 
newIndexSnapshot.segment[i] = newSegmentSnapshot + delete(newSegments, segmentSnapshot.id) // update items persisted incase of a new segment snapshot atomic.AddUint64(&s.stats.numItemsPersisted, newSegmentSnapshot.Count()) } else { From f177f07613dcaa6c993c6bcdb4963f6fa58f0a67 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Thu, 8 Feb 2018 17:11:35 -0800 Subject: [PATCH 199/728] scorch zap segment merging reuses prealloc'ed PostingsIterator During zap segment merging, a new zap PostingsIterator was allocated for every field X segment X term. This change optimizes by reusing a single PostingsIterator instance per persistMergedRest() invocation. And, also unused fields are removed from the PostingsIterator. --- index/scorch/segment/zap/dict.go | 1 - index/scorch/segment/zap/merge.go | 3 ++- index/scorch/segment/zap/posting.go | 14 ++++++++++---- 3 files changed, 12 insertions(+), 6 deletions(-) diff --git a/index/scorch/segment/zap/dict.go b/index/scorch/segment/zap/dict.go index 137c35d7a..55796ffa0 100644 --- a/index/scorch/segment/zap/dict.go +++ b/index/scorch/segment/zap/dict.go @@ -44,7 +44,6 @@ func (d *Dictionary) postingsList(term []byte, except *roaring.Bitmap, rv *Posti *rv = PostingsList{} // clear the struct } rv.sb = d.sb - rv.term = term rv.except = except if d.fst != nil { diff --git a/index/scorch/segment/zap/merge.go b/index/scorch/segment/zap/merge.go index b1eed28bc..751fcb582 100644 --- a/index/scorch/segment/zap/merge.go +++ b/index/scorch/segment/zap/merge.go @@ -164,6 +164,7 @@ func persistMergedRest(segments []*SegmentBase, drops []*roaring.Bitmap, var bufLoc []uint64 var postings *PostingsList + var postItr *PostingsIterator rv := make([]uint64, len(fieldsInv)) fieldDvLocs := make([]uint64, len(fieldsInv)) @@ -247,7 +248,7 @@ func persistMergedRest(segments []*SegmentBase, drops []*roaring.Bitmap, return nil, 0, err2 } - postItr := postings.Iterator() + postItr = postings.iterator(postItr) next, err2 := postItr.Next() for next != nil && err2 
== nil { hitNewDocNum := newDocNums[dictI][next.Number()] diff --git a/index/scorch/segment/zap/posting.go b/index/scorch/segment/zap/posting.go index 67e08d1ae..d504885d0 100644 --- a/index/scorch/segment/zap/posting.go +++ b/index/scorch/segment/zap/posting.go @@ -28,21 +28,27 @@ import ( // PostingsList is an in-memory represenation of a postings list type PostingsList struct { sb *SegmentBase - term []byte postingsOffset uint64 freqOffset uint64 locOffset uint64 locBitmap *roaring.Bitmap postings *roaring.Bitmap except *roaring.Bitmap - postingKey []byte } // Iterator returns an iterator for this postings list func (p *PostingsList) Iterator() segment.PostingsIterator { - rv := &PostingsIterator{ - postings: p, + return p.iterator(nil) +} + +func (p *PostingsList) iterator(rv *PostingsIterator) *PostingsIterator { + if rv == nil { + rv = &PostingsIterator{} + } else { + *rv = PostingsIterator{} // clear the struct } + rv.postings = p + if p.postings != nil { // prepare the freq chunk details var n uint64 From e37c563c560a7cc35449fb29b7b4d42b6d105b37 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Thu, 8 Feb 2018 18:01:23 -0800 Subject: [PATCH 200/728] scorch zap merge move fieldDvLocsOffset var declaration Move the var declaration to nearer where its used. 
--- index/scorch/segment/zap/merge.go | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/index/scorch/segment/zap/merge.go b/index/scorch/segment/zap/merge.go index 751fcb582..c9e275c58 100644 --- a/index/scorch/segment/zap/merge.go +++ b/index/scorch/segment/zap/merge.go @@ -168,7 +168,6 @@ func persistMergedRest(segments []*SegmentBase, drops []*roaring.Bitmap, rv := make([]uint64, len(fieldsInv)) fieldDvLocs := make([]uint64, len(fieldsInv)) - fieldDvLocsOffset := uint64(fieldNotUninverted) // docTermMap is keyed by docNum, where the array impl provides // better memory usage behavior than a sparse-friendlier hashmap @@ -401,7 +400,7 @@ func persistMergedRest(segments []*SegmentBase, drops []*roaring.Bitmap, } } - fieldDvLocsOffset = uint64(w.Count()) + fieldDvLocsOffset := uint64(w.Count()) buf := bufMaxVarintLen64 for _, offset := range fieldDvLocs { From 846235593c08572890a24f312661ca142b5f1f7c Mon Sep 17 00:00:00 2001 From: abhinavdangeti Date: Mon, 12 Feb 2018 10:03:43 -0800 Subject: [PATCH 201/728] Update vendor'ed revision for moss to the latest --- vendor/manifest | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vendor/manifest b/vendor/manifest index 641f276e5..9a86629ed 100644 --- a/vendor/manifest +++ b/vendor/manifest @@ -37,7 +37,7 @@ "importpath": "github.com/couchbase/moss", "repository": "https://github.com/couchbase/moss", "vcs": "git", - "revision": "fc637b3f82ec5b8139b0d295f6588c6a2bea5a16", + "revision": "013a19c55df3e689a66b632c7c8074e37162217d", "branch": "master", "notests": true }, From 95a4f37e5c3d66feae18c6f1d3aa200e5764b95e Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Mon, 12 Feb 2018 14:37:40 -0800 Subject: [PATCH 202/728] scorch zap enumerator impl that joins multiple vellum iterators Unlike vellum's MergeIterator, the enumerator introduced in this commit doesn't merge when there are matching keys across iterators. 
Instead, the enumerator implementation provides a traversal of all the tuples of (key, iteratorIndex, val) from the underlying vellum iterators, ordered by key ASC, iteratorIndex ASC. --- index/scorch/segment/zap/enumerator.go | 124 +++++++++++ index/scorch/segment/zap/enumerator_test.go | 233 ++++++++++++++++++++ 2 files changed, 357 insertions(+) create mode 100644 index/scorch/segment/zap/enumerator.go create mode 100644 index/scorch/segment/zap/enumerator_test.go diff --git a/index/scorch/segment/zap/enumerator.go b/index/scorch/segment/zap/enumerator.go new file mode 100644 index 000000000..3c708dd57 --- /dev/null +++ b/index/scorch/segment/zap/enumerator.go @@ -0,0 +1,124 @@ +// Copyright (c) 2018 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package zap + +import ( + "bytes" + + "github.com/couchbase/vellum" +) + +// enumerator provides an ordered traversal of multiple vellum +// iterators. Like JOIN of iterators, the enumerator produces a +// sequence of (key, iteratorIndex, value) tuples, sorted by key ASC, +// then iteratorIndex ASC, where the same key might be seen or +// repeated across multiple child iterators. 
+type enumerator struct { + itrs []vellum.Iterator + currKs [][]byte + currVs []uint64 + + lowK []byte + lowIdxs []int + lowCurr int +} + +// newEnumerator returns a new enumerator over the vellum Iterators +func newEnumerator(itrs []vellum.Iterator) (*enumerator, error) { + rv := &enumerator{ + itrs: itrs, + currKs: make([][]byte, len(itrs)), + currVs: make([]uint64, len(itrs)), + lowIdxs: make([]int, 0, len(itrs)), + } + for i, itr := range rv.itrs { + rv.currKs[i], rv.currVs[i] = itr.Current() + } + rv.updateMatches() + if rv.lowK == nil { + return rv, vellum.ErrIteratorDone + } + return rv, nil +} + +// updateMatches maintains the low key matches based on the currKs +func (m *enumerator) updateMatches() { + m.lowK = nil + m.lowIdxs = m.lowIdxs[:0] + m.lowCurr = 0 + + for i, key := range m.currKs { + if key == nil { + continue + } + + cmp := bytes.Compare(key, m.lowK) + if cmp < 0 || m.lowK == nil { + // reached a new low + m.lowK = key + m.lowIdxs = m.lowIdxs[:0] + m.lowIdxs = append(m.lowIdxs, i) + } else if cmp == 0 { + m.lowIdxs = append(m.lowIdxs, i) + } + } +} + +// Current returns the enumerator's current key, iterator-index, and +// value. If the enumerator is not pointing at a valid value (because +// Next returned an error previously), Current will return nil,0,0. +func (m *enumerator) Current() ([]byte, int, uint64) { + var i int + var v uint64 + if m.lowCurr < len(m.lowIdxs) { + i = m.lowIdxs[m.lowCurr] + v = m.currVs[i] + } + return m.lowK, i, v +} + +// Next advances the enumerator to the next key/iterator/value result, +// else vellum.ErrIteratorDone is returned. 
+func (m *enumerator) Next() error { + m.lowCurr += 1 + if m.lowCurr >= len(m.lowIdxs) { + // move all the current low iterators forwards + for _, vi := range m.lowIdxs { + err := m.itrs[vi].Next() + if err != nil && err != vellum.ErrIteratorDone { + return err + } + m.currKs[vi], m.currVs[vi] = m.itrs[vi].Current() + } + m.updateMatches() + } + if m.lowK == nil { + return vellum.ErrIteratorDone + } + return nil +} + +// Close all the underlying Iterators. The first error, if any, will +// be returned. +func (m *enumerator) Close() error { + var rv error + for _, itr := range m.itrs { + err := itr.Close() + if rv == nil { + rv = err + } + } + return rv +} diff --git a/index/scorch/segment/zap/enumerator_test.go b/index/scorch/segment/zap/enumerator_test.go new file mode 100644 index 000000000..b27788923 --- /dev/null +++ b/index/scorch/segment/zap/enumerator_test.go @@ -0,0 +1,233 @@ +// Copyright (c) 2018 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +// implied. See the License for the specific language governing +// permissions and limitations under the License. 
+ +package zap + +import ( + "fmt" + "testing" + + "github.com/couchbase/vellum" +) + +type enumTestEntry struct { + key string + val uint64 +} + +type enumTestWant struct { + key string + idx int + val uint64 +} + +func TestEnumerator(t *testing.T) { + tests := []struct { + desc string + in [][]enumTestEntry + want []enumTestWant + }{ + { + desc: "two non-empty enumerators with no duplicate keys", + in: [][]enumTestEntry{ + []enumTestEntry{ + {"a", 1}, + {"c", 3}, + {"e", 5}, + }, + []enumTestEntry{ + {"b", 2}, + {"d", 4}, + {"f", 6}, + }, + }, + want: []enumTestWant{ + {"a", 0, 1}, + {"b", 1, 2}, + {"c", 0, 3}, + {"d", 1, 4}, + {"e", 0, 5}, + {"f", 1, 6}, + }, + }, + { + desc: "two non-empty enumerators with duplicate keys", + in: [][]enumTestEntry{ + []enumTestEntry{ + {"a", 1}, + {"c", 3}, + {"e", 5}, + }, + []enumTestEntry{ + {"a", 2}, + {"c", 4}, + {"e", 6}, + }, + }, + want: []enumTestWant{ + {"a", 0, 1}, + {"a", 1, 2}, + {"c", 0, 3}, + {"c", 1, 4}, + {"e", 0, 5}, + {"e", 1, 6}, + }, + }, + { + desc: "first iterator is empty", + in: [][]enumTestEntry{ + []enumTestEntry{}, + []enumTestEntry{ + {"a", 2}, + {"c", 4}, + {"e", 6}, + }, + }, + want: []enumTestWant{ + {"a", 1, 2}, + {"c", 1, 4}, + {"e", 1, 6}, + }, + }, + { + desc: "last iterator is empty", + in: [][]enumTestEntry{ + []enumTestEntry{ + {"a", 1}, + {"c", 3}, + {"e", 5}, + }, + []enumTestEntry{}, + }, + want: []enumTestWant{ + {"a", 0, 1}, + {"c", 0, 3}, + {"e", 0, 5}, + }, + }, + { + desc: "two different length enumerators with duplicate keys", + in: [][]enumTestEntry{ + []enumTestEntry{ + {"a", 1}, + {"c", 3}, + {"e", 5}, + }, + []enumTestEntry{ + {"a", 2}, + {"b", 4}, + {"d", 1000}, + {"e", 6}, + }, + }, + want: []enumTestWant{ + {"a", 0, 1}, + {"a", 1, 2}, + {"b", 1, 4}, + {"c", 0, 3}, + {"d", 1, 1000}, + {"e", 0, 5}, + {"e", 1, 6}, + }, + }, + } + + for _, test := range tests { + var itrs []vellum.Iterator + for _, entries := range test.in { + itrs = append(itrs, &testIterator{entries: entries}) 
+ } + + enumerator, err := newEnumerator(itrs) + if err != nil { + t.Fatalf("%s - expected no err on newNumerator, got: %v", test.desc, err) + } + + wanti := 0 + for wanti < len(test.want) { + if err != nil { + t.Fatalf("%s - wanted no err, got: %v", test.desc, err) + } + + currK, currIdx, currV := enumerator.Current() + + want := test.want[wanti] + if want.key != string(currK) { + t.Fatalf("%s - wrong key, wanted: %#v, got: %q, %d, %d", test.desc, + want, currK, currIdx, currV) + } + if want.idx != currIdx { + t.Fatalf("%s - wrong idx, wanted: %#v, got: %q, %d, %d", test.desc, + want, currK, currIdx, currV) + } + if want.val != currV { + t.Fatalf("%s - wrong val, wanted: %#v, got: %q, %d, %d", test.desc, + want, currK, currIdx, currV) + } + + wanti += 1 + + err = enumerator.Next() + } + + if err != vellum.ErrIteratorDone { + t.Fatalf("%s - expected ErrIteratorDone, got: %v", test.desc, err) + } + + err = enumerator.Close() + if err != nil { + t.Fatalf("%s - expected nil err on close, got: %v", test.desc, err) + } + + for _, itr := range itrs { + if itr.(*testIterator).curr != 654321 { + t.Fatalf("%s - expected child iter to be closed", test.desc) + } + } + } +} + +type testIterator struct { + entries []enumTestEntry + curr int +} + +func (m *testIterator) Current() ([]byte, uint64) { + if m.curr >= len(m.entries) { + return nil, 0 + } + return []byte(m.entries[m.curr].key), m.entries[m.curr].val +} + +func (m *testIterator) Next() error { + m.curr++ + if m.curr >= len(m.entries) { + return vellum.ErrIteratorDone + } + return nil +} + +func (m *testIterator) Seek(key []byte) error { + return fmt.Errorf("not implemented for enumerator unit tests") +} + +func (m *testIterator) Reset(f *vellum.FST, + startKeyInclusive, endKeyExclusive []byte, aut vellum.Automaton) error { + return fmt.Errorf("not implemented for enumerator unit tests") +} + +func (m *testIterator) Close() error { + m.curr = 654321 + return nil +} From 2158e06c40dd40f14685c066531ec91352a57686 Mon Sep 
17 00:00:00 2001 From: Steve Yen Date: Mon, 12 Feb 2018 17:29:50 -0800 Subject: [PATCH 203/728] scorch zap merge collects dicts & itrs in lock-step The theory with this change is that the dicts and itrs should be positionally in "lock-step" with paired entries. And, since later code also uses the same array indexing to access the drops and newDocNums, those also need to be positionally in pair-wise lock-step, too. --- index/scorch/segment/zap/merge.go | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/index/scorch/segment/zap/merge.go b/index/scorch/segment/zap/merge.go index c9e275c58..0457fc82a 100644 --- a/index/scorch/segment/zap/merge.go +++ b/index/scorch/segment/zap/merge.go @@ -154,8 +154,8 @@ func computeNewDocCount(segments []*SegmentBase, drops []*roaring.Bitmap) uint64 return newDocCount } -func persistMergedRest(segments []*SegmentBase, drops []*roaring.Bitmap, - fieldsInv []string, fieldsMap map[string]uint16, newDocNums [][]uint64, +func persistMergedRest(segments []*SegmentBase, dropsIn []*roaring.Bitmap, + fieldsInv []string, fieldsMap map[string]uint16, newDocNumsIn [][]uint64, newSegDocCount uint64, chunkFactor uint32, w *CountHashWriter) ([]uint64, uint64, error) { @@ -187,15 +187,17 @@ func persistMergedRest(segments []*SegmentBase, drops []*roaring.Bitmap, return nil, 0, err } - // collect FST iterators from all segments for this field + // collect FST iterators from all active segments for this field + var newDocNums [][]uint64 + var drops []*roaring.Bitmap var dicts []*Dictionary var itrs []vellum.Iterator - for _, segment := range segments { + + for segmentI, segment := range segments { dict, err2 := segment.dictionary(fieldName) if err2 != nil { return nil, 0, err2 } - dicts = append(dicts, dict) if dict != nil && dict.fst != nil { itr, err2 := dict.fst.Iterator(nil, nil) @@ -203,6 +205,9 @@ func persistMergedRest(segments []*SegmentBase, drops []*roaring.Bitmap, return nil, 0, err2 } if itr != nil { + 
newDocNums = append(newDocNums, newDocNumsIn[segmentI]) + drops = append(drops, dropsIn[segmentI]) + dicts = append(dicts, dict) itrs = append(itrs, itr) } } From a073424e5ac51ee96053f09ccabac32521625aab Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Mon, 12 Feb 2018 17:47:28 -0800 Subject: [PATCH 204/728] scorch zap dict.postingsListFromOffset() method A helper method that can create a PostingsList if the caller already knows the postingsOffset. --- index/scorch/segment/zap/dict.go | 43 +++++++++++++++++++++----------- 1 file changed, 28 insertions(+), 15 deletions(-) diff --git a/index/scorch/segment/zap/dict.go b/index/scorch/segment/zap/dict.go index 55796ffa0..e5d712686 100644 --- a/index/scorch/segment/zap/dict.go +++ b/index/scorch/segment/zap/dict.go @@ -38,6 +38,33 @@ func (d *Dictionary) PostingsList(term string, except *roaring.Bitmap) (segment. } func (d *Dictionary) postingsList(term []byte, except *roaring.Bitmap, rv *PostingsList) (*PostingsList, error) { + if d.fst == nil { + return d.postingsListInit(rv, except), nil + } + + postingsOffset, exists, err := d.fst.Get(term) + if err != nil { + return nil, fmt.Errorf("vellum err: %v", err) + } + if !exists { + return d.postingsListInit(rv, except), nil + } + + return d.postingsListFromOffset(postingsOffset, except, rv) +} + +func (d *Dictionary) postingsListFromOffset(postingsOffset uint64, except *roaring.Bitmap, rv *PostingsList) (*PostingsList, error) { + rv = d.postingsListInit(rv, except) + + err := rv.read(postingsOffset, d) + if err != nil { + return nil, err + } + + return rv, nil +} + +func (d *Dictionary) postingsListInit(rv *PostingsList, except *roaring.Bitmap) *PostingsList { if rv == nil { rv = &PostingsList{} } else { @@ -45,21 +72,7 @@ func (d *Dictionary) postingsList(term []byte, except *roaring.Bitmap, rv *Posti } rv.sb = d.sb rv.except = except - - if d.fst != nil { - postingsOffset, exists, err := d.fst.Get(term) - if err != nil { - return nil, fmt.Errorf("vellum err: %v", err) - } 
- if exists { - err = rv.read(postingsOffset, d) - if err != nil { - return nil, err - } - } - } - - return rv, nil + return rv } // Iterator returns an iterator for this dictionary From fe544f33522ea1f89df03f4722da37d4e4ad9a40 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Mon, 12 Feb 2018 17:48:49 -0800 Subject: [PATCH 205/728] scorch zap merge uses enumerator for vellum.Iterator's --- index/scorch/segment/zap/merge.go | 199 ++++++++++++++++-------------- 1 file changed, 108 insertions(+), 91 deletions(-) diff --git a/index/scorch/segment/zap/merge.go b/index/scorch/segment/zap/merge.go index 0457fc82a..525b7f93d 100644 --- a/index/scorch/segment/zap/merge.go +++ b/index/scorch/segment/zap/merge.go @@ -198,7 +198,6 @@ func persistMergedRest(segments []*SegmentBase, dropsIn []*roaring.Bitmap, if err2 != nil { return nil, 0, err2 } - if dict != nil && dict.fst != nil { itr, err2 := dict.fst.Iterator(nil, nil) if err2 != nil && err2 != vellum.ErrIteratorDone { @@ -213,15 +212,6 @@ func persistMergedRest(segments []*SegmentBase, dropsIn []*roaring.Bitmap, } } - // create merging iterator - mergeItr, err := vellum.NewMergeIterator(itrs, func(postingOffsets []uint64) uint64 { - // we don't actually use the merged value - return 0 - }) - - tfEncoder := newChunkedIntCoder(uint64(chunkFactor), newSegDocCount-1) - locEncoder := newChunkedIntCoder(uint64(chunkFactor), newSegDocCount-1) - if uint64(cap(docTermMap)) < newSegDocCount { docTermMap = make([][]byte, newSegDocCount) } else { @@ -231,71 +221,17 @@ func persistMergedRest(segments []*SegmentBase, dropsIn []*roaring.Bitmap, } } - for err == nil { - term, _ := mergeItr.Current() + var prevTerm []byte - newRoaring := roaring.NewBitmap() - newRoaringLocs := roaring.NewBitmap() + newRoaring := roaring.NewBitmap() + newRoaringLocs := roaring.NewBitmap() - tfEncoder.Reset() - locEncoder.Reset() - - // now go back and get posting list for this term - // but pass in the deleted docs for that segment - for dictI, dict := range 
dicts { - if dict == nil { - continue - } - var err2 error - postings, err2 = dict.postingsList(term, drops[dictI], postings) - if err2 != nil { - return nil, 0, err2 - } - - postItr = postings.iterator(postItr) - next, err2 := postItr.Next() - for next != nil && err2 == nil { - hitNewDocNum := newDocNums[dictI][next.Number()] - if hitNewDocNum == docDropped { - return nil, 0, fmt.Errorf("see hit with dropped doc num") - } - newRoaring.Add(uint32(hitNewDocNum)) - // encode norm bits - norm := next.Norm() - normBits := math.Float32bits(float32(norm)) - err = tfEncoder.Add(hitNewDocNum, next.Frequency(), uint64(normBits)) - if err != nil { - return nil, 0, err - } - locs := next.Locations() - if len(locs) > 0 { - newRoaringLocs.Add(uint32(hitNewDocNum)) - for _, loc := range locs { - if cap(bufLoc) < 5+len(loc.ArrayPositions()) { - bufLoc = make([]uint64, 0, 5+len(loc.ArrayPositions())) - } - args := bufLoc[0:5] - args[0] = uint64(fieldsMap[loc.Field()]) - args[1] = loc.Pos() - args[2] = loc.Start() - args[3] = loc.End() - args[4] = uint64(len(loc.ArrayPositions())) - args = append(args, loc.ArrayPositions()...) - err = locEncoder.Add(hitNewDocNum, args...) 
- if err != nil { - return nil, 0, err - } - } - } - - docTermMap[hitNewDocNum] = - append(append(docTermMap[hitNewDocNum], term...), termSeparator) + tfEncoder := newChunkedIntCoder(uint64(chunkFactor), newSegDocCount-1) + locEncoder := newChunkedIntCoder(uint64(chunkFactor), newSegDocCount-1) - next, err2 = postItr.Next() - } - if err2 != nil { - return nil, 0, err2 - } + finishTerm := func(term []byte) error { + if term == nil { + return nil } tfEncoder.Close() @@ -304,59 +240,140 @@ func persistMergedRest(segments []*SegmentBase, dropsIn []*roaring.Bitmap, if newRoaring.GetCardinality() > 0 { // this field/term actually has hits in the new segment, lets write it down freqOffset := uint64(w.Count()) - _, err = tfEncoder.Write(w) + _, err := tfEncoder.Write(w) if err != nil { - return nil, 0, err + return err } locOffset := uint64(w.Count()) _, err = locEncoder.Write(w) if err != nil { - return nil, 0, err + return err } postingLocOffset := uint64(w.Count()) _, err = writeRoaringWithLen(newRoaringLocs, w, &bufReuse, bufMaxVarintLen64) if err != nil { - return nil, 0, err + return err } postingOffset := uint64(w.Count()) + // write out the start of the term info - buf := bufMaxVarintLen64 - n := binary.PutUvarint(buf, freqOffset) - _, err = w.Write(buf[:n]) + n := binary.PutUvarint(bufMaxVarintLen64, freqOffset) + _, err = w.Write(bufMaxVarintLen64[:n]) if err != nil { - return nil, 0, err + return err } - // write out the start of the loc info - n = binary.PutUvarint(buf, locOffset) - _, err = w.Write(buf[:n]) + n = binary.PutUvarint(bufMaxVarintLen64, locOffset) + _, err = w.Write(bufMaxVarintLen64[:n]) if err != nil { - return nil, 0, err + return err } - - // write out the start of the loc posting list - n = binary.PutUvarint(buf, postingLocOffset) - _, err = w.Write(buf[:n]) + // write out the start of the posting locs + n = binary.PutUvarint(bufMaxVarintLen64, postingLocOffset) + _, err = w.Write(bufMaxVarintLen64[:n]) if err != nil { - return nil, 0, err + 
return err } _, err = writeRoaringWithLen(newRoaring, w, &bufReuse, bufMaxVarintLen64) if err != nil { - return nil, 0, err + return err } err = newVellum.Insert(term, postingOffset) + if err != nil { + return err + } + } + + newRoaring = roaring.NewBitmap() + newRoaringLocs = roaring.NewBitmap() + + tfEncoder.Reset() + locEncoder.Reset() + + return nil + } + + enumerator, err := newEnumerator(itrs) + + for err == nil { + term, itrI, postingsOffset := enumerator.Current() + + if !bytes.Equal(prevTerm, term) { + // if the term changed, write out the info collected + // for the previous term + err2 := finishTerm(prevTerm) + if err2 != nil { + return nil, 0, err2 + } + } + + var err2 error + postings, err2 = dicts[itrI].postingsListFromOffset( + postingsOffset, drops[itrI], postings) + if err2 != nil { + return nil, 0, err2 + } + + postItr = postings.iterator(postItr) + next, err2 := postItr.Next() + for next != nil && err2 == nil { + hitNewDocNum := newDocNums[itrI][next.Number()] + if hitNewDocNum == docDropped { + return nil, 0, fmt.Errorf("see hit with dropped doc num") + } + newRoaring.Add(uint32(hitNewDocNum)) + // encode norm bits + norm := next.Norm() + normBits := math.Float32bits(float32(norm)) + err = tfEncoder.Add(hitNewDocNum, next.Frequency(), uint64(normBits)) if err != nil { return nil, 0, err } + locs := next.Locations() + if len(locs) > 0 { + newRoaringLocs.Add(uint32(hitNewDocNum)) + for _, loc := range locs { + if cap(bufLoc) < 5+len(loc.ArrayPositions()) { + bufLoc = make([]uint64, 0, 5+len(loc.ArrayPositions())) + } + args := bufLoc[0:5] + args[0] = uint64(fieldsMap[loc.Field()]) + args[1] = loc.Pos() + args[2] = loc.Start() + args[3] = loc.End() + args[4] = uint64(len(loc.ArrayPositions())) + args = append(args, loc.ArrayPositions()...) + err = locEncoder.Add(hitNewDocNum, args...) 
+ if err != nil { + return nil, 0, err + } + } + } + + docTermMap[hitNewDocNum] = + append(append(docTermMap[hitNewDocNum], term...), termSeparator) + + next, err2 = postItr.Next() } + if err2 != nil { + return nil, 0, err2 + } + + prevTerm = prevTerm[:0] // copy to prevTerm in case Next() reuses term mem + prevTerm = append(prevTerm, term...) - err = mergeItr.Next() + err = enumerator.Next() } if err != nil && err != vellum.ErrIteratorDone { return nil, 0, err } + err = finishTerm(prevTerm) + if err != nil { + return nil, 0, err + } + dictOffset := uint64(w.Count()) err = newVellum.Close() From 57fc03258e9ab6507ed01525b061080ff759cdd9 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Thu, 8 Feb 2018 14:32:55 -0800 Subject: [PATCH 206/728] scorch rollback ignores unsafeBatch flag See also: https://github.com/blevesearch/bleve/issues/760 --- index/scorch/snapshot_rollback.go | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/index/scorch/snapshot_rollback.go b/index/scorch/snapshot_rollback.go index c265b4c31..247003311 100644 --- a/index/scorch/snapshot_rollback.go +++ b/index/scorch/snapshot_rollback.go @@ -149,10 +149,7 @@ func (s *Scorch) Rollback(to *RollbackPoint) error { revert.snapshot = indexSnapshot revert.applied = make(chan error) - - if !s.unsafeBatch { - revert.persisted = make(chan error) - } + revert.persisted = make(chan error) return nil }) @@ -172,9 +169,5 @@ func (s *Scorch) Rollback(to *RollbackPoint) error { return fmt.Errorf("Rollback: failed with err: %v", err) } - if revert.persisted != nil { - err = <-revert.persisted - } - - return err + return <-revert.persisted } From 6f2797bec3720b32cdbd2df0b2d81f8cf69a783d Mon Sep 17 00:00:00 2001 From: Sreekanth Sivasankaran Date: Wed, 14 Feb 2018 16:39:26 +0530 Subject: [PATCH 207/728] Adding a pause to persister until the merger catches up --- index/scorch/persister.go | 84 ++++++++++++++++++++++++++++++--------- 1 file changed, 65 insertions(+), 19 deletions(-) diff --git 
a/index/scorch/persister.go b/index/scorch/persister.go index 61a266adb..b19c1205d 100644 --- a/index/scorch/persister.go +++ b/index/scorch/persister.go @@ -34,36 +34,40 @@ import ( var DefaultChunkFactor uint32 = 1024 +// Arbitrary number, need to make it configurable. +// Lower values like 10/making persister really slow +// doesn't work well as it is creating more files to +// persist for in next persist iteration and spikes the # FDs. +// Ideal value should let persister also proceed at +// an optimum pace so that the merger can skip +// many intermediate snapshots. +// This needs to be based on empirical data. +// With high segment count with snapshots, +// doubtful on the effectiveness of this approach. +var epochDistance = uint64(100) + type notificationChan chan struct{} func (s *Scorch) persisterLoop() { defer s.asyncTasks.Done() var persistWatchers []*epochWatcher - var lastPersistedEpoch uint64 - - notifyWatchers := func() { - var watchersNext []*epochWatcher - for _, w := range persistWatchers { - if w.epoch < lastPersistedEpoch { - close(w.notifyCh) - } else { - watchersNext = append(watchersNext, w) - } - } - persistWatchers = watchersNext - } - + var lastPersistedEpoch, lastMergedEpoch uint64 + var ew *epochWatcher OUTER: for { select { case <-s.closeCh: break OUTER - case ew := <-s.persisterNotifier: + case ew = <-s.persisterNotifier: persistWatchers = append(persistWatchers, ew) - notifyWatchers() default: } + if ew != nil { + lastMergedEpoch = ew.epoch + persistWatchers = s.pausePersisterForMergerCatchUp(lastPersistedEpoch, + &lastMergedEpoch, persistWatchers) + } var ourSnapshot *IndexSnapshot var ourPersisted []chan error @@ -136,16 +140,58 @@ OUTER: case <-w.notifyCh: // woken up, next loop should pick up work continue OUTER - case ew := <-s.persisterNotifier: + case ew = <-s.persisterNotifier: // if the watchers are already caught up then let them wait, // else let them continue to do the catch up persistWatchers = append(persistWatchers, ew) 
- notifyWatchers() } } } -func (s *Scorch) persistSnapshot(snapshot *IndexSnapshot) error { +func notifyMergeWatchers(lastPersistedEpoch uint64, + persistWatchers []*epochWatcher) []*epochWatcher { + var watchersNext []*epochWatcher + for _, w := range persistWatchers { + if w.epoch < lastPersistedEpoch { + close(w.notifyCh) + } else { + watchersNext = append(watchersNext, w) + } + } + return watchersNext +} + +func (s *Scorch) pausePersisterForMergerCatchUp(lastPersistedEpoch uint64, lastMergedEpoch *uint64, + persistWatchers []*epochWatcher) []*epochWatcher { + +OUTER: + for { + + // first, let the watchers proceed if they lag behind + persistWatchers = notifyMergeWatchers(lastPersistedEpoch, persistWatchers) + + // check for slow merger and pause persister until merger catch up + if lastPersistedEpoch > *lastMergedEpoch && + lastPersistedEpoch-*lastMergedEpoch > epochDistance { + + select { + case <-s.closeCh: + break OUTER + case ew := <-s.persisterNotifier: + persistWatchers = append(persistWatchers, ew) + *lastMergedEpoch = ew.epoch + log.Printf("persister waiting as lastPersistedEpoch->%d merger epoch->%d", lastPersistedEpoch, *lastMergedEpoch) + continue OUTER + } + } else { + break OUTER + } + } + + return persistWatchers +} + +func (s *Scorch) persistSnapshot(snapshot *IndexSnapshot) (err error) { // start a write transaction tx, err := s.rootBolt.Begin(true) if err != nil { From 606a270669be7ca3064110020f0155845a070b96 Mon Sep 17 00:00:00 2001 From: Sreekanth Sivasankaran Date: Mon, 12 Feb 2018 21:54:33 +0530 Subject: [PATCH 208/728] Fix for empty segment merge handling Avoid creating new files with emtpy segments tasks during the merge operation, skips the incorrect appending of a newer segment during merge. 
--- index/scorch/introducer.go | 26 +++++++++++++------- index/scorch/merge.go | 50 ++++++++++++++++++++++++-------------- 2 files changed, 49 insertions(+), 27 deletions(-) diff --git a/index/scorch/introducer.go b/index/scorch/introducer.go index 4499fa41b..f75801910 100644 --- a/index/scorch/introducer.go +++ b/index/scorch/introducer.go @@ -193,6 +193,12 @@ func (s *Scorch) introduceMerge(nextMerge *segmentMerge) { // prepare new index snapshot currSize := len(s.root.segment) newSize := currSize + 1 - len(nextMerge.old) + + // empty segments deletion + if nextMerge.new == nil { + newSize-- + } + newSnapshot := &IndexSnapshot{ parent: s, segment: make([]*SegmentSnapshot, 0, newSize), @@ -210,7 +216,7 @@ func (s *Scorch) introduceMerge(nextMerge *segmentMerge) { segmentID := s.root.segment[i].id if segSnapAtMerge, ok := nextMerge.old[segmentID]; ok { // this segment is going away, see if anything else was deleted since we started the merge - if s.root.segment[i].deleted != nil { + if segSnapAtMerge != nil && s.root.segment[i].deleted != nil { // assume all these deletes are new deletedSince := s.root.segment[i].deleted // if we already knew about some of them, remove @@ -238,14 +244,16 @@ func (s *Scorch) introduceMerge(nextMerge *segmentMerge) { } } - // put new segment at end - newSnapshot.segment = append(newSnapshot.segment, &SegmentSnapshot{ - id: nextMerge.id, - segment: nextMerge.new, // take ownership for nextMerge.new's ref-count - deleted: newSegmentDeleted, - cachedDocs: &cachedDocs{cache: nil}, - }) - newSnapshot.offsets = append(newSnapshot.offsets, running) + if nextMerge.new != nil { + // put new segment at end + newSnapshot.segment = append(newSnapshot.segment, &SegmentSnapshot{ + id: nextMerge.id, + segment: nextMerge.new, // take ownership for nextMerge.new's ref-count + deleted: newSegmentDeleted, + cachedDocs: &cachedDocs{cache: nil}, + }) + newSnapshot.offsets = append(newSnapshot.offsets, running) + } // swap in new segment rootPrev := 
s.root diff --git a/index/scorch/merge.go b/index/scorch/merge.go index 5ded29b5a..540976f41 100644 --- a/index/scorch/merge.go +++ b/index/scorch/merge.go @@ -124,6 +124,10 @@ func (s *Scorch) planMergeAtSnapshot(ourSnapshot *IndexSnapshot) error { // process tasks in serial for now var notifications []notificationChan for _, task := range resultMergePlan.Tasks { + if len(task.Segments) == 0 { + continue + } + oldMap := make(map[uint64]*SegmentSnapshot) newSegmentID := atomic.AddUint64(&s.nextSegmentID, 1) segmentsToMerge := make([]*zap.Segment, 0, len(task.Segments)) @@ -132,36 +136,46 @@ func (s *Scorch) planMergeAtSnapshot(ourSnapshot *IndexSnapshot) error { if segSnapshot, ok := planSegment.(*SegmentSnapshot); ok { oldMap[segSnapshot.id] = segSnapshot if zapSeg, ok := segSnapshot.segment.(*zap.Segment); ok { - segmentsToMerge = append(segmentsToMerge, zapSeg) - docsToDrop = append(docsToDrop, segSnapshot.deleted) + if segSnapshot.LiveSize() == 0 { + oldMap[segSnapshot.id] = nil + } else { + segmentsToMerge = append(segmentsToMerge, zapSeg) + docsToDrop = append(docsToDrop, segSnapshot.deleted) + } } } } - filename := zapFileName(newSegmentID) - s.markIneligibleForRemoval(filename) - path := s.path + string(os.PathSeparator) + filename - newDocNums, err := zap.Merge(segmentsToMerge, docsToDrop, path, DefaultChunkFactor) - if err != nil { - s.unmarkIneligibleForRemoval(filename) - return fmt.Errorf("merging failed: %v", err) - } - segment, err := zap.Open(path) - if err != nil { - s.unmarkIneligibleForRemoval(filename) - return err + var oldNewDocNums map[uint64][]uint64 + var segment segment.Segment + if len(segmentsToMerge) > 0 { + filename := zapFileName(newSegmentID) + s.markIneligibleForRemoval(filename) + path := s.path + string(os.PathSeparator) + filename + newDocNums, err := zap.Merge(segmentsToMerge, docsToDrop, path, 1024) + if err != nil { + s.unmarkIneligibleForRemoval(filename) + return fmt.Errorf("merging failed: %v", err) + } + segment, err = 
zap.Open(path) + if err != nil { + s.unmarkIneligibleForRemoval(filename) + return err + } + oldNewDocNums = make(map[uint64][]uint64) + for i, segNewDocNums := range newDocNums { + oldNewDocNums[task.Segments[i].Id()] = segNewDocNums + } } + sm := &segmentMerge{ id: newSegmentID, old: oldMap, - oldNewDocNums: make(map[uint64][]uint64), + oldNewDocNums: oldNewDocNums, new: segment, notify: make(notificationChan), } notifications = append(notifications, sm.notify) - for i, segNewDocNums := range newDocNums { - sm.oldNewDocNums[task.Segments[i].Id()] = segNewDocNums - } // give it to the introducer select { From 720010783ec5b3a3a2ceffbf256d863196aad6a3 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Wed, 14 Feb 2018 14:50:30 -0800 Subject: [PATCH 209/728] scorch zap InitSegmentBase() helper func Refactored out a zap.InitSegmentBase() func so that non-zap packages can create SegmentBase instances. --- index/scorch/segment/zap/build.go | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/index/scorch/segment/zap/build.go b/index/scorch/segment/zap/build.go index b3bbbab52..72357ae7d 100644 --- a/index/scorch/segment/zap/build.go +++ b/index/scorch/segment/zap/build.go @@ -633,12 +633,21 @@ func NewSegmentBase(memSegment *mem.Segment, chunkFactor uint32) (*SegmentBase, return nil, err } + return InitSegmentBase(br.Bytes(), cr.Sum32(), chunkFactor, + memSegment.FieldsMap, memSegment.FieldsInv, numDocs, + storedIndexOffset, fieldsIndexOffset, docValueOffset, dictLocs) +} + +func InitSegmentBase(mem []byte, memCRC uint32, chunkFactor uint32, + fieldsMap map[string]uint16, fieldsInv []string, numDocs uint64, + storedIndexOffset uint64, fieldsIndexOffset uint64, docValueOffset uint64, + dictLocs []uint64) (*SegmentBase, error) { sb := &SegmentBase{ - mem: br.Bytes(), - memCRC: cr.Sum32(), + mem: mem, + memCRC: memCRC, chunkFactor: chunkFactor, - fieldsMap: memSegment.FieldsMap, - fieldsInv: memSegment.FieldsInv, + fieldsMap: fieldsMap, + 
fieldsInv: fieldsInv, numDocs: numDocs, storedIndexOffset: storedIndexOffset, fieldsIndexOffset: fieldsIndexOffset, @@ -647,7 +656,7 @@ func NewSegmentBase(memSegment *mem.Segment, chunkFactor uint32) (*SegmentBase, fieldDvIterMap: make(map[uint16]*docValueIterator), } - err = sb.loadDvIterators() + err := sb.loadDvIterators() if err != nil { return nil, err } From a0b7508da7e3effe66e7f4ac325446d57f643122 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Wed, 14 Feb 2018 14:53:28 -0800 Subject: [PATCH 210/728] scorch zap mergeSegmentBases() func As part of this, zap.MergeToWriter() now returns more information -- enough so that callers can now create their own SegmentBase instances. Also, the fieldsMap maintained and returned by zap.MergeToWriter() is now a mapping from fieldName ==> fieldID+1 (instead of the previous mapping from fieldName ==> fieldID). This makes it similar to how fieldsMap are handled in other parts of zap to avoid "zero value" issues. --- index/scorch/merge.go | 55 +++++++++++++++++++++++++++++++ index/scorch/segment/zap/merge.go | 35 ++++++++++---------- 2 files changed, 73 insertions(+), 17 deletions(-) diff --git a/index/scorch/merge.go b/index/scorch/merge.go index fb4e80d20..7c7eedac5 100644 --- a/index/scorch/merge.go +++ b/index/scorch/merge.go @@ -15,6 +15,7 @@ package scorch import ( + "bytes" "fmt" "os" "sync/atomic" @@ -168,3 +169,57 @@ type segmentMerge struct { new segment.Segment notify notificationChan } + +// perform in-memory merging of the given SegmentBase instances, and +// synchronously introduce the merged segment into the root +func (s *Scorch) mergeSegmentBases(snapshot *IndexSnapshot, + sbs []*zap.SegmentBase, sbsDrops []*roaring.Bitmap, sbsIndexes []int, + chunkFactor uint32) (uint64, error) { + var br bytes.Buffer + + cr := zap.NewCountHashWriter(&br) + + newDocNums, numDocs, storedIndexOffset, fieldsIndexOffset, + docValueOffset, dictLocs, fieldsInv, fieldsMap, err := + zap.MergeToWriter(sbs, sbsDrops, chunkFactor, cr) 
+ if err != nil { + return 0, nil + } + + segment, err := zap.InitSegmentBase(br.Bytes(), cr.Sum32(), chunkFactor, + fieldsMap, fieldsInv, numDocs, storedIndexOffset, fieldsIndexOffset, + docValueOffset, dictLocs) + if err != nil { + return 0, nil + } + + newSegmentID := atomic.AddUint64(&s.nextSegmentID, 1) + + sm := &segmentMerge{ + id: newSegmentID, + old: make(map[uint64]*SegmentSnapshot), + oldNewDocNums: make(map[uint64][]uint64), + new: segment, + notify: make(notificationChan), + } + + for i, idx := range sbsIndexes { + ss := snapshot.segment[idx] + sm.old[ss.id] = ss + sm.oldNewDocNums[ss.id] = newDocNums[i] + } + + select { // send to introducer + case <-s.closeCh: + return 0, nil // TODO: instead return some ErrInterruptedClosed? + case s.merges <- sm: + } + + select { // wait for introduction to complete + case <-s.closeCh: + return 0, nil // TODO: instead return some ErrInterruptedClosed? + case <-sm.notify: + } + + return numDocs, nil +} diff --git a/index/scorch/segment/zap/merge.go b/index/scorch/segment/zap/merge.go index 525b7f93d..808b16b75 100644 --- a/index/scorch/segment/zap/merge.go +++ b/index/scorch/segment/zap/merge.go @@ -60,7 +60,7 @@ func Merge(segments []*Segment, drops []*roaring.Bitmap, path string, // wrap it for counting (tracking offsets) cr := NewCountHashWriter(br) - newDocNums, numDocs, storedIndexOffset, fieldsIndexOffset, docValueOffset, err := + newDocNums, numDocs, storedIndexOffset, fieldsIndexOffset, docValueOffset, _, _, _, err := MergeToWriter(segmentBases, drops, chunkFactor, cr) if err != nil { cleanup() @@ -99,26 +99,26 @@ func MergeToWriter(segments []*SegmentBase, drops []*roaring.Bitmap, chunkFactor uint32, cr *CountHashWriter) ( newDocNums [][]uint64, numDocs, storedIndexOffset, fieldsIndexOffset, docValueOffset uint64, + dictLocs []uint64, fieldsInv []string, fieldsMap map[string]uint16, err error) { docValueOffset = uint64(fieldNotUninverted) - var dictLocs []uint64 - - fieldsSame, fieldsInv := 
mergeFields(segments) - fieldsMap := mapFields(fieldsInv) + var fieldsSame bool + fieldsSame, fieldsInv = mergeFields(segments) + fieldsMap = mapFields(fieldsInv) numDocs = computeNewDocCount(segments, drops) if numDocs > 0 { storedIndexOffset, newDocNums, err = mergeStoredAndRemap(segments, drops, fieldsMap, fieldsInv, fieldsSame, numDocs, cr) if err != nil { - return nil, 0, 0, 0, 0, err + return nil, 0, 0, 0, 0, nil, nil, nil, err } dictLocs, docValueOffset, err = persistMergedRest(segments, drops, fieldsInv, fieldsMap, newDocNums, numDocs, chunkFactor, cr) if err != nil { - return nil, 0, 0, 0, 0, err + return nil, 0, 0, 0, 0, nil, nil, nil, err } } else { dictLocs = make([]uint64, len(fieldsInv)) @@ -126,17 +126,18 @@ func MergeToWriter(segments []*SegmentBase, drops []*roaring.Bitmap, fieldsIndexOffset, err = persistFields(fieldsInv, cr, dictLocs) if err != nil { - return nil, 0, 0, 0, 0, err + return nil, 0, 0, 0, 0, nil, nil, nil, err } - return newDocNums, numDocs, storedIndexOffset, fieldsIndexOffset, docValueOffset, nil + return newDocNums, numDocs, storedIndexOffset, fieldsIndexOffset, docValueOffset, dictLocs, fieldsInv, fieldsMap, nil } -// mapFields takes the fieldsInv list and builds the map +// mapFields takes the fieldsInv list and returns a map of fieldName +// to fieldID+1 func mapFields(fields []string) map[string]uint16 { rv := make(map[string]uint16, len(fields)) for i, fieldName := range fields { - rv[fieldName] = uint16(i) + rv[fieldName] = uint16(i) + 1 } return rv } @@ -338,7 +339,7 @@ func persistMergedRest(segments []*SegmentBase, dropsIn []*roaring.Bitmap, bufLoc = make([]uint64, 0, 5+len(loc.ArrayPositions())) } args := bufLoc[0:5] - args[0] = uint64(fieldsMap[loc.Field()]) + args[0] = uint64(fieldsMap[loc.Field()] - 1) args[1] = loc.Pos() args[2] = loc.Start() args[3] = loc.End() @@ -499,7 +500,7 @@ func mergeStoredAndRemap(segments []*SegmentBase, drops []*roaring.Bitmap, poss[i] = poss[i][:0] } err := segment.VisitDocument(docNum, 
func(field string, typ byte, value []byte, pos []uint64) bool { - fieldID := int(fieldsMap[field]) + fieldID := int(fieldsMap[field]) - 1 vals[fieldID] = append(vals[fieldID], value) typs[fieldID] = append(typs[fieldID], typ) poss[fieldID] = append(poss[fieldID], pos) @@ -615,21 +616,21 @@ func mergeFields(segments []*SegmentBase) (bool, []string) { segment0Fields = segments[0].Fields() } - fieldsMap := map[string]struct{}{} + fieldsExist := map[string]struct{}{} for _, segment := range segments { fields := segment.Fields() for fieldi, field := range fields { - fieldsMap[field] = struct{}{} + fieldsExist[field] = struct{}{} if len(segment0Fields) != len(fields) || segment0Fields[fieldi] != field { fieldsSame = false } } } - rv := make([]string, 0, len(fieldsMap)) + rv := make([]string, 0, len(fieldsExist)) // ensure _id stays first rv = append(rv, "_id") - for k := range fieldsMap { + for k := range fieldsExist { if k != "_id" { rv = append(rv, k) } From a8ebf2a553c21012733a24bc950d88fb2830f955 Mon Sep 17 00:00:00 2001 From: Sreekanth Sivasankaran Date: Wed, 21 Feb 2018 17:25:14 +0530 Subject: [PATCH 211/728] lowering epochDistance to 5, fixing the lastMergedEpoch value updates --- index/scorch/persister.go | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/index/scorch/persister.go b/index/scorch/persister.go index 4ba0ca8e8..24988c9c6 100644 --- a/index/scorch/persister.go +++ b/index/scorch/persister.go @@ -44,7 +44,7 @@ var DefaultChunkFactor uint32 = 1024 // This needs to be based on empirical data. // With high segment count with snapshots, // doubtful on the effectiveness of this approach. 
-var epochDistance = uint64(100) +var epochDistance = uint64(5) type notificationChan chan struct{} @@ -63,11 +63,12 @@ OUTER: persistWatchers = append(persistWatchers, ew) default: } - if ew != nil { + if ew != nil && ew.epoch > lastMergedEpoch { lastMergedEpoch = ew.epoch - persistWatchers = s.pausePersisterForMergerCatchUp(lastPersistedEpoch, - &lastMergedEpoch, persistWatchers) } + persistWatchers = s.pausePersisterForMergerCatchUp(lastPersistedEpoch, + &lastMergedEpoch, persistWatchers) + var ourSnapshot *IndexSnapshot var ourPersisted []chan error @@ -171,8 +172,7 @@ OUTER: persistWatchers = notifyMergeWatchers(lastPersistedEpoch, persistWatchers) // check for slow merger and pause persister until merger catch up - if lastPersistedEpoch > *lastMergedEpoch && - lastPersistedEpoch-*lastMergedEpoch > epochDistance { + if lastPersistedEpoch > *lastMergedEpoch+epochDistance { select { case <-s.closeCh: @@ -180,7 +180,6 @@ OUTER: case ew := <-s.persisterNotifier: persistWatchers = append(persistWatchers, ew) *lastMergedEpoch = ew.epoch - log.Printf("persister waiting as lastPersistedEpoch->%d merger epoch->%d", lastPersistedEpoch, *lastMergedEpoch) continue OUTER } } else { From a1db057656c6ab92fafbc3da14a218ea686b3928 Mon Sep 17 00:00:00 2001 From: Sreekanth Sivasankaran Date: Fri, 23 Feb 2018 15:35:58 +0530 Subject: [PATCH 212/728] configurable mergePlanner options mergePlanner options are parsed from the scorch configs parameters --- index/scorch/merge.go | 36 +++++++++++++++++++++++++++++++++--- 1 file changed, 33 insertions(+), 3 deletions(-) diff --git a/index/scorch/merge.go b/index/scorch/merge.go index fb4e80d20..77368b369 100644 --- a/index/scorch/merge.go +++ b/index/scorch/merge.go @@ -28,6 +28,7 @@ import ( func (s *Scorch) mergerLoop() { var lastEpochMergePlanned uint64 + mergePlannerOptions := s.parseMergePlannerOptions() OUTER: for { select { @@ -45,7 +46,7 @@ OUTER: startTime := time.Now() // lets get started - err := 
s.planMergeAtSnapshot(ourSnapshot) + err := s.planMergeAtSnapshot(ourSnapshot, mergePlannerOptions) if err != nil { s.fireAsyncError(fmt.Errorf("merging err: %v", err)) _ = ourSnapshot.DecRef() @@ -82,7 +83,36 @@ OUTER: s.asyncTasks.Done() } -func (s *Scorch) planMergeAtSnapshot(ourSnapshot *IndexSnapshot) error { +func (s *Scorch) parseMergePlannerOptions() *mergeplan.MergePlanOptions { + mergePlannerOptions := &mergeplan.DefaultMergePlanOptions + scorchOptions := map[string]interface{}{} + if v, ok := s.config["scorchOptions"]; ok { + if scorchOptions, ok = v.(map[string]interface{}); ok { + if v, ok := scorchOptions["maxSegmentsPerTier"].(float64); ok { + mergePlannerOptions.MaxSegmentsPerTier = int(v) + } + if v, ok := scorchOptions["maxSegmentSize"].(float64); ok { + mergePlannerOptions.MaxSegmentSize = int64(v) + } + if v, ok := scorchOptions["tierGrowth"].(float64); ok { + mergePlannerOptions.TierGrowth = v + } + if v, ok := scorchOptions["segmentsPerMergeTask"].(float64); ok { + mergePlannerOptions.SegmentsPerMergeTask = int(v) + } + if v, ok := scorchOptions["floorSegmentSize"].(float64); ok { + mergePlannerOptions.FloorSegmentSize = int64(v) + } + if v, ok := scorchOptions["reclaimDeletesWeight"].(float64); ok { + mergePlannerOptions.ReclaimDeletesWeight = v + } + } + } + return mergePlannerOptions +} + +func (s *Scorch) planMergeAtSnapshot(ourSnapshot *IndexSnapshot, + options *mergeplan.MergePlanOptions) error { // build list of zap segments in this snapshot var onlyZapSnapshots []mergeplan.Segment for _, segmentSnapshot := range ourSnapshot.segment { @@ -92,7 +122,7 @@ func (s *Scorch) planMergeAtSnapshot(ourSnapshot *IndexSnapshot) error { } // give this list to the planner - resultMergePlan, err := mergeplan.Plan(onlyZapSnapshots, nil) + resultMergePlan, err := mergeplan.Plan(onlyZapSnapshots, options) if err != nil { return fmt.Errorf("merge planning err: %v", err) } From c50d9b4023943f89514f2de842dfd77b1c93f585 Mon Sep 17 00:00:00 2001 From: Steve 
Yen Date: Wed, 14 Feb 2018 14:57:46 -0800 Subject: [PATCH 213/728] scorch conditional merging during persistSnapshot() As part of this change, there are nw helper methods -- persistSnapshotMaybeMerge() and persistSnapshotDirect(). --- index/scorch/introducer.go | 5 +- index/scorch/merge.go | 48 ++++++++++++------- index/scorch/persister.go | 95 +++++++++++++++++++++++++++++++++++++- 3 files changed, 130 insertions(+), 18 deletions(-) diff --git a/index/scorch/introducer.go b/index/scorch/introducer.go index 4499fa41b..e0cdf44a0 100644 --- a/index/scorch/introducer.go +++ b/index/scorch/introducer.go @@ -247,6 +247,8 @@ func (s *Scorch) introduceMerge(nextMerge *segmentMerge) { }) newSnapshot.offsets = append(newSnapshot.offsets, running) + newSnapshot.AddRef() // 1 ref for the nextMerge.notify response + // swap in new segment rootPrev := s.root s.root = newSnapshot @@ -257,7 +259,8 @@ func (s *Scorch) introduceMerge(nextMerge *segmentMerge) { _ = rootPrev.DecRef() } - // notify merger we incorporated this + // notify requester that we incorporated this + nextMerge.notify <- newSnapshot close(nextMerge.notify) } diff --git a/index/scorch/merge.go b/index/scorch/merge.go index 7c7eedac5..434841757 100644 --- a/index/scorch/merge.go +++ b/index/scorch/merge.go @@ -103,7 +103,7 @@ func (s *Scorch) planMergeAtSnapshot(ourSnapshot *IndexSnapshot) error { } // process tasks in serial for now - var notifications []notificationChan + var notifications []chan *IndexSnapshot for _, task := range resultMergePlan.Tasks { oldMap := make(map[uint64]*SegmentSnapshot) newSegmentID := atomic.AddUint64(&s.nextSegmentID, 1) @@ -137,7 +137,7 @@ func (s *Scorch) planMergeAtSnapshot(ourSnapshot *IndexSnapshot) error { old: oldMap, oldNewDocNums: make(map[uint64][]uint64), new: segment, - notify: make(notificationChan), + notify: make(chan *IndexSnapshot, 1), } notifications = append(notifications, sm.notify) for i, segNewDocNums := range newDocNums { @@ -156,7 +156,10 @@ func (s 
*Scorch) planMergeAtSnapshot(ourSnapshot *IndexSnapshot) error { select { case <-s.closeCh: return nil - case <-notification: + case newSnapshot := <-notification: + if newSnapshot != nil { + _ = newSnapshot.DecRef() + } } } return nil @@ -167,14 +170,15 @@ type segmentMerge struct { old map[uint64]*SegmentSnapshot oldNewDocNums map[uint64][]uint64 new segment.Segment - notify notificationChan + notify chan *IndexSnapshot } -// perform in-memory merging of the given SegmentBase instances, and -// synchronously introduce the merged segment into the root +// perform a merging of the given SegmentBase instances into a new, +// persisted segment, and synchronously introduce that new segment +// into the root func (s *Scorch) mergeSegmentBases(snapshot *IndexSnapshot, sbs []*zap.SegmentBase, sbsDrops []*roaring.Bitmap, sbsIndexes []int, - chunkFactor uint32) (uint64, error) { + chunkFactor uint32) (uint64, *IndexSnapshot, uint64, error) { var br bytes.Buffer cr := zap.NewCountHashWriter(&br) @@ -183,24 +187,36 @@ func (s *Scorch) mergeSegmentBases(snapshot *IndexSnapshot, docValueOffset, dictLocs, fieldsInv, fieldsMap, err := zap.MergeToWriter(sbs, sbsDrops, chunkFactor, cr) if err != nil { - return 0, nil + return 0, nil, 0, err } - segment, err := zap.InitSegmentBase(br.Bytes(), cr.Sum32(), chunkFactor, + sb, err := zap.InitSegmentBase(br.Bytes(), cr.Sum32(), chunkFactor, fieldsMap, fieldsInv, numDocs, storedIndexOffset, fieldsIndexOffset, docValueOffset, dictLocs) if err != nil { - return 0, nil + return 0, nil, 0, err } newSegmentID := atomic.AddUint64(&s.nextSegmentID, 1) + filename := zapFileName(newSegmentID) + path := s.path + string(os.PathSeparator) + filename + err = zap.PersistSegmentBase(sb, path) + if err != nil { + return 0, nil, 0, err + } + + segment, err := zap.Open(path) + if err != nil { + return 0, nil, 0, err + } + sm := &segmentMerge{ id: newSegmentID, old: make(map[uint64]*SegmentSnapshot), oldNewDocNums: make(map[uint64][]uint64), new: segment, 
- notify: make(notificationChan), + notify: make(chan *IndexSnapshot, 1), } for i, idx := range sbsIndexes { @@ -211,15 +227,15 @@ func (s *Scorch) mergeSegmentBases(snapshot *IndexSnapshot, select { // send to introducer case <-s.closeCh: - return 0, nil // TODO: instead return some ErrInterruptedClosed? + _ = segment.DecRef() + return 0, nil, 0, nil // TODO: return ErrInterruptedClosed? case s.merges <- sm: } select { // wait for introduction to complete case <-s.closeCh: - return 0, nil // TODO: instead return some ErrInterruptedClosed? - case <-sm.notify: + return 0, nil, 0, nil // TODO: return ErrInterruptedClosed? + case newSnapshot := <-sm.notify: + return numDocs, newSnapshot, newSegmentID, nil } - - return numDocs, nil } diff --git a/index/scorch/persister.go b/index/scorch/persister.go index 83909a880..1b7c1b5eb 100644 --- a/index/scorch/persister.go +++ b/index/scorch/persister.go @@ -145,7 +145,100 @@ OUTER: } } -func (s *Scorch) persistSnapshot(snapshot *IndexSnapshot) (err error) { +func (s *Scorch) persistSnapshot(snapshot *IndexSnapshot) error { + persisted, err := s.persistSnapshotMaybeMerge(snapshot) + if err != nil { + return err + } + if persisted { + return nil + } + + return s.persistSnapshotDirect(snapshot) +} + +// DefaultMinSegmentsForInMemoryMerge represents the default number of +// in-memory zap segments that persistSnapshotMaybeMerge() needs to +// see in an IndexSnapshot before it decides to merge and persist +// those segments +var DefaultMinSegmentsForInMemoryMerge = 2 + +// persistSnapshotMaybeMerge examines the snapshot and might merge and +// persist the in-memory zap segments if there are enough of them +func (s *Scorch) persistSnapshotMaybeMerge(snapshot *IndexSnapshot) ( + bool, error) { + // collect the in-memory zap segments (SegmentBase instances) + var sbs []*zap.SegmentBase + var sbsDrops []*roaring.Bitmap + var sbsIndexes []int + + for i, segmentSnapshot := range snapshot.segment { + if sb, ok := 
segmentSnapshot.segment.(*zap.SegmentBase); ok { + sbs = append(sbs, sb) + sbsDrops = append(sbsDrops, segmentSnapshot.deleted) + sbsIndexes = append(sbsIndexes, i) + } + } + + if len(sbs) < DefaultMinSegmentsForInMemoryMerge { + return false, nil + } + + _, newSnapshot, newSegmentID, err := s.mergeSegmentBases( + snapshot, sbs, sbsDrops, sbsIndexes, DefaultChunkFactor) + if err != nil { + return false, err + } + if newSnapshot == nil { + return false, nil + } + + defer func() { + _ = newSnapshot.DecRef() + }() + + mergedSegmentIDs := map[uint64]struct{}{} + for _, idx := range sbsIndexes { + mergedSegmentIDs[snapshot.segment[idx].id] = struct{}{} + } + + // construct a snapshot that's logically equivalent to the input + // snapshot, but with merged segments replaced by the new segment + equiv := &IndexSnapshot{ + parent: snapshot.parent, + segment: make([]*SegmentSnapshot, 0, len(snapshot.segment)), + internal: snapshot.internal, + epoch: snapshot.epoch, + } + + // copy to the equiv the segments that weren't replaced + for _, segment := range snapshot.segment { + if _, wasMerged := mergedSegmentIDs[segment.id]; !wasMerged { + equiv.segment = append(equiv.segment, segment) + } + } + + // append to the equiv the new segment + for _, segment := range newSnapshot.segment { + if segment.id == newSegmentID { + equiv.segment = append(equiv.segment, &SegmentSnapshot{ + id: newSegmentID, + segment: segment.segment, + deleted: nil, // nil since merging handled deletions + }) + break + } + } + + err = s.persistSnapshotDirect(equiv) + if err != nil { + return false, err + } + + return true, nil +} + +func (s *Scorch) persistSnapshotDirect(snapshot *IndexSnapshot) (err error) { // start a write transaction tx, err := s.rootBolt.Begin(true) if err != nil { From da70758635de8f574c3dc2082f162dac8aaa4e9a Mon Sep 17 00:00:00 2001 From: abhinavdangeti Date: Fri, 23 Feb 2018 14:43:59 -0800 Subject: [PATCH 214/728] Handle case where store snapshot isn't closed in upsidedown's Batch() 
API --- index/upsidedown/upsidedown.go | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/index/upsidedown/upsidedown.go b/index/upsidedown/upsidedown.go index 1243375b7..70e6e457f 100644 --- a/index/upsidedown/upsidedown.go +++ b/index/upsidedown/upsidedown.go @@ -837,6 +837,11 @@ func (udc *UpsideDownCouch) Batch(batch *index.Batch) (err error) { docBackIndexRowErr = err return } + defer func() { + if cerr := kvreader.Close(); err == nil && cerr != nil { + docBackIndexRowErr = cerr + } + }() for docID, doc := range batch.IndexOps { backIndexRow, err := backIndexRowForDoc(kvreader, index.IndexInternalID(docID)) @@ -847,12 +852,6 @@ func (udc *UpsideDownCouch) Batch(batch *index.Batch) (err error) { docBackIndexRowCh <- &docBackIndexRow{docID, doc, backIndexRow} } - - err = kvreader.Close() - if err != nil { - docBackIndexRowErr = err - return - } }() // wait for analysis result From 683e195ac4b55700696ba0334ed8d79f8fa53ece Mon Sep 17 00:00:00 2001 From: Sreekanth Sivasankaran Date: Fri, 16 Feb 2018 13:56:57 +0530 Subject: [PATCH 215/728] adding empty segment handling during introduction cleaning up the segment live size check --- index/scorch/introducer.go | 54 +++++++++++++++++++++++++++++--------- 1 file changed, 41 insertions(+), 13 deletions(-) diff --git a/index/scorch/introducer.go b/index/scorch/introducer.go index f75801910..1f2de98b7 100644 --- a/index/scorch/introducer.go +++ b/index/scorch/introducer.go @@ -100,8 +100,8 @@ func (s *Scorch) introduceSegment(next *segmentIntroduction) error { // prepare new index snapshot newSnapshot := &IndexSnapshot{ parent: s, - segment: make([]*SegmentSnapshot, nsegs, nsegs+1), - offsets: make([]uint64, nsegs, nsegs+1), + segment: make([]*SegmentSnapshot, 0, nsegs+1), + offsets: make([]uint64, 0, nsegs+1), internal: make(map[string][]byte, len(s.root.internal)), epoch: s.nextSnapshotEpoch, refs: 1, @@ -124,24 +124,29 @@ func (s *Scorch) introduceSegment(next *segmentIntroduction) error { 
return err } } - newSnapshot.segment[i] = &SegmentSnapshot{ + + newss := &SegmentSnapshot{ id: s.root.segment[i].id, segment: s.root.segment[i].segment, cachedDocs: s.root.segment[i].cachedDocs, } - s.root.segment[i].segment.AddRef() - + // apply new obsoletions if s.root.segment[i].deleted == nil { - newSnapshot.segment[i].deleted = delta + newss.deleted = delta } else { - newSnapshot.segment[i].deleted = roaring.Or(s.root.segment[i].deleted, delta) + newss.deleted = roaring.Or(s.root.segment[i].deleted, delta) + } + + // check for live size before copying + if newss.LiveSize() > 0 { + newSnapshot.segment = append(newSnapshot.segment, newss) + s.root.segment[i].segment.AddRef() + newSnapshot.offsets = append(newSnapshot.offsets, running) + running += s.root.segment[i].Count() } - - newSnapshot.offsets[i] = running - running += s.root.segment[i].Count() - } + // append new segment, if any, to end of the new index snapshot if next.data != nil { newSegmentSnapshot := &SegmentSnapshot{ @@ -230,7 +235,13 @@ func (s *Scorch) introduceMerge(nextMerge *segmentMerge) { newSegmentDeleted.Add(uint32(newDocNum)) } } - } else { + // clean up the old segment map to figure out the + // obsolete segments wrt root in meantime, whatever + // segments left behind in old map after processing + // the root segments would be the obsolete segment set + delete(nextMerge.old, segmentID) + + } else if s.root.segment[i].LiveSize() > 0 { // this segment is staying newSnapshot.segment = append(newSnapshot.segment, &SegmentSnapshot{ id: s.root.segment[i].id, @@ -244,7 +255,24 @@ func (s *Scorch) introduceMerge(nextMerge *segmentMerge) { } } - if nextMerge.new != nil { + // before the newMerge introduction, need to clean the newly + // merged segment wrt the current root segments, hence + // applying the obsolete segment contents to newly merged segment + for segID, ss := range nextMerge.old { + obsoleted := ss.DocNumbersLive() + if obsoleted != nil { + obsoletedIter := obsoleted.Iterator() + 
for obsoletedIter.HasNext() { + oldDocNum := obsoletedIter.Next() + newDocNum := nextMerge.oldNewDocNums[segID][oldDocNum] + newSegmentDeleted.Add(uint32(newDocNum)) + } + } + } + // In case where all the docs in the newly merged segment getting + // deleted by the time we reach here, can skip the introduction. + if nextMerge.new != nil && + nextMerge.new.Count() > newSegmentDeleted.GetCardinality() { // put new segment at end newSnapshot.segment = append(newSnapshot.segment, &SegmentSnapshot{ id: nextMerge.id, From 874829759b1b22981092b32a2942744851de4b6b Mon Sep 17 00:00:00 2001 From: Sreekanth Sivasankaran Date: Sat, 24 Feb 2018 23:53:49 +0530 Subject: [PATCH 216/728] cleaning up the wait loop --- index/scorch/persister.go | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/index/scorch/persister.go b/index/scorch/persister.go index 24988c9c6..8069ae4e6 100644 --- a/index/scorch/persister.go +++ b/index/scorch/persister.go @@ -42,8 +42,7 @@ var DefaultChunkFactor uint32 = 1024 // an optimum pace so that the merger can skip // many intermediate snapshots. // This needs to be based on empirical data. -// With high segment count with snapshots, -// doubtful on the effectiveness of this approach. +// TODO - may need to revisit this approach/value. 
var epochDistance = uint64(5) type notificationChan chan struct{} @@ -172,19 +171,17 @@ OUTER: persistWatchers = notifyMergeWatchers(lastPersistedEpoch, persistWatchers) // check for slow merger and pause persister until merger catch up - if lastPersistedEpoch > *lastMergedEpoch+epochDistance { - - select { - case <-s.closeCh: - break OUTER - case ew := <-s.persisterNotifier: - persistWatchers = append(persistWatchers, ew) - *lastMergedEpoch = ew.epoch - continue OUTER - } - } else { + if lastPersistedEpoch <= *lastMergedEpoch+epochDistance { break OUTER } + + select { + case <-s.closeCh: + break OUTER + case ew := <-s.persisterNotifier: + persistWatchers = append(persistWatchers, ew) + *lastMergedEpoch = ew.epoch + } } return persistWatchers From f0a65f041d4c53f8fbb9c5956154e2bfb8603939 Mon Sep 17 00:00:00 2001 From: Sreekanth Sivasankaran Date: Sun, 25 Feb 2018 20:58:53 +0530 Subject: [PATCH 217/728] cleaning up the wait loop --- index/scorch/persister.go | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/index/scorch/persister.go b/index/scorch/persister.go index 55b09d168..40333aaa1 100644 --- a/index/scorch/persister.go +++ b/index/scorch/persister.go @@ -164,16 +164,12 @@ func notifyMergeWatchers(lastPersistedEpoch uint64, func (s *Scorch) pausePersisterForMergerCatchUp(lastPersistedEpoch uint64, lastMergedEpoch *uint64, persistWatchers []*epochWatcher) []*epochWatcher { -OUTER: - for { + // first, let the watchers proceed if they lag behind + persistWatchers = notifyMergeWatchers(lastPersistedEpoch, persistWatchers) - // first, let the watchers proceed if they lag behind - persistWatchers = notifyMergeWatchers(lastPersistedEpoch, persistWatchers) - - // check for slow merger and pause persister until merger catch up - if lastPersistedEpoch <= *lastMergedEpoch+epochDistance { - break OUTER - } +OUTER: + // check for slow merger and await until the merger catch up + for lastPersistedEpoch > *lastMergedEpoch+epochDistance { 
select { case <-s.closeCh: @@ -182,6 +178,9 @@ OUTER: persistWatchers = append(persistWatchers, ew) *lastMergedEpoch = ew.epoch } + + // let the watchers proceed if they lag behind + persistWatchers = notifyMergeWatchers(lastPersistedEpoch, persistWatchers) } return persistWatchers From e4cc79a9ad6125d8d6ef45efb02735894afd7918 Mon Sep 17 00:00:00 2001 From: Sreekanth Sivasankaran Date: Mon, 26 Feb 2018 15:56:30 +0530 Subject: [PATCH 218/728] adopting json parsing on options, fixed the inadvertant option modification --- index/scorch/merge.go | 46 ++++++++++++++++++++----------------------- 1 file changed, 21 insertions(+), 25 deletions(-) diff --git a/index/scorch/merge.go b/index/scorch/merge.go index 77368b369..fbc4385eb 100644 --- a/index/scorch/merge.go +++ b/index/scorch/merge.go @@ -15,6 +15,7 @@ package scorch import ( + "encoding/json" "fmt" "os" "sync/atomic" @@ -28,7 +29,13 @@ import ( func (s *Scorch) mergerLoop() { var lastEpochMergePlanned uint64 - mergePlannerOptions := s.parseMergePlannerOptions() + mergePlannerOptions, err := s.parseMergePlannerOptions() + if err != nil { + s.fireAsyncError(fmt.Errorf("mergePlannerOption json parsing err: %v", err)) + s.asyncTasks.Done() + return + } + OUTER: for { select { @@ -83,32 +90,21 @@ OUTER: s.asyncTasks.Done() } -func (s *Scorch) parseMergePlannerOptions() *mergeplan.MergePlanOptions { - mergePlannerOptions := &mergeplan.DefaultMergePlanOptions - scorchOptions := map[string]interface{}{} - if v, ok := s.config["scorchOptions"]; ok { - if scorchOptions, ok = v.(map[string]interface{}); ok { - if v, ok := scorchOptions["maxSegmentsPerTier"].(float64); ok { - mergePlannerOptions.MaxSegmentsPerTier = int(v) - } - if v, ok := scorchOptions["maxSegmentSize"].(float64); ok { - mergePlannerOptions.MaxSegmentSize = int64(v) - } - if v, ok := scorchOptions["tierGrowth"].(float64); ok { - mergePlannerOptions.TierGrowth = v - } - if v, ok := scorchOptions["segmentsPerMergeTask"].(float64); ok { - 
mergePlannerOptions.SegmentsPerMergeTask = int(v) - } - if v, ok := scorchOptions["floorSegmentSize"].(float64); ok { - mergePlannerOptions.FloorSegmentSize = int64(v) - } - if v, ok := scorchOptions["reclaimDeletesWeight"].(float64); ok { - mergePlannerOptions.ReclaimDeletesWeight = v - } +func (s *Scorch) parseMergePlannerOptions() (*mergeplan.MergePlanOptions, + error) { + mergePlannerOptions := mergeplan.DefaultMergePlanOptions + if v, ok := s.config["scorchMergePlanOptions"]; ok { + b, err := json.Marshal(v) + if err != nil { + return &mergePlannerOptions, err + } + + err = json.Unmarshal(b, &mergePlannerOptions) + if err != nil { + return &mergePlannerOptions, err } } - return mergePlannerOptions + return &mergePlannerOptions, nil } func (s *Scorch) planMergeAtSnapshot(ourSnapshot *IndexSnapshot, From e02849fcdae060357951381bda6e10c552a414ee Mon Sep 17 00:00:00 2001 From: Sreekanth Sivasankaran Date: Mon, 26 Feb 2018 16:21:33 +0530 Subject: [PATCH 219/728] fix the indentation --- index/scorch/merge.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/index/scorch/merge.go b/index/scorch/merge.go index 48fff013c..61059b9b9 100644 --- a/index/scorch/merge.go +++ b/index/scorch/merge.go @@ -16,7 +16,7 @@ package scorch import ( "bytes" - "encoding/json" + "encoding/json" "fmt" "os" From ce2332e111bb56bd9a4317a821dc977ef5427784 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Sat, 24 Feb 2018 09:35:03 -0800 Subject: [PATCH 220/728] scorch zap merge reuses tf/locEncoder across terms The finishTerm() helper func that's invoked on every outer loop resets the tf/locEncoders so they can be safely reused. 
--- index/scorch/segment/zap/merge.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/index/scorch/segment/zap/merge.go b/index/scorch/segment/zap/merge.go index 808b16b75..1c050efe9 100644 --- a/index/scorch/segment/zap/merge.go +++ b/index/scorch/segment/zap/merge.go @@ -170,6 +170,9 @@ func persistMergedRest(segments []*SegmentBase, dropsIn []*roaring.Bitmap, rv := make([]uint64, len(fieldsInv)) fieldDvLocs := make([]uint64, len(fieldsInv)) + tfEncoder := newChunkedIntCoder(uint64(chunkFactor), newSegDocCount-1) + locEncoder := newChunkedIntCoder(uint64(chunkFactor), newSegDocCount-1) + // docTermMap is keyed by docNum, where the array impl provides // better memory usage behavior than a sparse-friendlier hashmap // for when docs have much structural similarity (i.e., every doc @@ -227,9 +230,6 @@ func persistMergedRest(segments []*SegmentBase, dropsIn []*roaring.Bitmap, newRoaring := roaring.NewBitmap() newRoaringLocs := roaring.NewBitmap() - tfEncoder := newChunkedIntCoder(uint64(chunkFactor), newSegDocCount-1) - locEncoder := newChunkedIntCoder(uint64(chunkFactor), newSegDocCount-1) - finishTerm := func(term []byte) error { if term == nil { return nil From 98d5d7bd81fbe0714800cae23b7b13a9601dd81a Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Sat, 24 Feb 2018 09:38:45 -0800 Subject: [PATCH 221/728] scorch zap chunkedIntCoder optimizations The optimizations / changes include... - reuse of a memory buf when serializing varint's. - reuse of a govarint.U64Base128Encoder instance, as it's a thin, wrapper around an underlying chunkBuf, so Reset()'s on the chunkBuf is enough for encoder reuse. - chunkedIntcoder.Write() method was changed to invoke w.Write() less often by forming a larger, reused buf. Profiling and analysis showed w.Write() was getting called a lot, often with tiny 1 or 2 byte inputs. The theory is w.Write() and its underlying memmove() can be more efficient when provided with larger bufs. 
- some repeated code removal, by reusing the Close() method. --- index/scorch/segment/zap/intcoder.go | 37 +++++++++++++--------------- 1 file changed, 17 insertions(+), 20 deletions(-) diff --git a/index/scorch/segment/zap/intcoder.go b/index/scorch/segment/zap/intcoder.go index e9f295023..b505fec94 100644 --- a/index/scorch/segment/zap/intcoder.go +++ b/index/scorch/segment/zap/intcoder.go @@ -30,6 +30,8 @@ type chunkedIntCoder struct { encoder *govarint.Base128Encoder chunkLens []uint64 currChunk uint64 + + buf []byte } // newChunkedIntCoder returns a new chunk int coder which packs data into @@ -67,12 +69,8 @@ func (c *chunkedIntCoder) Add(docNum uint64, vals ...uint64) error { // starting a new chunk if c.encoder != nil { // close out last - c.encoder.Close() - encodingBytes := c.chunkBuf.Bytes() - c.chunkLens[c.currChunk] = uint64(len(encodingBytes)) - c.final = append(c.final, encodingBytes...) + c.Close() c.chunkBuf.Reset() - c.encoder = govarint.NewU64Base128Encoder(&c.chunkBuf) } c.currChunk = chunk } @@ -98,26 +96,25 @@ func (c *chunkedIntCoder) Close() { // Write commits all the encoded chunked integers to the provided writer. 
func (c *chunkedIntCoder) Write(w io.Writer) (int, error) { - var tw int - buf := make([]byte, binary.MaxVarintLen64) - // write out the number of chunks + bufNeeded := binary.MaxVarintLen64 * (1 + len(c.chunkLens)) + if len(c.buf) < bufNeeded { + c.buf = make([]byte, bufNeeded) + } + buf := c.buf + + // write out the number of chunks & each chunkLen n := binary.PutUvarint(buf, uint64(len(c.chunkLens))) - nw, err := w.Write(buf[:n]) - tw += nw + for _, chunkLen := range c.chunkLens { + n += binary.PutUvarint(buf[n:], uint64(chunkLen)) + } + + tw, err := w.Write(buf[:n]) if err != nil { return tw, err } - // write out the chunk lens - for _, chunkLen := range c.chunkLens { - n := binary.PutUvarint(buf, uint64(chunkLen)) - nw, err = w.Write(buf[:n]) - tw += nw - if err != nil { - return tw, err - } - } + // write out the data - nw, err = w.Write(c.final) + nw, err := w.Write(c.final) tw += nw if err != nil { return tw, err From 99ed12717633062862e33d0ab774dc9b36df6d62 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Mon, 26 Feb 2018 14:23:53 -0800 Subject: [PATCH 222/728] scorch zap merge optimize newDocNums lookup to outside of loop And, also a "go fmt". 
--- index/scorch/introducer.go | 8 ++++---- index/scorch/merge.go | 2 +- index/scorch/persister.go | 1 - index/scorch/segment/zap/merge.go | 4 +++- 4 files changed, 8 insertions(+), 7 deletions(-) diff --git a/index/scorch/introducer.go b/index/scorch/introducer.go index 18f8438f8..1a7d656ca 100644 --- a/index/scorch/introducer.go +++ b/index/scorch/introducer.go @@ -130,14 +130,14 @@ func (s *Scorch) introduceSegment(next *segmentIntroduction) error { segment: s.root.segment[i].segment, cachedDocs: s.root.segment[i].cachedDocs, } - + // apply new obsoletions if s.root.segment[i].deleted == nil { newss.deleted = delta } else { newss.deleted = roaring.Or(s.root.segment[i].deleted, delta) } - + // check for live size before copying if newss.LiveSize() > 0 { newSnapshot.segment = append(newSnapshot.segment, newss) @@ -241,7 +241,7 @@ func (s *Scorch) introduceMerge(nextMerge *segmentMerge) { // the root segments would be the obsolete segment set delete(nextMerge.old, segmentID) - } else if s.root.segment[i].LiveSize() > 0 { + } else if s.root.segment[i].LiveSize() > 0 { // this segment is staying newSnapshot.segment = append(newSnapshot.segment, &SegmentSnapshot{ id: s.root.segment[i].id, @@ -269,7 +269,7 @@ func (s *Scorch) introduceMerge(nextMerge *segmentMerge) { } } } - // In case where all the docs in the newly merged segment getting + // In case where all the docs in the newly merged segment getting // deleted by the time we reach here, can skip the introduction. 
if nextMerge.new != nil && nextMerge.new.Count() > newSegmentDeleted.GetCardinality() { diff --git a/index/scorch/merge.go b/index/scorch/merge.go index 61059b9b9..ad756588a 100644 --- a/index/scorch/merge.go +++ b/index/scorch/merge.go @@ -16,7 +16,7 @@ package scorch import ( "bytes" - "encoding/json" + "encoding/json" "fmt" "os" diff --git a/index/scorch/persister.go b/index/scorch/persister.go index 40333aaa1..c21bb1439 100644 --- a/index/scorch/persister.go +++ b/index/scorch/persister.go @@ -68,7 +68,6 @@ OUTER: persistWatchers = s.pausePersisterForMergerCatchUp(lastPersistedEpoch, &lastMergedEpoch, persistWatchers) - var ourSnapshot *IndexSnapshot var ourPersisted []chan error diff --git a/index/scorch/segment/zap/merge.go b/index/scorch/segment/zap/merge.go index 1c050efe9..77ae173b5 100644 --- a/index/scorch/segment/zap/merge.go +++ b/index/scorch/segment/zap/merge.go @@ -316,10 +316,12 @@ func persistMergedRest(segments []*SegmentBase, dropsIn []*roaring.Bitmap, return nil, 0, err2 } + newDocNumsI := newDocNums[itrI] + postItr = postings.iterator(postItr) next, err2 := postItr.Next() for next != nil && err2 == nil { - hitNewDocNum := newDocNums[itrI][next.Number()] + hitNewDocNum := newDocNumsI[next.Number()] if hitNewDocNum == docDropped { return nil, 0, fmt.Errorf("see hit with dropped doc num") } From 3f1dcb60781b5f57375d5ed93700f96db9ff8804 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Tue, 27 Feb 2018 09:23:23 -0800 Subject: [PATCH 223/728] scorch zap merge optimize drops lookup to outside of loop --- index/scorch/segment/zap/merge.go | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/index/scorch/segment/zap/merge.go b/index/scorch/segment/zap/merge.go index 77ae173b5..ae8c5b197 100644 --- a/index/scorch/segment/zap/merge.go +++ b/index/scorch/segment/zap/merge.go @@ -462,10 +462,12 @@ func mergeStoredAndRemap(segments []*SegmentBase, drops []*roaring.Bitmap, for segI, segment := range segments { segNewDocNums := make([]uint64, 
segment.numDocs) + dropsI := drops[segI] + // optimize when the field mapping is the same across all // segments and there are no deletions, via byte-copying // of stored docs bytes directly to the writer - if fieldsSame && (drops[segI] == nil || drops[segI].GetCardinality() == 0) { + if fieldsSame && (dropsI == nil || dropsI.GetCardinality() == 0) { err := segment.copyStoredDocs(newDocNum, docNumOffsets, w) if err != nil { return 0, nil, err @@ -483,7 +485,7 @@ func mergeStoredAndRemap(segments []*SegmentBase, drops []*roaring.Bitmap, // for each doc num for docNum := uint64(0); docNum < segment.numDocs; docNum++ { // TODO: roaring's API limits docNums to 32-bits? - if drops[segI] != nil && drops[segI].Contains(uint32(docNum)) { + if dropsI != nil && dropsI.Contains(uint32(docNum)) { segNewDocNums[docNum] = docDropped continue } From f58a205ae87086e9494f25e209afeedb99a8fdbb Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Tue, 27 Feb 2018 11:29:16 -0800 Subject: [PATCH 224/728] remove 1.6 from travis, add "1.10" --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 70bc093f7..775fed3a9 100644 --- a/.travis.yml +++ b/.travis.yml @@ -3,10 +3,10 @@ sudo: false language: go go: - - 1.6.x - 1.7.x - 1.8.x - 1.9.x + - "1.10" script: - go get golang.org/x/tools/cmd/cover From c74e08f039e56cef576e4336382b2a2d12d9e026 Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Tue, 27 Feb 2018 11:33:12 -0800 Subject: [PATCH 225/728] BREAKING API CHANGE - use stdlib context pkg update all references to context to use std lib pkg --- index.go | 3 ++- index_alias_impl.go | 3 +-- index_alias_impl_test.go | 3 +-- index_impl.go | 3 +-- index_test.go | 3 +-- search/collector.go | 3 +-- search/collector/bench_test.go | 2 +- search/collector/topn.go | 2 +- search/collector/topn_test.go | 3 +-- vendor/manifest | 9 --------- 10 files changed, 10 insertions(+), 24 deletions(-) diff --git a/index.go b/index.go index e85652d96..ea7b3832a 
100644 --- a/index.go +++ b/index.go @@ -15,11 +15,12 @@ package bleve import ( + "context" + "github.com/blevesearch/bleve/document" "github.com/blevesearch/bleve/index" "github.com/blevesearch/bleve/index/store" "github.com/blevesearch/bleve/mapping" - "golang.org/x/net/context" ) // A Batch groups together multiple Index and Delete diff --git a/index_alias_impl.go b/index_alias_impl.go index 9e9a3594f..f678a059b 100644 --- a/index_alias_impl.go +++ b/index_alias_impl.go @@ -15,12 +15,11 @@ package bleve import ( + "context" "sort" "sync" "time" - "golang.org/x/net/context" - "github.com/blevesearch/bleve/document" "github.com/blevesearch/bleve/index" "github.com/blevesearch/bleve/index/store" diff --git a/index_alias_impl_test.go b/index_alias_impl_test.go index a59406647..2ee64991f 100644 --- a/index_alias_impl_test.go +++ b/index_alias_impl_test.go @@ -15,13 +15,12 @@ package bleve import ( + "context" "fmt" "reflect" "testing" "time" - "golang.org/x/net/context" - "github.com/blevesearch/bleve/document" "github.com/blevesearch/bleve/index" "github.com/blevesearch/bleve/index/store" diff --git a/index_impl.go b/index_impl.go index 799b582a0..caea1b8e0 100644 --- a/index_impl.go +++ b/index_impl.go @@ -15,6 +15,7 @@ package bleve import ( + "context" "encoding/json" "fmt" "os" @@ -22,8 +23,6 @@ import ( "sync/atomic" "time" - "golang.org/x/net/context" - "github.com/blevesearch/bleve/document" "github.com/blevesearch/bleve/index" "github.com/blevesearch/bleve/index/store" diff --git a/index_test.go b/index_test.go index 762e3838e..f16a8f637 100644 --- a/index_test.go +++ b/index_test.go @@ -15,6 +15,7 @@ package bleve import ( + "context" "fmt" "io/ioutil" "log" @@ -28,8 +29,6 @@ import ( "testing" "time" - "golang.org/x/net/context" - "github.com/blevesearch/bleve/analysis/analyzer/keyword" "github.com/blevesearch/bleve/document" "github.com/blevesearch/bleve/index" diff --git a/search/collector.go b/search/collector.go index cba4829d4..0d163a9d9 100644 --- 
a/search/collector.go +++ b/search/collector.go @@ -15,11 +15,10 @@ package search import ( + "context" "time" "github.com/blevesearch/bleve/index" - - "golang.org/x/net/context" ) type Collector interface { diff --git a/search/collector/bench_test.go b/search/collector/bench_test.go index e75613c36..e6a786f41 100644 --- a/search/collector/bench_test.go +++ b/search/collector/bench_test.go @@ -15,13 +15,13 @@ package collector import ( + "context" "math/rand" "strconv" "testing" "github.com/blevesearch/bleve/index" "github.com/blevesearch/bleve/search" - "golang.org/x/net/context" ) type createCollector func() search.Collector diff --git a/search/collector/topn.go b/search/collector/topn.go index 2c7c6752d..388370e7e 100644 --- a/search/collector/topn.go +++ b/search/collector/topn.go @@ -15,11 +15,11 @@ package collector import ( + "context" "time" "github.com/blevesearch/bleve/index" "github.com/blevesearch/bleve/search" - "golang.org/x/net/context" ) type collectorStore interface { diff --git a/search/collector/topn_test.go b/search/collector/topn_test.go index b8c331ae6..d50e38a0c 100644 --- a/search/collector/topn_test.go +++ b/search/collector/topn_test.go @@ -15,10 +15,9 @@ package collector import ( + "context" "testing" - "golang.org/x/net/context" - "github.com/blevesearch/bleve/index" "github.com/blevesearch/bleve/search" ) diff --git a/vendor/manifest b/vendor/manifest index 9a86629ed..0837684fa 100644 --- a/vendor/manifest +++ b/vendor/manifest @@ -107,15 +107,6 @@ "branch": "master", "notests": true }, - { - "importpath": "golang.org/x/net/context", - "repository": "https://go.googlesource.com/net", - "vcs": "", - "revision": "e45385e9b226f570b1f086bf287b25d3d4117776", - "branch": "master", - "path": "/context", - "notests": true - }, { "importpath": "golang.org/x/text/transform", "repository": "https://go.googlesource.com/text", From 806313276698e176d9824dfefbbed730eef259ad Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Tue, 27 Feb 2018 11:57:21 
-0800 Subject: [PATCH 226/728] fix new issues found by go vet when using stdlib context pkg --- index_alias_impl_test.go | 17 +++++++++++------ index_test.go | 8 +++++--- 2 files changed, 16 insertions(+), 9 deletions(-) diff --git a/index_alias_impl_test.go b/index_alias_impl_test.go index 2ee64991f..9599b89d6 100644 --- a/index_alias_impl_test.go +++ b/index_alias_impl_test.go @@ -782,7 +782,9 @@ func TestMultiSearchTimeout(t *testing.T) { }} // first run with absurdly long time out, should succeed - ctx, _ = context.WithTimeout(context.Background(), 10*time.Second) + var cancel context.CancelFunc + ctx, cancel = context.WithTimeout(context.Background(), 10*time.Second) + defer cancel() query := NewTermQuery("test") sr := NewSearchRequest(query) res, err := MultiSearch(ctx, sr, ei1, ei2) @@ -803,7 +805,8 @@ func TestMultiSearchTimeout(t *testing.T) { } // now run a search again with an absurdly low timeout (should timeout) - ctx, _ = context.WithTimeout(context.Background(), 1*time.Microsecond) + ctx, cancel = context.WithTimeout(context.Background(), 1*time.Microsecond) + defer cancel() res, err = MultiSearch(ctx, sr, ei1, ei2) if err != nil { t.Errorf("expected no error, got %v", err) @@ -829,7 +832,6 @@ func TestMultiSearchTimeout(t *testing.T) { } // now run a search again with a normal timeout, but cancel it first - var cancel context.CancelFunc ctx, cancel = context.WithTimeout(context.Background(), 5*time.Second) cancel() res, err = MultiSearch(ctx, sr, ei1, ei2) @@ -936,7 +938,9 @@ func TestMultiSearchTimeoutPartial(t *testing.T) { // ei3 is set to take >50ms, so run search with timeout less than // this, this should return partial results - ctx, _ = context.WithTimeout(context.Background(), 25*time.Millisecond) + var cancel context.CancelFunc + ctx, cancel = context.WithTimeout(context.Background(), 25*time.Millisecond) + defer cancel() query := NewTermQuery("test") sr := NewSearchRequest(query) expected := &SearchResult{ @@ -1089,8 +1093,9 @@ func 
TestIndexAliasMultipleLayer(t *testing.T) { // ei2 and ei3 have 50ms delay // search across aliasTop should still get results from ei1 and ei4 // total should still be 4 - - ctx, _ = context.WithTimeout(context.Background(), 25*time.Millisecond) + var cancel context.CancelFunc + ctx, cancel = context.WithTimeout(context.Background(), 25*time.Millisecond) + defer cancel() query := NewTermQuery("test") sr := NewSearchRequest(query) expected := &SearchResult{ diff --git a/index_test.go b/index_test.go index f16a8f637..a69357bf6 100644 --- a/index_test.go +++ b/index_test.go @@ -1507,7 +1507,8 @@ func TestSearchTimeout(t *testing.T) { }() // first run a search with an absurdly long timeout (should succeeed) - ctx, _ := context.WithTimeout(context.Background(), 10*time.Second) + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + defer cancel() query := NewTermQuery("water") req := NewSearchRequest(query) _, err = index.SearchInContext(ctx, req) @@ -1516,7 +1517,8 @@ func TestSearchTimeout(t *testing.T) { } // now run a search again with an absurdly low timeout (should timeout) - ctx, _ = context.WithTimeout(context.Background(), 1*time.Microsecond) + ctx, cancel = context.WithTimeout(context.Background(), 1*time.Microsecond) + defer cancel() sq := &slowQuery{ actual: query, delay: 50 * time.Millisecond, // on Windows timer resolution is 15ms @@ -1528,7 +1530,7 @@ func TestSearchTimeout(t *testing.T) { } // now run a search with a long timeout, but with a long query, and cancel it - ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + ctx, cancel = context.WithTimeout(context.Background(), 10*time.Second) sq = &slowQuery{ actual: query, delay: 100 * time.Millisecond, // on Windows timer resolution is 15ms From a32362ba2eee879782c6a142b87c1c7fe3081311 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Tue, 27 Feb 2018 15:09:22 -0800 Subject: [PATCH 227/728] MB-28403: scorch introduceMerge doesn't prealloc segments capacity There's 
now multiple competing merge activities (file-merging and in-memory merging during persistence), so the simple math to precalculate capacity for the slice of segments in introduceMerge() no longer works for all cases and might have negative capacity. This change removes that (sometimes wrong) precalculation, and instead depends on append() to grow the slice correctly. --- index/scorch/introducer.go | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/index/scorch/introducer.go b/index/scorch/introducer.go index 1a7d656ca..af0181cab 100644 --- a/index/scorch/introducer.go +++ b/index/scorch/introducer.go @@ -195,19 +195,8 @@ func (s *Scorch) introduceMerge(nextMerge *segmentMerge) { // acquire lock s.rootLock.Lock() - // prepare new index snapshot - currSize := len(s.root.segment) - newSize := currSize + 1 - len(nextMerge.old) - - // empty segments deletion - if nextMerge.new == nil { - newSize-- - } - newSnapshot := &IndexSnapshot{ parent: s, - segment: make([]*SegmentSnapshot, 0, newSize), - offsets: make([]uint64, 0, newSize), internal: s.root.internal, epoch: s.nextSnapshotEpoch, refs: 1, From 4dbb4b14956d221964147971e08f4e5401869f96 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Tue, 13 Feb 2018 07:44:14 -0800 Subject: [PATCH 228/728] scorch zap posting reuses freqNorm & loc reader and decoder --- index/scorch/segment/zap/posting.go | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/index/scorch/segment/zap/posting.go b/index/scorch/segment/zap/posting.go index d504885d0..b54899bea 100644 --- a/index/scorch/segment/zap/posting.go +++ b/index/scorch/segment/zap/posting.go @@ -159,7 +159,9 @@ type PostingsIterator struct { currChunkFreqNorm []byte currChunkLoc []byte freqNormDecoder *govarint.Base128Decoder + freqNormReader *bytes.Reader locDecoder *govarint.Base128Decoder + locReader *bytes.Reader freqChunkLens []uint64 freqChunkStart uint64 @@ -183,7 +185,12 @@ func (i *PostingsIterator) loadChunk(chunk int) error { } 
end := start + i.freqChunkLens[chunk] i.currChunkFreqNorm = i.postings.sb.mem[start:end] - i.freqNormDecoder = govarint.NewU64Base128Decoder(bytes.NewReader(i.currChunkFreqNorm)) + if i.freqNormReader == nil { + i.freqNormReader = bytes.NewReader(i.currChunkFreqNorm) + i.freqNormDecoder = govarint.NewU64Base128Decoder(i.freqNormReader) + } else { + i.freqNormReader.Reset(i.currChunkFreqNorm) + } start = i.locChunkStart for j := 0; j < chunk; j++ { @@ -191,7 +198,12 @@ func (i *PostingsIterator) loadChunk(chunk int) error { } end = start + i.locChunkLens[chunk] i.currChunkLoc = i.postings.sb.mem[start:end] - i.locDecoder = govarint.NewU64Base128Decoder(bytes.NewReader(i.currChunkLoc)) + if i.locReader == nil { + i.locReader = bytes.NewReader(i.currChunkLoc) + i.locDecoder = govarint.NewU64Base128Decoder(i.locReader) + } else { + i.locReader.Reset(i.currChunkLoc) + } i.currChunk = uint32(chunk) return nil } From dd7d93ee5ebc99a755298f3eff7539f86f7c3c60 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Tue, 13 Feb 2018 09:16:20 -0800 Subject: [PATCH 229/728] scorch zap loadChunk reuses Location slices --- index/scorch/segment/zap/posting.go | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/index/scorch/segment/zap/posting.go b/index/scorch/segment/zap/posting.go index b54899bea..ada39b434 100644 --- a/index/scorch/segment/zap/posting.go +++ b/index/scorch/segment/zap/posting.go @@ -171,7 +171,8 @@ type PostingsIterator struct { locBitmap *roaring.Bitmap - next Posting + next Posting // reused across Next() calls + nextLocs []Location // reused across Next() calls } func (i *PostingsIterator) loadChunk(chunk int) error { @@ -333,7 +334,8 @@ func (i *PostingsIterator) Next() (segment.Posting, error) { } } - i.next = Posting{} // clear the struct. 
+ reuseLocs := i.next.locs // hold for reuse before struct clearing + i.next = Posting{} // clear the struct rv := &i.next rv.iterator = i rv.docNum = uint64(n) @@ -346,15 +348,23 @@ func (i *PostingsIterator) Next() (segment.Posting, error) { } rv.norm = math.Float32frombits(uint32(normBits)) if i.locBitmap.Contains(n) { - // read off 'freq' locations - rv.locs = make([]segment.Location, rv.freq) - locs := make([]Location, rv.freq) + // read off 'freq' locations, into reused slices + if cap(i.nextLocs) >= int(rv.freq) { + i.nextLocs = i.nextLocs[0:rv.freq] + } else { + i.nextLocs = make([]Location, rv.freq) + } + if cap(reuseLocs) >= int(rv.freq) { + rv.locs = reuseLocs[0:rv.freq] + } else { + rv.locs = make([]segment.Location, rv.freq) + } for j := 0; j < int(rv.freq); j++ { - err := i.readLocation(&locs[j]) + err := i.readLocation(&i.nextLocs[j]) if err != nil { return nil, err } - rv.locs[j] = &locs[j] + rv.locs[j] = &i.nextLocs[j] } } From 4b742505aae0905a869ddc116c4b0e7cab3c06e6 Mon Sep 17 00:00:00 2001 From: Sreekanth Sivasankaran Date: Wed, 28 Feb 2018 15:31:55 +0530 Subject: [PATCH 230/728] adding stats for scorch --- index/scorch/introducer.go | 6 +- index/scorch/merge.go | 32 ++++++++++- index/scorch/persister.go | 5 +- index/scorch/scorch.go | 15 +++-- index/scorch/snapshot_index.go | 2 +- index/scorch/snapshot_index_tfr.go | 2 +- index/scorch/stats.go | 91 ++++++++++++++++++++++-------- 7 files changed, 120 insertions(+), 33 deletions(-) diff --git a/index/scorch/introducer.go b/index/scorch/introducer.go index af0181cab..0d270b171 100644 --- a/index/scorch/introducer.go +++ b/index/scorch/introducer.go @@ -159,7 +159,8 @@ func (s *Scorch) introduceSegment(next *segmentIntroduction) error { // increment numItemsIntroduced which tracks the number of items // queued for persistence. 
- atomic.AddUint64(&s.stats.numItemsIntroduced, newSegmentSnapshot.Count()) + atomic.AddUint64(&s.stats.TotIntroducedItems, newSegmentSnapshot.Count()) + atomic.AddUint64(&s.stats.TotIntroducedBatchSegments, 1) } // copy old values for key, oldVal := range s.root.internal { @@ -270,6 +271,7 @@ func (s *Scorch) introduceMerge(nextMerge *segmentMerge) { cachedDocs: &cachedDocs{cache: nil}, }) newSnapshot.offsets = append(newSnapshot.offsets, running) + atomic.AddUint64(&s.stats.TotIntroducedMergeSegments, 1) } newSnapshot.AddRef() // 1 ref for the nextMerge.notify response @@ -335,6 +337,8 @@ func (s *Scorch) revertToSnapshot(revertTo *snapshotReversion) error { // release lock s.rootLock.Unlock() + atomic.AddUint64(&s.stats.TotRollbackOpsDone, 1) + if rootPrev != nil { _ = rootPrev.DecRef() } diff --git a/index/scorch/merge.go b/index/scorch/merge.go index ad756588a..6562353b7 100644 --- a/index/scorch/merge.go +++ b/index/scorch/merge.go @@ -129,6 +129,9 @@ func (s *Scorch) planMergeAtSnapshot(ourSnapshot *IndexSnapshot, return nil } + mip := uint64(len(resultMergePlan.Tasks)) + atomic.AddUint64(&s.stats.CurInProgressFileMerges, mip) + // process tasks in serial for now var notifications []chan *IndexSnapshot for _, task := range resultMergePlan.Tasks { @@ -165,6 +168,10 @@ func (s *Scorch) planMergeAtSnapshot(ourSnapshot *IndexSnapshot, s.unmarkIneligibleForRemoval(filename) return fmt.Errorf("merging failed: %v", err) } + + // update the count of file segments merged + atomic.AddUint64(&s.stats.TotMergedFileSegments, uint64(len(segmentsToMerge))) + segment, err = zap.Open(path) if err != nil { s.unmarkIneligibleForRemoval(filename) @@ -193,16 +200,26 @@ func (s *Scorch) planMergeAtSnapshot(ourSnapshot *IndexSnapshot, case s.merges <- sm: } } + + atomic.AddUint64(&s.stats.CurInProgressFileMerges, ^uint64(mip-1)) + + var newSnapshot *IndexSnapshot for _, notification := range notifications { select { case <-s.closeCh: return nil - case newSnapshot := 
<-notification: + case newSnapshot = <-notification: if newSnapshot != nil { _ = newSnapshot.DecRef() } } } + + // merge operation completed and the introduction is complete + if newSnapshot != nil { + atomic.AddUint64(&s.stats.TotFileMergeOpsDone, 1) + } + return nil } @@ -224,6 +241,8 @@ func (s *Scorch) mergeSegmentBases(snapshot *IndexSnapshot, cr := zap.NewCountHashWriter(&br) + atomic.AddUint64(&s.stats.CurInProgressMemoryMerges, 1) + newDocNums, numDocs, storedIndexOffset, fieldsIndexOffset, docValueOffset, dictLocs, fieldsInv, fieldsMap, err := zap.MergeToWriter(sbs, sbsDrops, chunkFactor, cr) @@ -238,6 +257,9 @@ func (s *Scorch) mergeSegmentBases(snapshot *IndexSnapshot, return 0, nil, 0, err } + // update the count of in-memory merged segments + atomic.AddUint64(&s.stats.TotMergedMemorySegments, uint64(len(sbs))) + newSegmentID := atomic.AddUint64(&s.nextSegmentID, 1) filename := zapFileName(newSegmentID) @@ -252,6 +274,10 @@ func (s *Scorch) mergeSegmentBases(snapshot *IndexSnapshot, return 0, nil, 0, err } + // update persisted stats + atomic.AddUint64(&s.stats.TotPersistedItems, segment.Count()) + atomic.AddUint64(&s.stats.TotPersistedSegments, 1) + sm := &segmentMerge{ id: newSegmentID, old: make(map[uint64]*SegmentSnapshot), @@ -273,10 +299,14 @@ func (s *Scorch) mergeSegmentBases(snapshot *IndexSnapshot, case s.merges <- sm: } + atomic.AddUint64(&s.stats.CurInProgressMemoryMerges, ^uint64(0)) + select { // wait for introduction to complete case <-s.closeCh: return 0, nil, 0, nil // TODO: return ErrInterruptedClosed? 
case newSnapshot := <-sm.notify: + // update counters on success + atomic.AddUint64(&s.stats.TotMemoryMergeOpsDone, 1) return numDocs, newSnapshot, newSegmentID, nil } } diff --git a/index/scorch/persister.go b/index/scorch/persister.go index c21bb1439..7186e7c34 100644 --- a/index/scorch/persister.go +++ b/index/scorch/persister.go @@ -169,6 +169,8 @@ func (s *Scorch) pausePersisterForMergerCatchUp(lastPersistedEpoch uint64, lastM OUTER: // check for slow merger and await until the merger catch up for lastPersistedEpoch > *lastMergedEpoch+epochDistance { + // update the stat on each pause cycle + atomic.AddUint64(&s.stats.TotPersisterPause, 1) select { case <-s.closeCh: @@ -412,7 +414,8 @@ func (s *Scorch) persistSnapshotDirect(snapshot *IndexSnapshot) (err error) { newIndexSnapshot.segment[i] = newSegmentSnapshot delete(newSegments, segmentSnapshot.id) // update items persisted incase of a new segment snapshot - atomic.AddUint64(&s.stats.numItemsPersisted, newSegmentSnapshot.Count()) + atomic.AddUint64(&s.stats.TotPersistedItems, newSegmentSnapshot.Count()) + atomic.AddUint64(&s.stats.TotPersistedSegments, 1) } else { newIndexSnapshot.segment[i] = s.root.segment[i] newIndexSnapshot.segment[i].segment.AddRef() diff --git a/index/scorch/scorch.go b/index/scorch/scorch.go index f539313d1..2b9096655 100644 --- a/index/scorch/scorch.go +++ b/index/scorch/scorch.go @@ -111,6 +111,7 @@ func (s *Scorch) fireAsyncError(err error) { if s.onAsyncError != nil { s.onAsyncError(err) } + atomic.AddUint64(&s.stats.TotOnErrors, 1) } func (s *Scorch) Open() error { @@ -275,7 +276,7 @@ func (s *Scorch) Batch(batch *index.Batch) (err error) { } close(resultChan) - atomic.AddUint64(&s.stats.analysisTime, uint64(time.Since(start))) + atomic.AddUint64(&s.stats.TotAnalysisTime, uint64(time.Since(start))) // notify handlers that we're about to introduce a segment s.fireEvent(EventKindBatchIntroductionStart, 0) @@ -286,6 +287,8 @@ func (s *Scorch) Batch(batch *index.Batch) (err error) { 
if err != nil { return err } + } else { + atomic.AddUint64(&s.stats.TotEmptyBatches, 1) } err = s.prepareSegment(newSegment, ids, batch.InternalOps) @@ -293,12 +296,12 @@ func (s *Scorch) Batch(batch *index.Batch) (err error) { if newSegment != nil { _ = newSegment.Close() } - atomic.AddUint64(&s.stats.errors, 1) + atomic.AddUint64(&s.stats.TotOnErrors, 1) } else { - atomic.AddUint64(&s.stats.updates, numUpdates) - atomic.AddUint64(&s.stats.deletes, numDeletes) - atomic.AddUint64(&s.stats.batches, 1) - atomic.AddUint64(&s.stats.numPlainTextBytesIndexed, numPlainTextBytes) + atomic.AddUint64(&s.stats.TotUpdates, numUpdates) + atomic.AddUint64(&s.stats.TotDeletes, numDeletes) + atomic.AddUint64(&s.stats.TotBatches, 1) + atomic.AddUint64(&s.stats.TotIndexedPlainTextBytes, numPlainTextBytes) } return err } diff --git a/index/scorch/snapshot_index.go b/index/scorch/snapshot_index.go index bb9975768..7236eeb7e 100644 --- a/index/scorch/snapshot_index.go +++ b/index/scorch/snapshot_index.go @@ -372,7 +372,7 @@ func (i *IndexSnapshot) TermFieldReader(term []byte, field string, includeFreq, rv.postings[i] = pl rv.iterators[i] = pl.Iterator() } - atomic.AddUint64(&i.parent.stats.termSearchersStarted, uint64(1)) + atomic.AddUint64(&i.parent.stats.TotTermSearchersStarted, uint64(1)) return rv, nil } diff --git a/index/scorch/snapshot_index_tfr.go b/index/scorch/snapshot_index_tfr.go index 87fd0d14f..d1f23b272 100644 --- a/index/scorch/snapshot_index_tfr.go +++ b/index/scorch/snapshot_index_tfr.go @@ -126,7 +126,7 @@ func (i *IndexSnapshotTermFieldReader) Count() uint64 { func (i *IndexSnapshotTermFieldReader) Close() error { if i.snapshot != nil { - atomic.AddUint64(&i.snapshot.parent.stats.termSearchersFinished, uint64(1)) + atomic.AddUint64(&i.snapshot.parent.stats.TotTermSearchersFinished, uint64(1)) } return nil } diff --git a/index/scorch/stats.go b/index/scorch/stats.go index c44a977bf..5edfc42c2 100644 --- a/index/scorch/stats.go +++ b/index/scorch/stats.go @@ -20,31 
+20,78 @@ import ( "sync/atomic" ) -// Stats tracks statistics about the index +// Stats tracks statistics about the index, fields that are +// prefixed like CurXxxx are gauges (can go up and down), +// and fields that are prefixed like TotXxxx are monotonically +// increasing counters. type Stats struct { - updates, deletes, batches, errors uint64 - analysisTime, indexTime uint64 - termSearchersStarted uint64 - termSearchersFinished uint64 - numPlainTextBytesIndexed uint64 - numItemsIntroduced uint64 - numItemsPersisted uint64 - i *Scorch + TotUpdates uint64 + TotDeletes uint64 + TotBatches uint64 + TotEmptyBatches uint64 + TotOnErrors uint64 + TotAnalysisTime uint64 + TotIndexTime uint64 + TotIndexedPlainTextBytes uint64 + + TotIndexSnapshotBeg uint64 + TotIndexSnapshotEnd uint64 + + TotIntroducedBatchSegments uint64 + TotIntroducedMergeSegments uint64 + TotIntroducedItems uint64 + + TotTermSearchersStarted uint64 + TotTermSearchersFinished uint64 + + TotPersistedItems uint64 + TotPersistedSegments uint64 + TotPersisterPause uint64 + + TotMemoryMergeOpsDone uint64 + TotMergedMemorySegments uint64 + TotMergedFileSegments uint64 + TotFileMergeOpsDone uint64 + + TotRollbackOpsDone uint64 + + CurInProgressMemoryMerges uint64 + CurInProgressFileMerges uint64 + + CurMemoryBytes uint64 + + i *Scorch } func (s *Stats) statsMap() (map[string]interface{}, error) { m := map[string]interface{}{} - m["updates"] = atomic.LoadUint64(&s.updates) - m["deletes"] = atomic.LoadUint64(&s.deletes) - m["batches"] = atomic.LoadUint64(&s.batches) - m["errors"] = atomic.LoadUint64(&s.errors) - m["analysis_time"] = atomic.LoadUint64(&s.analysisTime) - m["index_time"] = atomic.LoadUint64(&s.indexTime) - m["term_searchers_started"] = atomic.LoadUint64(&s.termSearchersStarted) - m["term_searchers_finished"] = atomic.LoadUint64(&s.termSearchersFinished) - m["num_plain_text_bytes_indexed"] = atomic.LoadUint64(&s.numPlainTextBytesIndexed) - m["num_items_introduced"] = 
atomic.LoadUint64(&s.numItemsIntroduced) - m["num_items_persisted"] = atomic.LoadUint64(&s.numItemsPersisted) + m["TotUpdates"] = atomic.LoadUint64(&s.TotUpdates) + m["TotDeletes"] = atomic.LoadUint64(&s.TotDeletes) + m["TotBatches"] = atomic.LoadUint64(&s.TotBatches) + m["TotEmptyBatches"] = atomic.LoadUint64(&s.TotEmptyBatches) + m["TotOnErrors"] = atomic.LoadUint64(&s.TotOnErrors) + m["TotAnalysisTime"] = atomic.LoadUint64(&s.TotAnalysisTime) + m["TotIndexSnapshotBeg"] = atomic.LoadUint64(&s.TotIndexSnapshotBeg) + m["TotIndexSnapshotEnd"] = atomic.LoadUint64(&s.TotIndexSnapshotEnd) + + m["TotTermSearchersStarted"] = atomic.LoadUint64(&s.TotTermSearchersStarted) + m["TotTermSearchersFinished"] = atomic.LoadUint64(&s.TotTermSearchersFinished) + m["TotIndexedPlainTextBytes"] = atomic.LoadUint64(&s.TotIndexedPlainTextBytes) + m["TotIntroducedItems"] = atomic.LoadUint64(&s.TotIntroducedItems) + m["TotPersistedItems"] = atomic.LoadUint64(&s.TotPersistedItems) + + m["TotMemoryMergeOpsDone"] = atomic.LoadUint64(&s.TotMemoryMergeOpsDone) + m["TotFileMergeOpsDone"] = atomic.LoadUint64(&s.TotFileMergeOpsDone) + m["TotIntroducedBatchSegments"] = atomic.LoadUint64(&s.TotIntroducedBatchSegments) + m["TotIntroducedMergeSegments"] = atomic.LoadUint64(&s.TotIntroducedMergeSegments) + m["TotPersistedSegments"] = atomic.LoadUint64(&s.TotPersistedSegments) + m["TotRollbackOpsDone"] = atomic.LoadUint64(&s.TotRollbackOpsDone) + m["CurInProgressFileMerges"] = atomic.LoadUint64(&s.CurInProgressFileMerges) + m["CurInProgressMemoryMerges"] = atomic.LoadUint64(&s.CurInProgressMemoryMerges) + m["TotPersisterPause"] = atomic.LoadUint64(&s.TotPersisterPause) + m["TotMergedMemorySegments"] = atomic.LoadUint64(&s.TotMergedMemorySegments) + m["TotMergedFileSegments"] = atomic.LoadUint64(&s.TotMergedFileSegments) + m["CurMemoryBytes"] = s.i.MemoryUsed() if s.i.path != "" { finfos, err := ioutil.ReadDir(s.i.path) @@ -61,8 +108,8 @@ func (s *Stats) statsMap() (map[string]interface{}, error) { } } 
- m["num_bytes_used_disk"] = numBytesUsedDisk - m["num_files_on_disk"] = numFilesOnDisk + m["TotOnDiskBytes"] = numBytesUsedDisk + m["TotOnDiskFiles"] = numFilesOnDisk } return m, nil From 7d46d2c7ae17f121132cd956d3c4fb576d403b70 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Wed, 28 Feb 2018 10:09:12 -0800 Subject: [PATCH 231/728] scorch zap intcoder encoder is never nil --- index/scorch/segment/zap/intcoder.go | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/index/scorch/segment/zap/intcoder.go b/index/scorch/segment/zap/intcoder.go index b505fec94..247e36fbc 100644 --- a/index/scorch/segment/zap/intcoder.go +++ b/index/scorch/segment/zap/intcoder.go @@ -67,11 +67,8 @@ func (c *chunkedIntCoder) Add(docNum uint64, vals ...uint64) error { chunk := docNum / c.chunkSize if chunk != c.currChunk { // starting a new chunk - if c.encoder != nil { - // close out last - c.Close() - c.chunkBuf.Reset() - } + c.Close() + c.chunkBuf.Reset() c.currChunk = chunk } @@ -92,6 +89,7 @@ func (c *chunkedIntCoder) Close() { encodingBytes := c.chunkBuf.Bytes() c.chunkLens[c.currChunk] = uint64(len(encodingBytes)) c.final = append(c.final, encodingBytes...) + c.currChunk = uint64(cap(c.chunkLens)) // sentinel to detect double close } // Write commits all the encoded chunked integers to the provided writer. 
From 1b661ef844a5e6d369c2af4d27ab537ca603e2de Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Wed, 28 Feb 2018 11:36:35 -0800 Subject: [PATCH 232/728] stats cleanup, renaming, gauges replaced with counters --- index/scorch/introducer.go | 17 ++++- index/scorch/merge.go | 62 ++++++++++------ index/scorch/persister.go | 30 +++++--- index/scorch/scorch.go | 43 +++++++++-- index/scorch/stats.go | 147 ++++++++++++++++++------------------- 5 files changed, 180 insertions(+), 119 deletions(-) diff --git a/index/scorch/introducer.go b/index/scorch/introducer.go index 0d270b171..b6007b4ac 100644 --- a/index/scorch/introducer.go +++ b/index/scorch/introducer.go @@ -48,6 +48,8 @@ func (s *Scorch) mainLoop() { var epochWatchers []*epochWatcher OUTER: for { + atomic.AddUint64(&s.stats.TotIntroduceLoop, 1) + select { case <-s.closeCh: break OUTER @@ -92,6 +94,9 @@ OUTER: } func (s *Scorch) introduceSegment(next *segmentIntroduction) error { + atomic.AddUint64(&s.stats.TotIntroduceSegmentBeg, 1) + defer atomic.AddUint64(&s.stats.TotIntroduceSegmentEnd, 1) + // acquire lock s.rootLock.Lock() @@ -160,7 +165,7 @@ func (s *Scorch) introduceSegment(next *segmentIntroduction) error { // increment numItemsIntroduced which tracks the number of items // queued for persistence. 
atomic.AddUint64(&s.stats.TotIntroducedItems, newSegmentSnapshot.Count()) - atomic.AddUint64(&s.stats.TotIntroducedBatchSegments, 1) + atomic.AddUint64(&s.stats.TotIntroducedSegmentsBatch, 1) } // copy old values for key, oldVal := range s.root.internal { @@ -193,6 +198,9 @@ func (s *Scorch) introduceSegment(next *segmentIntroduction) error { } func (s *Scorch) introduceMerge(nextMerge *segmentMerge) { + atomic.AddUint64(&s.stats.TotIntroduceMergeBeg, 1) + defer atomic.AddUint64(&s.stats.TotIntroduceMergeEnd, 1) + // acquire lock s.rootLock.Lock() @@ -271,7 +279,7 @@ func (s *Scorch) introduceMerge(nextMerge *segmentMerge) { cachedDocs: &cachedDocs{cache: nil}, }) newSnapshot.offsets = append(newSnapshot.offsets, running) - atomic.AddUint64(&s.stats.TotIntroducedMergeSegments, 1) + atomic.AddUint64(&s.stats.TotIntroducedSegmentsMerge, 1) } newSnapshot.AddRef() // 1 ref for the nextMerge.notify response @@ -292,6 +300,9 @@ func (s *Scorch) introduceMerge(nextMerge *segmentMerge) { } func (s *Scorch) revertToSnapshot(revertTo *snapshotReversion) error { + atomic.AddUint64(&s.stats.TotIntroduceRevertBeg, 1) + defer atomic.AddUint64(&s.stats.TotIntroduceRevertEnd, 1) + if revertTo.snapshot == nil { err := fmt.Errorf("Cannot revert to a nil snapshot") revertTo.applied <- err @@ -337,8 +348,6 @@ func (s *Scorch) revertToSnapshot(revertTo *snapshotReversion) error { // release lock s.rootLock.Unlock() - atomic.AddUint64(&s.stats.TotRollbackOpsDone, 1) - if rootPrev != nil { _ = rootPrev.DecRef() } diff --git a/index/scorch/merge.go b/index/scorch/merge.go index 6562353b7..005d4f41e 100644 --- a/index/scorch/merge.go +++ b/index/scorch/merge.go @@ -40,6 +40,8 @@ func (s *Scorch) mergerLoop() { OUTER: for { + atomic.AddUint64(&s.stats.TotFileMergeLoopBeg, 1) + select { case <-s.closeCh: break OUTER @@ -59,6 +61,7 @@ OUTER: if err != nil { s.fireAsyncError(fmt.Errorf("merging err: %v", err)) _ = ourSnapshot.DecRef() + atomic.AddUint64(&s.stats.TotFileMergeLoopErr, 1) 
continue OUTER } lastEpochMergePlanned = ourSnapshot.epoch @@ -88,7 +91,10 @@ OUTER: case <-ew.notifyCh: } } + + atomic.AddUint64(&s.stats.TotFileMergeLoopEnd, 1) } + s.asyncTasks.Done() } @@ -119,35 +125,45 @@ func (s *Scorch) planMergeAtSnapshot(ourSnapshot *IndexSnapshot, } } + atomic.AddUint64(&s.stats.TotFileMergePlan, 1) + // give this list to the planner resultMergePlan, err := mergeplan.Plan(onlyZapSnapshots, options) if err != nil { + atomic.AddUint64(&s.stats.TotFileMergePlanErr, 1) return fmt.Errorf("merge planning err: %v", err) } if resultMergePlan == nil { // nothing to do + atomic.AddUint64(&s.stats.TotFileMergePlanNone, 1) return nil } - mip := uint64(len(resultMergePlan.Tasks)) - atomic.AddUint64(&s.stats.CurInProgressFileMerges, mip) + atomic.AddUint64(&s.stats.TotFileMergePlanOk, 1) + + atomic.AddUint64(&s.stats.TotFileMergePlanTasks, uint64(len(resultMergePlan.Tasks))) // process tasks in serial for now var notifications []chan *IndexSnapshot for _, task := range resultMergePlan.Tasks { if len(task.Segments) == 0 { + atomic.AddUint64(&s.stats.TotFileMergePlanTasksSegmentsEmpty, 1) continue } + atomic.AddUint64(&s.stats.TotFileMergePlanTasksSegments, uint64(len(task.Segments))) + oldMap := make(map[uint64]*SegmentSnapshot) newSegmentID := atomic.AddUint64(&s.nextSegmentID, 1) segmentsToMerge := make([]*zap.Segment, 0, len(task.Segments)) docsToDrop := make([]*roaring.Bitmap, 0, len(task.Segments)) + for _, planSegment := range task.Segments { if segSnapshot, ok := planSegment.(*SegmentSnapshot); ok { oldMap[segSnapshot.id] = segSnapshot if zapSeg, ok := segSnapshot.segment.(*zap.Segment); ok { if segSnapshot.LiveSize() == 0 { + atomic.AddUint64(&s.stats.TotFileMergeSegmentsEmpty, 1) oldMap[segSnapshot.id] = nil } else { segmentsToMerge = append(segmentsToMerge, zapSeg) @@ -163,24 +179,27 @@ func (s *Scorch) planMergeAtSnapshot(ourSnapshot *IndexSnapshot, filename := zapFileName(newSegmentID) s.markIneligibleForRemoval(filename) path := s.path + 
string(os.PathSeparator) + filename + atomic.AddUint64(&s.stats.TotFileMergeZapBeg, 1) newDocNums, err := zap.Merge(segmentsToMerge, docsToDrop, path, 1024) + atomic.AddUint64(&s.stats.TotFileMergeZapEnd, 1) if err != nil { s.unmarkIneligibleForRemoval(filename) + atomic.AddUint64(&s.stats.TotFileMergePlanTasksErr, 1) return fmt.Errorf("merging failed: %v", err) } - // update the count of file segments merged - atomic.AddUint64(&s.stats.TotMergedFileSegments, uint64(len(segmentsToMerge))) - segment, err = zap.Open(path) if err != nil { s.unmarkIneligibleForRemoval(filename) + atomic.AddUint64(&s.stats.TotFileMergePlanTasksErr, 1) return err } oldNewDocNums = make(map[uint64][]uint64) for i, segNewDocNums := range newDocNums { oldNewDocNums[task.Segments[i].Id()] = segNewDocNums } + + atomic.AddUint64(&s.stats.TotFileMergeSegments, uint64(len(segmentsToMerge))) } sm := &segmentMerge{ @@ -198,28 +217,24 @@ func (s *Scorch) planMergeAtSnapshot(ourSnapshot *IndexSnapshot, _ = segment.Close() return nil case s.merges <- sm: + atomic.AddUint64(&s.stats.TotFileMergeIntroductions, 1) } - } - atomic.AddUint64(&s.stats.CurInProgressFileMerges, ^uint64(mip-1)) + atomic.AddUint64(&s.stats.TotFileMergePlanTasksDone, 1) + } - var newSnapshot *IndexSnapshot for _, notification := range notifications { select { case <-s.closeCh: return nil - case newSnapshot = <-notification: + case newSnapshot := <-notification: + atomic.AddUint64(&s.stats.TotFileMergeIntroductionsDone, 1) if newSnapshot != nil { _ = newSnapshot.DecRef() } } } - // merge operation completed and the introduction is complete - if newSnapshot != nil { - atomic.AddUint64(&s.stats.TotFileMergeOpsDone, 1) - } - return nil } @@ -237,16 +252,19 @@ type segmentMerge struct { func (s *Scorch) mergeSegmentBases(snapshot *IndexSnapshot, sbs []*zap.SegmentBase, sbsDrops []*roaring.Bitmap, sbsIndexes []int, chunkFactor uint32) (uint64, *IndexSnapshot, uint64, error) { + atomic.AddUint64(&s.stats.TotMemMergeBeg, 1) + var br 
bytes.Buffer cr := zap.NewCountHashWriter(&br) - atomic.AddUint64(&s.stats.CurInProgressMemoryMerges, 1) - + atomic.AddUint64(&s.stats.TotMemMergeZapBeg, 1) newDocNums, numDocs, storedIndexOffset, fieldsIndexOffset, docValueOffset, dictLocs, fieldsInv, fieldsMap, err := zap.MergeToWriter(sbs, sbsDrops, chunkFactor, cr) + atomic.AddUint64(&s.stats.TotMemMergeZapEnd, 1) if err != nil { + atomic.AddUint64(&s.stats.TotMemMergeErr, 1) return 0, nil, 0, err } @@ -254,23 +272,23 @@ func (s *Scorch) mergeSegmentBases(snapshot *IndexSnapshot, fieldsMap, fieldsInv, numDocs, storedIndexOffset, fieldsIndexOffset, docValueOffset, dictLocs) if err != nil { + atomic.AddUint64(&s.stats.TotMemMergeErr, 1) return 0, nil, 0, err } - // update the count of in-memory merged segments - atomic.AddUint64(&s.stats.TotMergedMemorySegments, uint64(len(sbs))) - newSegmentID := atomic.AddUint64(&s.nextSegmentID, 1) filename := zapFileName(newSegmentID) path := s.path + string(os.PathSeparator) + filename err = zap.PersistSegmentBase(sb, path) if err != nil { + atomic.AddUint64(&s.stats.TotMemMergeErr, 1) return 0, nil, 0, err } segment, err := zap.Open(path) if err != nil { + atomic.AddUint64(&s.stats.TotMemMergeErr, 1) return 0, nil, 0, err } @@ -299,14 +317,12 @@ func (s *Scorch) mergeSegmentBases(snapshot *IndexSnapshot, case s.merges <- sm: } - atomic.AddUint64(&s.stats.CurInProgressMemoryMerges, ^uint64(0)) - select { // wait for introduction to complete case <-s.closeCh: return 0, nil, 0, nil // TODO: return ErrInterruptedClosed? 
case newSnapshot := <-sm.notify: - // update counters on success - atomic.AddUint64(&s.stats.TotMemoryMergeOpsDone, 1) + atomic.AddUint64(&s.stats.TotMemMergeSegments, uint64(len(sbs))) + atomic.AddUint64(&s.stats.TotMemMergeDone, 1) return numDocs, newSnapshot, newSegmentID, nil } } diff --git a/index/scorch/persister.go b/index/scorch/persister.go index 7186e7c34..6ffbd44d4 100644 --- a/index/scorch/persister.go +++ b/index/scorch/persister.go @@ -55,6 +55,8 @@ func (s *Scorch) persisterLoop() { var ew *epochWatcher OUTER: for { + atomic.AddUint64(&s.stats.TotPersistLoopBeg, 1) + select { case <-s.closeCh: break OUTER @@ -65,8 +67,8 @@ OUTER: if ew != nil && ew.epoch > lastMergedEpoch { lastMergedEpoch = ew.epoch } - persistWatchers = s.pausePersisterForMergerCatchUp(lastPersistedEpoch, - &lastMergedEpoch, persistWatchers) + lastMergedEpoch, persistWatchers = s.pausePersisterForMergerCatchUp(lastPersistedEpoch, + lastMergedEpoch, persistWatchers) var ourSnapshot *IndexSnapshot var ourPersisted []chan error @@ -94,6 +96,7 @@ OUTER: if err != nil { s.fireAsyncError(fmt.Errorf("got err persisting snapshot: %v", err)) _ = ourSnapshot.DecRef() + atomic.AddUint64(&s.stats.TotPersistLoopErr, 1) continue OUTER } @@ -115,6 +118,7 @@ OUTER: s.fireEvent(EventKindPersisterProgress, time.Since(startTime)) if changed { + atomic.AddUint64(&s.stats.TotPersistLoopProgress, 1) continue OUTER } } @@ -133,17 +137,21 @@ OUTER: s.removeOldData() // might as well cleanup while waiting + atomic.AddUint64(&s.stats.TotPersistLoopWait, 1) + select { case <-s.closeCh: break OUTER case <-w.notifyCh: // woken up, next loop should pick up work - continue OUTER + atomic.AddUint64(&s.stats.TotPersistLoopWaitNotified, 1) case ew = <-s.persisterNotifier: // if the watchers are already caught up then let them wait, // else let them continue to do the catch up persistWatchers = append(persistWatchers, ew) } + + atomic.AddUint64(&s.stats.TotPersistLoopEnd, 1) } } @@ -160,31 +168,32 @@ func 
notifyMergeWatchers(lastPersistedEpoch uint64, return watchersNext } -func (s *Scorch) pausePersisterForMergerCatchUp(lastPersistedEpoch uint64, lastMergedEpoch *uint64, - persistWatchers []*epochWatcher) []*epochWatcher { +func (s *Scorch) pausePersisterForMergerCatchUp(lastPersistedEpoch uint64, lastMergedEpoch uint64, + persistWatchers []*epochWatcher) (uint64, []*epochWatcher) { // first, let the watchers proceed if they lag behind persistWatchers = notifyMergeWatchers(lastPersistedEpoch, persistWatchers) OUTER: // check for slow merger and await until the merger catch up - for lastPersistedEpoch > *lastMergedEpoch+epochDistance { - // update the stat on each pause cycle - atomic.AddUint64(&s.stats.TotPersisterPause, 1) + for lastPersistedEpoch > lastMergedEpoch+epochDistance { + atomic.AddUint64(&s.stats.TotPersisterSlowMergerPause, 1) select { case <-s.closeCh: break OUTER case ew := <-s.persisterNotifier: persistWatchers = append(persistWatchers, ew) - *lastMergedEpoch = ew.epoch + lastMergedEpoch = ew.epoch } + atomic.AddUint64(&s.stats.TotPersisterSlowMergerResume, 1) + // let the watchers proceed if they lag behind persistWatchers = notifyMergeWatchers(lastPersistedEpoch, persistWatchers) } - return persistWatchers + return lastMergedEpoch, persistWatchers } func (s *Scorch) persistSnapshot(snapshot *IndexSnapshot) error { @@ -413,6 +422,7 @@ func (s *Scorch) persistSnapshotDirect(snapshot *IndexSnapshot) (err error) { } newIndexSnapshot.segment[i] = newSegmentSnapshot delete(newSegments, segmentSnapshot.id) + // update items persisted incase of a new segment snapshot atomic.AddUint64(&s.stats.TotPersistedItems, newSegmentSnapshot.Count()) atomic.AddUint64(&s.stats.TotPersistedSegments, 1) diff --git a/index/scorch/scorch.go b/index/scorch/scorch.go index 2b9096655..d3b9b36a1 100644 --- a/index/scorch/scorch.go +++ b/index/scorch/scorch.go @@ -17,6 +17,7 @@ package scorch import ( "encoding/json" "fmt" + "io/ioutil" "os" "sync" "sync/atomic" @@ -43,7 
+44,7 @@ type Scorch struct { version uint8 config map[string]interface{} analysisQueue *index.AnalysisQueue - stats *Stats + stats Stats nextSegmentID uint64 path string @@ -80,7 +81,6 @@ func NewScorch(storeName string, closeCh: make(chan struct{}), ineligibleForRemoval: map[string]bool{}, } - rv.stats = &Stats{i: rv} rv.root = &IndexSnapshot{parent: rv, refs: 1} ro, ok := config["read_only"].(bool) if ok { @@ -288,7 +288,7 @@ func (s *Scorch) Batch(batch *index.Batch) (err error) { return err } } else { - atomic.AddUint64(&s.stats.TotEmptyBatches, 1) + atomic.AddUint64(&s.stats.TotBatchesEmpty, 1) } err = s.prepareSegment(newSegment, ids, batch.InternalOps) @@ -377,10 +377,43 @@ func (s *Scorch) Reader() (index.IndexReader, error) { } func (s *Scorch) Stats() json.Marshaler { - return s.stats + return &s.stats } func (s *Scorch) StatsMap() map[string]interface{} { - m, _ := s.stats.statsMap() + m := s.stats.ToMap() + + if s.path != "" { + finfos, err := ioutil.ReadDir(s.path) + if err == nil { + var numFilesOnDisk, numBytesUsedDisk uint64 + for _, finfo := range finfos { + if !finfo.IsDir() { + numBytesUsedDisk += uint64(finfo.Size()) + numFilesOnDisk++ + } + } + + m["TotOnDiskBytes"] = numBytesUsedDisk + m["TotOnDiskFiles"] = numFilesOnDisk + } + } + + // TODO: consider one day removing these backwards compatible + // names for apps using the old names + m["updates"] = m["TotUpdates"] + m["deletes"] = m["TotDeletes"] + m["batches"] = m["TotBatches"] + m["errors"] = m["TotOnErrors"] + m["analysis_time"] = m["TotAnalysisTime"] + m["index_time"] = m["TotIndexTime"] + m["term_searchers_started"] = m["TotTermSearchersStarted"] + m["term_searchers_finished"] = m["TotTermSearchersFinished"] + m["num_plain_text_bytes_indexed"] = m["TotIndexedPlainTextBytes"] + m["num_items_introduced"] = m["TotIntroducedItems"] + m["num_items_persisted"] = m["TotPersistedItems"] + m["num_bytes_used_disk"] = m["TotOnDiskBytes"] + m["num_files_on_disk"] = m["TotOnDiskFiles"] + return m } 
diff --git a/index/scorch/stats.go b/index/scorch/stats.go index 5edfc42c2..d239e62ec 100644 --- a/index/scorch/stats.go +++ b/index/scorch/stats.go @@ -16,7 +16,7 @@ package scorch import ( "encoding/json" - "io/ioutil" + "reflect" "sync/atomic" ) @@ -25,101 +25,94 @@ import ( // and fields that are prefixed like TotXxxx are monotonically // increasing counters. type Stats struct { - TotUpdates uint64 - TotDeletes uint64 - TotBatches uint64 - TotEmptyBatches uint64 - TotOnErrors uint64 - TotAnalysisTime uint64 - TotIndexTime uint64 - TotIndexedPlainTextBytes uint64 + TotUpdates uint64 + TotDeletes uint64 + TotBatches uint64 + TotBatchesEmpty uint64 + TotOnErrors uint64 - TotIndexSnapshotBeg uint64 - TotIndexSnapshotEnd uint64 + TotAnalysisTime uint64 + TotIndexTime uint64 - TotIntroducedBatchSegments uint64 - TotIntroducedMergeSegments uint64 - TotIntroducedItems uint64 + TotIndexedPlainTextBytes uint64 TotTermSearchersStarted uint64 TotTermSearchersFinished uint64 + TotIntroduceLoop uint64 + TotIntroduceSegmentBeg uint64 + TotIntroduceSegmentEnd uint64 + TotIntroduceMergeBeg uint64 + TotIntroduceMergeEnd uint64 + TotIntroduceRevertBeg uint64 + TotIntroduceRevertEnd uint64 + + TotIntroducedItems uint64 + TotIntroducedSegmentsBatch uint64 + TotIntroducedSegmentsMerge uint64 + + TotPersistLoopBeg uint64 + TotPersistLoopErr uint64 + TotPersistLoopProgress uint64 + TotPersistLoopWait uint64 + TotPersistLoopWaitNotified uint64 + TotPersistLoopEnd uint64 + TotPersistedItems uint64 TotPersistedSegments uint64 - TotPersisterPause uint64 - TotMemoryMergeOpsDone uint64 - TotMergedMemorySegments uint64 - TotMergedFileSegments uint64 - TotFileMergeOpsDone uint64 + TotPersisterSlowMergerPause uint64 + TotPersisterSlowMergerResume uint64 - TotRollbackOpsDone uint64 + TotFileMergeLoopBeg uint64 + TotFileMergeLoopErr uint64 + TotFileMergeLoopEnd uint64 - CurInProgressMemoryMerges uint64 - CurInProgressFileMerges uint64 + TotFileMergePlan uint64 + TotFileMergePlanErr uint64 + 
TotFileMergePlanNone uint64 + TotFileMergePlanOk uint64 - CurMemoryBytes uint64 + TotFileMergePlanTasks uint64 + TotFileMergePlanTasksDone uint64 + TotFileMergePlanTasksErr uint64 + TotFileMergePlanTasksSegments uint64 + TotFileMergePlanTasksSegmentsEmpty uint64 - i *Scorch -} + TotFileMergeSegmentsEmpty uint64 + TotFileMergeSegments uint64 -func (s *Stats) statsMap() (map[string]interface{}, error) { - m := map[string]interface{}{} - m["TotUpdates"] = atomic.LoadUint64(&s.TotUpdates) - m["TotDeletes"] = atomic.LoadUint64(&s.TotDeletes) - m["TotBatches"] = atomic.LoadUint64(&s.TotBatches) - m["TotEmptyBatches"] = atomic.LoadUint64(&s.TotEmptyBatches) - m["TotOnErrors"] = atomic.LoadUint64(&s.TotOnErrors) - m["TotAnalysisTime"] = atomic.LoadUint64(&s.TotAnalysisTime) - m["TotIndexSnapshotBeg"] = atomic.LoadUint64(&s.TotIndexSnapshotBeg) - m["TotIndexSnapshotEnd"] = atomic.LoadUint64(&s.TotIndexSnapshotEnd) - - m["TotTermSearchersStarted"] = atomic.LoadUint64(&s.TotTermSearchersStarted) - m["TotTermSearchersFinished"] = atomic.LoadUint64(&s.TotTermSearchersFinished) - m["TotIndexedPlainTextBytes"] = atomic.LoadUint64(&s.TotIndexedPlainTextBytes) - m["TotIntroducedItems"] = atomic.LoadUint64(&s.TotIntroducedItems) - m["TotPersistedItems"] = atomic.LoadUint64(&s.TotPersistedItems) - - m["TotMemoryMergeOpsDone"] = atomic.LoadUint64(&s.TotMemoryMergeOpsDone) - m["TotFileMergeOpsDone"] = atomic.LoadUint64(&s.TotFileMergeOpsDone) - m["TotIntroducedBatchSegments"] = atomic.LoadUint64(&s.TotIntroducedBatchSegments) - m["TotIntroducedMergeSegments"] = atomic.LoadUint64(&s.TotIntroducedMergeSegments) - m["TotPersistedSegments"] = atomic.LoadUint64(&s.TotPersistedSegments) - m["TotRollbackOpsDone"] = atomic.LoadUint64(&s.TotRollbackOpsDone) - m["CurInProgressFileMerges"] = atomic.LoadUint64(&s.CurInProgressFileMerges) - m["CurInProgressMemoryMerges"] = atomic.LoadUint64(&s.CurInProgressMemoryMerges) - m["TotPersisterPause"] = atomic.LoadUint64(&s.TotPersisterPause) - 
m["TotMergedMemorySegments"] = atomic.LoadUint64(&s.TotMergedMemorySegments) - m["TotMergedFileSegments"] = atomic.LoadUint64(&s.TotMergedFileSegments) - m["CurMemoryBytes"] = s.i.MemoryUsed() - - if s.i.path != "" { - finfos, err := ioutil.ReadDir(s.i.path) - if err != nil { - return nil, err - } + TotFileMergeZapBeg uint64 + TotFileMergeZapEnd uint64 - var numFilesOnDisk, numBytesUsedDisk uint64 + TotFileMergeIntroductions uint64 + TotFileMergeIntroductionsDone uint64 - for _, finfo := range finfos { - if !finfo.IsDir() { - numBytesUsedDisk += uint64(finfo.Size()) - numFilesOnDisk++ - } - } + TotMemMergeBeg uint64 + TotMemMergeErr uint64 + TotMemMergeDone uint64 + TotMemMergeZapBeg uint64 + TotMemMergeZapEnd uint64 + TotMemMergeSegments uint64 +} - m["TotOnDiskBytes"] = numBytesUsedDisk - m["TotOnDiskFiles"] = numFilesOnDisk +// atomically populates the returned map +func (s *Stats) ToMap() map[string]interface{} { + m := map[string]interface{}{} + sve := reflect.ValueOf(s).Elem() + svet := sve.Type() + for i := 0; i < svet.NumField(); i++ { + svef := sve.Field(i) + if svef.CanAddr() { + svefp := svef.Addr().Interface() + m[svet.Field(i).Name] = atomic.LoadUint64(svefp.(*uint64)) + } } - - return m, nil + return m } -// MarshalJSON implements json.Marshaler +// MarshalJSON implements json.Marshaler, and in contrast to standard +// json marshaling provides atomic safety func (s *Stats) MarshalJSON() ([]byte, error) { - m, err := s.statsMap() - if err != nil { - return nil, err - } - return json.Marshal(m) + return json.Marshal(s.ToMap()) } From 0363b24dd4e16d2546f55a94d076174232983dec Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Thu, 1 Mar 2018 07:31:53 -0800 Subject: [PATCH 233/728] update to use new vellum Reset API --- index/scorch/segment/zap/build.go | 20 +++++++++++--------- index/scorch/segment/zap/merge.go | 18 +++++++++++------- 2 files changed, 22 insertions(+), 16 deletions(-) diff --git a/index/scorch/segment/zap/build.go 
b/index/scorch/segment/zap/build.go index 72357ae7d..77f18b05c 100644 --- a/index/scorch/segment/zap/build.go +++ b/index/scorch/segment/zap/build.go @@ -471,16 +471,11 @@ func persistDictionary(memSegment *mem.Segment, w *CountHashWriter, postingsLocs varintBuf := make([]byte, binary.MaxVarintLen64) var buffer bytes.Buffer + builder, err := vellum.New(&buffer, nil) + if err != nil { + return nil, err + } for fieldID, fieldTerms := range memSegment.DictKeys { - if fieldID != 0 { - buffer.Reset() - } - - // start a new vellum for this field - builder, err := vellum.New(&buffer, nil) - if err != nil { - return nil, err - } dict := memSegment.Dicts[fieldID] // now walk the dictionary in order of fieldTerms (already sorted) @@ -514,6 +509,13 @@ func persistDictionary(memSegment *mem.Segment, w *CountHashWriter, postingsLocs if err != nil { return nil, err } + + // reset buffer and vellum builder + buffer.Reset() + err = builder.Reset(&buffer) + if err != nil { + return nil, err + } } return rv, nil diff --git a/index/scorch/segment/zap/merge.go b/index/scorch/segment/zap/merge.go index ae8c5b197..33ce16c59 100644 --- a/index/scorch/segment/zap/merge.go +++ b/index/scorch/segment/zap/merge.go @@ -180,16 +180,13 @@ func persistMergedRest(segments []*SegmentBase, dropsIn []*roaring.Bitmap, var docTermMap [][]byte var vellumBuf bytes.Buffer + newVellum, err := vellum.New(&vellumBuf, nil) + if err != nil { + return nil, 0, err + } // for each field for fieldID, fieldName := range fieldsInv { - if fieldID != 0 { - vellumBuf.Reset() - } - newVellum, err := vellum.New(&vellumBuf, nil) - if err != nil { - return nil, 0, err - } // collect FST iterators from all active segments for this field var newDocNums [][]uint64 @@ -423,6 +420,13 @@ func persistMergedRest(segments []*SegmentBase, dropsIn []*roaring.Bitmap, if err != nil { return nil, 0, err } + + // reset vellum buffer and vellum builder + vellumBuf.Reset() + err = newVellum.Reset(&vellumBuf) + if err != nil { + return 
nil, 0, err + } } fieldDvLocsOffset := uint64(w.Count()) From 7e5bb0bd8d5ca2ab81fb52f2e65baf265d499698 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Thu, 1 Mar 2018 14:13:36 -0800 Subject: [PATCH 234/728] renamed to CurOnDiskBytes/Files as those are gauges --- index/scorch/scorch.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/index/scorch/scorch.go b/index/scorch/scorch.go index d3b9b36a1..226c4647e 100644 --- a/index/scorch/scorch.go +++ b/index/scorch/scorch.go @@ -393,8 +393,8 @@ func (s *Scorch) StatsMap() map[string]interface{} { } } - m["TotOnDiskBytes"] = numBytesUsedDisk - m["TotOnDiskFiles"] = numFilesOnDisk + m["CurOnDiskBytes"] = numBytesUsedDisk + m["CurOnDiskFiles"] = numFilesOnDisk } } @@ -411,8 +411,8 @@ func (s *Scorch) StatsMap() map[string]interface{} { m["num_plain_text_bytes_indexed"] = m["TotIndexedPlainTextBytes"] m["num_items_introduced"] = m["TotIntroducedItems"] m["num_items_persisted"] = m["TotPersistedItems"] - m["num_bytes_used_disk"] = m["TotOnDiskBytes"] - m["num_files_on_disk"] = m["TotOnDiskFiles"] + m["num_bytes_used_disk"] = m["CurOnDiskBytes"] + m["num_files_on_disk"] = m["CurOnDiskFiles"] return m } From 868a66279e41cd55284257df747d1cc3386a3b6f Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Fri, 2 Mar 2018 11:07:37 -0800 Subject: [PATCH 235/728] scorch indexing time stat Looks like this was forgotten along the way -- the stat for analysis time was tracked correctly, but indexing time wasn't. 
--- index/scorch/scorch.go | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/index/scorch/scorch.go b/index/scorch/scorch.go index 226c4647e..86039a7e4 100644 --- a/index/scorch/scorch.go +++ b/index/scorch/scorch.go @@ -278,6 +278,8 @@ func (s *Scorch) Batch(batch *index.Batch) (err error) { atomic.AddUint64(&s.stats.TotAnalysisTime, uint64(time.Since(start))) + indexStart := time.Now() + // notify handlers that we're about to introduce a segment s.fireEvent(EventKindBatchIntroductionStart, 0) @@ -303,6 +305,9 @@ func (s *Scorch) Batch(batch *index.Batch) (err error) { atomic.AddUint64(&s.stats.TotBatches, 1) atomic.AddUint64(&s.stats.TotIndexedPlainTextBytes, numPlainTextBytes) } + + atomic.AddUint64(&s.stats.TotIndexTime, uint64(time.Since(indexStart))) + return err } From d61d9e4cf6efbf7baa77649d4e1cedf160f9867a Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Fri, 2 Mar 2018 11:17:16 -0800 Subject: [PATCH 236/728] scorch stats MaxBatchIntroTime and TotBatchIntroTime --- index/scorch/scorch.go | 8 ++++++++ index/scorch/stats.go | 14 +++++++++----- 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/index/scorch/scorch.go b/index/scorch/scorch.go index 86039a7e4..2b85b69dd 100644 --- a/index/scorch/scorch.go +++ b/index/scorch/scorch.go @@ -344,6 +344,8 @@ func (s *Scorch) prepareSegment(newSegment segment.Segment, ids []string, _ = root.DecRef() + introStartTime := time.Now() + s.introductions <- introduction // block until this segment is applied @@ -356,6 +358,12 @@ func (s *Scorch) prepareSegment(newSegment segment.Segment, ids []string, err = <-introduction.persisted } + introTime := uint64(time.Since(introStartTime)) + atomic.AddUint64(&s.stats.TotBatchIntroTime, introTime) + if atomic.LoadUint64(&s.stats.MaxBatchIntroTime) < introTime { + atomic.AddUint64(&s.stats.MaxBatchIntroTime, introTime) + } + return err } diff --git a/index/scorch/stats.go b/index/scorch/stats.go index d239e62ec..21e6d0188 100644 --- a/index/scorch/stats.go +++ 
b/index/scorch/stats.go @@ -25,11 +25,15 @@ import ( // and fields that are prefixed like TotXxxx are monotonically // increasing counters. type Stats struct { - TotUpdates uint64 - TotDeletes uint64 - TotBatches uint64 - TotBatchesEmpty uint64 - TotOnErrors uint64 + TotUpdates uint64 + TotDeletes uint64 + + TotBatches uint64 + TotBatchesEmpty uint64 + TotBatchIntroTime uint64 + MaxBatchIntroTime uint64 + + TotOnErrors uint64 TotAnalysisTime uint64 TotIndexTime uint64 From 30acc55d05b12dc05453dfb5c8b3cf98fcfbc3dc Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Fri, 2 Mar 2018 14:03:54 -0800 Subject: [PATCH 237/728] remove unnecessary scorch reader wrapper we now use *IndexSnapshot directly --- index/scorch/reader.go | 110 --------------------------------- index/scorch/scorch.go | 4 +- index/scorch/scorch_test.go | 8 +-- index/scorch/snapshot_index.go | 28 +++++++++ 4 files changed, 34 insertions(+), 116 deletions(-) delete mode 100644 index/scorch/reader.go diff --git a/index/scorch/reader.go b/index/scorch/reader.go deleted file mode 100644 index 365ecb670..000000000 --- a/index/scorch/reader.go +++ /dev/null @@ -1,110 +0,0 @@ -// Copyright (c) 2017 Couchbase, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package scorch - -import ( - "github.com/blevesearch/bleve/document" - "github.com/blevesearch/bleve/index" -) - -type Reader struct { - root *IndexSnapshot // Owns 1 ref-count on the index snapshot. 
-} - -func (r *Reader) TermFieldReader(term []byte, field string, includeFreq, - includeNorm, includeTermVectors bool) (index.TermFieldReader, error) { - return r.root.TermFieldReader(term, field, includeFreq, includeNorm, includeTermVectors) -} - -// DocIDReader returns an iterator over all doc ids -// The caller must close returned instance to release associated resources. -func (r *Reader) DocIDReaderAll() (index.DocIDReader, error) { - return r.root.DocIDReaderAll() -} - -func (r *Reader) DocIDReaderOnly(ids []string) (index.DocIDReader, error) { - return r.root.DocIDReaderOnly(ids) -} - -func (r *Reader) FieldDict(field string) (index.FieldDict, error) { - return r.root.FieldDict(field) -} - -// FieldDictRange is currently defined to include the start and end terms -func (r *Reader) FieldDictRange(field string, startTerm []byte, - endTerm []byte) (index.FieldDict, error) { - return r.root.FieldDictRange(field, startTerm, endTerm) -} - -func (r *Reader) FieldDictPrefix(field string, - termPrefix []byte) (index.FieldDict, error) { - return r.root.FieldDictPrefix(field, termPrefix) -} - -func (r *Reader) Document(id string) (*document.Document, error) { - return r.root.Document(id) -} -func (r *Reader) DocumentVisitFieldTerms(id index.IndexInternalID, fields []string, - visitor index.DocumentFieldTermVisitor) error { - return r.root.DocumentVisitFieldTerms(id, fields, visitor) -} - -func (r *Reader) Fields() ([]string, error) { - return r.root.Fields() -} - -func (r *Reader) GetInternal(key []byte) ([]byte, error) { - return r.root.GetInternal(key) -} - -func (r *Reader) DocCount() (uint64, error) { - return r.root.DocCount() -} - -func (r *Reader) ExternalID(id index.IndexInternalID) (string, error) { - return r.root.ExternalID(id) -} - -func (r *Reader) InternalID(id string) (index.IndexInternalID, error) { - return r.root.InternalID(id) -} - -func (r *Reader) DumpAll() chan interface{} { - rv := make(chan interface{}) - go func() { - close(rv) - }() - return 
rv -} - -func (r *Reader) DumpDoc(id string) chan interface{} { - rv := make(chan interface{}) - go func() { - close(rv) - }() - return rv -} - -func (r *Reader) DumpFields() chan interface{} { - rv := make(chan interface{}) - go func() { - close(rv) - }() - return rv -} - -func (r *Reader) Close() error { - return r.root.DecRef() -} diff --git a/index/scorch/scorch.go b/index/scorch/scorch.go index 2b85b69dd..fd2376777 100644 --- a/index/scorch/scorch.go +++ b/index/scorch/scorch.go @@ -383,8 +383,8 @@ func (s *Scorch) DeleteInternal(key []byte) error { // release associated resources. func (s *Scorch) Reader() (index.IndexReader, error) { s.rootLock.RLock() - rv := &Reader{root: s.root} - rv.root.AddRef() + rv := s.root + rv.AddRef() s.rootLock.RUnlock() return rv, nil } diff --git a/index/scorch/scorch_test.go b/index/scorch/scorch_test.go index 87e9bdb21..3be52cb1f 100644 --- a/index/scorch/scorch_test.go +++ b/index/scorch/scorch_test.go @@ -680,7 +680,7 @@ func TestIndexInternalCRUD(t *testing.T) { t.Error(err) } - if len(indexReader.(*Reader).root.segment) != 0 { + if len(indexReader.(*IndexSnapshot).segment) != 0 { t.Errorf("expected 0 segments") } @@ -709,7 +709,7 @@ func TestIndexInternalCRUD(t *testing.T) { t.Error(err) } - if len(indexReader2.(*Reader).root.segment) != 0 { + if len(indexReader2.(*IndexSnapshot).segment) != 0 { t.Errorf("expected 0 segments") } @@ -738,7 +738,7 @@ func TestIndexInternalCRUD(t *testing.T) { t.Error(err) } - if len(indexReader3.(*Reader).root.segment) != 0 { + if len(indexReader3.(*IndexSnapshot).segment) != 0 { t.Errorf("expected 0 segments") } @@ -831,7 +831,7 @@ func TestIndexBatch(t *testing.T) { } }() - numSegments := len(indexReader.(*Reader).root.segment) + numSegments := len(indexReader.(*IndexSnapshot).segment) if numSegments <= 0 { t.Errorf("expected some segments, got: %d", numSegments) } diff --git a/index/scorch/snapshot_index.go b/index/scorch/snapshot_index.go index 7236eeb7e..5289b1434 100644 --- 
a/index/scorch/snapshot_index.go +++ b/index/scorch/snapshot_index.go @@ -85,6 +85,10 @@ func (i *IndexSnapshot) DecRef() (err error) { return err } +func (i *IndexSnapshot) Close() error { + return i.DecRef() +} + func (i *IndexSnapshot) newIndexSnapshotFieldDict(field string, makeItr func(i segment.TermDictionary) segment.DictionaryIterator) (*IndexSnapshotFieldDict, error) { results := make(chan *asynchSegmentResult) @@ -501,3 +505,27 @@ func extractDvPendingFields(requestedFields, persistedFields []string) []string } return rv } + +func (i *IndexSnapshot) DumpAll() chan interface{} { + rv := make(chan interface{}) + go func() { + close(rv) + }() + return rv +} + +func (i *IndexSnapshot) DumpDoc(id string) chan interface{} { + rv := make(chan interface{}) + go func() { + close(rv) + }() + return rv +} + +func (i *IndexSnapshot) DumpFields() chan interface{} { + rv := make(chan interface{}) + go func() { + close(rv) + }() + return rv +} From a5253bfe2b47c91dd735f065d47d5f535e20ca67 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Fri, 2 Mar 2018 14:30:17 -0800 Subject: [PATCH 238/728] scorch persister goes through introducer to affect root This change allows the introducer to become the only goroutine to modify the root, which in turn allows the introducer to greatly reduce its root lock holding surface area. 
--- index/scorch/introducer.go | 141 +++++++++++++++++++++++++++---------- index/scorch/persister.go | 51 ++++---------- index/scorch/scorch.go | 4 ++ index/scorch/stats.go | 2 + 4 files changed, 124 insertions(+), 74 deletions(-) diff --git a/index/scorch/introducer.go b/index/scorch/introducer.go index b6007b4ac..7f1d0073e 100644 --- a/index/scorch/introducer.go +++ b/index/scorch/introducer.go @@ -33,6 +33,11 @@ type segmentIntroduction struct { persisted chan error } +type persistIntroduction struct { + persisted map[uint64]segment.Segment + applied notificationChan +} + type epochWatcher struct { epoch uint64 notifyCh notificationChan @@ -66,6 +71,9 @@ OUTER: continue OUTER } + case persist := <-s.persists: + s.introducePersist(persist) + case revertTo := <-s.revertToSnapshots: err := s.revertToSnapshot(revertTo) if err != nil { @@ -97,32 +105,30 @@ func (s *Scorch) introduceSegment(next *segmentIntroduction) error { atomic.AddUint64(&s.stats.TotIntroduceSegmentBeg, 1) defer atomic.AddUint64(&s.stats.TotIntroduceSegmentEnd, 1) - // acquire lock - s.rootLock.Lock() + s.rootLock.RLock() + root := s.root + s.rootLock.RUnlock() - nsegs := len(s.root.segment) + nsegs := len(root.segment) // prepare new index snapshot newSnapshot := &IndexSnapshot{ parent: s, segment: make([]*SegmentSnapshot, 0, nsegs+1), offsets: make([]uint64, 0, nsegs+1), - internal: make(map[string][]byte, len(s.root.internal)), - epoch: s.nextSnapshotEpoch, + internal: make(map[string][]byte, len(root.internal)), refs: 1, } - s.nextSnapshotEpoch++ // iterate through current segments var running uint64 - for i := range s.root.segment { + for i := range root.segment { // see if optimistic work included this segment - delta, ok := next.obsoletes[s.root.segment[i].id] + delta, ok := next.obsoletes[root.segment[i].id] if !ok { var err error - delta, err = s.root.segment[i].segment.DocNumbers(next.ids) + delta, err = root.segment[i].segment.DocNumbers(next.ids) if err != nil { - s.rootLock.Unlock() 
next.applied <- fmt.Errorf("error computing doc numbers: %v", err) close(next.applied) _ = newSnapshot.DecRef() @@ -131,24 +137,24 @@ func (s *Scorch) introduceSegment(next *segmentIntroduction) error { } newss := &SegmentSnapshot{ - id: s.root.segment[i].id, - segment: s.root.segment[i].segment, - cachedDocs: s.root.segment[i].cachedDocs, + id: root.segment[i].id, + segment: root.segment[i].segment, + cachedDocs: root.segment[i].cachedDocs, } // apply new obsoletions - if s.root.segment[i].deleted == nil { + if root.segment[i].deleted == nil { newss.deleted = delta } else { - newss.deleted = roaring.Or(s.root.segment[i].deleted, delta) + newss.deleted = roaring.Or(root.segment[i].deleted, delta) } // check for live size before copying if newss.LiveSize() > 0 { newSnapshot.segment = append(newSnapshot.segment, newss) - s.root.segment[i].segment.AddRef() + root.segment[i].segment.AddRef() newSnapshot.offsets = append(newSnapshot.offsets, running) - running += s.root.segment[i].Count() + running += root.segment[i].Count() } } @@ -168,7 +174,7 @@ func (s *Scorch) introduceSegment(next *segmentIntroduction) error { atomic.AddUint64(&s.stats.TotIntroducedSegmentsBatch, 1) } // copy old values - for key, oldVal := range s.root.internal { + for key, oldVal := range root.internal { newSnapshot.internal[key] = oldVal } // set new values and apply deletes @@ -179,10 +185,14 @@ func (s *Scorch) introduceSegment(next *segmentIntroduction) error { delete(newSnapshot.internal, key) } } + + s.rootLock.Lock() if next.persisted != nil { s.rootPersisted = append(s.rootPersisted, next.persisted) } // swap in new index snapshot + newSnapshot.epoch = s.nextSnapshotEpoch + s.nextSnapshotEpoch++ rootPrev := s.root s.root = newSnapshot // release lock @@ -197,34 +207,89 @@ func (s *Scorch) introduceSegment(next *segmentIntroduction) error { return nil } +func (s *Scorch) introducePersist(persist *persistIntroduction) { + atomic.AddUint64(&s.stats.TotIntroducePersistBeg, 1) + defer 
atomic.AddUint64(&s.stats.TotIntroducePersistEnd, 1) + + s.rootLock.RLock() + root := s.root + s.rootLock.RUnlock() + + newIndexSnapshot := &IndexSnapshot{ + parent: s, + epoch: s.nextSnapshotEpoch, + segment: make([]*SegmentSnapshot, len(root.segment)), + offsets: make([]uint64, len(root.offsets)), + internal: make(map[string][]byte, len(root.internal)), + refs: 1, + } + s.nextSnapshotEpoch++ + + for i, segmentSnapshot := range root.segment { + // see if this segment has been replaced + if replacement, ok := persist.persisted[segmentSnapshot.id]; ok { + newSegmentSnapshot := &SegmentSnapshot{ + id: segmentSnapshot.id, + segment: replacement, + deleted: segmentSnapshot.deleted, + cachedDocs: segmentSnapshot.cachedDocs, + } + newIndexSnapshot.segment[i] = newSegmentSnapshot + delete(persist.persisted, segmentSnapshot.id) + + // update items persisted incase of a new segment snapshot + atomic.AddUint64(&s.stats.TotPersistedItems, newSegmentSnapshot.Count()) + atomic.AddUint64(&s.stats.TotPersistedSegments, 1) + } else { + newIndexSnapshot.segment[i] = root.segment[i] + newIndexSnapshot.segment[i].segment.AddRef() + } + newIndexSnapshot.offsets[i] = root.offsets[i] + } + + for k, v := range root.internal { + newIndexSnapshot.internal[k] = v + } + + s.rootLock.Lock() + rootPrev := s.root + s.root = newIndexSnapshot + s.rootLock.Unlock() + + if rootPrev != nil { + _ = rootPrev.DecRef() + } + + close(persist.applied) +} + func (s *Scorch) introduceMerge(nextMerge *segmentMerge) { atomic.AddUint64(&s.stats.TotIntroduceMergeBeg, 1) defer atomic.AddUint64(&s.stats.TotIntroduceMergeEnd, 1) - // acquire lock - s.rootLock.Lock() + s.rootLock.RLock() + root := s.root + s.rootLock.RUnlock() newSnapshot := &IndexSnapshot{ parent: s, - internal: s.root.internal, - epoch: s.nextSnapshotEpoch, + internal: root.internal, refs: 1, } - s.nextSnapshotEpoch++ // iterate through current segments newSegmentDeleted := roaring.NewBitmap() var running uint64 - for i := range s.root.segment { 
- segmentID := s.root.segment[i].id + for i := range root.segment { + segmentID := root.segment[i].id if segSnapAtMerge, ok := nextMerge.old[segmentID]; ok { // this segment is going away, see if anything else was deleted since we started the merge - if segSnapAtMerge != nil && s.root.segment[i].deleted != nil { + if segSnapAtMerge != nil && root.segment[i].deleted != nil { // assume all these deletes are new - deletedSince := s.root.segment[i].deleted + deletedSince := root.segment[i].deleted // if we already knew about some of them, remove if segSnapAtMerge.deleted != nil { - deletedSince = roaring.AndNot(s.root.segment[i].deleted, segSnapAtMerge.deleted) + deletedSince = roaring.AndNot(root.segment[i].deleted, segSnapAtMerge.deleted) } deletedSinceItr := deletedSince.Iterator() for deletedSinceItr.HasNext() { @@ -238,18 +303,17 @@ func (s *Scorch) introduceMerge(nextMerge *segmentMerge) { // segments left behind in old map after processing // the root segments would be the obsolete segment set delete(nextMerge.old, segmentID) - - } else if s.root.segment[i].LiveSize() > 0 { + } else if root.segment[i].LiveSize() > 0 { // this segment is staying newSnapshot.segment = append(newSnapshot.segment, &SegmentSnapshot{ - id: s.root.segment[i].id, - segment: s.root.segment[i].segment, - deleted: s.root.segment[i].deleted, - cachedDocs: s.root.segment[i].cachedDocs, + id: root.segment[i].id, + segment: root.segment[i].segment, + deleted: root.segment[i].deleted, + cachedDocs: root.segment[i].cachedDocs, }) - s.root.segment[i].segment.AddRef() + root.segment[i].segment.AddRef() newSnapshot.offsets = append(newSnapshot.offsets, running) - running += s.root.segment[i].Count() + running += root.segment[i].Count() } } @@ -284,7 +348,10 @@ func (s *Scorch) introduceMerge(nextMerge *segmentMerge) { newSnapshot.AddRef() // 1 ref for the nextMerge.notify response - // swap in new segment + s.rootLock.Lock() + // swap in new index snapshot + newSnapshot.epoch = s.nextSnapshotEpoch 
+ s.nextSnapshotEpoch++ rootPrev := s.root s.root = newSnapshot // release lock diff --git a/index/scorch/persister.go b/index/scorch/persister.go index 6ffbd44d4..f1a372e72 100644 --- a/index/scorch/persister.go +++ b/index/scorch/persister.go @@ -401,46 +401,23 @@ func (s *Scorch) persistSnapshotDirect(snapshot *IndexSnapshot) (err error) { } } - s.rootLock.Lock() - newIndexSnapshot := &IndexSnapshot{ - parent: s, - epoch: s.nextSnapshotEpoch, - segment: make([]*SegmentSnapshot, len(s.root.segment)), - offsets: make([]uint64, len(s.root.offsets)), - internal: make(map[string][]byte, len(s.root.internal)), - refs: 1, - } - s.nextSnapshotEpoch++ - for i, segmentSnapshot := range s.root.segment { - // see if this segment has been replaced - if replacement, ok := newSegments[segmentSnapshot.id]; ok { - newSegmentSnapshot := &SegmentSnapshot{ - id: segmentSnapshot.id, - segment: replacement, - deleted: segmentSnapshot.deleted, - cachedDocs: segmentSnapshot.cachedDocs, - } - newIndexSnapshot.segment[i] = newSegmentSnapshot - delete(newSegments, segmentSnapshot.id) - - // update items persisted incase of a new segment snapshot - atomic.AddUint64(&s.stats.TotPersistedItems, newSegmentSnapshot.Count()) - atomic.AddUint64(&s.stats.TotPersistedSegments, 1) - } else { - newIndexSnapshot.segment[i] = s.root.segment[i] - newIndexSnapshot.segment[i].segment.AddRef() - } - newIndexSnapshot.offsets[i] = s.root.offsets[i] + persist := &persistIntroduction{ + persisted: newSegments, + applied: make(notificationChan), } - for k, v := range s.root.internal { - newIndexSnapshot.internal[k] = v + + select { + case <-s.closeCh: + err = ErrClosed + return err + case s.persists <- persist: } - rootPrev := s.root - s.root = newIndexSnapshot - s.rootLock.Unlock() - if rootPrev != nil { - _ = rootPrev.DecRef() + select { + case <-s.closeCh: + err = ErrClosed + return err + case <-persist.applied: } } diff --git a/index/scorch/scorch.go b/index/scorch/scorch.go index fd2376777..87372a326 
100644 --- a/index/scorch/scorch.go +++ b/index/scorch/scorch.go @@ -39,6 +39,8 @@ const Name = "scorch" const Version uint8 = 1 +var ErrClosed = fmt.Errorf("scorch closed") + type Scorch struct { readOnly bool version uint8 @@ -59,6 +61,7 @@ type Scorch struct { closeCh chan struct{} introductions chan *segmentIntroduction + persists chan *persistIntroduction merges chan *segmentMerge introducerNotifier chan *epochWatcher revertToSnapshots chan *snapshotReversion @@ -174,6 +177,7 @@ func (s *Scorch) openBolt() error { } s.introductions = make(chan *segmentIntroduction) + s.persists = make(chan *persistIntroduction) s.merges = make(chan *segmentMerge) s.introducerNotifier = make(chan *epochWatcher, 1) s.revertToSnapshots = make(chan *snapshotReversion) diff --git a/index/scorch/stats.go b/index/scorch/stats.go index 21e6d0188..cd416a7c0 100644 --- a/index/scorch/stats.go +++ b/index/scorch/stats.go @@ -46,6 +46,8 @@ type Stats struct { TotIntroduceLoop uint64 TotIntroduceSegmentBeg uint64 TotIntroduceSegmentEnd uint64 + TotIntroducePersistBeg uint64 + TotIntroducePersistEnd uint64 TotIntroduceMergeBeg uint64 TotIntroduceMergeEnd uint64 TotIntroduceRevertBeg uint64 From 88c740095b7a5224c98e7538a0ee3c7bca574ee2 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Sat, 3 Mar 2018 10:59:53 -0800 Subject: [PATCH 239/728] scorch optimizations for mem.PostingsIterator.Next() & docTermMap Due to the usage rules of iterators, mem.PostingsIterator.Next() can reuse its returned Postings instance. Also, there's a micro optimization in persistDocValues() for one fewer access to the docTermMap in the inner-loop. 
--- index/scorch/segment/mem/posting.go | 6 +++--- index/scorch/segment/zap/build.go | 5 ++--- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/index/scorch/segment/mem/posting.go b/index/scorch/segment/mem/posting.go index d91a00561..25cbeb458 100644 --- a/index/scorch/segment/mem/posting.go +++ b/index/scorch/segment/mem/posting.go @@ -73,6 +73,7 @@ type PostingsIterator struct { offset int locoffset int actual roaring.IntIterable + reuse Posting } // Next returns the next posting on the postings list, or nil at the end @@ -92,17 +93,16 @@ func (i *PostingsIterator) Next() (segment.Posting, error) { i.offset++ allN = i.all.Next() } - rv := &Posting{ + i.reuse = Posting{ iterator: i, docNum: uint64(n), offset: i.offset, locoffset: i.locoffset, hasLoc: i.locations.Contains(n), } - i.locoffset += int(i.postings.dictionary.segment.Freqs[i.postings.postingsID-1][i.offset]) i.offset++ - return rv, nil + return &i.reuse, nil } // Posting is a single entry in a postings list diff --git a/index/scorch/segment/zap/build.go b/index/scorch/segment/zap/build.go index 77f18b05c..b075496cf 100644 --- a/index/scorch/segment/zap/build.go +++ b/index/scorch/segment/zap/build.go @@ -552,8 +552,7 @@ func persistDocValues(memSegment *mem.Segment, w *CountHashWriter, nextPosting, err2 := postingsItr.Next() for err2 == nil && nextPosting != nil { docNum := nextPosting.Number() - docTermMap[docNum] = append(docTermMap[docNum], []byte(next.Term)...) 
- docTermMap[docNum] = append(docTermMap[docNum], termSeparator) + docTermMap[docNum] = append(append(docTermMap[docNum], []byte(next.Term)...), termSeparator) nextPosting, err2 = postingsItr.Next() } if err2 != nil { @@ -562,10 +561,10 @@ func persistDocValues(memSegment *mem.Segment, w *CountHashWriter, next, err = dictItr.Next() } - if err != nil { return nil, err } + // sort wrt to docIDs var docNumbers docIDRange for k := range docTermMap { From b7cfef81c9a86d590b2a9ccdaed184c3e1bbcdef Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Sat, 3 Mar 2018 11:43:22 -0800 Subject: [PATCH 240/728] scorch optimize mem processDocument() dict access This change moves the dict lookup to outside of the loop. --- index/scorch/segment/mem/build.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/index/scorch/segment/mem/build.go b/index/scorch/segment/mem/build.go index 57d60dc89..2a2683dca 100644 --- a/index/scorch/segment/mem/build.go +++ b/index/scorch/segment/mem/build.go @@ -252,8 +252,9 @@ func (s *Segment) processDocument(result *index.AnalysisResult) { // now that its been rolled up into docMap, walk that for fieldID, tokenFrequencies := range docMap { + dict := s.Dicts[fieldID] for term, tokenFreq := range tokenFrequencies { - pid := s.Dicts[fieldID][term] - 1 + pid := dict[term] - 1 bs := s.Postings[pid] bs.AddInt(int(docNum)) s.Freqs[pid] = append(s.Freqs[pid], uint64(tokenFreq.Frequency())) From 6ae799052a8eaf945a769f526c209bd1385d3fbf Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Sat, 3 Mar 2018 11:52:27 -0800 Subject: [PATCH 241/728] scorch mem optimize processDocument() stored field --- index/scorch/segment/mem/build.go | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/index/scorch/segment/mem/build.go b/index/scorch/segment/mem/build.go index 2a2683dca..3a892f9ac 100644 --- a/index/scorch/segment/mem/build.go +++ b/index/scorch/segment/mem/build.go @@ -222,12 +222,6 @@ func (s *Segment) processDocument(result 
*index.AnalysisResult) { } } - storeField := func(docNum uint64, field uint16, typ byte, val []byte, pos []uint64) { - s.Stored[docNum][field] = append(s.Stored[docNum][field], val) - s.StoredTypes[docNum][field] = append(s.StoredTypes[docNum][field], typ) - s.StoredPos[docNum][field] = append(s.StoredPos[docNum][field], pos) - } - // walk each composite field for _, field := range result.Document.CompositeFields { fieldID := uint16(s.getOrDefineField(field.Name())) @@ -235,6 +229,10 @@ func (s *Segment) processDocument(result *index.AnalysisResult) { processField(fieldID, field.Name(), l, tf) } + docStored := s.Stored[docNum] + docStoredTypes := s.StoredTypes[docNum] + docStoredPos := s.StoredPos[docNum] + // walk each field for i, field := range result.Document.Fields { fieldID := uint16(s.getOrDefineField(field.Name())) @@ -242,7 +240,9 @@ func (s *Segment) processDocument(result *index.AnalysisResult) { tf := result.Analyzed[i] processField(fieldID, field.Name(), l, tf) if field.Options().IsStored() { - storeField(docNum, fieldID, encodeFieldType(field), field.Value(), field.ArrayPositions()) + docStored[fieldID] = append(docStored[fieldID], field.Value()) + docStoredTypes[fieldID] = append(docStoredTypes[fieldID], encodeFieldType(field)) + docStoredPos[fieldID] = append(docStoredPos[fieldID], field.ArrayPositions()) } if field.Options().IncludeDocValues() { From 884da6f93a3c0b9a4567501ad4ddf96eea739227 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Sat, 3 Mar 2018 11:58:28 -0800 Subject: [PATCH 242/728] scorch optimize mem processDocument() norm calculation This change moves the norm calculation outside of the inner loop. 
--- index/scorch/segment/mem/build.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/index/scorch/segment/mem/build.go b/index/scorch/segment/mem/build.go index 3a892f9ac..643ae36e4 100644 --- a/index/scorch/segment/mem/build.go +++ b/index/scorch/segment/mem/build.go @@ -253,12 +253,13 @@ func (s *Segment) processDocument(result *index.AnalysisResult) { // now that its been rolled up into docMap, walk that for fieldID, tokenFrequencies := range docMap { dict := s.Dicts[fieldID] + norm := float32(1.0/math.Sqrt(float64(fieldLens[fieldID]))) for term, tokenFreq := range tokenFrequencies { pid := dict[term] - 1 bs := s.Postings[pid] bs.AddInt(int(docNum)) s.Freqs[pid] = append(s.Freqs[pid], uint64(tokenFreq.Frequency())) - s.Norms[pid] = append(s.Norms[pid], float32(1.0/math.Sqrt(float64(fieldLens[fieldID])))) + s.Norms[pid] = append(s.Norms[pid], norm) locationBS := s.PostingsLocs[pid] if len(tokenFreq.Locations) > 0 { locationBS.AddInt(int(docNum)) From dec265c4810fbe0c6584638eb6ac6521ce7112ba Mon Sep 17 00:00:00 2001 From: Sreekanth Sivasankaran Date: Mon, 5 Mar 2018 16:32:57 +0530 Subject: [PATCH 243/728] adding compaction_written_bytes/sec stats to scorch --- index/scorch/merge.go | 2 +- index/scorch/scorch.go | 1 + index/scorch/segment/zap/merge.go | 9 ++++++++- index/scorch/stats.go | 6 ++++++ 4 files changed, 16 insertions(+), 2 deletions(-) diff --git a/index/scorch/merge.go b/index/scorch/merge.go index 005d4f41e..67e1590ac 100644 --- a/index/scorch/merge.go +++ b/index/scorch/merge.go @@ -180,7 +180,7 @@ func (s *Scorch) planMergeAtSnapshot(ourSnapshot *IndexSnapshot, s.markIneligibleForRemoval(filename) path := s.path + string(os.PathSeparator) + filename atomic.AddUint64(&s.stats.TotFileMergeZapBeg, 1) - newDocNums, err := zap.Merge(segmentsToMerge, docsToDrop, path, 1024) + newDocNums, err := zap.Merge(segmentsToMerge, docsToDrop, path, 1024, &s.stats) atomic.AddUint64(&s.stats.TotFileMergeZapEnd, 1) if err != nil { 
s.unmarkIneligibleForRemoval(filename) diff --git a/index/scorch/scorch.go b/index/scorch/scorch.go index 87372a326..8a2b1ec3b 100644 --- a/index/scorch/scorch.go +++ b/index/scorch/scorch.go @@ -430,6 +430,7 @@ func (s *Scorch) StatsMap() map[string]interface{} { m["num_items_persisted"] = m["TotPersistedItems"] m["num_bytes_used_disk"] = m["CurOnDiskBytes"] m["num_files_on_disk"] = m["CurOnDiskFiles"] + m["total_compaction_written_bytes"] = m["TotCompactionWrittenBytes"] return m } diff --git a/index/scorch/segment/zap/merge.go b/index/scorch/segment/zap/merge.go index 33ce16c59..a9383f207 100644 --- a/index/scorch/segment/zap/merge.go +++ b/index/scorch/segment/zap/merge.go @@ -31,12 +31,17 @@ import ( const docDropped = math.MaxUint64 // sentinel docNum to represent a deleted doc +// StatsReporter interface represents stats reporting methods. +type StatsReporter interface { + ReportBytesWritten(numBytesWritten uint64) +} + // Merge takes a slice of zap segments and bit masks describing which // documents may be dropped, and creates a new segment containing the // remaining data. This new segment is built at the specified path, // with the provided chunkFactor. 
func Merge(segments []*Segment, drops []*roaring.Bitmap, path string, - chunkFactor uint32) ([][]uint64, error) { + chunkFactor uint32, stats StatsReporter) ([][]uint64, error) { flag := os.O_RDWR | os.O_CREATE f, err := os.OpenFile(path, flag, 0600) @@ -92,6 +97,8 @@ func Merge(segments []*Segment, drops []*roaring.Bitmap, path string, return nil, err } + stats.ReportBytesWritten(uint64(cr.Count())) + return newDocNums, nil } diff --git a/index/scorch/stats.go b/index/scorch/stats.go index cd416a7c0..4f8c8b99e 100644 --- a/index/scorch/stats.go +++ b/index/scorch/stats.go @@ -100,6 +100,8 @@ type Stats struct { TotMemMergeZapBeg uint64 TotMemMergeZapEnd uint64 TotMemMergeSegments uint64 + + TotCompactionWrittenBytes uint64 } // atomically populates the returned map @@ -122,3 +124,7 @@ func (s *Stats) ToMap() map[string]interface{} { func (s *Stats) MarshalJSON() ([]byte, error) { return json.Marshal(s.ToMap()) } + +func (s *Stats) ReportBytesWritten(numBytesWritten uint64) { + atomic.AddUint64(&s.TotCompactionWrittenBytes, numBytesWritten) +} From 395b0a312dd078b115cfdeaa0888a8cf9afa0e95 Mon Sep 17 00:00:00 2001 From: Sreekanth Sivasankaran Date: Mon, 5 Mar 2018 17:02:58 +0530 Subject: [PATCH 244/728] adding UTs --- index/scorch/segment/zap/merge.go | 4 +- index/scorch/segment/zap/merge_test.go | 96 ++++++++++++++++++++++++-- 2 files changed, 95 insertions(+), 5 deletions(-) diff --git a/index/scorch/segment/zap/merge.go b/index/scorch/segment/zap/merge.go index a9383f207..9399046b0 100644 --- a/index/scorch/segment/zap/merge.go +++ b/index/scorch/segment/zap/merge.go @@ -97,7 +97,9 @@ func Merge(segments []*Segment, drops []*roaring.Bitmap, path string, return nil, err } - stats.ReportBytesWritten(uint64(cr.Count())) + if stats != nil { + stats.ReportBytesWritten(uint64(cr.Count())) + } return newDocNums, nil } diff --git a/index/scorch/segment/zap/merge_test.go b/index/scorch/segment/zap/merge_test.go index bb09f8314..2ba0b373a 100644 --- 
a/index/scorch/segment/zap/merge_test.go +++ b/index/scorch/segment/zap/merge_test.go @@ -20,6 +20,7 @@ import ( "reflect" "sort" "strings" + "sync/atomic" "testing" "github.com/RoaringBitmap/roaring" @@ -72,7 +73,7 @@ func TestMerge(t *testing.T) { segsToMerge[0] = segment.(*Segment) segsToMerge[1] = segment2.(*Segment) - _, err = Merge(segsToMerge, []*roaring.Bitmap{nil, nil}, "/tmp/scorch3.zap", 1024) + _, err = Merge(segsToMerge, []*roaring.Bitmap{nil, nil}, "/tmp/scorch3.zap", 1024, nil) if err != nil { t.Fatal(err) } @@ -176,7 +177,7 @@ func testMergeWithEmptySegments(t *testing.T, before bool, numEmptySegments int) drops := make([]*roaring.Bitmap, len(segsToMerge)) - _, err = Merge(segsToMerge, drops, "/tmp/scorch3.zap", 1024) + _, err = Merge(segsToMerge, drops, "/tmp/scorch3.zap", 1024, nil) if err != nil { t.Fatal(err) } @@ -218,7 +219,7 @@ func testMergeWithSelf(t *testing.T, segCur *Segment, expectedCount uint64) { segsToMerge := make([]*Segment, 1) segsToMerge[0] = segCur - _, err := Merge(segsToMerge, []*roaring.Bitmap{nil, nil}, "/tmp/"+fname, 1024) + _, err := Merge(segsToMerge, []*roaring.Bitmap{nil, nil}, "/tmp/"+fname, 1024, nil) if err != nil { t.Fatal(err) } @@ -590,7 +591,7 @@ func testMergeWithUpdates(t *testing.T, segmentDocIds [][]string, docsToDrop []* func testMergeAndDropSegments(t *testing.T, segsToMerge []*Segment, docsToDrop []*roaring.Bitmap, expectedNumDocs uint64) { _ = os.RemoveAll("/tmp/scorch-merged.zap") - _, err := Merge(segsToMerge, docsToDrop, "/tmp/scorch-merged.zap", 1024) + _, err := Merge(segsToMerge, docsToDrop, "/tmp/scorch-merged.zap", 1024, nil) if err != nil { t.Fatal(err) } @@ -782,3 +783,90 @@ func buildMemSegmentMultiHelper(docIds []string) *mem.Segment { return segment } + +type statTest struct { + totalWrittenBytes uint64 +} + +func (s *statTest) ReportBytesWritten(numBytesWritten uint64) { + atomic.AddUint64(&s.totalWrittenBytes, numBytesWritten) +} + +func TestMergeBytesWritten(t *testing.T) { + _ = 
os.RemoveAll("/tmp/scorch.zap") + _ = os.RemoveAll("/tmp/scorch2.zap") + _ = os.RemoveAll("/tmp/scorch3.zap") + + memSegment := buildMemSegmentMulti() + err := PersistSegment(memSegment, "/tmp/scorch.zap", 1024) + if err != nil { + t.Fatal(err) + } + + memSegment2 := buildMemSegmentMulti2() + err = PersistSegment(memSegment2, "/tmp/scorch2.zap", 1024) + if err != nil { + t.Fatal(err) + } + + segment, err := Open("/tmp/scorch.zap") + if err != nil { + t.Fatalf("error opening segment: %v", err) + } + defer func() { + cerr := segment.Close() + if cerr != nil { + t.Fatalf("error closing segment: %v", err) + } + }() + + segment2, err := Open("/tmp/scorch2.zap") + if err != nil { + t.Fatalf("error opening segment: %v", err) + } + defer func() { + cerr := segment2.Close() + if cerr != nil { + t.Fatalf("error closing segment: %v", err) + } + }() + + segsToMerge := make([]*Segment, 2) + segsToMerge[0] = segment.(*Segment) + segsToMerge[1] = segment2.(*Segment) + + reporter := &statTest{} + + _, err = Merge(segsToMerge, []*roaring.Bitmap{nil, nil}, "/tmp/scorch3.zap", 1024, reporter) + if err != nil { + t.Fatal(err) + } + + if reporter.totalWrittenBytes == 0 { + t.Fatalf("expected a non zero total_compaction_written_bytes") + } + + segm, err := Open("/tmp/scorch3.zap") + if err != nil { + t.Fatalf("error opening merged segment: %v", err) + } + seg3 := segm.(*Segment) + defer func() { + cerr := seg3.Close() + if cerr != nil { + t.Fatalf("error closing segment: %v", err) + } + }() + + if seg3.Path() != "/tmp/scorch3.zap" { + t.Fatalf("wrong path") + } + if seg3.Count() != 4 { + t.Fatalf("wrong count") + } + if len(seg3.Fields()) != 5 { + t.Fatalf("wrong # fields: %#v\n", seg3.Fields()) + } + + testMergeWithSelf(t, seg3, 4) +} From d44c5ad5682450baff48ead3345d69f9ec5399cc Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Sat, 3 Mar 2018 19:32:39 -0800 Subject: [PATCH 245/728] scorch stats MaxBatchIntroTime bug fix and more timing stats Added timing stats for in-mem zap merging and 
file-based zap merging. --- index/scorch/merge.go | 19 +++++++++++++++++++ index/scorch/scorch.go | 2 +- index/scorch/stats.go | 8 ++++++-- 3 files changed, 26 insertions(+), 3 deletions(-) diff --git a/index/scorch/merge.go b/index/scorch/merge.go index 005d4f41e..ee3ec46c5 100644 --- a/index/scorch/merge.go +++ b/index/scorch/merge.go @@ -179,9 +179,19 @@ func (s *Scorch) planMergeAtSnapshot(ourSnapshot *IndexSnapshot, filename := zapFileName(newSegmentID) s.markIneligibleForRemoval(filename) path := s.path + string(os.PathSeparator) + filename + + fileMergeZapStartTime := time.Now() + atomic.AddUint64(&s.stats.TotFileMergeZapBeg, 1) newDocNums, err := zap.Merge(segmentsToMerge, docsToDrop, path, 1024) atomic.AddUint64(&s.stats.TotFileMergeZapEnd, 1) + + fileMergeZapTime := uint64(time.Since(fileMergeZapStartTime)) + atomic.AddUint64(&s.stats.TotFileMergeZapTime, fileMergeZapTime) + if atomic.LoadUint64(&s.stats.MaxFileMergeZapTime) < fileMergeZapTime { + atomic.StoreUint64(&s.stats.MaxFileMergeZapTime, fileMergeZapTime) + } + if err != nil { s.unmarkIneligibleForRemoval(filename) atomic.AddUint64(&s.stats.TotFileMergePlanTasksErr, 1) @@ -258,11 +268,20 @@ func (s *Scorch) mergeSegmentBases(snapshot *IndexSnapshot, cr := zap.NewCountHashWriter(&br) + memMergeZapStartTime := time.Now() + atomic.AddUint64(&s.stats.TotMemMergeZapBeg, 1) newDocNums, numDocs, storedIndexOffset, fieldsIndexOffset, docValueOffset, dictLocs, fieldsInv, fieldsMap, err := zap.MergeToWriter(sbs, sbsDrops, chunkFactor, cr) atomic.AddUint64(&s.stats.TotMemMergeZapEnd, 1) + + memMergeZapTime := uint64(time.Since(memMergeZapStartTime)) + atomic.AddUint64(&s.stats.TotMemMergeZapTime, memMergeZapTime) + if atomic.LoadUint64(&s.stats.MaxMemMergeZapTime) < memMergeZapTime { + atomic.StoreUint64(&s.stats.MaxMemMergeZapTime, memMergeZapTime) + } + if err != nil { atomic.AddUint64(&s.stats.TotMemMergeErr, 1) return 0, nil, 0, err diff --git a/index/scorch/scorch.go b/index/scorch/scorch.go index 
87372a326..a40f374ac 100644 --- a/index/scorch/scorch.go +++ b/index/scorch/scorch.go @@ -365,7 +365,7 @@ func (s *Scorch) prepareSegment(newSegment segment.Segment, ids []string, introTime := uint64(time.Since(introStartTime)) atomic.AddUint64(&s.stats.TotBatchIntroTime, introTime) if atomic.LoadUint64(&s.stats.MaxBatchIntroTime) < introTime { - atomic.AddUint64(&s.stats.MaxBatchIntroTime, introTime) + atomic.StoreUint64(&s.stats.MaxBatchIntroTime, introTime) } return err diff --git a/index/scorch/stats.go b/index/scorch/stats.go index cd416a7c0..3c978af77 100644 --- a/index/scorch/stats.go +++ b/index/scorch/stats.go @@ -88,8 +88,10 @@ type Stats struct { TotFileMergeSegmentsEmpty uint64 TotFileMergeSegments uint64 - TotFileMergeZapBeg uint64 - TotFileMergeZapEnd uint64 + TotFileMergeZapBeg uint64 + TotFileMergeZapEnd uint64 + TotFileMergeZapTime uint64 + MaxFileMergeZapTime uint64 TotFileMergeIntroductions uint64 TotFileMergeIntroductionsDone uint64 @@ -99,6 +101,8 @@ type Stats struct { TotMemMergeDone uint64 TotMemMergeZapBeg uint64 TotMemMergeZapEnd uint64 + TotMemMergeZapTime uint64 + MaxMemMergeZapTime uint64 TotMemMergeSegments uint64 } From 85761c6a57987ece0b193042ddf953f3f360bad1 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Sat, 3 Mar 2018 19:39:21 -0800 Subject: [PATCH 246/728] go fmt --- index/scorch/segment/mem/build.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/index/scorch/segment/mem/build.go b/index/scorch/segment/mem/build.go index 643ae36e4..57971aae5 100644 --- a/index/scorch/segment/mem/build.go +++ b/index/scorch/segment/mem/build.go @@ -253,7 +253,7 @@ func (s *Segment) processDocument(result *index.AnalysisResult) { // now that its been rolled up into docMap, walk that for fieldID, tokenFrequencies := range docMap { dict := s.Dicts[fieldID] - norm := float32(1.0/math.Sqrt(float64(fieldLens[fieldID]))) + norm := float32(1.0 / math.Sqrt(float64(fieldLens[fieldID]))) for term, tokenFreq := range tokenFrequencies { pid 
:= dict[term] - 1 bs := s.Postings[pid] From 8c0881eab2caef301835eb9b0660e0b727d06687 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Sun, 4 Mar 2018 12:03:02 -0800 Subject: [PATCH 247/728] scorch zap build reuses mem postingsList/Iterator structs --- index/scorch/segment/mem/dict.go | 20 ++++++++++++++------ index/scorch/segment/mem/posting.go | 11 +++++++++-- index/scorch/segment/zap/build.go | 8 ++++++-- 3 files changed, 29 insertions(+), 10 deletions(-) diff --git a/index/scorch/segment/mem/dict.go b/index/scorch/segment/mem/dict.go index cf92ef71f..b564ed1fb 100644 --- a/index/scorch/segment/mem/dict.go +++ b/index/scorch/segment/mem/dict.go @@ -33,12 +33,20 @@ type Dictionary struct { // PostingsList returns the postings list for the specified term func (d *Dictionary) PostingsList(term string, except *roaring.Bitmap) (segment.PostingsList, error) { - return &PostingsList{ - dictionary: d, - term: term, - postingsID: d.segment.Dicts[d.fieldID][term], - except: except, - }, nil + return d.InitPostingsList(term, except, nil) +} + +func (d *Dictionary) InitPostingsList(term string, except *roaring.Bitmap, + prealloc *PostingsList) (*PostingsList, error) { + rv := prealloc + if rv == nil { + rv = &PostingsList{} + } + rv.dictionary = d + rv.term = term + rv.postingsID = d.segment.Dicts[d.fieldID][term] + rv.except = except + return rv, nil } // Iterator returns an iterator for this dictionary diff --git a/index/scorch/segment/mem/posting.go b/index/scorch/segment/mem/posting.go index 25cbeb458..2554333a2 100644 --- a/index/scorch/segment/mem/posting.go +++ b/index/scorch/segment/mem/posting.go @@ -46,9 +46,16 @@ func (p *PostingsList) Count() uint64 { // Iterator returns an iterator for this postings list func (p *PostingsList) Iterator() segment.PostingsIterator { - rv := &PostingsIterator{ - postings: p, + return p.InitIterator(nil) +} +func (p *PostingsList) InitIterator(prealloc *PostingsIterator) *PostingsIterator { + rv := prealloc + if rv == nil { + rv = 
&PostingsIterator{postings: p} + } else { + *rv = PostingsIterator{postings: p} } + if p.postingsID > 0 { allbits := p.dictionary.segment.Postings[p.postingsID-1] rv.locations = p.dictionary.segment.PostingsLocs[p.postingsID-1] diff --git a/index/scorch/segment/zap/build.go b/index/scorch/segment/zap/build.go index b075496cf..7fbc995fe 100644 --- a/index/scorch/segment/zap/build.go +++ b/index/scorch/segment/zap/build.go @@ -532,6 +532,9 @@ func persistDocValues(memSegment *mem.Segment, w *CountHashWriter, fieldChunkOffsets := make(map[uint16]uint64, len(memSegment.FieldsInv)) fdvEncoder := newChunkedContentCoder(uint64(chunkFactor), uint64(len(memSegment.Stored)-1)) + var postings *mem.PostingsList + var postingsItr *mem.PostingsIterator + for fieldID := range memSegment.DocValueFields { field := memSegment.FieldsInv[fieldID] docTermMap := make(map[uint64][]byte, 0) @@ -543,12 +546,13 @@ func persistDocValues(memSegment *mem.Segment, w *CountHashWriter, dictItr := dict.Iterator() next, err := dictItr.Next() for err == nil && next != nil { - postings, err1 := dict.PostingsList(next.Term, nil) + var err1 error + postings, err1 = dict.(*mem.Dictionary).InitPostingsList(next.Term, nil, postings) if err1 != nil { return nil, err } - postingsItr := postings.Iterator() + postingsItr = postings.InitIterator(postingsItr) nextPosting, err2 := postingsItr.Next() for err2 == nil && nextPosting != nil { docNum := nextPosting.Number() From 856778ad7bb4d4c8433a504de1302360ac4a5dbc Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Sun, 4 Mar 2018 12:06:45 -0800 Subject: [PATCH 248/728] scorch zap build prealloc docNumbers capacity --- index/scorch/segment/zap/build.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/index/scorch/segment/zap/build.go b/index/scorch/segment/zap/build.go index 7fbc995fe..4edd277d0 100644 --- a/index/scorch/segment/zap/build.go +++ b/index/scorch/segment/zap/build.go @@ -570,7 +570,7 @@ func persistDocValues(memSegment *mem.Segment, w 
*CountHashWriter, } // sort wrt to docIDs - var docNumbers docIDRange + docNumbers := make(docIDRange, 0, len(docTermMap)) for k := range docTermMap { docNumbers = append(docNumbers, k) } From a338386a038594f37d5b7901c4d3a73e53424787 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Sun, 4 Mar 2018 12:56:33 -0800 Subject: [PATCH 249/728] scorch build optimize freq/loc slice capacity --- index/scorch/segment/zap/build.go | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/index/scorch/segment/zap/build.go b/index/scorch/segment/zap/build.go index 4edd277d0..eec9998bd 100644 --- a/index/scorch/segment/zap/build.go +++ b/index/scorch/segment/zap/build.go @@ -308,7 +308,7 @@ func persistStoredFieldValues(fieldID int, } func persistPostingDetails(memSegment *mem.Segment, w *CountHashWriter, chunkFactor uint32) ([]uint64, []uint64, error) { - var freqOffsets, locOfffsets []uint64 + freqOffsets := make([]uint64, 0, len(memSegment.Postings)) tfEncoder := newChunkedIntCoder(uint64(chunkFactor), uint64(len(memSegment.Stored)-1)) for postingID := range memSegment.Postings { if postingID != 0 { @@ -351,6 +351,7 @@ func persistPostingDetails(memSegment *mem.Segment, w *CountHashWriter, chunkFac } // now do it again for the locations + locOffsets := make([]uint64, 0, len(memSegment.Postings)) locEncoder := newChunkedIntCoder(uint64(chunkFactor), uint64(len(memSegment.Stored)-1)) for postingID := range memSegment.Postings { if postingID != 0 { @@ -414,14 +415,15 @@ func persistPostingDetails(memSegment *mem.Segment, w *CountHashWriter, chunkFac } // record where this postings loc info starts - locOfffsets = append(locOfffsets, uint64(w.Count())) + locOffsets = append(locOffsets, uint64(w.Count())) locEncoder.Close() _, err := locEncoder.Write(w) if err != nil { return nil, nil, err } } - return freqOffsets, locOfffsets, nil + + return freqOffsets, locOffsets, nil } func persistPostingsLocs(memSegment *mem.Segment, w *CountHashWriter) (rv []uint64, err error) { 
From 8f8fd511b7d2a6f221a5cc51641017b4be9c2a40 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Sun, 4 Mar 2018 13:01:22 -0800 Subject: [PATCH 250/728] scorch zap access freqs[offset] outside loop --- index/scorch/segment/zap/build.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/index/scorch/segment/zap/build.go b/index/scorch/segment/zap/build.go index eec9998bd..237cc5f3d 100644 --- a/index/scorch/segment/zap/build.go +++ b/index/scorch/segment/zap/build.go @@ -368,7 +368,8 @@ func persistPostingDetails(memSegment *mem.Segment, w *CountHashWriter, chunkFac var locOffset int for postingsListItr.HasNext() { docNum := uint64(postingsListItr.Next()) - for i := 0; i < int(freqs[offset]); i++ { + n := int(freqs[offset]) + for i := 0; i < n; i++ { if len(locfields) > 0 { // put field err := locEncoder.Add(docNum, uint64(locfields[locOffset])) From 502e64c2567c363d3cf0f2b1b542b9c1db74e973 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Mon, 5 Mar 2018 16:33:13 -0800 Subject: [PATCH 251/728] scorch zap Posting doesn't use iterator field --- index/scorch/segment/zap/posting.go | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/index/scorch/segment/zap/posting.go b/index/scorch/segment/zap/posting.go index ada39b434..27d90f2b5 100644 --- a/index/scorch/segment/zap/posting.go +++ b/index/scorch/segment/zap/posting.go @@ -337,7 +337,6 @@ func (i *PostingsIterator) Next() (segment.Posting, error) { reuseLocs := i.next.locs // hold for reuse before struct clearing i.next = Posting{} // clear the struct rv := &i.next - rv.iterator = i rv.docNum = uint64(n) var err error @@ -373,12 +372,10 @@ func (i *PostingsIterator) Next() (segment.Posting, error) { // Posting is a single entry in a postings list type Posting struct { - iterator *PostingsIterator - docNum uint64 - - freq uint64 - norm float32 - locs []segment.Location + docNum uint64 + freq uint64 + norm float32 + locs []segment.Location } // Number returns the document number of 
this posting in this segment From fa5de8e09aae8ac706182333a2d9488111663936 Mon Sep 17 00:00:00 2001 From: Sreekanth Sivasankaran Date: Tue, 6 Mar 2018 16:22:11 +0530 Subject: [PATCH 252/728] making NumSnapshotsToKeep configurable --- index/scorch/persister.go | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/index/scorch/persister.go b/index/scorch/persister.go index f1a372e72..cab2d035d 100644 --- a/index/scorch/persister.go +++ b/index/scorch/persister.go @@ -633,14 +633,19 @@ func (s *Scorch) removeOldBoltSnapshots() (numRemoved int, err error) { return 0, err } - if len(persistedEpochs) <= NumSnapshotsToKeep { + numSnapshotsToKeep := NumSnapshotsToKeep + if val, ok := s.config["numSnapshotsToKeep"].(float64); ok && val > 0 { + numSnapshotsToKeep = int(val) + } + + if len(persistedEpochs) <= numSnapshotsToKeep { // we need to keep everything return 0, nil } // make a map of epochs to protect from deletion - protectedEpochs := make(map[uint64]struct{}, NumSnapshotsToKeep) - for _, epoch := range persistedEpochs[0:NumSnapshotsToKeep] { + protectedEpochs := make(map[uint64]struct{}, numSnapshotsToKeep) + for _, epoch := range persistedEpochs[0:numSnapshotsToKeep] { protectedEpochs[epoch] = struct{}{} } From 655268bec821e10be03d23734d60db6804f7cb82 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Tue, 6 Mar 2018 07:55:26 -0800 Subject: [PATCH 253/728] scorch zap postings iterator nextDocNum() helper method Refactored out a nextDocNum() helper method from Next() that future optimizations can use. 
--- index/scorch/segment/zap/posting.go | 94 ++++++++++++++++------------- 1 file changed, 53 insertions(+), 41 deletions(-) diff --git a/index/scorch/segment/zap/posting.go b/index/scorch/segment/zap/posting.go index 27d90f2b5..adc399eaa 100644 --- a/index/scorch/segment/zap/posting.go +++ b/index/scorch/segment/zap/posting.go @@ -279,9 +279,56 @@ func (i *PostingsIterator) readLocation(l *Location) error { // Next returns the next posting on the postings list, or nil at the end func (i *PostingsIterator) Next() (segment.Posting, error) { - if i.actual == nil || !i.actual.HasNext() { + docNum, exists, err := i.nextDocNum() + if err != nil { + return nil, err + } + if !exists { return nil, nil } + + reuseLocs := i.next.locs // hold for reuse before struct clearing + i.next = Posting{} // clear the struct + rv := &i.next + rv.docNum = docNum + + var normBits uint64 + rv.freq, normBits, err = i.readFreqNorm() + if err != nil { + return nil, err + } + rv.norm = math.Float32frombits(uint32(normBits)) + if i.locBitmap.Contains(uint32(docNum)) { + // read off 'freq' locations, into reused slices + if cap(i.nextLocs) >= int(rv.freq) { + i.nextLocs = i.nextLocs[0:rv.freq] + } else { + i.nextLocs = make([]Location, rv.freq) + } + if cap(reuseLocs) >= int(rv.freq) { + rv.locs = reuseLocs[0:rv.freq] + } else { + rv.locs = make([]segment.Location, rv.freq) + } + for j := 0; j < int(rv.freq); j++ { + err := i.readLocation(&i.nextLocs[j]) + if err != nil { + return nil, err + } + rv.locs[j] = &i.nextLocs[j] + } + } + + return rv, nil +} + +// nextDocNum returns the next docNum on the postings list, and also +// sets up the currChunk / loc related fields of the iterator. 
+func (i *PostingsIterator) nextDocNum() (uint64, bool, error) { + if i.actual == nil || !i.actual.HasNext() { + return 0, false, nil + } + n := i.actual.Next() nChunk := n / i.postings.sb.chunkFactor allN := i.all.Next() @@ -292,30 +339,28 @@ func (i *PostingsIterator) Next() (segment.Posting, error) { // if they don't match, adjust offsets to factor in item we're skipping over // incr the all iterator, and check again for allN != n { - // in different chunks, reset offsets if allNChunk != nChunk { i.locoffset = 0 i.offset = 0 } else { - if i.currChunk != nChunk || i.currChunkFreqNorm == nil { err := i.loadChunk(int(nChunk)) if err != nil { - return nil, fmt.Errorf("error loading chunk: %v", err) + return 0, false, fmt.Errorf("error loading chunk: %v", err) } } // read off freq/offsets even though we don't care about them freq, _, err := i.readFreqNorm() if err != nil { - return nil, err + return 0, false, err } if i.locBitmap.Contains(allN) { for j := 0; j < int(freq); j++ { err := i.readLocation(nil) if err != nil { - return nil, err + return 0, false, err } } } @@ -330,44 +375,11 @@ func (i *PostingsIterator) Next() (segment.Posting, error) { if i.currChunk != nChunk || i.currChunkFreqNorm == nil { err := i.loadChunk(int(nChunk)) if err != nil { - return nil, fmt.Errorf("error loading chunk: %v", err) + return 0, false, fmt.Errorf("error loading chunk: %v", err) } } - reuseLocs := i.next.locs // hold for reuse before struct clearing - i.next = Posting{} // clear the struct - rv := &i.next - rv.docNum = uint64(n) - - var err error - var normBits uint64 - rv.freq, normBits, err = i.readFreqNorm() - if err != nil { - return nil, err - } - rv.norm = math.Float32frombits(uint32(normBits)) - if i.locBitmap.Contains(n) { - // read off 'freq' locations, into reused slices - if cap(i.nextLocs) >= int(rv.freq) { - i.nextLocs = i.nextLocs[0:rv.freq] - } else { - i.nextLocs = make([]Location, rv.freq) - } - if cap(reuseLocs) >= int(rv.freq) { - rv.locs = 
reuseLocs[0:rv.freq] - } else { - rv.locs = make([]segment.Location, rv.freq) - } - for j := 0; j < int(rv.freq); j++ { - err := i.readLocation(&i.nextLocs[j]) - if err != nil { - return nil, err - } - rv.locs[j] = &i.nextLocs[j] - } - } - - return rv, nil + return uint64(n), true, nil } // Posting is a single entry in a postings list From 530a3d24cf0768f4c7a82e9a61dd9a0eff3ec8a2 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Tue, 6 Mar 2018 07:58:42 -0800 Subject: [PATCH 254/728] scorch zap optimize merge by byte copying freq/norm/loc's This change adds a zap PostingsIterator.nextBytes() method, which is similar to Next(), but instead of returning a Posting instance, nextBytes() returns the encoded freq/norm and location byte slices. The zap merge code then provides those byte slices directly to the intCoder's via a new method, intCoder.AddBytes(), thereby avoiding having to encode many uvarint's. --- index/scorch/segment/zap/intcoder.go | 13 ++++++++ index/scorch/segment/zap/merge.go | 42 +++++++++----------------- index/scorch/segment/zap/posting.go | 44 +++++++++++++++++++++++++--- 3 files changed, 67 insertions(+), 32 deletions(-) diff --git a/index/scorch/segment/zap/intcoder.go b/index/scorch/segment/zap/intcoder.go index 247e36fbc..8d1f94536 100644 --- a/index/scorch/segment/zap/intcoder.go +++ b/index/scorch/segment/zap/intcoder.go @@ -82,6 +82,19 @@ func (c *chunkedIntCoder) Add(docNum uint64, vals ...uint64) error { return nil } +func (c *chunkedIntCoder) AddBytes(docNum uint64, buf []byte) error { + chunk := docNum / c.chunkSize + if chunk != c.currChunk { + // starting a new chunk + c.Close() + c.chunkBuf.Reset() + c.currChunk = chunk + } + + _, err := c.chunkBuf.Write(buf) + return err +} + // Close indicates you are done calling Add() this allows the final chunk // to be encoded. 
func (c *chunkedIntCoder) Close() { diff --git a/index/scorch/segment/zap/merge.go b/index/scorch/segment/zap/merge.go index 33ce16c59..5066dfb9e 100644 --- a/index/scorch/segment/zap/merge.go +++ b/index/scorch/segment/zap/merge.go @@ -162,7 +162,6 @@ func persistMergedRest(segments []*SegmentBase, dropsIn []*roaring.Bitmap, var bufReuse bytes.Buffer var bufMaxVarintLen64 []byte = make([]byte, binary.MaxVarintLen64) - var bufLoc []uint64 var postings *PostingsList var postItr *PostingsIterator @@ -316,45 +315,32 @@ func persistMergedRest(segments []*SegmentBase, dropsIn []*roaring.Bitmap, newDocNumsI := newDocNums[itrI] postItr = postings.iterator(postItr) - next, err2 := postItr.Next() - for next != nil && err2 == nil { - hitNewDocNum := newDocNumsI[next.Number()] + + nextDocNum, nextFreqNormBytes, nextLocBytes, err2 := postItr.nextBytes() + for err2 == nil && len(nextFreqNormBytes) > 0 { + hitNewDocNum := newDocNumsI[nextDocNum] if hitNewDocNum == docDropped { return nil, 0, fmt.Errorf("see hit with dropped doc num") } + newRoaring.Add(uint32(hitNewDocNum)) - // encode norm bits - norm := next.Norm() - normBits := math.Float32bits(float32(norm)) - err = tfEncoder.Add(hitNewDocNum, next.Frequency(), uint64(normBits)) - if err != nil { - return nil, 0, err + err2 = tfEncoder.AddBytes(hitNewDocNum, nextFreqNormBytes) + if err2 != nil { + return nil, 0, err2 } - locs := next.Locations() - if len(locs) > 0 { + + if len(nextLocBytes) > 0 { newRoaringLocs.Add(uint32(hitNewDocNum)) - for _, loc := range locs { - if cap(bufLoc) < 5+len(loc.ArrayPositions()) { - bufLoc = make([]uint64, 0, 5+len(loc.ArrayPositions())) - } - args := bufLoc[0:5] - args[0] = uint64(fieldsMap[loc.Field()] - 1) - args[1] = loc.Pos() - args[2] = loc.Start() - args[3] = loc.End() - args[4] = uint64(len(loc.ArrayPositions())) - args = append(args, loc.ArrayPositions()...) - err = locEncoder.Add(hitNewDocNum, args...) 
- if err != nil { - return nil, 0, err - } + err2 = locEncoder.AddBytes(hitNewDocNum, nextLocBytes) + if err2 != nil { + return nil, 0, err2 } } docTermMap[hitNewDocNum] = append(append(docTermMap[hitNewDocNum], term...), termSeparator) - next, err2 = postItr.Next() + nextDocNum, nextFreqNormBytes, nextLocBytes, err2 = postItr.nextBytes() } if err2 != nil { return nil, 0, err2 diff --git a/index/scorch/segment/zap/posting.go b/index/scorch/segment/zap/posting.go index adc399eaa..2dab41669 100644 --- a/index/scorch/segment/zap/posting.go +++ b/index/scorch/segment/zap/posting.go @@ -280,12 +280,9 @@ func (i *PostingsIterator) readLocation(l *Location) error { // Next returns the next posting on the postings list, or nil at the end func (i *PostingsIterator) Next() (segment.Posting, error) { docNum, exists, err := i.nextDocNum() - if err != nil { + if err != nil || !exists { return nil, err } - if !exists { - return nil, nil - } reuseLocs := i.next.locs // hold for reuse before struct clearing i.next = Posting{} // clear the struct @@ -322,6 +319,45 @@ func (i *PostingsIterator) Next() (segment.Posting, error) { return rv, nil } +// nextBytes returns the docNum and the encoded freq & loc bytes for +// the next posting +func (i *PostingsIterator) nextBytes() (uint64, []byte, []byte, error) { + docNum, exists, err := i.nextDocNum() + if err != nil { + return 0, nil, nil, err + } + if !exists { + return 0, nil, nil, nil + } + + startFreqNorm := len(i.currChunkFreqNorm) - i.freqNormReader.Len() + + freq, _, err := i.readFreqNorm() + if err != nil { + return 0, nil, nil, err + } + + endFreqNorm := len(i.currChunkFreqNorm) - i.freqNormReader.Len() + bytesFreqNorm := i.currChunkFreqNorm[startFreqNorm:endFreqNorm] + + var bytesLoc []byte + if i.locBitmap.Contains(uint32(docNum)) { + startLoc := len(i.currChunkLoc) - i.locReader.Len() + + for j := uint64(0); j < freq; j++ { + err := i.readLocation(nil) + if err != nil { + return 0, nil, nil, err + } + } + + endLoc := 
len(i.currChunkLoc) - i.locReader.Len() + bytesLoc = i.currChunkLoc[startLoc:endLoc] + } + + return docNum, bytesFreqNorm, bytesLoc, nil +} + // nextDocNum returns the next docNum on the postings list, and also // sets up the currChunk / loc related fields of the iterator. func (i *PostingsIterator) nextDocNum() (uint64, bool, error) { From 5b86da85f358199d80436005d104607a7fed867d Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Tue, 6 Mar 2018 08:06:12 -0800 Subject: [PATCH 255/728] scorch zap optimize postings itr with tf/loc reader/decoder reuse --- index/scorch/segment/zap/posting.go | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/index/scorch/segment/zap/posting.go b/index/scorch/segment/zap/posting.go index 2dab41669..589c7cb85 100644 --- a/index/scorch/segment/zap/posting.go +++ b/index/scorch/segment/zap/posting.go @@ -45,7 +45,25 @@ func (p *PostingsList) iterator(rv *PostingsIterator) *PostingsIterator { if rv == nil { rv = &PostingsIterator{} } else { + freqNormReader := rv.freqNormReader + if freqNormReader != nil { + freqNormReader.Reset([]byte(nil)) + } + freqNormDecoder := rv.freqNormDecoder + + locReader := rv.locReader + if locReader != nil { + locReader.Reset([]byte(nil)) + } + locDecoder := rv.locDecoder + *rv = PostingsIterator{} // clear the struct + + rv.freqNormReader = freqNormReader + rv.freqNormDecoder = freqNormDecoder + + rv.locReader = locReader + rv.locDecoder = locDecoder } rv.postings = p From 7e36109b3c83f2e454805e709bd850e859574244 Mon Sep 17 00:00:00 2001 From: abhinavdangeti Date: Thu, 1 Mar 2018 17:12:16 -0800 Subject: [PATCH 256/728] MB-28162: Provide API to estimate memory needed to run a search query MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This API (unexported) will estimate the amount of memory needed to execute a search query over an index before the collector begins data collection. 
Sample estimates for certain queries: {Size: 10, BenchmarkUpsidedownSearchOverhead} ESTIMATE BENCHMEM TermQuery 4616 4796 MatchQuery 5210 5405 DisjunctionQuery (Match queries) 7700 8447 DisjunctionQuery (Term queries) 6514 6591 ConjunctionQuery (Match queries) 7524 8175 Nested disjunction query (disjunction of disjunctions) 10306 10708 … --- document/document.go | 21 +++++- document/field_composite.go | 9 +++ index/index.go | 35 ++++++++++ index/scorch/scorch.go | 10 +-- index/scorch/segment/empty.go | 12 ++++ index/scorch/segment/mem/build.go | 2 +- index/scorch/segment/mem/dict.go | 20 ++++++ index/scorch/segment/mem/posting.go | 62 ++++++++++++++++++ index/scorch/segment/mem/segment.go | 83 ++++++++++++------------ index/scorch/segment/mem/segment_test.go | 2 +- index/scorch/segment/segment.go | 15 +++-- index/scorch/segment/zap/contentcoder.go | 8 +++ index/scorch/segment/zap/docvalues.go | 31 +++++---- index/scorch/segment/zap/posting.go | 79 ++++++++++++++++++++++ index/scorch/segment/zap/segment.go | 78 +++++++++++----------- index/scorch/snapshot_index.go | 7 ++ index/scorch/snapshot_index_doc.go | 13 ++++ index/scorch/snapshot_index_tfr.go | 30 +++++++++ index/scorch/snapshot_segment.go | 4 +- index/upsidedown/index_reader.go | 14 ++++ index/upsidedown/reader.go | 39 ++++++++++- index/upsidedown/row.go | 29 +++++++++ index_impl.go | 53 ++++++++++++++- index_test.go | 55 ++++++++++++++++ search.go | 30 +++++++++ search/collector/search_test.go | 18 +++++ search/collector/topn.go | 25 +++++++ search/explanation.go | 21 ++++++ search/facet/facet_builder_datetime.go | 29 +++++++++ search/facet/facet_builder_numeric.go | 29 +++++++++ search/facet/facet_builder_terms.go | 21 ++++++ search/facets_builder.go | 47 ++++++++++++++ search/pool.go | 11 ++++ search/scorer/scorer_conjunction.go | 14 ++++ search/scorer/scorer_constant.go | 19 ++++++ search/scorer/scorer_disjunction.go | 13 ++++ search/scorer/scorer_term.go | 24 +++++++ search/search.go | 82 
+++++++++++++++++++++++ search/searcher/search_boolean.go | 36 ++++++++++ search/searcher/search_conjunction.go | 26 ++++++++ search/searcher/search_disjunction.go | 35 ++++++++++ search/searcher/search_docid.go | 16 +++++ search/searcher/search_filter.go | 15 +++++ search/searcher/search_match_all.go | 17 +++++ search/searcher/search_match_none.go | 15 +++++ search/searcher/search_phrase.go | 31 +++++++++ search/searcher/search_term.go | 18 +++++ size/sizes.go | 57 ++++++++++++++++ 48 files changed, 1242 insertions(+), 118 deletions(-) create mode 100644 size/sizes.go diff --git a/document/document.go b/document/document.go index c37585c66..921098b0b 100644 --- a/document/document.go +++ b/document/document.go @@ -14,7 +14,19 @@ package document -import "fmt" +import ( + "fmt" + "reflect" + + "github.com/blevesearch/bleve/size" +) + +var reflectStaticSizeDocument int + +func init() { + var d Document + reflectStaticSizeDocument = int(reflect.TypeOf(d).Size()) +} type Document struct { ID string `json:"id"` @@ -30,6 +42,13 @@ func NewDocument(id string) *Document { } } +func (d *Document) Size() int { + return reflectStaticSizeDocument + size.SizeOfPtr + + len(d.ID) + + len(d.Fields)*size.SizeOfPtr + + len(d.CompositeFields)*(size.SizeOfPtr+reflectStaticSizeCompositeField) +} + func (d *Document) AddField(f Field) *Document { switch f := f.(type) { case *CompositeField: diff --git a/document/field_composite.go b/document/field_composite.go index b41b1b8ed..e53cd4566 100644 --- a/document/field_composite.go +++ b/document/field_composite.go @@ -15,9 +15,18 @@ package document import ( + "reflect" + "github.com/blevesearch/bleve/analysis" ) +var reflectStaticSizeCompositeField int + +func init() { + var cf CompositeField + reflectStaticSizeCompositeField = int(reflect.TypeOf(cf).Size()) +} + const DefaultCompositeIndexingOptions = IndexField type CompositeField struct { diff --git a/index/index.go b/index/index.go index 9870b4172..c25d7fa46 100644 --- 
a/index/index.go +++ b/index/index.go @@ -18,11 +18,23 @@ import ( "bytes" "encoding/json" "fmt" + "reflect" "github.com/blevesearch/bleve/document" "github.com/blevesearch/bleve/index/store" + "github.com/blevesearch/bleve/size" ) +var reflectStaticSizeTermFieldDoc int +var reflectStaticSizeTermFieldVector int + +func init() { + var tfd TermFieldDoc + reflectStaticSizeTermFieldDoc = int(reflect.TypeOf(tfd).Size()) + var tfv TermFieldVector + reflectStaticSizeTermFieldVector = int(reflect.TypeOf(tfv).Size()) +} + var ErrorUnknownStorageType = fmt.Errorf("unknown storage type") type Index interface { @@ -82,6 +94,8 @@ type IndexReader interface { DumpFields() chan interface{} Close() error + + Size() int } // FieldTerms contains the terms used by a document, keyed by field @@ -115,6 +129,11 @@ type TermFieldVector struct { End uint64 } +func (tfv *TermFieldVector) Size() int { + return reflectStaticSizeTermFieldVector + size.SizeOfPtr + + len(tfv.Field) + len(tfv.ArrayPositions)*size.SizeOfUint64 +} + // IndexInternalID is an opaque document identifier interal to the index impl type IndexInternalID []byte @@ -134,6 +153,17 @@ type TermFieldDoc struct { Vectors []*TermFieldVector } +func (tfd *TermFieldDoc) Size() int { + sizeInBytes := reflectStaticSizeTermFieldDoc + size.SizeOfPtr + + len(tfd.Term) + len(tfd.ID) + + for _, entry := range tfd.Vectors { + sizeInBytes += entry.Size() + } + + return sizeInBytes +} + // Reset allows an already allocated TermFieldDoc to be reused func (tfd *TermFieldDoc) Reset() *TermFieldDoc { // remember the []byte used for the ID @@ -161,6 +191,8 @@ type TermFieldReader interface { // Count returns the number of documents contains the term in this field. Count() uint64 Close() error + + Size() int } type DictEntry struct { @@ -185,6 +217,9 @@ type DocIDReader interface { // will start there instead. If ID is greater than or equal to the end of // the range, Next() call will return io.EOF. 
Advance(ID IndexInternalID) (IndexInternalID, error) + + Size() int + Close() error } diff --git a/index/scorch/scorch.go b/index/scorch/scorch.go index a40f374ac..2a9eb6341 100644 --- a/index/scorch/scorch.go +++ b/index/scorch/scorch.go @@ -472,20 +472,20 @@ func (s *Scorch) AddEligibleForRemoval(epoch uint64) { } func (s *Scorch) MemoryUsed() uint64 { - var memUsed uint64 + var memUsed int s.rootLock.RLock() if s.root != nil { for _, segmentSnapshot := range s.root.segment { memUsed += 8 /* size of id -> uint64 */ + - segmentSnapshot.segment.SizeInBytes() + segmentSnapshot.segment.Size() if segmentSnapshot.deleted != nil { - memUsed += segmentSnapshot.deleted.GetSizeInBytes() + memUsed += int(segmentSnapshot.deleted.GetSizeInBytes()) } - memUsed += segmentSnapshot.cachedDocs.sizeInBytes() + memUsed += segmentSnapshot.cachedDocs.size() } } s.rootLock.RUnlock() - return memUsed + return uint64(memUsed) } func (s *Scorch) markIneligibleForRemoval(filename string) { diff --git a/index/scorch/segment/empty.go b/index/scorch/segment/empty.go index 83454644d..6c19f60f9 100644 --- a/index/scorch/segment/empty.go +++ b/index/scorch/segment/empty.go @@ -46,6 +46,10 @@ func (e *EmptySegment) Close() error { return nil } +func (e *EmptySegment) Size() uint64 { + return 0 +} + func (e *EmptySegment) AddRef() { } @@ -84,6 +88,10 @@ func (e *EmptyPostingsList) Iterator() PostingsIterator { return &EmptyPostingsIterator{} } +func (e *EmptyPostingsList) Size() int { + return 0 +} + func (e *EmptyPostingsList) Count() uint64 { return 0 } @@ -93,3 +101,7 @@ type EmptyPostingsIterator struct{} func (e *EmptyPostingsIterator) Next() (Posting, error) { return nil, nil } + +func (e *EmptyPostingsIterator) Size() int { + return 0 +} diff --git a/index/scorch/segment/mem/build.go b/index/scorch/segment/mem/build.go index 57971aae5..264c94d1c 100644 --- a/index/scorch/segment/mem/build.go +++ b/index/scorch/segment/mem/build.go @@ -45,7 +45,7 @@ func NewFromAnalyzedDocs(results 
[]*index.AnalysisResult) *Segment { } // compute memory usage of segment - s.updateSizeInBytes() + s.updateSize() // professional debugging // diff --git a/index/scorch/segment/mem/dict.go b/index/scorch/segment/mem/dict.go index b564ed1fb..9f5a873ae 100644 --- a/index/scorch/segment/mem/dict.go +++ b/index/scorch/segment/mem/dict.go @@ -15,14 +15,23 @@ package mem import ( + "reflect" "sort" "strings" "github.com/RoaringBitmap/roaring" "github.com/blevesearch/bleve/index" "github.com/blevesearch/bleve/index/scorch/segment" + "github.com/blevesearch/bleve/size" ) +var reflectStaticSizeDictionary int + +func init() { + var d Dictionary + reflectStaticSizeDictionary = int(reflect.TypeOf(d).Size()) +} + // Dictionary is the in-memory representation of the term dictionary type Dictionary struct { segment *Segment @@ -30,6 +39,17 @@ type Dictionary struct { fieldID uint16 } +func (d *Dictionary) Size() int { + sizeInBytes := reflectStaticSizeDictionary + size.SizeOfPtr + + len(d.field) + + if d.segment != nil { + sizeInBytes += int(d.segment.Size()) + } + + return sizeInBytes +} + // PostingsList returns the postings list for the specified term func (d *Dictionary) PostingsList(term string, except *roaring.Bitmap) (segment.PostingsList, error) { diff --git a/index/scorch/segment/mem/posting.go b/index/scorch/segment/mem/posting.go index 2554333a2..4203acbe5 100644 --- a/index/scorch/segment/mem/posting.go +++ b/index/scorch/segment/mem/posting.go @@ -15,10 +15,29 @@ package mem import ( + "reflect" + "github.com/RoaringBitmap/roaring" "github.com/blevesearch/bleve/index/scorch/segment" + "github.com/blevesearch/bleve/size" ) +var reflectStaticSizePostingsList int +var reflectStaticSizePostingsIterator int +var reflectStaticSizePosting int +var reflectStaticSizeLocation int + +func init() { + var pl PostingsList + reflectStaticSizePostingsList = int(reflect.TypeOf(pl).Size()) + var pi PostingsIterator + reflectStaticSizePostingsIterator = int(reflect.TypeOf(pi).Size()) + 
var p Posting + reflectStaticSizePosting = int(reflect.TypeOf(p).Size()) + var l Location + reflectStaticSizeLocation = int(reflect.TypeOf(l).Size()) +} + // PostingsList is an in-memory represenation of a postings list type PostingsList struct { dictionary *Dictionary @@ -27,6 +46,20 @@ type PostingsList struct { except *roaring.Bitmap } +func (p *PostingsList) Size() int { + sizeInBytes := reflectStaticSizePostingsList + size.SizeOfPtr + + if p.dictionary != nil { + sizeInBytes += p.dictionary.Size() + } + + if p.except != nil { + sizeInBytes += int(p.except.GetSizeInBytes()) + } + + return sizeInBytes +} + // Count returns the number of items on this postings list func (p *PostingsList) Count() uint64 { var rv uint64 @@ -83,6 +116,16 @@ type PostingsIterator struct { reuse Posting } +func (i *PostingsIterator) Size() int { + sizeInBytes := reflectStaticSizePostingsIterator + size.SizeOfPtr + + if i.locations != nil { + sizeInBytes += int(i.locations.GetSizeInBytes()) + } + + return sizeInBytes +} + // Next returns the next posting on the postings list, or nil at the end func (i *PostingsIterator) Next() (segment.Posting, error) { if i.actual == nil || !i.actual.HasNext() { @@ -121,6 +164,16 @@ type Posting struct { hasLoc bool } +func (p *Posting) Size() int { + sizeInBytes := reflectStaticSizePosting + size.SizeOfPtr + + if p.iterator != nil { + sizeInBytes += p.iterator.Size() + } + + return sizeInBytes +} + // Number returns the document number of this posting in this segment func (p *Posting) Number() uint64 { return p.docNum @@ -158,6 +211,15 @@ type Location struct { offset int } +func (l *Location) Size() int { + sizeInBytes := reflectStaticSizeLocation + if l.p != nil { + sizeInBytes += l.p.Size() + } + + return sizeInBytes +} + // Field returns the name of the field (useful in composite fields to know // which original field the value came from) func (l *Location) Field() string { diff --git a/index/scorch/segment/mem/segment.go 
b/index/scorch/segment/mem/segment.go index 04bdb368a..e9c4a2730 100644 --- a/index/scorch/segment/mem/segment.go +++ b/index/scorch/segment/mem/segment.go @@ -16,11 +16,20 @@ package mem import ( "fmt" + "reflect" "github.com/RoaringBitmap/roaring" "github.com/blevesearch/bleve/index/scorch/segment" + "github.com/blevesearch/bleve/size" ) +var reflectStaticSizeSegment int + +func init() { + var s Segment + reflectStaticSizeSegment = int(reflect.TypeOf(s).Size()) +} + // _id field is always guaranteed to have fieldID of 0 const idFieldID uint16 = 0 @@ -96,7 +105,7 @@ type Segment struct { // Footprint of the segment, updated when analyzed document mutations // are added into the segment - sizeInBytes uint64 + sizeInBytes int } // New builds a new empty Segment @@ -107,99 +116,87 @@ func New() *Segment { } } -func (s *Segment) updateSizeInBytes() { - var sizeInBytes uint64 +func (s *Segment) updateSize() { + sizeInBytes := reflectStaticSizeSegment // FieldsMap, FieldsInv for k, _ := range s.FieldsMap { - sizeInBytes += uint64((len(k)+int(segment.SizeOfString))*2 + - 2 /* size of uint16 */) + sizeInBytes += (len(k)+size.SizeOfString)*2 + + size.SizeOfUint16 } - // overhead from the data structures - sizeInBytes += (segment.SizeOfMap + segment.SizeOfSlice) // Dicts, DictKeys for _, entry := range s.Dicts { for k, _ := range entry { - sizeInBytes += uint64((len(k)+int(segment.SizeOfString))*2 + - 8 /* size of uint64 */) + sizeInBytes += (len(k)+size.SizeOfString)*2 + + size.SizeOfUint64 } // overhead from the data structures - sizeInBytes += (segment.SizeOfMap + segment.SizeOfSlice) + sizeInBytes += (size.SizeOfMap + size.SizeOfSlice) } - sizeInBytes += (segment.SizeOfSlice * 2) // Postings, PostingsLocs for i := 0; i < len(s.Postings); i++ { - sizeInBytes += (s.Postings[i].GetSizeInBytes() + segment.SizeOfPointer) + - (s.PostingsLocs[i].GetSizeInBytes() + segment.SizeOfPointer) + sizeInBytes += (int(s.Postings[i].GetSizeInBytes()) + size.SizeOfPtr) + + 
(int(s.PostingsLocs[i].GetSizeInBytes()) + size.SizeOfPtr) } - sizeInBytes += (segment.SizeOfSlice * 2) // Freqs, Norms for i := 0; i < len(s.Freqs); i++ { - sizeInBytes += uint64(len(s.Freqs[i])*8 /* size of uint64 */ + - len(s.Norms[i])*4 /* size of float32 */) + - (segment.SizeOfSlice * 2) + sizeInBytes += (len(s.Freqs[i])*size.SizeOfUint64 + + len(s.Norms[i])*size.SizeOfFloat32) + + (size.SizeOfSlice * 2) } - sizeInBytes += (segment.SizeOfSlice * 2) // Location data for i := 0; i < len(s.Locfields); i++ { - sizeInBytes += uint64(len(s.Locfields[i])*2 /* size of uint16 */ + - len(s.Locstarts[i])*8 /* size of uint64 */ + - len(s.Locends[i])*8 /* size of uint64 */ + - len(s.Locpos[i])*8 /* size of uint64 */) + sizeInBytes += len(s.Locfields[i])*size.SizeOfUint16 + + len(s.Locstarts[i])*size.SizeOfUint64 + + len(s.Locends[i])*size.SizeOfUint64 + + len(s.Locpos[i])*size.SizeOfUint64 for j := 0; j < len(s.Locarraypos[i]); j++ { - sizeInBytes += uint64(len(s.Locarraypos[i][j])*8 /* size of uint64 */) + - segment.SizeOfSlice + sizeInBytes += len(s.Locarraypos[i][j])*size.SizeOfUint64 + + size.SizeOfSlice } - sizeInBytes += (segment.SizeOfSlice * 5) + sizeInBytes += (size.SizeOfSlice * 5) } - sizeInBytes += (segment.SizeOfSlice * 5) // Stored data for i := 0; i < len(s.Stored); i++ { for _, v := range s.Stored[i] { - sizeInBytes += uint64(2 /* size of uint16 */) + sizeInBytes += size.SizeOfUint16 for _, arr := range v { - sizeInBytes += uint64(len(arr)) + segment.SizeOfSlice + sizeInBytes += len(arr) + size.SizeOfSlice } - sizeInBytes += segment.SizeOfSlice + sizeInBytes += size.SizeOfSlice } for _, v := range s.StoredTypes[i] { - sizeInBytes += uint64(2 /* size of uint16 */ +len(v)) + segment.SizeOfSlice + sizeInBytes += size.SizeOfUint16 + len(v) + size.SizeOfSlice } for _, v := range s.StoredPos[i] { - sizeInBytes += uint64(2 /* size of uint16 */) + sizeInBytes += size.SizeOfUint16 for _, arr := range v { - sizeInBytes += uint64(len(arr)*8 /* size of uint64 */) + - 
segment.SizeOfSlice + sizeInBytes += len(arr)*size.SizeOfUint64 + + size.SizeOfSlice } - sizeInBytes += segment.SizeOfSlice + sizeInBytes += size.SizeOfSlice } // overhead from map(s) within Stored, StoredTypes, StoredPos - sizeInBytes += (segment.SizeOfMap * 3) + sizeInBytes += (size.SizeOfMap * 3) } - // overhead from data structures: Stored, StoredTypes, StoredPos - sizeInBytes += (segment.SizeOfSlice * 3) // DocValueFields - sizeInBytes += uint64(len(s.DocValueFields)*3 /* size of uint16 + bool */) + - segment.SizeOfMap - - // SizeInBytes - sizeInBytes += uint64(8) + sizeInBytes += len(s.DocValueFields) * (size.SizeOfUint16 + size.SizeOfBool) s.sizeInBytes = sizeInBytes } -func (s *Segment) SizeInBytes() uint64 { +func (s *Segment) Size() int { return s.sizeInBytes } diff --git a/index/scorch/segment/mem/segment_test.go b/index/scorch/segment/mem/segment_test.go index 5e3818c24..6c5625d86 100644 --- a/index/scorch/segment/mem/segment_test.go +++ b/index/scorch/segment/mem/segment_test.go @@ -169,7 +169,7 @@ func TestSingle(t *testing.T) { t.Fatalf("segment nil, not expected") } - if segment.SizeInBytes() <= 0 { + if segment.Size() <= 0 { t.Fatalf("segment size not updated") } diff --git a/index/scorch/segment/segment.go b/index/scorch/segment/segment.go index d5435ab96..8eee5f75f 100644 --- a/index/scorch/segment/segment.go +++ b/index/scorch/segment/segment.go @@ -19,12 +19,6 @@ import ( "github.com/blevesearch/bleve/index" ) -// Overhead from go data structures when deployed on a 64-bit system. -const SizeOfMap uint64 = 8 -const SizeOfPointer uint64 = 8 -const SizeOfSlice uint64 = 24 -const SizeOfString uint64 = 16 - // DocumentFieldValueVisitor defines a callback to be visited for each // stored field value. The return value determines if the visitor // should keep going. Returning true continues visiting, false stops. 
@@ -42,7 +36,7 @@ type Segment interface { Close() error - SizeInBytes() uint64 + Size() int AddRef() DecRef() error @@ -63,6 +57,8 @@ type DictionaryIterator interface { type PostingsList interface { Iterator() PostingsIterator + Size() int + Count() uint64 // NOTE deferred for future work @@ -77,6 +73,8 @@ type PostingsIterator interface { // implementations may return a shared instance to reduce memory // allocations. Next() (Posting, error) + + Size() int } type Posting interface { @@ -86,6 +84,8 @@ type Posting interface { Norm() float64 Locations() []Location + + Size() int } type Location interface { @@ -94,6 +94,7 @@ type Location interface { End() uint64 Pos() uint64 ArrayPositions() []uint64 + Size() int } // DocumentFieldTermVisitable is implemented by various scorch segment diff --git a/index/scorch/segment/zap/contentcoder.go b/index/scorch/segment/zap/contentcoder.go index 83457146e..933f10a1e 100644 --- a/index/scorch/segment/zap/contentcoder.go +++ b/index/scorch/segment/zap/contentcoder.go @@ -18,10 +18,18 @@ import ( "bytes" "encoding/binary" "io" + "reflect" "github.com/golang/snappy" ) +var reflectStaticSizeMetaData int + +func init() { + var md MetaData + reflectStaticSizeMetaData = int(reflect.TypeOf(md).Size()) +} + var termSeparator byte = 0xff var termSeparatorSplitSlice = []byte{termSeparator} diff --git a/index/scorch/segment/zap/docvalues.go b/index/scorch/segment/zap/docvalues.go index 0514bd307..13635c57e 100644 --- a/index/scorch/segment/zap/docvalues.go +++ b/index/scorch/segment/zap/docvalues.go @@ -19,13 +19,21 @@ import ( "encoding/binary" "fmt" "math" + "reflect" "sort" "github.com/blevesearch/bleve/index" - "github.com/blevesearch/bleve/index/scorch/segment" + "github.com/blevesearch/bleve/size" "github.com/golang/snappy" ) +var reflectStaticSizedocValueIterator int + +func init() { + var dvi docValueIterator + reflectStaticSizedocValueIterator = int(reflect.TypeOf(dvi).Size()) +} + type docValueIterator struct { field string 
curChunkNum uint64 @@ -36,21 +44,12 @@ type docValueIterator struct { curChunkData []byte // compressed data cache } -func (di *docValueIterator) sizeInBytes() uint64 { - // curChunkNum, numChunks, dvDataLoc --> uint64 - sizeInBytes := 24 - - // field - sizeInBytes += (len(di.field) + int(segment.SizeOfString)) - - // chunkLens, curChunkHeader - sizeInBytes += len(di.chunkLens)*8 + - len(di.curChunkHeader)*24 + - int(segment.SizeOfSlice*2) /* overhead from slices */ - - // curChunkData is mmap'ed, not included - - return uint64(sizeInBytes) +func (di *docValueIterator) size() int { + return reflectStaticSizedocValueIterator + size.SizeOfPtr + + len(di.field) + + len(di.chunkLens)*size.SizeOfUint64 + + len(di.curChunkHeader)*reflectStaticSizeMetaData + + len(di.curChunkData) } func (di *docValueIterator) fieldName() string { diff --git a/index/scorch/segment/zap/posting.go b/index/scorch/segment/zap/posting.go index 589c7cb85..e9c68cbae 100644 --- a/index/scorch/segment/zap/posting.go +++ b/index/scorch/segment/zap/posting.go @@ -19,12 +19,30 @@ import ( "encoding/binary" "fmt" "math" + "reflect" "github.com/RoaringBitmap/roaring" "github.com/Smerity/govarint" "github.com/blevesearch/bleve/index/scorch/segment" + "github.com/blevesearch/bleve/size" ) +var reflectStaticSizePostingsList int +var reflectStaticSizePostingsIterator int +var reflectStaticSizePosting int +var reflectStaticSizeLocation int + +func init() { + var pl PostingsList + reflectStaticSizePostingsList = int(reflect.TypeOf(pl).Size()) + var pi PostingsIterator + reflectStaticSizePostingsIterator = int(reflect.TypeOf(pi).Size()) + var p Posting + reflectStaticSizePosting = int(reflect.TypeOf(p).Size()) + var l Location + reflectStaticSizeLocation = int(reflect.TypeOf(l).Size()) +} + // PostingsList is an in-memory represenation of a postings list type PostingsList struct { sb *SegmentBase @@ -36,6 +54,28 @@ type PostingsList struct { except *roaring.Bitmap } +func (p *PostingsList) Size() int { + 
sizeInBytes := reflectStaticSizePostingsList + size.SizeOfPtr + + if p.sb != nil { + sizeInBytes += (p.sb.Size() - len(p.sb.mem)) // do not include the mmap'ed part + } + + if p.locBitmap != nil { + sizeInBytes += int(p.locBitmap.GetSizeInBytes()) + } + + if p.postings != nil { + sizeInBytes += int(p.postings.GetSizeInBytes()) + } + + if p.except != nil { + sizeInBytes += int(p.except.GetSizeInBytes()) + } + + return sizeInBytes +} + // Iterator returns an iterator for this postings list func (p *PostingsList) Iterator() segment.PostingsIterator { return p.iterator(nil) @@ -193,6 +233,25 @@ type PostingsIterator struct { nextLocs []Location // reused across Next() calls } +func (i *PostingsIterator) Size() int { + sizeInBytes := reflectStaticSizePostingsIterator + size.SizeOfPtr + + len(i.currChunkFreqNorm) + + len(i.currChunkLoc) + + len(i.freqChunkLens)*size.SizeOfUint64 + + len(i.locChunkLens)*size.SizeOfUint64 + + i.next.Size() + + if i.locBitmap != nil { + sizeInBytes += int(i.locBitmap.GetSizeInBytes()) + } + + for _, entry := range i.nextLocs { + sizeInBytes += entry.Size() + } + + return sizeInBytes +} + func (i *PostingsIterator) loadChunk(chunk int) error { if chunk >= len(i.freqChunkLens) || chunk >= len(i.locChunkLens) { return fmt.Errorf("tried to load chunk that doesn't exist %d/(%d %d)", chunk, len(i.freqChunkLens), len(i.locChunkLens)) @@ -444,6 +503,20 @@ type Posting struct { locs []segment.Location } +func (p *Posting) Size() int { + sizeInBytes := reflectStaticSizePosting + + if p.iterator != nil { + sizeInBytes += p.iterator.Size() + } + + for _, entry := range p.locs { + sizeInBytes += entry.Size() + } + + return sizeInBytes +} + // Number returns the document number of this posting in this segment func (p *Posting) Number() uint64 { return p.docNum @@ -473,6 +546,12 @@ type Location struct { ap []uint64 } +func (l *Location) Size() int { + return reflectStaticSizeLocation + + len(l.field) + + len(l.ap)*size.SizeOfUint64 +} + // Field returns 
the name of the field (useful in composite fields to know // which original field the value came from) func (l *Location) Field() string { diff --git a/index/scorch/segment/zap/segment.go b/index/scorch/segment/zap/segment.go index 40c0af274..972b7578e 100644 --- a/index/scorch/segment/zap/segment.go +++ b/index/scorch/segment/zap/segment.go @@ -20,16 +20,25 @@ import ( "fmt" "io" "os" + "reflect" "sync" "github.com/RoaringBitmap/roaring" "github.com/Smerity/govarint" "github.com/blevesearch/bleve/index/scorch/segment" + "github.com/blevesearch/bleve/size" "github.com/couchbase/vellum" mmap "github.com/edsrzf/mmap-go" "github.com/golang/snappy" ) +var reflectStaticSizeSegmentBase int + +func init() { + var sb SegmentBase + reflectStaticSizeSegmentBase = int(reflect.TypeOf(sb).Size()) +} + // Open returns a zap impl of a segment func Open(path string) (segment.Segment, error) { f, err := os.Open(path) @@ -92,6 +101,32 @@ type SegmentBase struct { fieldDvIterMap map[uint16]*docValueIterator // naive chunk cache per field } +func (sb *SegmentBase) Size() int { + sizeInBytes := reflectStaticSizeSegmentBase + + len(sb.mem) + + // fieldsMap + for k, _ := range sb.fieldsMap { + sizeInBytes += (len(k) + size.SizeOfString) + size.SizeOfUint16 + } + + // fieldsInv, dictLocs + for _, entry := range sb.fieldsInv { + sizeInBytes += len(entry) + size.SizeOfString + } + sizeInBytes += len(sb.dictLocs) * size.SizeOfUint64 + + // fieldDvIterMap + for _, v := range sb.fieldDvIterMap { + sizeInBytes += size.SizeOfUint16 + size.SizeOfPtr + if v != nil { + sizeInBytes += v.size() + } + } + + return sizeInBytes +} + func (sb *SegmentBase) AddRef() {} func (sb *SegmentBase) DecRef() (err error) { return nil } func (sb *SegmentBase) Close() (err error) { return nil } @@ -111,56 +146,19 @@ type Segment struct { refs int64 } -func (s *Segment) SizeInBytes() uint64 { +func (s *Segment) Size() int { // 8 /* size of file pointer */ // 4 /* size of version -> uint32 */ // 4 /* size of crc -> 
uint32 */ sizeOfUints := 16 - sizeInBytes := (len(s.path) + int(segment.SizeOfString)) + sizeOfUints + sizeInBytes := (len(s.path) + size.SizeOfString) + sizeOfUints // mutex, refs -> int64 sizeInBytes += 16 // do not include the mmap'ed part - return uint64(sizeInBytes) + s.SegmentBase.SizeInBytes() - uint64(len(s.mem)) -} - -func (s *SegmentBase) SizeInBytes() uint64 { - // 4 /* size of memCRC -> uint32 */ - // 4 /* size of chunkFactor -> uint32 */ - // 8 /* size of numDocs -> uint64 */ - // 8 /* size of storedIndexOffset -> uint64 */ - // 8 /* size of fieldsIndexOffset -> uint64 */ - // 8 /* size of docValueOffset -> uint64 */ - sizeInBytes := 40 - - sizeInBytes += len(s.mem) + int(segment.SizeOfSlice) - - // fieldsMap - for k, _ := range s.fieldsMap { - sizeInBytes += (len(k) + int(segment.SizeOfString)) + 2 /* size of uint16 */ - } - sizeInBytes += int(segment.SizeOfMap) /* overhead from map */ - - // fieldsInv, dictLocs - for _, entry := range s.fieldsInv { - sizeInBytes += (len(entry) + int(segment.SizeOfString)) - } - sizeInBytes += len(s.dictLocs) * 8 /* size of uint64 */ - sizeInBytes += int(segment.SizeOfSlice) * 3 /* overhead from slices */ - - // fieldDvIterMap - sizeInBytes += len(s.fieldDvIterMap) * - int(segment.SizeOfPointer+2 /* size of uint16 */) - for _, entry := range s.fieldDvIterMap { - if entry != nil { - sizeInBytes += int(entry.sizeInBytes()) - } - } - sizeInBytes += int(segment.SizeOfMap) - - return uint64(sizeInBytes) + return sizeInBytes + s.SegmentBase.Size() - len(s.mem) } func (s *Segment) AddRef() { diff --git a/index/scorch/snapshot_index.go b/index/scorch/snapshot_index.go index 5289b1434..9394f391e 100644 --- a/index/scorch/snapshot_index.go +++ b/index/scorch/snapshot_index.go @@ -27,6 +27,7 @@ import ( "github.com/blevesearch/bleve/document" "github.com/blevesearch/bleve/index" "github.com/blevesearch/bleve/index/scorch/segment" + "github.com/blevesearch/bleve/size" ) type asynchSegmentResult struct { @@ -89,6 +90,12 @@ func (i 
*IndexSnapshot) Close() error { return i.DecRef() } +func (i *IndexSnapshot) Size() int { + // Just return the size of the pointer for estimating the overhead + // during Search, a reference of the IndexSnapshot serves as the reader. + return size.SizeOfPtr +} + func (i *IndexSnapshot) newIndexSnapshotFieldDict(field string, makeItr func(i segment.TermDictionary) segment.DictionaryIterator) (*IndexSnapshotFieldDict, error) { results := make(chan *asynchSegmentResult) diff --git a/index/scorch/snapshot_index_doc.go b/index/scorch/snapshot_index_doc.go index d1205ff8e..27da20865 100644 --- a/index/scorch/snapshot_index_doc.go +++ b/index/scorch/snapshot_index_doc.go @@ -16,17 +16,30 @@ package scorch import ( "bytes" + "reflect" "github.com/RoaringBitmap/roaring" "github.com/blevesearch/bleve/index" + "github.com/blevesearch/bleve/size" ) +var reflectStaticSizeIndexSnapshotDocIDReader int + +func init() { + var isdr IndexSnapshotDocIDReader + reflectStaticSizeIndexSnapshotDocIDReader = int(reflect.TypeOf(isdr).Size()) +} + type IndexSnapshotDocIDReader struct { snapshot *IndexSnapshot iterators []roaring.IntIterable segmentOffset int } +func (i *IndexSnapshotDocIDReader) Size() int { + return reflectStaticSizeIndexSnapshotDocIDReader + size.SizeOfPtr +} + func (i *IndexSnapshotDocIDReader) Next() (index.IndexInternalID, error) { for i.segmentOffset < len(i.iterators) { if !i.iterators[i.segmentOffset].HasNext() { diff --git a/index/scorch/snapshot_index_tfr.go b/index/scorch/snapshot_index_tfr.go index d1f23b272..e1a0e9a59 100644 --- a/index/scorch/snapshot_index_tfr.go +++ b/index/scorch/snapshot_index_tfr.go @@ -16,12 +16,21 @@ package scorch import ( "bytes" + "reflect" "sync/atomic" "github.com/blevesearch/bleve/index" "github.com/blevesearch/bleve/index/scorch/segment" + "github.com/blevesearch/bleve/size" ) +var reflectStaticSizeIndexSnapshotTermFieldReader int + +func init() { + var istfr IndexSnapshotTermFieldReader + 
reflectStaticSizeIndexSnapshotTermFieldReader = int(reflect.TypeOf(istfr).Size()) +} + type IndexSnapshotTermFieldReader struct { term []byte field string @@ -36,6 +45,27 @@ type IndexSnapshotTermFieldReader struct { currID index.IndexInternalID } +func (i *IndexSnapshotTermFieldReader) Size() int { + sizeInBytes := reflectStaticSizeIndexSnapshotTermFieldReader + size.SizeOfPtr + + len(i.term) + + len(i.field) + + len(i.currID) + + for _, entry := range i.postings { + sizeInBytes += entry.Size() + } + + for _, entry := range i.iterators { + sizeInBytes += entry.Size() + } + + if i.currPosting != nil { + sizeInBytes += i.currPosting.Size() + } + + return sizeInBytes +} + func (i *IndexSnapshotTermFieldReader) Next(preAlloced *index.TermFieldDoc) (*index.TermFieldDoc, error) { rv := preAlloced if rv == nil { diff --git a/index/scorch/snapshot_segment.go b/index/scorch/snapshot_segment.go index 5e64cb1f2..cdfe317fe 100644 --- a/index/scorch/snapshot_segment.go +++ b/index/scorch/snapshot_segment.go @@ -213,7 +213,7 @@ func (c *cachedDocs) prepareFields(wantedFields []string, ss *SegmentSnapshot) e return nil } -func (c *cachedDocs) sizeInBytes() uint64 { +func (c *cachedDocs) size() int { sizeInBytes := 0 c.m.Lock() for k, v := range c.cache { // cachedFieldDocs @@ -225,5 +225,5 @@ func (c *cachedDocs) sizeInBytes() uint64 { } } c.m.Unlock() - return uint64(sizeInBytes) + return sizeInBytes } diff --git a/index/upsidedown/index_reader.go b/index/upsidedown/index_reader.go index 77d523c30..4e5755219 100644 --- a/index/upsidedown/index_reader.go +++ b/index/upsidedown/index_reader.go @@ -15,17 +15,31 @@ package upsidedown import ( + "reflect" + "github.com/blevesearch/bleve/document" "github.com/blevesearch/bleve/index" "github.com/blevesearch/bleve/index/store" + "github.com/blevesearch/bleve/size" ) +var reflectStaticSizeIndexReader int + +func init() { + var ir IndexReader + reflectStaticSizeIndexReader = int(reflect.TypeOf(ir).Size()) +} + type IndexReader struct { 
index *UpsideDownCouch kvreader store.KVReader docCount uint64 } +func (i *IndexReader) Size() int { + return reflectStaticSizeIndexReader + size.SizeOfPtr +} + func (i *IndexReader) TermFieldReader(term []byte, fieldName string, includeFreq, includeNorm, includeTermVectors bool) (index.TermFieldReader, error) { fieldIndex, fieldExists := i.index.fieldCache.FieldNamed(fieldName, false) if fieldExists { diff --git a/index/upsidedown/reader.go b/index/upsidedown/reader.go index 1f40c02de..646d4d8ac 100644 --- a/index/upsidedown/reader.go +++ b/index/upsidedown/reader.go @@ -16,13 +16,27 @@ package upsidedown import ( "bytes" + "reflect" "sort" "sync/atomic" "github.com/blevesearch/bleve/index" "github.com/blevesearch/bleve/index/store" + "github.com/blevesearch/bleve/size" ) +var reflectStaticSizeUpsideDownCouchTermFieldReader int +var reflectStaticSizeUpsideDownCouchDocIDReader int + +func init() { + var tfr UpsideDownCouchTermFieldReader + reflectStaticSizeUpsideDownCouchTermFieldReader = + int(reflect.TypeOf(tfr).Size()) + var cdr UpsideDownCouchDocIDReader + reflectStaticSizeUpsideDownCouchDocIDReader = + int(reflect.TypeOf(cdr).Size()) +} + type UpsideDownCouchTermFieldReader struct { count uint64 indexReader *IndexReader @@ -35,6 +49,19 @@ type UpsideDownCouchTermFieldReader struct { includeTermVectors bool } +func (r *UpsideDownCouchTermFieldReader) Size() int { + sizeInBytes := reflectStaticSizeUpsideDownCouchTermFieldReader + size.SizeOfPtr + + len(r.term) + + r.tfrPrealloc.Size() + + len(r.keyBuf) + + if r.tfrNext != nil { + sizeInBytes += r.tfrNext.Size() + } + + return sizeInBytes +} + func newUpsideDownCouchTermFieldReader(indexReader *IndexReader, term []byte, field uint16, includeFreq, includeNorm, includeTermVectors bool) (*UpsideDownCouchTermFieldReader, error) { bufNeeded := termFrequencyRowKeySize(term, nil) if bufNeeded < dictionaryRowKeySize(term) { @@ -174,8 +201,18 @@ type UpsideDownCouchDocIDReader struct { onlyMode bool } -func 
newUpsideDownCouchDocIDReader(indexReader *IndexReader) (*UpsideDownCouchDocIDReader, error) { +func (r *UpsideDownCouchDocIDReader) Size() int { + sizeInBytes := reflectStaticSizeUpsideDownCouchDocIDReader + + r.indexReader.Size() + + for _, entry := range r.only { + sizeInBytes += size.SizeOfString + len(entry) + } + return sizeInBytes +} + +func newUpsideDownCouchDocIDReader(indexReader *IndexReader) (*UpsideDownCouchDocIDReader, error) { startBytes := []byte{0x0} endBytes := []byte{0xff} diff --git a/index/upsidedown/row.go b/index/upsidedown/row.go index 7e503ae05..ba50314cd 100644 --- a/index/upsidedown/row.go +++ b/index/upsidedown/row.go @@ -20,10 +20,22 @@ import ( "fmt" "io" "math" + "reflect" + "github.com/blevesearch/bleve/size" "github.com/golang/protobuf/proto" ) +var reflectStaticSizeTermFrequencyRow int +var reflectStaticSizeTermVector int + +func init() { + var tfr TermFrequencyRow + reflectStaticSizeTermFrequencyRow = int(reflect.TypeOf(tfr).Size()) + var tv TermVector + reflectStaticSizeTermVector = int(reflect.TypeOf(tv).Size()) +} + const ByteSeparator byte = 0xff type UpsideDownCouchRowStream chan UpsideDownCouchRow @@ -358,6 +370,11 @@ type TermVector struct { end uint64 } +func (tv *TermVector) Size() int { + return reflectStaticSizeTermVector + size.SizeOfPtr + + len(tv.arrayPositions)*size.SizeOfUint64 +} + func (tv *TermVector) String() string { return fmt.Sprintf("Field: %d Pos: %d Start: %d End %d ArrayPositions: %#v", tv.field, tv.pos, tv.start, tv.end, tv.arrayPositions) } @@ -371,6 +388,18 @@ type TermFrequencyRow struct { field uint16 } +func (tfr *TermFrequencyRow) Size() int { + sizeInBytes := reflectStaticSizeTermFrequencyRow + + len(tfr.term) + + len(tfr.doc) + + for _, entry := range tfr.vectors { + sizeInBytes += entry.Size() + } + + return sizeInBytes +} + func (tfr *TermFrequencyRow) Term() []byte { return tfr.term } diff --git a/index_impl.go b/index_impl.go index caea1b8e0..df6e748d9 100644 --- a/index_impl.go +++ 
b/index_impl.go @@ -362,8 +362,59 @@ func (i *indexImpl) Search(req *SearchRequest) (sr *SearchResult, err error) { return i.SearchInContext(context.Background(), req) } +// memNeededForSearch is a helper function that returns an estimate of RAM +// needed to execute a search request. +func memNeededForSearch(req *SearchRequest, + searcher search.Searcher, + topnCollector *collector.TopNCollector) uint64 { + + backingSize := req.Size + req.From + 1 + if req.Size+req.From > collector.PreAllocSizeSkipCap { + backingSize = collector.PreAllocSizeSkipCap + 1 + } + numDocMatches := backingSize + searcher.DocumentMatchPoolSize() + + estimate := 0 + + // overhead, size in bytes from collector + estimate += topnCollector.Size() + + var dm search.DocumentMatch + sizeOfDocumentMatch := dm.Size() + + // pre-allocing DocumentMatchPool + var sc search.SearchContext + estimate += sc.Size() + numDocMatches*sizeOfDocumentMatch + + // searcher overhead + estimate += searcher.Size() + + // overhead from results, lowestMatchOutsideResults + estimate += (numDocMatches + 1) * sizeOfDocumentMatch + + // additional overhead from SearchResult + var sr SearchResult + estimate += sr.Size() + + // overhead from facet results + if req.Facets != nil { + var fr search.FacetResult + estimate += len(req.Facets) * fr.Size() + } + + // highlighting, store + var d document.Document + if len(req.Fields) > 0 || req.Highlight != nil { + for i := 0; i < (req.Size + req.From); i++ { // size + from => number of hits + estimate += (req.Size + req.From) * d.Size() + } + } + + return uint64(estimate) +} + // SearchInContext executes a search request operation within the provided -// Context. Returns a SearchResult object or an error. +// Context. Returns a SearchResult object or an error. 
func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr *SearchResult, err error) { i.mutex.RLock() defer i.mutex.RUnlock() diff --git a/index_test.go b/index_test.go index a69357bf6..f1e53647d 100644 --- a/index_test.go +++ b/index_test.go @@ -36,6 +36,9 @@ import ( "github.com/blevesearch/bleve/mapping" "github.com/blevesearch/bleve/search" "github.com/blevesearch/bleve/search/query" + + "github.com/blevesearch/bleve/index/scorch" + "github.com/blevesearch/bleve/index/upsidedown" ) func TestCrud(t *testing.T) { @@ -1815,3 +1818,55 @@ func TestIndexAdvancedCountMatchSearch(t *testing.T) { t.Fatal(err) } } + +func benchmarkSearchOverhead(indexType string, b *testing.B) { + defer func() { + err := os.RemoveAll("testidx") + if err != nil { + b.Fatal(err) + } + }() + + index, err := NewUsing("testidx", NewIndexMapping(), + indexType, Config.DefaultKVStore, nil) + if err != nil { + b.Fatal(err) + } + defer func() { + err := index.Close() + if err != nil { + b.Fatal(err) + } + }() + + elements := []string{"air", "water", "fire", "earth"} + for j := 0; j < 10000; j++ { + err = index.Index(fmt.Sprintf("%d", j), + map[string]interface{}{"name": elements[j%len(elements)]}) + if err != nil { + b.Fatal(err) + } + } + + query1 := NewTermQuery("water") + query2 := NewTermQuery("fire") + query := NewDisjunctionQuery(query1, query2) + req := NewSearchRequest(query) + + b.ResetTimer() + + for n := 0; n < b.N; n++ { + _, err = index.Search(req) + if err != nil { + b.Fatal(err) + } + } +} + +func BenchmarkUpsidedownSearchOverhead(b *testing.B) { + benchmarkSearchOverhead(upsidedown.Name, b) +} + +func BenchmarkScorchSearchOverhead(b *testing.B) { + benchmarkSearchOverhead(scorch.Name, b) +} diff --git a/search.go b/search.go index 46d849c1b..e324262e5 100644 --- a/search.go +++ b/search.go @@ -17,6 +17,7 @@ package bleve import ( "encoding/json" "fmt" + "reflect" "time" "github.com/blevesearch/bleve/analysis" @@ -24,8 +25,19 @@ import ( 
"github.com/blevesearch/bleve/registry" "github.com/blevesearch/bleve/search" "github.com/blevesearch/bleve/search/query" + "github.com/blevesearch/bleve/size" ) +var reflectStaticSizeSearchResult int +var reflectStaticSizeSearchStatus int + +func init() { + var sr SearchResult + reflectStaticSizeSearchResult = int(reflect.TypeOf(sr).Size()) + var ss SearchStatus + reflectStaticSizeSearchStatus = int(reflect.TypeOf(ss).Size()) +} + var cache = registry.NewCache() const defaultDateTimeParser = optional.Name @@ -432,6 +444,24 @@ type SearchResult struct { Facets search.FacetResults `json:"facets"` } +func (sr *SearchResult) Size() int { + sizeInBytes := reflectStaticSizeSearchResult + size.SizeOfPtr + + reflectStaticSizeSearchStatus + + for _, entry := range sr.Hits { + if entry != nil { + sizeInBytes += entry.Size() + } + } + + for k, v := range sr.Facets { + sizeInBytes += size.SizeOfString + len(k) + + v.Size() + } + + return sizeInBytes +} + func (sr *SearchResult) String() string { rv := "" if sr.Total > 0 { diff --git a/search/collector/search_test.go b/search/collector/search_test.go index 8457fb989..3ba71c1d1 100644 --- a/search/collector/search_test.go +++ b/search/collector/search_test.go @@ -15,6 +15,8 @@ package collector import ( + "reflect" + "github.com/blevesearch/bleve/document" "github.com/blevesearch/bleve/index" "github.com/blevesearch/bleve/search" @@ -25,6 +27,18 @@ type stubSearcher struct { matches []*search.DocumentMatch } +func (ss *stubSearcher) Size() int { + sizeInBytes := int(reflect.TypeOf(*ss).Size()) + + for _, entry := range ss.matches { + if entry != nil { + sizeInBytes += entry.Size() + } + } + + return sizeInBytes +} + func (ss *stubSearcher) Next(ctx *search.SearchContext) (*search.DocumentMatch, error) { if ss.index < len(ss.matches) { rv := ctx.DocumentMatchPool.Get() @@ -76,6 +90,10 @@ func (ss *stubSearcher) DocumentMatchPoolSize() int { type stubReader struct{} +func (sr *stubReader) Size() int { + return 0 +} + func (sr 
*stubReader) TermFieldReader(term []byte, field string, includeFreq, includeNorm, includeTermVectors bool) (index.TermFieldReader, error) { return nil, nil } diff --git a/search/collector/topn.go b/search/collector/topn.go index 388370e7e..d684868cc 100644 --- a/search/collector/topn.go +++ b/search/collector/topn.go @@ -16,12 +16,21 @@ package collector import ( "context" + "reflect" "time" "github.com/blevesearch/bleve/index" "github.com/blevesearch/bleve/search" + "github.com/blevesearch/bleve/size" ) +var reflectStaticSizeTopNCollector int + +func init() { + var coll TopNCollector + reflectStaticSizeTopNCollector = int(reflect.TypeOf(coll).Size()) +} + type collectorStore interface { // Add the document, and if the new store size exceeds the provided size // the last element is removed and returned. If the size has not been @@ -98,6 +107,22 @@ func NewTopNCollector(size int, skip int, sort search.SortOrder) *TopNCollector return hc } +func (hc *TopNCollector) Size() int { + sizeInBytes := reflectStaticSizeTopNCollector + size.SizeOfPtr + + if hc.facetsBuilder != nil { + sizeInBytes += hc.facetsBuilder.Size() + } + + for _, entry := range hc.neededFields { + sizeInBytes += len(entry) + size.SizeOfString + } + + sizeInBytes += len(hc.cachedScoring) + len(hc.cachedDesc) + + return sizeInBytes +} + // Collect goes to the index to find the matching documents func (hc *TopNCollector) Collect(ctx context.Context, searcher search.Searcher, reader index.IndexReader) error { startTime := time.Now() diff --git a/search/explanation.go b/search/explanation.go index 766367d77..3b81737b5 100644 --- a/search/explanation.go +++ b/search/explanation.go @@ -17,8 +17,18 @@ package search import ( "encoding/json" "fmt" + "reflect" + + "github.com/blevesearch/bleve/size" ) +var reflectStaticSizeExplanation int + +func init() { + var e Explanation + reflectStaticSizeExplanation = int(reflect.TypeOf(e).Size()) +} + type Explanation struct { Value float64 `json:"value"` Message string 
`json:"message"` @@ -32,3 +42,14 @@ func (expl *Explanation) String() string { } return string(js) } + +func (expl *Explanation) Size() int { + sizeInBytes := reflectStaticSizeExplanation + size.SizeOfPtr + + len(expl.Message) + + for _, entry := range expl.Children { + sizeInBytes += entry.Size() + } + + return sizeInBytes +} diff --git a/search/facet/facet_builder_datetime.go b/search/facet/facet_builder_datetime.go index 8657a553a..c45442e4d 100644 --- a/search/facet/facet_builder_datetime.go +++ b/search/facet/facet_builder_datetime.go @@ -15,13 +15,25 @@ package facet import ( + "reflect" "sort" "time" "github.com/blevesearch/bleve/numeric" "github.com/blevesearch/bleve/search" + "github.com/blevesearch/bleve/size" ) +var reflectStaticSizeDateTimeFacetBuilder int +var reflectStaticSizedateTimeRange int + +func init() { + var dtfb DateTimeFacetBuilder + reflectStaticSizeDateTimeFacetBuilder = int(reflect.TypeOf(dtfb).Size()) + var dtr dateTimeRange + reflectStaticSizedateTimeRange = int(reflect.TypeOf(dtr).Size()) +} + type dateTimeRange struct { start time.Time end time.Time @@ -46,6 +58,23 @@ func NewDateTimeFacetBuilder(field string, size int) *DateTimeFacetBuilder { } } +func (fb *DateTimeFacetBuilder) Size() int { + sizeInBytes := reflectStaticSizeDateTimeFacetBuilder + size.SizeOfPtr + + len(fb.field) + + for k, _ := range fb.termsCount { + sizeInBytes += size.SizeOfString + len(k) + + size.SizeOfInt + } + + for k, _ := range fb.ranges { + sizeInBytes += size.SizeOfString + len(k) + + size.SizeOfPtr + reflectStaticSizedateTimeRange + } + + return sizeInBytes +} + func (fb *DateTimeFacetBuilder) AddRange(name string, start, end time.Time) { r := dateTimeRange{ start: start, diff --git a/search/facet/facet_builder_numeric.go b/search/facet/facet_builder_numeric.go index 2ab5f2789..c1692b549 100644 --- a/search/facet/facet_builder_numeric.go +++ b/search/facet/facet_builder_numeric.go @@ -15,12 +15,24 @@ package facet import ( + "reflect" "sort" 
"github.com/blevesearch/bleve/numeric" "github.com/blevesearch/bleve/search" + "github.com/blevesearch/bleve/size" ) +var reflectStaticSizeNumericFacetBuilder int +var reflectStaticSizenumericRange int + +func init() { + var nfb NumericFacetBuilder + reflectStaticSizeNumericFacetBuilder = int(reflect.TypeOf(nfb).Size()) + var nr numericRange + reflectStaticSizenumericRange = int(reflect.TypeOf(nr).Size()) +} + type numericRange struct { min *float64 max *float64 @@ -45,6 +57,23 @@ func NewNumericFacetBuilder(field string, size int) *NumericFacetBuilder { } } +func (fb *NumericFacetBuilder) Size() int { + sizeInBytes := reflectStaticSizeNumericFacetBuilder + size.SizeOfPtr + + len(fb.field) + + for k, _ := range fb.termsCount { + sizeInBytes += size.SizeOfString + len(k) + + size.SizeOfInt + } + + for k, _ := range fb.ranges { + sizeInBytes += size.SizeOfString + len(k) + + size.SizeOfPtr + reflectStaticSizenumericRange + } + + return sizeInBytes +} + func (fb *NumericFacetBuilder) AddRange(name string, min, max *float64) { r := numericRange{ min: min, diff --git a/search/facet/facet_builder_terms.go b/search/facet/facet_builder_terms.go index a41e475a9..5b5901e01 100644 --- a/search/facet/facet_builder_terms.go +++ b/search/facet/facet_builder_terms.go @@ -15,11 +15,20 @@ package facet import ( + "reflect" "sort" "github.com/blevesearch/bleve/search" + "github.com/blevesearch/bleve/size" ) +var reflectStaticSizeTermsFacetBuilder int + +func init() { + var tfb TermsFacetBuilder + reflectStaticSizeTermsFacetBuilder = int(reflect.TypeOf(tfb).Size()) +} + type TermsFacetBuilder struct { size int field string @@ -37,6 +46,18 @@ func NewTermsFacetBuilder(field string, size int) *TermsFacetBuilder { } } +func (fb *TermsFacetBuilder) Size() int { + sizeInBytes := reflectStaticSizeTermsFacetBuilder + size.SizeOfPtr + + len(fb.field) + + for k, _ := range fb.termsCount { + sizeInBytes += size.SizeOfString + len(k) + + size.SizeOfInt + } + + return sizeInBytes +} + func (fb 
*TermsFacetBuilder) Field() string { return fb.field } diff --git a/search/facets_builder.go b/search/facets_builder.go index 05e270413..34e45af84 100644 --- a/search/facets_builder.go +++ b/search/facets_builder.go @@ -15,11 +15,32 @@ package search import ( + "reflect" "sort" "github.com/blevesearch/bleve/index" + "github.com/blevesearch/bleve/size" ) +var reflectStaticSizeFacetsBuilder int +var reflectStaticSizeFacetResult int +var reflectStaticSizeTermFacet int +var reflectStaticSizeNumericRangeFacet int +var reflectStaticSizeDateRangeFacet int + +func init() { + var fb FacetsBuilder + reflectStaticSizeFacetsBuilder = int(reflect.TypeOf(fb).Size()) + var fr FacetResult + reflectStaticSizeFacetResult = int(reflect.TypeOf(fr).Size()) + var tf TermFacet + reflectStaticSizeTermFacet = int(reflect.TypeOf(tf).Size()) + var nrf NumericRangeFacet + reflectStaticSizeNumericRangeFacet = int(reflect.TypeOf(nrf).Size()) + var drf DateRangeFacet + reflectStaticSizeDateRangeFacet = int(reflect.TypeOf(drf).Size()) +} + type FacetBuilder interface { StartDoc() UpdateVisitor(field string, term []byte) @@ -27,6 +48,8 @@ type FacetBuilder interface { Result() *FacetResult Field() string + + Size() int } type FacetsBuilder struct { @@ -42,6 +65,22 @@ func NewFacetsBuilder(indexReader index.IndexReader) *FacetsBuilder { } } +func (fb *FacetsBuilder) Size() int { + sizeInBytes := reflectStaticSizeFacetsBuilder + size.SizeOfPtr + + fb.indexReader.Size() + + for k, v := range fb.facets { + sizeInBytes += size.SizeOfString + len(k) + + v.Size() + } + + for _, entry := range fb.fields { + sizeInBytes += size.SizeOfString + len(entry) + } + + return sizeInBytes +} + func (fb *FacetsBuilder) Add(name string, facetBuilder FacetBuilder) { fb.facets[name] = facetBuilder fb.fields = append(fb.fields, facetBuilder.Field()) @@ -213,6 +252,14 @@ type FacetResult struct { DateRanges DateRangeFacets `json:"date_ranges,omitempty"` } +func (fr *FacetResult) Size() int { + return 
reflectStaticSizeFacetResult + size.SizeOfPtr + + len(fr.Field) + + len(fr.Terms)*(reflectStaticSizeTermFacet+size.SizeOfPtr) + + len(fr.NumericRanges)*(reflectStaticSizeNumericRangeFacet+size.SizeOfPtr) + + len(fr.DateRanges)*(reflectStaticSizeDateRangeFacet+size.SizeOfPtr) +} + func (fr *FacetResult) Merge(other *FacetResult) { fr.Total += other.Total fr.Missing += other.Missing diff --git a/search/pool.go b/search/pool.go index b9b52a613..ba8be8fc2 100644 --- a/search/pool.go +++ b/search/pool.go @@ -14,6 +14,17 @@ package search +import ( + "reflect" +) + +var reflectStaticSizeDocumentMatchPool int + +func init() { + var dmp DocumentMatchPool + reflectStaticSizeDocumentMatchPool = int(reflect.TypeOf(dmp).Size()) +} + // DocumentMatchPoolTooSmall is a callback function that can be executed // when the DocumentMatchPool does not have sufficient capacity // By default we just perform just-in-time allocation, but you could log diff --git a/search/scorer/scorer_conjunction.go b/search/scorer/scorer_conjunction.go index aad6f9c16..b866293e0 100644 --- a/search/scorer/scorer_conjunction.go +++ b/search/scorer/scorer_conjunction.go @@ -15,13 +15,27 @@ package scorer import ( + "reflect" + "github.com/blevesearch/bleve/search" + "github.com/blevesearch/bleve/size" ) +var reflectStaticSizeConjunctionQueryScorer int + +func init() { + var cqs ConjunctionQueryScorer + reflectStaticSizeConjunctionQueryScorer = int(reflect.TypeOf(cqs).Size()) +} + type ConjunctionQueryScorer struct { options search.SearcherOptions } +func (s *ConjunctionQueryScorer) Size() int { + return reflectStaticSizeConjunctionQueryScorer + size.SizeOfPtr +} + func NewConjunctionQueryScorer(options search.SearcherOptions) *ConjunctionQueryScorer { return &ConjunctionQueryScorer{ options: options, diff --git a/search/scorer/scorer_constant.go b/search/scorer/scorer_constant.go index a65a826f2..dc10fdaa4 100644 --- a/search/scorer/scorer_constant.go +++ b/search/scorer/scorer_constant.go @@ -16,11 +16,20 
@@ package scorer import ( "fmt" + "reflect" "github.com/blevesearch/bleve/index" "github.com/blevesearch/bleve/search" + "github.com/blevesearch/bleve/size" ) +var reflectStaticSizeConstantScorer int + +func init() { + var cs ConstantScorer + reflectStaticSizeConstantScorer = int(reflect.TypeOf(cs).Size()) +} + type ConstantScorer struct { constant float64 boost float64 @@ -30,6 +39,16 @@ type ConstantScorer struct { queryWeightExplanation *search.Explanation } +func (s *ConstantScorer) Size() int { + sizeInBytes := reflectStaticSizeConstantScorer + size.SizeOfPtr + + if s.queryWeightExplanation != nil { + sizeInBytes += s.queryWeightExplanation.Size() + } + + return sizeInBytes +} + func NewConstantScorer(constant float64, boost float64, options search.SearcherOptions) *ConstantScorer { rv := ConstantScorer{ options: options, diff --git a/search/scorer/scorer_disjunction.go b/search/scorer/scorer_disjunction.go index 184a15d27..36a601c72 100644 --- a/search/scorer/scorer_disjunction.go +++ b/search/scorer/scorer_disjunction.go @@ -16,14 +16,27 @@ package scorer import ( "fmt" + "reflect" "github.com/blevesearch/bleve/search" + "github.com/blevesearch/bleve/size" ) +var reflectStaticSizeDisjunctionQueryScorer int + +func init() { + var dqs DisjunctionQueryScorer + reflectStaticSizeDisjunctionQueryScorer = int(reflect.TypeOf(dqs).Size()) +} + type DisjunctionQueryScorer struct { options search.SearcherOptions } +func (s *DisjunctionQueryScorer) Size() int { + return reflectStaticSizeDisjunctionQueryScorer + size.SizeOfPtr +} + func NewDisjunctionQueryScorer(options search.SearcherOptions) *DisjunctionQueryScorer { return &DisjunctionQueryScorer{ options: options, diff --git a/search/scorer/scorer_term.go b/search/scorer/scorer_term.go index b5f46322c..077e38e0f 100644 --- a/search/scorer/scorer_term.go +++ b/search/scorer/scorer_term.go @@ -17,11 +17,20 @@ package scorer import ( "fmt" "math" + "reflect" "github.com/blevesearch/bleve/index" 
"github.com/blevesearch/bleve/search" + "github.com/blevesearch/bleve/size" ) +var reflectStaticSizeTermQueryScorer int + +func init() { + var tqs TermQueryScorer + reflectStaticSizeTermQueryScorer = int(reflect.TypeOf(tqs).Size()) +} + type TermQueryScorer struct { queryTerm []byte queryField string @@ -36,6 +45,21 @@ type TermQueryScorer struct { queryWeightExplanation *search.Explanation } +func (s *TermQueryScorer) Size() int { + sizeInBytes := reflectStaticSizeTermQueryScorer + size.SizeOfPtr + + len(s.queryTerm) + len(s.queryField) + + if s.idfExplanation != nil { + sizeInBytes += s.idfExplanation.Size() + } + + if s.queryWeightExplanation != nil { + sizeInBytes += s.queryWeightExplanation.Size() + } + + return sizeInBytes +} + func NewTermQueryScorer(queryTerm []byte, queryField string, queryBoost float64, docTotal, docTerm uint64, options search.SearcherOptions) *TermQueryScorer { rv := TermQueryScorer{ queryTerm: queryTerm, diff --git a/search/search.go b/search/search.go index f9a92783b..ca030df4b 100644 --- a/search/search.go +++ b/search/search.go @@ -16,11 +16,26 @@ package search import ( "fmt" + "reflect" "github.com/blevesearch/bleve/document" "github.com/blevesearch/bleve/index" + "github.com/blevesearch/bleve/size" ) +var reflectStaticSizeDocumentMatch int +var reflectStaticSizeSearchContext int +var reflectStaticSizeLocation int + +func init() { + var dm DocumentMatch + reflectStaticSizeDocumentMatch = int(reflect.TypeOf(dm).Size()) + var sc SearchContext + reflectStaticSizeSearchContext = int(reflect.TypeOf(sc).Size()) + var l Location + reflectStaticSizeLocation = int(reflect.TypeOf(l).Size()) +} + type ArrayPositions []uint64 func (ap ArrayPositions) Equals(other ArrayPositions) bool { @@ -47,6 +62,11 @@ type Location struct { ArrayPositions ArrayPositions `json:"array_positions"` } +func (l *Location) Size() int { + return reflectStaticSizeLocation + size.SizeOfPtr + + len(l.ArrayPositions)*size.SizeOfUint64 +} + type Locations []*Location 
type TermLocationMap map[string]Locations @@ -117,6 +137,52 @@ func (dm *DocumentMatch) Reset() *DocumentMatch { return dm } +func (dm *DocumentMatch) Size() int { + sizeInBytes := reflectStaticSizeDocumentMatch + size.SizeOfPtr + + len(dm.Index) + + len(dm.ID) + + len(dm.IndexInternalID) + + if dm.Expl != nil { + sizeInBytes += dm.Expl.Size() + } + + for k, v := range dm.Locations { + sizeInBytes += size.SizeOfString + len(k) + for k1, v1 := range v { + sizeInBytes += size.SizeOfString + len(k1) + + size.SizeOfSlice + for _, entry := range v1 { + sizeInBytes += entry.Size() + } + } + } + + for k, v := range dm.Fragments { + sizeInBytes += size.SizeOfString + len(k) + + size.SizeOfSlice + + for _, entry := range v { + sizeInBytes += size.SizeOfString + len(entry) + } + } + + for _, entry := range dm.Sort { + sizeInBytes += size.SizeOfString + len(entry) + } + + for k, _ := range dm.Fields { + sizeInBytes += size.SizeOfString + len(k) + + size.SizeOfPtr + } + + if dm.Document != nil { + sizeInBytes += dm.Document.Size() + } + + return sizeInBytes +} + func (dm *DocumentMatch) String() string { return fmt.Sprintf("[%s-%f]", string(dm.IndexInternalID), dm.Score) } @@ -135,6 +201,7 @@ type Searcher interface { SetQueryNorm(float64) Count() uint64 Min() int + Size() int DocumentMatchPoolSize() int } @@ -148,3 +215,18 @@ type SearcherOptions struct { type SearchContext struct { DocumentMatchPool *DocumentMatchPool } + +func (sc *SearchContext) Size() int { + sizeInBytes := reflectStaticSizeSearchContext + size.SizeOfPtr + + reflectStaticSizeDocumentMatchPool + size.SizeOfPtr + + if sc.DocumentMatchPool != nil { + for _, entry := range sc.DocumentMatchPool.avail { + if entry != nil { + sizeInBytes += entry.Size() + } + } + } + + return sizeInBytes +} diff --git a/search/searcher/search_boolean.go b/search/searcher/search_boolean.go index a905c29e5..b87337e1e 100644 --- a/search/searcher/search_boolean.go +++ b/search/searcher/search_boolean.go @@ -16,12 +16,21 @@ package 
searcher import ( "math" + "reflect" "github.com/blevesearch/bleve/index" "github.com/blevesearch/bleve/search" "github.com/blevesearch/bleve/search/scorer" + "github.com/blevesearch/bleve/size" ) +var reflectStaticSizeBooleanSearcher int + +func init() { + var bs BooleanSearcher + reflectStaticSizeBooleanSearcher = int(reflect.TypeOf(bs).Size()) +} + type BooleanSearcher struct { indexReader index.IndexReader mustSearcher search.Searcher @@ -52,6 +61,33 @@ func NewBooleanSearcher(indexReader index.IndexReader, mustSearcher search.Searc return &rv, nil } +func (s *BooleanSearcher) Size() int { + sizeInBytes := reflectStaticSizeBooleanSearcher + size.SizeOfPtr + + s.indexReader.Size() + + if s.mustSearcher != nil { + sizeInBytes += s.mustSearcher.Size() + } + + if s.shouldSearcher != nil { + sizeInBytes += s.shouldSearcher.Size() + } + + if s.mustNotSearcher != nil { + sizeInBytes += s.mustNotSearcher.Size() + } + + sizeInBytes += s.scorer.Size() + + for _, entry := range s.matches { + if entry != nil { + sizeInBytes += entry.Size() + } + } + + return sizeInBytes +} + func (s *BooleanSearcher) computeQueryNorm() { // first calculate sum of squared weights sumOfSquaredWeights := 0.0 diff --git a/search/searcher/search_conjunction.go b/search/searcher/search_conjunction.go index 73fba19cd..da65f3981 100644 --- a/search/searcher/search_conjunction.go +++ b/search/searcher/search_conjunction.go @@ -16,13 +16,22 @@ package searcher import ( "math" + "reflect" "sort" "github.com/blevesearch/bleve/index" "github.com/blevesearch/bleve/search" "github.com/blevesearch/bleve/search/scorer" + "github.com/blevesearch/bleve/size" ) +var reflectStaticSizeConjunctionSearcher int + +func init() { + var cs ConjunctionSearcher + reflectStaticSizeConjunctionSearcher = int(reflect.TypeOf(cs).Size()) +} + type ConjunctionSearcher struct { indexReader index.IndexReader searchers OrderedSearcherList @@ -54,6 +63,23 @@ func NewConjunctionSearcher(indexReader index.IndexReader, qsearchers 
[]search.S return &rv, nil } +func (s *ConjunctionSearcher) Size() int { + sizeInBytes := reflectStaticSizeConjunctionSearcher + size.SizeOfPtr + + s.scorer.Size() + + for _, entry := range s.searchers { + sizeInBytes += entry.Size() + } + + for _, entry := range s.currs { + if entry != nil { + sizeInBytes += entry.Size() + } + } + + return sizeInBytes +} + func (s *ConjunctionSearcher) computeQueryNorm() { // first calculate sum of squared weights sumOfSquaredWeights := 0.0 diff --git a/search/searcher/search_disjunction.go b/search/searcher/search_disjunction.go index b6910ddb6..119bac970 100644 --- a/search/searcher/search_disjunction.go +++ b/search/searcher/search_disjunction.go @@ -17,13 +17,22 @@ package searcher import ( "fmt" "math" + "reflect" "sort" "github.com/blevesearch/bleve/index" "github.com/blevesearch/bleve/search" "github.com/blevesearch/bleve/search/scorer" + "github.com/blevesearch/bleve/size" ) +var reflectStaticSizeDisjunctionSearcher int + +func init() { + var ds DisjunctionSearcher + reflectStaticSizeDisjunctionSearcher = int(reflect.TypeOf(ds).Size()) +} + // DisjunctionMaxClauseCount is a compile time setting that applications can // adjust to non-zero value to cause the DisjunctionSearcher to return an // error instead of exeucting searches when the size exceeds this value. 
@@ -90,6 +99,32 @@ func newDisjunctionSearcher(indexReader index.IndexReader, return &rv, nil } +func (s *DisjunctionSearcher) Size() int { + sizeInBytes := reflectStaticSizeDisjunctionSearcher + size.SizeOfPtr + + s.indexReader.Size() + + s.scorer.Size() + + for _, entry := range s.searchers { + sizeInBytes += entry.Size() + } + + for _, entry := range s.currs { + if entry != nil { + sizeInBytes += entry.Size() + } + } + + for _, entry := range s.matching { + if entry != nil { + sizeInBytes += entry.Size() + } + } + + sizeInBytes += len(s.matchingIdxs) * size.SizeOfInt + + return sizeInBytes +} + func (s *DisjunctionSearcher) computeQueryNorm() { // first calculate sum of squared weights sumOfSquaredWeights := 0.0 diff --git a/search/searcher/search_docid.go b/search/searcher/search_docid.go index 06351b4a0..3b258a580 100644 --- a/search/searcher/search_docid.go +++ b/search/searcher/search_docid.go @@ -15,11 +15,21 @@ package searcher import ( + "reflect" + "github.com/blevesearch/bleve/index" "github.com/blevesearch/bleve/search" "github.com/blevesearch/bleve/search/scorer" + "github.com/blevesearch/bleve/size" ) +var reflectStaticSizeDocIDSearcher int + +func init() { + var ds DocIDSearcher + reflectStaticSizeDocIDSearcher = int(reflect.TypeOf(ds).Size()) +} + // DocIDSearcher returns documents matching a predefined set of identifiers. 
type DocIDSearcher struct { reader index.DocIDReader @@ -42,6 +52,12 @@ func NewDocIDSearcher(indexReader index.IndexReader, ids []string, boost float64 }, nil } +func (s *DocIDSearcher) Size() int { + return reflectStaticSizeDocIDSearcher + size.SizeOfPtr + + s.reader.Size() + + s.scorer.Size() +} + func (s *DocIDSearcher) Count() uint64 { return uint64(s.count) } diff --git a/search/searcher/search_filter.go b/search/searcher/search_filter.go index 219f2ee7e..7c95fb41c 100644 --- a/search/searcher/search_filter.go +++ b/search/searcher/search_filter.go @@ -15,10 +15,20 @@ package searcher import ( + "reflect" + "github.com/blevesearch/bleve/index" "github.com/blevesearch/bleve/search" + "github.com/blevesearch/bleve/size" ) +var reflectStaticSizeFilteringSearcher int + +func init() { + var fs FilteringSearcher + reflectStaticSizeFilteringSearcher = int(reflect.TypeOf(fs).Size()) +} + // FilterFunc defines a function which can filter documents // returning true means keep the document // returning false means do not keep the document @@ -38,6 +48,11 @@ func NewFilteringSearcher(s search.Searcher, filter FilterFunc) *FilteringSearch } } +func (f *FilteringSearcher) Size() int { + return reflectStaticSizeFilteringSearcher + size.SizeOfPtr + + f.child.Size() +} + func (f *FilteringSearcher) Next(ctx *search.SearchContext) (*search.DocumentMatch, error) { next, err := f.child.Next(ctx) for next != nil && err == nil { diff --git a/search/searcher/search_match_all.go b/search/searcher/search_match_all.go index 822db2ea0..3f34e5918 100644 --- a/search/searcher/search_match_all.go +++ b/search/searcher/search_match_all.go @@ -15,11 +15,21 @@ package searcher import ( + "reflect" + "github.com/blevesearch/bleve/index" "github.com/blevesearch/bleve/search" "github.com/blevesearch/bleve/search/scorer" + "github.com/blevesearch/bleve/size" ) +var reflectStaticSizeMatchAllSearcher int + +func init() { + var mas MatchAllSearcher + reflectStaticSizeMatchAllSearcher = 
int(reflect.TypeOf(mas).Size()) +} + type MatchAllSearcher struct { indexReader index.IndexReader reader index.DocIDReader @@ -46,6 +56,13 @@ func NewMatchAllSearcher(indexReader index.IndexReader, boost float64, options s }, nil } +func (s *MatchAllSearcher) Size() int { + return reflectStaticSizeMatchAllSearcher + size.SizeOfPtr + + s.indexReader.Size() + + s.reader.Size() + + s.scorer.Size() +} + func (s *MatchAllSearcher) Count() uint64 { return s.count } diff --git a/search/searcher/search_match_none.go b/search/searcher/search_match_none.go index 947596714..6b50b3222 100644 --- a/search/searcher/search_match_none.go +++ b/search/searcher/search_match_none.go @@ -15,10 +15,20 @@ package searcher import ( + "reflect" + "github.com/blevesearch/bleve/index" "github.com/blevesearch/bleve/search" + "github.com/blevesearch/bleve/size" ) +var reflectStaticSizeMatchNoneSearcher int + +func init() { + var mns MatchNoneSearcher + reflectStaticSizeMatchNoneSearcher = int(reflect.TypeOf(mns).Size()) +} + type MatchNoneSearcher struct { indexReader index.IndexReader } @@ -29,6 +39,11 @@ func NewMatchNoneSearcher(indexReader index.IndexReader) (*MatchNoneSearcher, er }, nil } +func (s *MatchNoneSearcher) Size() int { + return reflectStaticSizeMatchNoneSearcher + size.SizeOfPtr + + s.indexReader.Size() +} + func (s *MatchNoneSearcher) Count() uint64 { return uint64(0) } diff --git a/search/searcher/search_phrase.go b/search/searcher/search_phrase.go index 6237cecfd..23a359bd7 100644 --- a/search/searcher/search_phrase.go +++ b/search/searcher/search_phrase.go @@ -17,11 +17,20 @@ package searcher import ( "fmt" "math" + "reflect" "github.com/blevesearch/bleve/index" "github.com/blevesearch/bleve/search" + "github.com/blevesearch/bleve/size" ) +var reflectStaticSizePhraseSearcher int + +func init() { + var ps PhraseSearcher + reflectStaticSizePhraseSearcher = int(reflect.TypeOf(ps).Size()) +} + type PhraseSearcher struct { indexReader index.IndexReader mustSearcher 
*ConjunctionSearcher @@ -32,6 +41,28 @@ type PhraseSearcher struct { initialized bool } +func (s *PhraseSearcher) Size() int { + sizeInBytes := reflectStaticSizePhraseSearcher + size.SizeOfPtr + + s.indexReader.Size() + + if s.mustSearcher != nil { + sizeInBytes += s.mustSearcher.Size() + } + + if s.currMust != nil { + sizeInBytes += s.currMust.Size() + } + + for _, entry := range s.terms { + sizeInBytes += size.SizeOfSlice + for _, entry1 := range entry { + sizeInBytes += size.SizeOfString + len(entry1) + } + } + + return sizeInBytes +} + func NewPhraseSearcher(indexReader index.IndexReader, terms []string, field string, options search.SearcherOptions) (*PhraseSearcher, error) { // turn flat terms []string into [][]string mterms := make([][]string, len(terms)) diff --git a/search/searcher/search_term.go b/search/searcher/search_term.go index 6fae6ae5a..576d6643a 100644 --- a/search/searcher/search_term.go +++ b/search/searcher/search_term.go @@ -15,11 +15,21 @@ package searcher import ( + "reflect" + "github.com/blevesearch/bleve/index" "github.com/blevesearch/bleve/search" "github.com/blevesearch/bleve/search/scorer" + "github.com/blevesearch/bleve/size" ) +var reflectStaticSizeTermSearcher int + +func init() { + var ts TermSearcher + reflectStaticSizeTermSearcher = int(reflect.TypeOf(ts).Size()) +} + type TermSearcher struct { indexReader index.IndexReader reader index.TermFieldReader @@ -63,6 +73,14 @@ func NewTermSearcherBytes(indexReader index.IndexReader, term []byte, field stri }, nil } +func (s *TermSearcher) Size() int { + return reflectStaticSizeTermSearcher + size.SizeOfPtr + + s.indexReader.Size() + + s.reader.Size() + + s.tfd.Size() + + s.scorer.Size() +} + func (s *TermSearcher) Count() uint64 { return s.reader.Count() } diff --git a/size/sizes.go b/size/sizes.go new file mode 100644 index 000000000..4ba544a71 --- /dev/null +++ b/size/sizes.go @@ -0,0 +1,57 @@ +// Copyright (c) 2018 Couchbase, Inc. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package size + +import ( + "reflect" +) + +func init() { + var a bool + SizeOfBool = int(reflect.TypeOf(a).Size()) + var b float32 + SizeOfFloat32 = int(reflect.TypeOf(b).Size()) + var c float64 + SizeOfFloat64 = int(reflect.TypeOf(c).Size()) + var d map[int]int + SizeOfMap = int(reflect.TypeOf(d).Size()) + var e *int + SizeOfPtr = int(reflect.TypeOf(e).Size()) + var f []int + SizeOfSlice = int(reflect.TypeOf(f).Size()) + var g string + SizeOfString = int(reflect.TypeOf(g).Size()) + var h uint8 + SizeOfUint8 = int(reflect.TypeOf(h).Size()) + var i uint16 + SizeOfUint16 = int(reflect.TypeOf(i).Size()) + var j uint32 + SizeOfUint32 = int(reflect.TypeOf(j).Size()) + var k uint64 + SizeOfUint64 = int(reflect.TypeOf(k).Size()) +} + +var SizeOfBool int +var SizeOfFloat32 int +var SizeOfFloat64 int +var SizeOfInt int +var SizeOfMap int +var SizeOfPtr int +var SizeOfSlice int +var SizeOfString int +var SizeOfUint8 int +var SizeOfUint16 int +var SizeOfUint32 int +var SizeOfUint64 int From 96071c085cbdf0beb7d7a74f483b1b2c53f4bd97 Mon Sep 17 00:00:00 2001 From: abhinavdangeti Date: Mon, 5 Mar 2018 16:49:55 -0800 Subject: [PATCH 257/728] MB-28163: Register a callback with context to estimate RAM for search This callback if registered with context will invoke the api to estimate the memory needed to execute a search query. 
The callback defined at the client side will be responsible for determining whether to continue with the search or abort based on the threshold settings. --- index_impl.go | 13 +++++++++++++ index_test.go | 47 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 60 insertions(+) diff --git a/index_impl.go b/index_impl.go index df6e748d9..1036aef2a 100644 --- a/index_impl.go +++ b/index_impl.go @@ -50,6 +50,10 @@ const storePath = "store" var mappingInternalKey = []byte("_mapping") +const SearchMemCheckCallbackKey = "_search_mem_callback_key" + +type SearchMemCheckCallbackFn func(size uint64) error + func indexStorePath(path string) string { return path + string(os.PathSeparator) + storePath } @@ -479,6 +483,15 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr collector.SetFacetsBuilder(facetsBuilder) } + if memCb := ctx.Value(SearchMemCheckCallbackKey); memCb != nil { + if memCbFn, ok := memCb.(SearchMemCheckCallbackFn); ok { + err = memCbFn(memNeededForSearch(req, searcher, collector)) + } + } + if err != nil { + return nil, err + } + err = collector.Collect(ctx, searcher, indexReader) if err != nil { return nil, err diff --git a/index_test.go b/index_test.go index f1e53647d..57429dcb2 100644 --- a/index_test.go +++ b/index_test.go @@ -1870,3 +1870,50 @@ func BenchmarkUpsidedownSearchOverhead(b *testing.B) { func BenchmarkScorchSearchOverhead(b *testing.B) { benchmarkSearchOverhead(scorch.Name, b) } + +func TestSearchMemCheckCallback(t *testing.T) { + defer func() { + err := os.RemoveAll("testidx") + if err != nil { + t.Fatal(err) + } + }() + + index, err := New("testidx", NewIndexMapping()) + if err != nil { + t.Fatal(err) + } + defer func() { + err := index.Close() + if err != nil { + t.Fatal(err) + } + }() + + elements := []string{"air", "water", "fire", "earth"} + for j := 0; j < 10000; j++ { + err = index.Index(fmt.Sprintf("%d", j), + map[string]interface{}{"name": elements[j%len(elements)]}) + if err != nil { + 
t.Fatal(err) + } + } + + query := NewTermQuery("water") + req := NewSearchRequest(query) + + expErr := fmt.Errorf("MEM_LIMIT_EXCEEDED") + f := func(size uint64) error { + if size > 1000 { + return expErr + } + return nil + } + + ctx := context.WithValue(context.Background(), SearchMemCheckCallbackKey, + SearchMemCheckCallbackFn(f)) + _, err = index.SearchInContext(ctx, req) + if err != expErr { + t.Fatalf("Expected: %v, Got: %v", expErr, err) + } +} From 38b6c522b0a8a77f775b8f340909a7517766d67a Mon Sep 17 00:00:00 2001 From: abhinavdangeti Date: Tue, 6 Mar 2018 14:00:54 -0800 Subject: [PATCH 258/728] Address build breakage after rebase Removed attribute: iterator of type Posting --- index/scorch/segment/zap/posting.go | 4 ---- 1 file changed, 4 deletions(-) diff --git a/index/scorch/segment/zap/posting.go b/index/scorch/segment/zap/posting.go index e9c68cbae..ef21df8d5 100644 --- a/index/scorch/segment/zap/posting.go +++ b/index/scorch/segment/zap/posting.go @@ -506,10 +506,6 @@ type Posting struct { func (p *Posting) Size() int { sizeInBytes := reflectStaticSizePosting - if p.iterator != nil { - sizeInBytes += p.iterator.Size() - } - for _, entry := range p.locs { sizeInBytes += entry.Size() } From b62ca996f63e64dad76bdd95c7d8b4f31d4d6f0a Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Tue, 6 Mar 2018 13:30:44 -0800 Subject: [PATCH 259/728] scorch zap optimize chunkedIntCoder.Add() calls to use multiple vals This change leverages the ability for the chunkedIntCoder.Add() method to accept multiple input param values (via the '...' param signature), meaning there are fewer Add() invocations. 
--- index/scorch/segment/zap/build.go | 51 +++++-------------------------- 1 file changed, 8 insertions(+), 43 deletions(-) diff --git a/index/scorch/segment/zap/build.go b/index/scorch/segment/zap/build.go index 237cc5f3d..361e56e51 100644 --- a/index/scorch/segment/zap/build.go +++ b/index/scorch/segment/zap/build.go @@ -319,19 +319,10 @@ func persistPostingDetails(memSegment *mem.Segment, w *CountHashWriter, chunkFac postingsListItr := memSegment.Postings[postingID].Iterator() var offset int for postingsListItr.HasNext() { - docNum := uint64(postingsListItr.Next()) - // put freq - err := tfEncoder.Add(docNum, freqs[offset]) - if err != nil { - return nil, nil, err - } - - // put norm - norm := norms[offset] - normBits := math.Float32bits(norm) - err = tfEncoder.Add(docNum, uint64(normBits)) + // put freq & norm + err := tfEncoder.Add(docNum, freqs[offset], uint64(math.Float32bits(norms[offset]))) if err != nil { return nil, nil, err } @@ -347,7 +338,6 @@ func persistPostingDetails(memSegment *mem.Segment, w *CountHashWriter, chunkFac if err != nil { return nil, nil, err } - } // now do it again for the locations @@ -371,44 +361,18 @@ func persistPostingDetails(memSegment *mem.Segment, w *CountHashWriter, chunkFac n := int(freqs[offset]) for i := 0; i < n; i++ { if len(locfields) > 0 { - // put field - err := locEncoder.Add(docNum, uint64(locfields[locOffset])) + err := locEncoder.Add(docNum, uint64(locfields[locOffset]), + locpos[locOffset], locstarts[locOffset], locends[locOffset], + uint64(len(locarraypos[locOffset]))) if err != nil { return nil, nil, err } - // put pos - err = locEncoder.Add(docNum, locpos[locOffset]) - if err != nil { - return nil, nil, err - } - - // put start - err = locEncoder.Add(docNum, locstarts[locOffset]) - if err != nil { - return nil, nil, err - } - - // put end - err = locEncoder.Add(docNum, locends[locOffset]) - if err != nil { - return nil, nil, err - } - - // put the number of array positions to follow - num := 
len(locarraypos[locOffset]) - err = locEncoder.Add(docNum, uint64(num)) + // put each array position + err = locEncoder.Add(docNum, locarraypos[locOffset]...) if err != nil { return nil, nil, err } - - // put each array position - for _, pos := range locarraypos[locOffset] { - err = locEncoder.Add(docNum, pos) - if err != nil { - return nil, nil, err - } - } } locOffset++ } @@ -417,6 +381,7 @@ func persistPostingDetails(memSegment *mem.Segment, w *CountHashWriter, chunkFac // record where this postings loc info starts locOffsets = append(locOffsets, uint64(w.Count())) + locEncoder.Close() _, err := locEncoder.Write(w) if err != nil { From dde6c2e01b3b33828f33f28ad43af2147713f7c1 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Tue, 6 Mar 2018 14:59:20 -0800 Subject: [PATCH 260/728] scorch zap optimize writeRoaringWithLen() Before this change, writeRoaringWithLen() would leverage a reused bytes.Buffer (#A) and invoke the roaring.WriteTo() API. But, it turns out the roaring.WriteTo() API has a suboptimal implementation, in that underneath-the-hood it converts the roaring bitmap to a byte buffer (using roaring.ToBytes()), and then calls Write(). But, that Write() turns out to be an additional memcpy into the provided bytes.Buffer (#A). By directly invoking roaring.ToBytes(), this change to writeRoaringWithLen() avoids the extra memory allocation and memcpy. 
--- index/scorch/segment/zap/build.go | 6 ++---- index/scorch/segment/zap/merge.go | 5 ++--- index/scorch/segment/zap/write.go | 20 ++++++++++---------- 3 files changed, 14 insertions(+), 17 deletions(-) diff --git a/index/scorch/segment/zap/build.go b/index/scorch/segment/zap/build.go index 361e56e51..404ec8694 100644 --- a/index/scorch/segment/zap/build.go +++ b/index/scorch/segment/zap/build.go @@ -394,13 +394,12 @@ func persistPostingDetails(memSegment *mem.Segment, w *CountHashWriter, chunkFac func persistPostingsLocs(memSegment *mem.Segment, w *CountHashWriter) (rv []uint64, err error) { rv = make([]uint64, 0, len(memSegment.PostingsLocs)) - var reuseBuf bytes.Buffer reuseBufVarint := make([]byte, binary.MaxVarintLen64) for postingID := range memSegment.PostingsLocs { // record where we start this posting loc rv = append(rv, uint64(w.Count())) // write out the length and bitmap - _, err = writeRoaringWithLen(memSegment.PostingsLocs[postingID], w, &reuseBuf, reuseBufVarint) + _, err = writeRoaringWithLen(memSegment.PostingsLocs[postingID], w, reuseBufVarint) if err != nil { return nil, err } @@ -411,7 +410,6 @@ func persistPostingsLocs(memSegment *mem.Segment, w *CountHashWriter) (rv []uint func persistPostingsLists(memSegment *mem.Segment, w *CountHashWriter, postingsListLocs, freqOffsets, locOffsets []uint64) (rv []uint64, err error) { rv = make([]uint64, 0, len(memSegment.Postings)) - var reuseBuf bytes.Buffer reuseBufVarint := make([]byte, binary.MaxVarintLen64) for postingID := range memSegment.Postings { // record where we start this posting list @@ -425,7 +423,7 @@ func persistPostingsLists(memSegment *mem.Segment, w *CountHashWriter, } // write out the length and bitmap - _, err = writeRoaringWithLen(memSegment.Postings[postingID], w, &reuseBuf, reuseBufVarint) + _, err = writeRoaringWithLen(memSegment.Postings[postingID], w, reuseBufVarint) if err != nil { return nil, err } diff --git a/index/scorch/segment/zap/merge.go 
b/index/scorch/segment/zap/merge.go index 5066dfb9e..3a0577cdd 100644 --- a/index/scorch/segment/zap/merge.go +++ b/index/scorch/segment/zap/merge.go @@ -160,7 +160,6 @@ func persistMergedRest(segments []*SegmentBase, dropsIn []*roaring.Bitmap, newSegDocCount uint64, chunkFactor uint32, w *CountHashWriter) ([]uint64, uint64, error) { - var bufReuse bytes.Buffer var bufMaxVarintLen64 []byte = make([]byte, binary.MaxVarintLen64) var postings *PostingsList @@ -247,7 +246,7 @@ func persistMergedRest(segments []*SegmentBase, dropsIn []*roaring.Bitmap, return err } postingLocOffset := uint64(w.Count()) - _, err = writeRoaringWithLen(newRoaringLocs, w, &bufReuse, bufMaxVarintLen64) + _, err = writeRoaringWithLen(newRoaringLocs, w, bufMaxVarintLen64) if err != nil { return err } @@ -271,7 +270,7 @@ func persistMergedRest(segments []*SegmentBase, dropsIn []*roaring.Bitmap, if err != nil { return err } - _, err = writeRoaringWithLen(newRoaring, w, &bufReuse, bufMaxVarintLen64) + _, err = writeRoaringWithLen(newRoaring, w, bufMaxVarintLen64) if err != nil { return err } diff --git a/index/scorch/segment/zap/write.go b/index/scorch/segment/zap/write.go index c5316a99f..7f4f5a88b 100644 --- a/index/scorch/segment/zap/write.go +++ b/index/scorch/segment/zap/write.go @@ -15,7 +15,6 @@ package zap import ( - "bytes" "encoding/binary" "io" @@ -25,28 +24,29 @@ import ( // writes out the length of the roaring bitmap in bytes as varint // then writes out the roaring bitmap itself func writeRoaringWithLen(r *roaring.Bitmap, w io.Writer, - reuseBuf *bytes.Buffer, reuseBufVarint []byte) (int, error) { - reuseBuf.Reset() - - // write out postings list to memory so we know the len - postingsListLen, err := r.WriteTo(reuseBuf) + reuseBufVarint []byte) (int, error) { + buf, err := r.ToBytes() if err != nil { return 0, err } + var tw int - // write out the length of this postings list - n := binary.PutUvarint(reuseBufVarint, uint64(postingsListLen)) + + // write out the length + n := 
binary.PutUvarint(reuseBufVarint, uint64(len(buf))) nw, err := w.Write(reuseBufVarint[:n]) tw += nw if err != nil { return tw, err } - // write out the postings list itself - nw, err = w.Write(reuseBuf.Bytes()) + + // write out the roaring bytes + nw, err = w.Write(buf) tw += nw if err != nil { return tw, err } + return tw, nil } From 8841d79d26aa76dcd88df1e1f7bf39742e694364 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Tue, 6 Mar 2018 15:26:54 -0800 Subject: [PATCH 261/728] scorch optimize mem processField inner-loop --- index/scorch/segment/mem/build.go | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/index/scorch/segment/mem/build.go b/index/scorch/segment/mem/build.go index 264c94d1c..a064fcba0 100644 --- a/index/scorch/segment/mem/build.go +++ b/index/scorch/segment/mem/build.go @@ -111,13 +111,15 @@ func (s *Segment) initializeDict(results []*index.AnalysisResult) { } processField := func(fieldID uint16, tfs analysis.TokenFrequencies) { + dict := s.Dicts[fieldID] + dictKeys := s.DictKeys[fieldID] for term, tf := range tfs { - pidPlus1, exists := s.Dicts[fieldID][term] + pidPlus1, exists := dict[term] if !exists { numPostingsLists++ pidPlus1 = uint64(numPostingsLists) - s.Dicts[fieldID][term] = pidPlus1 - s.DictKeys[fieldID] = append(s.DictKeys[fieldID], term) + dict[term] = pidPlus1 + dictKeys = append(dictKeys, term) numTermsPerPostingsList = append(numTermsPerPostingsList, 0) numLocsPerPostingsList = append(numLocsPerPostingsList, 0) } @@ -127,6 +129,7 @@ func (s *Segment) initializeDict(results []*index.AnalysisResult) { totLocs += len(tf.Locations) } numTokenFrequencies += len(tfs) + s.DictKeys[fieldID] = dictKeys } for _, result := range results { From 5c721226cfab83b0cbd366aef6919ccc0df20364 Mon Sep 17 00:00:00 2001 From: abhinavdangeti Date: Tue, 6 Mar 2018 15:53:14 -0800 Subject: [PATCH 262/728] Fixing the scorch search request memory estimate Do not re-account for certain referenced data in the zap structures. 
New estimates: ESTIMATE BENCHMEM TermQuery 11396 12437 MatchQuery 12244 12951 DisjunctionQuery (Term queries) 20644 20709 --- index/scorch/segment/zap/posting.go | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/index/scorch/segment/zap/posting.go b/index/scorch/segment/zap/posting.go index ef21df8d5..8106ebcc0 100644 --- a/index/scorch/segment/zap/posting.go +++ b/index/scorch/segment/zap/posting.go @@ -57,18 +57,6 @@ type PostingsList struct { func (p *PostingsList) Size() int { sizeInBytes := reflectStaticSizePostingsList + size.SizeOfPtr - if p.sb != nil { - sizeInBytes += (p.sb.Size() - len(p.sb.mem)) // do not include the mmap'ed part - } - - if p.locBitmap != nil { - sizeInBytes += int(p.locBitmap.GetSizeInBytes()) - } - - if p.postings != nil { - sizeInBytes += int(p.postings.GetSizeInBytes()) - } - if p.except != nil { sizeInBytes += int(p.except.GetSizeInBytes()) } From 2a9739ee1b438c0a187353b65e4b52890a4a2045 Mon Sep 17 00:00:00 2001 From: Sreekanth Sivasankaran Date: Wed, 7 Mar 2018 14:37:33 +0530 Subject: [PATCH 263/728] naming change, interface removal --- index/scorch/merge.go | 3 ++- index/scorch/scorch.go | 2 +- index/scorch/segment/zap/merge.go | 25 ++++++++----------------- index/scorch/segment/zap/merge_test.go | 23 ++++++----------------- index/scorch/stats.go | 7 +------ 5 files changed, 18 insertions(+), 42 deletions(-) diff --git a/index/scorch/merge.go b/index/scorch/merge.go index 67e1590ac..d7edfd402 100644 --- a/index/scorch/merge.go +++ b/index/scorch/merge.go @@ -180,8 +180,9 @@ func (s *Scorch) planMergeAtSnapshot(ourSnapshot *IndexSnapshot, s.markIneligibleForRemoval(filename) path := s.path + string(os.PathSeparator) + filename atomic.AddUint64(&s.stats.TotFileMergeZapBeg, 1) - newDocNums, err := zap.Merge(segmentsToMerge, docsToDrop, path, 1024, &s.stats) + newDocNums, nBytes, err := zap.Merge(segmentsToMerge, docsToDrop, path, 1024) atomic.AddUint64(&s.stats.TotFileMergeZapEnd, 1) + 
atomic.AddUint64(&s.stats.TotFileMergeWrittenBytes, nBytes) if err != nil { s.unmarkIneligibleForRemoval(filename) atomic.AddUint64(&s.stats.TotFileMergePlanTasksErr, 1) diff --git a/index/scorch/scorch.go b/index/scorch/scorch.go index 8a2b1ec3b..4da336e5e 100644 --- a/index/scorch/scorch.go +++ b/index/scorch/scorch.go @@ -430,7 +430,7 @@ func (s *Scorch) StatsMap() map[string]interface{} { m["num_items_persisted"] = m["TotPersistedItems"] m["num_bytes_used_disk"] = m["CurOnDiskBytes"] m["num_files_on_disk"] = m["CurOnDiskFiles"] - m["total_compaction_written_bytes"] = m["TotCompactionWrittenBytes"] + m["total_compaction_written_bytes"] = m["TotFileMergeWrittenBytes"] return m } diff --git a/index/scorch/segment/zap/merge.go b/index/scorch/segment/zap/merge.go index 9399046b0..7abef1886 100644 --- a/index/scorch/segment/zap/merge.go +++ b/index/scorch/segment/zap/merge.go @@ -31,22 +31,17 @@ import ( const docDropped = math.MaxUint64 // sentinel docNum to represent a deleted doc -// StatsReporter interface represents stats reporting methods. -type StatsReporter interface { - ReportBytesWritten(numBytesWritten uint64) -} - // Merge takes a slice of zap segments and bit masks describing which // documents may be dropped, and creates a new segment containing the // remaining data. This new segment is built at the specified path, // with the provided chunkFactor. 
func Merge(segments []*Segment, drops []*roaring.Bitmap, path string, - chunkFactor uint32, stats StatsReporter) ([][]uint64, error) { + chunkFactor uint32) ([][]uint64, uint64, error) { flag := os.O_RDWR | os.O_CREATE f, err := os.OpenFile(path, flag, 0600) if err != nil { - return nil, err + return nil, 0, err } cleanup := func() { @@ -69,39 +64,35 @@ func Merge(segments []*Segment, drops []*roaring.Bitmap, path string, MergeToWriter(segmentBases, drops, chunkFactor, cr) if err != nil { cleanup() - return nil, err + return nil, 0, err } err = persistFooter(numDocs, storedIndexOffset, fieldsIndexOffset, docValueOffset, chunkFactor, cr.Sum32(), cr) if err != nil { cleanup() - return nil, err + return nil, 0, err } err = br.Flush() if err != nil { cleanup() - return nil, err + return nil, 0, err } err = f.Sync() if err != nil { cleanup() - return nil, err + return nil, 0, err } err = f.Close() if err != nil { cleanup() - return nil, err - } - - if stats != nil { - stats.ReportBytesWritten(uint64(cr.Count())) + return nil, 0, err } - return newDocNums, nil + return newDocNums, uint64(cr.Count()), nil } func MergeToWriter(segments []*SegmentBase, drops []*roaring.Bitmap, diff --git a/index/scorch/segment/zap/merge_test.go b/index/scorch/segment/zap/merge_test.go index 2ba0b373a..501947f96 100644 --- a/index/scorch/segment/zap/merge_test.go +++ b/index/scorch/segment/zap/merge_test.go @@ -20,7 +20,6 @@ import ( "reflect" "sort" "strings" - "sync/atomic" "testing" "github.com/RoaringBitmap/roaring" @@ -73,7 +72,7 @@ func TestMerge(t *testing.T) { segsToMerge[0] = segment.(*Segment) segsToMerge[1] = segment2.(*Segment) - _, err = Merge(segsToMerge, []*roaring.Bitmap{nil, nil}, "/tmp/scorch3.zap", 1024, nil) + _, _, err = Merge(segsToMerge, []*roaring.Bitmap{nil, nil}, "/tmp/scorch3.zap", 1024) if err != nil { t.Fatal(err) } @@ -177,7 +176,7 @@ func testMergeWithEmptySegments(t *testing.T, before bool, numEmptySegments int) drops := make([]*roaring.Bitmap, 
len(segsToMerge)) - _, err = Merge(segsToMerge, drops, "/tmp/scorch3.zap", 1024, nil) + _, _, err = Merge(segsToMerge, drops, "/tmp/scorch3.zap", 1024) if err != nil { t.Fatal(err) } @@ -219,7 +218,7 @@ func testMergeWithSelf(t *testing.T, segCur *Segment, expectedCount uint64) { segsToMerge := make([]*Segment, 1) segsToMerge[0] = segCur - _, err := Merge(segsToMerge, []*roaring.Bitmap{nil, nil}, "/tmp/"+fname, 1024, nil) + _, _, err := Merge(segsToMerge, []*roaring.Bitmap{nil, nil}, "/tmp/"+fname, 1024) if err != nil { t.Fatal(err) } @@ -591,7 +590,7 @@ func testMergeWithUpdates(t *testing.T, segmentDocIds [][]string, docsToDrop []* func testMergeAndDropSegments(t *testing.T, segsToMerge []*Segment, docsToDrop []*roaring.Bitmap, expectedNumDocs uint64) { _ = os.RemoveAll("/tmp/scorch-merged.zap") - _, err := Merge(segsToMerge, docsToDrop, "/tmp/scorch-merged.zap", 1024, nil) + _, _, err := Merge(segsToMerge, docsToDrop, "/tmp/scorch-merged.zap", 1024) if err != nil { t.Fatal(err) } @@ -784,14 +783,6 @@ func buildMemSegmentMultiHelper(docIds []string) *mem.Segment { return segment } -type statTest struct { - totalWrittenBytes uint64 -} - -func (s *statTest) ReportBytesWritten(numBytesWritten uint64) { - atomic.AddUint64(&s.totalWrittenBytes, numBytesWritten) -} - func TestMergeBytesWritten(t *testing.T) { _ = os.RemoveAll("/tmp/scorch.zap") _ = os.RemoveAll("/tmp/scorch2.zap") @@ -835,14 +826,12 @@ func TestMergeBytesWritten(t *testing.T) { segsToMerge[0] = segment.(*Segment) segsToMerge[1] = segment2.(*Segment) - reporter := &statTest{} - - _, err = Merge(segsToMerge, []*roaring.Bitmap{nil, nil}, "/tmp/scorch3.zap", 1024, reporter) + _, nBytes, err := Merge(segsToMerge, []*roaring.Bitmap{nil, nil}, "/tmp/scorch3.zap", 1024) if err != nil { t.Fatal(err) } - if reporter.totalWrittenBytes == 0 { + if nBytes == 0 { t.Fatalf("expected a non zero total_compaction_written_bytes") } diff --git a/index/scorch/stats.go b/index/scorch/stats.go index 4f8c8b99e..36245a8e8 
100644 --- a/index/scorch/stats.go +++ b/index/scorch/stats.go @@ -87,6 +87,7 @@ type Stats struct { TotFileMergeSegmentsEmpty uint64 TotFileMergeSegments uint64 + TotFileMergeWrittenBytes uint64 TotFileMergeZapBeg uint64 TotFileMergeZapEnd uint64 @@ -100,8 +101,6 @@ type Stats struct { TotMemMergeZapBeg uint64 TotMemMergeZapEnd uint64 TotMemMergeSegments uint64 - - TotCompactionWrittenBytes uint64 } // atomically populates the returned map @@ -124,7 +123,3 @@ func (s *Stats) ToMap() map[string]interface{} { func (s *Stats) MarshalJSON() ([]byte, error) { return json.Marshal(s.ToMap()) } - -func (s *Stats) ReportBytesWritten(numBytesWritten uint64) { - atomic.AddUint64(&s.TotCompactionWrittenBytes, numBytesWritten) -} From 73ed8e248d340ceb2ca3380f2f91bc1fd316576b Mon Sep 17 00:00:00 2001 From: Sreekanth Sivasankaran Date: Wed, 7 Mar 2018 18:34:54 +0530 Subject: [PATCH 264/728] fixing the indentation issues. looks like it happened during the web based conflict resolution.. --- index/scorch/merge.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/index/scorch/merge.go b/index/scorch/merge.go index 89d6ec141..2b0e734c7 100644 --- a/index/scorch/merge.go +++ b/index/scorch/merge.go @@ -185,7 +185,7 @@ func (s *Scorch) planMergeAtSnapshot(ourSnapshot *IndexSnapshot, atomic.AddUint64(&s.stats.TotFileMergeZapBeg, 1) newDocNums, nBytes, err := zap.Merge(segmentsToMerge, docsToDrop, path, 1024) atomic.AddUint64(&s.stats.TotFileMergeZapEnd, 1) - atomic.AddUint64(&s.stats.TotFileMergeWrittenBytes, nBytes) + atomic.AddUint64(&s.stats.TotFileMergeWrittenBytes, nBytes) fileMergeZapTime := uint64(time.Since(fileMergeZapStartTime)) atomic.AddUint64(&s.stats.TotFileMergeZapTime, fileMergeZapTime) @@ -193,7 +193,7 @@ func (s *Scorch) planMergeAtSnapshot(ourSnapshot *IndexSnapshot, atomic.StoreUint64(&s.stats.MaxFileMergeZapTime, fileMergeZapTime) } - if err != nil { + if err != nil { s.unmarkIneligibleForRemoval(filename) 
atomic.AddUint64(&s.stats.TotFileMergePlanTasksErr, 1) return fmt.Errorf("merging failed: %v", err) From 8c0f402d4b06a9c345f5049bd0f72b474db8ffbb Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Tue, 6 Mar 2018 15:48:22 -0800 Subject: [PATCH 265/728] scorch zap optimize processDocument() loc inner loop --- index/scorch/segment/mem/build.go | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/index/scorch/segment/mem/build.go b/index/scorch/segment/mem/build.go index a064fcba0..a4b69013e 100644 --- a/index/scorch/segment/mem/build.go +++ b/index/scorch/segment/mem/build.go @@ -266,21 +266,34 @@ func (s *Segment) processDocument(result *index.AnalysisResult) { locationBS := s.PostingsLocs[pid] if len(tokenFreq.Locations) > 0 { locationBS.AddInt(int(docNum)) + + locfields := s.Locfields[pid] + locstarts := s.Locstarts[pid] + locends := s.Locends[pid] + locpos := s.Locpos[pid] + locarraypos := s.Locarraypos[pid] + for _, loc := range tokenFreq.Locations { var locf = fieldID if loc.Field != "" { locf = uint16(s.getOrDefineField(loc.Field)) } - s.Locfields[pid] = append(s.Locfields[pid], locf) - s.Locstarts[pid] = append(s.Locstarts[pid], uint64(loc.Start)) - s.Locends[pid] = append(s.Locends[pid], uint64(loc.End)) - s.Locpos[pid] = append(s.Locpos[pid], uint64(loc.Position)) + locfields = append(locfields, locf) + locstarts = append(locstarts, uint64(loc.Start)) + locends = append(locends, uint64(loc.End)) + locpos = append(locpos, uint64(loc.Position)) if len(loc.ArrayPositions) > 0 { - s.Locarraypos[pid] = append(s.Locarraypos[pid], loc.ArrayPositions) + locarraypos = append(locarraypos, loc.ArrayPositions) } else { - s.Locarraypos[pid] = append(s.Locarraypos[pid], nil) + locarraypos = append(locarraypos, nil) } } + + s.Locfields[pid] = locfields + s.Locstarts[pid] = locstarts + s.Locends[pid] = locends + s.Locpos[pid] = locpos + s.Locarraypos[pid] = locarraypos } } } From 79f28b7c93b99934bf37241454d77fa940de3863 Mon Sep 17 
00:00:00 2001 From: Steve Yen Date: Tue, 6 Mar 2018 16:29:33 -0800 Subject: [PATCH 266/728] scorch fix persistDocValues() err return --- index/scorch/segment/zap/build.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/index/scorch/segment/zap/build.go b/index/scorch/segment/zap/build.go index 404ec8694..f1698e39f 100644 --- a/index/scorch/segment/zap/build.go +++ b/index/scorch/segment/zap/build.go @@ -515,7 +515,7 @@ func persistDocValues(memSegment *mem.Segment, w *CountHashWriter, var err1 error postings, err1 = dict.(*mem.Dictionary).InitPostingsList(next.Term, nil, postings) if err1 != nil { - return nil, err + return nil, err1 } postingsItr = postings.InitIterator(postingsItr) From 59eb70d0200dedb7710406280a27d4f75ff6ca97 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Tue, 6 Mar 2018 17:47:25 -0800 Subject: [PATCH 267/728] scorch zap remove unused chunkedIntCoder field --- index/scorch/segment/zap/intcoder.go | 2 -- 1 file changed, 2 deletions(-) diff --git a/index/scorch/segment/zap/intcoder.go b/index/scorch/segment/zap/intcoder.go index 8d1f94536..ea0330181 100644 --- a/index/scorch/segment/zap/intcoder.go +++ b/index/scorch/segment/zap/intcoder.go @@ -24,7 +24,6 @@ import ( type chunkedIntCoder struct { final []byte - maxDocNum uint64 chunkSize uint64 chunkBuf bytes.Buffer encoder *govarint.Base128Encoder @@ -41,7 +40,6 @@ func newChunkedIntCoder(chunkSize uint64, maxDocNum uint64) *chunkedIntCoder { total := maxDocNum/chunkSize + 1 rv := &chunkedIntCoder{ chunkSize: chunkSize, - maxDocNum: maxDocNum, chunkLens: make([]uint64, total), final: make([]byte, 0, 64), } From 2b5da7a8192821442761af98d689afdd74a1272e Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Wed, 7 Mar 2018 09:12:55 -0800 Subject: [PATCH 268/728] go fmt --- index/scorch/merge.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/index/scorch/merge.go b/index/scorch/merge.go index 2b0e734c7..ec2c8d4b3 100644 --- a/index/scorch/merge.go +++ 
b/index/scorch/merge.go @@ -186,14 +186,14 @@ func (s *Scorch) planMergeAtSnapshot(ourSnapshot *IndexSnapshot, newDocNums, nBytes, err := zap.Merge(segmentsToMerge, docsToDrop, path, 1024) atomic.AddUint64(&s.stats.TotFileMergeZapEnd, 1) atomic.AddUint64(&s.stats.TotFileMergeWrittenBytes, nBytes) - + fileMergeZapTime := uint64(time.Since(fileMergeZapStartTime)) atomic.AddUint64(&s.stats.TotFileMergeZapTime, fileMergeZapTime) if atomic.LoadUint64(&s.stats.MaxFileMergeZapTime) < fileMergeZapTime { atomic.StoreUint64(&s.stats.MaxFileMergeZapTime, fileMergeZapTime) } - - if err != nil { + + if err != nil { s.unmarkIneligibleForRemoval(filename) atomic.AddUint64(&s.stats.TotFileMergePlanTasksErr, 1) return fmt.Errorf("merging failed: %v", err) From 1e2bb14f13ad14dc435fe6cd5c2bc09375df651f Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Wed, 7 Mar 2018 09:49:08 -0800 Subject: [PATCH 269/728] added TestRoaringSizes() --- index/scorch/segment/zap/write_test.go | 86 ++++++++++++++++++++++++++ 1 file changed, 86 insertions(+) create mode 100644 index/scorch/segment/zap/write_test.go diff --git a/index/scorch/segment/zap/write_test.go b/index/scorch/segment/zap/write_test.go new file mode 100644 index 000000000..2e72d4b82 --- /dev/null +++ b/index/scorch/segment/zap/write_test.go @@ -0,0 +1,86 @@ +// Copyright (c) 2018 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package zap + +import ( + "math" + "testing" + + "github.com/RoaringBitmap/roaring" +) + +func TestRoaringSizes(t *testing.T) { + tests := []struct { + vals []uint32 + expectedSize int // expected serialized # bytes + optimizedSize int // after calling roaring's RunOptimize() API + }{ + {[]uint32{}, 8, 8}, // empty roaring is 8 bytes + + {[]uint32{0}, 18, 18}, // single entry roaring is 18 bytes + {[]uint32{1}, 18, 18}, + {[]uint32{4}, 18, 18}, + {[]uint32{4000}, 18, 18}, + {[]uint32{40000000}, 18, 18}, + {[]uint32{math.MaxUint32}, 18, 18}, + {[]uint32{math.MaxUint32 - 1}, 18, 18}, + + {[]uint32{0, 1}, 20, 20}, + {[]uint32{0, 10000000}, 28, 28}, + + {[]uint32{0, 1, 2}, 22, 15}, + {[]uint32{0, 1, 20000000}, 30, 30}, + + {[]uint32{0, 1, 2, 3}, 24, 15}, + {[]uint32{0, 1, 2, 30000000}, 32, 21}, + } + + for _, test := range tests { + bm := roaring.New() + for _, val := range test.vals { + bm.Add(val) + } + + b, err := bm.ToBytes() + if err != nil { + t.Errorf("expected no ToBytes() err, got: %v", err) + } + if len(b) != test.expectedSize { + t.Errorf("size did not match,"+ + " got: %d, test: %#v", len(b), test) + } + if int(bm.GetSerializedSizeInBytes()) != test.expectedSize { + t.Errorf("GetSerializedSizeInBytes did not match,"+ + " got: %d, test: %#v", + bm.GetSerializedSizeInBytes(), test) + } + + bm.RunOptimize() + + b, err = bm.ToBytes() + if err != nil { + t.Errorf("expected no ToBytes() err, got: %v", err) + } + if len(b) != test.optimizedSize { + t.Errorf("optimized size did not match,"+ + " got: %d, test: %#v", len(b), test) + } + if int(bm.GetSerializedSizeInBytes()) != test.optimizedSize { + t.Errorf("optimized GetSerializedSizeInBytes did not match,"+ + " got: %d, test: %#v", + bm.GetSerializedSizeInBytes(), test) + } + } +} From 33ef4ce35e22452ddde0a05e77a90afba5206d5f Mon Sep 17 00:00:00 2001 From: abhinavdangeti Date: Wed, 7 Mar 2018 12:05:39 -0800 Subject: [PATCH 270/728] MB-28163: Provide an API to estimate the RAM needed for SearchResult exported 
API: MemoryNeededForSearchResult(req *SearchRequest) --- search.go | 43 +++++++++++++++++++++++++++++++++++++++++++ search_test.go | 15 +++++++++++++++ 2 files changed, 58 insertions(+) diff --git a/search.go b/search.go index e324262e5..86ea4193a 100644 --- a/search.go +++ b/search.go @@ -22,8 +22,10 @@ import ( "github.com/blevesearch/bleve/analysis" "github.com/blevesearch/bleve/analysis/datetime/optional" + "github.com/blevesearch/bleve/document" "github.com/blevesearch/bleve/registry" "github.com/blevesearch/bleve/search" + "github.com/blevesearch/bleve/search/collector" "github.com/blevesearch/bleve/search/query" "github.com/blevesearch/bleve/size" ) @@ -518,3 +520,44 @@ func (sr *SearchResult) Merge(other *SearchResult) { sr.Facets.Merge(other.Facets) } + +// MemoryNeededForSearchResult is an exported helper function to determine the RAM +// needed to accommodate the results for a given search request. +func MemoryNeededForSearchResult(req *SearchRequest) uint64 { + if req == nil { + return 0 + } + + numDocMatches := req.Size + req.From + if req.Size+req.From > collector.PreAllocSizeSkipCap { + numDocMatches = collector.PreAllocSizeSkipCap + } + + estimate := 0 + + // overhead from the SearchResult structure + var sr SearchResult + estimate += sr.Size() + + var dm search.DocumentMatch + sizeOfDocumentMatch := dm.Size() + + // overhead from results + estimate += numDocMatches * sizeOfDocumentMatch + + // overhead from facet results + if req.Facets != nil { + var fr search.FacetResult + estimate += len(req.Facets) * fr.Size() + } + + // highlighting, store + var d document.Document + if len(req.Fields) > 0 || req.Highlight != nil { + for i := 0; i < (req.Size + req.From); i++ { + estimate += (req.Size + req.From) * d.Size() + } + } + + return uint64(estimate) +} diff --git a/search_test.go b/search_test.go index 242494132..87a718285 100644 --- a/search_test.go +++ b/search_test.go @@ -399,3 +399,18 @@ func TestSearchResultFacetsMerge(t *testing.T) { 
t.Errorf("expected %#v, got %#v", expected, l) } } + +func TestMemoryNeededForSearchResult(t *testing.T) { + query := NewTermQuery("blah") + req := NewSearchRequest(query) + + var sr SearchResult + expect := sr.Size() + var dm search.DocumentMatch + expect += 10 * dm.Size() + + estimate := MemoryNeededForSearchResult(req) + if estimate != uint64(expect) { + t.Errorf("estimate not what is expected: %v != %v", estimate, expect) + } +} From eac98089907947b66dce3e8848fee62fa0717971 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Wed, 7 Mar 2018 15:00:38 -0800 Subject: [PATCH 271/728] scorch zap optimize FST val encoding for terms with 1 hit NOTE: this is a scorch zap file format change / bump to version 4. In this optimization, the uint64 val stored in the vellum FST (term dictionary) now may either be a uint64 postingsOffset (same as before this change) or a uint64 encoding of the docNum + norm (in the case where a term appears in just a single doc). --- cmd/bleve/cmd/zap/explore.go | 16 ++- index/scorch/segment/zap/build.go | 2 +- index/scorch/segment/zap/intcoder.go | 4 + index/scorch/segment/zap/merge.go | 42 ++++++- index/scorch/segment/zap/merge_test.go | 9 ++ index/scorch/segment/zap/posting.go | 158 +++++++++++++++++++++---- 6 files changed, 201 insertions(+), 30 deletions(-) diff --git a/cmd/bleve/cmd/zap/explore.go b/cmd/bleve/cmd/zap/explore.go index de05c63e7..543b572fd 100644 --- a/cmd/bleve/cmd/zap/explore.go +++ b/cmd/bleve/cmd/zap/explore.go @@ -18,7 +18,9 @@ import ( "encoding/binary" "fmt" "log" + "math" + "github.com/blevesearch/bleve/index/scorch/segment/zap" "github.com/couchbase/vellum" "github.com/spf13/cobra" ) @@ -57,7 +59,19 @@ var exploreCmd = &cobra.Command{ return fmt.Errorf("error looking for term : %v", err) } if exists { - fmt.Printf("postings list begins at %d (%x)\n", postingsAddr, postingsAddr) + fmt.Printf("fst val is %d (%x)\n", postingsAddr, postingsAddr) + + if postingsAddr&zap.FSTValEncodingMask == zap.FSTValEncoding1Hit { + 
docNum, normBits := zap.FSTValDecode1Hit(postingsAddr) + norm := math.Float32frombits(uint32(normBits)) + fmt.Printf("Posting List is 1-hit encoded, docNum: %d, norm: %f\n", + docNum, norm) + return nil + } + + if postingsAddr&zap.FSTValEncodingMask != zap.FSTValEncodingGeneral { + return fmt.Errorf("unknown fst val encoding") + } var n uint64 freqAddr, read := binary.Uvarint(data[postingsAddr : postingsAddr+binary.MaxVarintLen64]) diff --git a/index/scorch/segment/zap/build.go b/index/scorch/segment/zap/build.go index f1698e39f..8ec610953 100644 --- a/index/scorch/segment/zap/build.go +++ b/index/scorch/segment/zap/build.go @@ -28,7 +28,7 @@ import ( "github.com/golang/snappy" ) -const version uint32 = 3 +const version uint32 = 4 const fieldNotUninverted = math.MaxUint64 diff --git a/index/scorch/segment/zap/intcoder.go b/index/scorch/segment/zap/intcoder.go index ea0330181..6680e608e 100644 --- a/index/scorch/segment/zap/intcoder.go +++ b/index/scorch/segment/zap/intcoder.go @@ -130,3 +130,7 @@ func (c *chunkedIntCoder) Write(w io.Writer) (int, error) { } return tw, nil } + +func (c *chunkedIntCoder) FinalSize() int { + return len(c.final) +} diff --git a/index/scorch/segment/zap/merge.go b/index/scorch/segment/zap/merge.go index 454213af9..383fedbf3 100644 --- a/index/scorch/segment/zap/merge.go +++ b/index/scorch/segment/zap/merge.go @@ -225,6 +225,21 @@ func persistMergedRest(segments []*SegmentBase, dropsIn []*roaring.Bitmap, newRoaring := roaring.NewBitmap() newRoaringLocs := roaring.NewBitmap() + var lastDocNum, lastFreq, lastNorm uint64 + + // determines whether to use "1-hit" encoding optimization + // when a term appears in only 1 doc, with no loc info, + // has freq of 1, and the docNum fits into 31-bits + use1HitEncoding := func(termCardinality uint64) (bool, uint64, uint64) { + if termCardinality == uint64(1) && locEncoder.FinalSize() <= 0 { + docNum := uint64(newRoaring.Minimum()) + if under32Bits(docNum) && docNum == lastDocNum && lastFreq == 1 { + 
return true, docNum, lastNorm + } + } + return false, 0, 0 + } + finishTerm := func(term []byte) error { if term == nil { return nil @@ -233,8 +248,16 @@ func persistMergedRest(segments []*SegmentBase, dropsIn []*roaring.Bitmap, tfEncoder.Close() locEncoder.Close() - if newRoaring.GetCardinality() > 0 { - // this field/term actually has hits in the new segment, lets write it down + termCardinality := newRoaring.GetCardinality() + + encodeAs1Hit, docNum1Hit, normBits1Hit := use1HitEncoding(termCardinality) + if encodeAs1Hit { + err = newVellum.Insert(term, FSTValEncode1Hit(docNum1Hit, normBits1Hit)) + if err != nil { + return err + } + } else if termCardinality > 0 { + // this field/term has hits in the new segment freqOffset := uint64(w.Count()) _, err := tfEncoder.Write(w) if err != nil { @@ -251,7 +274,6 @@ func persistMergedRest(segments []*SegmentBase, dropsIn []*roaring.Bitmap, return err } postingOffset := uint64(w.Count()) - // write out the start of the term info n := binary.PutUvarint(bufMaxVarintLen64, freqOffset) _, err = w.Write(bufMaxVarintLen64[:n]) @@ -287,6 +309,10 @@ func persistMergedRest(segments []*SegmentBase, dropsIn []*roaring.Bitmap, tfEncoder.Reset() locEncoder.Reset() + lastDocNum = 0 + lastFreq = 0 + lastNorm = 0 + return nil } @@ -315,7 +341,8 @@ func persistMergedRest(segments []*SegmentBase, dropsIn []*roaring.Bitmap, postItr = postings.iterator(postItr) - nextDocNum, nextFreqNormBytes, nextLocBytes, err2 := postItr.nextBytes() + nextDocNum, nextFreq, nextNorm, nextFreqNormBytes, nextLocBytes, err2 := + postItr.nextBytes() for err2 == nil && len(nextFreqNormBytes) > 0 { hitNewDocNum := newDocNumsI[nextDocNum] if hitNewDocNum == docDropped { @@ -339,7 +366,12 @@ func persistMergedRest(segments []*SegmentBase, dropsIn []*roaring.Bitmap, docTermMap[hitNewDocNum] = append(append(docTermMap[hitNewDocNum], term...), termSeparator) - nextDocNum, nextFreqNormBytes, nextLocBytes, err2 = postItr.nextBytes() + lastDocNum = hitNewDocNum + lastFreq 
= nextFreq + lastNorm = nextNorm + + nextDocNum, nextFreq, nextNorm, nextFreqNormBytes, nextLocBytes, err2 = + postItr.nextBytes() } if err2 != nil { return nil, 0, err2 diff --git a/index/scorch/segment/zap/merge_test.go b/index/scorch/segment/zap/merge_test.go index 501947f96..d80b26086 100644 --- a/index/scorch/segment/zap/merge_test.go +++ b/index/scorch/segment/zap/merge_test.go @@ -859,3 +859,12 @@ func TestMergeBytesWritten(t *testing.T) { testMergeWithSelf(t, seg3, 4) } + +func TestUnder32Bits(t *testing.T) { + if !under32Bits(0) || !under32Bits(uint64(0x7fffffff)) { + t.Errorf("under32Bits bad") + } + if under32Bits(uint64(0x80000000)) || under32Bits(uint64(0x80000001)) { + t.Errorf("under32Bits wrong") + } +} diff --git a/index/scorch/segment/zap/posting.go b/index/scorch/segment/zap/posting.go index 8106ebcc0..7ae36120b 100644 --- a/index/scorch/segment/zap/posting.go +++ b/index/scorch/segment/zap/posting.go @@ -43,6 +43,55 @@ func init() { reflectStaticSizeLocation = int(reflect.TypeOf(l).Size()) } +// FST or vellum value (uint64) encoding is determined by the top two +// highest-order or most significant bits... +// +// encoding : MSB +// name : 63 62 61...to...bit #0 (LSB) +// ----------+---+---+--------------------------------------------------- +// general : 0 | 0 | 62-bits of postingsOffset. +// ~ : 0 | 1 | reserved for future. +// 1-hit : 1 | 0 | 31-bits of positive float31 norm | 31-bits docNum. +// ~ : 1 | 1 | reserved for future. +// +// Encoding "general" is able to handle all cases, where the +// postingsOffset points to more information about the postings for +// the term. +// +// Encoding "1-hit" is used to optimize a commonly seen case when a +// term has only a single hit. For example, a term in the _id field +// will have only 1 hit. The "1-hit" encoding is used for a term +// in a field when... 
+// +// - term vector info is disabled for that field; +// - and, the term appears in only a single doc for that field; +// - and, the term's freq is exactly 1 in that single doc for that field; +// - and, the docNum must fit into 31-bits; +// +// Otherwise, the "general" encoding is used instead. +// +// In the "1-hit" encoding, the field in that single doc may have +// other terms, which is supported in the "1-hit" encoding by the +// positive float31 norm. + +const FSTValEncodingMask = uint64(0xc000000000000000) +const FSTValEncodingGeneral = uint64(0x0000000000000000) +const FSTValEncoding1Hit = uint64(0x8000000000000000) + +func FSTValEncode1Hit(docNum uint64, normBits uint64) uint64 { + return FSTValEncoding1Hit | ((mask31Bits & normBits) << 31) | (mask31Bits & docNum) +} + +func FSTValDecode1Hit(v uint64) (docNum uint64, normBits uint64) { + return (mask31Bits & v), (mask31Bits & (v >> 31)) +} + +const mask31Bits = uint64(0x000000007fffffff) + +func under32Bits(x uint64) bool { + return x <= mask31Bits +} + // PostingsList is an in-memory represenation of a postings list type PostingsList struct { sb *SegmentBase @@ -52,6 +101,10 @@ type PostingsList struct { locBitmap *roaring.Bitmap postings *roaring.Bitmap except *roaring.Bitmap + + // when postingsOffset == freqOffset == 0, then the postings list + // represents a "1-hit" encoding, and has the following norm + normBits1Hit uint64 } func (p *PostingsList) Size() int { @@ -85,6 +138,8 @@ func (p *PostingsList) iterator(rv *PostingsIterator) *PostingsIterator { } locDecoder := rv.locDecoder + buf := rv.buf + *rv = PostingsIterator{} // clear the struct rv.freqNormReader = freqNormReader @@ -92,11 +147,17 @@ func (p *PostingsList) iterator(rv *PostingsIterator) *PostingsIterator { rv.locReader = locReader rv.locDecoder = locDecoder + + rv.buf = buf } rv.postings = p - if p.postings != nil { - // prepare the freq chunk details + if p.postings == nil { + return rv + } + + if p.freqOffset > 0 && p.locOffset > 0 
{ + // "general" encoding, so prepare the freq chunk details var n uint64 var read int var numFreqChunks uint64 @@ -120,15 +181,19 @@ func (p *PostingsList) iterator(rv *PostingsIterator) *PostingsIterator { n += uint64(read) } rv.locChunkStart = p.locOffset + n - rv.locBitmap = p.locBitmap + } else { + // "1-hit" encoding + rv.normBits1Hit = p.normBits1Hit + } - rv.all = p.postings.Iterator() - if p.except != nil { - allExcept := roaring.AndNot(p.postings, p.except) - rv.actual = allExcept.Iterator() - } else { - rv.actual = p.postings.Iterator() - } + rv.locBitmap = p.locBitmap + + rv.all = p.postings.Iterator() + if p.except != nil { + allExcept := roaring.AndNot(p.postings, p.except) + rv.actual = allExcept.Iterator() + } else { + rv.actual = p.postings.Iterator() } return rv @@ -153,6 +218,11 @@ func (p *PostingsList) Count() uint64 { func (rv *PostingsList) read(postingsOffset uint64, d *Dictionary) error { rv.postingsOffset = postingsOffset + // handle "1-hit" encoding special case + if rv.postingsOffset&FSTValEncodingMask == FSTValEncoding1Hit { + return rv.init1Hit(postingsOffset) + } + // read the location of the freq/norm details var n uint64 var read int @@ -193,6 +263,24 @@ func (rv *PostingsList) read(postingsOffset uint64, d *Dictionary) error { return nil } +var emptyRoaring = roaring.NewBitmap() + +func (rv *PostingsList) init1Hit(fstVal uint64) error { + docNum, normBits := FSTValDecode1Hit(fstVal) + + rv.locBitmap = emptyRoaring + + rv.postings = roaring.NewBitmap() + rv.postings.Add(uint32(docNum)) + + // TODO: we can likely do better than allocating a roaring bitmap + // with just 1 entry, but for now reuse existing machinery + + rv.normBits1Hit = normBits + + return nil +} + // PostingsIterator provides a way to iterate through the postings list type PostingsIterator struct { postings *PostingsList @@ -219,6 +307,10 @@ type PostingsIterator struct { next Posting // reused across Next() calls nextLocs []Location // reused across Next() calls + 
+ normBits1Hit uint64 + + buf []byte } func (i *PostingsIterator) Size() int { @@ -244,7 +336,8 @@ func (i *PostingsIterator) loadChunk(chunk int) error { if chunk >= len(i.freqChunkLens) || chunk >= len(i.locChunkLens) { return fmt.Errorf("tried to load chunk that doesn't exist %d/(%d %d)", chunk, len(i.freqChunkLens), len(i.locChunkLens)) } - // load correct chunk bytes + + // load freq chunk bytes start := i.freqChunkStart for j := 0; j < chunk; j++ { start += i.freqChunkLens[j] @@ -258,6 +351,7 @@ func (i *PostingsIterator) loadChunk(chunk int) error { i.freqNormReader.Reset(i.currChunkFreqNorm) } + // load loc chunk bytes start = i.locChunkStart for j := 0; j < chunk; j++ { start += i.locChunkLens[j] @@ -270,11 +364,16 @@ func (i *PostingsIterator) loadChunk(chunk int) error { } else { i.locReader.Reset(i.currChunkLoc) } + i.currChunk = uint32(chunk) return nil } func (i *PostingsIterator) readFreqNorm() (uint64, uint64, error) { + if i.normBits1Hit != 0 { + return 1, i.normBits1Hit, nil + } + freq, err := i.freqNormDecoder.GetU64() if err != nil { return 0, 0, fmt.Errorf("error reading frequency: %v", err) @@ -360,6 +459,7 @@ func (i *PostingsIterator) Next() (segment.Posting, error) { return nil, err } rv.norm = math.Float32frombits(uint32(normBits)) + if i.locBitmap.Contains(uint32(docNum)) { // read off 'freq' locations, into reused slices if cap(i.nextLocs) >= int(rv.freq) { @@ -386,33 +486,40 @@ func (i *PostingsIterator) Next() (segment.Posting, error) { // nextBytes returns the docNum and the encoded freq & loc bytes for // the next posting -func (i *PostingsIterator) nextBytes() (uint64, []byte, []byte, error) { +func (i *PostingsIterator) nextBytes() ( + docNumOut uint64, freq uint64, normBits uint64, + bytesFreqNorm []byte, bytesLoc []byte, err error) { docNum, exists, err := i.nextDocNum() - if err != nil { - return 0, nil, nil, err + if err != nil || !exists { + return 0, 0, 0, nil, nil, err } - if !exists { - return 0, nil, nil, nil + + if 
i.normBits1Hit != 0 { + if i.buf == nil { + i.buf = make([]byte, binary.MaxVarintLen64*2) + } + n := binary.PutUvarint(i.buf, uint64(1)) + n += binary.PutUvarint(i.buf, i.normBits1Hit) + return docNum, uint64(1), i.normBits1Hit, i.buf[:n], nil, nil } startFreqNorm := len(i.currChunkFreqNorm) - i.freqNormReader.Len() - freq, _, err := i.readFreqNorm() + freq, normBits, err = i.readFreqNorm() if err != nil { - return 0, nil, nil, err + return 0, 0, 0, nil, nil, err } endFreqNorm := len(i.currChunkFreqNorm) - i.freqNormReader.Len() - bytesFreqNorm := i.currChunkFreqNorm[startFreqNorm:endFreqNorm] + bytesFreqNorm = i.currChunkFreqNorm[startFreqNorm:endFreqNorm] - var bytesLoc []byte if i.locBitmap.Contains(uint32(docNum)) { startLoc := len(i.currChunkLoc) - i.locReader.Len() for j := uint64(0); j < freq; j++ { err := i.readLocation(nil) if err != nil { - return 0, nil, nil, err + return 0, 0, 0, nil, nil, err } } @@ -420,7 +527,7 @@ func (i *PostingsIterator) nextBytes() (uint64, []byte, []byte, error) { bytesLoc = i.currChunkLoc[startLoc:endLoc] } - return docNum, bytesFreqNorm, bytesLoc, nil + return docNum, freq, normBits, bytesFreqNorm, bytesLoc, nil } // nextDocNum returns the next docNum on the postings list, and also @@ -431,8 +538,13 @@ func (i *PostingsIterator) nextDocNum() (uint64, bool, error) { } n := i.actual.Next() - nChunk := n / i.postings.sb.chunkFactor allN := i.all.Next() + + if i.normBits1Hit != 0 { + return uint64(n), true, nil + } + + nChunk := n / i.postings.sb.chunkFactor allNChunk := allN / i.postings.sb.chunkFactor // n is the next actual hit (excluding some postings) From 25beba615d5105b6933553b8af3f04559bc2ecf5 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Thu, 8 Mar 2018 09:36:36 -0800 Subject: [PATCH 272/728] scorch mem processDocument reuses fieldLens/docMap arrays This change produces less garbage by switching from a map[uint16]'s to array's for the fieldLens and docMap, and then reusing those arrays across multiple processDocument() 
calls. --- index/scorch/segment/mem/build.go | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/index/scorch/segment/mem/build.go b/index/scorch/segment/mem/build.go index a4b69013e..0b329704a 100644 --- a/index/scorch/segment/mem/build.go +++ b/index/scorch/segment/mem/build.go @@ -35,8 +35,10 @@ func NewFromAnalyzedDocs(results []*index.AnalysisResult) *Segment { s.initializeDict(results) // walk each doc + fieldLensReuse := make([]int, len(s.FieldsMap)) + docMapReuse := make([]analysis.TokenFrequencies, len(s.FieldsMap)) for _, result := range results { - s.processDocument(result) + s.processDocument(result, fieldLensReuse, docMapReuse) } // go back and sort the dictKeys @@ -209,19 +211,25 @@ func (s *Segment) initializeDict(results []*index.AnalysisResult) { } } -func (s *Segment) processDocument(result *index.AnalysisResult) { - // used to collate information across fields - docMap := make(map[uint16]analysis.TokenFrequencies, len(s.FieldsMap)) - fieldLens := make(map[uint16]int, len(s.FieldsMap)) +func (s *Segment) processDocument(result *index.AnalysisResult, + fieldLens []int, docMap []analysis.TokenFrequencies) { + // clear the fieldLens and docMap for reuse + n := len(s.FieldsMap) + for i := 0; i < n; i++ { + fieldLens[i] = 0 + docMap[i] = nil + } docNum := uint64(s.addDocument()) - processField := func(field uint16, name string, l int, tf analysis.TokenFrequencies) { - fieldLens[field] += l - if existingFreqs, ok := docMap[field]; ok { + processField := func(fieldID uint16, name string, l int, tf analysis.TokenFrequencies) { + fieldLens[fieldID] += l + + existingFreqs := docMap[fieldID] + if existingFreqs != nil { existingFreqs.MergeAll(name, tf) } else { - docMap[field] = tf + docMap[fieldID] = tf } } @@ -274,7 +282,7 @@ func (s *Segment) processDocument(result *index.AnalysisResult) { locarraypos := s.Locarraypos[pid] for _, loc := range tokenFreq.Locations { - var locf = fieldID + var locf = 
uint16(fieldID) if loc.Field != "" { locf = uint16(s.getOrDefineField(loc.Field)) } From 40f63baeb9812fc6f757b60245ce516ed20572bc Mon Sep 17 00:00:00 2001 From: abhinavdangeti Date: Thu, 8 Mar 2018 10:38:09 -0800 Subject: [PATCH 273/728] MB-28562: Support search query callbacks before and after execution + SearchQueryStartCallback + SearchQueryEndCallback --- index_impl.go | 21 ++++++++++++++++----- index_test.go | 6 +++--- 2 files changed, 19 insertions(+), 8 deletions(-) diff --git a/index_impl.go b/index_impl.go index 1036aef2a..68777f072 100644 --- a/index_impl.go +++ b/index_impl.go @@ -50,9 +50,11 @@ const storePath = "store" var mappingInternalKey = []byte("_mapping") -const SearchMemCheckCallbackKey = "_search_mem_callback_key" +const SearchQueryStartCallbackKey = "_search_query_start_callback_key" +const SearchQueryEndCallbackKey = "_search_query_end_callback_key" -type SearchMemCheckCallbackFn func(size uint64) error +type SearchQueryStartCallbackFn func(size uint64) error +type SearchQueryEndCallbackFn func(size uint64) error func indexStorePath(path string) string { return path + string(os.PathSeparator) + storePath @@ -483,15 +485,24 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr collector.SetFacetsBuilder(facetsBuilder) } - if memCb := ctx.Value(SearchMemCheckCallbackKey); memCb != nil { - if memCbFn, ok := memCb.(SearchMemCheckCallbackFn); ok { - err = memCbFn(memNeededForSearch(req, searcher, collector)) + memNeeded := memNeededForSearch(req, searcher, collector) + if cb := ctx.Value(SearchQueryStartCallbackKey); cb != nil { + if cbF, ok := cb.(SearchQueryStartCallbackFn); ok { + err = cbF(memNeeded) } } if err != nil { return nil, err } + if cb := ctx.Value(SearchQueryEndCallbackKey); cb != nil { + if cbF, ok := cb.(SearchQueryEndCallbackFn); ok { + defer func() { + _ = cbF(memNeeded) + }() + } + } + err = collector.Collect(ctx, searcher, indexReader) if err != nil { return nil, err diff --git a/index_test.go 
b/index_test.go index 57429dcb2..69ca61a98 100644 --- a/index_test.go +++ b/index_test.go @@ -1871,7 +1871,7 @@ func BenchmarkScorchSearchOverhead(b *testing.B) { benchmarkSearchOverhead(scorch.Name, b) } -func TestSearchMemCheckCallback(t *testing.T) { +func TestSearchQueryCallback(t *testing.T) { defer func() { err := os.RemoveAll("testidx") if err != nil { @@ -1910,8 +1910,8 @@ func TestSearchMemCheckCallback(t *testing.T) { return nil } - ctx := context.WithValue(context.Background(), SearchMemCheckCallbackKey, - SearchMemCheckCallbackFn(f)) + ctx := context.WithValue(context.Background(), SearchQueryStartCallbackKey, + SearchQueryStartCallbackFn(f)) _, err = index.SearchInContext(ctx, req) if err != expErr { t.Fatalf("Expected: %v, Got: %v", expErr, err) From b04909d3ee67e964e3decb4db745ce63106a7280 Mon Sep 17 00:00:00 2001 From: Sreekanth Sivasankaran Date: Wed, 7 Mar 2018 13:12:06 +0530 Subject: [PATCH 274/728] adding the integer parser utility --- index/scorch/persister.go | 11 +++------- index/scorch/scorch.go | 43 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 46 insertions(+), 8 deletions(-) diff --git a/index/scorch/persister.go b/index/scorch/persister.go index cab2d035d..ccb0c1f21 100644 --- a/index/scorch/persister.go +++ b/index/scorch/persister.go @@ -633,19 +633,14 @@ func (s *Scorch) removeOldBoltSnapshots() (numRemoved int, err error) { return 0, err } - numSnapshotsToKeep := NumSnapshotsToKeep - if val, ok := s.config["numSnapshotsToKeep"].(float64); ok && val > 0 { - numSnapshotsToKeep = int(val) - } - - if len(persistedEpochs) <= numSnapshotsToKeep { + if len(persistedEpochs) <= s.numSnapshotsToKeep { // we need to keep everything return 0, nil } // make a map of epochs to protect from deletion - protectedEpochs := make(map[uint64]struct{}, numSnapshotsToKeep) - for _, epoch := range persistedEpochs[0:numSnapshotsToKeep] { + protectedEpochs := make(map[uint64]struct{}, s.numSnapshotsToKeep) + for _, epoch := range 
persistedEpochs[0:s.numSnapshotsToKeep] { protectedEpochs[epoch] = struct{}{} } diff --git a/index/scorch/scorch.go b/index/scorch/scorch.go index 87372a326..7a33fb7f0 100644 --- a/index/scorch/scorch.go +++ b/index/scorch/scorch.go @@ -58,6 +58,7 @@ type Scorch struct { nextSnapshotEpoch uint64 eligibleForRemoval []uint64 // Index snapshot epochs that are safe to GC. ineligibleForRemoval map[string]bool // Filenames that should not be GC'ed yet. + numSnapshotsToKeep int closeCh chan struct{} introductions chan *segmentIntroduction @@ -191,6 +192,17 @@ func (s *Scorch) openBolt() error { } } + s.numSnapshotsToKeep = NumSnapshotsToKeep + if v, ok := s.config["numSnapshotsToKeep"]; ok { + var t int + if t, err = parseToInteger(v); err != nil { + return fmt.Errorf("numSnapshotsToKeep parse err: %v", err) + } + if t > 0 { + s.numSnapshotsToKeep = t + } + } + return nil } @@ -503,3 +515,34 @@ func (s *Scorch) unmarkIneligibleForRemoval(filename string) { func init() { registry.RegisterIndexType(Name, NewScorch) } + +func parseToInteger(v interface{}) (int, error) { + switch v.(type) { + case float32: + return int(v.(float32)), nil + case float64: + return int(v.(float64)), nil + case int: + return v.(int), nil + case int8: + return int(v.(int8)), nil + case int16: + return int(v.(int16)), nil + case int32: + return int(v.(int32)), nil + case int64: + return int(v.(int64)), nil + case uint: + return int(v.(uint)), nil + case uint8: + return int(v.(uint8)), nil + case uint16: + return int(v.(uint16)), nil + case uint32: + return int(v.(uint32)), nil + case uint64: + return int(v.(uint64)), nil + default: + return 0, fmt.Errorf("expects a numeric value") + } +} From d6522e7e17812bcb6795751abb1cad7acd7af6b1 Mon Sep 17 00:00:00 2001 From: Sreekanth Sivasankaran Date: Fri, 9 Mar 2018 16:01:37 +0530 Subject: [PATCH 275/728] minor optimisation to loadChunk method --- index/scorch/segment/zap/contentcoder.go | 7 +- index/scorch/segment/zap/docvalues.go | 23 ++- 
index/scorch/segment/zap/intcoder.go | 46 ++++- index/scorch/segment/zap/intcoder_test.go | 212 ++++++++++++++++++++++ index/scorch/segment/zap/posting.go | 44 ++--- 5 files changed, 294 insertions(+), 38 deletions(-) diff --git a/index/scorch/segment/zap/contentcoder.go b/index/scorch/segment/zap/contentcoder.go index 933f10a1e..c731f52c4 100644 --- a/index/scorch/segment/zap/contentcoder.go +++ b/index/scorch/segment/zap/contentcoder.go @@ -156,7 +156,12 @@ func (c *chunkedContentCoder) Write(w io.Writer) (int, error) { if err != nil { return tw, err } - // write out the chunk lens + + if len(c.chunkLens) > 1 { + chunkLengthsToOffsets(c.chunkLens) + } + + // write out the chunk starting offsets for _, chunkLen := range c.chunkLens { n := binary.PutUvarint(buf, uint64(chunkLen)) nw, err = w.Write(buf[:n]) diff --git a/index/scorch/segment/zap/docvalues.go b/index/scorch/segment/zap/docvalues.go index 13635c57e..882ff43dd 100644 --- a/index/scorch/segment/zap/docvalues.go +++ b/index/scorch/segment/zap/docvalues.go @@ -38,7 +38,7 @@ type docValueIterator struct { field string curChunkNum uint64 numChunks uint64 - chunkLens []uint64 + chunkOffsets []uint64 dvDataLoc uint64 curChunkHeader []MetaData curChunkData []byte // compressed data cache @@ -47,7 +47,7 @@ type docValueIterator struct { func (di *docValueIterator) size() int { return reflectStaticSizedocValueIterator + size.SizeOfPtr + len(di.field) + - len(di.chunkLens)*size.SizeOfUint64 + + len(di.chunkOffsets)*size.SizeOfUint64 + len(di.curChunkHeader)*reflectStaticSizeMetaData + len(di.curChunkData) } @@ -78,16 +78,16 @@ func (s *SegmentBase) loadFieldDocValueIterator(field string, offset += uint64(read) fdvIter := &docValueIterator{ - curChunkNum: math.MaxUint64, - field: field, - chunkLens: make([]uint64, int(numChunks)), + curChunkNum: math.MaxUint64, + field: field, + chunkOffsets: make([]uint64, int(numChunks)), } for i := 0; i < int(numChunks); i++ { clen, read = binary.Uvarint(s.mem[fieldDvLoc+offset 
: fieldDvLoc+offset+binary.MaxVarintLen64]) if read <= 0 { return nil, fmt.Errorf("corrupted chunk length during segment load") } - fdvIter.chunkLens[i] = clen + fdvIter.chunkOffsets[i] = clen offset += uint64(read) } @@ -99,12 +99,11 @@ func (di *docValueIterator) loadDvChunk(chunkNumber, localDocNum uint64, s *SegmentBase) error { // advance to the chunk where the docValues // reside for the given docNum - destChunkDataLoc := di.dvDataLoc - for i := 0; i < int(chunkNumber); i++ { - destChunkDataLoc += di.chunkLens[i] - } + destChunkDataLoc, curChunkEnd := di.dvDataLoc, di.dvDataLoc + start, end := readChunkBoundary(int(chunkNumber), di.chunkOffsets) + destChunkDataLoc += start + curChunkEnd += end - curChunkSize := di.chunkLens[chunkNumber] // read the number of docs reside in the chunk numDocs, read := binary.Uvarint(s.mem[destChunkDataLoc : destChunkDataLoc+binary.MaxVarintLen64]) if read <= 0 { @@ -124,7 +123,7 @@ func (di *docValueIterator) loadDvChunk(chunkNumber, } compressedDataLoc := chunkMetaLoc + offset - dataLength := destChunkDataLoc + curChunkSize - compressedDataLoc + dataLength := curChunkEnd - compressedDataLoc di.curChunkData = s.mem[compressedDataLoc : compressedDataLoc+dataLength] di.curChunkNum = chunkNumber return nil diff --git a/index/scorch/segment/zap/intcoder.go b/index/scorch/segment/zap/intcoder.go index 6680e608e..79fe5156e 100644 --- a/index/scorch/segment/zap/intcoder.go +++ b/index/scorch/segment/zap/intcoder.go @@ -111,7 +111,12 @@ func (c *chunkedIntCoder) Write(w io.Writer) (int, error) { } buf := c.buf - // write out the number of chunks & each chunkLen + // convert the chunk lengths into starting chunk offsets + if len(c.chunkLens) > 1 { + chunkLengthsToOffsets(c.chunkLens) + } + + // write out the number of chunks & each chunk starting offsets n := binary.PutUvarint(buf, uint64(len(c.chunkLens))) for _, chunkLen := range c.chunkLens { n += binary.PutUvarint(buf[n:], uint64(chunkLen)) @@ -134,3 +139,42 @@ func (c 
*chunkedIntCoder) Write(w io.Writer) (int, error) { func (c *chunkedIntCoder) FinalSize() int { return len(c.final) } + +// chunkLengthsToOffsets converts the chunk length array +// to a chunk starting offset array. The readChunkBoundary +// will figure out the start and end of every chunk from +// these offsets. The starting offset of the first/single +// array element will always be zero and this position is +// used for storing the size of the current last item in +// the array at any given point. +// For eg: +// Lens -> 5 5 5 5 => 5 5 10 15 +// Lens -> 0 5 0 5 => 5 0 5 5 +// Lens -> 0 0 0 5 => 5 0 0 0 +// Lens -> 5 0 0 0 => 0 5 5 5 +// Lens -> 0 5 0 0 => 0 0 5 5 +// Lens -> 0 0 5 0 => 0 0 0 5 +func chunkLengthsToOffsets(lengths []uint64) { + lengths[1], lengths[0] = lengths[0], lengths[1] + for i := 2; i < len(lengths); i++ { + cur := lengths[i] + lengths[i] = lengths[i-1] + lengths[0] + lengths[0] = cur + } +} + +func readChunkBoundary(chunk int, offsets []uint64) (uint64, uint64) { + var start, end uint64 + if chunk > 0 { + start = offsets[chunk] + } + // single element case + if chunk == 0 && len(offsets) == 1 { + end = offsets[chunk] + } else if chunk < len(offsets)-1 { + end = offsets[chunk+1] + } else { // for last element + end = start + offsets[0] + } + return start, end +} diff --git a/index/scorch/segment/zap/intcoder_test.go b/index/scorch/segment/zap/intcoder_test.go index 85d2c5a76..8c77eab61 100644 --- a/index/scorch/segment/zap/intcoder_test.go +++ b/index/scorch/segment/zap/intcoder_test.go @@ -71,3 +71,215 @@ func TestChunkIntCoder(t *testing.T) { } } } + +func TestChunkLengthToOffsets(t *testing.T) { + + tests := []struct { + lengths []uint64 + expectedOffsets []uint64 + }{ + { + lengths: []uint64{5, 5, 5, 5, 5}, + expectedOffsets: []uint64{5, 5, 10, 15, 20}, + }, + { + lengths: []uint64{0, 5, 0, 5, 0}, + expectedOffsets: []uint64{0, 0, 5, 5, 10}, + }, + { + lengths: []uint64{0, 0, 0, 0, 5}, + expectedOffsets: []uint64{5, 0, 0, 0, 0}, + }, + { 
+ lengths: []uint64{5, 0, 0, 0, 0}, + expectedOffsets: []uint64{0, 5, 5, 5, 5}, + }, + { + lengths: []uint64{0, 5, 0, 0, 0}, + expectedOffsets: []uint64{0, 0, 5, 5, 5}, + }, + { + lengths: []uint64{0, 0, 0, 5, 0}, + expectedOffsets: []uint64{0, 0, 0, 0, 5}, + }, + { + lengths: []uint64{0, 0, 0, 5, 5}, + expectedOffsets: []uint64{5, 0, 0, 0, 5}, + }, + { + lengths: []uint64{5, 5, 5, 0, 0}, + expectedOffsets: []uint64{0, 5, 10, 15, 15}, + }, + } + + for i, test := range tests { + chunkLengthsToOffsets(test.lengths) + if !reflect.DeepEqual(test.expectedOffsets, test.lengths) { + t.Errorf("Test: %d failed, got %+v, expected %+v", i, test.lengths, test.expectedOffsets) + } + } +} + +func TestChunkReadBoundaryFromOffsets(t *testing.T) { + + tests := []struct { + chunkNumber int + offsets []uint64 + expectedStart uint64 + expectedEnd uint64 + }{ + { + offsets: []uint64{5, 5, 10, 15, 20}, + chunkNumber: 4, + expectedStart: 20, + expectedEnd: 25, + }, + { + offsets: []uint64{5, 5, 10, 15, 20}, + chunkNumber: 0, + expectedStart: 0, + expectedEnd: 5, + }, + { + offsets: []uint64{5, 5, 10, 15, 20}, + chunkNumber: 2, + expectedStart: 10, + expectedEnd: 15, + }, + { + offsets: []uint64{0, 0, 5, 5, 10}, + chunkNumber: 4, + expectedStart: 10, + expectedEnd: 10, + }, + { + offsets: []uint64{0, 0, 5, 5, 10}, + chunkNumber: 1, + expectedStart: 0, + expectedEnd: 5, + }, + { + offsets: []uint64{5, 0, 0, 0, 0}, + chunkNumber: 0, + expectedStart: 0, + expectedEnd: 0, + }, + { + offsets: []uint64{5, 0, 0, 0, 0}, + chunkNumber: 4, + expectedStart: 0, + expectedEnd: 5, + }, + { + offsets: []uint64{5, 0, 0, 0, 0}, + chunkNumber: 1, + expectedStart: 0, + expectedEnd: 0, + }, + { + offsets: []uint64{0, 5, 5, 5, 5}, + chunkNumber: 1, + expectedStart: 5, + expectedEnd: 5, + }, + { + offsets: []uint64{0, 5, 5, 5, 5}, + chunkNumber: 0, + expectedStart: 0, + expectedEnd: 5, + }, + { + offsets: []uint64{0, 0, 5, 5, 5}, + chunkNumber: 2, + expectedStart: 5, + expectedEnd: 5, + }, + { + offsets: 
[]uint64{0, 0, 5, 5, 5}, + chunkNumber: 1, + expectedStart: 0, + expectedEnd: 5, + }, + { + offsets: []uint64{0, 0, 0, 0, 5}, + chunkNumber: 4, + expectedStart: 5, + expectedEnd: 5, + }, + { + offsets: []uint64{0, 0, 0, 0, 5}, + chunkNumber: 3, + expectedStart: 0, + expectedEnd: 5, + }, + { + offsets: []uint64{0, 0, 0, 0, 5}, + chunkNumber: 2, + expectedStart: 0, + expectedEnd: 0, + }, + { + offsets: []uint64{5, 0, 0, 0, 5}, + chunkNumber: 0, + expectedStart: 0, + expectedEnd: 0, + }, + { + offsets: []uint64{5, 0, 0, 0, 5}, + chunkNumber: 1, + expectedStart: 0, + expectedEnd: 0, + }, + { + offsets: []uint64{5, 0, 0, 0, 5}, + chunkNumber: 3, + expectedStart: 0, + expectedEnd: 5, + }, + { + offsets: []uint64{5, 0, 0, 0, 5}, + chunkNumber: 4, + expectedStart: 5, + expectedEnd: 10, + }, + { + offsets: []uint64{0, 5, 10, 15, 15}, + chunkNumber: 0, + expectedStart: 0, + expectedEnd: 5, + }, + { + offsets: []uint64{0, 5, 10, 15, 15}, + chunkNumber: 1, + expectedStart: 5, + expectedEnd: 10, + }, + { + offsets: []uint64{0, 5, 10, 15, 15}, + chunkNumber: 2, + expectedStart: 10, + expectedEnd: 15, + }, + { + offsets: []uint64{0, 5, 10, 15, 15}, + chunkNumber: 3, + expectedStart: 15, + expectedEnd: 15, + }, + { + offsets: []uint64{0, 5, 10, 15, 15}, + chunkNumber: 4, + expectedStart: 15, + expectedEnd: 15, + }, + } + + for i, test := range tests { + s, e := readChunkBoundary(test.chunkNumber, test.offsets) + if test.expectedStart != s || test.expectedEnd != e { + t.Errorf("Test: %d failed for chunkNumber: %d got start: %d end: %d,"+ + " expected start: %d end: %d", i, test.chunkNumber, s, e, + test.expectedStart, test.expectedEnd) + } + } +} diff --git a/index/scorch/segment/zap/posting.go b/index/scorch/segment/zap/posting.go index 7ae36120b..c47648cda 100644 --- a/index/scorch/segment/zap/posting.go +++ b/index/scorch/segment/zap/posting.go @@ -163,9 +163,9 @@ func (p *PostingsList) iterator(rv *PostingsIterator) *PostingsIterator { var numFreqChunks uint64 numFreqChunks, 
read = binary.Uvarint(p.sb.mem[p.freqOffset+n : p.freqOffset+n+binary.MaxVarintLen64]) n += uint64(read) - rv.freqChunkLens = make([]uint64, int(numFreqChunks)) + rv.freqChunkOffsets = make([]uint64, int(numFreqChunks)) for i := 0; i < int(numFreqChunks); i++ { - rv.freqChunkLens[i], read = binary.Uvarint(p.sb.mem[p.freqOffset+n : p.freqOffset+n+binary.MaxVarintLen64]) + rv.freqChunkOffsets[i], read = binary.Uvarint(p.sb.mem[p.freqOffset+n : p.freqOffset+n+binary.MaxVarintLen64]) n += uint64(read) } rv.freqChunkStart = p.freqOffset + n @@ -175,9 +175,9 @@ func (p *PostingsList) iterator(rv *PostingsIterator) *PostingsIterator { var numLocChunks uint64 numLocChunks, read = binary.Uvarint(p.sb.mem[p.locOffset+n : p.locOffset+n+binary.MaxVarintLen64]) n += uint64(read) - rv.locChunkLens = make([]uint64, int(numLocChunks)) + rv.locChunkOffsets = make([]uint64, int(numLocChunks)) for i := 0; i < int(numLocChunks); i++ { - rv.locChunkLens[i], read = binary.Uvarint(p.sb.mem[p.locOffset+n : p.locOffset+n+binary.MaxVarintLen64]) + rv.locChunkOffsets[i], read = binary.Uvarint(p.sb.mem[p.locOffset+n : p.locOffset+n+binary.MaxVarintLen64]) n += uint64(read) } rv.locChunkStart = p.locOffset + n @@ -297,11 +297,11 @@ type PostingsIterator struct { locDecoder *govarint.Base128Decoder locReader *bytes.Reader - freqChunkLens []uint64 - freqChunkStart uint64 + freqChunkOffsets []uint64 + freqChunkStart uint64 - locChunkLens []uint64 - locChunkStart uint64 + locChunkOffsets []uint64 + locChunkStart uint64 locBitmap *roaring.Bitmap @@ -317,8 +317,8 @@ func (i *PostingsIterator) Size() int { sizeInBytes := reflectStaticSizePostingsIterator + size.SizeOfPtr + len(i.currChunkFreqNorm) + len(i.currChunkLoc) + - len(i.freqChunkLens)*size.SizeOfUint64 + - len(i.locChunkLens)*size.SizeOfUint64 + + len(i.freqChunkOffsets)*size.SizeOfUint64 + + len(i.locChunkOffsets)*size.SizeOfUint64 + i.next.Size() if i.locBitmap != nil { @@ -333,16 +333,14 @@ func (i *PostingsIterator) Size() int { } func 
(i *PostingsIterator) loadChunk(chunk int) error { - if chunk >= len(i.freqChunkLens) || chunk >= len(i.locChunkLens) { - return fmt.Errorf("tried to load chunk that doesn't exist %d/(%d %d)", chunk, len(i.freqChunkLens), len(i.locChunkLens)) + if chunk >= len(i.freqChunkOffsets) || chunk >= len(i.locChunkOffsets) { + return fmt.Errorf("tried to load chunk that doesn't exist %d/(%d %d)", chunk, len(i.freqChunkOffsets), len(i.locChunkOffsets)) } - // load freq chunk bytes - start := i.freqChunkStart - for j := 0; j < chunk; j++ { - start += i.freqChunkLens[j] - } - end := start + i.freqChunkLens[chunk] + end, start := i.freqChunkStart, i.freqChunkStart + s, e := readChunkBoundary(chunk, i.freqChunkOffsets) + start += s + end += e i.currChunkFreqNorm = i.postings.sb.mem[start:end] if i.freqNormReader == nil { i.freqNormReader = bytes.NewReader(i.currChunkFreqNorm) @@ -351,12 +349,10 @@ func (i *PostingsIterator) loadChunk(chunk int) error { i.freqNormReader.Reset(i.currChunkFreqNorm) } - // load loc chunk bytes - start = i.locChunkStart - for j := 0; j < chunk; j++ { - start += i.locChunkLens[j] - } - end = start + i.locChunkLens[chunk] + end, start = i.locChunkStart, i.locChunkStart + s, e = readChunkBoundary(chunk, i.locChunkOffsets) + start += s + end += e i.currChunkLoc = i.postings.sb.mem[start:end] if i.locReader == nil { i.locReader = bytes.NewReader(i.currChunkLoc) From 8e8c3ee8c92446a6d48b651a2ede68764f12ca8e Mon Sep 17 00:00:00 2001 From: abhinavdangeti Date: Thu, 8 Mar 2018 15:29:01 -0800 Subject: [PATCH 276/728] Adding RoaringBitmap/roaring to the bleve vendor manifest --- vendor/manifest | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/vendor/manifest b/vendor/manifest index 0837684fa..1883de76e 100644 --- a/vendor/manifest +++ b/vendor/manifest @@ -74,6 +74,14 @@ "branch": "master", "notests": true }, + { + "importpath": "github.com/RoaringBitmap/roaring", + "repository": "https://github.com/RoaringBitmap/roaring", + "vcs": "", + "revision": 
"01d244c43a7e8d1191a4f369f5908ea9eb9bc9ac", + "branch": "master", + "notests": true + }, { "importpath": "github.com/seiflotfy/cuckoofilter", "repository": "https://github.com/seiflotfy/cuckoofilter", From 3884cf4d1216457fe46fbe617cf4e73c3a516250 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Thu, 8 Mar 2018 21:36:19 -0800 Subject: [PATCH 277/728] scorch zap writePostings() helper func refactored out --- index/scorch/segment/zap/merge.go | 120 ++++++++++++++++++------------ 1 file changed, 71 insertions(+), 49 deletions(-) diff --git a/index/scorch/segment/zap/merge.go b/index/scorch/segment/zap/merge.go index 383fedbf3..a934dfc35 100644 --- a/index/scorch/segment/zap/merge.go +++ b/index/scorch/segment/zap/merge.go @@ -248,56 +248,15 @@ func persistMergedRest(segments []*SegmentBase, dropsIn []*roaring.Bitmap, tfEncoder.Close() locEncoder.Close() - termCardinality := newRoaring.GetCardinality() - - encodeAs1Hit, docNum1Hit, normBits1Hit := use1HitEncoding(termCardinality) - if encodeAs1Hit { - err = newVellum.Insert(term, FSTValEncode1Hit(docNum1Hit, normBits1Hit)) - if err != nil { - return err - } - } else if termCardinality > 0 { - // this field/term has hits in the new segment - freqOffset := uint64(w.Count()) - _, err := tfEncoder.Write(w) - if err != nil { - return err - } - locOffset := uint64(w.Count()) - _, err = locEncoder.Write(w) - if err != nil { - return err - } - postingLocOffset := uint64(w.Count()) - _, err = writeRoaringWithLen(newRoaringLocs, w, bufMaxVarintLen64) - if err != nil { - return err - } - postingOffset := uint64(w.Count()) - // write out the start of the term info - n := binary.PutUvarint(bufMaxVarintLen64, freqOffset) - _, err = w.Write(bufMaxVarintLen64[:n]) - if err != nil { - return err - } - // write out the start of the loc info - n = binary.PutUvarint(bufMaxVarintLen64, locOffset) - _, err = w.Write(bufMaxVarintLen64[:n]) - if err != nil { - return err - } - // write out the start of the posting locs - n = 
binary.PutUvarint(bufMaxVarintLen64, postingLocOffset) - _, err = w.Write(bufMaxVarintLen64[:n]) - if err != nil { - return err - } - _, err = writeRoaringWithLen(newRoaring, w, bufMaxVarintLen64) - if err != nil { - return err - } + postingsOffset, err := writePostings( + newRoaring, newRoaringLocs, tfEncoder, locEncoder, + use1HitEncoding, w, bufMaxVarintLen64) + if err != nil { + return err + } - err = newVellum.Insert(term, postingOffset) + if postingsOffset > 0 { + err = newVellum.Insert(term, postingsOffset) if err != nil { return err } @@ -460,6 +419,69 @@ func persistMergedRest(segments []*SegmentBase, dropsIn []*roaring.Bitmap, return rv, fieldDvLocsOffset, nil } +func writePostings(postings, postingLocs *roaring.Bitmap, + tfEncoder, locEncoder *chunkedIntCoder, + use1HitEncoding func(uint64) (bool, uint64, uint64), + w *CountHashWriter, bufMaxVarintLen64 []byte) ( + offset uint64, err error) { + termCardinality := postings.GetCardinality() + if termCardinality <= 0 { + return 0, nil + } + + if use1HitEncoding != nil { + encodeAs1Hit, docNum1Hit, normBits1Hit := use1HitEncoding(termCardinality) + if encodeAs1Hit { + return FSTValEncode1Hit(docNum1Hit, normBits1Hit), nil + } + } + + tfOffset := uint64(w.Count()) + _, err = tfEncoder.Write(w) + if err != nil { + return 0, err + } + + locOffset := uint64(w.Count()) + _, err = locEncoder.Write(w) + if err != nil { + return 0, err + } + + postingLocsOffset := uint64(w.Count()) + _, err = writeRoaringWithLen(postingLocs, w, bufMaxVarintLen64) + if err != nil { + return 0, err + } + + postingsOffset := uint64(w.Count()) + + n := binary.PutUvarint(bufMaxVarintLen64, tfOffset) + _, err = w.Write(bufMaxVarintLen64[:n]) + if err != nil { + return 0, err + } + + n = binary.PutUvarint(bufMaxVarintLen64, locOffset) + _, err = w.Write(bufMaxVarintLen64[:n]) + if err != nil { + return 0, err + } + + n = binary.PutUvarint(bufMaxVarintLen64, postingLocsOffset) + _, err = w.Write(bufMaxVarintLen64[:n]) + if err != nil { + 
return 0, err + } + + _, err = writeRoaringWithLen(postings, w, bufMaxVarintLen64) + if err != nil { + return 0, err + } + + return postingsOffset, nil +} + func mergeStoredAndRemap(segments []*SegmentBase, drops []*roaring.Bitmap, fieldsMap map[string]uint16, fieldsInv []string, fieldsSame bool, newSegDocCount uint64, w *CountHashWriter) (uint64, [][]uint64, error) { From e82774ad20cef894996733f144a7f8c09c3ae7f1 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Fri, 9 Mar 2018 00:16:28 -0800 Subject: [PATCH 278/728] scorch zap AnalysisResultsToSegmentBase() AnalysisResultsToSegmentBase() allows analysis results to be directly converted into a zap-encoded SegmentBase, which can then be introduced onto the root, avoiding the creation of mem.Segment data structures. This leads to some reduction of garbage memory allocations. The grouping and sorting and shaping of the postings list information is taken from the mem.Segment codepaths. The encoding of stored fields reuses functions from zap's merger, which has the largest savings of garbage memory avoidance. And, the encoding of tf/loc chunks, postings & dictionary information also follows the approach used by zap's merger, which also has some savings of garbage memory avoidance. In future changes, the mem.Segment dependencies will be removed from zap, which should result in a smaller codebase. 
--- index/scorch/scorch.go | 3 +- index/scorch/segment/zap/new.go | 659 ++++++++++++++++++++++++++++++++ 2 files changed, 660 insertions(+), 2 deletions(-) create mode 100644 index/scorch/segment/zap/new.go diff --git a/index/scorch/scorch.go b/index/scorch/scorch.go index 6d0bcd1e3..e16a146f7 100644 --- a/index/scorch/scorch.go +++ b/index/scorch/scorch.go @@ -28,7 +28,6 @@ import ( "github.com/blevesearch/bleve/document" "github.com/blevesearch/bleve/index" "github.com/blevesearch/bleve/index/scorch/segment" - "github.com/blevesearch/bleve/index/scorch/segment/mem" "github.com/blevesearch/bleve/index/scorch/segment/zap" "github.com/blevesearch/bleve/index/store" "github.com/blevesearch/bleve/registry" @@ -289,7 +288,7 @@ func (s *Scorch) Batch(batch *index.Batch) (err error) { var newSegment segment.Segment if len(analysisResults) > 0 { - newSegment, err = zap.NewSegmentBase(mem.NewFromAnalyzedDocs(analysisResults), DefaultChunkFactor) + newSegment, err = zap.AnalysisResultsToSegmentBase(analysisResults, DefaultChunkFactor) if err != nil { return err } diff --git a/index/scorch/segment/zap/new.go b/index/scorch/segment/zap/new.go new file mode 100644 index 000000000..3a8b2012d --- /dev/null +++ b/index/scorch/segment/zap/new.go @@ -0,0 +1,659 @@ +// Copyright (c) 2018 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package zap + +import ( + "bytes" + "encoding/binary" + "math" + "sort" + + "github.com/RoaringBitmap/roaring" + "github.com/Smerity/govarint" + "github.com/blevesearch/bleve/analysis" + "github.com/blevesearch/bleve/document" + "github.com/blevesearch/bleve/index" + "github.com/couchbase/vellum" + "github.com/golang/snappy" +) + +// AnalysisResultsToSegmentBase produces an in-memory zap-encoded +// SegmentBase from analysis results +func AnalysisResultsToSegmentBase(results []*index.AnalysisResult, + chunkFactor uint32) (*SegmentBase, error) { + var br bytes.Buffer + + s := interim{ + results: results, + chunkFactor: chunkFactor, + w: NewCountHashWriter(&br), + FieldsMap: map[string]uint16{}, + } + + storedIndexOffset, fieldsIndexOffset, fdvIndexOffset, dictOffsets, + err := s.convert() + if err != nil { + return nil, err + } + + sb, err := InitSegmentBase(br.Bytes(), s.w.Sum32(), chunkFactor, + s.FieldsMap, s.FieldsInv, uint64(len(results)), + storedIndexOffset, fieldsIndexOffset, fdvIndexOffset, dictOffsets) + + return sb, err +} + +// interim holds temporary working data used while converting from +// analysis results to a zap-encoded segment +type interim struct { + results []*index.AnalysisResult + + chunkFactor uint32 + + w *CountHashWriter + + // FieldsMap adds 1 to field id to avoid zero value issues + // name -> field id + 1 + FieldsMap map[string]uint16 + + // FieldsInv is the inverse of FieldsMap + // field id -> name + FieldsInv []string + + // Term dictionaries for each field + // field id -> term -> postings list id + 1 + Dicts []map[string]uint64 + + // Terms for each field, where terms are sorted ascending + // field id -> []term + DictKeys [][]string + + // Fields whose IncludeDocValues is true + // field id -> bool + IncludeDocValues []bool + + // postings id -> bitmap of docNums + Postings []*roaring.Bitmap + + // postings id -> bitmap of docNums that have locations + PostingsLocs []*roaring.Bitmap + + // postings id -> freq/norm's, one for 
each docNum in postings + FreqNorms [][]interimFreqNorm + + // postings id -> locs, one for each freq + Locs [][]interimLoc + + buf0 bytes.Buffer + tmp0 []byte + tmp1 []byte +} + +func (s *interim) grabBuf(size int) []byte { + buf := s.tmp0 + if cap(buf) < size { + buf = make([]byte, size) + s.tmp0 = buf + } + return buf[0:size] +} + +type interimStoredField struct { + vals [][]byte + typs []byte + arrayposs [][]uint64 // array positions +} + +type interimFreqNorm struct { + freq uint64 + norm float32 +} + +type interimLoc struct { + fieldID uint16 + pos uint64 + start uint64 + end uint64 + arrayposs []uint64 +} + +func (s *interim) convert() (uint64, uint64, uint64, []uint64, error) { + s.getOrDefineField("_id") // _id field is fieldID 0 + + for _, result := range s.results { + for _, field := range result.Document.CompositeFields { + s.getOrDefineField(field.Name()) + } + for _, field := range result.Document.Fields { + s.getOrDefineField(field.Name()) + } + } + + sort.Strings(s.FieldsInv[1:]) // keep _id as first field + + s.FieldsMap = make(map[string]uint16, len(s.FieldsInv)) + for fieldID, fieldName := range s.FieldsInv { + s.FieldsMap[fieldName] = uint16(fieldID + 1) + } + + s.IncludeDocValues = make([]bool, len(s.FieldsInv)) + + s.prepareDicts() + + for _, dict := range s.DictKeys { + sort.Strings(dict) + } + + s.processDocuments() + + storedIndexOffset, err := s.writeStoredFields() + if err != nil { + return 0, 0, 0, nil, err + } + + var fdvIndexOffset uint64 + var dictOffsets []uint64 + + if len(s.results) > 0 { + fdvIndexOffset, dictOffsets, err = s.writeDicts() + if err != nil { + return 0, 0, 0, nil, err + } + } else { + dictOffsets = make([]uint64, len(s.FieldsInv)) + } + + fieldsIndexOffset, err := persistFields(s.FieldsInv, s.w, dictOffsets) + if err != nil { + return 0, 0, 0, nil, err + } + + return storedIndexOffset, fieldsIndexOffset, fdvIndexOffset, dictOffsets, nil +} + +func (s *interim) getOrDefineField(fieldName string) int { + fieldIDPlus1, 
exists := s.FieldsMap[fieldName] + if !exists { + fieldIDPlus1 = uint16(len(s.FieldsInv) + 1) + s.FieldsMap[fieldName] = fieldIDPlus1 + s.FieldsInv = append(s.FieldsInv, fieldName) + s.Dicts = append(s.Dicts, make(map[string]uint64)) + s.DictKeys = append(s.DictKeys, make([]string, 0)) + } + return int(fieldIDPlus1 - 1) +} + +// fill Dicts and DictKeys from analysis results +func (s *interim) prepareDicts() { + var pidNext int + + numTermsPerPostingsList := make([]int, 0, 64) // key is postings list id + numLocsPerPostingsList := make([]int, 0, 64) // key is postings list id + + var totTFs int + var totLocs int + + visitField := func(fieldID uint16, tfs analysis.TokenFrequencies) { + dict := s.Dicts[fieldID] + dictKeys := s.DictKeys[fieldID] + + for term, tf := range tfs { + pidPlus1, exists := dict[term] + if !exists { + pidNext++ + pidPlus1 = uint64(pidNext) + + dict[term] = pidPlus1 + dictKeys = append(dictKeys, term) + + numTermsPerPostingsList = append(numTermsPerPostingsList, 0) + numLocsPerPostingsList = append(numLocsPerPostingsList, 0) + } + + pid := pidPlus1 - 1 + + numTermsPerPostingsList[pid] += 1 + numLocsPerPostingsList[pid] += len(tf.Locations) + + totLocs += len(tf.Locations) + } + + totTFs += len(tfs) + + s.DictKeys[fieldID] = dictKeys + } + + for _, result := range s.results { + // walk each composite field + for _, field := range result.Document.CompositeFields { + fieldID := uint16(s.getOrDefineField(field.Name())) + _, tf := field.Analyze() + visitField(fieldID, tf) + } + + // walk each field + for i, field := range result.Document.Fields { + fieldID := uint16(s.getOrDefineField(field.Name())) + tf := result.Analyzed[i] + visitField(fieldID, tf) + } + } + + numPostingsLists := pidNext + + s.Postings = make([]*roaring.Bitmap, numPostingsLists) + for i := 0; i < numPostingsLists; i++ { + s.Postings[i] = roaring.New() + } + + s.PostingsLocs = make([]*roaring.Bitmap, numPostingsLists) + for i := 0; i < numPostingsLists; i++ { + s.PostingsLocs[i] = 
roaring.New() + } + + s.FreqNorms = make([][]interimFreqNorm, numPostingsLists) + + freqNormsBacking := make([]interimFreqNorm, totTFs) + for pid, numTerms := range numTermsPerPostingsList { + s.FreqNorms[pid] = freqNormsBacking[0:0] + freqNormsBacking = freqNormsBacking[numTerms:] + } + + s.Locs = make([][]interimLoc, numPostingsLists) + + locsBacking := make([]interimLoc, totLocs) + for pid, numLocs := range numLocsPerPostingsList { + s.Locs[pid] = locsBacking[0:0] + locsBacking = locsBacking[numLocs:] + } +} + +func (s *interim) processDocuments() { + numFields := len(s.FieldsInv) + reuseFieldLens := make([]int, numFields) + reuseFieldTFs := make([]analysis.TokenFrequencies, numFields) + + for docNum, result := range s.results { + for i := 0; i < numFields; i++ { // clear these for reuse + reuseFieldLens[i] = 0 + reuseFieldTFs[i] = nil + } + + s.processDocument(uint64(docNum), result, + reuseFieldLens, reuseFieldTFs) + } +} + +func (s *interim) processDocument(docNum uint64, + result *index.AnalysisResult, + fieldLens []int, fieldTFs []analysis.TokenFrequencies) { + visitField := func(fieldID uint16, fieldName string, + ln int, tf analysis.TokenFrequencies) { + fieldLens[fieldID] += ln + + existingFreqs := fieldTFs[fieldID] + if existingFreqs != nil { + existingFreqs.MergeAll(fieldName, tf) + } else { + fieldTFs[fieldID] = tf + } + } + + // walk each composite field + for _, field := range result.Document.CompositeFields { + fieldID := uint16(s.getOrDefineField(field.Name())) + ln, tf := field.Analyze() + visitField(fieldID, field.Name(), ln, tf) + } + + // walk each field + for i, field := range result.Document.Fields { + fieldID := uint16(s.getOrDefineField(field.Name())) + ln := result.Length[i] + tf := result.Analyzed[i] + visitField(fieldID, field.Name(), ln, tf) + } + + // now that it's been rolled up into fieldTFs, walk that + for fieldID, tfs := range fieldTFs { + dict := s.Dicts[fieldID] + norm := float32(1.0 / math.Sqrt(float64(fieldLens[fieldID]))) + 
+ for term, tf := range tfs { + pid := dict[term] - 1 + bs := s.Postings[pid] + bs.AddInt(int(docNum)) + + s.FreqNorms[pid] = append(s.FreqNorms[pid], + interimFreqNorm{ + freq: uint64(tf.Frequency()), + norm: norm, + }) + + if len(tf.Locations) > 0 { + locBS := s.PostingsLocs[pid] + locBS.AddInt(int(docNum)) + + locs := s.Locs[pid] + + for _, loc := range tf.Locations { + var locf = uint16(fieldID) + if loc.Field != "" { + locf = uint16(s.getOrDefineField(loc.Field)) + } + var arrayposs []uint64 + if len(loc.ArrayPositions) > 0 { + arrayposs = loc.ArrayPositions + } + locs = append(locs, interimLoc{ + fieldID: locf, + pos: uint64(loc.Position), + start: uint64(loc.Start), + end: uint64(loc.End), + arrayposs: arrayposs, + }) + } + + s.Locs[pid] = locs + } + } + } +} + +func (s *interim) writeStoredFields() ( + storedIndexOffset uint64, err error) { + metaBuf := &s.buf0 + metaEncoder := govarint.NewU64Base128Encoder(metaBuf) + + data, compressed := s.tmp0[:0], s.tmp1[:0] + defer func() { s.tmp0, s.tmp1 = data, compressed }() + + // keyed by docNum + docStoredOffsets := make([]uint64, len(s.results)) + + // keyed by fieldID, for the current doc in the loop + docStoredFields := map[uint16]interimStoredField{} + + for docNum, result := range s.results { + for fieldID := range docStoredFields { // reset for next doc + delete(docStoredFields, fieldID) + } + + for _, field := range result.Document.Fields { + fieldID := uint16(s.getOrDefineField(field.Name())) + + opts := field.Options() + + if opts.IsStored() { + isf := docStoredFields[fieldID] + isf.vals = append(isf.vals, field.Value()) + isf.typs = append(isf.typs, encodeFieldType(field)) + isf.arrayposs = append(isf.arrayposs, field.ArrayPositions()) + docStoredFields[fieldID] = isf + } + + if opts.IncludeDocValues() { + s.IncludeDocValues[fieldID] = true + } + } + + var curr int + + metaBuf.Reset() + data = data[:0] + compressed = compressed[:0] + + for fieldID := range s.FieldsInv { + isf, exists := 
docStoredFields[uint16(fieldID)] + if exists { + curr, data, err = persistStoredFieldValues( + fieldID, isf.vals, isf.typs, isf.arrayposs, + curr, metaEncoder, data) + if err != nil { + return 0, err + } + } + } + + metaEncoder.Close() + metaBytes := metaBuf.Bytes() + + compressed = snappy.Encode(compressed, data) + + docStoredOffsets[docNum] = uint64(s.w.Count()) + + _, err := writeUvarints(s.w, + uint64(len(metaBytes)), + uint64(len(compressed))) + if err != nil { + return 0, err + } + + _, err = s.w.Write(metaBytes) + if err != nil { + return 0, err + } + + _, err = s.w.Write(compressed) + if err != nil { + return 0, err + } + } + + storedIndexOffset = uint64(s.w.Count()) + + for _, docStoredOffset := range docStoredOffsets { + err = binary.Write(s.w, binary.BigEndian, docStoredOffset) + if err != nil { + return 0, err + } + } + + return storedIndexOffset, nil +} + +func (s *interim) writeDicts() (uint64, []uint64, error) { + dictOffsets := make([]uint64, len(s.FieldsInv)) + + fdvOffsets := make([]uint64, len(s.FieldsInv)) + + buf := s.grabBuf(binary.MaxVarintLen64) + + tfEncoder := newChunkedIntCoder(uint64(s.chunkFactor), uint64(len(s.results)-1)) + locEncoder := newChunkedIntCoder(uint64(s.chunkFactor), uint64(len(s.results)-1)) + fdvEncoder := newChunkedContentCoder(uint64(s.chunkFactor), uint64(len(s.results)-1)) + + var docTermMap [][]byte + + s.buf0.Reset() + builder, err := vellum.New(&s.buf0, nil) + if err != nil { + return 0, nil, err + } + + for fieldID, terms := range s.DictKeys { + if cap(docTermMap) < len(s.results) { + docTermMap = make([][]byte, len(s.results)) + } else { + docTermMap = docTermMap[0:len(s.results)] + for docNum := range docTermMap { // reset the docTermMap + docTermMap[docNum] = docTermMap[docNum][:0] + } + } + + dict := s.Dicts[fieldID] + + for _, term := range terms { // terms are already sorted + pid := dict[term] - 1 + + postingsBS := s.Postings[pid] + postingsLocsBS := s.PostingsLocs[pid] + + freqNorms := s.FreqNorms[pid] + 
freqNormOffset := 0 + + locs := s.Locs[pid] + locOffset := 0 + + postingsItr := postingsBS.Iterator() + for postingsItr.HasNext() { + docNum := uint64(postingsItr.Next()) + + freqNorm := freqNorms[freqNormOffset] + + err = tfEncoder.Add(docNum, freqNorm.freq, + uint64(math.Float32bits(freqNorm.norm))) + if err != nil { + return 0, nil, err + } + + for i := uint64(0); i < freqNorm.freq; i++ { + if len(locs) > 0 { + loc := locs[locOffset] + + err = locEncoder.Add(docNum, uint64(loc.fieldID), + loc.pos, loc.start, loc.end, + uint64(len(loc.arrayposs))) + if err != nil { + return 0, nil, err + } + + err = locEncoder.Add(docNum, loc.arrayposs...) + if err != nil { + return 0, nil, err + } + } + + locOffset++ + } + + freqNormOffset++ + + docTermMap[docNum] = append( + append(docTermMap[docNum], term...), + termSeparator) + } + + tfEncoder.Close() + locEncoder.Close() + + postingsOffset, err := writePostings( + postingsBS, postingsLocsBS, tfEncoder, locEncoder, + nil, s.w, buf) + if err != nil { + return 0, nil, err + } + + if postingsOffset > uint64(0) { + err = builder.Insert([]byte(term), postingsOffset) + if err != nil { + return 0, nil, err + } + } + + tfEncoder.Reset() + locEncoder.Reset() + } + + err = builder.Close() + if err != nil { + return 0, nil, err + } + + // record where this dictionary starts + dictOffsets[fieldID] = uint64(s.w.Count()) + + vellumData := s.buf0.Bytes() + + // write out the length of the vellum data + n := binary.PutUvarint(buf, uint64(len(vellumData))) + _, err = s.w.Write(buf[:n]) + if err != nil { + return 0, nil, err + } + + // write this vellum to disk + _, err = s.w.Write(vellumData) + if err != nil { + return 0, nil, err + } + + // reset vellum for reuse + s.buf0.Reset() + + err = builder.Reset(&s.buf0) + if err != nil { + return 0, nil, err + } + + // write the field doc values + if s.IncludeDocValues[fieldID] { + for docNum, docTerms := range docTermMap { + if len(docTerms) > 0 { + err = fdvEncoder.Add(uint64(docNum), docTerms) + 
if err != nil { + return 0, nil, err + } + } + } + err = fdvEncoder.Close() + if err != nil { + return 0, nil, err + } + + fdvOffsets[fieldID] = uint64(s.w.Count()) + + _, err = fdvEncoder.Write(s.w) + if err != nil { + return 0, nil, err + } + + fdvEncoder.Reset() + } else { + fdvOffsets[fieldID] = fieldNotUninverted + } + } + + fdvIndexOffset := uint64(s.w.Count()) + + for _, fdvOffset := range fdvOffsets { + n := binary.PutUvarint(buf, fdvOffset) + _, err := s.w.Write(buf[:n]) + if err != nil { + return 0, nil, err + } + } + + return fdvIndexOffset, dictOffsets, nil +} + +func encodeFieldType(f document.Field) byte { + fieldType := byte('x') + switch f.(type) { + case *document.TextField: + fieldType = 't' + case *document.NumericField: + fieldType = 'n' + case *document.DateTimeField: + fieldType = 'd' + case *document.BooleanField: + fieldType = 'b' + case *document.GeoPointField: + fieldType = 'g' + case *document.CompositeField: + fieldType = 'c' + } + return fieldType +} From eade78be2f2026b82a66e4eff98ca1567c3d88c3 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Fri, 9 Mar 2018 14:05:17 -0800 Subject: [PATCH 279/728] scorch zap unit tests no longer use mem.Segment --- index/scorch/segment/zap/build_test.go | 37 +++++++++++++------ index/scorch/segment/zap/dict_test.go | 11 ++---- index/scorch/segment/zap/merge_test.go | 47 +++++++++++------------- index/scorch/segment/zap/segment_test.go | 20 +++++----- 4 files changed, 61 insertions(+), 54 deletions(-) diff --git a/index/scorch/segment/zap/build_test.go b/index/scorch/segment/zap/build_test.go index 9063980b7..65de7931d 100644 --- a/index/scorch/segment/zap/build_test.go +++ b/index/scorch/segment/zap/build_test.go @@ -21,20 +21,22 @@ import ( "github.com/blevesearch/bleve/analysis" "github.com/blevesearch/bleve/document" "github.com/blevesearch/bleve/index" - "github.com/blevesearch/bleve/index/scorch/segment/mem" ) func TestBuild(t *testing.T) { _ = os.RemoveAll("/tmp/scorch.zap") - memSegment := 
buildMemSegment() - err := PersistSegment(memSegment, "/tmp/scorch.zap", 1024) + sb, err := buildTestSegment() + if err != nil { + t.Fatal(err) + } + err = PersistSegmentBase(sb, "/tmp/scorch.zap") if err != nil { t.Fatal(err) } } -func buildMemSegment() *mem.Segment { +func buildTestSegment() (*SegmentBase, error) { doc := &document.Document{ ID: "a", Fields: []document.Field{ @@ -120,11 +122,22 @@ func buildMemSegment() *mem.Segment { } } - return mem.NewFromAnalyzedDocs(results) + return AnalysisResultsToSegmentBase(results, 1024) +} + +func buildTestSegmentMulti() (*SegmentBase, error) { + results := buildTestAnalysisResultsMulti() + + return AnalysisResultsToSegmentBase(results, 1024) } -func buildMemSegmentMulti() *mem.Segment { +func buildTestSegmentMultiWithChunkFactor(chunkFactor uint32) (*SegmentBase, error) { + results := buildTestAnalysisResultsMulti() + return AnalysisResultsToSegmentBase(results, chunkFactor) +} + +func buildTestAnalysisResultsMulti() []*index.AnalysisResult { doc := &document.Document{ ID: "a", Fields: []document.Field{ @@ -282,13 +295,11 @@ func buildMemSegmentMulti() *mem.Segment { } } - segment := mem.NewFromAnalyzedDocs(results) - - return segment + return results } -func buildMemSegmentWithDefaultFieldMapping() (*mem.Segment, []string) { - +func buildTestSegmentWithDefaultFieldMapping(chunkFactor uint32) ( + *SegmentBase, []string, error) { doc := &document.Document{ ID: "a", Fields: []document.Field{ @@ -371,5 +382,7 @@ func buildMemSegmentWithDefaultFieldMapping() (*mem.Segment, []string) { } } - return mem.NewFromAnalyzedDocs(results), fields + sb, err := AnalysisResultsToSegmentBase(results, chunkFactor) + + return sb, fields, err } diff --git a/index/scorch/segment/zap/dict_test.go b/index/scorch/segment/zap/dict_test.go index 336fb37ca..b70f2adf7 100644 --- a/index/scorch/segment/zap/dict_test.go +++ b/index/scorch/segment/zap/dict_test.go @@ -22,10 +22,9 @@ import ( "github.com/blevesearch/bleve/analysis" 
"github.com/blevesearch/bleve/document" "github.com/blevesearch/bleve/index" - "github.com/blevesearch/bleve/index/scorch/segment/mem" ) -func buildMemSegmentForDict() *mem.Segment { +func buildTestSegmentForDict() (*SegmentBase, error) { doc := &document.Document{ ID: "a", Fields: []document.Field{ @@ -99,17 +98,15 @@ func buildMemSegmentForDict() *mem.Segment { }, } - segment := mem.NewFromAnalyzedDocs(results) - - return segment + return AnalysisResultsToSegmentBase(results, 1024) } func TestDictionary(t *testing.T) { _ = os.RemoveAll("/tmp/scorch.zap") - memSegment := buildMemSegmentForDict() - err := PersistSegment(memSegment, "/tmp/scorch.zap", 1024) + testSeg, _ := buildTestSegmentForDict() + err := PersistSegmentBase(testSeg, "/tmp/scorch.zap") if err != nil { t.Fatalf("error persisting segment: %v", err) } diff --git a/index/scorch/segment/zap/merge_test.go b/index/scorch/segment/zap/merge_test.go index d80b26086..d931f6c23 100644 --- a/index/scorch/segment/zap/merge_test.go +++ b/index/scorch/segment/zap/merge_test.go @@ -26,7 +26,6 @@ import ( "github.com/blevesearch/bleve/analysis" "github.com/blevesearch/bleve/document" "github.com/blevesearch/bleve/index" - "github.com/blevesearch/bleve/index/scorch/segment/mem" ) func TestMerge(t *testing.T) { @@ -34,14 +33,14 @@ func TestMerge(t *testing.T) { _ = os.RemoveAll("/tmp/scorch2.zap") _ = os.RemoveAll("/tmp/scorch3.zap") - memSegment := buildMemSegmentMulti() - err := PersistSegment(memSegment, "/tmp/scorch.zap", 1024) + testSeg, _ := buildTestSegmentMulti() + err := PersistSegmentBase(testSeg, "/tmp/scorch.zap") if err != nil { t.Fatal(err) } - memSegment2 := buildMemSegmentMulti2() - err = PersistSegment(memSegment2, "/tmp/scorch2.zap", 1024) + testSeg2, _ := buildTestSegmentMulti2() + err = PersistSegmentBase(testSeg2, "/tmp/scorch2.zap") if err != nil { t.Fatal(err) } @@ -121,8 +120,8 @@ func TestMergeWithEmptySegmentsFirst(t *testing.T) { func testMergeWithEmptySegments(t *testing.T, before bool, 
numEmptySegments int) { _ = os.RemoveAll("/tmp/scorch.zap") - memSegment := buildMemSegmentMulti() - err := PersistSegment(memSegment, "/tmp/scorch.zap", 1024) + testSeg, _ := buildTestSegmentMulti() + err := PersistSegmentBase(testSeg, "/tmp/scorch.zap") if err != nil { t.Fatal(err) } @@ -148,8 +147,8 @@ func testMergeWithEmptySegments(t *testing.T, before bool, numEmptySegments int) _ = os.RemoveAll("/tmp/" + fname) - emptySegment := mem.NewFromAnalyzedDocs([]*index.AnalysisResult{}) - err = PersistSegment(emptySegment, "/tmp/"+fname, 1024) + emptySegment, _ := AnalysisResultsToSegmentBase([]*index.AnalysisResult{}, 1024) + err = PersistSegmentBase(emptySegment, "/tmp/"+fname) if err != nil { t.Fatal(err) } @@ -462,8 +461,8 @@ func testMergeAndDrop(t *testing.T, docsToDrop []*roaring.Bitmap) { _ = os.RemoveAll("/tmp/scorch.zap") _ = os.RemoveAll("/tmp/scorch2.zap") - memSegment := buildMemSegmentMulti() - err := PersistSegment(memSegment, "/tmp/scorch.zap", 1024) + testSeg, _ := buildTestSegmentMulti() + err := PersistSegmentBase(testSeg, "/tmp/scorch.zap") if err != nil { t.Fatal(err) } @@ -478,8 +477,8 @@ func testMergeAndDrop(t *testing.T, docsToDrop []*roaring.Bitmap) { } }() - memSegment2 := buildMemSegmentMulti2() - err = PersistSegment(memSegment2, "/tmp/scorch2.zap", 1024) + testSeg2, _ := buildTestSegmentMulti2() + err = PersistSegmentBase(testSeg2, "/tmp/scorch2.zap") if err != nil { t.Fatal(err) } @@ -565,8 +564,8 @@ func testMergeWithUpdates(t *testing.T, segmentDocIds [][]string, docsToDrop []* _ = os.RemoveAll("/tmp/" + fname) - memSegment := buildMemSegmentMultiHelper(docIds) - err := PersistSegment(memSegment, "/tmp/"+fname, 1024) + testSeg, _ := buildTestSegmentMultiHelper(docIds) + err := PersistSegmentBase(testSeg, "/tmp/"+fname) if err != nil { t.Fatal(err) } @@ -616,11 +615,11 @@ func testMergeAndDropSegments(t *testing.T, segsToMerge []*Segment, docsToDrop [ testMergeWithSelf(t, segm.(*Segment), expectedNumDocs) } -func 
buildMemSegmentMulti2() *mem.Segment { - return buildMemSegmentMultiHelper([]string{"c", "d"}) +func buildTestSegmentMulti2() (*SegmentBase, error) { + return buildTestSegmentMultiHelper([]string{"c", "d"}) } -func buildMemSegmentMultiHelper(docIds []string) *mem.Segment { +func buildTestSegmentMultiHelper(docIds []string) (*SegmentBase, error) { doc := &document.Document{ ID: "c", Fields: []document.Field{ @@ -778,9 +777,7 @@ func buildMemSegmentMultiHelper(docIds []string) *mem.Segment { } } - segment := mem.NewFromAnalyzedDocs(results) - - return segment + return AnalysisResultsToSegmentBase(results, 1024) } func TestMergeBytesWritten(t *testing.T) { @@ -788,14 +785,14 @@ func TestMergeBytesWritten(t *testing.T) { _ = os.RemoveAll("/tmp/scorch2.zap") _ = os.RemoveAll("/tmp/scorch3.zap") - memSegment := buildMemSegmentMulti() - err := PersistSegment(memSegment, "/tmp/scorch.zap", 1024) + testSeg, _ := buildTestSegmentMulti() + err := PersistSegmentBase(testSeg, "/tmp/scorch.zap") if err != nil { t.Fatal(err) } - memSegment2 := buildMemSegmentMulti2() - err = PersistSegment(memSegment2, "/tmp/scorch2.zap", 1024) + testSeg2, _ := buildTestSegmentMulti2() + err = PersistSegmentBase(testSeg2, "/tmp/scorch2.zap") if err != nil { t.Fatal(err) } diff --git a/index/scorch/segment/zap/segment_test.go b/index/scorch/segment/zap/segment_test.go index 9ce354ce3..50d5dbd7f 100644 --- a/index/scorch/segment/zap/segment_test.go +++ b/index/scorch/segment/zap/segment_test.go @@ -28,8 +28,8 @@ import ( func TestOpen(t *testing.T) { _ = os.RemoveAll("/tmp/scorch.zap") - memSegment := buildMemSegment() - err := PersistSegment(memSegment, "/tmp/scorch.zap", 1024) + testSeg, _ := buildTestSegment() + err := PersistSegmentBase(testSeg, "/tmp/scorch.zap") if err != nil { t.Fatalf("error persisting segment: %v", err) } @@ -328,8 +328,8 @@ func TestOpen(t *testing.T) { func TestOpenMulti(t *testing.T) { _ = os.RemoveAll("/tmp/scorch.zap") - memSegment := buildMemSegmentMulti() - err := 
PersistSegment(memSegment, "/tmp/scorch.zap", 1024) + testSeg, _ := buildTestSegmentMulti() + err := PersistSegmentBase(testSeg, "/tmp/scorch.zap") if err != nil { t.Fatalf("error persisting segment: %v", err) } @@ -428,8 +428,8 @@ func TestOpenMulti(t *testing.T) { func TestOpenMultiWithTwoChunks(t *testing.T) { _ = os.RemoveAll("/tmp/scorch.zap") - memSegment := buildMemSegmentMulti() - err := PersistSegment(memSegment, "/tmp/scorch.zap", 1) + testSeg, _ := buildTestSegmentMultiWithChunkFactor(1) + err := PersistSegmentBase(testSeg, "/tmp/scorch.zap") if err != nil { t.Fatalf("error persisting segment: %v", err) } @@ -523,8 +523,8 @@ func TestOpenMultiWithTwoChunks(t *testing.T) { func TestSegmentVisitableDocValueFieldsList(t *testing.T) { _ = os.RemoveAll("/tmp/scorch.zap") - memSegment := buildMemSegmentMulti() - err := PersistSegment(memSegment, "/tmp/scorch.zap", 1) + testSeg, _ := buildTestSegmentMultiWithChunkFactor(1) + err := PersistSegmentBase(testSeg, "/tmp/scorch.zap") if err != nil { t.Fatalf("error persisting segment: %v", err) } @@ -551,8 +551,8 @@ func TestSegmentVisitableDocValueFieldsList(t *testing.T) { } _ = os.RemoveAll("/tmp/scorch.zap") - memSegment, expectedFields := buildMemSegmentWithDefaultFieldMapping() - err = PersistSegment(memSegment, "/tmp/scorch.zap", 1) + testSeg, expectedFields, _ := buildTestSegmentWithDefaultFieldMapping(1) + err = PersistSegmentBase(testSeg, "/tmp/scorch.zap") if err != nil { t.Fatalf("error persisting segment: %v", err) } From 5abf7b7a19071d24123e1add6443ebd91005fc68 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Fri, 9 Mar 2018 14:11:42 -0800 Subject: [PATCH 280/728] scorch zap remove mem.Segment usage from persist / build.go --- index/scorch/segment/zap/build.go | 488 ------------------------------ 1 file changed, 488 deletions(-) diff --git a/index/scorch/segment/zap/build.go b/index/scorch/segment/zap/build.go index 8ec610953..30ae8d774 100644 --- a/index/scorch/segment/zap/build.go +++ 
b/index/scorch/segment/zap/build.go @@ -16,16 +16,10 @@ package zap import ( "bufio" - "bytes" - "encoding/binary" "math" "os" - "sort" "github.com/Smerity/govarint" - "github.com/blevesearch/bleve/index/scorch/segment/mem" - "github.com/couchbase/vellum" - "github.com/golang/snappy" ) const version uint32 = 4 @@ -82,186 +76,6 @@ func PersistSegmentBase(sb *SegmentBase, path string) error { return nil } -// PersistSegment takes the in-memory segment and persists it to -// the specified path in the zap file format. -func PersistSegment(memSegment *mem.Segment, path string, chunkFactor uint32) error { - flag := os.O_RDWR | os.O_CREATE - - f, err := os.OpenFile(path, flag, 0600) - if err != nil { - return err - } - - cleanup := func() { - _ = f.Close() - _ = os.Remove(path) - } - - // buffer the output - br := bufio.NewWriter(f) - - // wrap it for counting (tracking offsets) - cr := NewCountHashWriter(br) - - numDocs, storedIndexOffset, fieldsIndexOffset, docValueOffset, _, err := - persistBase(memSegment, cr, chunkFactor) - if err != nil { - cleanup() - return err - } - - err = persistFooter(numDocs, storedIndexOffset, fieldsIndexOffset, docValueOffset, - chunkFactor, cr.Sum32(), cr) - if err != nil { - cleanup() - return err - } - - err = br.Flush() - if err != nil { - cleanup() - return err - } - - err = f.Sync() - if err != nil { - cleanup() - return err - } - - err = f.Close() - if err != nil { - cleanup() - return err - } - - return nil -} - -func persistBase(memSegment *mem.Segment, cr *CountHashWriter, chunkFactor uint32) ( - numDocs, storedIndexOffset, fieldsIndexOffset, docValueOffset uint64, - dictLocs []uint64, err error) { - docValueOffset = uint64(fieldNotUninverted) - - if len(memSegment.Stored) > 0 { - storedIndexOffset, err = persistStored(memSegment, cr) - if err != nil { - return 0, 0, 0, 0, nil, err - } - - freqOffsets, locOffsets, err := persistPostingDetails(memSegment, cr, chunkFactor) - if err != nil { - return 0, 0, 0, 0, nil, err - } - - 
postingsListLocs, err := persistPostingsLocs(memSegment, cr) - if err != nil { - return 0, 0, 0, 0, nil, err - } - - postingsLocs, err := persistPostingsLists(memSegment, cr, postingsListLocs, freqOffsets, locOffsets) - if err != nil { - return 0, 0, 0, 0, nil, err - } - - dictLocs, err = persistDictionary(memSegment, cr, postingsLocs) - if err != nil { - return 0, 0, 0, 0, nil, err - } - - docValueOffset, err = persistFieldDocValues(memSegment, cr, chunkFactor) - if err != nil { - return 0, 0, 0, 0, nil, err - } - } else { - dictLocs = make([]uint64, len(memSegment.FieldsInv)) - } - - fieldsIndexOffset, err = persistFields(memSegment.FieldsInv, cr, dictLocs) - if err != nil { - return 0, 0, 0, 0, nil, err - } - - return uint64(len(memSegment.Stored)), storedIndexOffset, fieldsIndexOffset, docValueOffset, - dictLocs, nil -} - -func persistStored(memSegment *mem.Segment, w *CountHashWriter) (uint64, error) { - var curr int - var metaBuf bytes.Buffer - var data, compressed []byte - - metaEncoder := govarint.NewU64Base128Encoder(&metaBuf) - - docNumOffsets := make(map[int]uint64, len(memSegment.Stored)) - - for docNum, storedValues := range memSegment.Stored { - if docNum != 0 { - // reset buffer if necessary - curr = 0 - metaBuf.Reset() - data = data[:0] - compressed = compressed[:0] - } - - st := memSegment.StoredTypes[docNum] - sp := memSegment.StoredPos[docNum] - - // encode fields in order - for fieldID := range memSegment.FieldsInv { - if storedFieldValues, ok := storedValues[uint16(fieldID)]; ok { - stf := st[uint16(fieldID)] - spf := sp[uint16(fieldID)] - - var err2 error - curr, data, err2 = persistStoredFieldValues(fieldID, - storedFieldValues, stf, spf, curr, metaEncoder, data) - if err2 != nil { - return 0, err2 - } - } - } - - metaEncoder.Close() - metaBytes := metaBuf.Bytes() - - // compress the data - compressed = snappy.Encode(compressed, data) - - // record where we're about to start writing - docNumOffsets[docNum] = uint64(w.Count()) - - // write out 
the meta len and compressed data len - _, err := writeUvarints(w, uint64(len(metaBytes)), uint64(len(compressed))) - if err != nil { - return 0, err - } - - // now write the meta - _, err = w.Write(metaBytes) - if err != nil { - return 0, err - } - // now write the compressed data - _, err = w.Write(compressed) - if err != nil { - return 0, err - } - } - - // return value is the start of the stored index - rv := uint64(w.Count()) - // now write out the stored doc index - for docNum := range memSegment.Stored { - err := binary.Write(w, binary.BigEndian, docNumOffsets[docNum]) - if err != nil { - return 0, err - } - } - - return rv, nil -} - func persistStoredFieldValues(fieldID int, storedFieldValues [][]byte, stf []byte, spf [][]uint64, curr int, metaEncoder *govarint.Base128Encoder, data []byte) ( @@ -307,308 +121,6 @@ func persistStoredFieldValues(fieldID int, return curr, data, nil } -func persistPostingDetails(memSegment *mem.Segment, w *CountHashWriter, chunkFactor uint32) ([]uint64, []uint64, error) { - freqOffsets := make([]uint64, 0, len(memSegment.Postings)) - tfEncoder := newChunkedIntCoder(uint64(chunkFactor), uint64(len(memSegment.Stored)-1)) - for postingID := range memSegment.Postings { - if postingID != 0 { - tfEncoder.Reset() - } - freqs := memSegment.Freqs[postingID] - norms := memSegment.Norms[postingID] - postingsListItr := memSegment.Postings[postingID].Iterator() - var offset int - for postingsListItr.HasNext() { - docNum := uint64(postingsListItr.Next()) - - // put freq & norm - err := tfEncoder.Add(docNum, freqs[offset], uint64(math.Float32bits(norms[offset]))) - if err != nil { - return nil, nil, err - } - - offset++ - } - - // record where this postings freq info starts - freqOffsets = append(freqOffsets, uint64(w.Count())) - - tfEncoder.Close() - _, err := tfEncoder.Write(w) - if err != nil { - return nil, nil, err - } - } - - // now do it again for the locations - locOffsets := make([]uint64, 0, len(memSegment.Postings)) - locEncoder := 
newChunkedIntCoder(uint64(chunkFactor), uint64(len(memSegment.Stored)-1)) - for postingID := range memSegment.Postings { - if postingID != 0 { - locEncoder.Reset() - } - freqs := memSegment.Freqs[postingID] - locfields := memSegment.Locfields[postingID] - locpos := memSegment.Locpos[postingID] - locstarts := memSegment.Locstarts[postingID] - locends := memSegment.Locends[postingID] - locarraypos := memSegment.Locarraypos[postingID] - postingsListItr := memSegment.Postings[postingID].Iterator() - var offset int - var locOffset int - for postingsListItr.HasNext() { - docNum := uint64(postingsListItr.Next()) - n := int(freqs[offset]) - for i := 0; i < n; i++ { - if len(locfields) > 0 { - err := locEncoder.Add(docNum, uint64(locfields[locOffset]), - locpos[locOffset], locstarts[locOffset], locends[locOffset], - uint64(len(locarraypos[locOffset]))) - if err != nil { - return nil, nil, err - } - - // put each array position - err = locEncoder.Add(docNum, locarraypos[locOffset]...) - if err != nil { - return nil, nil, err - } - } - locOffset++ - } - offset++ - } - - // record where this postings loc info starts - locOffsets = append(locOffsets, uint64(w.Count())) - - locEncoder.Close() - _, err := locEncoder.Write(w) - if err != nil { - return nil, nil, err - } - } - - return freqOffsets, locOffsets, nil -} - -func persistPostingsLocs(memSegment *mem.Segment, w *CountHashWriter) (rv []uint64, err error) { - rv = make([]uint64, 0, len(memSegment.PostingsLocs)) - reuseBufVarint := make([]byte, binary.MaxVarintLen64) - for postingID := range memSegment.PostingsLocs { - // record where we start this posting loc - rv = append(rv, uint64(w.Count())) - // write out the length and bitmap - _, err = writeRoaringWithLen(memSegment.PostingsLocs[postingID], w, reuseBufVarint) - if err != nil { - return nil, err - } - } - return rv, nil -} - -func persistPostingsLists(memSegment *mem.Segment, w *CountHashWriter, - postingsListLocs, freqOffsets, locOffsets []uint64) (rv []uint64, err 
error) { - rv = make([]uint64, 0, len(memSegment.Postings)) - reuseBufVarint := make([]byte, binary.MaxVarintLen64) - for postingID := range memSegment.Postings { - // record where we start this posting list - rv = append(rv, uint64(w.Count())) - - // write out the term info, loc info, and loc posting list offset - _, err = writeUvarints(w, freqOffsets[postingID], - locOffsets[postingID], postingsListLocs[postingID]) - if err != nil { - return nil, err - } - - // write out the length and bitmap - _, err = writeRoaringWithLen(memSegment.Postings[postingID], w, reuseBufVarint) - if err != nil { - return nil, err - } - } - return rv, nil -} - -func persistDictionary(memSegment *mem.Segment, w *CountHashWriter, postingsLocs []uint64) ([]uint64, error) { - rv := make([]uint64, 0, len(memSegment.DictKeys)) - - varintBuf := make([]byte, binary.MaxVarintLen64) - - var buffer bytes.Buffer - builder, err := vellum.New(&buffer, nil) - if err != nil { - return nil, err - } - for fieldID, fieldTerms := range memSegment.DictKeys { - - dict := memSegment.Dicts[fieldID] - // now walk the dictionary in order of fieldTerms (already sorted) - for _, fieldTerm := range fieldTerms { - postingID := dict[fieldTerm] - 1 - postingsAddr := postingsLocs[postingID] - err = builder.Insert([]byte(fieldTerm), postingsAddr) - if err != nil { - return nil, err - } - } - err = builder.Close() - if err != nil { - return nil, err - } - - // record where this dictionary starts - rv = append(rv, uint64(w.Count())) - - vellumData := buffer.Bytes() - - // write out the length of the vellum data - n := binary.PutUvarint(varintBuf, uint64(len(vellumData))) - _, err = w.Write(varintBuf[:n]) - if err != nil { - return nil, err - } - - // write this vellum to disk - _, err = w.Write(vellumData) - if err != nil { - return nil, err - } - - // reset buffer and vellum builder - buffer.Reset() - err = builder.Reset(&buffer) - if err != nil { - return nil, err - } - } - - return rv, nil -} - -type docIDRange 
[]uint64 - -func (a docIDRange) Len() int { return len(a) } -func (a docIDRange) Swap(i, j int) { a[i], a[j] = a[j], a[i] } -func (a docIDRange) Less(i, j int) bool { return a[i] < a[j] } - -func persistDocValues(memSegment *mem.Segment, w *CountHashWriter, - chunkFactor uint32) (map[uint16]uint64, error) { - fieldChunkOffsets := make(map[uint16]uint64, len(memSegment.FieldsInv)) - fdvEncoder := newChunkedContentCoder(uint64(chunkFactor), uint64(len(memSegment.Stored)-1)) - - var postings *mem.PostingsList - var postingsItr *mem.PostingsIterator - - for fieldID := range memSegment.DocValueFields { - field := memSegment.FieldsInv[fieldID] - docTermMap := make(map[uint64][]byte, 0) - dict, err := memSegment.Dictionary(field) - if err != nil { - return nil, err - } - - dictItr := dict.Iterator() - next, err := dictItr.Next() - for err == nil && next != nil { - var err1 error - postings, err1 = dict.(*mem.Dictionary).InitPostingsList(next.Term, nil, postings) - if err1 != nil { - return nil, err1 - } - - postingsItr = postings.InitIterator(postingsItr) - nextPosting, err2 := postingsItr.Next() - for err2 == nil && nextPosting != nil { - docNum := nextPosting.Number() - docTermMap[docNum] = append(append(docTermMap[docNum], []byte(next.Term)...), termSeparator) - nextPosting, err2 = postingsItr.Next() - } - if err2 != nil { - return nil, err2 - } - - next, err = dictItr.Next() - } - if err != nil { - return nil, err - } - - // sort wrt to docIDs - docNumbers := make(docIDRange, 0, len(docTermMap)) - for k := range docTermMap { - docNumbers = append(docNumbers, k) - } - sort.Sort(docNumbers) - - for _, docNum := range docNumbers { - err = fdvEncoder.Add(docNum, docTermMap[docNum]) - if err != nil { - return nil, err - } - } - - fieldChunkOffsets[fieldID] = uint64(w.Count()) - err = fdvEncoder.Close() - if err != nil { - return nil, err - } - // persist the doc value details for this field - _, err = fdvEncoder.Write(w) - if err != nil { - return nil, err - } - // 
reseting encoder for the next field - fdvEncoder.Reset() - } - - return fieldChunkOffsets, nil -} - -func persistFieldDocValues(memSegment *mem.Segment, w *CountHashWriter, - chunkFactor uint32) (uint64, error) { - fieldDvOffsets, err := persistDocValues(memSegment, w, chunkFactor) - if err != nil { - return 0, err - } - - fieldDocValuesOffset := uint64(w.Count()) - buf := make([]byte, binary.MaxVarintLen64) - offset := uint64(0) - ok := true - for fieldID := range memSegment.FieldsInv { - // if the field isn't configured for docValue, then mark - // the offset accordingly - if offset, ok = fieldDvOffsets[uint16(fieldID)]; !ok { - offset = fieldNotUninverted - } - n := binary.PutUvarint(buf, uint64(offset)) - _, err := w.Write(buf[:n]) - if err != nil { - return 0, err - } - } - - return fieldDocValuesOffset, nil -} - -func NewSegmentBase(memSegment *mem.Segment, chunkFactor uint32) (*SegmentBase, error) { - var br bytes.Buffer - - cr := NewCountHashWriter(&br) - - numDocs, storedIndexOffset, fieldsIndexOffset, docValueOffset, dictLocs, err := - persistBase(memSegment, cr, chunkFactor) - if err != nil { - return nil, err - } - - return InitSegmentBase(br.Bytes(), cr.Sum32(), chunkFactor, - memSegment.FieldsMap, memSegment.FieldsInv, numDocs, - storedIndexOffset, fieldsIndexOffset, docValueOffset, dictLocs) -} - func InitSegmentBase(mem []byte, memCRC uint32, chunkFactor uint32, fieldsMap map[string]uint16, fieldsInv []string, numDocs uint64, storedIndexOffset uint64, fieldsIndexOffset uint64, docValueOffset uint64, From 2a20a36e154e58b35b37904d59a372eed20ee792 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Fri, 9 Mar 2018 23:19:47 -0800 Subject: [PATCH 281/728] scorch zap optimimze to avoid bitmaps for 1-hit posting lists This commit avoids creating roaring.Bitmap's (which would have just a single entry) when a postings list/iterator represents a single "1-hit" encoding. 
--- index/scorch/segment/zap/posting.go | 129 ++++++++++++++++------------ index/scorch/segment/zap/segment.go | 8 +- 2 files changed, 78 insertions(+), 59 deletions(-) diff --git a/index/scorch/segment/zap/posting.go b/index/scorch/segment/zap/posting.go index 7ae36120b..f2df32bf7 100644 --- a/index/scorch/segment/zap/posting.go +++ b/index/scorch/segment/zap/posting.go @@ -92,6 +92,8 @@ func under32Bits(x uint64) bool { return x <= mask31Bits } +const docNum1HitFinished = math.MaxUint64 + // PostingsList is an in-memory represenation of a postings list type PostingsList struct { sb *SegmentBase @@ -102,8 +104,9 @@ type PostingsList struct { postings *roaring.Bitmap except *roaring.Bitmap - // when postingsOffset == freqOffset == 0, then the postings list - // represents a "1-hit" encoding, and has the following norm + // when normBits1Hit != 0, then this postings list came from a + // 1-hit encoding, and only the docNum1Hit & normBits1Hit apply + docNum1Hit uint64 normBits1Hit uint64 } @@ -117,6 +120,17 @@ func (p *PostingsList) Size() int { return sizeInBytes } +func (p *PostingsList) OrInto(receiver *roaring.Bitmap) { + if p.normBits1Hit != 0 { + receiver.Add(uint32(p.docNum1Hit)) + return + } + + if p.postings != nil { + receiver.Or(p.postings) + } +} + // Iterator returns an iterator for this postings list func (p *PostingsList) Iterator() segment.PostingsIterator { return p.iterator(nil) @@ -152,39 +166,47 @@ func (p *PostingsList) iterator(rv *PostingsIterator) *PostingsIterator { } rv.postings = p + if p.normBits1Hit != 0 { + // "1-hit" encoding + rv.docNum1Hit = p.docNum1Hit + rv.normBits1Hit = p.normBits1Hit + + if p.except != nil && p.except.Contains(uint32(rv.docNum1Hit)) { + rv.docNum1Hit = docNum1HitFinished + } + + return rv + } + + // "general" encoding, check if empty if p.postings == nil { return rv } - if p.freqOffset > 0 && p.locOffset > 0 { - // "general" encoding, so prepare the freq chunk details - var n uint64 - var read int - var 
numFreqChunks uint64 - numFreqChunks, read = binary.Uvarint(p.sb.mem[p.freqOffset+n : p.freqOffset+n+binary.MaxVarintLen64]) + // prepare the freq chunk details + var n uint64 + var read int + var numFreqChunks uint64 + numFreqChunks, read = binary.Uvarint(p.sb.mem[p.freqOffset+n : p.freqOffset+n+binary.MaxVarintLen64]) + n += uint64(read) + rv.freqChunkLens = make([]uint64, int(numFreqChunks)) + for i := 0; i < int(numFreqChunks); i++ { + rv.freqChunkLens[i], read = binary.Uvarint(p.sb.mem[p.freqOffset+n : p.freqOffset+n+binary.MaxVarintLen64]) n += uint64(read) - rv.freqChunkLens = make([]uint64, int(numFreqChunks)) - for i := 0; i < int(numFreqChunks); i++ { - rv.freqChunkLens[i], read = binary.Uvarint(p.sb.mem[p.freqOffset+n : p.freqOffset+n+binary.MaxVarintLen64]) - n += uint64(read) - } - rv.freqChunkStart = p.freqOffset + n + } + rv.freqChunkStart = p.freqOffset + n - // prepare the loc chunk details - n = 0 - var numLocChunks uint64 - numLocChunks, read = binary.Uvarint(p.sb.mem[p.locOffset+n : p.locOffset+n+binary.MaxVarintLen64]) + // prepare the loc chunk details + n = 0 + var numLocChunks uint64 + numLocChunks, read = binary.Uvarint(p.sb.mem[p.locOffset+n : p.locOffset+n+binary.MaxVarintLen64]) + n += uint64(read) + rv.locChunkLens = make([]uint64, int(numLocChunks)) + for i := 0; i < int(numLocChunks); i++ { + rv.locChunkLens[i], read = binary.Uvarint(p.sb.mem[p.locOffset+n : p.locOffset+n+binary.MaxVarintLen64]) n += uint64(read) - rv.locChunkLens = make([]uint64, int(numLocChunks)) - for i := 0; i < int(numLocChunks); i++ { - rv.locChunkLens[i], read = binary.Uvarint(p.sb.mem[p.locOffset+n : p.locOffset+n+binary.MaxVarintLen64]) - n += uint64(read) - } - rv.locChunkStart = p.locOffset + n - } else { - // "1-hit" encoding - rv.normBits1Hit = p.normBits1Hit } + rv.locChunkStart = p.locOffset + n rv.locBitmap = p.locBitmap @@ -201,18 +223,20 @@ func (p *PostingsList) iterator(rv *PostingsIterator) *PostingsIterator { // Count returns the number of items 
on this postings list func (p *PostingsList) Count() uint64 { - if p.postings != nil { - n := p.postings.GetCardinality() - if p.except != nil { - e := p.except.GetCardinality() - if e > n { - e = n - } - return n - e - } - return n + var n uint64 + if p.normBits1Hit != 0 { + n = 1 + } else if p.postings != nil { + n = p.postings.GetCardinality() + } + var e uint64 + if p.except != nil { + e = p.except.GetCardinality() } - return 0 + if n <= e { + return 0 + } + return n - e } func (rv *PostingsList) read(postingsOffset uint64, d *Dictionary) error { @@ -263,19 +287,10 @@ func (rv *PostingsList) read(postingsOffset uint64, d *Dictionary) error { return nil } -var emptyRoaring = roaring.NewBitmap() - func (rv *PostingsList) init1Hit(fstVal uint64) error { docNum, normBits := FSTValDecode1Hit(fstVal) - rv.locBitmap = emptyRoaring - - rv.postings = roaring.NewBitmap() - rv.postings.Add(uint32(docNum)) - - // TODO: we can likely do better than allocating a roaring bitmap - // with just 1 entry, but for now reuse existing machinery - + rv.docNum1Hit = docNum rv.normBits1Hit = normBits return nil @@ -308,6 +323,7 @@ type PostingsIterator struct { next Posting // reused across Next() calls nextLocs []Location // reused across Next() calls + docNum1Hit uint64 normBits1Hit uint64 buf []byte @@ -460,7 +476,7 @@ func (i *PostingsIterator) Next() (segment.Posting, error) { } rv.norm = math.Float32frombits(uint32(normBits)) - if i.locBitmap.Contains(uint32(docNum)) { + if i.locBitmap != nil && i.locBitmap.Contains(uint32(docNum)) { // read off 'freq' locations, into reused slices if cap(i.nextLocs) >= int(rv.freq) { i.nextLocs = i.nextLocs[0:rv.freq] @@ -513,7 +529,7 @@ func (i *PostingsIterator) nextBytes() ( endFreqNorm := len(i.currChunkFreqNorm) - i.freqNormReader.Len() bytesFreqNorm = i.currChunkFreqNorm[startFreqNorm:endFreqNorm] - if i.locBitmap.Contains(uint32(docNum)) { + if i.locBitmap != nil && i.locBitmap.Contains(uint32(docNum)) { startLoc := len(i.currChunkLoc) - 
i.locReader.Len() for j := uint64(0); j < freq; j++ { @@ -533,6 +549,15 @@ func (i *PostingsIterator) nextBytes() ( // nextDocNum returns the next docNum on the postings list, and also // sets up the currChunk / loc related fields of the iterator. func (i *PostingsIterator) nextDocNum() (uint64, bool, error) { + if i.normBits1Hit != 0 { + if i.docNum1Hit == docNum1HitFinished { + return 0, false, nil + } + docNum := i.docNum1Hit + i.docNum1Hit = docNum1HitFinished // consume our 1-hit docNum + return docNum, true, nil + } + if i.actual == nil || !i.actual.HasNext() { return 0, false, nil } @@ -540,10 +565,6 @@ func (i *PostingsIterator) nextDocNum() (uint64, bool, error) { n := i.actual.Next() allN := i.all.Next() - if i.normBits1Hit != 0 { - return uint64(n), true, nil - } - nChunk := n / i.postings.sb.chunkFactor allNChunk := allN / i.postings.sb.chunkFactor diff --git a/index/scorch/segment/zap/segment.go b/index/scorch/segment/zap/segment.go index 972b7578e..e1d2a14f7 100644 --- a/index/scorch/segment/zap/segment.go +++ b/index/scorch/segment/zap/segment.go @@ -341,15 +341,13 @@ func (s *SegmentBase) DocNumbers(ids []string) (*roaring.Bitmap, error) { return nil, err } - var postings *PostingsList + var postingsList *PostingsList for _, id := range ids { - postings, err = idDict.postingsList([]byte(id), nil, postings) + postingsList, err = idDict.postingsList([]byte(id), nil, postingsList) if err != nil { return nil, err } - if postings.postings != nil { - rv.Or(postings.postings) - } + postingsList.OrInto(rv) } } From aaccf59191dc5bc31fd2bb703e53d6dd4142a269 Mon Sep 17 00:00:00 2001 From: Sreekanth Sivasankaran Date: Mon, 12 Mar 2018 15:36:46 +0530 Subject: [PATCH 282/728] docValue space savings merging the doc value length and loc slices into a single offset slice as that is enough to compute the starting offset and length of the the doc values data for a given document inside a docValue chunk. 
--- cmd/bleve/cmd/zap/docvalue.go | 18 ++++++++++--- index/scorch/segment/zap/contentcoder.go | 26 ++++++++++++------- index/scorch/segment/zap/contentcoder_test.go | 6 ++--- index/scorch/segment/zap/docvalues.go | 24 ++++++++++++----- 4 files changed, 51 insertions(+), 23 deletions(-) diff --git a/cmd/bleve/cmd/zap/docvalue.go b/cmd/bleve/cmd/zap/docvalue.go index 743974955..234c45e6b 100644 --- a/cmd/bleve/cmd/zap/docvalue.go +++ b/cmd/bleve/cmd/zap/docvalue.go @@ -209,9 +209,7 @@ var docvalueCmd = &cobra.Command{ for i := 0; i < int(numDocs); i++ { curChunkHeader[i].DocNum, nread = binary.Uvarint(data[chunkMetaLoc+offset : chunkMetaLoc+offset+binary.MaxVarintLen64]) offset += uint64(nread) - curChunkHeader[i].DocDvLoc, nread = binary.Uvarint(data[chunkMetaLoc+offset : chunkMetaLoc+offset+binary.MaxVarintLen64]) - offset += uint64(nread) - curChunkHeader[i].DocDvLen, nread = binary.Uvarint(data[chunkMetaLoc+offset : chunkMetaLoc+offset+binary.MaxVarintLen64]) + curChunkHeader[i].DocDvOffset, nread = binary.Uvarint(data[chunkMetaLoc+offset : chunkMetaLoc+offset+binary.MaxVarintLen64]) offset += uint64(nread) } @@ -255,7 +253,19 @@ func getDocValueLocs(docNum uint64, metaHeader []zap.MetaData) (uint64, uint64) return metaHeader[i].DocNum >= docNum }) if i < len(metaHeader) && metaHeader[i].DocNum == docNum { - return metaHeader[i].DocDvLoc, metaHeader[i].DocDvLen + var start, end uint64 + if i > 0 { + start = metaHeader[i].DocDvOffset + } + // single element case + if i == 0 && len(metaHeader) == 1 { + end = metaHeader[i].DocDvOffset + } else if i < len(metaHeader)-1 { + end = metaHeader[i+1].DocDvOffset + } else { // for last element + end = start + metaHeader[0].DocDvOffset + } + return start, end } return math.MaxUint64, math.MaxUint64 } diff --git a/index/scorch/segment/zap/contentcoder.go b/index/scorch/segment/zap/contentcoder.go index 933f10a1e..adaab2fa3 100644 --- a/index/scorch/segment/zap/contentcoder.go +++ b/index/scorch/segment/zap/contentcoder.go @@ 
-47,9 +47,8 @@ type chunkedContentCoder struct { // MetaData represents the data information inside a // chunk. type MetaData struct { - DocNum uint64 // docNum of the data inside the chunk - DocDvLoc uint64 // starting offset for a given docid - DocDvLen uint64 // length of data inside the chunk for the given docid + DocNum uint64 // docNum of the data inside the chunk + DocDvOffset uint64 // offset of data inside the chunk for the given docid } // newChunkedContentCoder returns a new chunk content coder which @@ -94,9 +93,20 @@ func (c *chunkedContentCoder) flushContents() error { return err } + // convert the document data lens to data offsets + if len(c.chunkMeta) > 1 { + c.chunkMeta[1].DocDvOffset, c.chunkMeta[0].DocDvOffset = + c.chunkMeta[0].DocDvOffset, c.chunkMeta[1].DocDvOffset + for i := 2; i < len(c.chunkMeta); i++ { + cur := c.chunkMeta[i].DocDvOffset + c.chunkMeta[i].DocDvOffset = c.chunkMeta[i-1].DocDvOffset + c.chunkMeta[0].DocDvOffset + c.chunkMeta[0].DocDvOffset = cur + } + } + // write out the metaData slice for _, meta := range c.chunkMeta { - _, err := writeUvarints(&c.chunkMetaBuf, meta.DocNum, meta.DocDvLoc, meta.DocDvLen) + _, err := writeUvarints(&c.chunkMetaBuf, meta.DocNum, meta.DocDvOffset) if err != nil { return err } @@ -130,17 +140,15 @@ func (c *chunkedContentCoder) Add(docNum uint64, vals []byte) error { c.currChunk = chunk } - // mark the starting offset for this doc - dvOffset := c.chunkBuf.Len() + // mark the data length for this doc dvSize, err := c.chunkBuf.Write(vals) if err != nil { return err } c.chunkMeta = append(c.chunkMeta, MetaData{ - DocNum: docNum, - DocDvLoc: uint64(dvOffset), - DocDvLen: uint64(dvSize), + DocNum: docNum, + DocDvOffset: uint64(dvSize), }) return nil } diff --git a/index/scorch/segment/zap/contentcoder_test.go b/index/scorch/segment/zap/contentcoder_test.go index 0e45b783e..c6b3df82c 100644 --- a/index/scorch/segment/zap/contentcoder_test.go +++ b/index/scorch/segment/zap/contentcoder_test.go @@ -35,7 
+35,7 @@ func TestChunkContentCoder(t *testing.T) { docNums: []uint64{0}, vals: [][]byte{[]byte("bleve")}, // 1 chunk, chunk-0 length 11(b), value - expected: string([]byte{0x1, 0xb, 0x1, 0x0, 0x0, 0x05, 0x05, 0x10, 0x62, 0x6c, 0x65, 0x76, 0x65}), + expected: string([]byte{0x1, 0xa, 0x1, 0x0, 0x05, 0x05, 0x10, 0x62, 0x6c, 0x65, 0x76, 0x65}), }, { maxDocNum: 1, @@ -46,8 +46,8 @@ func TestChunkContentCoder(t *testing.T) { []byte("scorch"), }, - expected: string([]byte{0x02, 0x0c, 0x0c, 0x01, 0x00, 0x00, 0x06, 0x06, 0x14, - 0x75, 0x70, 0x73, 0x69, 0x64, 0x65, 0x01, 0x01, 0x00, 0x06, 0x06, + expected: string([]byte{0x02, 0x0b, 0x0b, 0x01, 0x00, 0x06, 0x06, 0x14, + 0x75, 0x70, 0x73, 0x69, 0x64, 0x65, 0x01, 0x01, 0x06, 0x06, 0x14, 0x73, 0x63, 0x6f, 0x72, 0x63, 0x68}), }, } diff --git a/index/scorch/segment/zap/docvalues.go b/index/scorch/segment/zap/docvalues.go index 13635c57e..bbefe5a10 100644 --- a/index/scorch/segment/zap/docvalues.go +++ b/index/scorch/segment/zap/docvalues.go @@ -117,9 +117,7 @@ func (di *docValueIterator) loadDvChunk(chunkNumber, for i := 0; i < int(numDocs); i++ { di.curChunkHeader[i].DocNum, read = binary.Uvarint(s.mem[chunkMetaLoc+offset : chunkMetaLoc+offset+binary.MaxVarintLen64]) offset += uint64(read) - di.curChunkHeader[i].DocDvLoc, read = binary.Uvarint(s.mem[chunkMetaLoc+offset : chunkMetaLoc+offset+binary.MaxVarintLen64]) - offset += uint64(read) - di.curChunkHeader[i].DocDvLen, read = binary.Uvarint(s.mem[chunkMetaLoc+offset : chunkMetaLoc+offset+binary.MaxVarintLen64]) + di.curChunkHeader[i].DocDvOffset, read = binary.Uvarint(s.mem[chunkMetaLoc+offset : chunkMetaLoc+offset+binary.MaxVarintLen64]) offset += uint64(read) } @@ -133,8 +131,8 @@ func (di *docValueIterator) loadDvChunk(chunkNumber, func (di *docValueIterator) visitDocValues(docNum uint64, visitor index.DocumentFieldTermVisitor) error { // binary search the term locations for the docNum - start, length := di.getDocValueLocs(docNum) - if start == math.MaxUint64 || length == 
math.MaxUint64 { + start, end := di.getDocValueLocs(docNum) + if start == math.MaxUint64 || end == math.MaxUint64 { return nil } // uncompress the already loaded data @@ -144,7 +142,7 @@ func (di *docValueIterator) visitDocValues(docNum uint64, } // pick the terms for the given docNum - uncompressed = uncompressed[start : start+length] + uncompressed = uncompressed[start:end] for { i := bytes.Index(uncompressed, termSeparatorSplitSlice) if i < 0 { @@ -163,7 +161,19 @@ func (di *docValueIterator) getDocValueLocs(docNum uint64) (uint64, uint64) { return di.curChunkHeader[i].DocNum >= docNum }) if i < len(di.curChunkHeader) && di.curChunkHeader[i].DocNum == docNum { - return di.curChunkHeader[i].DocDvLoc, di.curChunkHeader[i].DocDvLen + var start, end uint64 + if i > 0 { + start = di.curChunkHeader[i].DocDvOffset + } + // single element case + if i == 0 && len(di.curChunkHeader) == 1 { + end = di.curChunkHeader[i].DocDvOffset + } else if i < len(di.curChunkHeader)-1 { + end = di.curChunkHeader[i+1].DocDvOffset + } else { // for last element + end = start + di.curChunkHeader[0].DocDvOffset + } + return start, end } return math.MaxUint64, math.MaxUint64 } From 90aa91105ab3ab5848902a59e161e6a642aa3577 Mon Sep 17 00:00:00 2001 From: Sreekanth Sivasankaran Date: Fri, 9 Mar 2018 11:19:39 +0530 Subject: [PATCH 283/728] handling only int, float64 values --- index/scorch/scorch.go | 31 ++++++------------------------- 1 file changed, 6 insertions(+), 25 deletions(-) diff --git a/index/scorch/scorch.go b/index/scorch/scorch.go index 7a33fb7f0..c171092d6 100644 --- a/index/scorch/scorch.go +++ b/index/scorch/scorch.go @@ -516,33 +516,14 @@ func init() { registry.RegisterIndexType(Name, NewScorch) } -func parseToInteger(v interface{}) (int, error) { - switch v.(type) { - case float32: - return int(v.(float32)), nil +func parseToInteger(i interface{}) (int, error) { + switch v := i.(type) { case float64: - return int(v.(float64)), nil + return int(v), nil case int: - return 
v.(int), nil - case int8: - return int(v.(int8)), nil - case int16: - return int(v.(int16)), nil - case int32: - return int(v.(int32)), nil - case int64: - return int(v.(int64)), nil - case uint: - return int(v.(uint)), nil - case uint8: - return int(v.(uint8)), nil - case uint16: - return int(v.(uint16)), nil - case uint32: - return int(v.(uint32)), nil - case uint64: - return int(v.(uint64)), nil + return v, nil + default: - return 0, fmt.Errorf("expects a numeric value") + return 0, fmt.Errorf("expects int or float64 value") } } From 531800c479915f58ab823f00fe1f5fa112695702 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Sun, 11 Mar 2018 16:30:52 -0700 Subject: [PATCH 284/728] scorch zap use roaring Add() instead of AddInt() This change invokes Add() directly as AddInt() is a convenience wrapper around Add(). --- index/scorch/segment/zap/new.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/index/scorch/segment/zap/new.go b/index/scorch/segment/zap/new.go index 3a8b2012d..f29711c06 100644 --- a/index/scorch/segment/zap/new.go +++ b/index/scorch/segment/zap/new.go @@ -334,7 +334,7 @@ func (s *interim) processDocument(docNum uint64, for term, tf := range tfs { pid := dict[term] - 1 bs := s.Postings[pid] - bs.AddInt(int(docNum)) + bs.Add(uint32(docNum)) s.FreqNorms[pid] = append(s.FreqNorms[pid], interimFreqNorm{ @@ -344,7 +344,7 @@ func (s *interim) processDocument(docNum uint64, if len(tf.Locations) > 0 { locBS := s.PostingsLocs[pid] - locBS.AddInt(int(docNum)) + locBS.Add(uint32(docNum)) locs := s.Locs[pid] From c4ceffe58430d4f4103d5cca7a0777dceebd8af6 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Sun, 11 Mar 2018 12:09:21 -0700 Subject: [PATCH 285/728] scorch zap sync Pool for interim data --- index/scorch/segment/zap/new.go | 93 +++++++++++++++++++++++++++------ 1 file changed, 78 insertions(+), 15 deletions(-) diff --git a/index/scorch/segment/zap/new.go b/index/scorch/segment/zap/new.go index f29711c06..c7a6b0ce4 100644 --- 
a/index/scorch/segment/zap/new.go +++ b/index/scorch/segment/zap/new.go @@ -19,6 +19,7 @@ import ( "encoding/binary" "math" "sort" + "sync" "github.com/RoaringBitmap/roaring" "github.com/Smerity/govarint" @@ -35,12 +36,11 @@ func AnalysisResultsToSegmentBase(results []*index.AnalysisResult, chunkFactor uint32) (*SegmentBase, error) { var br bytes.Buffer - s := interim{ - results: results, - chunkFactor: chunkFactor, - w: NewCountHashWriter(&br), - FieldsMap: map[string]uint16{}, - } + s := interimPool.Get().(*interim) + + s.results = results + s.chunkFactor = chunkFactor + s.w = NewCountHashWriter(&br) storedIndexOffset, fieldsIndexOffset, fdvIndexOffset, dictOffsets, err := s.convert() @@ -52,9 +52,13 @@ func AnalysisResultsToSegmentBase(results []*index.AnalysisResult, s.FieldsMap, s.FieldsInv, uint64(len(results)), storedIndexOffset, fieldsIndexOffset, fdvIndexOffset, dictOffsets) + interimPool.Put(s.cleanse()) + return sb, err } +var interimPool = sync.Pool{New: func() interface{} { return &interim{} }} + // interim holds temporary working data used while converting from // analysis results to a zap-encoded segment type interim struct { @@ -101,6 +105,41 @@ type interim struct { tmp1 []byte } +func (s *interim) cleanse() *interim { + s.results = nil + s.chunkFactor = 0 + s.w = nil + s.FieldsMap = nil + s.FieldsInv = s.FieldsInv[:0] + for i := range s.Dicts { + s.Dicts[i] = nil + } + s.Dicts = s.Dicts[:0] + for i := range s.DictKeys { + s.DictKeys[i] = s.DictKeys[i][:0] + } + s.DictKeys = s.DictKeys[:0] + for i := range s.IncludeDocValues { + s.IncludeDocValues[i] = false + } + s.IncludeDocValues = s.IncludeDocValues[:0] + for _, idn := range s.Postings { + idn.Clear() + } + s.Postings = s.Postings[:0] + for _, idn := range s.PostingsLocs { + idn.Clear() + } + s.PostingsLocs = s.PostingsLocs[:0] + s.FreqNorms = nil + s.Locs = nil + s.buf0.Reset() + s.tmp0 = s.tmp0[:0] + s.tmp1 = s.tmp1[:0] + + return s +} + func (s *interim) grabBuf(size int) []byte { buf := 
s.tmp0 if cap(buf) < size { @@ -130,6 +169,8 @@ type interimLoc struct { } func (s *interim) convert() (uint64, uint64, uint64, []uint64, error) { + s.FieldsMap = map[string]uint16{} + s.getOrDefineField("_id") // _id field is fieldID 0 for _, result := range s.results { @@ -143,12 +184,15 @@ func (s *interim) convert() (uint64, uint64, uint64, []uint64, error) { sort.Strings(s.FieldsInv[1:]) // keep _id as first field - s.FieldsMap = make(map[string]uint16, len(s.FieldsInv)) for fieldID, fieldName := range s.FieldsInv { s.FieldsMap[fieldName] = uint16(fieldID + 1) } - s.IncludeDocValues = make([]bool, len(s.FieldsInv)) + if cap(s.IncludeDocValues) >= len(s.FieldsInv) { + s.IncludeDocValues = s.IncludeDocValues[:len(s.FieldsInv)] + } else { + s.IncludeDocValues = make([]bool, len(s.FieldsInv)) + } s.prepareDicts() @@ -189,9 +233,18 @@ func (s *interim) getOrDefineField(fieldName string) int { fieldIDPlus1 = uint16(len(s.FieldsInv) + 1) s.FieldsMap[fieldName] = fieldIDPlus1 s.FieldsInv = append(s.FieldsInv, fieldName) + s.Dicts = append(s.Dicts, make(map[string]uint64)) - s.DictKeys = append(s.DictKeys, make([]string, 0)) + + n := len(s.DictKeys) + if n < cap(s.DictKeys) { + s.DictKeys = s.DictKeys[:n+1] + s.DictKeys[n] = s.DictKeys[n][:0] + } else { + s.DictKeys = append(s.DictKeys, []string(nil)) + } } + return int(fieldIDPlus1 - 1) } @@ -253,16 +306,25 @@ func (s *interim) prepareDicts() { numPostingsLists := pidNext - s.Postings = make([]*roaring.Bitmap, numPostingsLists) - for i := 0; i < numPostingsLists; i++ { - s.Postings[i] = roaring.New() + if cap(s.Postings) >= numPostingsLists { + s.Postings = s.Postings[:numPostingsLists] + } else { + s.Postings = make([]*roaring.Bitmap, numPostingsLists) + for i := 0; i < numPostingsLists; i++ { + s.Postings[i] = roaring.New() + } } - s.PostingsLocs = make([]*roaring.Bitmap, numPostingsLists) - for i := 0; i < numPostingsLists; i++ { - s.PostingsLocs[i] = roaring.New() + if cap(s.PostingsLocs) >= numPostingsLists { + 
s.PostingsLocs = s.PostingsLocs[:numPostingsLists] + } else { + s.PostingsLocs = make([]*roaring.Bitmap, numPostingsLists) + for i := 0; i < numPostingsLists; i++ { + s.PostingsLocs[i] = roaring.New() + } } + // TODO: reuse this. s.FreqNorms = make([][]interimFreqNorm, numPostingsLists) freqNormsBacking := make([]interimFreqNorm, totTFs) @@ -271,6 +333,7 @@ func (s *interim) prepareDicts() { freqNormsBacking = freqNormsBacking[numTerms:] } + // TODO: reuse this. s.Locs = make([][]interimLoc, numPostingsLists) locsBacking := make([]interimLoc, totLocs) From cad88096cacab844536d322312c0a59f150926c3 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Sun, 11 Mar 2018 16:30:34 -0700 Subject: [PATCH 286/728] scorch zap reuse roaring Bitmap during merge --- index/scorch/segment/zap/merge.go | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/index/scorch/segment/zap/merge.go b/index/scorch/segment/zap/merge.go index a934dfc35..07de0943c 100644 --- a/index/scorch/segment/zap/merge.go +++ b/index/scorch/segment/zap/merge.go @@ -183,6 +183,9 @@ func persistMergedRest(segments []*SegmentBase, dropsIn []*roaring.Bitmap, return nil, 0, err } + newRoaring := roaring.NewBitmap() + newRoaringLocs := roaring.NewBitmap() + // for each field for fieldID, fieldName := range fieldsInv { @@ -222,8 +225,8 @@ func persistMergedRest(segments []*SegmentBase, dropsIn []*roaring.Bitmap, var prevTerm []byte - newRoaring := roaring.NewBitmap() - newRoaringLocs := roaring.NewBitmap() + newRoaring.Clear() + newRoaringLocs.Clear() var lastDocNum, lastFreq, lastNorm uint64 @@ -262,8 +265,8 @@ func persistMergedRest(segments []*SegmentBase, dropsIn []*roaring.Bitmap, } } - newRoaring = roaring.NewBitmap() - newRoaringLocs = roaring.NewBitmap() + newRoaring.Clear() + newRoaringLocs.Clear() tfEncoder.Reset() locEncoder.Reset() From b1f39695217295421681d509744e55c0027b5f5d Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Sun, 11 Mar 2018 20:13:31 -0700 Subject: [PATCH 287/728] 
scorch zap reuse roaring Bitmap in postings lists --- index/scorch/segment/zap/dict.go | 12 ++++++++++++ index/scorch/segment/zap/posting.go | 8 ++++++-- 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/index/scorch/segment/zap/dict.go b/index/scorch/segment/zap/dict.go index e5d712686..3b8132f2c 100644 --- a/index/scorch/segment/zap/dict.go +++ b/index/scorch/segment/zap/dict.go @@ -68,7 +68,19 @@ func (d *Dictionary) postingsListInit(rv *PostingsList, except *roaring.Bitmap) if rv == nil { rv = &PostingsList{} } else { + postings := rv.postings + if postings != nil { + postings.Clear() + } + locBitmap := rv.locBitmap + if locBitmap != nil { + locBitmap.Clear() + } + *rv = PostingsList{} // clear the struct + + rv.postings = postings + rv.locBitmap = locBitmap } rv.sb = d.sb rv.except = except diff --git a/index/scorch/segment/zap/posting.go b/index/scorch/segment/zap/posting.go index f2df32bf7..bc533ad14 100644 --- a/index/scorch/segment/zap/posting.go +++ b/index/scorch/segment/zap/posting.go @@ -266,7 +266,9 @@ func (rv *PostingsList) read(postingsOffset uint64, d *Dictionary) error { locRoaringBytes := d.sb.mem[locBitmapOffset+uint64(read) : locBitmapOffset+uint64(read)+locBitmapLen] - rv.locBitmap = roaring.NewBitmap() + if rv.locBitmap == nil { + rv.locBitmap = roaring.NewBitmap() + } _, err := rv.locBitmap.FromBuffer(locRoaringBytes) if err != nil { return fmt.Errorf("error loading roaring bitmap of locations with hits: %v", err) @@ -278,7 +280,9 @@ func (rv *PostingsList) read(postingsOffset uint64, d *Dictionary) error { roaringBytes := d.sb.mem[postingsOffset+n : postingsOffset+n+postingsLen] - rv.postings = roaring.NewBitmap() + if rv.postings == nil { + rv.postings = roaring.NewBitmap() + } _, err = rv.postings.FromBuffer(roaringBytes) if err != nil { return fmt.Errorf("error loading roaring bitmap: %v", err) From 07901910e24dabd67f9e26c88a03a8675e0fd87d Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Sun, 11 Mar 2018 21:07:14 -0700 
Subject: [PATCH 288/728] scorch zap reuse roaring Bitmap in prepareDicts() slice growth In this change, if the postings/postingsLocs slices need to be grown, then copy over and reuse any of the preallocated roaring Bitmap's from the old slice. --- index/scorch/segment/zap/new.go | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/index/scorch/segment/zap/new.go b/index/scorch/segment/zap/new.go index c7a6b0ce4..5b625b995 100644 --- a/index/scorch/segment/zap/new.go +++ b/index/scorch/segment/zap/new.go @@ -309,19 +309,27 @@ func (s *interim) prepareDicts() { if cap(s.Postings) >= numPostingsLists { s.Postings = s.Postings[:numPostingsLists] } else { - s.Postings = make([]*roaring.Bitmap, numPostingsLists) + postings := make([]*roaring.Bitmap, numPostingsLists) + copy(postings, s.Postings[:cap(s.Postings)]) for i := 0; i < numPostingsLists; i++ { - s.Postings[i] = roaring.New() + if postings[i] == nil { + postings[i] = roaring.New() + } } + s.Postings = postings } if cap(s.PostingsLocs) >= numPostingsLists { s.PostingsLocs = s.PostingsLocs[:numPostingsLists] } else { - s.PostingsLocs = make([]*roaring.Bitmap, numPostingsLists) + postingsLocs := make([]*roaring.Bitmap, numPostingsLists) + copy(postingsLocs, s.PostingsLocs[:cap(s.PostingsLocs)]) for i := 0; i < numPostingsLists; i++ { - s.PostingsLocs[i] = roaring.New() + if postingsLocs[i] == nil { + postingsLocs[i] = roaring.New() + } } + s.PostingsLocs = postingsLocs } // TODO: reuse this. 
From dbfc5e913027c7dc5a85f9a9f91476babf8322da Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Mon, 12 Mar 2018 10:04:11 -0700 Subject: [PATCH 289/728] scorch zap reuse interim freq/norm/loc slices --- index/scorch/segment/zap/new.go | 68 ++++++++++++++++++++++++--------- 1 file changed, 49 insertions(+), 19 deletions(-) diff --git a/index/scorch/segment/zap/new.go b/index/scorch/segment/zap/new.go index 5b625b995..51971ba51 100644 --- a/index/scorch/segment/zap/new.go +++ b/index/scorch/segment/zap/new.go @@ -95,10 +95,15 @@ type interim struct { PostingsLocs []*roaring.Bitmap // postings id -> freq/norm's, one for each docNum in postings - FreqNorms [][]interimFreqNorm + FreqNorms [][]interimFreqNorm + freqNormsBacking []interimFreqNorm // postings id -> locs, one for each freq - Locs [][]interimLoc + Locs [][]interimLoc + locsBacking []interimLoc + + numTermsPerPostingsList []int // key is postings list id + numLocsPerPostingsList []int // key is postings list id buf0 bytes.Buffer tmp0 []byte @@ -131,8 +136,18 @@ func (s *interim) cleanse() *interim { idn.Clear() } s.PostingsLocs = s.PostingsLocs[:0] - s.FreqNorms = nil - s.Locs = nil + s.FreqNorms = s.FreqNorms[:0] + for i := range s.freqNormsBacking { + s.freqNormsBacking[i] = interimFreqNorm{} + } + s.freqNormsBacking = s.freqNormsBacking[:0] + s.Locs = s.Locs[:0] + for i := range s.locsBacking { + s.locsBacking[i] = interimLoc{} + } + s.locsBacking = s.locsBacking[:0] + s.numTermsPerPostingsList = s.numTermsPerPostingsList[:0] + s.numLocsPerPostingsList = s.numLocsPerPostingsList[:0] s.buf0.Reset() s.tmp0 = s.tmp0[:0] s.tmp1 = s.tmp1[:0] @@ -252,9 +267,6 @@ func (s *interim) getOrDefineField(fieldName string) int { func (s *interim) prepareDicts() { var pidNext int - numTermsPerPostingsList := make([]int, 0, 64) // key is postings list id - numLocsPerPostingsList := make([]int, 0, 64) // key is postings list id - var totTFs int var totLocs int @@ -271,14 +283,14 @@ func (s *interim) prepareDicts() { dict[term] 
= pidPlus1 dictKeys = append(dictKeys, term) - numTermsPerPostingsList = append(numTermsPerPostingsList, 0) - numLocsPerPostingsList = append(numLocsPerPostingsList, 0) + s.numTermsPerPostingsList = append(s.numTermsPerPostingsList, 0) + s.numLocsPerPostingsList = append(s.numLocsPerPostingsList, 0) } pid := pidPlus1 - 1 - numTermsPerPostingsList[pid] += 1 - numLocsPerPostingsList[pid] += len(tf.Locations) + s.numTermsPerPostingsList[pid] += 1 + s.numLocsPerPostingsList[pid] += len(tf.Locations) totLocs += len(tf.Locations) } @@ -332,20 +344,38 @@ func (s *interim) prepareDicts() { s.PostingsLocs = postingsLocs } - // TODO: reuse this. - s.FreqNorms = make([][]interimFreqNorm, numPostingsLists) + if cap(s.FreqNorms) >= numPostingsLists { + s.FreqNorms = s.FreqNorms[:numPostingsLists] + } else { + s.FreqNorms = make([][]interimFreqNorm, numPostingsLists) + } + + if cap(s.freqNormsBacking) >= totTFs { + s.freqNormsBacking = s.freqNormsBacking[:totTFs] + } else { + s.freqNormsBacking = make([]interimFreqNorm, totTFs) + } - freqNormsBacking := make([]interimFreqNorm, totTFs) - for pid, numTerms := range numTermsPerPostingsList { + freqNormsBacking := s.freqNormsBacking + for pid, numTerms := range s.numTermsPerPostingsList { s.FreqNorms[pid] = freqNormsBacking[0:0] freqNormsBacking = freqNormsBacking[numTerms:] } - // TODO: reuse this. 
- s.Locs = make([][]interimLoc, numPostingsLists) + if cap(s.Locs) >= numPostingsLists { + s.Locs = s.Locs[:numPostingsLists] + } else { + s.Locs = make([][]interimLoc, numPostingsLists) + } + + if cap(s.locsBacking) >= totLocs { + s.locsBacking = s.locsBacking[:totLocs] + } else { + s.locsBacking = make([]interimLoc, totLocs) + } - locsBacking := make([]interimLoc, totLocs) - for pid, numLocs := range numLocsPerPostingsList { + locsBacking := s.locsBacking + for pid, numLocs := range s.numLocsPerPostingsList { s.Locs[pid] = locsBacking[0:0] locsBacking = locsBacking[numLocs:] } From 19318194fa039bf85d6a1261ccfa91101f8f023c Mon Sep 17 00:00:00 2001 From: Sreekanth Sivasankaran Date: Tue, 13 Mar 2018 14:06:48 +0530 Subject: [PATCH 290/728] moving to new offset slice format --- cmd/bleve/cmd/zap/docvalue.go | 14 +-------- index/scorch/segment/zap/build.go | 2 +- index/scorch/segment/zap/contentcoder.go | 36 +++++++++++++++++++----- index/scorch/segment/zap/docvalues.go | 14 +-------- 4 files changed, 32 insertions(+), 34 deletions(-) diff --git a/cmd/bleve/cmd/zap/docvalue.go b/cmd/bleve/cmd/zap/docvalue.go index 234c45e6b..dcfa58de1 100644 --- a/cmd/bleve/cmd/zap/docvalue.go +++ b/cmd/bleve/cmd/zap/docvalue.go @@ -253,19 +253,7 @@ func getDocValueLocs(docNum uint64, metaHeader []zap.MetaData) (uint64, uint64) return metaHeader[i].DocNum >= docNum }) if i < len(metaHeader) && metaHeader[i].DocNum == docNum { - var start, end uint64 - if i > 0 { - start = metaHeader[i].DocDvOffset - } - // single element case - if i == 0 && len(metaHeader) == 1 { - end = metaHeader[i].DocDvOffset - } else if i < len(metaHeader)-1 { - end = metaHeader[i+1].DocDvOffset - } else { // for last element - end = start + metaHeader[0].DocDvOffset - } - return start, end + return zap.ReadDocValueBoundary(i, metaHeader) } return math.MaxUint64, math.MaxUint64 } diff --git a/index/scorch/segment/zap/build.go b/index/scorch/segment/zap/build.go index 8ec610953..28df9bd60 100644 --- 
a/index/scorch/segment/zap/build.go +++ b/index/scorch/segment/zap/build.go @@ -28,7 +28,7 @@ import ( "github.com/golang/snappy" ) -const version uint32 = 4 +const version uint32 = 6 const fieldNotUninverted = math.MaxUint64 diff --git a/index/scorch/segment/zap/contentcoder.go b/index/scorch/segment/zap/contentcoder.go index adaab2fa3..3bb904caa 100644 --- a/index/scorch/segment/zap/contentcoder.go +++ b/index/scorch/segment/zap/contentcoder.go @@ -95,13 +95,14 @@ func (c *chunkedContentCoder) flushContents() error { // convert the document data lens to data offsets if len(c.chunkMeta) > 1 { - c.chunkMeta[1].DocDvOffset, c.chunkMeta[0].DocDvOffset = - c.chunkMeta[0].DocDvOffset, c.chunkMeta[1].DocDvOffset - for i := 2; i < len(c.chunkMeta); i++ { - cur := c.chunkMeta[i].DocDvOffset - c.chunkMeta[i].DocDvOffset = c.chunkMeta[i-1].DocDvOffset + c.chunkMeta[0].DocDvOffset - c.chunkMeta[0].DocDvOffset = cur + var runningOffset uint64 + var index, i int + for i = 1; i < len(c.chunkMeta); i++ { + runningOffset += c.chunkMeta[i-1].DocDvOffset + c.chunkMeta[index].DocDvOffset = runningOffset + index++ } + c.chunkMeta[index].DocDvOffset = c.chunkMeta[i-1].DocDvOffset } // write out the metaData slice @@ -140,7 +141,7 @@ func (c *chunkedContentCoder) Add(docNum uint64, vals []byte) error { c.currChunk = chunk } - // mark the data length for this doc + // mark the data size for this doc dvSize, err := c.chunkBuf.Write(vals) if err != nil { return err @@ -181,3 +182,24 @@ func (c *chunkedContentCoder) Write(w io.Writer) (int, error) { } return tw, nil } + +// ReadDocValueBoundary elicits the start, end offsets from a +// starting offset based metaData header slice +func ReadDocValueBoundary(chunk int, metaHeaders []MetaData) (uint64, uint64) { + var start, end uint64 + if chunk > len(metaHeaders) { + return start, end + } + + if chunk > 0 { + start = metaHeaders[chunk-1].DocDvOffset + } + + if chunk < len(metaHeaders)-1 { + end = metaHeaders[chunk].DocDvOffset + } else { + 
end = start + metaHeaders[chunk].DocDvOffset + } + + return start, end +} diff --git a/index/scorch/segment/zap/docvalues.go b/index/scorch/segment/zap/docvalues.go index bbefe5a10..2399801a1 100644 --- a/index/scorch/segment/zap/docvalues.go +++ b/index/scorch/segment/zap/docvalues.go @@ -161,19 +161,7 @@ func (di *docValueIterator) getDocValueLocs(docNum uint64) (uint64, uint64) { return di.curChunkHeader[i].DocNum >= docNum }) if i < len(di.curChunkHeader) && di.curChunkHeader[i].DocNum == docNum { - var start, end uint64 - if i > 0 { - start = di.curChunkHeader[i].DocDvOffset - } - // single element case - if i == 0 && len(di.curChunkHeader) == 1 { - end = di.curChunkHeader[i].DocDvOffset - } else if i < len(di.curChunkHeader)-1 { - end = di.curChunkHeader[i+1].DocDvOffset - } else { // for last element - end = start + di.curChunkHeader[0].DocDvOffset - } - return start, end + return ReadDocValueBoundary(i, di.curChunkHeader) } return math.MaxUint64, math.MaxUint64 } From debbcd7d47715c2dc78d5007fca7a5e8e63184f6 Mon Sep 17 00:00:00 2001 From: Sreekanth Sivasankaran Date: Tue, 13 Mar 2018 17:29:05 +0530 Subject: [PATCH 291/728] adding maxsegment size limit checks --- index/scorch/merge.go | 5 ++ index/scorch/mergeplan/merge_plan.go | 17 +++++++ index/scorch/mergeplan/merge_plan_test.go | 58 +++++++++++++++++++++++ 3 files changed, 80 insertions(+) diff --git a/index/scorch/merge.go b/index/scorch/merge.go index ec2c8d4b3..41086ad3d 100644 --- a/index/scorch/merge.go +++ b/index/scorch/merge.go @@ -111,6 +111,11 @@ func (s *Scorch) parseMergePlannerOptions() (*mergeplan.MergePlanOptions, if err != nil { return &mergePlannerOptions, err } + + err = mergeplan.ValidateMergePlannerOptions(&mergePlannerOptions) + if err != nil { + return nil, err + } } return &mergePlannerOptions, nil } diff --git a/index/scorch/mergeplan/merge_plan.go b/index/scorch/mergeplan/merge_plan.go index 62f643f43..f0d6f162d 100644 --- a/index/scorch/mergeplan/merge_plan.go +++ 
b/index/scorch/mergeplan/merge_plan.go @@ -18,6 +18,7 @@ package mergeplan import ( + "errors" "fmt" "math" "sort" @@ -115,6 +116,14 @@ func (o *MergePlanOptions) RaiseToFloorSegmentSize(s int64) int64 { return o.FloorSegmentSize } +// MaxSegmentSizeLimit represents the maximum size of a segment, +// this limit comes as the roaring lib supports uint32. +const MaxSegmentSizeLimit = 1<<32 - 1 + +// ErrMaxSegmentSizeTooLarge is returned when the size of the segment +// exceeds the MaxSegmentSizeLimit +var ErrMaxSegmentSizeTooLarge = errors.New("MaxSegmentSize exceeds the size limit") + // Suggested default options. var DefaultMergePlanOptions = MergePlanOptions{ MaxSegmentsPerTier: 10, @@ -367,3 +376,11 @@ func ToBarChart(prefix string, barMax int, segments []Segment, plan *MergePlan) return strings.Join(rv, "\n") } + +// ValidateMergePlannerOptions validates the merge planner options +func ValidateMergePlannerOptions(options *MergePlanOptions) error { + if options.MaxSegmentSize > MaxSegmentSizeLimit { + return ErrMaxSegmentSizeTooLarge + } + return nil +} diff --git a/index/scorch/mergeplan/merge_plan_test.go b/index/scorch/mergeplan/merge_plan_test.go index 419ab8253..3adc1f4b8 100644 --- a/index/scorch/mergeplan/merge_plan_test.go +++ b/index/scorch/mergeplan/merge_plan_test.go @@ -17,10 +17,12 @@ package mergeplan import ( "encoding/json" "fmt" + "math/rand" "os" "reflect" "sort" "testing" + "time" ) // Implements the Segment interface for testing, @@ -401,6 +403,62 @@ func TestManySameSizedSegmentsWithDeletesBetweenMerges(t *testing.T) { } } +func TestValidateMergePlannerOptions(t *testing.T) { + o := &MergePlanOptions{ + MaxSegmentSize: 1 << 32, + MaxSegmentsPerTier: 3, + TierGrowth: 3.0, + SegmentsPerMergeTask: 3, + } + err := ValidateMergePlannerOptions(o) + if err != ErrMaxSegmentSizeTooLarge { + t.Error("Validation expected to fail as the MaxSegmentSize exceeds limit") + } +} + +func TestPlanMaxSegmentSizeLimit(t *testing.T) { + o := &MergePlanOptions{ + 
MaxSegmentSize: 20, + MaxSegmentsPerTier: 5, + TierGrowth: 3.0, + SegmentsPerMergeTask: 5, + FloorSegmentSize: 5, + } + segments := makeLinearSegments(20) + + s := rand.NewSource(time.Now().UnixNano()) + r := rand.New(s) + + max := 20 + min := 5 + randomInRange := func() int64 { + return int64(r.Intn(max-min) + min) + } + for i := 1; i < 20; i++ { + o.MaxSegmentSize = randomInRange() + plans, err := Plan(segments, o) + if err != nil { + t.Errorf("Plan failed, err: %v", err) + } + if len(plans.Tasks) == 0 { + t.Errorf("expected some plans with tasks") + } + + for _, task := range plans.Tasks { + var totalLiveSize int64 + for _, segs := range task.Segments { + totalLiveSize += segs.LiveSize() + + } + if totalLiveSize >= o.MaxSegmentSize { + t.Errorf("merged segments size: %d exceeding the MaxSegmentSize"+ + "limit: %d", totalLiveSize, o.MaxSegmentSize) + } + } + } + +} + // ---------------------------------------- type testCyclesSpec struct { From 715144d6323850d01ed970f8794692c1b99d5b9d Mon Sep 17 00:00:00 2001 From: abhinavdangeti Date: Tue, 13 Mar 2018 13:34:48 -0700 Subject: [PATCH 292/728] MB-27385: De-duplicate the list of requested fields De-duplicate the list of fields provided by the client as part of the search request, so as to not inadvertantly load the same stored field more than once. 
--- index_impl.go | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/index_impl.go b/index_impl.go index 68777f072..4d03b78af 100644 --- a/index_impl.go +++ b/index_impl.go @@ -534,7 +534,8 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr doc, err := indexReader.Document(hit.ID) if err == nil && doc != nil { if len(req.Fields) > 0 { - for _, f := range req.Fields { + fieldsToLoad := deDuplicate(req.Fields) + for _, f := range fieldsToLoad { for _, docF := range doc.Fields { if f == "*" || docF.Name() == f { var value interface{} @@ -830,3 +831,16 @@ func (f *indexImplFieldDict) Close() error { } return f.indexReader.Close() } + +// helper function to remove duplicate entries from slice of strings +func deDuplicate(fields []string) []string { + entries := make(map[string]struct{}) + ret := []string{} + for _, entry := range fields { + if _, exists := entries[entry]; !exists { + entries[entry] = struct{}{} + ret = append(ret, entry) + } + } + return ret +} From 7578ff7cb8df1279853e29684f5c43d7a5c9af70 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Tue, 13 Mar 2018 11:10:31 -0700 Subject: [PATCH 293/728] scorch zap optimize interim's reuse of vellum builders Since interim structs are now sync.Pool'ed, we can now also hold onto and reuse the associated vellum builder. 
--- index/scorch/segment/zap/new.go | 52 ++++++++++++++++++++------------- 1 file changed, 31 insertions(+), 21 deletions(-) diff --git a/index/scorch/segment/zap/new.go b/index/scorch/segment/zap/new.go index 51971ba51..4c9ec9c19 100644 --- a/index/scorch/segment/zap/new.go +++ b/index/scorch/segment/zap/new.go @@ -52,7 +52,9 @@ func AnalysisResultsToSegmentBase(results []*index.AnalysisResult, s.FieldsMap, s.FieldsInv, uint64(len(results)), storedIndexOffset, fieldsIndexOffset, fdvIndexOffset, dictOffsets) - interimPool.Put(s.cleanse()) + if err == nil && s.reset() == nil { + interimPool.Put(s) + } return sb, err } @@ -105,12 +107,16 @@ type interim struct { numTermsPerPostingsList []int // key is postings list id numLocsPerPostingsList []int // key is postings list id - buf0 bytes.Buffer + builder *vellum.Builder + builderBuf bytes.Buffer + + metaBuf bytes.Buffer + tmp0 []byte tmp1 []byte } -func (s *interim) cleanse() *interim { +func (s *interim) reset() (err error) { s.results = nil s.chunkFactor = 0 s.w = nil @@ -148,11 +154,15 @@ func (s *interim) cleanse() *interim { s.locsBacking = s.locsBacking[:0] s.numTermsPerPostingsList = s.numTermsPerPostingsList[:0] s.numLocsPerPostingsList = s.numLocsPerPostingsList[:0] - s.buf0.Reset() + s.builderBuf.Reset() + if s.builder != nil { + err = s.builder.Reset(&s.builderBuf) + } + s.metaBuf.Reset() s.tmp0 = s.tmp0[:0] s.tmp1 = s.tmp1[:0] - return s + return err } func (s *interim) grabBuf(size int) []byte { @@ -475,8 +485,7 @@ func (s *interim) processDocument(docNum uint64, func (s *interim) writeStoredFields() ( storedIndexOffset uint64, err error) { - metaBuf := &s.buf0 - metaEncoder := govarint.NewU64Base128Encoder(metaBuf) + metaEncoder := govarint.NewU64Base128Encoder(&s.metaBuf) data, compressed := s.tmp0[:0], s.tmp1[:0] defer func() { s.tmp0, s.tmp1 = data, compressed }() @@ -512,7 +521,7 @@ func (s *interim) writeStoredFields() ( var curr int - metaBuf.Reset() + s.metaBuf.Reset() data = data[:0] compressed = 
compressed[:0] @@ -529,7 +538,7 @@ func (s *interim) writeStoredFields() ( } metaEncoder.Close() - metaBytes := metaBuf.Bytes() + metaBytes := s.metaBuf.Bytes() compressed = snappy.Encode(compressed, data) @@ -565,8 +574,8 @@ func (s *interim) writeStoredFields() ( return storedIndexOffset, nil } -func (s *interim) writeDicts() (uint64, []uint64, error) { - dictOffsets := make([]uint64, len(s.FieldsInv)) +func (s *interim) writeDicts() (fdvIndexOffset uint64, dictOffsets []uint64, err error) { + dictOffsets = make([]uint64, len(s.FieldsInv)) fdvOffsets := make([]uint64, len(s.FieldsInv)) @@ -578,10 +587,11 @@ func (s *interim) writeDicts() (uint64, []uint64, error) { var docTermMap [][]byte - s.buf0.Reset() - builder, err := vellum.New(&s.buf0, nil) - if err != nil { - return 0, nil, err + if s.builder == nil { + s.builder, err = vellum.New(&s.builderBuf, nil) + if err != nil { + return 0, nil, err + } } for fieldID, terms := range s.DictKeys { @@ -658,7 +668,7 @@ func (s *interim) writeDicts() (uint64, []uint64, error) { } if postingsOffset > uint64(0) { - err = builder.Insert([]byte(term), postingsOffset) + err = s.builder.Insert([]byte(term), postingsOffset) if err != nil { return 0, nil, err } @@ -668,7 +678,7 @@ func (s *interim) writeDicts() (uint64, []uint64, error) { locEncoder.Reset() } - err = builder.Close() + err = s.builder.Close() if err != nil { return 0, nil, err } @@ -676,7 +686,7 @@ func (s *interim) writeDicts() (uint64, []uint64, error) { // record where this dictionary starts dictOffsets[fieldID] = uint64(s.w.Count()) - vellumData := s.buf0.Bytes() + vellumData := s.builderBuf.Bytes() // write out the length of the vellum data n := binary.PutUvarint(buf, uint64(len(vellumData))) @@ -692,9 +702,9 @@ func (s *interim) writeDicts() (uint64, []uint64, error) { } // reset vellum for reuse - s.buf0.Reset() + s.builderBuf.Reset() - err = builder.Reset(&s.buf0) + err = s.builder.Reset(&s.builderBuf) if err != nil { return 0, nil, err } @@ -727,7 +737,7 
@@ func (s *interim) writeDicts() (uint64, []uint64, error) { } } - fdvIndexOffset := uint64(s.w.Count()) + fdvIndexOffset = uint64(s.w.Count()) for _, fdvOffset := range fdvOffsets { n := binary.PutUvarint(buf, fdvOffset) From 4af65a78460e4168ed1e8474b30d2b39a202e93c Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Tue, 13 Mar 2018 11:44:56 -0700 Subject: [PATCH 294/728] scorch zap prealloc buf via estimate from previous interim work --- index/scorch/segment/zap/new.go | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/index/scorch/segment/zap/new.go b/index/scorch/segment/zap/new.go index 4c9ec9c19..dd2740fb2 100644 --- a/index/scorch/segment/zap/new.go +++ b/index/scorch/segment/zap/new.go @@ -34,10 +34,18 @@ import ( // SegmentBase from analysis results func AnalysisResultsToSegmentBase(results []*index.AnalysisResult, chunkFactor uint32) (*SegmentBase, error) { - var br bytes.Buffer - s := interimPool.Get().(*interim) + var br bytes.Buffer + if s.lastNumDocs > 0 { + // use previous results to initialize the buf with an estimate + // size, but note that the interim instance comes from a + // global interimPool, so multiple scorch instances indexing + // different docs can lead to low quality estimates + avgBytesPerDoc := s.lastOutSize / s.lastNumDocs + br.Grow(avgBytesPerDoc * (len(results) + 1)) + } + s.results = results s.chunkFactor = chunkFactor s.w = NewCountHashWriter(&br) @@ -53,6 +61,8 @@ func AnalysisResultsToSegmentBase(results []*index.AnalysisResult, storedIndexOffset, fieldsIndexOffset, fdvIndexOffset, dictOffsets) if err == nil && s.reset() == nil { + s.lastNumDocs = len(results) + s.lastOutSize = len(br.Bytes()) interimPool.Put(s) } @@ -114,6 +124,9 @@ type interim struct { tmp0 []byte tmp1 []byte + + lastNumDocs int + lastOutSize int } func (s *interim) reset() (err error) { @@ -161,6 +174,8 @@ func (s *interim) reset() (err error) { s.metaBuf.Reset() s.tmp0 = s.tmp0[:0] s.tmp1 = s.tmp1[:0] + s.lastNumDocs = 0 + 
s.lastOutSize = 0 return err } From 441065a41b436b0cc8ff1e4d7d86a7c45668ffbc Mon Sep 17 00:00:00 2001 From: Sreekanth Sivasankaran Date: Wed, 14 Mar 2018 10:42:19 +0530 Subject: [PATCH 295/728] comments,simplification --- index/scorch/segment/zap/contentcoder.go | 34 ++++--------------- index/scorch/segment/zap/contentcoder_test.go | 2 +- 2 files changed, 7 insertions(+), 29 deletions(-) diff --git a/index/scorch/segment/zap/contentcoder.go b/index/scorch/segment/zap/contentcoder.go index 3bb904caa..30197c0e5 100644 --- a/index/scorch/segment/zap/contentcoder.go +++ b/index/scorch/segment/zap/contentcoder.go @@ -93,18 +93,6 @@ func (c *chunkedContentCoder) flushContents() error { return err } - // convert the document data lens to data offsets - if len(c.chunkMeta) > 1 { - var runningOffset uint64 - var index, i int - for i = 1; i < len(c.chunkMeta); i++ { - runningOffset += c.chunkMeta[i-1].DocDvOffset - c.chunkMeta[index].DocDvOffset = runningOffset - index++ - } - c.chunkMeta[index].DocDvOffset = c.chunkMeta[i-1].DocDvOffset - } - // write out the metaData slice for _, meta := range c.chunkMeta { _, err := writeUvarints(&c.chunkMetaBuf, meta.DocNum, meta.DocDvOffset) @@ -141,7 +129,8 @@ func (c *chunkedContentCoder) Add(docNum uint64, vals []byte) error { c.currChunk = chunk } - // mark the data size for this doc + // get the starting offset for this doc + dvOffset := c.chunkBuf.Len() dvSize, err := c.chunkBuf.Write(vals) if err != nil { return err @@ -149,7 +138,7 @@ func (c *chunkedContentCoder) Add(docNum uint64, vals []byte) error { c.chunkMeta = append(c.chunkMeta, MetaData{ DocNum: docNum, - DocDvOffset: uint64(dvSize), + DocDvOffset: uint64(dvOffset + dvSize), }) return nil } @@ -184,22 +173,11 @@ func (c *chunkedContentCoder) Write(w io.Writer) (int, error) { } // ReadDocValueBoundary elicits the start, end offsets from a -// starting offset based metaData header slice +// metaData header slice func ReadDocValueBoundary(chunk int, metaHeaders []MetaData) 
(uint64, uint64) { - var start, end uint64 - if chunk > len(metaHeaders) { - return start, end - } - + var start uint64 if chunk > 0 { start = metaHeaders[chunk-1].DocDvOffset } - - if chunk < len(metaHeaders)-1 { - end = metaHeaders[chunk].DocDvOffset - } else { - end = start + metaHeaders[chunk].DocDvOffset - } - - return start, end + return start, metaHeaders[chunk].DocDvOffset } diff --git a/index/scorch/segment/zap/contentcoder_test.go b/index/scorch/segment/zap/contentcoder_test.go index c6b3df82c..fce84714b 100644 --- a/index/scorch/segment/zap/contentcoder_test.go +++ b/index/scorch/segment/zap/contentcoder_test.go @@ -69,7 +69,7 @@ func TestChunkContentCoder(t *testing.T) { } if !reflect.DeepEqual(test.expected, string(actual.Bytes())) { - t.Errorf("got % s, expected % s", string(actual.Bytes()), test.expected) + t.Errorf("got:%s, expected:%s", string(actual.Bytes()), test.expected) } } } From 1775602958abb6358f2f490d9914865280d026fa Mon Sep 17 00:00:00 2001 From: Sreekanth Sivasankaran Date: Thu, 15 Mar 2018 14:40:00 +0530 Subject: [PATCH 296/728] posting iterator array positions clean up, max segment size limit adjustment for hit-1 optimisation --- index/scorch/mergeplan/merge_plan.go | 6 +++--- index/scorch/segment/zap/posting.go | 2 ++ 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/index/scorch/mergeplan/merge_plan.go b/index/scorch/mergeplan/merge_plan.go index f0d6f162d..b09e5381e 100644 --- a/index/scorch/mergeplan/merge_plan.go +++ b/index/scorch/mergeplan/merge_plan.go @@ -117,14 +117,14 @@ func (o *MergePlanOptions) RaiseToFloorSegmentSize(s int64) int64 { } // MaxSegmentSizeLimit represents the maximum size of a segment, -// this limit comes as the roaring lib supports uint32. -const MaxSegmentSizeLimit = 1<<32 - 1 +// this limit comes with hit-1 optimisation/max encoding limit uint31. 
+const MaxSegmentSizeLimit = 1<<31 - 1 // ErrMaxSegmentSizeTooLarge is returned when the size of the segment // exceeds the MaxSegmentSizeLimit var ErrMaxSegmentSizeTooLarge = errors.New("MaxSegmentSize exceeds the size limit") -// Suggested default options. +// DefaultMergePlanOptions suggests the default options. var DefaultMergePlanOptions = MergePlanOptions{ MaxSegmentsPerTier: 10, MaxSegmentSize: 5000000, diff --git a/index/scorch/segment/zap/posting.go b/index/scorch/segment/zap/posting.go index bc533ad14..fbe703c10 100644 --- a/index/scorch/segment/zap/posting.go +++ b/index/scorch/segment/zap/posting.go @@ -444,6 +444,8 @@ func (i *PostingsIterator) readLocation(l *Location) error { l.end = end if numArrayPos > 0 { l.ap = make([]uint64, int(numArrayPos)) + } else { + l.ap = l.ap[:0] } } From d1155c223a431bdd0e564ff5f15547aa9fe8e640 Mon Sep 17 00:00:00 2001 From: Sreekanth Sivasankaran Date: Tue, 13 Mar 2018 12:13:48 +0530 Subject: [PATCH 297/728] zap version bump, changed the offset slice format ,UTs --- index/scorch/segment/zap/build.go | 2 +- index/scorch/segment/zap/contentcoder.go | 11 +- index/scorch/segment/zap/contentcoder_test.go | 2 +- index/scorch/segment/zap/docvalues.go | 8 +- index/scorch/segment/zap/intcoder.go | 68 +++++------ index/scorch/segment/zap/intcoder_test.go | 114 ++++++++---------- index/scorch/segment/zap/posting.go | 8 +- 7 files changed, 93 insertions(+), 120 deletions(-) diff --git a/index/scorch/segment/zap/build.go b/index/scorch/segment/zap/build.go index 30ae8d774..cd56e3b54 100644 --- a/index/scorch/segment/zap/build.go +++ b/index/scorch/segment/zap/build.go @@ -22,7 +22,7 @@ import ( "github.com/Smerity/govarint" ) -const version uint32 = 4 +const version uint32 = 5 const fieldNotUninverted = math.MaxUint64 diff --git a/index/scorch/segment/zap/contentcoder.go b/index/scorch/segment/zap/contentcoder.go index c731f52c4..5ba15d69c 100644 --- a/index/scorch/segment/zap/contentcoder.go +++ 
b/index/scorch/segment/zap/contentcoder.go @@ -157,13 +157,10 @@ func (c *chunkedContentCoder) Write(w io.Writer) (int, error) { return tw, err } - if len(c.chunkLens) > 1 { - chunkLengthsToOffsets(c.chunkLens) - } - - // write out the chunk starting offsets - for _, chunkLen := range c.chunkLens { - n := binary.PutUvarint(buf, uint64(chunkLen)) + chunkOffsets := modifyLengthsToEndOffsets(c.chunkLens) + // write out the chunk offsets + for _, chunkOffset := range chunkOffsets { + n := binary.PutUvarint(buf, chunkOffset) nw, err = w.Write(buf[:n]) tw += nw if err != nil { diff --git a/index/scorch/segment/zap/contentcoder_test.go b/index/scorch/segment/zap/contentcoder_test.go index 0e45b783e..da80f9479 100644 --- a/index/scorch/segment/zap/contentcoder_test.go +++ b/index/scorch/segment/zap/contentcoder_test.go @@ -46,7 +46,7 @@ func TestChunkContentCoder(t *testing.T) { []byte("scorch"), }, - expected: string([]byte{0x02, 0x0c, 0x0c, 0x01, 0x00, 0x00, 0x06, 0x06, 0x14, + expected: string([]byte{0x02, 0x0c, 0x18, 0x01, 0x00, 0x00, 0x06, 0x06, 0x14, 0x75, 0x70, 0x73, 0x69, 0x64, 0x65, 0x01, 0x01, 0x00, 0x06, 0x06, 0x14, 0x73, 0x63, 0x6f, 0x72, 0x63, 0x68}), }, diff --git a/index/scorch/segment/zap/docvalues.go b/index/scorch/segment/zap/docvalues.go index 882ff43dd..61b83877f 100644 --- a/index/scorch/segment/zap/docvalues.go +++ b/index/scorch/segment/zap/docvalues.go @@ -69,7 +69,7 @@ func (s *SegmentBase) loadFieldDocValueIterator(field string, } // read the number of chunks, chunk lengths - var offset, clen uint64 + var offset, loc uint64 numChunks, read := binary.Uvarint(s.mem[fieldDvLoc : fieldDvLoc+binary.MaxVarintLen64]) if read <= 0 { return nil, fmt.Errorf("failed to read the field "+ @@ -83,11 +83,11 @@ func (s *SegmentBase) loadFieldDocValueIterator(field string, chunkOffsets: make([]uint64, int(numChunks)), } for i := 0; i < int(numChunks); i++ { - clen, read = binary.Uvarint(s.mem[fieldDvLoc+offset : fieldDvLoc+offset+binary.MaxVarintLen64]) + loc, 
read = binary.Uvarint(s.mem[fieldDvLoc+offset : fieldDvLoc+offset+binary.MaxVarintLen64]) if read <= 0 { - return nil, fmt.Errorf("corrupted chunk length during segment load") + return nil, fmt.Errorf("corrupted chunk offset during segment load") } - fdvIter.chunkOffsets[i] = clen + fdvIter.chunkOffsets[i] = loc offset += uint64(read) } diff --git a/index/scorch/segment/zap/intcoder.go b/index/scorch/segment/zap/intcoder.go index 79fe5156e..81ef8bb2e 100644 --- a/index/scorch/segment/zap/intcoder.go +++ b/index/scorch/segment/zap/intcoder.go @@ -111,15 +111,13 @@ func (c *chunkedIntCoder) Write(w io.Writer) (int, error) { } buf := c.buf - // convert the chunk lengths into starting chunk offsets - if len(c.chunkLens) > 1 { - chunkLengthsToOffsets(c.chunkLens) - } + // convert the chunk lengths into chunk offsets + chunkOffsets := modifyLengthsToEndOffsets(c.chunkLens) - // write out the number of chunks & each chunk starting offsets - n := binary.PutUvarint(buf, uint64(len(c.chunkLens))) - for _, chunkLen := range c.chunkLens { - n += binary.PutUvarint(buf[n:], uint64(chunkLen)) + // write out the number of chunks & each chunk offsets + n := binary.PutUvarint(buf, uint64(len(chunkOffsets))) + for _, chunkOffset := range chunkOffsets { + n += binary.PutUvarint(buf[n:], chunkOffset) } tw, err := w.Write(buf[:n]) @@ -140,41 +138,35 @@ func (c *chunkedIntCoder) FinalSize() int { return len(c.final) } -// chunkLengthsToOffsets converts the chunk length array -// to a chunk starting offset array. The readChunkBoundary +// modifyLengthsToEndOffsets converts the chunk length array +// to a chunk offset array. The readChunkBoundary // will figure out the start and end of every chunk from -// these offsets. The starting offset of the first/single -// array element will always be zero and this position is -// used for storing the size of the current last item in -// the array at any given point. 
-// For eg: -// Lens -> 5 5 5 5 => 5 5 10 15 -// Lens -> 0 5 0 5 => 5 0 5 5 -// Lens -> 0 0 0 5 => 5 0 0 0 -// Lens -> 5 0 0 0 => 0 5 5 5 -// Lens -> 0 5 0 0 => 0 0 5 5 -// Lens -> 0 0 5 0 => 0 0 0 5 -func chunkLengthsToOffsets(lengths []uint64) { - lengths[1], lengths[0] = lengths[0], lengths[1] - for i := 2; i < len(lengths); i++ { - cur := lengths[i] - lengths[i] = lengths[i-1] + lengths[0] - lengths[0] = cur +// these offsets. Starting offset of i'th index is stored +// in i-1'th position except for 0'th index and ending offset +// is stored at i'th index position. +// For 0'th element, starting position is always zero. +// eg: +// Lens -> 5 5 5 5 => 5 10 15 20 +// Lens -> 0 5 0 5 => 0 5 5 10 +// Lens -> 0 0 0 5 => 0 0 0 5 +// Lens -> 5 0 0 0 => 5 5 5 5 +// Lens -> 0 5 0 0 => 0 5 5 5 +// Lens -> 0 0 5 0 => 0 0 5 5 +func modifyLengthsToEndOffsets(lengths []uint64) []uint64 { + var runningOffset uint64 + var index, i int + for i = 1; i <= len(lengths); i++ { + runningOffset += lengths[i-1] + lengths[index] = runningOffset + index++ } + return lengths } func readChunkBoundary(chunk int, offsets []uint64) (uint64, uint64) { - var start, end uint64 + var start uint64 if chunk > 0 { - start = offsets[chunk] - } - // single element case - if chunk == 0 && len(offsets) == 1 { - end = offsets[chunk] - } else if chunk < len(offsets)-1 { - end = offsets[chunk+1] - } else { // for last element - end = start + offsets[0] + start = offsets[chunk-1] } - return start, end + return start, offsets[chunk] } diff --git a/index/scorch/segment/zap/intcoder_test.go b/index/scorch/segment/zap/intcoder_test.go index 8c77eab61..952e0669d 100644 --- a/index/scorch/segment/zap/intcoder_test.go +++ b/index/scorch/segment/zap/intcoder_test.go @@ -46,8 +46,8 @@ func TestChunkIntCoder(t *testing.T) { []uint64{3}, []uint64{7}, }, - // 2 chunks, chunk-0 length 1, chunk-1 length 1, value 3, value 7 - expected: []byte{0x2, 0x1, 0x1, 0x3, 0x7}, + // 2 chunks, chunk-0 offset 1, chunk-1 offset 2, 
value 3, value 7 + expected: []byte{0x2, 0x1, 0x2, 0x3, 0x7}, }, } @@ -80,40 +80,48 @@ func TestChunkLengthToOffsets(t *testing.T) { }{ { lengths: []uint64{5, 5, 5, 5, 5}, - expectedOffsets: []uint64{5, 5, 10, 15, 20}, + expectedOffsets: []uint64{5, 10, 15, 20, 25}, }, { lengths: []uint64{0, 5, 0, 5, 0}, - expectedOffsets: []uint64{0, 0, 5, 5, 10}, + expectedOffsets: []uint64{0, 5, 5, 10, 10}, }, { lengths: []uint64{0, 0, 0, 0, 5}, - expectedOffsets: []uint64{5, 0, 0, 0, 0}, + expectedOffsets: []uint64{0, 0, 0, 0, 5}, }, { lengths: []uint64{5, 0, 0, 0, 0}, - expectedOffsets: []uint64{0, 5, 5, 5, 5}, + expectedOffsets: []uint64{5, 5, 5, 5, 5}, }, { lengths: []uint64{0, 5, 0, 0, 0}, - expectedOffsets: []uint64{0, 0, 5, 5, 5}, + expectedOffsets: []uint64{0, 5, 5, 5, 5}, }, { lengths: []uint64{0, 0, 0, 5, 0}, - expectedOffsets: []uint64{0, 0, 0, 0, 5}, + expectedOffsets: []uint64{0, 0, 0, 5, 5}, }, { lengths: []uint64{0, 0, 0, 5, 5}, - expectedOffsets: []uint64{5, 0, 0, 0, 5}, + expectedOffsets: []uint64{0, 0, 0, 5, 10}, }, { lengths: []uint64{5, 5, 5, 0, 0}, - expectedOffsets: []uint64{0, 5, 10, 15, 15}, + expectedOffsets: []uint64{5, 10, 15, 15, 15}, + }, + { + lengths: []uint64{5}, + expectedOffsets: []uint64{5}, + }, + { + lengths: []uint64{5, 5}, + expectedOffsets: []uint64{5, 10}, }, } for i, test := range tests { - chunkLengthsToOffsets(test.lengths) + modifyLengthsToEndOffsets(test.lengths) if !reflect.DeepEqual(test.expectedOffsets, test.lengths) { t.Errorf("Test: %d failed, got %+v, expected %+v", i, test.lengths, test.expectedOffsets) } @@ -129,86 +137,80 @@ func TestChunkReadBoundaryFromOffsets(t *testing.T) { expectedEnd uint64 }{ { - offsets: []uint64{5, 5, 10, 15, 20}, + offsets: []uint64{5, 10, 15, 20, 25}, chunkNumber: 4, expectedStart: 20, expectedEnd: 25, }, { - offsets: []uint64{5, 5, 10, 15, 20}, + offsets: []uint64{5, 10, 15, 20, 25}, chunkNumber: 0, expectedStart: 0, expectedEnd: 5, }, { - offsets: []uint64{5, 5, 10, 15, 20}, + offsets: 
[]uint64{5, 10, 15, 20, 25}, chunkNumber: 2, expectedStart: 10, expectedEnd: 15, }, { - offsets: []uint64{0, 0, 5, 5, 10}, + offsets: []uint64{0, 5, 5, 10, 10}, chunkNumber: 4, expectedStart: 10, expectedEnd: 10, }, { - offsets: []uint64{0, 0, 5, 5, 10}, + offsets: []uint64{0, 5, 5, 10, 10}, chunkNumber: 1, expectedStart: 0, expectedEnd: 5, }, { - offsets: []uint64{5, 0, 0, 0, 0}, + offsets: []uint64{5, 5, 5, 5, 5}, chunkNumber: 0, expectedStart: 0, - expectedEnd: 0, + expectedEnd: 5, }, { - offsets: []uint64{5, 0, 0, 0, 0}, + offsets: []uint64{5, 5, 5, 5, 5}, chunkNumber: 4, - expectedStart: 0, + expectedStart: 5, expectedEnd: 5, }, { - offsets: []uint64{5, 0, 0, 0, 0}, + offsets: []uint64{5, 5, 5, 5, 5}, chunkNumber: 1, - expectedStart: 0, - expectedEnd: 0, + expectedStart: 5, + expectedEnd: 5, }, { offsets: []uint64{0, 5, 5, 5, 5}, chunkNumber: 1, - expectedStart: 5, + expectedStart: 0, expectedEnd: 5, }, { offsets: []uint64{0, 5, 5, 5, 5}, chunkNumber: 0, expectedStart: 0, - expectedEnd: 5, + expectedEnd: 0, }, { - offsets: []uint64{0, 0, 5, 5, 5}, + offsets: []uint64{0, 0, 0, 5, 5}, chunkNumber: 2, - expectedStart: 5, - expectedEnd: 5, + expectedStart: 0, + expectedEnd: 0, }, { - offsets: []uint64{0, 0, 5, 5, 5}, + offsets: []uint64{0, 0, 0, 5, 5}, chunkNumber: 1, expectedStart: 0, - expectedEnd: 5, + expectedEnd: 0, }, { offsets: []uint64{0, 0, 0, 0, 5}, chunkNumber: 4, - expectedStart: 5, - expectedEnd: 5, - }, - { - offsets: []uint64{0, 0, 0, 0, 5}, - chunkNumber: 3, expectedStart: 0, expectedEnd: 5, }, @@ -219,59 +221,41 @@ func TestChunkReadBoundaryFromOffsets(t *testing.T) { expectedEnd: 0, }, { - offsets: []uint64{5, 0, 0, 0, 5}, - chunkNumber: 0, - expectedStart: 0, - expectedEnd: 0, - }, - { - offsets: []uint64{5, 0, 0, 0, 5}, - chunkNumber: 1, - expectedStart: 0, - expectedEnd: 0, - }, - { - offsets: []uint64{5, 0, 0, 0, 5}, - chunkNumber: 3, - expectedStart: 0, - expectedEnd: 5, - }, - { - offsets: []uint64{5, 0, 0, 0, 5}, - chunkNumber: 4, - 
expectedStart: 5, - expectedEnd: 10, - }, - { - offsets: []uint64{0, 5, 10, 15, 15}, + offsets: []uint64{5, 10, 15, 15, 15}, chunkNumber: 0, expectedStart: 0, expectedEnd: 5, }, { - offsets: []uint64{0, 5, 10, 15, 15}, + offsets: []uint64{5, 10, 15, 15, 15}, chunkNumber: 1, expectedStart: 5, expectedEnd: 10, }, { - offsets: []uint64{0, 5, 10, 15, 15}, + offsets: []uint64{5, 10, 15, 15, 15}, chunkNumber: 2, expectedStart: 10, expectedEnd: 15, }, { - offsets: []uint64{0, 5, 10, 15, 15}, + offsets: []uint64{5, 10, 15, 15, 15}, chunkNumber: 3, expectedStart: 15, expectedEnd: 15, }, { - offsets: []uint64{0, 5, 10, 15, 15}, + offsets: []uint64{5, 10, 15, 15, 15}, chunkNumber: 4, expectedStart: 15, expectedEnd: 15, }, + { + offsets: []uint64{5}, + chunkNumber: 0, + expectedStart: 0, + expectedEnd: 5, + }, } for i, test := range tests { diff --git a/index/scorch/segment/zap/posting.go b/index/scorch/segment/zap/posting.go index c3fc2330c..bdbb47e31 100644 --- a/index/scorch/segment/zap/posting.go +++ b/index/scorch/segment/zap/posting.go @@ -189,9 +189,9 @@ func (p *PostingsList) iterator(rv *PostingsIterator) *PostingsIterator { var numFreqChunks uint64 numFreqChunks, read = binary.Uvarint(p.sb.mem[p.freqOffset+n : p.freqOffset+n+binary.MaxVarintLen64]) n += uint64(read) - rv.freqChunkLens = make([]uint64, int(numFreqChunks)) + rv.freqChunkOffsets = make([]uint64, int(numFreqChunks)) for i := 0; i < int(numFreqChunks); i++ { - rv.freqChunkLens[i], read = binary.Uvarint(p.sb.mem[p.freqOffset+n : p.freqOffset+n+binary.MaxVarintLen64]) + rv.freqChunkOffsets[i], read = binary.Uvarint(p.sb.mem[p.freqOffset+n : p.freqOffset+n+binary.MaxVarintLen64]) n += uint64(read) } rv.freqChunkStart = p.freqOffset + n @@ -201,9 +201,9 @@ func (p *PostingsList) iterator(rv *PostingsIterator) *PostingsIterator { var numLocChunks uint64 numLocChunks, read = binary.Uvarint(p.sb.mem[p.locOffset+n : p.locOffset+n+binary.MaxVarintLen64]) n += uint64(read) - rv.locChunkLens = make([]uint64, 
int(numLocChunks)) + rv.locChunkOffsets = make([]uint64, int(numLocChunks)) for i := 0; i < int(numLocChunks); i++ { - rv.locChunkLens[i], read = binary.Uvarint(p.sb.mem[p.locOffset+n : p.locOffset+n+binary.MaxVarintLen64]) + rv.locChunkOffsets[i], read = binary.Uvarint(p.sb.mem[p.locOffset+n : p.locOffset+n+binary.MaxVarintLen64]) n += uint64(read) } rv.locChunkStart = p.locOffset + n From 65fed52d0b5b6cbb8e116a308d76bdf10dd96b52 Mon Sep 17 00:00:00 2001 From: abhinavdangeti Date: Thu, 15 Mar 2018 13:20:32 -0700 Subject: [PATCH 298/728] Do not account IndexReader's size in the query RAM estimate Since its just the pointer size of the IndexReader that is being accounted for while estimating the RAM needed to execute a search query, get rid of the Size() API in the IndexReader interface. --- index/index.go | 2 -- index/scorch/snapshot_index.go | 7 ------- index/upsidedown/index_reader.go | 5 ----- index/upsidedown/reader.go | 2 +- search/facets_builder.go | 3 +-- search/searcher/search_boolean.go | 3 +-- search/searcher/search_disjunction.go | 1 - search/searcher/search_match_all.go | 1 - search/searcher/search_match_none.go | 3 +-- search/searcher/search_phrase.go | 3 +-- search/searcher/search_term.go | 1 - 11 files changed, 5 insertions(+), 26 deletions(-) diff --git a/index/index.go b/index/index.go index c25d7fa46..e5a69297b 100644 --- a/index/index.go +++ b/index/index.go @@ -94,8 +94,6 @@ type IndexReader interface { DumpFields() chan interface{} Close() error - - Size() int } // FieldTerms contains the terms used by a document, keyed by field diff --git a/index/scorch/snapshot_index.go b/index/scorch/snapshot_index.go index 9394f391e..5289b1434 100644 --- a/index/scorch/snapshot_index.go +++ b/index/scorch/snapshot_index.go @@ -27,7 +27,6 @@ import ( "github.com/blevesearch/bleve/document" "github.com/blevesearch/bleve/index" "github.com/blevesearch/bleve/index/scorch/segment" - "github.com/blevesearch/bleve/size" ) type asynchSegmentResult struct { @@ 
-90,12 +89,6 @@ func (i *IndexSnapshot) Close() error { return i.DecRef() } -func (i *IndexSnapshot) Size() int { - // Just return the size of the pointer for estimating the overhead - // during Search, a reference of the IndexSnapshot serves as the reader. - return size.SizeOfPtr -} - func (i *IndexSnapshot) newIndexSnapshotFieldDict(field string, makeItr func(i segment.TermDictionary) segment.DictionaryIterator) (*IndexSnapshotFieldDict, error) { results := make(chan *asynchSegmentResult) diff --git a/index/upsidedown/index_reader.go b/index/upsidedown/index_reader.go index 4e5755219..e045f67c7 100644 --- a/index/upsidedown/index_reader.go +++ b/index/upsidedown/index_reader.go @@ -20,7 +20,6 @@ import ( "github.com/blevesearch/bleve/document" "github.com/blevesearch/bleve/index" "github.com/blevesearch/bleve/index/store" - "github.com/blevesearch/bleve/size" ) var reflectStaticSizeIndexReader int @@ -36,10 +35,6 @@ type IndexReader struct { docCount uint64 } -func (i *IndexReader) Size() int { - return reflectStaticSizeIndexReader + size.SizeOfPtr -} - func (i *IndexReader) TermFieldReader(term []byte, fieldName string, includeFreq, includeNorm, includeTermVectors bool) (index.TermFieldReader, error) { fieldIndex, fieldExists := i.index.fieldCache.FieldNamed(fieldName, false) if fieldExists { diff --git a/index/upsidedown/reader.go b/index/upsidedown/reader.go index 646d4d8ac..bc0fef119 100644 --- a/index/upsidedown/reader.go +++ b/index/upsidedown/reader.go @@ -203,7 +203,7 @@ type UpsideDownCouchDocIDReader struct { func (r *UpsideDownCouchDocIDReader) Size() int { sizeInBytes := reflectStaticSizeUpsideDownCouchDocIDReader + - r.indexReader.Size() + reflectStaticSizeIndexReader + size.SizeOfPtr for _, entry := range r.only { sizeInBytes += size.SizeOfString + len(entry) diff --git a/search/facets_builder.go b/search/facets_builder.go index 34e45af84..c5d41e2d3 100644 --- a/search/facets_builder.go +++ b/search/facets_builder.go @@ -66,8 +66,7 @@ func 
NewFacetsBuilder(indexReader index.IndexReader) *FacetsBuilder { } func (fb *FacetsBuilder) Size() int { - sizeInBytes := reflectStaticSizeFacetsBuilder + size.SizeOfPtr + - fb.indexReader.Size() + sizeInBytes := reflectStaticSizeFacetsBuilder + size.SizeOfPtr for k, v := range fb.facets { sizeInBytes += size.SizeOfString + len(k) + diff --git a/search/searcher/search_boolean.go b/search/searcher/search_boolean.go index b87337e1e..f7ee2cd83 100644 --- a/search/searcher/search_boolean.go +++ b/search/searcher/search_boolean.go @@ -62,8 +62,7 @@ func NewBooleanSearcher(indexReader index.IndexReader, mustSearcher search.Searc } func (s *BooleanSearcher) Size() int { - sizeInBytes := reflectStaticSizeBooleanSearcher + size.SizeOfPtr + - s.indexReader.Size() + sizeInBytes := reflectStaticSizeBooleanSearcher + size.SizeOfPtr if s.mustSearcher != nil { sizeInBytes += s.mustSearcher.Size() diff --git a/search/searcher/search_disjunction.go b/search/searcher/search_disjunction.go index 119bac970..32d614801 100644 --- a/search/searcher/search_disjunction.go +++ b/search/searcher/search_disjunction.go @@ -101,7 +101,6 @@ func newDisjunctionSearcher(indexReader index.IndexReader, func (s *DisjunctionSearcher) Size() int { sizeInBytes := reflectStaticSizeDisjunctionSearcher + size.SizeOfPtr + - s.indexReader.Size() + s.scorer.Size() for _, entry := range s.searchers { diff --git a/search/searcher/search_match_all.go b/search/searcher/search_match_all.go index 3f34e5918..bb6640122 100644 --- a/search/searcher/search_match_all.go +++ b/search/searcher/search_match_all.go @@ -58,7 +58,6 @@ func NewMatchAllSearcher(indexReader index.IndexReader, boost float64, options s func (s *MatchAllSearcher) Size() int { return reflectStaticSizeMatchAllSearcher + size.SizeOfPtr + - s.indexReader.Size() + s.reader.Size() + s.scorer.Size() } diff --git a/search/searcher/search_match_none.go b/search/searcher/search_match_none.go index 6b50b3222..a345e17f7 100644 --- 
a/search/searcher/search_match_none.go +++ b/search/searcher/search_match_none.go @@ -40,8 +40,7 @@ func NewMatchNoneSearcher(indexReader index.IndexReader) (*MatchNoneSearcher, er } func (s *MatchNoneSearcher) Size() int { - return reflectStaticSizeMatchNoneSearcher + size.SizeOfPtr + - s.indexReader.Size() + return reflectStaticSizeMatchNoneSearcher + size.SizeOfPtr } func (s *MatchNoneSearcher) Count() uint64 { diff --git a/search/searcher/search_phrase.go b/search/searcher/search_phrase.go index 23a359bd7..0026794dd 100644 --- a/search/searcher/search_phrase.go +++ b/search/searcher/search_phrase.go @@ -42,8 +42,7 @@ type PhraseSearcher struct { } func (s *PhraseSearcher) Size() int { - sizeInBytes := reflectStaticSizePhraseSearcher + size.SizeOfPtr + - s.indexReader.Size() + sizeInBytes := reflectStaticSizePhraseSearcher + size.SizeOfPtr if s.mustSearcher != nil { sizeInBytes += s.mustSearcher.Size() diff --git a/search/searcher/search_term.go b/search/searcher/search_term.go index 576d6643a..b99e4c263 100644 --- a/search/searcher/search_term.go +++ b/search/searcher/search_term.go @@ -75,7 +75,6 @@ func NewTermSearcherBytes(indexReader index.IndexReader, term []byte, field stri func (s *TermSearcher) Size() int { return reflectStaticSizeTermSearcher + size.SizeOfPtr + - s.indexReader.Size() + s.reader.Size() + s.tfd.Size() + s.scorer.Size() From 45e0e5c666df945cfd015b085b76ecc3ce9f0771 Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Thu, 15 Mar 2018 16:31:04 -0400 Subject: [PATCH 299/728] memoize the size of an entire index snapshot by memoizing the size of index snapshots and their constituent parts, we significantly reduce the amount of time that the lock is held in the app_herder, when calculating the total memory used --- index/scorch/introducer.go | 4 ++++ index/scorch/scorch.go | 25 ++++++++++--------------- index/scorch/segment/zap/build.go | 1 + index/scorch/segment/zap/segment.go | 8 +++++++- index/scorch/snapshot_index.go | 20 ++++++++++++++++++++ 
index/scorch/snapshot_segment.go | 26 ++++++++++++++++++++++---- 6 files changed, 64 insertions(+), 20 deletions(-) diff --git a/index/scorch/introducer.go b/index/scorch/introducer.go index 7f1d0073e..98769ed56 100644 --- a/index/scorch/introducer.go +++ b/index/scorch/introducer.go @@ -186,6 +186,7 @@ func (s *Scorch) introduceSegment(next *segmentIntroduction) error { } } + newSnapshot.updateSize() s.rootLock.Lock() if next.persisted != nil { s.rootPersisted = append(s.rootPersisted, next.persisted) @@ -251,6 +252,7 @@ func (s *Scorch) introducePersist(persist *persistIntroduction) { newIndexSnapshot.internal[k] = v } + newIndexSnapshot.updateSize() s.rootLock.Lock() rootPrev := s.root s.root = newIndexSnapshot @@ -348,6 +350,7 @@ func (s *Scorch) introduceMerge(nextMerge *segmentMerge) { newSnapshot.AddRef() // 1 ref for the nextMerge.notify response + newSnapshot.updateSize() s.rootLock.Lock() // swap in new index snapshot newSnapshot.epoch = s.nextSnapshotEpoch @@ -409,6 +412,7 @@ func (s *Scorch) revertToSnapshot(revertTo *snapshotReversion) error { s.rootPersisted = append(s.rootPersisted, revertTo.persisted) } + newSnapshot.updateSize() // swap in new snapshot rootPrev := s.root s.root = newSnapshot diff --git a/index/scorch/scorch.go b/index/scorch/scorch.go index 4bda2d34b..85f9082c5 100644 --- a/index/scorch/scorch.go +++ b/index/scorch/scorch.go @@ -397,11 +397,15 @@ func (s *Scorch) DeleteInternal(key []byte) error { // Reader returns a low-level accessor on the index data. Close it to // release associated resources. 
func (s *Scorch) Reader() (index.IndexReader, error) { + return s.currentSnapshot(), nil +} + +func (s *Scorch) currentSnapshot() *IndexSnapshot { s.rootLock.RLock() rv := s.root rv.AddRef() s.rootLock.RUnlock() - return rv, nil + return rv } func (s *Scorch) Stats() json.Marshaler { @@ -484,20 +488,11 @@ func (s *Scorch) AddEligibleForRemoval(epoch uint64) { } func (s *Scorch) MemoryUsed() uint64 { - var memUsed int - s.rootLock.RLock() - if s.root != nil { - for _, segmentSnapshot := range s.root.segment { - memUsed += 8 /* size of id -> uint64 */ + - segmentSnapshot.segment.Size() - if segmentSnapshot.deleted != nil { - memUsed += int(segmentSnapshot.deleted.GetSizeInBytes()) - } - memUsed += segmentSnapshot.cachedDocs.size() - } - } - s.rootLock.RUnlock() - return uint64(memUsed) + indexSnapshot := s.currentSnapshot() + defer func() { + _ = indexSnapshot.Close() + }() + return uint64(indexSnapshot.SizeFull()) } func (s *Scorch) markIneligibleForRemoval(filename string) { diff --git a/index/scorch/segment/zap/build.go b/index/scorch/segment/zap/build.go index 30ae8d774..e7abe7dfb 100644 --- a/index/scorch/segment/zap/build.go +++ b/index/scorch/segment/zap/build.go @@ -138,6 +138,7 @@ func InitSegmentBase(mem []byte, memCRC uint32, chunkFactor uint32, dictLocs: dictLocs, fieldDvIterMap: make(map[uint16]*docValueIterator), } + sb.updateSize() err := sb.loadDvIterators() if err != nil { diff --git a/index/scorch/segment/zap/segment.go b/index/scorch/segment/zap/segment.go index e1d2a14f7..0d2ad072f 100644 --- a/index/scorch/segment/zap/segment.go +++ b/index/scorch/segment/zap/segment.go @@ -63,6 +63,7 @@ func Open(path string) (segment.Segment, error) { path: path, refs: 1, } + rv.SegmentBase.updateSize() err = rv.loadConfig() if err != nil { @@ -99,9 +100,14 @@ type SegmentBase struct { docValueOffset uint64 dictLocs []uint64 fieldDvIterMap map[uint16]*docValueIterator // naive chunk cache per field + size uint64 } func (sb *SegmentBase) Size() int { + return 
int(sb.size) +} + +func (sb *SegmentBase) updateSize() { sizeInBytes := reflectStaticSizeSegmentBase + len(sb.mem) @@ -124,7 +130,7 @@ func (sb *SegmentBase) Size() int { } } - return sizeInBytes + sb.size = uint64(sizeInBytes) } func (sb *SegmentBase) AddRef() {} diff --git a/index/scorch/snapshot_index.go b/index/scorch/snapshot_index.go index 9394f391e..ddf42c60f 100644 --- a/index/scorch/snapshot_index.go +++ b/index/scorch/snapshot_index.go @@ -19,6 +19,7 @@ import ( "container/heap" "encoding/binary" "fmt" + "reflect" "sort" "sync" "sync/atomic" @@ -41,12 +42,20 @@ type asynchSegmentResult struct { err error } +var reflectStaticSizeIndexSnapshot int + +func init() { + var is interface{} = IndexSnapshot{} + reflectStaticSizeIndexSnapshot = int(reflect.TypeOf(is).Size()) +} + type IndexSnapshot struct { parent *Scorch segment []*SegmentSnapshot offsets []uint64 internal map[string][]byte epoch uint64 + size uint64 m sync.Mutex // Protects the fields that follow. refs int64 @@ -96,6 +105,17 @@ func (i *IndexSnapshot) Size() int { return size.SizeOfPtr } +func (i *IndexSnapshot) SizeFull() int { + return int(i.size) +} + +func (i *IndexSnapshot) updateSize() { + i.size += uint64(reflectStaticSizeIndexSnapshot) + for _, s := range i.segment { + i.size += uint64(s.Size()) + } +} + func (i *IndexSnapshot) newIndexSnapshotFieldDict(field string, makeItr func(i segment.TermDictionary) segment.DictionaryIterator) (*IndexSnapshotFieldDict, error) { results := make(chan *asynchSegmentResult) diff --git a/index/scorch/snapshot_segment.go b/index/scorch/snapshot_segment.go index cdfe317fe..edf52a6e7 100644 --- a/index/scorch/snapshot_segment.go +++ b/index/scorch/snapshot_segment.go @@ -19,6 +19,7 @@ import ( "github.com/RoaringBitmap/roaring" "github.com/blevesearch/bleve/index/scorch/segment" + "github.com/blevesearch/bleve/size" ) var TermSeparator byte = 0xff @@ -128,15 +129,26 @@ func (s *SegmentSnapshot) Fields() []string { return s.segment.Fields() } +func (s 
*SegmentSnapshot) Size() (rv int) { + rv = s.segment.Size() + if s.deleted != nil { + rv += int(s.deleted.GetSizeInBytes()) + } + rv += s.cachedDocs.Size() + return +} + type cachedFieldDocs struct { readyCh chan struct{} // closed when the cachedFieldDocs.docs is ready to be used. err error // Non-nil if there was an error when preparing this cachedFieldDocs. docs map[uint64][]byte // Keyed by localDocNum, value is a list of terms delimited by 0xFF. + size uint64 } func (cfd *cachedFieldDocs) prepareFields(field string, ss *SegmentSnapshot) { defer close(cfd.readyCh) + cfd.size += uint64(size.SizeOfUint64) /* size field */ dict, err := ss.segment.Dictionary(field) if err != nil { cfd.err = err @@ -152,12 +164,14 @@ func (cfd *cachedFieldDocs) prepareFields(field string, ss *SegmentSnapshot) { return } + cfd.size += uint64(size.SizeOfUint64) /* map key */ postingsItr := postings.Iterator() nextPosting, err2 := postingsItr.Next() for err2 == nil && nextPosting != nil { docNum := nextPosting.Number() cfd.docs[docNum] = append(cfd.docs[docNum], []byte(next.Term)...) 
cfd.docs[docNum] = append(cfd.docs[docNum], TermSeparator) + cfd.size += uint64(len(next.Term) + 1) // map value nextPosting, err2 = postingsItr.Next() } @@ -178,6 +192,7 @@ func (cfd *cachedFieldDocs) prepareFields(field string, ss *SegmentSnapshot) { type cachedDocs struct { m sync.Mutex // As the cache is asynchronously prepared, need a lock cache map[string]*cachedFieldDocs // Keyed by field + size uint64 } func (c *cachedDocs) prepareFields(wantedFields []string, ss *SegmentSnapshot) error { @@ -208,14 +223,18 @@ func (c *cachedDocs) prepareFields(wantedFields []string, ss *SegmentSnapshot) e } c.m.Lock() } + c.updateSizeLOCKED() c.m.Unlock() return nil } -func (c *cachedDocs) size() int { +func (c *cachedDocs) Size() int { + return int(c.size) +} + +func (c *cachedDocs) updateSizeLOCKED() { sizeInBytes := 0 - c.m.Lock() for k, v := range c.cache { // cachedFieldDocs sizeInBytes += len(k) if v != nil { @@ -224,6 +243,5 @@ func (c *cachedDocs) size() int { } } } - c.m.Unlock() - return sizeInBytes + c.size = uint64(sizeInBytes) } From 11ff31c2f993d98770ec79b481bccf87da206e7c Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Fri, 16 Mar 2018 11:31:47 -0400 Subject: [PATCH 300/728] rename SizeFull to Size --- index/scorch/scorch.go | 2 +- index/scorch/snapshot_index.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/index/scorch/scorch.go b/index/scorch/scorch.go index 85f9082c5..cc47cda86 100644 --- a/index/scorch/scorch.go +++ b/index/scorch/scorch.go @@ -492,7 +492,7 @@ func (s *Scorch) MemoryUsed() uint64 { defer func() { _ = indexSnapshot.Close() }() - return uint64(indexSnapshot.SizeFull()) + return uint64(indexSnapshot.Size()) } func (s *Scorch) markIneligibleForRemoval(filename string) { diff --git a/index/scorch/snapshot_index.go b/index/scorch/snapshot_index.go index 755c4637f..6f4b0288e 100644 --- a/index/scorch/snapshot_index.go +++ b/index/scorch/snapshot_index.go @@ -98,7 +98,7 @@ func (i *IndexSnapshot) Close() error { return 
i.DecRef() } -func (i *IndexSnapshot) SizeFull() int { +func (i *IndexSnapshot) Size() int { return int(i.size) } From e52eb84e37958bd7aed9c06412e131318400472e Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Fri, 16 Mar 2018 09:51:23 -0700 Subject: [PATCH 301/728] scorch zap optimize merge when deletion bitmap is empty This change detects whether a deletion bitmap is empty, and treats that as a nil bitmap, which allows further postings iterator codepaths to avoid roaring bitmap operations (like, AndNot(docNums, drops)). --- index/scorch/segment/zap/merge.go | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/index/scorch/segment/zap/merge.go b/index/scorch/segment/zap/merge.go index 07de0943c..d0d08c101 100644 --- a/index/scorch/segment/zap/merge.go +++ b/index/scorch/segment/zap/merge.go @@ -207,7 +207,11 @@ func persistMergedRest(segments []*SegmentBase, dropsIn []*roaring.Bitmap, } if itr != nil { newDocNums = append(newDocNums, newDocNumsIn[segmentI]) - drops = append(drops, dropsIn[segmentI]) + if dropsIn[segmentI] != nil && !dropsIn[segmentI].IsEmpty() { + drops = append(drops, dropsIn[segmentI]) + } else { + drops = append(drops, nil) + } dicts = append(dicts, dict) itrs = append(itrs, itr) } From b411e65234c9a2805e9de826a70612a29045b956 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Fri, 16 Mar 2018 09:58:53 -0700 Subject: [PATCH 302/728] scorch zap optimize postingsIterator reuse of freq/locChunkOffsets --- index/scorch/segment/zap/posting.go | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/index/scorch/segment/zap/posting.go b/index/scorch/segment/zap/posting.go index 6bc6e9268..081ec5f6c 100644 --- a/index/scorch/segment/zap/posting.go +++ b/index/scorch/segment/zap/posting.go @@ -152,6 +152,9 @@ func (p *PostingsList) iterator(rv *PostingsIterator) *PostingsIterator { } locDecoder := rv.locDecoder + freqChunkOffsets := rv.freqChunkOffsets[:0] + locChunkOffsets := rv.locChunkOffsets[:0] + buf := 
rv.buf *rv = PostingsIterator{} // clear the struct @@ -162,6 +165,9 @@ func (p *PostingsList) iterator(rv *PostingsIterator) *PostingsIterator { rv.locReader = locReader rv.locDecoder = locDecoder + rv.freqChunkOffsets = freqChunkOffsets + rv.locChunkOffsets = locChunkOffsets + rv.buf = buf } rv.postings = p @@ -189,7 +195,11 @@ func (p *PostingsList) iterator(rv *PostingsIterator) *PostingsIterator { var numFreqChunks uint64 numFreqChunks, read = binary.Uvarint(p.sb.mem[p.freqOffset+n : p.freqOffset+n+binary.MaxVarintLen64]) n += uint64(read) - rv.freqChunkOffsets = make([]uint64, int(numFreqChunks)) + if cap(rv.freqChunkOffsets) >= int(numFreqChunks) { + rv.freqChunkOffsets = rv.freqChunkOffsets[:int(numFreqChunks)] + } else { + rv.freqChunkOffsets = make([]uint64, int(numFreqChunks)) + } for i := 0; i < int(numFreqChunks); i++ { rv.freqChunkOffsets[i], read = binary.Uvarint(p.sb.mem[p.freqOffset+n : p.freqOffset+n+binary.MaxVarintLen64]) n += uint64(read) @@ -201,7 +211,11 @@ func (p *PostingsList) iterator(rv *PostingsIterator) *PostingsIterator { var numLocChunks uint64 numLocChunks, read = binary.Uvarint(p.sb.mem[p.locOffset+n : p.locOffset+n+binary.MaxVarintLen64]) n += uint64(read) - rv.locChunkOffsets = make([]uint64, int(numLocChunks)) + if cap(rv.locChunkOffsets) >= int(numLocChunks) { + rv.locChunkOffsets = rv.locChunkOffsets[:int(numLocChunks)] + } else { + rv.locChunkOffsets = make([]uint64, int(numLocChunks)) + } for i := 0; i < int(numLocChunks); i++ { rv.locChunkOffsets[i], read = binary.Uvarint(p.sb.mem[p.locOffset+n : p.locOffset+n+binary.MaxVarintLen64]) n += uint64(read) From 60bdf6d247d1611bc18b1f654ea6ba48f0bf3a49 Mon Sep 17 00:00:00 2001 From: abhinavdangeti Date: Fri, 16 Mar 2018 11:43:25 -0700 Subject: [PATCH 303/728] Return an error when the snapshotEpoch is invalid Avoiding this stacktrace (SIGSEGV) while using bleve scorch cmd-line utility when snapshotEpoch provided is invalid: 
github.com/blevesearch/bleve/index/scorch.(*IndexSnapshot).Segments(...) /Users/abhinavdangeti/Documents/couchbaseV/godeps/src/github.com/blevesearch/bleve/index/scorch/snapshot_index.go:56 github.com/blevesearch/bleve/cmd/bleve/cmd/scorch.glob..func1(0x1f347e0, 0xc4201f1400, 0x2, 0x2, 0x0, 0x0) /Users/abhinavdangeti/Documents/couchbaseV/godeps/src/github.com/blevesearch/bleve/cmd/bleve/cmd/scorch/ascii.go:43 +0xe4 github.com/blevesearch/bleve/cmd/bleve/vendor/github.com/spf13/cobra.(*Command).execute(0x1f347e0, 0xc4201f12e0, 0x2, 0x2, 0x1f347e0, 0xc4201f12e0) /Users/abhinavdangeti/Documents/couchbaseV/godeps/src/github.com/blevesearch/bleve/cmd/bleve/vendor/github.com/spf13/cobra/command.go:646 +0x3e8 github.com/blevesearch/bleve/cmd/bleve/vendor/github.com/spf13/cobra.(*Command).ExecuteC(0x1f334c0, 0x0, 0x0, 0x0) /Users/abhinavdangeti/Documents/couchbaseV/godeps/src/github.com/blevesearch/bleve/cmd/bleve/vendor/github.com/spf13/cobra/command.go:737 +0x2fe github.com/blevesearch/bleve/cmd/bleve/vendor/github.com/spf13/cobra.(*Command).Execute(0x1f334c0, 0x0, 0x0) /Users/abhinavdangeti/Documents/couchbaseV/godeps/src/github.com/blevesearch/bleve/cmd/bleve/vendor/github.com/spf13/cobra/command.go:695 +0x2b github.com/blevesearch/bleve/cmd/bleve/cmd.Execute() /Users/abhinavdangeti/Documents/couchbaseV/godeps/src/github.com/blevesearch/bleve/cmd/bleve/cmd/root.go:74 +0x31 main.main() /Users/abhinavdangeti/Documents/couchbaseV/goproj/src/github.com/couchbase/cbft/cmd/cbft-bleve/main.go:39 +0x1cb --- index/scorch/persister.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/index/scorch/persister.go b/index/scorch/persister.go index ccb0c1f21..dda4bdfbb 100644 --- a/index/scorch/persister.go +++ b/index/scorch/persister.go @@ -514,7 +514,7 @@ func (s *Scorch) LoadSnapshot(epoch uint64) (rv *IndexSnapshot, err error) { snapshotKey := segment.EncodeUvarintAscending(nil, epoch) snapshot := snapshots.Bucket(snapshotKey) if snapshot == nil { - return nil + 
return fmt.Errorf("snapshot with epoch: %v - doesn't exist", epoch) } rv, err = s.loadSnapshot(snapshot) return err From 5df53c8e1ff6677ba1920f8abb0e14195e5273c3 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Fri, 16 Mar 2018 11:49:50 -0700 Subject: [PATCH 304/728] scorch zap file merger uses 1MB buffered writer pprof of bleve-blast was showing file merging was in syscall/write a lot. The bufio.NewWriter() provides a default buffer size of 4K, which is too small, and using bufio.NewWriterSize(1MB buffer size) leads to syscall/write dropping out of the file merging flame graphs. --- index/scorch/segment/zap/merge.go | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/index/scorch/segment/zap/merge.go b/index/scorch/segment/zap/merge.go index d0d08c101..f3b6a68e7 100644 --- a/index/scorch/segment/zap/merge.go +++ b/index/scorch/segment/zap/merge.go @@ -29,6 +29,8 @@ import ( "github.com/golang/snappy" ) +var DefaultFileMergerBufferSize = 1024 * 1024 + const docDropped = math.MaxUint64 // sentinel docNum to represent a deleted doc // Merge takes a slice of zap segments and bit masks describing which @@ -55,7 +57,7 @@ func Merge(segments []*Segment, drops []*roaring.Bitmap, path string, } // buffer the output - br := bufio.NewWriter(f) + br := bufio.NewWriterSize(f, DefaultFileMergerBufferSize) // wrap it for counting (tracking offsets) cr := NewCountHashWriter(br) From 980ce9ebb300d66abedcc430277fd228d4b0d772 Mon Sep 17 00:00:00 2001 From: Sreekanth Sivasankaran Date: Mon, 19 Mar 2018 11:29:58 +0530 Subject: [PATCH 305/728] MB-28753 - document number "xxx" not found err with update workload Introducer was incorrectly updating the offsets slice of segments, by considering only the live doc count while computing the "running". This can result in incorrectly computing the residing segment as well as the local doc numbers while loading a document after a search hit. 
--- index/scorch/introducer.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/index/scorch/introducer.go b/index/scorch/introducer.go index 98769ed56..627d4e4cd 100644 --- a/index/scorch/introducer.go +++ b/index/scorch/introducer.go @@ -154,7 +154,7 @@ func (s *Scorch) introduceSegment(next *segmentIntroduction) error { newSnapshot.segment = append(newSnapshot.segment, newss) root.segment[i].segment.AddRef() newSnapshot.offsets = append(newSnapshot.offsets, running) - running += root.segment[i].Count() + running += newss.segment.Count() } } @@ -315,7 +315,7 @@ func (s *Scorch) introduceMerge(nextMerge *segmentMerge) { }) root.segment[i].segment.AddRef() newSnapshot.offsets = append(newSnapshot.offsets, running) - running += root.segment[i].Count() + running += root.segment[i].segment.Count() } } From 1ef41101bac733727e9930eeab96c458b5ebe68a Mon Sep 17 00:00:00 2001 From: Sreekanth Sivasankaran Date: Mon, 19 Mar 2018 17:20:49 +0530 Subject: [PATCH 306/728] vellum adoption for regex and fuzzy queries --- index/index.go | 7 ++++++ index/scorch/segment/empty.go | 9 +++++++ index/scorch/segment/mem/dict.go | 24 ++++++++++++++++++ index/scorch/segment/segment.go | 2 ++ index/scorch/segment/zap/dict.go | 42 ++++++++++++++++++++++++++++++++ index/scorch/snapshot_index.go | 14 +++++++++++ index/scorch/snapshot_segment.go | 9 +++++++ search/searcher/search_fuzzy.go | 22 ++++++++++++++++- search/searcher/search_regexp.go | 41 ++++++++++++++++++++++++------- 9 files changed, 160 insertions(+), 10 deletions(-) diff --git a/index/index.go b/index/index.go index e5a69297b..96fb7d25e 100644 --- a/index/index.go +++ b/index/index.go @@ -96,6 +96,13 @@ type IndexReader interface { Close() error } +// IndexReaderAdv is an optional interface for advanced users +// Hope to have a better name here... 
+type IndexReaderAdv interface { + FieldDictRegex(field string, regex []byte) (FieldDict, error) + FieldDictFuzzy(field string, term []byte, fuzziness int) (FieldDict, error) +} + // FieldTerms contains the terms used by a document, keyed by field type FieldTerms map[string][]string diff --git a/index/scorch/segment/empty.go b/index/scorch/segment/empty.go index 6c19f60f9..918875e17 100644 --- a/index/scorch/segment/empty.go +++ b/index/scorch/segment/empty.go @@ -76,6 +76,15 @@ func (e *EmptyDictionary) RangeIterator(start, end string) DictionaryIterator { return &EmptyDictionaryIterator{} } +func (e *EmptyDictionary) RegexIterator(start string) DictionaryIterator { + return &EmptyDictionaryIterator{} +} + +func (e *EmptyDictionary) FuzzyIterator(term string, + fuzziness int) DictionaryIterator { + return &EmptyDictionaryIterator{} +} + type EmptyDictionaryIterator struct{} func (e *EmptyDictionaryIterator) Next() (*index.DictEntry, error) { diff --git a/index/scorch/segment/mem/dict.go b/index/scorch/segment/mem/dict.go index 9f5a873ae..2877f9453 100644 --- a/index/scorch/segment/mem/dict.go +++ b/index/scorch/segment/mem/dict.go @@ -98,6 +98,30 @@ func (d *Dictionary) RangeIterator(start, end string) segment.DictionaryIterator } } +// RegexIterator returns an iterator which only visits terms matching +// the given regex expression. +// TODO complete the implementation +func (d *Dictionary) RegexIterator(regex string) segment.DictionaryIterator { + offset := sort.SearchStrings(d.segment.DictKeys[d.fieldID], regex) + return &DictionaryIterator{ + d: d, + offset: offset, + prefix: regex, + } +} + +// FuzzyIterator returns an iterator which only visits terms matching +// the given edit distance. 
+// TODO complete the implementation +func (d *Dictionary) FuzzyIterator(term string, fuzziness int) segment.DictionaryIterator { + offset := sort.SearchStrings(d.segment.DictKeys[d.fieldID], term) + return &DictionaryIterator{ + d: d, + offset: offset, + prefix: term, + } +} + // DictionaryIterator is an iterator for term dictionary type DictionaryIterator struct { d *Dictionary diff --git a/index/scorch/segment/segment.go b/index/scorch/segment/segment.go index 8eee5f75f..cf3b21f41 100644 --- a/index/scorch/segment/segment.go +++ b/index/scorch/segment/segment.go @@ -48,6 +48,8 @@ type TermDictionary interface { Iterator() DictionaryIterator PrefixIterator(prefix string) DictionaryIterator RangeIterator(start, end string) DictionaryIterator + RegexIterator(regex string) DictionaryIterator + FuzzyIterator(term string, fuzziness int) DictionaryIterator } type DictionaryIterator interface { diff --git a/index/scorch/segment/zap/dict.go b/index/scorch/segment/zap/dict.go index 3b8132f2c..2281ec044 100644 --- a/index/scorch/segment/zap/dict.go +++ b/index/scorch/segment/zap/dict.go @@ -21,6 +21,7 @@ import ( "github.com/blevesearch/bleve/index" "github.com/blevesearch/bleve/index/scorch/segment" "github.com/couchbase/vellum" + "github.com/couchbase/vellum/levenshtein" "github.com/couchbase/vellum/regexp" ) @@ -148,6 +149,47 @@ func (d *Dictionary) RangeIterator(start, end string) segment.DictionaryIterator return rv } +// RegexIterator returns an iterator which only visits terms having the +// the specified regex +func (d *Dictionary) RegexIterator(regex string) segment.DictionaryIterator { + rv := &DictionaryIterator{ + d: d, + } + + if d.fst != nil { + r, err := regexp.New(regex) + if err == nil { + itr, err := d.fst.Search(r, nil, nil) + if err == nil { + rv.itr = itr + } + } + } + + return rv +} + +// FuzzyIterator returns an iterator which only visits terms having the +// the specified edit/levenshtein distance +func (d *Dictionary) FuzzyIterator(term string, + 
fuzziness int) segment.DictionaryIterator { + rv := &DictionaryIterator{ + d: d, + } + + if d.fst != nil { + la, err := levenshtein.New(term, fuzziness) + if err == nil { + itr, err := d.fst.Search(la, nil, nil) + if err == nil { + rv.itr = itr + } + } + } + + return rv +} + // DictionaryIterator is an iterator for term dictionary type DictionaryIterator struct { d *Dictionary diff --git a/index/scorch/snapshot_index.go b/index/scorch/snapshot_index.go index 6f4b0288e..172086980 100644 --- a/index/scorch/snapshot_index.go +++ b/index/scorch/snapshot_index.go @@ -175,6 +175,20 @@ func (i *IndexSnapshot) FieldDictPrefix(field string, }) } +func (i *IndexSnapshot) FieldDictRegex(field string, + termRegex []byte) (index.FieldDict, error) { + return i.newIndexSnapshotFieldDict(field, func(i segment.TermDictionary) segment.DictionaryIterator { + return i.RegexIterator(string(termRegex)) + }) +} + +func (i *IndexSnapshot) FieldDictFuzzy(field string, + term []byte, fuzziness int) (index.FieldDict, error) { + return i.newIndexSnapshotFieldDict(field, func(i segment.TermDictionary) segment.DictionaryIterator { + return i.FuzzyIterator(string(term), fuzziness) + }) +} + func (i *IndexSnapshot) DocIDReaderAll() (index.DocIDReader, error) { results := make(chan *asynchSegmentResult) for index, segment := range i.segment { diff --git a/index/scorch/snapshot_segment.go b/index/scorch/snapshot_segment.go index edf52a6e7..6edc6ae6e 100644 --- a/index/scorch/snapshot_segment.go +++ b/index/scorch/snapshot_segment.go @@ -48,6 +48,15 @@ func (s *SegmentDictionarySnapshot) RangeIterator(start, end string) segment.Dic return s.d.RangeIterator(start, end) } +func (s *SegmentDictionarySnapshot) RegexIterator(regex string) segment.DictionaryIterator { + return s.d.RegexIterator(regex) +} + +func (s *SegmentDictionarySnapshot) FuzzyIterator(term string, + fuzziness int) segment.DictionaryIterator { + return s.d.FuzzyIterator(term, fuzziness) +} + type SegmentSnapshot struct { id uint64 
segment segment.Segment diff --git a/search/searcher/search_fuzzy.go b/search/searcher/search_fuzzy.go index 90abaa0a8..69aab2f73 100644 --- a/search/searcher/search_fuzzy.go +++ b/search/searcher/search_fuzzy.go @@ -15,6 +15,9 @@ package searcher import ( + "log" + "time" + "github.com/blevesearch/bleve/index" "github.com/blevesearch/bleve/search" ) @@ -31,9 +34,10 @@ func NewFuzzySearcher(indexReader index.IndexReader, term string, break } } - + t := time.Now() candidateTerms, err := findFuzzyCandidateTerms(indexReader, term, fuzziness, field, prefixTerm) + log.Printf("time taken-> %f", time.Since(t).Seconds()) if err != nil { return nil, err } @@ -49,6 +53,22 @@ func findFuzzyCandidateTerms(indexReader index.IndexReader, term string, if len(prefixTerm) > 0 { fieldDict, err = indexReader.FieldDictPrefix(field, []byte(prefixTerm)) } else { + // in case of advanced reader implementations directly call + // the levenshtein automaton based iterator to collect the + // candidate terms + if ir, ok := indexReader.(index.IndexReaderAdv); ok { + fieldDict, err = ir.FieldDictFuzzy(field, []byte(term), fuzziness) + if err != nil { + return rv, err + } + tfd, err := fieldDict.Next() + for err == nil && tfd != nil { + rv = append(rv, tfd.Term) + tfd, err = fieldDict.Next() + } + log.Printf("candidate FSA fuzzy terms: %+v", rv) + return rv, nil + } fieldDict, err = indexReader.FieldDict(field) } defer func() { diff --git a/search/searcher/search_regexp.go b/search/searcher/search_regexp.go index b7cf520ac..806f135a3 100644 --- a/search/searcher/search_regexp.go +++ b/search/searcher/search_regexp.go @@ -15,7 +15,9 @@ package searcher import ( + "log" "regexp" + "time" "github.com/blevesearch/bleve/index" "github.com/blevesearch/bleve/search" @@ -29,19 +31,40 @@ import ( func NewRegexpSearcher(indexReader index.IndexReader, pattern *regexp.Regexp, field string, boost float64, options search.SearcherOptions) ( search.Searcher, error) { - - prefixTerm, complete := 
pattern.LiteralPrefix() var candidateTerms []string - if complete { - // there is no pattern - candidateTerms = []string{prefixTerm} - } else { - var err error - candidateTerms, err = findRegexpCandidateTerms(indexReader, pattern, field, - prefixTerm) + t := time.Now() + if ir, ok := indexReader.(index.IndexReaderAdv); ok { + fieldDict, err := ir.FieldDictRegex(field, []byte(pattern.String())) if err != nil { return nil, err } + defer func() { + if cerr := fieldDict.Close(); cerr != nil && err == nil { + err = cerr + } + }() + + // enumerate the terms and check against regexp + tfd, err := fieldDict.Next() + for err == nil && tfd != nil { + candidateTerms = append(candidateTerms, tfd.Term) + tfd, err = fieldDict.Next() + } + log.Printf("fsa time took-> %f", time.Since(t).Seconds()) + } else { + prefixTerm, complete := pattern.LiteralPrefix() + if complete { + // there is no pattern + candidateTerms = []string{prefixTerm} + } else { + var err error + candidateTerms, err = findRegexpCandidateTerms(indexReader, pattern, field, + prefixTerm) + if err != nil { + return nil, err + } + } + log.Printf("time took-> %f", time.Since(t).Seconds()) } return NewMultiTermSearcher(indexReader, candidateTerms, field, boost, From c881146270fdfb8e55b614f746da256eca6e29df Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Mon, 19 Mar 2018 10:06:17 -0700 Subject: [PATCH 307/728] scorch zap mergeTermFreqNormLocsByCopying() helper func --- index/scorch/segment/zap/merge.go | 81 +++++++++++++++++-------------- 1 file changed, 45 insertions(+), 36 deletions(-) diff --git a/index/scorch/segment/zap/merge.go b/index/scorch/segment/zap/merge.go index f3b6a68e7..4fe10edd6 100644 --- a/index/scorch/segment/zap/merge.go +++ b/index/scorch/segment/zap/merge.go @@ -305,44 +305,13 @@ func persistMergedRest(segments []*SegmentBase, dropsIn []*roaring.Bitmap, return nil, 0, err2 } - newDocNumsI := newDocNums[itrI] - postItr = postings.iterator(postItr) - nextDocNum, nextFreq, nextNorm, nextFreqNormBytes, 
nextLocBytes, err2 := - postItr.nextBytes() - for err2 == nil && len(nextFreqNormBytes) > 0 { - hitNewDocNum := newDocNumsI[nextDocNum] - if hitNewDocNum == docDropped { - return nil, 0, fmt.Errorf("see hit with dropped doc num") - } - - newRoaring.Add(uint32(hitNewDocNum)) - err2 = tfEncoder.AddBytes(hitNewDocNum, nextFreqNormBytes) - if err2 != nil { - return nil, 0, err2 - } - - if len(nextLocBytes) > 0 { - newRoaringLocs.Add(uint32(hitNewDocNum)) - err2 = locEncoder.AddBytes(hitNewDocNum, nextLocBytes) - if err2 != nil { - return nil, 0, err2 - } - } - - docTermMap[hitNewDocNum] = - append(append(docTermMap[hitNewDocNum], term...), termSeparator) - - lastDocNum = hitNewDocNum - lastFreq = nextFreq - lastNorm = nextNorm - - nextDocNum, nextFreq, nextNorm, nextFreqNormBytes, nextLocBytes, err2 = - postItr.nextBytes() - } - if err2 != nil { - return nil, 0, err2 + lastDocNum, lastFreq, lastNorm, err = mergeTermFreqNormLocsByCopying( + term, postItr, newDocNums[itrI], newRoaring, newRoaringLocs, + tfEncoder, locEncoder, docTermMap) + if err != nil { + return nil, 0, err } prevTerm = prevTerm[:0] // copy to prevTerm in case Next() reuses term mem @@ -428,6 +397,46 @@ func persistMergedRest(segments []*SegmentBase, dropsIn []*roaring.Bitmap, return rv, fieldDvLocsOffset, nil } +func mergeTermFreqNormLocsByCopying(term []byte, postItr *PostingsIterator, + newDocNums []uint64, newRoaring *roaring.Bitmap, newRoaringLocs *roaring.Bitmap, + tfEncoder *chunkedIntCoder, locEncoder *chunkedIntCoder, docTermMap [][]byte) ( + lastDocNum uint64, lastFreq uint64, lastNorm uint64, err error) { + nextDocNum, nextFreq, nextNorm, nextFreqNormBytes, nextLocBytes, err := + postItr.nextBytes() + for err == nil && len(nextFreqNormBytes) > 0 { + hitNewDocNum := newDocNums[nextDocNum] + if hitNewDocNum == docDropped { + return 0, 0, 0, fmt.Errorf("see hit with dropped doc num") + } + + newRoaring.Add(uint32(hitNewDocNum)) + err = tfEncoder.AddBytes(hitNewDocNum, nextFreqNormBytes) + if 
err != nil { + return 0, 0, 0, err + } + + if len(nextLocBytes) > 0 { + newRoaringLocs.Add(uint32(hitNewDocNum)) + err = locEncoder.AddBytes(hitNewDocNum, nextLocBytes) + if err != nil { + return 0, 0, 0, err + } + } + + docTermMap[hitNewDocNum] = + append(append(docTermMap[hitNewDocNum], term...), termSeparator) + + lastDocNum = hitNewDocNum + lastFreq = nextFreq + lastNorm = nextNorm + + nextDocNum, nextFreq, nextNorm, nextFreqNormBytes, nextLocBytes, err = + postItr.nextBytes() + } + + return lastDocNum, lastFreq, lastNorm, err +} + func writePostings(postings, postingLocs *roaring.Bitmap, tfEncoder, locEncoder *chunkedIntCoder, use1HitEncoding func(uint64) (bool, uint64, uint64), From f65ba5c0f4df639ab1a8f4ddb5502eb9e269f6a8 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Mon, 19 Mar 2018 10:28:01 -0700 Subject: [PATCH 308/728] MB-28781 - scorch zap merge freq/loc copying only when fieldsSame The optimization recently introduced in commit 530a3d24cf0768f4c7a, ("scorch zap optimize merge by byte copying freq/norm/loc's") was to byte-copy freq/norm/loc data directly during merging. But, it was incorrect if the fields were different across segments. This change now performs that byte-copying merging optimization only when the fields are the same across segments, and if not, leverages the old approach of deserializing & re-serializing the freq/norm/loc information, which has the important step of remapping fieldID's. 
See also: https://issues.couchbase.com/browse/MB-28781 --- index/scorch/segment/zap/merge.go | 78 ++++++++++++++++++++++++++++--- 1 file changed, 72 insertions(+), 6 deletions(-) diff --git a/index/scorch/segment/zap/merge.go b/index/scorch/segment/zap/merge.go index 4fe10edd6..1da5e5269 100644 --- a/index/scorch/segment/zap/merge.go +++ b/index/scorch/segment/zap/merge.go @@ -117,7 +117,8 @@ func MergeToWriter(segments []*SegmentBase, drops []*roaring.Bitmap, return nil, 0, 0, 0, 0, nil, nil, nil, err } - dictLocs, docValueOffset, err = persistMergedRest(segments, drops, fieldsInv, fieldsMap, + dictLocs, docValueOffset, err = persistMergedRest(segments, drops, + fieldsInv, fieldsMap, fieldsSame, newDocNums, numDocs, chunkFactor, cr) if err != nil { return nil, 0, 0, 0, 0, nil, nil, nil, err @@ -158,11 +159,12 @@ func computeNewDocCount(segments []*SegmentBase, drops []*roaring.Bitmap) uint64 } func persistMergedRest(segments []*SegmentBase, dropsIn []*roaring.Bitmap, - fieldsInv []string, fieldsMap map[string]uint16, newDocNumsIn [][]uint64, - newSegDocCount uint64, chunkFactor uint32, + fieldsInv []string, fieldsMap map[string]uint16, fieldsSame bool, + newDocNumsIn [][]uint64, newSegDocCount uint64, chunkFactor uint32, w *CountHashWriter) ([]uint64, uint64, error) { var bufMaxVarintLen64 []byte = make([]byte, binary.MaxVarintLen64) + var bufLoc []uint64 var postings *PostingsList var postItr *PostingsIterator @@ -307,9 +309,16 @@ func persistMergedRest(segments []*SegmentBase, dropsIn []*roaring.Bitmap, postItr = postings.iterator(postItr) - lastDocNum, lastFreq, lastNorm, err = mergeTermFreqNormLocsByCopying( - term, postItr, newDocNums[itrI], newRoaring, newRoaringLocs, - tfEncoder, locEncoder, docTermMap) + if fieldsSame { + // can optimize by copying freq/norm/loc bytes directly + lastDocNum, lastFreq, lastNorm, err = mergeTermFreqNormLocsByCopying( + term, postItr, newDocNums[itrI], newRoaring, newRoaringLocs, + tfEncoder, locEncoder, docTermMap) + } else { 
+ lastDocNum, lastFreq, lastNorm, bufLoc, err = mergeTermFreqNormLocs( + fieldsMap, term, postItr, newDocNums[itrI], newRoaring, newRoaringLocs, + tfEncoder, locEncoder, docTermMap, bufLoc) + } if err != nil { return nil, 0, err } @@ -397,6 +406,63 @@ func persistMergedRest(segments []*SegmentBase, dropsIn []*roaring.Bitmap, return rv, fieldDvLocsOffset, nil } +func mergeTermFreqNormLocs(fieldsMap map[string]uint16, term []byte, postItr *PostingsIterator, + newDocNums []uint64, newRoaring *roaring.Bitmap, newRoaringLocs *roaring.Bitmap, + tfEncoder *chunkedIntCoder, locEncoder *chunkedIntCoder, docTermMap [][]byte, + bufLoc []uint64) ( + lastDocNum uint64, lastFreq uint64, lastNorm uint64, bufLocOut []uint64, err error) { + next, err := postItr.Next() + for next != nil && err == nil { + hitNewDocNum := newDocNums[next.Number()] + if hitNewDocNum == docDropped { + return 0, 0, 0, nil, fmt.Errorf("see hit with dropped docNum") + } + + newRoaring.Add(uint32(hitNewDocNum)) + + nextFreq := next.Frequency() + nextNorm := uint64(math.Float32bits(float32(next.Norm()))) + + err = tfEncoder.Add(hitNewDocNum, nextFreq, nextNorm) + if err != nil { + return 0, 0, 0, nil, err + } + + locs := next.Locations() + if len(locs) > 0 { + newRoaringLocs.Add(uint32(hitNewDocNum)) + + for _, loc := range locs { + if cap(bufLoc) < 5+len(loc.ArrayPositions()) { + bufLoc = make([]uint64, 0, 5+len(loc.ArrayPositions())) + } + args := bufLoc[0:5] + args[0] = uint64(fieldsMap[loc.Field()] - 1) + args[1] = loc.Pos() + args[2] = loc.Start() + args[3] = loc.End() + args[4] = uint64(len(loc.ArrayPositions())) + args = append(args, loc.ArrayPositions()...) + err = locEncoder.Add(hitNewDocNum, args...) 
+ if err != nil { + return 0, 0, 0, nil, err + } + } + } + + docTermMap[hitNewDocNum] = + append(append(docTermMap[hitNewDocNum], term...), termSeparator) + + lastDocNum = hitNewDocNum + lastFreq = nextFreq + lastNorm = nextNorm + + next, err = postItr.Next() + } + + return lastDocNum, lastFreq, lastNorm, bufLoc, err +} + func mergeTermFreqNormLocsByCopying(term []byte, postItr *PostingsIterator, newDocNums []uint64, newRoaring *roaring.Bitmap, newRoaringLocs *roaring.Bitmap, tfEncoder *chunkedIntCoder, locEncoder *chunkedIntCoder, docTermMap [][]byte) ( From e9b228bcdd09472170b27840539436e17da034c0 Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Mon, 19 Mar 2018 14:14:59 -0400 Subject: [PATCH 309/728] improve command-line tool for zap correctly handle/print additional loc bitmap address this fixes bitmap length that is output instantiate roaring bitmap and print it out removed some unnecessary debug logging updated dict command to print 1-hit encoded vals this makes dict command usable for seeing which doc ids are in a segment and their corresponding doc number --- cmd/bleve/cmd/zap/dict.go | 11 ++++++++++- cmd/bleve/cmd/zap/explore.go | 22 ++++++++++++++++------ 2 files changed, 26 insertions(+), 7 deletions(-) diff --git a/cmd/bleve/cmd/zap/dict.go b/cmd/bleve/cmd/zap/dict.go index 3e2727195..e80be3601 100644 --- a/cmd/bleve/cmd/zap/dict.go +++ b/cmd/bleve/cmd/zap/dict.go @@ -17,7 +17,9 @@ package zap import ( "encoding/binary" "fmt" + "math" + "github.com/blevesearch/bleve/index/scorch/segment/zap" "github.com/couchbase/vellum" "github.com/spf13/cobra" ) @@ -54,7 +56,14 @@ var dictCmd = &cobra.Command{ itr, err := fst.Iterator(nil, nil) for err == nil { currTerm, currVal := itr.Current() - fmt.Printf("%s - %d (%x)\n", currTerm, currVal, currVal) + extra := "" + if currVal&zap.FSTValEncodingMask == zap.FSTValEncoding1Hit { + docNum, normBits := zap.FSTValDecode1Hit(currVal) + norm := math.Float32frombits(uint32(normBits)) + extra = fmt.Sprintf("-- docNum: %d, 
norm: %f", docNum, norm) + } + + fmt.Printf("%s - %d (%x) %s\n", currTerm, currVal, currVal, extra) err = itr.Next() } if err != nil && err != vellum.ErrIteratorDone { diff --git a/cmd/bleve/cmd/zap/explore.go b/cmd/bleve/cmd/zap/explore.go index 543b572fd..225b7373f 100644 --- a/cmd/bleve/cmd/zap/explore.go +++ b/cmd/bleve/cmd/zap/explore.go @@ -17,9 +17,9 @@ package zap import ( "encoding/binary" "fmt" - "log" "math" + "github.com/RoaringBitmap/roaring" "github.com/blevesearch/bleve/index/scorch/segment/zap" "github.com/couchbase/vellum" "github.com/spf13/cobra" @@ -59,7 +59,7 @@ var exploreCmd = &cobra.Command{ return fmt.Errorf("error looking for term : %v", err) } if exists { - fmt.Printf("fst val is %d (%x)\n", postingsAddr, postingsAddr) + fmt.Printf("FST val is %d (%x)\n", postingsAddr, postingsAddr) if postingsAddr&zap.FSTValEncodingMask == zap.FSTValEncoding1Hit { docNum, normBits := zap.FSTValDecode1Hit(postingsAddr) @@ -81,10 +81,21 @@ var exploreCmd = &cobra.Command{ locAddr, read = binary.Uvarint(data[postingsAddr+n : postingsAddr+n+binary.MaxVarintLen64]) n += uint64(read) + var locBitmapAddr uint64 + locBitmapAddr, read = binary.Uvarint(data[postingsAddr+n : postingsAddr+n+binary.MaxVarintLen64]) + n += uint64(read) + var postingListLen uint64 - postingListLen, _ = binary.Uvarint(data[postingsAddr+n : postingsAddr+n+binary.MaxVarintLen64]) + postingListLen, read = binary.Uvarint(data[postingsAddr+n : postingsAddr+n+binary.MaxVarintLen64]) + n += uint64(read) fmt.Printf("Posting List Length: %d\n", postingListLen) + bitmap := roaring.New() + _, err = bitmap.FromBuffer(data[postingsAddr+n : postingsAddr+n+postingListLen]) + if err != nil { + return err + } + fmt.Printf("Posting List: %v\n", bitmap) fmt.Printf("Freq details at: %d (%x)\n", freqAddr, freqAddr) numChunks, r2 := binary.Uvarint(data[freqAddr : freqAddr+binary.MaxVarintLen64]) @@ -109,11 +120,8 @@ var exploreCmd = &cobra.Command{ var locOffsets []uint64 for j := uint64(0); j < numLChunks; 
j++ { - log.Printf("reading from %d(%x)\n", locAddr+n, locAddr+n) - log.Printf("data i see here: % x\n", data[locAddr+n:locAddr+n+binary.MaxVarintLen64]) lchunkLen, r4 := binary.Uvarint(data[locAddr+n : locAddr+n+binary.MaxVarintLen64]) n += uint64(r4) - log.Printf("see chunk len %d(%x)\n", lchunkLen, lchunkLen) locOffsets = append(locOffsets, lchunkLen) } @@ -123,6 +131,8 @@ var exploreCmd = &cobra.Command{ running2 += offset } + fmt.Printf("Loc Bitmap at: %d (%x)\n", locBitmapAddr, locBitmapAddr) + } else { fmt.Printf("dictionary does not contain term '%s'\n", args[2]) } From 85df86ba17c1dceaa69264e22238432a6fd495fd Mon Sep 17 00:00:00 2001 From: abhinavdangeti Date: Mon, 19 Mar 2018 12:33:13 -0700 Subject: [PATCH 310/728] Unit tests for segments with docs with non-overlapping fields --- index/scorch/segment/mem/segment_test.go | 177 +++++++++++++++++++++++ index/scorch/segment/zap/build_test.go | 165 +++++++++++++++++++++ index/scorch/segment/zap/segment_test.go | 137 ++++++++++++++++++ 3 files changed, 479 insertions(+) diff --git a/index/scorch/segment/mem/segment_test.go b/index/scorch/segment/mem/segment_test.go index 6c5625d86..565719278 100644 --- a/index/scorch/segment/mem/segment_test.go +++ b/index/scorch/segment/mem/segment_test.go @@ -697,3 +697,180 @@ func TestMultiple(t *testing.T) { } } + +func TestMultipleWithNonOverlappingFields(t *testing.T) { + doc1 := &document.Document{ + ID: "a", + Fields: []document.Field{ + document.NewTextField("_id", []uint64{}, []byte("a")), + document.NewTextField("name", []uint64{}, []byte("ABC")), + document.NewTextField("dept", []uint64{}, []byte("ABC dept")), + document.NewTextField("manages.id", []uint64{}, []byte("XYZ")), + document.NewTextField("manages.count", []uint64{}, []byte("1")), + }, + CompositeFields: []*document.CompositeField{ + document.NewCompositeField("_all", true, nil, []string{"_id"}), + }, + } + + doc2 := &document.Document{ + ID: "b", + Fields: []document.Field{ + document.NewTextField("_id", 
[]uint64{}, []byte("b")), + document.NewTextField("name", []uint64{}, []byte("XYZ")), + document.NewTextField("dept", []uint64{}, []byte("ABC dept")), + document.NewTextField("reportsTo.id", []uint64{}, []byte("ABC")), + }, + CompositeFields: []*document.CompositeField{ + document.NewCompositeField("_all", true, nil, []string{"_id"}), + }, + } + + results := []*index.AnalysisResult{ + &index.AnalysisResult{ + Document: doc1, + Analyzed: []analysis.TokenFrequencies{ + analysis.TokenFrequency(analysis.TokenStream{ + &analysis.Token{ + Start: 0, + End: 1, + Position: 1, + Term: []byte("a"), + }, + }, nil, false), + analysis.TokenFrequency(analysis.TokenStream{ + &analysis.Token{ + Start: 0, + End: 3, + Position: 1, + Term: []byte("ABC"), + }, + }, nil, true), + analysis.TokenFrequency(analysis.TokenStream{ + &analysis.Token{ + Start: 0, + End: 3, + Position: 1, + Term: []byte("ABC"), + }, + &analysis.Token{ + Start: 4, + End: 8, + Position: 2, + Term: []byte("dept"), + }, + }, nil, true), + analysis.TokenFrequency(analysis.TokenStream{ + &analysis.Token{ + Start: 0, + End: 3, + Position: 1, + Term: []byte("XYZ"), + }, + }, []uint64{0}, true), + analysis.TokenFrequency(analysis.TokenStream{ + &analysis.Token{ + Start: 0, + End: 1, + Position: 1, + Term: []byte("1"), + }, + }, []uint64{1}, true), + }, + Length: []int{ + 1, + 1, + 2, + 1, + 1, + }, + }, + &index.AnalysisResult{ + Document: doc2, + Analyzed: []analysis.TokenFrequencies{ + analysis.TokenFrequency(analysis.TokenStream{ + &analysis.Token{ + Start: 0, + End: 1, + Position: 1, + Term: []byte("b"), + }, + }, nil, false), + analysis.TokenFrequency(analysis.TokenStream{ + &analysis.Token{ + Start: 0, + End: 3, + Position: 1, + Term: []byte("XYZ"), + }, + }, nil, true), + analysis.TokenFrequency(analysis.TokenStream{ + &analysis.Token{ + Start: 0, + End: 3, + Position: 1, + Term: []byte("ABC"), + }, + &analysis.Token{ + Start: 4, + End: 8, + Position: 2, + Term: []byte("dept"), + }, + }, nil, true), + 
analysis.TokenFrequency(analysis.TokenStream{ + &analysis.Token{ + Start: 0, + End: 3, + Position: 1, + Term: []byte("ABC"), + }, + }, []uint64{0}, true), + }, + Length: []int{ + 1, + 1, + 2, + 1, + }, + }, + } + + // fix up composite fields + for _, ar := range results { + for i, f := range ar.Document.Fields { + for _, cf := range ar.Document.CompositeFields { + cf.Compose(f.Name(), ar.Length[i], ar.Analyzed[i]) + } + } + } + + segment := NewFromAnalyzedDocs(results) + if segment == nil { + t.Fatalf("segment nil, not expected") + } + + if segment.Count() != 2 { + t.Errorf("expected count 2, got %d", segment.Count()) + } + + expectFields := map[string]struct{}{ + "_id": struct{}{}, + "_all": struct{}{}, + "name": struct{}{}, + "dept": struct{}{}, + "manages.id": struct{}{}, + "manages.count": struct{}{}, + "reportsTo.id": struct{}{}, + } + + fields := segment.Fields() + if len(fields) != len(expectFields) { + t.Errorf("expected %d fields, only got %d", len(expectFields), len(fields)) + } + for _, field := range fields { + if _, ok := expectFields[field]; !ok { + t.Errorf("got unexpected field: %s", field) + } + } +} diff --git a/index/scorch/segment/zap/build_test.go b/index/scorch/segment/zap/build_test.go index 65de7931d..e8189f760 100644 --- a/index/scorch/segment/zap/build_test.go +++ b/index/scorch/segment/zap/build_test.go @@ -137,6 +137,12 @@ func buildTestSegmentMultiWithChunkFactor(chunkFactor uint32) (*SegmentBase, err return AnalysisResultsToSegmentBase(results, chunkFactor) } +func buildTestSegmentMultiWithDifferentFields(includeDocA, includeDocB bool) (*SegmentBase, error) { + results := buildTestAnalysisResultsMultiWithDifferentFields(includeDocA, includeDocB) + + return AnalysisResultsToSegmentBase(results, 1024) +} + func buildTestAnalysisResultsMulti() []*index.AnalysisResult { doc := &document.Document{ ID: "a", @@ -298,6 +304,165 @@ func buildTestAnalysisResultsMulti() []*index.AnalysisResult { return results } +func 
buildTestAnalysisResultsMultiWithDifferentFields(includeDocA, includeDocB bool) []*index.AnalysisResult { + results := []*index.AnalysisResult{} + + if includeDocA { + doc := &document.Document{ + ID: "a", + Fields: []document.Field{ + document.NewTextField("_id", []uint64{}, []byte("a")), + document.NewTextField("name", []uint64{}, []byte("ABC")), + document.NewTextField("dept", []uint64{}, []byte("ABC dept")), + document.NewTextField("manages.id", []uint64{}, []byte("XYZ")), + document.NewTextField("manages.count", []uint64{}, []byte("1")), + }, + CompositeFields: []*document.CompositeField{ + document.NewCompositeField("_all", true, nil, []string{"_id"}), + }, + } + + result := &index.AnalysisResult{ + Document: doc, + Analyzed: []analysis.TokenFrequencies{ + analysis.TokenFrequency(analysis.TokenStream{ + &analysis.Token{ + Start: 0, + End: 1, + Position: 1, + Term: []byte("a"), + }, + }, nil, false), + analysis.TokenFrequency(analysis.TokenStream{ + &analysis.Token{ + Start: 0, + End: 3, + Position: 1, + Term: []byte("ABC"), + }, + }, nil, true), + analysis.TokenFrequency(analysis.TokenStream{ + &analysis.Token{ + Start: 0, + End: 3, + Position: 1, + Term: []byte("ABC"), + }, + &analysis.Token{ + Start: 4, + End: 8, + Position: 2, + Term: []byte("dept"), + }, + }, nil, true), + analysis.TokenFrequency(analysis.TokenStream{ + &analysis.Token{ + Start: 0, + End: 3, + Position: 1, + Term: []byte("XYZ"), + }, + }, []uint64{0}, true), + analysis.TokenFrequency(analysis.TokenStream{ + &analysis.Token{ + Start: 0, + End: 1, + Position: 1, + Term: []byte("1"), + }, + }, []uint64{1}, true), + }, + Length: []int{ + 1, + 1, + 2, + 1, + 1, + }, + } + + results = append(results, result) + } + + if includeDocB { + doc := &document.Document{ + ID: "b", + Fields: []document.Field{ + document.NewTextField("_id", []uint64{}, []byte("b")), + document.NewTextField("name", []uint64{}, []byte("XYZ")), + document.NewTextField("dept", []uint64{}, []byte("ABC dept")), + 
document.NewTextField("reportsTo.id", []uint64{}, []byte("ABC")), + }, + CompositeFields: []*document.CompositeField{ + document.NewCompositeField("_all", true, nil, []string{"_id"}), + }, + } + + result := &index.AnalysisResult{ + Document: doc, + Analyzed: []analysis.TokenFrequencies{ + analysis.TokenFrequency(analysis.TokenStream{ + &analysis.Token{ + Start: 0, + End: 1, + Position: 1, + Term: []byte("b"), + }, + }, nil, false), + analysis.TokenFrequency(analysis.TokenStream{ + &analysis.Token{ + Start: 0, + End: 3, + Position: 1, + Term: []byte("XYZ"), + }, + }, nil, true), + analysis.TokenFrequency(analysis.TokenStream{ + &analysis.Token{ + Start: 0, + End: 3, + Position: 1, + Term: []byte("ABC"), + }, + &analysis.Token{ + Start: 4, + End: 8, + Position: 2, + Term: []byte("dept"), + }, + }, nil, true), + analysis.TokenFrequency(analysis.TokenStream{ + &analysis.Token{ + Start: 0, + End: 3, + Position: 1, + Term: []byte("ABC"), + }, + }, []uint64{0}, true), + }, + Length: []int{ + 1, + 1, + 2, + 1, + }, + } + + results = append(results, result) + } + + // fix up composite fields + for _, ar := range results { + for i, f := range ar.Document.Fields { + for _, cf := range ar.Document.CompositeFields { + cf.Compose(f.Name(), ar.Length[i], ar.Analyzed[i]) + } + } + } + + return results +} + func buildTestSegmentWithDefaultFieldMapping(chunkFactor uint32) ( *SegmentBase, []string, error) { doc := &document.Document{ diff --git a/index/scorch/segment/zap/segment_test.go b/index/scorch/segment/zap/segment_test.go index 50d5dbd7f..339d24ce5 100644 --- a/index/scorch/segment/zap/segment_test.go +++ b/index/scorch/segment/zap/segment_test.go @@ -21,6 +21,7 @@ import ( "sort" "testing" + "github.com/RoaringBitmap/roaring" "github.com/blevesearch/bleve/index" "github.com/blevesearch/bleve/index/scorch/segment" ) @@ -600,3 +601,139 @@ func TestSegmentVisitableDocValueFieldsList(t *testing.T) { } } + +func TestSegmentDocsWithNonOverlappingFields(t *testing.T) { + _ = 
os.RemoveAll("/tmp/scorch.zap") + + testSeg, err := buildTestSegmentMultiWithDifferentFields(true, true) + if err != nil { + t.Fatalf("error building segment: %v", err) + } + err = PersistSegmentBase(testSeg, "/tmp/scorch.zap") + if err != nil { + t.Fatalf("error persisting segment: %v", err) + } + + segment, err := Open("/tmp/scorch.zap") + if err != nil { + t.Fatalf("error opening segment: %v", err) + } + defer func() { + cerr := segment.Close() + if cerr != nil { + t.Fatalf("error closing segment: %v", cerr) + } + }() + + if segment.Count() != 2 { + t.Errorf("expected 2, got %d", segment.Count()) + } + + expectFields := map[string]struct{}{ + "_id": struct{}{}, + "_all": struct{}{}, + "name": struct{}{}, + "dept": struct{}{}, + "manages.id": struct{}{}, + "manages.count": struct{}{}, + "reportsTo.id": struct{}{}, + } + + fields := segment.Fields() + if len(fields) != len(expectFields) { + t.Errorf("expected %d fields, only got %d", len(expectFields), len(fields)) + } + for _, field := range fields { + if _, ok := expectFields[field]; !ok { + t.Errorf("got unexpected field: %s", field) + } + } +} + +func TestMergedSegmentDocsWithNonOverlappingFields(t *testing.T) { + _ = os.RemoveAll("/tmp/scorch1.zap") + _ = os.RemoveAll("/tmp/scorch2.zap") + _ = os.RemoveAll("/tmp/scorch3.zap") + + testSeg1, _ := buildTestSegmentMultiWithDifferentFields(true, false) + err := PersistSegmentBase(testSeg1, "/tmp/scorch1.zap") + if err != nil { + t.Fatalf("error persisting segment: %v", err) + } + + testSeg2, _ := buildTestSegmentMultiWithDifferentFields(false, true) + err = PersistSegmentBase(testSeg2, "/tmp/scorch2.zap") + if err != nil { + t.Fatalf("error persisting segment: %v", err) + } + + segment1, err := Open("/tmp/scorch1.zap") + if err != nil { + t.Fatalf("error opening segment: %v", err) + } + defer func() { + cerr := segment1.Close() + if cerr != nil { + t.Fatalf("error closing segment: %v", cerr) + } + }() + + segment2, err := Open("/tmp/scorch2.zap") + if err != nil { 
+ t.Fatalf("error opening segment: %v", err) + } + defer func() { + cerr := segment2.Close() + if cerr != nil { + t.Fatalf("error closing segment: %v", cerr) + } + }() + + segsToMerge := make([]*Segment, 2) + segsToMerge[0] = segment1.(*Segment) + segsToMerge[1] = segment2.(*Segment) + + _, nBytes, err := Merge(segsToMerge, []*roaring.Bitmap{nil, nil}, "/tmp/scorch3.zap", 1024) + if err != nil { + t.Fatal(err) + } + + if nBytes == 0 { + t.Fatalf("expected a non zero total_compaction_written_bytes") + } + + segmentM, err := Open("/tmp/scorch3.zap") + if err != nil { + t.Fatalf("error opening merged segment: %v", err) + } + defer func() { + cerr := segmentM.Close() + if cerr != nil { + t.Fatalf("error closing segment: %v", cerr) + } + }() + + if segmentM.Count() != 2 { + t.Errorf("expected 2, got %d", segmentM.Count()) + } + + expectFields := map[string]struct{}{ + "_id": struct{}{}, + "_all": struct{}{}, + "name": struct{}{}, + "dept": struct{}{}, + "manages.id": struct{}{}, + "manages.count": struct{}{}, + "reportsTo.id": struct{}{}, + } + + fields := segmentM.Fields() + if len(fields) != len(expectFields) { + t.Errorf("expected %d fields, only got %d", len(expectFields), len(fields)) + } + for _, field := range fields { + if _, ok := expectFields[field]; !ok { + t.Errorf("got unexpected field: %s", field) + } + } +} From 85b4a31e2a05bac763f72ba8a129887e0772c0b3 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Tue, 20 Mar 2018 11:12:18 -0700 Subject: [PATCH 311/728] scorch zap getField() which panics if the field is unknown --- index/scorch/segment/zap/new.go | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/index/scorch/segment/zap/new.go b/index/scorch/segment/zap/new.go index dd2740fb2..68827f0b7 100644 --- a/index/scorch/segment/zap/new.go +++ b/index/scorch/segment/zap/new.go @@ -17,6 +17,7 @@ package zap import ( "bytes" "encoding/binary" + "fmt" "math" "sort" "sync" @@ -288,6 +289,16 @@ func (s *interim) 
getOrDefineField(fieldName string) int { return int(fieldIDPlus1 - 1) } +// the fieldName must be for a known field +func (s *interim) getField(fieldName string) int { + fieldIDPlus1, exists := s.FieldsMap[fieldName] + if !exists || fieldIDPlus1 <= 0 { + panic(fmt.Sprintf("getField saw unknown fieldName: %s, fieldsMap: %#v", + fieldName, s.FieldsMap)) + } + return int(fieldIDPlus1 - 1) +} + // fill Dicts and DictKeys from analysis results func (s *interim) prepareDicts() { var pidNext int @@ -328,14 +339,14 @@ func (s *interim) prepareDicts() { for _, result := range s.results { // walk each composite field for _, field := range result.Document.CompositeFields { - fieldID := uint16(s.getOrDefineField(field.Name())) + fieldID := uint16(s.getField(field.Name())) _, tf := field.Analyze() visitField(fieldID, tf) } // walk each field for i, field := range result.Document.Fields { - fieldID := uint16(s.getOrDefineField(field.Name())) + fieldID := uint16(s.getField(field.Name())) tf := result.Analyzed[i] visitField(fieldID, tf) } @@ -439,14 +450,14 @@ func (s *interim) processDocument(docNum uint64, // walk each composite field for _, field := range result.Document.CompositeFields { - fieldID := uint16(s.getOrDefineField(field.Name())) + fieldID := uint16(s.getField(field.Name())) ln, tf := field.Analyze() visitField(fieldID, field.Name(), ln, tf) } // walk each field for i, field := range result.Document.Fields { - fieldID := uint16(s.getOrDefineField(field.Name())) + fieldID := uint16(s.getField(field.Name())) ln := result.Length[i] tf := result.Analyzed[i] visitField(fieldID, field.Name(), ln, tf) @@ -477,7 +488,7 @@ func (s *interim) processDocument(docNum uint64, for _, loc := range tf.Locations { var locf = uint16(fieldID) if loc.Field != "" { - locf = uint16(s.getOrDefineField(loc.Field)) + locf = uint16(s.getField(loc.Field)) } var arrayposs []uint64 if len(loc.ArrayPositions) > 0 { @@ -517,7 +528,7 @@ func (s *interim) writeStoredFields() ( } for _, field := 
range result.Document.Fields { - fieldID := uint16(s.getOrDefineField(field.Name())) + fieldID := uint16(s.getField(field.Name())) opts := field.Options() From 2f4d3d858751a019650379443709d700454149a7 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Tue, 20 Mar 2018 11:17:46 -0700 Subject: [PATCH 312/728] scorch zap panic if mergeFields() sees unsorted fields mergeFields depends on the fields from the various segments being sorted for the fieldsSame comparison to work. Of note, the 'fieldi > 1' guard skips the 0th field, which should always be the '_id' field. --- index/scorch/segment/zap/merge.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/index/scorch/segment/zap/merge.go b/index/scorch/segment/zap/merge.go index 1da5e5269..622cf8cc2 100644 --- a/index/scorch/segment/zap/merge.go +++ b/index/scorch/segment/zap/merge.go @@ -755,6 +755,10 @@ func mergeFields(segments []*SegmentBase) (bool, []string) { if len(segment0Fields) != len(fields) || segment0Fields[fieldi] != field { fieldsSame = false } + + if fieldi > 1 && field <= fields[fieldi-1] { + panic(fmt.Sprintf("mergeFields on unsorted fields: %#v", fields)) + } } } From 35ea1d4423cc186a2540f442f62b975c43fcd4b8 Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Tue, 20 Mar 2018 17:41:56 -0400 Subject: [PATCH 313/728] fix MB-28719 and MB-28781 invalid/missing field in scorch Use of sync.Pool to reuse the interm structure relied on resetting the fieldsInv slice. However, actual segments continued to use this same fieldsInv slice after returning it to the pool. Simple fix is to nil out fieldsInv slice in reset method and let the newly built segment keep the one from the interim struct. 
--- index/scorch/segment/zap/new.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/index/scorch/segment/zap/new.go b/index/scorch/segment/zap/new.go index 68827f0b7..e9f0b2c06 100644 --- a/index/scorch/segment/zap/new.go +++ b/index/scorch/segment/zap/new.go @@ -135,7 +135,7 @@ func (s *interim) reset() (err error) { s.chunkFactor = 0 s.w = nil s.FieldsMap = nil - s.FieldsInv = s.FieldsInv[:0] + s.FieldsInv = nil for i := range s.Dicts { s.Dicts[i] = nil } From 844845b5d251cbd8b745cc5e2bd39139ad86393a Mon Sep 17 00:00:00 2001 From: abhinavdangeti Date: Tue, 20 Mar 2018 14:51:25 -0700 Subject: [PATCH 314/728] Revert "scorch zap panic if mergeFields() sees unsorted fields" This reverts commit 2f4d3d858751a019650379443709d700454149a7. --- index/scorch/segment/zap/merge.go | 4 ---- 1 file changed, 4 deletions(-) diff --git a/index/scorch/segment/zap/merge.go b/index/scorch/segment/zap/merge.go index 622cf8cc2..1da5e5269 100644 --- a/index/scorch/segment/zap/merge.go +++ b/index/scorch/segment/zap/merge.go @@ -755,10 +755,6 @@ func mergeFields(segments []*SegmentBase) (bool, []string) { if len(segment0Fields) != len(fields) || segment0Fields[fieldi] != field { fieldsSame = false } - - if fieldi > 1 && field <= fields[fieldi-1] { - panic(fmt.Sprintf("mergeFields on unsorted fields: %#v", fields)) - } } } From 0e3c57c4651599020763e1a577e352d449e57bfc Mon Sep 17 00:00:00 2001 From: abhinavdangeti Date: Tue, 20 Mar 2018 14:51:33 -0700 Subject: [PATCH 315/728] Revert "scorch zap getField() which panics if the field is unknown" This reverts commit 85b4a31e2a05bac763f72ba8a129887e0772c0b3. 
--- index/scorch/segment/zap/new.go | 23 ++++++----------------- 1 file changed, 6 insertions(+), 17 deletions(-) diff --git a/index/scorch/segment/zap/new.go b/index/scorch/segment/zap/new.go index 68827f0b7..dd2740fb2 100644 --- a/index/scorch/segment/zap/new.go +++ b/index/scorch/segment/zap/new.go @@ -17,7 +17,6 @@ package zap import ( "bytes" "encoding/binary" - "fmt" "math" "sort" "sync" @@ -289,16 +288,6 @@ func (s *interim) getOrDefineField(fieldName string) int { return int(fieldIDPlus1 - 1) } -// the fieldName must be for a known field -func (s *interim) getField(fieldName string) int { - fieldIDPlus1, exists := s.FieldsMap[fieldName] - if !exists || fieldIDPlus1 <= 0 { - panic(fmt.Sprintf("getField saw unknown fieldName: %s, fieldsMap: %#v", - fieldName, s.FieldsMap)) - } - return int(fieldIDPlus1 - 1) -} - // fill Dicts and DictKeys from analysis results func (s *interim) prepareDicts() { var pidNext int @@ -339,14 +328,14 @@ func (s *interim) prepareDicts() { for _, result := range s.results { // walk each composite field for _, field := range result.Document.CompositeFields { - fieldID := uint16(s.getField(field.Name())) + fieldID := uint16(s.getOrDefineField(field.Name())) _, tf := field.Analyze() visitField(fieldID, tf) } // walk each field for i, field := range result.Document.Fields { - fieldID := uint16(s.getField(field.Name())) + fieldID := uint16(s.getOrDefineField(field.Name())) tf := result.Analyzed[i] visitField(fieldID, tf) } @@ -450,14 +439,14 @@ func (s *interim) processDocument(docNum uint64, // walk each composite field for _, field := range result.Document.CompositeFields { - fieldID := uint16(s.getField(field.Name())) + fieldID := uint16(s.getOrDefineField(field.Name())) ln, tf := field.Analyze() visitField(fieldID, field.Name(), ln, tf) } // walk each field for i, field := range result.Document.Fields { - fieldID := uint16(s.getField(field.Name())) + fieldID := uint16(s.getOrDefineField(field.Name())) ln := result.Length[i] tf := 
result.Analyzed[i] visitField(fieldID, field.Name(), ln, tf) @@ -488,7 +477,7 @@ func (s *interim) processDocument(docNum uint64, for _, loc := range tf.Locations { var locf = uint16(fieldID) if loc.Field != "" { - locf = uint16(s.getField(loc.Field)) + locf = uint16(s.getOrDefineField(loc.Field)) } var arrayposs []uint64 if len(loc.ArrayPositions) > 0 { @@ -528,7 +517,7 @@ func (s *interim) writeStoredFields() ( } for _, field := range result.Document.Fields { - fieldID := uint16(s.getField(field.Name())) + fieldID := uint16(s.getOrDefineField(field.Name())) opts := field.Options() From d1e2b55c72f543300a1848f6c68fa4096846ed86 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Wed, 21 Mar 2018 17:47:56 -0700 Subject: [PATCH 316/728] scorch zap postingsItr.nextDocNum() maintains allNChunk correctly When PostingsIterator.nextDocNum() moves the 'all' roaring bitmap iterator forwards, it was incorrectly not keeping the allNChunk value aligned. --- index/scorch/segment/zap/posting.go | 1 + 1 file changed, 1 insertion(+) diff --git a/index/scorch/segment/zap/posting.go b/index/scorch/segment/zap/posting.go index 081ec5f6c..4b4b4933c 100644 --- a/index/scorch/segment/zap/posting.go +++ b/index/scorch/segment/zap/posting.go @@ -620,6 +620,7 @@ func (i *PostingsIterator) nextDocNum() (uint64, bool, error) { } allN = i.all.Next() + allNChunk = allN / i.postings.sb.chunkFactor } if i.currChunk != nChunk || i.currChunkFreqNorm == nil { From b506fae4f73dcb73674b6d2f8f88df8f5726a78e Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Wed, 21 Mar 2018 17:54:22 -0700 Subject: [PATCH 317/728] scorch zap postingsItr remove unused offset/locoffset fields --- index/scorch/segment/zap/posting.go | 25 ++++++++----------------- 1 file changed, 8 insertions(+), 17 deletions(-) diff --git a/index/scorch/segment/zap/posting.go b/index/scorch/segment/zap/posting.go index 4b4b4933c..f5ccad1ad 100644 --- a/index/scorch/segment/zap/posting.go +++ b/index/scorch/segment/zap/posting.go @@ -316,11 +316,9 
@@ func (rv *PostingsList) init1Hit(fstVal uint64) error { // PostingsIterator provides a way to iterate through the postings list type PostingsIterator struct { - postings *PostingsList - all roaring.IntIterable - offset int - locoffset int - actual roaring.IntIterable + postings *PostingsList + all roaring.IntIterable + actual roaring.IntIterable currChunk uint32 currChunkFreqNorm []byte @@ -584,16 +582,12 @@ func (i *PostingsIterator) nextDocNum() (uint64, bool, error) { nChunk := n / i.postings.sb.chunkFactor allNChunk := allN / i.postings.sb.chunkFactor - // n is the next actual hit (excluding some postings) - // allN is the next hit in the full postings - // if they don't match, adjust offsets to factor in item we're skipping over - // incr the all iterator, and check again + // n is the next actual hit (excluding some postings), and + // allN is the next hit in the full postings, and + // if they don't match, move 'all' forwards until they do for allN != n { - // in different chunks, reset offsets - if allNChunk != nChunk { - i.locoffset = 0 - i.offset = 0 - } else { + // in the same chunk, so move the freq/norm/loc decoders forward + if allNChunk == nChunk { if i.currChunk != nChunk || i.currChunkFreqNorm == nil { err := i.loadChunk(int(nChunk)) if err != nil { @@ -614,9 +608,6 @@ func (i *PostingsIterator) nextDocNum() (uint64, bool, error) { } } } - - // in same chunk, need to account for offsets - i.offset++ } allN = i.all.Next() From 6b78dd4184d9d8fd8c4f2c8d0422e7d7d16f64ac Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Thu, 22 Mar 2018 06:46:06 -0700 Subject: [PATCH 318/728] fix cmd/bleve scorch ascii cmd help text Initially, there was a typo with an extra space char, but then I realized there were some copy-paste errors to correct. 
--- cmd/bleve/cmd/scorch/ascii.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cmd/bleve/cmd/scorch/ascii.go b/cmd/bleve/cmd/scorch/ascii.go index 7b36b5b9c..34fb1ed70 100644 --- a/cmd/bleve/cmd/scorch/ascii.go +++ b/cmd/bleve/cmd/scorch/ascii.go @@ -22,11 +22,11 @@ import ( "github.com/spf13/cobra" ) -// asciiCmd represents the snapshots command +// asciiCmd represents the ascii command var asciiCmd = &cobra.Command{ Use: "ascii", - Short: "ascii prints details an ascii representation of the snapshots in the index", - Long: `The ascii command prints an ascii representation of the snapshots in the index.`, + Short: "ascii prints an ascii representation of the segments in a snapshot", + Long: `The ascii command prints an ascii representation of the segments in a given snapshot.`, RunE: func(cmd *cobra.Command, args []string) error { if len(args) < 2 { From 18cfcd11d102a83271ab1d96b9035bdfe0e3f019 Mon Sep 17 00:00:00 2001 From: abhinavdangeti Date: Wed, 21 Mar 2018 12:12:49 -0700 Subject: [PATCH 319/728] MB-28782: Error handling in merger/persister when index is closed When the index is closed, do not fire an AsyncError (fatal) from either the merger or the persister that is actively working. This is quite a probable situation, so exit the loop within the goroutine. 
--- index/scorch/merge.go | 14 +++++++++----- index/scorch/persister.go | 5 +++++ 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/index/scorch/merge.go b/index/scorch/merge.go index 41086ad3d..42b5e950f 100644 --- a/index/scorch/merge.go +++ b/index/scorch/merge.go @@ -17,7 +17,6 @@ package scorch import ( "bytes" "encoding/json" - "fmt" "os" "sync/atomic" @@ -59,6 +58,11 @@ OUTER: // lets get started err := s.planMergeAtSnapshot(ourSnapshot, mergePlannerOptions) if err != nil { + if err == ErrClosed { + // index has been closed + _ = ourSnapshot.DecRef() + break OUTER + } s.fireAsyncError(fmt.Errorf("merging err: %v", err)) _ = ourSnapshot.DecRef() atomic.AddUint64(&s.stats.TotFileMergeLoopErr, 1) @@ -231,7 +235,7 @@ func (s *Scorch) planMergeAtSnapshot(ourSnapshot *IndexSnapshot, select { case <-s.closeCh: _ = segment.Close() - return nil + return ErrClosed case s.merges <- sm: atomic.AddUint64(&s.stats.TotFileMergeIntroductions, 1) } @@ -242,7 +246,7 @@ func (s *Scorch) planMergeAtSnapshot(ourSnapshot *IndexSnapshot, for _, notification := range notifications { select { case <-s.closeCh: - return nil + return ErrClosed case newSnapshot := <-notification: atomic.AddUint64(&s.stats.TotFileMergeIntroductionsDone, 1) if newSnapshot != nil { @@ -338,13 +342,13 @@ func (s *Scorch) mergeSegmentBases(snapshot *IndexSnapshot, select { // send to introducer case <-s.closeCh: _ = segment.DecRef() - return 0, nil, 0, nil // TODO: return ErrInterruptedClosed? + return 0, nil, 0, ErrClosed case s.merges <- sm: } select { // wait for introduction to complete case <-s.closeCh: - return 0, nil, 0, nil // TODO: return ErrInterruptedClosed? 
+ return 0, nil, 0, ErrClosed case newSnapshot := <-sm.notify: atomic.AddUint64(&s.stats.TotMemMergeSegments, uint64(len(sbs))) atomic.AddUint64(&s.stats.TotMemMergeDone, 1) diff --git a/index/scorch/persister.go b/index/scorch/persister.go index dda4bdfbb..2fab53240 100644 --- a/index/scorch/persister.go +++ b/index/scorch/persister.go @@ -94,6 +94,11 @@ OUTER: close(ch) } if err != nil { + if err == ErrClosed { + // index has been closed + _ = ourSnapshot.DecRef() + break OUTER + } s.fireAsyncError(fmt.Errorf("got err persisting snapshot: %v", err)) _ = ourSnapshot.DecRef() atomic.AddUint64(&s.stats.TotPersistLoopErr, 1) From 621b58dd8341232c01653db36372f0e0361bc90f Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Thu, 22 Mar 2018 16:36:38 -0700 Subject: [PATCH 320/728] scorch zap replace locsBitmap w/ 1 bit from freq-norm varint encoding NOTE: this is a zap file format change. The separate "postings locations" roaring Bitmap that encoded whether a posting has locations info is now replaced by the least significant bit in the freq varint encoded in the freq-norm chunkedIntCoder. encode/decodeFreqHasLocs() are added as helper functions. 
--- cmd/bleve/cmd/zap/explore.go | 6 --- index/scorch/segment/zap/build.go | 2 +- index/scorch/segment/zap/dict.go | 5 -- index/scorch/segment/zap/merge.go | 26 +++------- index/scorch/segment/zap/new.go | 42 ++++----------- index/scorch/segment/zap/posting.go | 79 ++++++++++++++--------------- 6 files changed, 57 insertions(+), 103 deletions(-) diff --git a/cmd/bleve/cmd/zap/explore.go b/cmd/bleve/cmd/zap/explore.go index 225b7373f..0c2471edc 100644 --- a/cmd/bleve/cmd/zap/explore.go +++ b/cmd/bleve/cmd/zap/explore.go @@ -81,10 +81,6 @@ var exploreCmd = &cobra.Command{ locAddr, read = binary.Uvarint(data[postingsAddr+n : postingsAddr+n+binary.MaxVarintLen64]) n += uint64(read) - var locBitmapAddr uint64 - locBitmapAddr, read = binary.Uvarint(data[postingsAddr+n : postingsAddr+n+binary.MaxVarintLen64]) - n += uint64(read) - var postingListLen uint64 postingListLen, read = binary.Uvarint(data[postingsAddr+n : postingsAddr+n+binary.MaxVarintLen64]) n += uint64(read) @@ -131,8 +127,6 @@ var exploreCmd = &cobra.Command{ running2 += offset } - fmt.Printf("Loc Bitmap at: %d (%x)\n", locBitmapAddr, locBitmapAddr) - } else { fmt.Printf("dictionary does not contain term '%s'\n", args[2]) } diff --git a/index/scorch/segment/zap/build.go b/index/scorch/segment/zap/build.go index 20b892ca3..9e9d787bb 100644 --- a/index/scorch/segment/zap/build.go +++ b/index/scorch/segment/zap/build.go @@ -22,7 +22,7 @@ import ( "github.com/Smerity/govarint" ) -const version uint32 = 6 +const version uint32 = 7 const fieldNotUninverted = math.MaxUint64 diff --git a/index/scorch/segment/zap/dict.go b/index/scorch/segment/zap/dict.go index 3b8132f2c..38b4faca1 100644 --- a/index/scorch/segment/zap/dict.go +++ b/index/scorch/segment/zap/dict.go @@ -72,15 +72,10 @@ func (d *Dictionary) postingsListInit(rv *PostingsList, except *roaring.Bitmap) if postings != nil { postings.Clear() } - locBitmap := rv.locBitmap - if locBitmap != nil { - locBitmap.Clear() - } *rv = PostingsList{} // clear the struct 
rv.postings = postings - rv.locBitmap = locBitmap } rv.sb = d.sb rv.except = except diff --git a/index/scorch/segment/zap/merge.go b/index/scorch/segment/zap/merge.go index 1da5e5269..51dd74206 100644 --- a/index/scorch/segment/zap/merge.go +++ b/index/scorch/segment/zap/merge.go @@ -259,9 +259,8 @@ func persistMergedRest(segments []*SegmentBase, dropsIn []*roaring.Bitmap, tfEncoder.Close() locEncoder.Close() - postingsOffset, err := writePostings( - newRoaring, newRoaringLocs, tfEncoder, locEncoder, - use1HitEncoding, w, bufMaxVarintLen64) + postingsOffset, err := writePostings(newRoaring, + tfEncoder, locEncoder, use1HitEncoding, w, bufMaxVarintLen64) if err != nil { return err } @@ -423,12 +422,14 @@ func mergeTermFreqNormLocs(fieldsMap map[string]uint16, term []byte, postItr *Po nextFreq := next.Frequency() nextNorm := uint64(math.Float32bits(float32(next.Norm()))) - err = tfEncoder.Add(hitNewDocNum, nextFreq, nextNorm) + locs := next.Locations() + + err = tfEncoder.Add(hitNewDocNum, + encodeFreqHasLocs(nextFreq, len(locs) > 0), nextNorm) if err != nil { return 0, 0, 0, nil, err } - locs := next.Locations() if len(locs) > 0 { newRoaringLocs.Add(uint32(hitNewDocNum)) @@ -503,8 +504,7 @@ func mergeTermFreqNormLocsByCopying(term []byte, postItr *PostingsIterator, return lastDocNum, lastFreq, lastNorm, err } -func writePostings(postings, postingLocs *roaring.Bitmap, - tfEncoder, locEncoder *chunkedIntCoder, +func writePostings(postings *roaring.Bitmap, tfEncoder, locEncoder *chunkedIntCoder, use1HitEncoding func(uint64) (bool, uint64, uint64), w *CountHashWriter, bufMaxVarintLen64 []byte) ( offset uint64, err error) { @@ -532,12 +532,6 @@ func writePostings(postings, postingLocs *roaring.Bitmap, return 0, err } - postingLocsOffset := uint64(w.Count()) - _, err = writeRoaringWithLen(postingLocs, w, bufMaxVarintLen64) - if err != nil { - return 0, err - } - postingsOffset := uint64(w.Count()) n := binary.PutUvarint(bufMaxVarintLen64, tfOffset) @@ -552,12 +546,6 @@ 
func writePostings(postings, postingLocs *roaring.Bitmap, return 0, err } - n = binary.PutUvarint(bufMaxVarintLen64, postingLocsOffset) - _, err = w.Write(bufMaxVarintLen64[:n]) - if err != nil { - return 0, err - } - _, err = writeRoaringWithLen(postings, w, bufMaxVarintLen64) if err != nil { return 0, err diff --git a/index/scorch/segment/zap/new.go b/index/scorch/segment/zap/new.go index 7d098349d..5837436fe 100644 --- a/index/scorch/segment/zap/new.go +++ b/index/scorch/segment/zap/new.go @@ -103,9 +103,6 @@ type interim struct { // postings id -> bitmap of docNums Postings []*roaring.Bitmap - // postings id -> bitmap of docNums that have locations - PostingsLocs []*roaring.Bitmap - // postings id -> freq/norm's, one for each docNum in postings FreqNorms [][]interimFreqNorm freqNormsBacking []interimFreqNorm @@ -151,10 +148,6 @@ func (s *interim) reset() (err error) { idn.Clear() } s.Postings = s.Postings[:0] - for _, idn := range s.PostingsLocs { - idn.Clear() - } - s.PostingsLocs = s.PostingsLocs[:0] s.FreqNorms = s.FreqNorms[:0] for i := range s.freqNormsBacking { s.freqNormsBacking[i] = interimFreqNorm{} @@ -196,8 +189,9 @@ type interimStoredField struct { } type interimFreqNorm struct { - freq uint64 - norm float32 + freq uint64 + norm float32 + hasLocs bool } type interimLoc struct { @@ -356,19 +350,6 @@ func (s *interim) prepareDicts() { s.Postings = postings } - if cap(s.PostingsLocs) >= numPostingsLists { - s.PostingsLocs = s.PostingsLocs[:numPostingsLists] - } else { - postingsLocs := make([]*roaring.Bitmap, numPostingsLists) - copy(postingsLocs, s.PostingsLocs[:cap(s.PostingsLocs)]) - for i := 0; i < numPostingsLists; i++ { - if postingsLocs[i] == nil { - postingsLocs[i] = roaring.New() - } - } - s.PostingsLocs = postingsLocs - } - if cap(s.FreqNorms) >= numPostingsLists { s.FreqNorms = s.FreqNorms[:numPostingsLists] } else { @@ -464,14 +445,12 @@ func (s *interim) processDocument(docNum uint64, s.FreqNorms[pid] = append(s.FreqNorms[pid], 
interimFreqNorm{ - freq: uint64(tf.Frequency()), - norm: norm, + freq: uint64(tf.Frequency()), + norm: norm, + hasLocs: len(tf.Locations) > 0, }) if len(tf.Locations) > 0 { - locBS := s.PostingsLocs[pid] - locBS.Add(uint32(docNum)) - locs := s.Locs[pid] for _, loc := range tf.Locations { @@ -625,7 +604,6 @@ func (s *interim) writeDicts() (fdvIndexOffset uint64, dictOffsets []uint64, err pid := dict[term] - 1 postingsBS := s.Postings[pid] - postingsLocsBS := s.PostingsLocs[pid] freqNorms := s.FreqNorms[pid] freqNormOffset := 0 @@ -639,7 +617,8 @@ func (s *interim) writeDicts() (fdvIndexOffset uint64, dictOffsets []uint64, err freqNorm := freqNorms[freqNormOffset] - err = tfEncoder.Add(docNum, freqNorm.freq, + err = tfEncoder.Add(docNum, + encodeFreqHasLocs(freqNorm.freq, freqNorm.hasLocs), uint64(math.Float32bits(freqNorm.norm))) if err != nil { return 0, nil, err @@ -675,9 +654,8 @@ func (s *interim) writeDicts() (fdvIndexOffset uint64, dictOffsets []uint64, err tfEncoder.Close() locEncoder.Close() - postingsOffset, err := writePostings( - postingsBS, postingsLocsBS, tfEncoder, locEncoder, - nil, s.w, buf) + postingsOffset, err := + writePostings(postingsBS, tfEncoder, locEncoder, nil, s.w, buf) if err != nil { return 0, nil, err } diff --git a/index/scorch/segment/zap/posting.go b/index/scorch/segment/zap/posting.go index f5ccad1ad..004b80317 100644 --- a/index/scorch/segment/zap/posting.go +++ b/index/scorch/segment/zap/posting.go @@ -100,7 +100,6 @@ type PostingsList struct { postingsOffset uint64 freqOffset uint64 locOffset uint64 - locBitmap *roaring.Bitmap postings *roaring.Bitmap except *roaring.Bitmap @@ -222,8 +221,6 @@ func (p *PostingsList) iterator(rv *PostingsIterator) *PostingsIterator { } rv.locChunkStart = p.locOffset + n - rv.locBitmap = p.locBitmap - rv.all = p.postings.Iterator() if p.except != nil { allExcept := roaring.AndNot(p.postings, p.except) @@ -271,23 +268,6 @@ func (rv *PostingsList) read(postingsOffset uint64, d *Dictionary) error { 
rv.locOffset, read = binary.Uvarint(d.sb.mem[postingsOffset+n : postingsOffset+n+binary.MaxVarintLen64]) n += uint64(read) - var locBitmapOffset uint64 - locBitmapOffset, read = binary.Uvarint(d.sb.mem[postingsOffset+n : postingsOffset+n+binary.MaxVarintLen64]) - n += uint64(read) - - var locBitmapLen uint64 - locBitmapLen, read = binary.Uvarint(d.sb.mem[locBitmapOffset : locBitmapOffset+binary.MaxVarintLen64]) - - locRoaringBytes := d.sb.mem[locBitmapOffset+uint64(read) : locBitmapOffset+uint64(read)+locBitmapLen] - - if rv.locBitmap == nil { - rv.locBitmap = roaring.NewBitmap() - } - _, err := rv.locBitmap.FromBuffer(locRoaringBytes) - if err != nil { - return fmt.Errorf("error loading roaring bitmap of locations with hits: %v", err) - } - var postingsLen uint64 postingsLen, read = binary.Uvarint(d.sb.mem[postingsOffset+n : postingsOffset+n+binary.MaxVarintLen64]) n += uint64(read) @@ -297,7 +277,7 @@ func (rv *PostingsList) read(postingsOffset uint64, d *Dictionary) error { if rv.postings == nil { rv.postings = roaring.NewBitmap() } - _, err = rv.postings.FromBuffer(roaringBytes) + _, err := rv.postings.FromBuffer(roaringBytes) if err != nil { return fmt.Errorf("error loading roaring bitmap: %v", err) } @@ -334,8 +314,6 @@ type PostingsIterator struct { locChunkOffsets []uint64 locChunkStart uint64 - locBitmap *roaring.Bitmap - next Posting // reused across Next() calls nextLocs []Location // reused across Next() calls @@ -353,10 +331,6 @@ func (i *PostingsIterator) Size() int { len(i.locChunkOffsets)*size.SizeOfUint64 + i.next.Size() - if i.locBitmap != nil { - sizeInBytes += int(i.locBitmap.GetSizeInBytes()) - } - for _, entry := range i.nextLocs { sizeInBytes += entry.Size() } @@ -397,20 +371,37 @@ func (i *PostingsIterator) loadChunk(chunk int) error { return nil } -func (i *PostingsIterator) readFreqNorm() (uint64, uint64, error) { +func (i *PostingsIterator) readFreqNormHasLocs() (uint64, uint64, bool, error) { if i.normBits1Hit != 0 { - return 1, 
i.normBits1Hit, nil + return 1, i.normBits1Hit, false, nil } - freq, err := i.freqNormDecoder.GetU64() + freqHasLocs, err := i.freqNormDecoder.GetU64() if err != nil { - return 0, 0, fmt.Errorf("error reading frequency: %v", err) + return 0, 0, false, fmt.Errorf("error reading frequency: %v", err) } + freq, hasLocs := decodeFreqHasLocs(freqHasLocs) + normBits, err := i.freqNormDecoder.GetU64() if err != nil { - return 0, 0, fmt.Errorf("error reading norm: %v", err) + return 0, 0, false, fmt.Errorf("error reading norm: %v", err) } - return freq, normBits, err + + return freq, normBits, hasLocs, err +} + +func encodeFreqHasLocs(freq uint64, hasLocs bool) uint64 { + rv := freq << 1 + if hasLocs { + rv = rv | 0x01 // 0'th LSB encodes whether there are locations + } + return rv +} + +func decodeFreqHasLocs(freqHasLocs uint64) (uint64, bool) { + freq := freqHasLocs >> 1 + hasLocs := freqHasLocs&0x01 != 0 + return freq, hasLocs } // readLocation processes all the integers on the stream representing a single @@ -484,13 +475,16 @@ func (i *PostingsIterator) Next() (segment.Posting, error) { rv.docNum = docNum var normBits uint64 - rv.freq, normBits, err = i.readFreqNorm() + var hasLocs bool + + rv.freq, normBits, hasLocs, err = i.readFreqNormHasLocs() if err != nil { return nil, err } + rv.norm = math.Float32frombits(uint32(normBits)) - if i.locBitmap != nil && i.locBitmap.Contains(uint32(docNum)) { + if hasLocs { // read off 'freq' locations, into reused slices if cap(i.nextLocs) >= int(rv.freq) { i.nextLocs = i.nextLocs[0:rv.freq] @@ -514,6 +508,8 @@ func (i *PostingsIterator) Next() (segment.Posting, error) { return rv, nil } +var freqHasLocs1Hit = encodeFreqHasLocs(1, false) + // nextBytes returns the docNum and the encoded freq & loc bytes for // the next posting func (i *PostingsIterator) nextBytes() ( @@ -528,14 +524,16 @@ func (i *PostingsIterator) nextBytes() ( if i.buf == nil { i.buf = make([]byte, binary.MaxVarintLen64*2) } - n := binary.PutUvarint(i.buf, 
uint64(1)) + n := binary.PutUvarint(i.buf, freqHasLocs1Hit) n += binary.PutUvarint(i.buf, i.normBits1Hit) return docNum, uint64(1), i.normBits1Hit, i.buf[:n], nil, nil } startFreqNorm := len(i.currChunkFreqNorm) - i.freqNormReader.Len() - freq, normBits, err = i.readFreqNorm() + var hasLocs bool + + freq, normBits, hasLocs, err = i.readFreqNormHasLocs() if err != nil { return 0, 0, 0, nil, nil, err } @@ -543,7 +541,7 @@ func (i *PostingsIterator) nextBytes() ( endFreqNorm := len(i.currChunkFreqNorm) - i.freqNormReader.Len() bytesFreqNorm = i.currChunkFreqNorm[startFreqNorm:endFreqNorm] - if i.locBitmap != nil && i.locBitmap.Contains(uint32(docNum)) { + if hasLocs { startLoc := len(i.currChunkLoc) - i.locReader.Len() for j := uint64(0); j < freq; j++ { @@ -596,11 +594,12 @@ func (i *PostingsIterator) nextDocNum() (uint64, bool, error) { } // read off freq/offsets even though we don't care about them - freq, _, err := i.readFreqNorm() + freq, _, hasLocs, err := i.readFreqNormHasLocs() if err != nil { return 0, false, err } - if i.locBitmap.Contains(allN) { + + if hasLocs { for j := 0; j < int(freq); j++ { err := i.readLocation(nil) if err != nil { From 67f75005c4ba45391a562d6c70089c3ef9485fe6 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Thu, 22 Mar 2018 17:05:27 -0700 Subject: [PATCH 321/728] fix cmd/bleve help string for internal command --- cmd/bleve/cmd/scorch/internal.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmd/bleve/cmd/scorch/internal.go b/cmd/bleve/cmd/scorch/internal.go index 027e90282..dc9497969 100644 --- a/cmd/bleve/cmd/scorch/internal.go +++ b/cmd/bleve/cmd/scorch/internal.go @@ -23,7 +23,7 @@ import ( var ascii bool -// internalCmd represents the snapshots command +// internalCmd represents the internal command var internalCmd = &cobra.Command{ Use: "internal", Short: "internal prints the internal k/v pairs in a snapshot", From 49a4ee60ba1d9569403a9c980aa87c5131048bf3 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Fri, 23 
Mar 2018 10:01:30 -0700 Subject: [PATCH 322/728] Revert "scorch zap replace locsBitmap w/ 1 bit from freq-norm varint encoding" Testing with the cbft application led to cbft process exits... AsyncError exit()... error reading location field: EOF -- main.initBleveOptions.func1() at init_bleve.go:85 This reverts commit 621b58dd8341232c01653db36372f0e0361bc90f. --- cmd/bleve/cmd/zap/explore.go | 6 +++ index/scorch/segment/zap/build.go | 2 +- index/scorch/segment/zap/dict.go | 5 ++ index/scorch/segment/zap/merge.go | 26 +++++++--- index/scorch/segment/zap/new.go | 42 +++++++++++---- index/scorch/segment/zap/posting.go | 79 +++++++++++++++-------------- 6 files changed, 103 insertions(+), 57 deletions(-) diff --git a/cmd/bleve/cmd/zap/explore.go b/cmd/bleve/cmd/zap/explore.go index 0c2471edc..225b7373f 100644 --- a/cmd/bleve/cmd/zap/explore.go +++ b/cmd/bleve/cmd/zap/explore.go @@ -81,6 +81,10 @@ var exploreCmd = &cobra.Command{ locAddr, read = binary.Uvarint(data[postingsAddr+n : postingsAddr+n+binary.MaxVarintLen64]) n += uint64(read) + var locBitmapAddr uint64 + locBitmapAddr, read = binary.Uvarint(data[postingsAddr+n : postingsAddr+n+binary.MaxVarintLen64]) + n += uint64(read) + var postingListLen uint64 postingListLen, read = binary.Uvarint(data[postingsAddr+n : postingsAddr+n+binary.MaxVarintLen64]) n += uint64(read) @@ -127,6 +131,8 @@ var exploreCmd = &cobra.Command{ running2 += offset } + fmt.Printf("Loc Bitmap at: %d (%x)\n", locBitmapAddr, locBitmapAddr) + } else { fmt.Printf("dictionary does not contain term '%s'\n", args[2]) } diff --git a/index/scorch/segment/zap/build.go b/index/scorch/segment/zap/build.go index 9e9d787bb..20b892ca3 100644 --- a/index/scorch/segment/zap/build.go +++ b/index/scorch/segment/zap/build.go @@ -22,7 +22,7 @@ import ( "github.com/Smerity/govarint" ) -const version uint32 = 7 +const version uint32 = 6 const fieldNotUninverted = math.MaxUint64 diff --git a/index/scorch/segment/zap/dict.go b/index/scorch/segment/zap/dict.go index 
38b4faca1..3b8132f2c 100644 --- a/index/scorch/segment/zap/dict.go +++ b/index/scorch/segment/zap/dict.go @@ -72,10 +72,15 @@ func (d *Dictionary) postingsListInit(rv *PostingsList, except *roaring.Bitmap) if postings != nil { postings.Clear() } + locBitmap := rv.locBitmap + if locBitmap != nil { + locBitmap.Clear() + } *rv = PostingsList{} // clear the struct rv.postings = postings + rv.locBitmap = locBitmap } rv.sb = d.sb rv.except = except diff --git a/index/scorch/segment/zap/merge.go b/index/scorch/segment/zap/merge.go index 51dd74206..1da5e5269 100644 --- a/index/scorch/segment/zap/merge.go +++ b/index/scorch/segment/zap/merge.go @@ -259,8 +259,9 @@ func persistMergedRest(segments []*SegmentBase, dropsIn []*roaring.Bitmap, tfEncoder.Close() locEncoder.Close() - postingsOffset, err := writePostings(newRoaring, - tfEncoder, locEncoder, use1HitEncoding, w, bufMaxVarintLen64) + postingsOffset, err := writePostings( + newRoaring, newRoaringLocs, tfEncoder, locEncoder, + use1HitEncoding, w, bufMaxVarintLen64) if err != nil { return err } @@ -422,14 +423,12 @@ func mergeTermFreqNormLocs(fieldsMap map[string]uint16, term []byte, postItr *Po nextFreq := next.Frequency() nextNorm := uint64(math.Float32bits(float32(next.Norm()))) - locs := next.Locations() - - err = tfEncoder.Add(hitNewDocNum, - encodeFreqHasLocs(nextFreq, len(locs) > 0), nextNorm) + err = tfEncoder.Add(hitNewDocNum, nextFreq, nextNorm) if err != nil { return 0, 0, 0, nil, err } + locs := next.Locations() if len(locs) > 0 { newRoaringLocs.Add(uint32(hitNewDocNum)) @@ -504,7 +503,8 @@ func mergeTermFreqNormLocsByCopying(term []byte, postItr *PostingsIterator, return lastDocNum, lastFreq, lastNorm, err } -func writePostings(postings *roaring.Bitmap, tfEncoder, locEncoder *chunkedIntCoder, +func writePostings(postings, postingLocs *roaring.Bitmap, + tfEncoder, locEncoder *chunkedIntCoder, use1HitEncoding func(uint64) (bool, uint64, uint64), w *CountHashWriter, bufMaxVarintLen64 []byte) ( offset uint64, err 
error) { @@ -532,6 +532,12 @@ func writePostings(postings *roaring.Bitmap, tfEncoder, locEncoder *chunkedIntCo return 0, err } + postingLocsOffset := uint64(w.Count()) + _, err = writeRoaringWithLen(postingLocs, w, bufMaxVarintLen64) + if err != nil { + return 0, err + } + postingsOffset := uint64(w.Count()) n := binary.PutUvarint(bufMaxVarintLen64, tfOffset) @@ -546,6 +552,12 @@ func writePostings(postings *roaring.Bitmap, tfEncoder, locEncoder *chunkedIntCo return 0, err } + n = binary.PutUvarint(bufMaxVarintLen64, postingLocsOffset) + _, err = w.Write(bufMaxVarintLen64[:n]) + if err != nil { + return 0, err + } + _, err = writeRoaringWithLen(postings, w, bufMaxVarintLen64) if err != nil { return 0, err diff --git a/index/scorch/segment/zap/new.go b/index/scorch/segment/zap/new.go index 5837436fe..7d098349d 100644 --- a/index/scorch/segment/zap/new.go +++ b/index/scorch/segment/zap/new.go @@ -103,6 +103,9 @@ type interim struct { // postings id -> bitmap of docNums Postings []*roaring.Bitmap + // postings id -> bitmap of docNums that have locations + PostingsLocs []*roaring.Bitmap + // postings id -> freq/norm's, one for each docNum in postings FreqNorms [][]interimFreqNorm freqNormsBacking []interimFreqNorm @@ -148,6 +151,10 @@ func (s *interim) reset() (err error) { idn.Clear() } s.Postings = s.Postings[:0] + for _, idn := range s.PostingsLocs { + idn.Clear() + } + s.PostingsLocs = s.PostingsLocs[:0] s.FreqNorms = s.FreqNorms[:0] for i := range s.freqNormsBacking { s.freqNormsBacking[i] = interimFreqNorm{} @@ -189,9 +196,8 @@ type interimStoredField struct { } type interimFreqNorm struct { - freq uint64 - norm float32 - hasLocs bool + freq uint64 + norm float32 } type interimLoc struct { @@ -350,6 +356,19 @@ func (s *interim) prepareDicts() { s.Postings = postings } + if cap(s.PostingsLocs) >= numPostingsLists { + s.PostingsLocs = s.PostingsLocs[:numPostingsLists] + } else { + postingsLocs := make([]*roaring.Bitmap, numPostingsLists) + copy(postingsLocs, 
s.PostingsLocs[:cap(s.PostingsLocs)]) + for i := 0; i < numPostingsLists; i++ { + if postingsLocs[i] == nil { + postingsLocs[i] = roaring.New() + } + } + s.PostingsLocs = postingsLocs + } + if cap(s.FreqNorms) >= numPostingsLists { s.FreqNorms = s.FreqNorms[:numPostingsLists] } else { @@ -445,12 +464,14 @@ func (s *interim) processDocument(docNum uint64, s.FreqNorms[pid] = append(s.FreqNorms[pid], interimFreqNorm{ - freq: uint64(tf.Frequency()), - norm: norm, - hasLocs: len(tf.Locations) > 0, + freq: uint64(tf.Frequency()), + norm: norm, }) if len(tf.Locations) > 0 { + locBS := s.PostingsLocs[pid] + locBS.Add(uint32(docNum)) + locs := s.Locs[pid] for _, loc := range tf.Locations { @@ -604,6 +625,7 @@ func (s *interim) writeDicts() (fdvIndexOffset uint64, dictOffsets []uint64, err pid := dict[term] - 1 postingsBS := s.Postings[pid] + postingsLocsBS := s.PostingsLocs[pid] freqNorms := s.FreqNorms[pid] freqNormOffset := 0 @@ -617,8 +639,7 @@ func (s *interim) writeDicts() (fdvIndexOffset uint64, dictOffsets []uint64, err freqNorm := freqNorms[freqNormOffset] - err = tfEncoder.Add(docNum, - encodeFreqHasLocs(freqNorm.freq, freqNorm.hasLocs), + err = tfEncoder.Add(docNum, freqNorm.freq, uint64(math.Float32bits(freqNorm.norm))) if err != nil { return 0, nil, err @@ -654,8 +675,9 @@ func (s *interim) writeDicts() (fdvIndexOffset uint64, dictOffsets []uint64, err tfEncoder.Close() locEncoder.Close() - postingsOffset, err := - writePostings(postingsBS, tfEncoder, locEncoder, nil, s.w, buf) + postingsOffset, err := writePostings( + postingsBS, postingsLocsBS, tfEncoder, locEncoder, + nil, s.w, buf) if err != nil { return 0, nil, err } diff --git a/index/scorch/segment/zap/posting.go b/index/scorch/segment/zap/posting.go index 004b80317..f5ccad1ad 100644 --- a/index/scorch/segment/zap/posting.go +++ b/index/scorch/segment/zap/posting.go @@ -100,6 +100,7 @@ type PostingsList struct { postingsOffset uint64 freqOffset uint64 locOffset uint64 + locBitmap *roaring.Bitmap postings 
*roaring.Bitmap except *roaring.Bitmap @@ -221,6 +222,8 @@ func (p *PostingsList) iterator(rv *PostingsIterator) *PostingsIterator { } rv.locChunkStart = p.locOffset + n + rv.locBitmap = p.locBitmap + rv.all = p.postings.Iterator() if p.except != nil { allExcept := roaring.AndNot(p.postings, p.except) @@ -268,6 +271,23 @@ func (rv *PostingsList) read(postingsOffset uint64, d *Dictionary) error { rv.locOffset, read = binary.Uvarint(d.sb.mem[postingsOffset+n : postingsOffset+n+binary.MaxVarintLen64]) n += uint64(read) + var locBitmapOffset uint64 + locBitmapOffset, read = binary.Uvarint(d.sb.mem[postingsOffset+n : postingsOffset+n+binary.MaxVarintLen64]) + n += uint64(read) + + var locBitmapLen uint64 + locBitmapLen, read = binary.Uvarint(d.sb.mem[locBitmapOffset : locBitmapOffset+binary.MaxVarintLen64]) + + locRoaringBytes := d.sb.mem[locBitmapOffset+uint64(read) : locBitmapOffset+uint64(read)+locBitmapLen] + + if rv.locBitmap == nil { + rv.locBitmap = roaring.NewBitmap() + } + _, err := rv.locBitmap.FromBuffer(locRoaringBytes) + if err != nil { + return fmt.Errorf("error loading roaring bitmap of locations with hits: %v", err) + } + var postingsLen uint64 postingsLen, read = binary.Uvarint(d.sb.mem[postingsOffset+n : postingsOffset+n+binary.MaxVarintLen64]) n += uint64(read) @@ -277,7 +297,7 @@ func (rv *PostingsList) read(postingsOffset uint64, d *Dictionary) error { if rv.postings == nil { rv.postings = roaring.NewBitmap() } - _, err := rv.postings.FromBuffer(roaringBytes) + _, err = rv.postings.FromBuffer(roaringBytes) if err != nil { return fmt.Errorf("error loading roaring bitmap: %v", err) } @@ -314,6 +334,8 @@ type PostingsIterator struct { locChunkOffsets []uint64 locChunkStart uint64 + locBitmap *roaring.Bitmap + next Posting // reused across Next() calls nextLocs []Location // reused across Next() calls @@ -331,6 +353,10 @@ func (i *PostingsIterator) Size() int { len(i.locChunkOffsets)*size.SizeOfUint64 + i.next.Size() + if i.locBitmap != nil { + 
sizeInBytes += int(i.locBitmap.GetSizeInBytes()) + } + for _, entry := range i.nextLocs { sizeInBytes += entry.Size() } @@ -371,37 +397,20 @@ func (i *PostingsIterator) loadChunk(chunk int) error { return nil } -func (i *PostingsIterator) readFreqNormHasLocs() (uint64, uint64, bool, error) { +func (i *PostingsIterator) readFreqNorm() (uint64, uint64, error) { if i.normBits1Hit != 0 { - return 1, i.normBits1Hit, false, nil + return 1, i.normBits1Hit, nil } - freqHasLocs, err := i.freqNormDecoder.GetU64() + freq, err := i.freqNormDecoder.GetU64() if err != nil { - return 0, 0, false, fmt.Errorf("error reading frequency: %v", err) + return 0, 0, fmt.Errorf("error reading frequency: %v", err) } - freq, hasLocs := decodeFreqHasLocs(freqHasLocs) - normBits, err := i.freqNormDecoder.GetU64() if err != nil { - return 0, 0, false, fmt.Errorf("error reading norm: %v", err) + return 0, 0, fmt.Errorf("error reading norm: %v", err) } - - return freq, normBits, hasLocs, err -} - -func encodeFreqHasLocs(freq uint64, hasLocs bool) uint64 { - rv := freq << 1 - if hasLocs { - rv = rv | 0x01 // 0'th LSB encodes whether there are locations - } - return rv -} - -func decodeFreqHasLocs(freqHasLocs uint64) (uint64, bool) { - freq := freqHasLocs >> 1 - hasLocs := freqHasLocs&0x01 != 0 - return freq, hasLocs + return freq, normBits, err } // readLocation processes all the integers on the stream representing a single @@ -475,16 +484,13 @@ func (i *PostingsIterator) Next() (segment.Posting, error) { rv.docNum = docNum var normBits uint64 - var hasLocs bool - - rv.freq, normBits, hasLocs, err = i.readFreqNormHasLocs() + rv.freq, normBits, err = i.readFreqNorm() if err != nil { return nil, err } - rv.norm = math.Float32frombits(uint32(normBits)) - if hasLocs { + if i.locBitmap != nil && i.locBitmap.Contains(uint32(docNum)) { // read off 'freq' locations, into reused slices if cap(i.nextLocs) >= int(rv.freq) { i.nextLocs = i.nextLocs[0:rv.freq] @@ -508,8 +514,6 @@ func (i *PostingsIterator) 
Next() (segment.Posting, error) { return rv, nil } -var freqHasLocs1Hit = encodeFreqHasLocs(1, false) - // nextBytes returns the docNum and the encoded freq & loc bytes for // the next posting func (i *PostingsIterator) nextBytes() ( @@ -524,16 +528,14 @@ func (i *PostingsIterator) nextBytes() ( if i.buf == nil { i.buf = make([]byte, binary.MaxVarintLen64*2) } - n := binary.PutUvarint(i.buf, freqHasLocs1Hit) + n := binary.PutUvarint(i.buf, uint64(1)) n += binary.PutUvarint(i.buf, i.normBits1Hit) return docNum, uint64(1), i.normBits1Hit, i.buf[:n], nil, nil } startFreqNorm := len(i.currChunkFreqNorm) - i.freqNormReader.Len() - var hasLocs bool - - freq, normBits, hasLocs, err = i.readFreqNormHasLocs() + freq, normBits, err = i.readFreqNorm() if err != nil { return 0, 0, 0, nil, nil, err } @@ -541,7 +543,7 @@ func (i *PostingsIterator) nextBytes() ( endFreqNorm := len(i.currChunkFreqNorm) - i.freqNormReader.Len() bytesFreqNorm = i.currChunkFreqNorm[startFreqNorm:endFreqNorm] - if hasLocs { + if i.locBitmap != nil && i.locBitmap.Contains(uint32(docNum)) { startLoc := len(i.currChunkLoc) - i.locReader.Len() for j := uint64(0); j < freq; j++ { @@ -594,12 +596,11 @@ func (i *PostingsIterator) nextDocNum() (uint64, bool, error) { } // read off freq/offsets even though we don't care about them - freq, _, hasLocs, err := i.readFreqNormHasLocs() + freq, _, err := i.readFreqNorm() if err != nil { return 0, false, err } - - if hasLocs { + if i.locBitmap.Contains(allN) { for j := 0; j < int(freq); j++ { err := i.readLocation(nil) if err != nil { From 7a19e6fd7e807171abbc2f823f3a12b037fefb89 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Fri, 23 Mar 2018 12:40:02 -0700 Subject: [PATCH 323/728] scorch zap replace locsBitmap w/ 1 bit from freq-norm varint encoding This is attempt #2 of the optimization that replaces the locsBitmap, without any changes from the original commit attempt. A commit that follows this one contains the actual fix. See also... 
- commit 621b58dd834123 (the 1st attempt) - commit 49a4ee60ba1d95 (the revert) ------------- The original commit message body from 621b58 was... NOTE: this is a zap file format change. The separate "postings locations" roaring Bitmap that encoded whether a posting has locations info is now replaced by the least significant bit in the freq varint encoded in the freq-norm chunkedIntCoder. encode/decodeFreqHasLocs() are added as helper functions. --- cmd/bleve/cmd/zap/explore.go | 6 --- index/scorch/segment/zap/build.go | 2 +- index/scorch/segment/zap/dict.go | 5 -- index/scorch/segment/zap/merge.go | 26 +++------- index/scorch/segment/zap/new.go | 42 ++++----------- index/scorch/segment/zap/posting.go | 79 ++++++++++++++--------------- 6 files changed, 57 insertions(+), 103 deletions(-) diff --git a/cmd/bleve/cmd/zap/explore.go b/cmd/bleve/cmd/zap/explore.go index 225b7373f..0c2471edc 100644 --- a/cmd/bleve/cmd/zap/explore.go +++ b/cmd/bleve/cmd/zap/explore.go @@ -81,10 +81,6 @@ var exploreCmd = &cobra.Command{ locAddr, read = binary.Uvarint(data[postingsAddr+n : postingsAddr+n+binary.MaxVarintLen64]) n += uint64(read) - var locBitmapAddr uint64 - locBitmapAddr, read = binary.Uvarint(data[postingsAddr+n : postingsAddr+n+binary.MaxVarintLen64]) - n += uint64(read) - var postingListLen uint64 postingListLen, read = binary.Uvarint(data[postingsAddr+n : postingsAddr+n+binary.MaxVarintLen64]) n += uint64(read) @@ -131,8 +127,6 @@ var exploreCmd = &cobra.Command{ running2 += offset } - fmt.Printf("Loc Bitmap at: %d (%x)\n", locBitmapAddr, locBitmapAddr) - } else { fmt.Printf("dictionary does not contain term '%s'\n", args[2]) } diff --git a/index/scorch/segment/zap/build.go b/index/scorch/segment/zap/build.go index 20b892ca3..9e9d787bb 100644 --- a/index/scorch/segment/zap/build.go +++ b/index/scorch/segment/zap/build.go @@ -22,7 +22,7 @@ import ( "github.com/Smerity/govarint" ) -const version uint32 = 6 +const version uint32 = 7 const fieldNotUninverted = math.MaxUint64 
diff --git a/index/scorch/segment/zap/dict.go b/index/scorch/segment/zap/dict.go index 3b8132f2c..38b4faca1 100644 --- a/index/scorch/segment/zap/dict.go +++ b/index/scorch/segment/zap/dict.go @@ -72,15 +72,10 @@ func (d *Dictionary) postingsListInit(rv *PostingsList, except *roaring.Bitmap) if postings != nil { postings.Clear() } - locBitmap := rv.locBitmap - if locBitmap != nil { - locBitmap.Clear() - } *rv = PostingsList{} // clear the struct rv.postings = postings - rv.locBitmap = locBitmap } rv.sb = d.sb rv.except = except diff --git a/index/scorch/segment/zap/merge.go b/index/scorch/segment/zap/merge.go index 1da5e5269..51dd74206 100644 --- a/index/scorch/segment/zap/merge.go +++ b/index/scorch/segment/zap/merge.go @@ -259,9 +259,8 @@ func persistMergedRest(segments []*SegmentBase, dropsIn []*roaring.Bitmap, tfEncoder.Close() locEncoder.Close() - postingsOffset, err := writePostings( - newRoaring, newRoaringLocs, tfEncoder, locEncoder, - use1HitEncoding, w, bufMaxVarintLen64) + postingsOffset, err := writePostings(newRoaring, + tfEncoder, locEncoder, use1HitEncoding, w, bufMaxVarintLen64) if err != nil { return err } @@ -423,12 +422,14 @@ func mergeTermFreqNormLocs(fieldsMap map[string]uint16, term []byte, postItr *Po nextFreq := next.Frequency() nextNorm := uint64(math.Float32bits(float32(next.Norm()))) - err = tfEncoder.Add(hitNewDocNum, nextFreq, nextNorm) + locs := next.Locations() + + err = tfEncoder.Add(hitNewDocNum, + encodeFreqHasLocs(nextFreq, len(locs) > 0), nextNorm) if err != nil { return 0, 0, 0, nil, err } - locs := next.Locations() if len(locs) > 0 { newRoaringLocs.Add(uint32(hitNewDocNum)) @@ -503,8 +504,7 @@ func mergeTermFreqNormLocsByCopying(term []byte, postItr *PostingsIterator, return lastDocNum, lastFreq, lastNorm, err } -func writePostings(postings, postingLocs *roaring.Bitmap, - tfEncoder, locEncoder *chunkedIntCoder, +func writePostings(postings *roaring.Bitmap, tfEncoder, locEncoder *chunkedIntCoder, use1HitEncoding func(uint64) 
(bool, uint64, uint64), w *CountHashWriter, bufMaxVarintLen64 []byte) ( offset uint64, err error) { @@ -532,12 +532,6 @@ func writePostings(postings, postingLocs *roaring.Bitmap, return 0, err } - postingLocsOffset := uint64(w.Count()) - _, err = writeRoaringWithLen(postingLocs, w, bufMaxVarintLen64) - if err != nil { - return 0, err - } - postingsOffset := uint64(w.Count()) n := binary.PutUvarint(bufMaxVarintLen64, tfOffset) @@ -552,12 +546,6 @@ func writePostings(postings, postingLocs *roaring.Bitmap, return 0, err } - n = binary.PutUvarint(bufMaxVarintLen64, postingLocsOffset) - _, err = w.Write(bufMaxVarintLen64[:n]) - if err != nil { - return 0, err - } - _, err = writeRoaringWithLen(postings, w, bufMaxVarintLen64) if err != nil { return 0, err diff --git a/index/scorch/segment/zap/new.go b/index/scorch/segment/zap/new.go index 7d098349d..5837436fe 100644 --- a/index/scorch/segment/zap/new.go +++ b/index/scorch/segment/zap/new.go @@ -103,9 +103,6 @@ type interim struct { // postings id -> bitmap of docNums Postings []*roaring.Bitmap - // postings id -> bitmap of docNums that have locations - PostingsLocs []*roaring.Bitmap - // postings id -> freq/norm's, one for each docNum in postings FreqNorms [][]interimFreqNorm freqNormsBacking []interimFreqNorm @@ -151,10 +148,6 @@ func (s *interim) reset() (err error) { idn.Clear() } s.Postings = s.Postings[:0] - for _, idn := range s.PostingsLocs { - idn.Clear() - } - s.PostingsLocs = s.PostingsLocs[:0] s.FreqNorms = s.FreqNorms[:0] for i := range s.freqNormsBacking { s.freqNormsBacking[i] = interimFreqNorm{} @@ -196,8 +189,9 @@ type interimStoredField struct { } type interimFreqNorm struct { - freq uint64 - norm float32 + freq uint64 + norm float32 + hasLocs bool } type interimLoc struct { @@ -356,19 +350,6 @@ func (s *interim) prepareDicts() { s.Postings = postings } - if cap(s.PostingsLocs) >= numPostingsLists { - s.PostingsLocs = s.PostingsLocs[:numPostingsLists] - } else { - postingsLocs := make([]*roaring.Bitmap, 
numPostingsLists) - copy(postingsLocs, s.PostingsLocs[:cap(s.PostingsLocs)]) - for i := 0; i < numPostingsLists; i++ { - if postingsLocs[i] == nil { - postingsLocs[i] = roaring.New() - } - } - s.PostingsLocs = postingsLocs - } - if cap(s.FreqNorms) >= numPostingsLists { s.FreqNorms = s.FreqNorms[:numPostingsLists] } else { @@ -464,14 +445,12 @@ func (s *interim) processDocument(docNum uint64, s.FreqNorms[pid] = append(s.FreqNorms[pid], interimFreqNorm{ - freq: uint64(tf.Frequency()), - norm: norm, + freq: uint64(tf.Frequency()), + norm: norm, + hasLocs: len(tf.Locations) > 0, }) if len(tf.Locations) > 0 { - locBS := s.PostingsLocs[pid] - locBS.Add(uint32(docNum)) - locs := s.Locs[pid] for _, loc := range tf.Locations { @@ -625,7 +604,6 @@ func (s *interim) writeDicts() (fdvIndexOffset uint64, dictOffsets []uint64, err pid := dict[term] - 1 postingsBS := s.Postings[pid] - postingsLocsBS := s.PostingsLocs[pid] freqNorms := s.FreqNorms[pid] freqNormOffset := 0 @@ -639,7 +617,8 @@ func (s *interim) writeDicts() (fdvIndexOffset uint64, dictOffsets []uint64, err freqNorm := freqNorms[freqNormOffset] - err = tfEncoder.Add(docNum, freqNorm.freq, + err = tfEncoder.Add(docNum, + encodeFreqHasLocs(freqNorm.freq, freqNorm.hasLocs), uint64(math.Float32bits(freqNorm.norm))) if err != nil { return 0, nil, err @@ -675,9 +654,8 @@ func (s *interim) writeDicts() (fdvIndexOffset uint64, dictOffsets []uint64, err tfEncoder.Close() locEncoder.Close() - postingsOffset, err := writePostings( - postingsBS, postingsLocsBS, tfEncoder, locEncoder, - nil, s.w, buf) + postingsOffset, err := + writePostings(postingsBS, tfEncoder, locEncoder, nil, s.w, buf) if err != nil { return 0, nil, err } diff --git a/index/scorch/segment/zap/posting.go b/index/scorch/segment/zap/posting.go index f5ccad1ad..004b80317 100644 --- a/index/scorch/segment/zap/posting.go +++ b/index/scorch/segment/zap/posting.go @@ -100,7 +100,6 @@ type PostingsList struct { postingsOffset uint64 freqOffset uint64 locOffset 
uint64 - locBitmap *roaring.Bitmap postings *roaring.Bitmap except *roaring.Bitmap @@ -222,8 +221,6 @@ func (p *PostingsList) iterator(rv *PostingsIterator) *PostingsIterator { } rv.locChunkStart = p.locOffset + n - rv.locBitmap = p.locBitmap - rv.all = p.postings.Iterator() if p.except != nil { allExcept := roaring.AndNot(p.postings, p.except) @@ -271,23 +268,6 @@ func (rv *PostingsList) read(postingsOffset uint64, d *Dictionary) error { rv.locOffset, read = binary.Uvarint(d.sb.mem[postingsOffset+n : postingsOffset+n+binary.MaxVarintLen64]) n += uint64(read) - var locBitmapOffset uint64 - locBitmapOffset, read = binary.Uvarint(d.sb.mem[postingsOffset+n : postingsOffset+n+binary.MaxVarintLen64]) - n += uint64(read) - - var locBitmapLen uint64 - locBitmapLen, read = binary.Uvarint(d.sb.mem[locBitmapOffset : locBitmapOffset+binary.MaxVarintLen64]) - - locRoaringBytes := d.sb.mem[locBitmapOffset+uint64(read) : locBitmapOffset+uint64(read)+locBitmapLen] - - if rv.locBitmap == nil { - rv.locBitmap = roaring.NewBitmap() - } - _, err := rv.locBitmap.FromBuffer(locRoaringBytes) - if err != nil { - return fmt.Errorf("error loading roaring bitmap of locations with hits: %v", err) - } - var postingsLen uint64 postingsLen, read = binary.Uvarint(d.sb.mem[postingsOffset+n : postingsOffset+n+binary.MaxVarintLen64]) n += uint64(read) @@ -297,7 +277,7 @@ func (rv *PostingsList) read(postingsOffset uint64, d *Dictionary) error { if rv.postings == nil { rv.postings = roaring.NewBitmap() } - _, err = rv.postings.FromBuffer(roaringBytes) + _, err := rv.postings.FromBuffer(roaringBytes) if err != nil { return fmt.Errorf("error loading roaring bitmap: %v", err) } @@ -334,8 +314,6 @@ type PostingsIterator struct { locChunkOffsets []uint64 locChunkStart uint64 - locBitmap *roaring.Bitmap - next Posting // reused across Next() calls nextLocs []Location // reused across Next() calls @@ -353,10 +331,6 @@ func (i *PostingsIterator) Size() int { len(i.locChunkOffsets)*size.SizeOfUint64 + 
i.next.Size() - if i.locBitmap != nil { - sizeInBytes += int(i.locBitmap.GetSizeInBytes()) - } - for _, entry := range i.nextLocs { sizeInBytes += entry.Size() } @@ -397,20 +371,37 @@ func (i *PostingsIterator) loadChunk(chunk int) error { return nil } -func (i *PostingsIterator) readFreqNorm() (uint64, uint64, error) { +func (i *PostingsIterator) readFreqNormHasLocs() (uint64, uint64, bool, error) { if i.normBits1Hit != 0 { - return 1, i.normBits1Hit, nil + return 1, i.normBits1Hit, false, nil } - freq, err := i.freqNormDecoder.GetU64() + freqHasLocs, err := i.freqNormDecoder.GetU64() if err != nil { - return 0, 0, fmt.Errorf("error reading frequency: %v", err) + return 0, 0, false, fmt.Errorf("error reading frequency: %v", err) } + freq, hasLocs := decodeFreqHasLocs(freqHasLocs) + normBits, err := i.freqNormDecoder.GetU64() if err != nil { - return 0, 0, fmt.Errorf("error reading norm: %v", err) + return 0, 0, false, fmt.Errorf("error reading norm: %v", err) } - return freq, normBits, err + + return freq, normBits, hasLocs, err +} + +func encodeFreqHasLocs(freq uint64, hasLocs bool) uint64 { + rv := freq << 1 + if hasLocs { + rv = rv | 0x01 // 0'th LSB encodes whether there are locations + } + return rv +} + +func decodeFreqHasLocs(freqHasLocs uint64) (uint64, bool) { + freq := freqHasLocs >> 1 + hasLocs := freqHasLocs&0x01 != 0 + return freq, hasLocs } // readLocation processes all the integers on the stream representing a single @@ -484,13 +475,16 @@ func (i *PostingsIterator) Next() (segment.Posting, error) { rv.docNum = docNum var normBits uint64 - rv.freq, normBits, err = i.readFreqNorm() + var hasLocs bool + + rv.freq, normBits, hasLocs, err = i.readFreqNormHasLocs() if err != nil { return nil, err } + rv.norm = math.Float32frombits(uint32(normBits)) - if i.locBitmap != nil && i.locBitmap.Contains(uint32(docNum)) { + if hasLocs { // read off 'freq' locations, into reused slices if cap(i.nextLocs) >= int(rv.freq) { i.nextLocs = i.nextLocs[0:rv.freq] @@ 
-514,6 +508,8 @@ func (i *PostingsIterator) Next() (segment.Posting, error) { return rv, nil } +var freqHasLocs1Hit = encodeFreqHasLocs(1, false) + // nextBytes returns the docNum and the encoded freq & loc bytes for // the next posting func (i *PostingsIterator) nextBytes() ( @@ -528,14 +524,16 @@ func (i *PostingsIterator) nextBytes() ( if i.buf == nil { i.buf = make([]byte, binary.MaxVarintLen64*2) } - n := binary.PutUvarint(i.buf, uint64(1)) + n := binary.PutUvarint(i.buf, freqHasLocs1Hit) n += binary.PutUvarint(i.buf, i.normBits1Hit) return docNum, uint64(1), i.normBits1Hit, i.buf[:n], nil, nil } startFreqNorm := len(i.currChunkFreqNorm) - i.freqNormReader.Len() - freq, normBits, err = i.readFreqNorm() + var hasLocs bool + + freq, normBits, hasLocs, err = i.readFreqNormHasLocs() if err != nil { return 0, 0, 0, nil, nil, err } @@ -543,7 +541,7 @@ func (i *PostingsIterator) nextBytes() ( endFreqNorm := len(i.currChunkFreqNorm) - i.freqNormReader.Len() bytesFreqNorm = i.currChunkFreqNorm[startFreqNorm:endFreqNorm] - if i.locBitmap != nil && i.locBitmap.Contains(uint32(docNum)) { + if hasLocs { startLoc := len(i.currChunkLoc) - i.locReader.Len() for j := uint64(0); j < freq; j++ { @@ -596,11 +594,12 @@ func (i *PostingsIterator) nextDocNum() (uint64, bool, error) { } // read off freq/offsets even though we don't care about them - freq, _, err := i.readFreqNorm() + freq, _, hasLocs, err := i.readFreqNormHasLocs() if err != nil { return 0, false, err } - if i.locBitmap.Contains(allN) { + + if hasLocs { for j := 0; j < int(freq); j++ { err := i.readLocation(nil) if err != nil { From ba644f38935bfc35a5578af6ce08a50555e896ea Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Fri, 23 Mar 2018 12:39:20 -0700 Subject: [PATCH 324/728] scorch zap fix postingsIter.nextBytes() when 1-bit encoded The previous commit's optimization that replaced the locsBitmap was incorrectly handling the case when there was a 1-bit encoding optimization in the postingsIterator.nextBytes() method, 
incorrectly generating the freq-norm bytes. Also as part of this change, more unused locsBitmap's were removed. --- index/scorch/segment/zap/build.go | 2 +- index/scorch/segment/zap/merge.go | 14 ++++---------- index/scorch/segment/zap/posting.go | 2 +- 3 files changed, 6 insertions(+), 12 deletions(-) diff --git a/index/scorch/segment/zap/build.go b/index/scorch/segment/zap/build.go index 9e9d787bb..03e4cbae8 100644 --- a/index/scorch/segment/zap/build.go +++ b/index/scorch/segment/zap/build.go @@ -22,7 +22,7 @@ import ( "github.com/Smerity/govarint" ) -const version uint32 = 7 +const version uint32 = 8 const fieldNotUninverted = math.MaxUint64 diff --git a/index/scorch/segment/zap/merge.go b/index/scorch/segment/zap/merge.go index 51dd74206..6faca646f 100644 --- a/index/scorch/segment/zap/merge.go +++ b/index/scorch/segment/zap/merge.go @@ -188,7 +188,6 @@ func persistMergedRest(segments []*SegmentBase, dropsIn []*roaring.Bitmap, } newRoaring := roaring.NewBitmap() - newRoaringLocs := roaring.NewBitmap() // for each field for fieldID, fieldName := range fieldsInv { @@ -234,7 +233,6 @@ func persistMergedRest(segments []*SegmentBase, dropsIn []*roaring.Bitmap, var prevTerm []byte newRoaring.Clear() - newRoaringLocs.Clear() var lastDocNum, lastFreq, lastNorm uint64 @@ -273,7 +271,6 @@ func persistMergedRest(segments []*SegmentBase, dropsIn []*roaring.Bitmap, } newRoaring.Clear() - newRoaringLocs.Clear() tfEncoder.Reset() locEncoder.Reset() @@ -311,11 +308,11 @@ func persistMergedRest(segments []*SegmentBase, dropsIn []*roaring.Bitmap, if fieldsSame { // can optimize by copying freq/norm/loc bytes directly lastDocNum, lastFreq, lastNorm, err = mergeTermFreqNormLocsByCopying( - term, postItr, newDocNums[itrI], newRoaring, newRoaringLocs, + term, postItr, newDocNums[itrI], newRoaring, tfEncoder, locEncoder, docTermMap) } else { lastDocNum, lastFreq, lastNorm, bufLoc, err = mergeTermFreqNormLocs( - fieldsMap, term, postItr, newDocNums[itrI], newRoaring, newRoaringLocs, 
+ fieldsMap, term, postItr, newDocNums[itrI], newRoaring, tfEncoder, locEncoder, docTermMap, bufLoc) } if err != nil { @@ -406,7 +403,7 @@ func persistMergedRest(segments []*SegmentBase, dropsIn []*roaring.Bitmap, } func mergeTermFreqNormLocs(fieldsMap map[string]uint16, term []byte, postItr *PostingsIterator, - newDocNums []uint64, newRoaring *roaring.Bitmap, newRoaringLocs *roaring.Bitmap, + newDocNums []uint64, newRoaring *roaring.Bitmap, tfEncoder *chunkedIntCoder, locEncoder *chunkedIntCoder, docTermMap [][]byte, bufLoc []uint64) ( lastDocNum uint64, lastFreq uint64, lastNorm uint64, bufLocOut []uint64, err error) { @@ -431,8 +428,6 @@ func mergeTermFreqNormLocs(fieldsMap map[string]uint16, term []byte, postItr *Po } if len(locs) > 0 { - newRoaringLocs.Add(uint32(hitNewDocNum)) - for _, loc := range locs { if cap(bufLoc) < 5+len(loc.ArrayPositions()) { bufLoc = make([]uint64, 0, 5+len(loc.ArrayPositions())) @@ -465,7 +460,7 @@ func mergeTermFreqNormLocs(fieldsMap map[string]uint16, term []byte, postItr *Po } func mergeTermFreqNormLocsByCopying(term []byte, postItr *PostingsIterator, - newDocNums []uint64, newRoaring *roaring.Bitmap, newRoaringLocs *roaring.Bitmap, + newDocNums []uint64, newRoaring *roaring.Bitmap, tfEncoder *chunkedIntCoder, locEncoder *chunkedIntCoder, docTermMap [][]byte) ( lastDocNum uint64, lastFreq uint64, lastNorm uint64, err error) { nextDocNum, nextFreq, nextNorm, nextFreqNormBytes, nextLocBytes, err := @@ -483,7 +478,6 @@ func mergeTermFreqNormLocsByCopying(term []byte, postItr *PostingsIterator, } if len(nextLocBytes) > 0 { - newRoaringLocs.Add(uint32(hitNewDocNum)) err = locEncoder.AddBytes(hitNewDocNum, nextLocBytes) if err != nil { return 0, 0, 0, err diff --git a/index/scorch/segment/zap/posting.go b/index/scorch/segment/zap/posting.go index 004b80317..02e286575 100644 --- a/index/scorch/segment/zap/posting.go +++ b/index/scorch/segment/zap/posting.go @@ -525,7 +525,7 @@ func (i *PostingsIterator) nextBytes() ( i.buf = 
make([]byte, binary.MaxVarintLen64*2) } n := binary.PutUvarint(i.buf, freqHasLocs1Hit) - n += binary.PutUvarint(i.buf, i.normBits1Hit) + n += binary.PutUvarint(i.buf[n:], i.normBits1Hit) return docNum, uint64(1), i.normBits1Hit, i.buf[:n], nil, nil } From 84424edcad4d55d7cd4ae69f40babc3ff8c479e4 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Fri, 16 Mar 2018 22:31:04 -0700 Subject: [PATCH 325/728] scorch zap sync.Pool for reusable VisitDocument() data structures As part of this, snappy.Decode() is also provided a reused buffer for decompression. --- index/scorch/segment/zap/segment.go | 34 +++++++++++++++++++++++++---- 1 file changed, 30 insertions(+), 4 deletions(-) diff --git a/index/scorch/segment/zap/segment.go b/index/scorch/segment/zap/segment.go index 0d2ad072f..3a3fd02eb 100644 --- a/index/scorch/segment/zap/segment.go +++ b/index/scorch/segment/zap/segment.go @@ -273,19 +273,39 @@ func (sb *SegmentBase) dictionary(field string) (rv *Dictionary, err error) { return rv, nil } +// visitDocumentCtx holds data structures that are reusable across +// multiple VisitDocument() calls to avoid memory allocations +type visitDocumentCtx struct { + buf []byte + reader bytes.Reader + decoder *govarint.Base128Decoder + arrayPos []uint64 +} + +var visitDocumentCtxPool = sync.Pool{ + New: func() interface{} { + reuse := &visitDocumentCtx{} + reuse.decoder = govarint.NewU64Base128Decoder(&reuse.reader) + return reuse + }, +} + // VisitDocument invokes the DocFieldValueVistor for each stored field // for the specified doc number func (s *SegmentBase) VisitDocument(num uint64, visitor segment.DocumentFieldValueVisitor) error { // first make sure this is a valid number in this segment if num < s.numDocs { + vdc := visitDocumentCtxPool.Get().(*visitDocumentCtx) + meta, compressed := s.getDocStoredMetaAndCompressed(num) - uncompressed, err := snappy.Decode(nil, compressed) + uncompressed, err := snappy.Decode(vdc.buf[:cap(vdc.buf)], compressed) if err != nil { return err } + // 
now decode meta and process - reader := bytes.NewReader(meta) - decoder := govarint.NewU64Base128Decoder(reader) + vdc.reader.Reset(meta) + decoder := vdc.decoder keepGoing := true for keepGoing { @@ -314,7 +334,10 @@ func (s *SegmentBase) VisitDocument(num uint64, visitor segment.DocumentFieldVal } var arrayPos []uint64 if numap > 0 { - arrayPos = make([]uint64, numap) + if cap(vdc.arrayPos) < int(numap) { + vdc.arrayPos = make([]uint64, numap) + } + arrayPos = vdc.arrayPos[:numap] for i := 0; i < int(numap); i++ { ap, err := decoder.GetU64() if err != nil { @@ -327,6 +350,9 @@ func (s *SegmentBase) VisitDocument(num uint64, visitor segment.DocumentFieldVal value := uncompressed[offset : offset+l] keepGoing = visitor(s.fieldsInv[field], byte(typ), value, arrayPos) } + + vdc.buf = uncompressed + visitDocumentCtxPool.Put(vdc) } return nil } From 6540b197d495053d66b48141bb1525e9a86f61a5 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Sat, 17 Mar 2018 10:40:34 -0700 Subject: [PATCH 326/728] scorch zap provide full buffer capacity to snappy Encode/Decode() The snappy Encode/Decode() API's accept an optional destination buffer param where their encoded/decoded output results will be placed, but they only check that the buffer has enough len() rather than enough capacity before deciding to allocate a new buffer. 
--- index/scorch/segment/zap/contentcoder.go | 8 +++++--- index/scorch/segment/zap/docvalues.go | 4 +++- index/scorch/segment/zap/merge.go | 3 +-- index/scorch/segment/zap/new.go | 3 +-- 4 files changed, 10 insertions(+), 8 deletions(-) diff --git a/index/scorch/segment/zap/contentcoder.go b/index/scorch/segment/zap/contentcoder.go index 1e7a785ca..2148d1d45 100644 --- a/index/scorch/segment/zap/contentcoder.go +++ b/index/scorch/segment/zap/contentcoder.go @@ -42,6 +42,8 @@ type chunkedContentCoder struct { chunkBuf bytes.Buffer chunkMeta []MetaData + + compressed []byte // temp buf for snappy compression } // MetaData represents the data information inside a @@ -105,10 +107,10 @@ func (c *chunkedContentCoder) flushContents() error { metaData := c.chunkMetaBuf.Bytes() c.final = append(c.final, c.chunkMetaBuf.Bytes()...) // write the compressed data to the final data - compressedData := snappy.Encode(nil, c.chunkBuf.Bytes()) - c.final = append(c.final, compressedData...) + c.compressed = snappy.Encode(c.compressed[:cap(c.compressed)], c.chunkBuf.Bytes()) + c.final = append(c.final, c.compressed...) 
- c.chunkLens[c.currChunk] = uint64(len(compressedData) + len(metaData)) + c.chunkLens[c.currChunk] = uint64(len(c.compressed) + len(metaData)) return nil } diff --git a/index/scorch/segment/zap/docvalues.go b/index/scorch/segment/zap/docvalues.go index 844271902..dcd2cb052 100644 --- a/index/scorch/segment/zap/docvalues.go +++ b/index/scorch/segment/zap/docvalues.go @@ -42,6 +42,7 @@ type docValueIterator struct { dvDataLoc uint64 curChunkHeader []MetaData curChunkData []byte // compressed data cache + uncompressed []byte // temp buf for snappy decompression } func (di *docValueIterator) size() int { @@ -135,10 +136,11 @@ func (di *docValueIterator) visitDocValues(docNum uint64, return nil } // uncompress the already loaded data - uncompressed, err := snappy.Decode(nil, di.curChunkData) + uncompressed, err := snappy.Decode(di.uncompressed[:cap(di.uncompressed)], di.curChunkData) if err != nil { return err } + di.uncompressed = uncompressed // pick the terms for the given docNum uncompressed = uncompressed[start:end] diff --git a/index/scorch/segment/zap/merge.go b/index/scorch/segment/zap/merge.go index 6faca646f..167ebfa24 100644 --- a/index/scorch/segment/zap/merge.go +++ b/index/scorch/segment/zap/merge.go @@ -604,7 +604,6 @@ func mergeStoredAndRemap(segments []*SegmentBase, drops []*roaring.Bitmap, curr = 0 metaBuf.Reset() data = data[:0] - compressed = compressed[:0] // collect all the data for i := 0; i < len(fieldsInv); i++ { @@ -641,7 +640,7 @@ func mergeStoredAndRemap(segments []*SegmentBase, drops []*roaring.Bitmap, metaEncoder.Close() metaBytes := metaBuf.Bytes() - compressed = snappy.Encode(compressed, data) + compressed = snappy.Encode(compressed[:cap(compressed)], data) // record where we're about to start writing docNumOffsets[newDocNum] = uint64(w.Count()) diff --git a/index/scorch/segment/zap/new.go b/index/scorch/segment/zap/new.go index 5837436fe..83a0cbced 100644 --- a/index/scorch/segment/zap/new.go +++ b/index/scorch/segment/zap/new.go @@ 
-517,7 +517,6 @@ func (s *interim) writeStoredFields() ( s.metaBuf.Reset() data = data[:0] - compressed = compressed[:0] for fieldID := range s.FieldsInv { isf, exists := docStoredFields[uint16(fieldID)] @@ -534,7 +533,7 @@ func (s *interim) writeStoredFields() ( metaEncoder.Close() metaBytes := s.metaBuf.Bytes() - compressed = snappy.Encode(compressed, data) + compressed = snappy.Encode(compressed[:cap(compressed)], data) docStoredOffsets[docNum] = uint64(s.w.Count()) From db792717a6d83b0354dfbf9aa5a7ad0e9331db5b Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Sat, 17 Mar 2018 10:59:57 -0700 Subject: [PATCH 327/728] scorch zap postingsIter reuses nextLocs/nextSegmentLocs The previous code would inefficiently throw away the nextLocs and would also throw away the []segment.Location slice if there were no locations, such as if it was a 1-hit postings list. This change tries to reuse the nextLocs/nextSegmentLocs for all cases. --- index/scorch/segment/zap/posting.go | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/index/scorch/segment/zap/posting.go b/index/scorch/segment/zap/posting.go index 02e286575..3fecaa23e 100644 --- a/index/scorch/segment/zap/posting.go +++ b/index/scorch/segment/zap/posting.go @@ -154,6 +154,9 @@ func (p *PostingsList) iterator(rv *PostingsIterator) *PostingsIterator { freqChunkOffsets := rv.freqChunkOffsets[:0] locChunkOffsets := rv.locChunkOffsets[:0] + nextLocs := rv.nextLocs[:0] + nextSegmentLocs := rv.nextSegmentLocs[:0] + buf := rv.buf *rv = PostingsIterator{} // clear the struct @@ -167,6 +170,9 @@ func (p *PostingsList) iterator(rv *PostingsIterator) *PostingsIterator { rv.freqChunkOffsets = freqChunkOffsets rv.locChunkOffsets = locChunkOffsets + rv.nextLocs = nextLocs + rv.nextSegmentLocs = nextSegmentLocs + rv.buf = buf } rv.postings = p @@ -314,8 +320,9 @@ type PostingsIterator struct { locChunkOffsets []uint64 locChunkStart uint64 - next Posting // reused across Next() calls - nextLocs 
[]Location // reused across Next() calls + next Posting // reused across Next() calls + nextLocs []Location // reused across Next() calls + nextSegmentLocs []segment.Location // reused across Next() calls docNum1Hit uint64 normBits1Hit uint64 @@ -469,8 +476,7 @@ func (i *PostingsIterator) Next() (segment.Posting, error) { return nil, err } - reuseLocs := i.next.locs // hold for reuse before struct clearing - i.next = Posting{} // clear the struct + i.next = Posting{} // clear the struct rv := &i.next rv.docNum = docNum @@ -491,11 +497,10 @@ func (i *PostingsIterator) Next() (segment.Posting, error) { } else { i.nextLocs = make([]Location, rv.freq) } - if cap(reuseLocs) >= int(rv.freq) { - rv.locs = reuseLocs[0:rv.freq] - } else { - rv.locs = make([]segment.Location, rv.freq) + if cap(i.nextSegmentLocs) < int(rv.freq) { + i.nextSegmentLocs = make([]segment.Location, rv.freq) } + rv.locs = i.nextSegmentLocs[0:rv.freq] for j := 0; j < int(rv.freq); j++ { err := i.readLocation(&i.nextLocs[j]) if err != nil { From 3f4b161850618c9c026b06fcc694753e8ccff83d Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Wed, 21 Mar 2018 10:19:02 -0700 Subject: [PATCH 328/728] scorch zap postingsIter reuses array positions slice --- index/scorch/segment/zap/posting.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/index/scorch/segment/zap/posting.go b/index/scorch/segment/zap/posting.go index 3fecaa23e..e994617ad 100644 --- a/index/scorch/segment/zap/posting.go +++ b/index/scorch/segment/zap/posting.go @@ -448,10 +448,10 @@ func (i *PostingsIterator) readLocation(l *Location) error { l.pos = pos l.start = start l.end = end - if numArrayPos > 0 { + if cap(l.ap) < int(numArrayPos) { l.ap = make([]uint64, int(numArrayPos)) } else { - l.ap = l.ap[:0] + l.ap = l.ap[:int(numArrayPos)] } } From fc7584f5a012ff5919156ecc8a0a414985218883 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Fri, 23 Mar 2018 16:56:37 -0700 Subject: [PATCH 329/728] scorch zap prealloc extra locs for 
future growth --- index/scorch/segment/zap/posting.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/index/scorch/segment/zap/posting.go b/index/scorch/segment/zap/posting.go index e994617ad..cf1e1ab0e 100644 --- a/index/scorch/segment/zap/posting.go +++ b/index/scorch/segment/zap/posting.go @@ -495,10 +495,10 @@ func (i *PostingsIterator) Next() (segment.Posting, error) { if cap(i.nextLocs) >= int(rv.freq) { i.nextLocs = i.nextLocs[0:rv.freq] } else { - i.nextLocs = make([]Location, rv.freq) + i.nextLocs = make([]Location, rv.freq, rv.freq * 2) } if cap(i.nextSegmentLocs) < int(rv.freq) { - i.nextSegmentLocs = make([]segment.Location, rv.freq) + i.nextSegmentLocs = make([]segment.Location, rv.freq, rv.freq * 2) } rv.locs = i.nextSegmentLocs[0:rv.freq] for j := 0; j < int(rv.freq); j++ { From 192621f402857bd9bd1aa091be3ee38572256bee Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Sat, 24 Mar 2018 10:03:57 -0700 Subject: [PATCH 330/728] scorch includeFreq/Norm/Locs params for postingsList.Iterator API This commit adds boolean flag params to the scorch PostingsList.Iterator() method, so that the caller can specify whether freq/norm/locs information is needed or not. Future changes can leverage these params for optimizations. 
--- index/scorch/segment/empty.go | 2 +- index/scorch/segment/mem/posting.go | 2 +- index/scorch/segment/mem/segment_test.go | 14 +++++++------- index/scorch/segment/segment.go | 2 +- index/scorch/segment/zap/merge.go | 2 +- index/scorch/segment/zap/merge_test.go | 4 ++-- index/scorch/segment/zap/posting.go | 11 ++++++----- index/scorch/segment/zap/segment_test.go | 16 ++++++++-------- index/scorch/snapshot_index.go | 2 +- index/scorch/snapshot_segment.go | 2 +- 10 files changed, 29 insertions(+), 28 deletions(-) diff --git a/index/scorch/segment/empty.go b/index/scorch/segment/empty.go index 6c19f60f9..6fa85f657 100644 --- a/index/scorch/segment/empty.go +++ b/index/scorch/segment/empty.go @@ -84,7 +84,7 @@ func (e *EmptyDictionaryIterator) Next() (*index.DictEntry, error) { type EmptyPostingsList struct{} -func (e *EmptyPostingsList) Iterator() PostingsIterator { +func (e *EmptyPostingsList) Iterator(includeFreq, includeNorm, includeLocations bool) PostingsIterator { return &EmptyPostingsIterator{} } diff --git a/index/scorch/segment/mem/posting.go b/index/scorch/segment/mem/posting.go index 4203acbe5..362fdb7c5 100644 --- a/index/scorch/segment/mem/posting.go +++ b/index/scorch/segment/mem/posting.go @@ -78,7 +78,7 @@ func (p *PostingsList) Count() uint64 { } // Iterator returns an iterator for this postings list -func (p *PostingsList) Iterator() segment.PostingsIterator { +func (p *PostingsList) Iterator(includeFreq, includeNorm, includeLocations bool) segment.PostingsIterator { return p.InitIterator(nil) } func (p *PostingsList) InitIterator(prealloc *PostingsIterator) *PostingsIterator { diff --git a/index/scorch/segment/mem/segment_test.go b/index/scorch/segment/mem/segment_test.go index 6c5625d86..c4c01f144 100644 --- a/index/scorch/segment/mem/segment_test.go +++ b/index/scorch/segment/mem/segment_test.go @@ -48,7 +48,7 @@ func TestEmpty(t *testing.T) { t.Fatal("got nil postings list, expected non-nil") } - postingsItr := postingsList.Iterator() + 
postingsItr := postingsList.Iterator(true, true, true) if postingsItr == nil { t.Fatal("got nil iterator, expected non-nil") } @@ -211,7 +211,7 @@ func TestSingle(t *testing.T) { t.Fatal("got nil postings list, expected non-nil") } - postingsItr := postingsList.Iterator() + postingsItr := postingsList.Iterator(true, true, true) if postingsItr == nil { t.Fatal("got nil iterator, expected non-nil") } @@ -257,7 +257,7 @@ func TestSingle(t *testing.T) { t.Fatal("got nil postings list, expected non-nil") } - postingsItr = postingsList.Iterator() + postingsItr = postingsList.Iterator(true, true, true) if postingsItr == nil { t.Fatal("got nil iterator, expected non-nil") } @@ -325,7 +325,7 @@ func TestSingle(t *testing.T) { t.Fatal("got nil postings list, expected non-nil") } - postingsItr = postingsList.Iterator() + postingsItr = postingsList.Iterator(true, true, true) if postingsItr == nil { t.Fatal("got nil iterator, expected non-nil") } @@ -394,7 +394,7 @@ func TestSingle(t *testing.T) { t.Fatal("got nil postings list, expected non-nil") } - postingsItr = postingsList.Iterator() + postingsItr = postingsList.Iterator(true, true, true) if postingsItr == nil { t.Fatal("got nil iterator, expected non-nil") } @@ -638,7 +638,7 @@ func TestMultiple(t *testing.T) { t.Fatal("got nil postings list, expected non-nil") } - postingsItr := postingsList.Iterator() + postingsItr := postingsList.Iterator(true, true, true) if postingsItr == nil { t.Fatal("got nil iterator, expected non-nil") } @@ -677,7 +677,7 @@ func TestMultiple(t *testing.T) { t.Errorf("expected count from postings list to be 1, got %d", postingsListExcludingCount) } - postingsItrExcluding := postingsListExcluding.Iterator() + postingsItrExcluding := postingsListExcluding.Iterator(true, true, true) if postingsItr == nil { t.Fatal("got nil iterator, expected non-nil") } diff --git a/index/scorch/segment/segment.go b/index/scorch/segment/segment.go index 8eee5f75f..adab7a01e 100644 --- 
a/index/scorch/segment/segment.go +++ b/index/scorch/segment/segment.go @@ -55,7 +55,7 @@ type DictionaryIterator interface { } type PostingsList interface { - Iterator() PostingsIterator + Iterator(includeFreq, includeNorm, includeLocations bool) PostingsIterator Size() int diff --git a/index/scorch/segment/zap/merge.go b/index/scorch/segment/zap/merge.go index 167ebfa24..0d40d5f28 100644 --- a/index/scorch/segment/zap/merge.go +++ b/index/scorch/segment/zap/merge.go @@ -303,7 +303,7 @@ func persistMergedRest(segments []*SegmentBase, dropsIn []*roaring.Bitmap, return nil, 0, err2 } - postItr = postings.iterator(postItr) + postItr = postings.iterator(true, true, true, postItr) if fieldsSame { // can optimize by copying freq/norm/loc bytes directly diff --git a/index/scorch/segment/zap/merge_test.go b/index/scorch/segment/zap/merge_test.go index d931f6c23..2675bf838 100644 --- a/index/scorch/segment/zap/merge_test.go +++ b/index/scorch/segment/zap/merge_test.go @@ -332,8 +332,8 @@ func compareSegments(a, b *Segment) string { fieldName, next.Term, aplist.Count(), bplist.Count())) } - apitr := aplist.Iterator() - bpitr := bplist.Iterator() + apitr := aplist.Iterator(true, true, true) + bpitr := bplist.Iterator(true, true, true) if (apitr != nil) != (bpitr != nil) { rv = append(rv, fmt.Sprintf("field %s, term: %s, postingsList.Iterator() results different: %v %v", fieldName, next.Term, apitr, bpitr)) diff --git a/index/scorch/segment/zap/posting.go b/index/scorch/segment/zap/posting.go index cf1e1ab0e..4092b685e 100644 --- a/index/scorch/segment/zap/posting.go +++ b/index/scorch/segment/zap/posting.go @@ -131,11 +131,12 @@ func (p *PostingsList) OrInto(receiver *roaring.Bitmap) { } // Iterator returns an iterator for this postings list -func (p *PostingsList) Iterator() segment.PostingsIterator { - return p.iterator(nil) +func (p *PostingsList) Iterator(includeFreq, includeNorm, includeLocations bool) segment.PostingsIterator { + return p.iterator(includeFreq, 
includeNorm, includeLocations, nil) } -func (p *PostingsList) iterator(rv *PostingsIterator) *PostingsIterator { +func (p *PostingsList) iterator(includeFreq, includeNorm, includeLocations bool, + rv *PostingsIterator) *PostingsIterator { if rv == nil { rv = &PostingsIterator{} } else { @@ -495,10 +496,10 @@ func (i *PostingsIterator) Next() (segment.Posting, error) { if cap(i.nextLocs) >= int(rv.freq) { i.nextLocs = i.nextLocs[0:rv.freq] } else { - i.nextLocs = make([]Location, rv.freq, rv.freq * 2) + i.nextLocs = make([]Location, rv.freq, rv.freq*2) } if cap(i.nextSegmentLocs) < int(rv.freq) { - i.nextSegmentLocs = make([]segment.Location, rv.freq, rv.freq * 2) + i.nextSegmentLocs = make([]segment.Location, rv.freq, rv.freq*2) } rv.locs = i.nextSegmentLocs[0:rv.freq] for j := 0; j < int(rv.freq); j++ { diff --git a/index/scorch/segment/zap/segment_test.go b/index/scorch/segment/zap/segment_test.go index 50d5dbd7f..9f0a4015d 100644 --- a/index/scorch/segment/zap/segment_test.go +++ b/index/scorch/segment/zap/segment_test.go @@ -84,7 +84,7 @@ func TestOpen(t *testing.T) { t.Fatal("got nil postings list, expected non-nil") } - postingsItr := postingsList.Iterator() + postingsItr := postingsList.Iterator(true, true, true) if postingsItr == nil { t.Fatal("got nil iterator, expected non-nil") } @@ -130,7 +130,7 @@ func TestOpen(t *testing.T) { t.Fatal("got nil postings list, expected non-nil") } - postingsItr = postingsList.Iterator() + postingsItr = postingsList.Iterator(true, true, true) if postingsItr == nil { t.Fatal("got nil iterator, expected non-nil") } @@ -198,7 +198,7 @@ func TestOpen(t *testing.T) { t.Fatal("got nil postings list, expected non-nil") } - postingsItr = postingsList.Iterator() + postingsItr = postingsList.Iterator(true, true, true) if postingsItr == nil { t.Fatal("got nil iterator, expected non-nil") } @@ -267,7 +267,7 @@ func TestOpen(t *testing.T) { t.Fatal("got nil postings list, expected non-nil") } - postingsItr = postingsList.Iterator() + 
postingsItr = postingsList.Iterator(true, true, true) if postingsItr == nil { t.Fatal("got nil iterator, expected non-nil") } @@ -366,7 +366,7 @@ func TestOpenMulti(t *testing.T) { t.Fatal("got nil postings list, expected non-nil") } - postingsItr := postingsList.Iterator() + postingsItr := postingsList.Iterator(true, true, true) if postingsItr == nil { t.Fatal("got nil iterator, expected non-nil") } @@ -405,7 +405,7 @@ func TestOpenMulti(t *testing.T) { t.Errorf("expected count from postings list to be 1, got %d", postingsListExcludingCount) } - postingsItrExcluding := postingsListExcluding.Iterator() + postingsItrExcluding := postingsListExcluding.Iterator(true, true, true) if postingsItr == nil { t.Fatal("got nil iterator, expected non-nil") } @@ -466,7 +466,7 @@ func TestOpenMultiWithTwoChunks(t *testing.T) { t.Fatal("got nil postings list, expected non-nil") } - postingsItr := postingsList.Iterator() + postingsItr := postingsList.Iterator(true, true, true) if postingsItr == nil { t.Fatal("got nil iterator, expected non-nil") } @@ -500,7 +500,7 @@ func TestOpenMultiWithTwoChunks(t *testing.T) { t.Fatal("got nil postings list, expected non-nil") } - postingsItrExcluding := postingsListExcluding.Iterator() + postingsItrExcluding := postingsListExcluding.Iterator(true, true, true) if postingsItr == nil { t.Fatal("got nil iterator, expected non-nil") } diff --git a/index/scorch/snapshot_index.go b/index/scorch/snapshot_index.go index 6f4b0288e..95343af7f 100644 --- a/index/scorch/snapshot_index.go +++ b/index/scorch/snapshot_index.go @@ -394,7 +394,7 @@ func (i *IndexSnapshot) TermFieldReader(term []byte, field string, includeFreq, return nil, err } rv.postings[i] = pl - rv.iterators[i] = pl.Iterator() + rv.iterators[i] = pl.Iterator(includeFreq, includeNorm, includeTermVectors) } atomic.AddUint64(&i.parent.stats.TotTermSearchersStarted, uint64(1)) return rv, nil diff --git a/index/scorch/snapshot_segment.go b/index/scorch/snapshot_segment.go index 
edf52a6e7..805e56642 100644 --- a/index/scorch/snapshot_segment.go +++ b/index/scorch/snapshot_segment.go @@ -165,7 +165,7 @@ func (cfd *cachedFieldDocs) prepareFields(field string, ss *SegmentSnapshot) { } cfd.size += uint64(size.SizeOfUint64) /* map key */ - postingsItr := postings.Iterator() + postingsItr := postings.Iterator(false, false, false) nextPosting, err2 := postingsItr.Next() for err2 == nil && nextPosting != nil { docNum := nextPosting.Number() From 1cab701f85fba914edf192ded54d76a088b415f1 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Sat, 24 Mar 2018 10:32:01 -0700 Subject: [PATCH 331/728] scorch zap postingsIter skips freq/norm/locs parsing if allowed In this optimization, the zap PostingsIterator skips the parsing of freq/norm/locs chunks based on the includeFreq|Norm|Locs flags. In bleve-query microbenchmark on dev macbookpro, with 50K en-wiki docs, on a medium frequency term search that does not ask for term vectors, throughput was ~750 q/sec before the change and went to ~1400 q/sec after the change. 
--- index/scorch/segment/zap/posting.go | 134 ++++++++++++++++------------ 1 file changed, 79 insertions(+), 55 deletions(-) diff --git a/index/scorch/segment/zap/posting.go b/index/scorch/segment/zap/posting.go index 4092b685e..b3a1891ec 100644 --- a/index/scorch/segment/zap/posting.go +++ b/index/scorch/segment/zap/posting.go @@ -131,11 +131,11 @@ func (p *PostingsList) OrInto(receiver *roaring.Bitmap) { } // Iterator returns an iterator for this postings list -func (p *PostingsList) Iterator(includeFreq, includeNorm, includeLocations bool) segment.PostingsIterator { - return p.iterator(includeFreq, includeNorm, includeLocations, nil) +func (p *PostingsList) Iterator(includeFreq, includeNorm, includeLocs bool) segment.PostingsIterator { + return p.iterator(includeFreq, includeNorm, includeLocs, nil) } -func (p *PostingsList) iterator(includeFreq, includeNorm, includeLocations bool, +func (p *PostingsList) iterator(includeFreq, includeNorm, includeLocs bool, rv *PostingsIterator) *PostingsIterator { if rv == nil { rv = &PostingsIterator{} @@ -195,38 +195,45 @@ func (p *PostingsList) iterator(includeFreq, includeNorm, includeLocations bool, return rv } - // prepare the freq chunk details var n uint64 var read int - var numFreqChunks uint64 - numFreqChunks, read = binary.Uvarint(p.sb.mem[p.freqOffset+n : p.freqOffset+n+binary.MaxVarintLen64]) - n += uint64(read) - if cap(rv.freqChunkOffsets) >= int(numFreqChunks) { - rv.freqChunkOffsets = rv.freqChunkOffsets[:int(numFreqChunks)] - } else { - rv.freqChunkOffsets = make([]uint64, int(numFreqChunks)) - } - for i := 0; i < int(numFreqChunks); i++ { - rv.freqChunkOffsets[i], read = binary.Uvarint(p.sb.mem[p.freqOffset+n : p.freqOffset+n+binary.MaxVarintLen64]) + + // prepare the freq chunk details + rv.includeFreqNorm = includeFreq || includeNorm + if rv.includeFreqNorm { + var numFreqChunks uint64 + numFreqChunks, read = binary.Uvarint(p.sb.mem[p.freqOffset+n : p.freqOffset+n+binary.MaxVarintLen64]) n += uint64(read) + 
if cap(rv.freqChunkOffsets) >= int(numFreqChunks) { + rv.freqChunkOffsets = rv.freqChunkOffsets[:int(numFreqChunks)] + } else { + rv.freqChunkOffsets = make([]uint64, int(numFreqChunks)) + } + for i := 0; i < int(numFreqChunks); i++ { + rv.freqChunkOffsets[i], read = binary.Uvarint(p.sb.mem[p.freqOffset+n : p.freqOffset+n+binary.MaxVarintLen64]) + n += uint64(read) + } + rv.freqChunkStart = p.freqOffset + n } - rv.freqChunkStart = p.freqOffset + n // prepare the loc chunk details - n = 0 - var numLocChunks uint64 - numLocChunks, read = binary.Uvarint(p.sb.mem[p.locOffset+n : p.locOffset+n+binary.MaxVarintLen64]) - n += uint64(read) - if cap(rv.locChunkOffsets) >= int(numLocChunks) { - rv.locChunkOffsets = rv.locChunkOffsets[:int(numLocChunks)] - } else { - rv.locChunkOffsets = make([]uint64, int(numLocChunks)) - } - for i := 0; i < int(numLocChunks); i++ { - rv.locChunkOffsets[i], read = binary.Uvarint(p.sb.mem[p.locOffset+n : p.locOffset+n+binary.MaxVarintLen64]) + rv.includeLocs = includeLocs + if rv.includeLocs { + n = 0 + var numLocChunks uint64 + numLocChunks, read = binary.Uvarint(p.sb.mem[p.locOffset+n : p.locOffset+n+binary.MaxVarintLen64]) n += uint64(read) + if cap(rv.locChunkOffsets) >= int(numLocChunks) { + rv.locChunkOffsets = rv.locChunkOffsets[:int(numLocChunks)] + } else { + rv.locChunkOffsets = make([]uint64, int(numLocChunks)) + } + for i := 0; i < int(numLocChunks); i++ { + rv.locChunkOffsets[i], read = binary.Uvarint(p.sb.mem[p.locOffset+n : p.locOffset+n+binary.MaxVarintLen64]) + n += uint64(read) + } + rv.locChunkStart = p.locOffset + n } - rv.locChunkStart = p.locOffset + n rv.all = p.postings.Iterator() if p.except != nil { @@ -329,6 +336,9 @@ type PostingsIterator struct { normBits1Hit uint64 buf []byte + + includeFreqNorm bool + includeLocs bool } func (i *PostingsIterator) Size() int { @@ -347,32 +357,42 @@ func (i *PostingsIterator) Size() int { } func (i *PostingsIterator) loadChunk(chunk int) error { - if chunk >= 
len(i.freqChunkOffsets) || chunk >= len(i.locChunkOffsets) { - return fmt.Errorf("tried to load chunk that doesn't exist %d/(%d %d)", chunk, len(i.freqChunkOffsets), len(i.locChunkOffsets)) - } - - end, start := i.freqChunkStart, i.freqChunkStart - s, e := readChunkBoundary(chunk, i.freqChunkOffsets) - start += s - end += e - i.currChunkFreqNorm = i.postings.sb.mem[start:end] - if i.freqNormReader == nil { - i.freqNormReader = bytes.NewReader(i.currChunkFreqNorm) - i.freqNormDecoder = govarint.NewU64Base128Decoder(i.freqNormReader) - } else { - i.freqNormReader.Reset(i.currChunkFreqNorm) + if i.includeFreqNorm { + if chunk >= len(i.freqChunkOffsets) { + return fmt.Errorf("tried to load freq chunk that doesn't exist %d/(%d)", + chunk, len(i.freqChunkOffsets)) + } + + end, start := i.freqChunkStart, i.freqChunkStart + s, e := readChunkBoundary(chunk, i.freqChunkOffsets) + start += s + end += e + i.currChunkFreqNorm = i.postings.sb.mem[start:end] + if i.freqNormReader == nil { + i.freqNormReader = bytes.NewReader(i.currChunkFreqNorm) + i.freqNormDecoder = govarint.NewU64Base128Decoder(i.freqNormReader) + } else { + i.freqNormReader.Reset(i.currChunkFreqNorm) + } } - end, start = i.locChunkStart, i.locChunkStart - s, e = readChunkBoundary(chunk, i.locChunkOffsets) - start += s - end += e - i.currChunkLoc = i.postings.sb.mem[start:end] - if i.locReader == nil { - i.locReader = bytes.NewReader(i.currChunkLoc) - i.locDecoder = govarint.NewU64Base128Decoder(i.locReader) - } else { - i.locReader.Reset(i.currChunkLoc) + if i.includeLocs { + if chunk >= len(i.locChunkOffsets) { + return fmt.Errorf("tried to load loc chunk that doesn't exist %d/(%d)", + chunk, len(i.locChunkOffsets)) + } + + end, start := i.locChunkStart, i.locChunkStart + s, e := readChunkBoundary(chunk, i.locChunkOffsets) + start += s + end += e + i.currChunkLoc = i.postings.sb.mem[start:end] + if i.locReader == nil { + i.locReader = bytes.NewReader(i.currChunkLoc) + i.locDecoder = 
govarint.NewU64Base128Decoder(i.locReader) + } else { + i.locReader.Reset(i.currChunkLoc) + } } i.currChunk = uint32(chunk) @@ -481,6 +501,10 @@ func (i *PostingsIterator) Next() (segment.Posting, error) { rv := &i.next rv.docNum = docNum + if !i.includeFreqNorm { + return rv, nil + } + var normBits uint64 var hasLocs bool @@ -491,7 +515,7 @@ func (i *PostingsIterator) Next() (segment.Posting, error) { rv.norm = math.Float32frombits(uint32(normBits)) - if hasLocs { + if i.includeLocs && hasLocs { // read off 'freq' locations, into reused slices if cap(i.nextLocs) >= int(rv.freq) { i.nextLocs = i.nextLocs[0:rv.freq] @@ -591,7 +615,7 @@ func (i *PostingsIterator) nextDocNum() (uint64, bool, error) { // if they don't match, move 'all' forwards until they do for allN != n { // in the same chunk, so move the freq/norm/loc decoders forward - if allNChunk == nChunk { + if i.includeFreqNorm && allNChunk == nChunk { if i.currChunk != nChunk || i.currChunkFreqNorm == nil { err := i.loadChunk(int(nChunk)) if err != nil { @@ -605,7 +629,7 @@ func (i *PostingsIterator) nextDocNum() (uint64, bool, error) { return 0, false, err } - if hasLocs { + if i.includeLocs && hasLocs { for j := 0; j < int(freq); j++ { err := i.readLocation(nil) if err != nil { @@ -619,7 +643,7 @@ func (i *PostingsIterator) nextDocNum() (uint64, bool, error) { allNChunk = allN / i.postings.sb.chunkFactor } - if i.currChunk != nChunk || i.currChunkFreqNorm == nil { + if i.includeFreqNorm && (i.currChunk != nChunk || i.currChunkFreqNorm == nil) { err := i.loadChunk(int(nChunk)) if err != nil { return 0, false, fmt.Errorf("error loading chunk: %v", err) From 72ac35296132e4ab3da85f56237bebbf3cf2bcbe Mon Sep 17 00:00:00 2001 From: Sreekanth Sivasankaran Date: Fri, 23 Mar 2018 16:07:14 +0530 Subject: [PATCH 332/728] TermFieldReader Advance optimization skips to the target segment and avoid un necesary read of freq,loc,norm details --- index/scorch/segment/empty.go | 4 ++++ index/scorch/segment/mem/posting.go | 21 
+++++++++++++++++ index/scorch/segment/segment.go | 4 ++++ index/scorch/segment/zap/posting.go | 31 +++++++++++++++++++++++++ index/scorch/snapshot_index_tfr.go | 35 +++++++++++++++++------------ 5 files changed, 81 insertions(+), 14 deletions(-) diff --git a/index/scorch/segment/empty.go b/index/scorch/segment/empty.go index 6c19f60f9..21f8db3fc 100644 --- a/index/scorch/segment/empty.go +++ b/index/scorch/segment/empty.go @@ -105,3 +105,7 @@ func (e *EmptyPostingsIterator) Next() (Posting, error) { func (e *EmptyPostingsIterator) Size() int { return 0 } + +func (e *EmptyPostingsIterator) Advance(uint64) (Posting, error) { + return nil, nil +} diff --git a/index/scorch/segment/mem/posting.go b/index/scorch/segment/mem/posting.go index 4203acbe5..2b65a91c8 100644 --- a/index/scorch/segment/mem/posting.go +++ b/index/scorch/segment/mem/posting.go @@ -155,6 +155,27 @@ func (i *PostingsIterator) Next() (segment.Posting, error) { return &i.reuse, nil } +func (i *PostingsIterator) Advance(docNumber uint64) (segment.Posting, error) { + if i.reuse.Number() == docNumber { + return &i.reuse, nil + } + next, err := i.Next() + if err != nil { + return next, err + } + + nnum := next.Number() + for nnum < docNumber { + next, err = i.Next() + if err != nil || next == nil { + return next, err + } + nnum = next.Number() + } + + return next, nil +} + // Posting is a single entry in a postings list type Posting struct { iterator *PostingsIterator diff --git a/index/scorch/segment/segment.go b/index/scorch/segment/segment.go index 8eee5f75f..5e917b798 100644 --- a/index/scorch/segment/segment.go +++ b/index/scorch/segment/segment.go @@ -75,6 +75,10 @@ type PostingsIterator interface { Next() (Posting, error) Size() int + + // Advance will return the respective posting of the + // sepcified doc number or its immediate follower. 
+ Advance(docNum uint64) (Posting, error) } type Posting interface { diff --git a/index/scorch/segment/zap/posting.go b/index/scorch/segment/zap/posting.go index 004b80317..b756c936e 100644 --- a/index/scorch/segment/zap/posting.go +++ b/index/scorch/segment/zap/posting.go @@ -558,6 +558,37 @@ func (i *PostingsIterator) nextBytes() ( return docNum, freq, normBits, bytesFreqNorm, bytesLoc, nil } +func (i *PostingsIterator) Advance(docNumber uint64) (segment.Posting, error) { + // check if we are already there + if i.next.Number() == docNumber { + return &i.next, nil + } + + nChunk := uint32(docNumber) / i.postings.sb.chunkFactor + if i.currChunk != nChunk { + err := i.loadChunk(int(nChunk)) + if err != nil { + return nil, fmt.Errorf("Advance, error loading chunk: %v", err) + } + } + + next, err := i.Next() + if err != nil || next == nil { + return nil, err + } + + nnum := next.Number() + for nnum < docNumber { + next, err = i.Next() + if err != nil || next == nil { + return next, err + } + nnum = next.Number() + } + + return next, nil +} + // nextDocNum returns the next docNum on the postings list, and also // sets up the currChunk / loc related fields of the iterator. 
func (i *PostingsIterator) nextDocNum() (uint64, bool, error) { diff --git a/index/scorch/snapshot_index_tfr.go b/index/scorch/snapshot_index_tfr.go index e1a0e9a59..4c3d08edd 100644 --- a/index/scorch/snapshot_index_tfr.go +++ b/index/scorch/snapshot_index_tfr.go @@ -115,7 +115,8 @@ func (i *IndexSnapshotTermFieldReader) postingToTermFieldDoc(next segment.Postin } } -func (i *IndexSnapshotTermFieldReader) Advance(ID index.IndexInternalID, preAlloced *index.TermFieldDoc) (*index.TermFieldDoc, error) { +func (i *IndexSnapshotTermFieldReader) Advance(ID index.IndexInternalID, + preAlloced *index.TermFieldDoc) (*index.TermFieldDoc, error) { // FIXME do something better // for now, if we need to seek backwards, then restart from the beginning if i.currPosting != nil && bytes.Compare(i.currID, ID) >= 0 { @@ -126,24 +127,30 @@ func (i *IndexSnapshotTermFieldReader) Advance(ID index.IndexInternalID, preAllo } *i = *(i2.(*IndexSnapshotTermFieldReader)) } - // FIXME do something better - next, err := i.Next(preAlloced) + + num, err := docInternalToNumber(ID) if err != nil { - return nil, err + return nil, nil } - if next == nil { + segIndex, ldocNum := i.snapshot.segmentIndexAndLocalDocNumFromGlobal(num) + if segIndex > len(i.snapshot.segment) { return nil, nil } - for bytes.Compare(next.ID, ID) < 0 { - next, err = i.Next(preAlloced) - if err != nil { - return nil, err - } - if next == nil { - break - } + // skip directly to the target segment + next, err := i.iterators[segIndex].Advance(ldocNum) + if err != nil || next == nil { + return nil, err + } + + if preAlloced == nil { + preAlloced = &index.TermFieldDoc{} } - return next, nil + preAlloced.ID = docNumberToBytes(preAlloced.ID, next.Number()+ + i.snapshot.offsets[segIndex]) + i.postingToTermFieldDoc(next, preAlloced) + i.currID = preAlloced.ID + i.currPosting = next + return preAlloced, nil } func (i *IndexSnapshotTermFieldReader) Count() uint64 { From db6a2c274f692fc77093ea93ea7146a3cba1b7e2 Mon Sep 17 00:00:00 2001 
From: Sreekanth Sivasankaran Date: Tue, 27 Mar 2018 22:10:09 +0530 Subject: [PATCH 333/728] adding nil check --- index/scorch/segment/mem/posting.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/index/scorch/segment/mem/posting.go b/index/scorch/segment/mem/posting.go index 2b65a91c8..27be5dbc6 100644 --- a/index/scorch/segment/mem/posting.go +++ b/index/scorch/segment/mem/posting.go @@ -160,7 +160,7 @@ func (i *PostingsIterator) Advance(docNumber uint64) (segment.Posting, error) { return &i.reuse, nil } next, err := i.Next() - if err != nil { + if err != nil || next == nil { return next, err } From 596d990eb95198f7d0fc19353b052622587f8895 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Tue, 27 Mar 2018 13:57:46 -0700 Subject: [PATCH 334/728] scorch zap optimize when zero hits Instead of allocating brand-new empty postingsList/Iterator instances, reuse some empty singletons. --- index/scorch/segment/zap/dict.go | 8 +++++++- index/scorch/segment/zap/posting.go | 13 +++++++++++++ index/scorch/segment/zap/segment.go | 2 +- 3 files changed, 21 insertions(+), 2 deletions(-) diff --git a/index/scorch/segment/zap/dict.go b/index/scorch/segment/zap/dict.go index 38b4faca1..049ebb366 100644 --- a/index/scorch/segment/zap/dict.go +++ b/index/scorch/segment/zap/dict.go @@ -39,6 +39,9 @@ func (d *Dictionary) PostingsList(term string, except *roaring.Bitmap) (segment. 
func (d *Dictionary) postingsList(term []byte, except *roaring.Bitmap, rv *PostingsList) (*PostingsList, error) { if d.fst == nil { + if rv == nil || rv == emptyPostingsList { + return emptyPostingsList, nil + } return d.postingsListInit(rv, except), nil } @@ -47,6 +50,9 @@ func (d *Dictionary) postingsList(term []byte, except *roaring.Bitmap, rv *Posti return nil, fmt.Errorf("vellum err: %v", err) } if !exists { + if rv == nil || rv == emptyPostingsList { + return emptyPostingsList, nil + } return d.postingsListInit(rv, except), nil } @@ -65,7 +71,7 @@ func (d *Dictionary) postingsListFromOffset(postingsOffset uint64, except *roari } func (d *Dictionary) postingsListInit(rv *PostingsList, except *roaring.Bitmap) *PostingsList { - if rv == nil { + if rv == nil || rv == emptyPostingsList { rv = &PostingsList{} } else { postings := rv.postings diff --git a/index/scorch/segment/zap/posting.go b/index/scorch/segment/zap/posting.go index 6b9e1a533..c0c39571a 100644 --- a/index/scorch/segment/zap/posting.go +++ b/index/scorch/segment/zap/posting.go @@ -109,6 +109,9 @@ type PostingsList struct { normBits1Hit uint64 } +// represents an immutable, empty postings list +var emptyPostingsList = &PostingsList{} + func (p *PostingsList) Size() int { sizeInBytes := reflectStaticSizePostingsList + size.SizeOfPtr @@ -132,6 +135,10 @@ func (p *PostingsList) OrInto(receiver *roaring.Bitmap) { // Iterator returns an iterator for this postings list func (p *PostingsList) Iterator(includeFreq, includeNorm, includeLocs bool) segment.PostingsIterator { + if p.normBits1Hit == 0 && p.postings == nil { + return emptyPostingsIterator + } + return p.iterator(includeFreq, includeNorm, includeLocs, nil) } @@ -341,6 +348,8 @@ type PostingsIterator struct { includeLocs bool } +var emptyPostingsIterator = &PostingsIterator{} + func (i *PostingsIterator) Size() int { sizeInBytes := reflectStaticSizePostingsIterator + size.SizeOfPtr + len(i.currChunkFreqNorm) + @@ -589,6 +598,10 @@ func (i 
*PostingsIterator) nextBytes() ( } func (i *PostingsIterator) Advance(docNumber uint64) (segment.Posting, error) { + if i.postings == nil { + return nil, nil + } + // check if we are already there if i.next.Number() == docNumber { return &i.next, nil diff --git a/index/scorch/segment/zap/segment.go b/index/scorch/segment/zap/segment.go index 3a3fd02eb..58f8bee02 100644 --- a/index/scorch/segment/zap/segment.go +++ b/index/scorch/segment/zap/segment.go @@ -373,7 +373,7 @@ func (s *SegmentBase) DocNumbers(ids []string) (*roaring.Bitmap, error) { return nil, err } - var postingsList *PostingsList + postingsList := emptyPostingsList for _, id := range ids { postingsList, err = idDict.postingsList([]byte(id), nil, postingsList) if err != nil { From 013d06d756ca362eb06586e7ac88ca508a2ddb9c Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Tue, 27 Mar 2018 14:05:17 -0700 Subject: [PATCH 335/728] scorch TermFieldReader() reuses string(term) --- index/scorch/snapshot_index.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/index/scorch/snapshot_index.go b/index/scorch/snapshot_index.go index 95343af7f..251aa9fad 100644 --- a/index/scorch/snapshot_index.go +++ b/index/scorch/snapshot_index.go @@ -373,7 +373,7 @@ func (i *IndexSnapshot) InternalID(id string) (rv index.IndexInternalID, err err func (i *IndexSnapshot) TermFieldReader(term []byte, field string, includeFreq, includeNorm, includeTermVectors bool) (index.TermFieldReader, error) { - + termStr := string(term) rv := &IndexSnapshotTermFieldReader{ term: term, field: field, @@ -389,7 +389,7 @@ func (i *IndexSnapshot) TermFieldReader(term []byte, field string, includeFreq, if err != nil { return nil, err } - pl, err := dict.PostingsList(string(term), nil) + pl, err := dict.PostingsList(termStr, nil) if err != nil { return nil, err } From b955bdcd72657b268eb6e6a8d530d919557a3ad0 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Wed, 28 Mar 2018 10:01:01 -0700 Subject: [PATCH 336/728] scorch optimize 
docInternalToNumber() to avoid allocations docInternalToNumber() no longer allocates a reader instance and a heap uint64 to hold the result. --- index/scorch/snapshot_index.go | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/index/scorch/snapshot_index.go b/index/scorch/snapshot_index.go index 251aa9fad..800d250f6 100644 --- a/index/scorch/snapshot_index.go +++ b/index/scorch/snapshot_index.go @@ -413,12 +413,10 @@ func docNumberToBytes(buf []byte, in uint64) []byte { } func docInternalToNumber(in index.IndexInternalID) (uint64, error) { - var res uint64 - err := binary.Read(bytes.NewReader(in), binary.BigEndian, &res) - if err != nil { - return 0, err + if len(in) != 8 { + return 0, fmt.Errorf("wrong len for IndexInternalID: %q", in) } - return res, nil + return binary.BigEndian.Uint64(in), nil } func (i *IndexSnapshot) DocumentVisitFieldTerms(id index.IndexInternalID, From 72d65ab1e18d2959aa9690fcdb2d02450d1367cf Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Wed, 28 Mar 2018 15:53:25 -0700 Subject: [PATCH 337/728] scorch zap optimize stored field format for _id field NOTE: This is a zap version and file format change. This optimization treats the _id field as a special case, where the _id field is no longer snappy compressed along with all the other non-"_id" stored fields. Instead, the _id field's length and bytes are written out early and as-is, making the _id value easier to access. This can be beneficial to search-related methods, such as ExternalID(), which only require _id field information. As part of this, we're also no longer writing out unnecessary data for the _id field that we persist for other non-"_id" fields, such as: the numeric fieldID (i.e., uint16(0)), the type (i.e., assumed to be 't' as a text field), the start offset, the end len, and the arrayPositions. 
Various unit tests were corrected as part of this change in order to more uniformly treat the _id field as having IndexingOptions as a stored field and not having DocValues. --- index/scorch/segment/zap/build.go | 2 +- index/scorch/segment/zap/build_test.go | 9 +++++--- index/scorch/segment/zap/merge.go | 26 ++++++++++++++++++------ index/scorch/segment/zap/new.go | 17 ++++++++++++++-- index/scorch/segment/zap/segment.go | 25 ++++++++++++++++++----- index/scorch/segment/zap/segment_test.go | 6 ++---- 6 files changed, 64 insertions(+), 21 deletions(-) diff --git a/index/scorch/segment/zap/build.go b/index/scorch/segment/zap/build.go index 03e4cbae8..e8f3499b4 100644 --- a/index/scorch/segment/zap/build.go +++ b/index/scorch/segment/zap/build.go @@ -22,7 +22,7 @@ import ( "github.com/Smerity/govarint" ) -const version uint32 = 8 +const version uint32 = 9 const fieldNotUninverted = math.MaxUint64 diff --git a/index/scorch/segment/zap/build_test.go b/index/scorch/segment/zap/build_test.go index e8189f760..5b6ffd559 100644 --- a/index/scorch/segment/zap/build_test.go +++ b/index/scorch/segment/zap/build_test.go @@ -311,7 +311,8 @@ func buildTestAnalysisResultsMultiWithDifferentFields(includeDocA, includeDocB b doc := &document.Document{ ID: "a", Fields: []document.Field{ - document.NewTextField("_id", []uint64{}, []byte("a")), + document.NewTextFieldCustom("_id", nil, []byte("a"), + document.IndexField|document.StoreField, nil), document.NewTextField("name", []uint64{}, []byte("ABC")), document.NewTextField("dept", []uint64{}, []byte("ABC dept")), document.NewTextField("manages.id", []uint64{}, []byte("XYZ")), @@ -388,7 +389,8 @@ func buildTestAnalysisResultsMultiWithDifferentFields(includeDocA, includeDocB b doc := &document.Document{ ID: "b", Fields: []document.Field{ - document.NewTextField("_id", []uint64{}, []byte("b")), + document.NewTextFieldCustom("_id", nil, []byte("b"), + document.IndexField|document.StoreField, nil), document.NewTextField("name", []uint64{}, 
[]byte("XYZ")), document.NewTextField("dept", []uint64{}, []byte("ABC dept")), document.NewTextField("reportsTo.id", []uint64{}, []byte("ABC")), @@ -468,7 +470,8 @@ func buildTestSegmentWithDefaultFieldMapping(chunkFactor uint32) ( doc := &document.Document{ ID: "a", Fields: []document.Field{ - document.NewTextField("_id", nil, []byte("a")), + document.NewTextFieldCustom("_id", nil, []byte("a"), + document.IndexField|document.StoreField, nil), document.NewTextField("name", nil, []byte("wow")), document.NewTextField("desc", nil, []byte("some thing")), document.NewTextField("tag", []uint64{0}, []byte("cold")), diff --git a/index/scorch/segment/zap/merge.go b/index/scorch/segment/zap/merge.go index 0d40d5f28..3eafa0c77 100644 --- a/index/scorch/segment/zap/merge.go +++ b/index/scorch/segment/zap/merge.go @@ -622,12 +622,19 @@ func mergeStoredAndRemap(segments []*SegmentBase, drops []*roaring.Bitmap, return 0, nil, err } - // now walk the fields in order - for fieldID := range fieldsInv { - storedFieldValues := vals[int(fieldID)] + // _id field special case optimizes ExternalID() lookups + idFieldVal := vals[uint16(0)][0] + _, err = metaEncoder.PutU64(uint64(len(idFieldVal))) + if err != nil { + return 0, nil, err + } + + // now walk the non-"_id" fields in order + for fieldID := 1; fieldID < len(fieldsInv); fieldID++ { + storedFieldValues := vals[fieldID] - stf := typs[int(fieldID)] - spf := poss[int(fieldID)] + stf := typs[fieldID] + spf := poss[fieldID] var err2 error curr, data, err2 = persistStoredFieldValues(fieldID, @@ -646,7 +653,9 @@ func mergeStoredAndRemap(segments []*SegmentBase, drops []*roaring.Bitmap, docNumOffsets[newDocNum] = uint64(w.Count()) // write out the meta len and compressed data len - _, err = writeUvarints(w, uint64(len(metaBytes)), uint64(len(compressed))) + _, err = writeUvarints(w, + uint64(len(metaBytes)), + uint64(len(idFieldVal)+len(compressed))) if err != nil { return 0, nil, err } @@ -655,6 +664,11 @@ func 
mergeStoredAndRemap(segments []*SegmentBase, drops []*roaring.Bitmap, if err != nil { return 0, nil, err } + // now write the _id field val (counted as part of the 'compressed' data) + _, err = w.Write(idFieldVal) + if err != nil { + return 0, nil, err + } // now write the compressed data _, err = w.Write(compressed) if err != nil { diff --git a/index/scorch/segment/zap/new.go b/index/scorch/segment/zap/new.go index 83a0cbced..63b154622 100644 --- a/index/scorch/segment/zap/new.go +++ b/index/scorch/segment/zap/new.go @@ -518,7 +518,15 @@ func (s *interim) writeStoredFields() ( s.metaBuf.Reset() data = data[:0] - for fieldID := range s.FieldsInv { + // _id field special case optimizes ExternalID() lookups + idFieldVal := docStoredFields[uint16(0)].vals[0] + _, err = metaEncoder.PutU64(uint64(len(idFieldVal))) + if err != nil { + return 0, err + } + + // handle non-"_id" fields + for fieldID := 1; fieldID < len(s.FieldsInv); fieldID++ { isf, exists := docStoredFields[uint16(fieldID)] if exists { curr, data, err = persistStoredFieldValues( @@ -539,7 +547,7 @@ func (s *interim) writeStoredFields() ( _, err := writeUvarints(s.w, uint64(len(metaBytes)), - uint64(len(compressed))) + uint64(len(idFieldVal)+len(compressed))) if err != nil { return 0, err } @@ -549,6 +557,11 @@ func (s *interim) writeStoredFields() ( return 0, err } + _, err = s.w.Write(idFieldVal) + if err != nil { + return 0, err + } + _, err = s.w.Write(compressed) if err != nil { return 0, err diff --git a/index/scorch/segment/zap/segment.go b/index/scorch/segment/zap/segment.go index 58f8bee02..541647abb 100644 --- a/index/scorch/segment/zap/segment.go +++ b/index/scorch/segment/zap/segment.go @@ -298,16 +298,31 @@ func (s *SegmentBase) VisitDocument(num uint64, visitor segment.DocumentFieldVal vdc := visitDocumentCtxPool.Get().(*visitDocumentCtx) meta, compressed := s.getDocStoredMetaAndCompressed(num) - uncompressed, err := snappy.Decode(vdc.buf[:cap(vdc.buf)], compressed) + + vdc.reader.Reset(meta) 
+ decoder := vdc.decoder + + // handle _id field special case + idFieldValLen, err := decoder.GetU64() if err != nil { return err } + idFieldVal := compressed[:idFieldValLen] - // now decode meta and process - vdc.reader.Reset(meta) - decoder := vdc.decoder + keepGoing := visitor("_id", byte('t'), idFieldVal, nil) + if !keepGoing { + visitDocumentCtxPool.Put(vdc) + return nil + } + + // handle non-"_id" fields + compressed = compressed[idFieldValLen:] + + uncompressed, err := snappy.Decode(vdc.buf[:cap(vdc.buf)], compressed) + if err != nil { + return err + } - keepGoing := true for keepGoing { field, err := decoder.GetU64() if err == io.EOF { diff --git a/index/scorch/segment/zap/segment_test.go b/index/scorch/segment/zap/segment_test.go index eb7ed65e4..00ae1c2f6 100644 --- a/index/scorch/segment/zap/segment_test.go +++ b/index/scorch/segment/zap/segment_test.go @@ -18,7 +18,6 @@ import ( "math" "os" "reflect" - "sort" "testing" "github.com/RoaringBitmap/roaring" @@ -552,7 +551,7 @@ func TestSegmentVisitableDocValueFieldsList(t *testing.T) { } _ = os.RemoveAll("/tmp/scorch.zap") - testSeg, expectedFields, _ := buildTestSegmentWithDefaultFieldMapping(1) + testSeg, _, _ = buildTestSegmentWithDefaultFieldMapping(1) err = PersistSegmentBase(testSeg, "/tmp/scorch.zap") if err != nil { t.Fatalf("error persisting segment: %v", err) @@ -576,7 +575,7 @@ func TestSegmentVisitableDocValueFieldsList(t *testing.T) { t.Fatalf("segment VisitableDocValueFields err: %v", err) } - sort.Strings(expectedFields[1:]) // keep _id as first field + expectedFields := []string{"desc", "name", "tag"} if !reflect.DeepEqual(fields, expectedFields) { t.Errorf("expected field terms: %#v, got: %#v", expectedFields, fields) } @@ -593,7 +592,6 @@ func TestSegmentVisitableDocValueFieldsList(t *testing.T) { "name": []string{"wow"}, "desc": []string{"some", "thing"}, "tag": []string{"cold"}, - "_id": []string{"a"}, } if !reflect.DeepEqual(fieldTerms, expectedFieldTerms) { t.Errorf("expected field 
terms: %#v, got: %#v", expectedFieldTerms, fieldTerms) From c892c4175100ca79d387fe4eb123b13ac3618384 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Wed, 28 Mar 2018 17:36:48 -0700 Subject: [PATCH 338/728] scorch optimize conjunctions via push-down to roaring.And() This change pushes down conjunction information to indexers for potential TermFieldReader optimizations. The scorch indexer can optimize this situation by constructing a more selective roaring "actual" bitmap for any PostingsIterator's that are part of the conjunction. On a bleve-query microbenchmark, dev macbook, with a scorch index of 50K en-wiki docs, using a repeated, single-threaded conjunction query of several relatively high-frequency terms (query-string of "+text:see +text:also +text:where +text:http"), perf was ~265 queries/sec before this change, and ~310 queries/sec after this change. --- index/index.go | 16 +++++ index/scorch/optimize.go | 93 +++++++++++++++++++++++++++ index/scorch/segment/zap/posting.go | 14 ++-- search/searcher/search_conjunction.go | 24 +++++++ search/searcher/search_disjunction.go | 15 +++++ search/searcher/search_term.go | 10 +++ 6 files changed, 166 insertions(+), 6 deletions(-) create mode 100644 index/scorch/optimize.go diff --git a/index/index.go b/index/index.go index e5a69297b..e3731e3f8 100644 --- a/index/index.go +++ b/index/index.go @@ -272,3 +272,19 @@ func (b *Batch) Reset() { b.IndexOps = make(map[string]*document.Document) b.InternalOps = make(map[string][]byte) } + +// Optimizable represents an optional interface that implementable by +// optimizable resources (e.g., TermFieldReaders, Searchers). These +// optimizable resources are provided the same OptimizableContext +// instance, so that they can coordinate via dynamic interface +// casting. 
+type Optimizable interface { + Optimize(kind string, octx OptimizableContext) (OptimizableContext, error) +} + +type OptimizableContext interface { + // Once all the optimzable resources have been provided the same + // OptimizableContext instance, the optimization preparations are + // finished or completed via the Finish() method. + Finish() error +} diff --git a/index/scorch/optimize.go b/index/scorch/optimize.go new file mode 100644 index 000000000..b45fc8b0d --- /dev/null +++ b/index/scorch/optimize.go @@ -0,0 +1,93 @@ +// Copyright (c) 2018 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package scorch + +import ( + "fmt" + + "github.com/RoaringBitmap/roaring" + + "github.com/blevesearch/bleve/index" + "github.com/blevesearch/bleve/index/scorch/segment/zap" +) + +func (s *IndexSnapshotTermFieldReader) Optimize(kind string, octx index.OptimizableContext) ( + index.OptimizableContext, error) { + if kind != "conjunction" { + return octx, nil + } + + if octx == nil { + octx = &OptimizeTFRConjunction{snapshot: s.snapshot} + } + + o, ok := octx.(*OptimizeTFRConjunction) + if !ok { + return octx, nil + } + + if o.snapshot != s.snapshot { + return nil, fmt.Errorf("tried to optimize across different snapshots") + } + + o.tfrs = append(o.tfrs, s) + + return o, nil +} + +type OptimizeTFRConjunction struct { + snapshot *IndexSnapshot + + tfrs []*IndexSnapshotTermFieldReader +} + +func (o *OptimizeTFRConjunction) Finish() error { + if len(o.tfrs) <= 1 { + return nil + } + + for i := range o.snapshot.segment { + itr0, ok := o.tfrs[0].iterators[i].(*zap.PostingsIterator) + if !ok || itr0.ActualBM == nil { + continue + } + + itr1, ok := o.tfrs[1].iterators[i].(*zap.PostingsIterator) + if !ok || itr1.ActualBM == nil { + continue + } + + bm := roaring.And(itr0.ActualBM, itr1.ActualBM) + + for _, tfr := range o.tfrs[2:] { + itr, ok := tfr.iterators[i].(*zap.PostingsIterator) + if !ok || itr.ActualBM == nil { + continue + } + + bm.And(itr.ActualBM) + } + + for _, tfr := range o.tfrs { + itr, ok := tfr.iterators[i].(*zap.PostingsIterator) + if ok && itr.ActualBM != nil { + itr.ActualBM = bm + itr.Actual = bm.Iterator() + } + } + } + + return nil +} diff --git a/index/scorch/segment/zap/posting.go b/index/scorch/segment/zap/posting.go index c0c39571a..d2e51e039 100644 --- a/index/scorch/segment/zap/posting.go +++ b/index/scorch/segment/zap/posting.go @@ -244,10 +244,11 @@ func (p *PostingsList) iterator(includeFreq, includeNorm, includeLocs bool, rv.all = p.postings.Iterator() if p.except != nil { - allExcept := roaring.AndNot(p.postings, p.except) - rv.actual = 
allExcept.Iterator() + rv.ActualBM = roaring.AndNot(p.postings, p.except) + rv.Actual = rv.ActualBM.Iterator() } else { - rv.actual = p.postings.Iterator() + rv.ActualBM = p.postings + rv.Actual = p.postings.Iterator() } return rv @@ -319,7 +320,8 @@ func (rv *PostingsList) init1Hit(fstVal uint64) error { type PostingsIterator struct { postings *PostingsList all roaring.IntIterable - actual roaring.IntIterable + Actual roaring.IntIterable + ActualBM *roaring.Bitmap currChunk uint32 currChunkFreqNorm []byte @@ -644,11 +646,11 @@ func (i *PostingsIterator) nextDocNum() (uint64, bool, error) { return docNum, true, nil } - if i.actual == nil || !i.actual.HasNext() { + if i.Actual == nil || !i.Actual.HasNext() { return 0, false, nil } - n := i.actual.Next() + n := i.Actual.Next() allN := i.all.Next() nChunk := n / i.postings.sb.chunkFactor diff --git a/search/searcher/search_conjunction.go b/search/searcher/search_conjunction.go index da65f3981..a48052679 100644 --- a/search/searcher/search_conjunction.go +++ b/search/searcher/search_conjunction.go @@ -60,6 +60,30 @@ func NewConjunctionSearcher(indexReader index.IndexReader, qsearchers []search.S scorer: scorer.NewConjunctionQueryScorer(options), } rv.computeQueryNorm() + + // attempt push-down conjunction optimization when there's >1 searchers + if len(searchers) > 1 { + var octx index.OptimizableContext + + for _, searcher := range searchers { + o, ok := searcher.(index.Optimizable) + if ok { + var err error + octx, err = o.Optimize("conjunction", octx) + if err != nil { + return nil, err + } + } + } + + if octx != nil { + err := octx.Finish() + if err != nil { + return nil, err + } + } + } + return &rv, nil } diff --git a/search/searcher/search_disjunction.go b/search/searcher/search_disjunction.go index 32d614801..b75041371 100644 --- a/search/searcher/search_disjunction.go +++ b/search/searcher/search_disjunction.go @@ -306,3 +306,18 @@ func (s *DisjunctionSearcher) DocumentMatchPoolSize() int { } return rv } + +// 
a disjunction searcher implements the index.Optimizable interface +// but only activates on an edge case where the disjunction is a +// wrapper around a single Optimizable child searcher +func (s *DisjunctionSearcher) Optimize(kind string, octx index.OptimizableContext) ( + index.OptimizableContext, error) { + if len(s.searchers) == 1 { + o, ok := s.searchers[0].(index.Optimizable) + if ok { + return o.Optimize(kind, octx) + } + } + + return octx, nil +} diff --git a/search/searcher/search_term.go b/search/searcher/search_term.go index b99e4c263..4fee58bbf 100644 --- a/search/searcher/search_term.go +++ b/search/searcher/search_term.go @@ -137,3 +137,13 @@ func (s *TermSearcher) Min() int { func (s *TermSearcher) DocumentMatchPoolSize() int { return 1 } + +func (s *TermSearcher) Optimize(kind string, octx index.OptimizableContext) ( + index.OptimizableContext, error) { + o, ok := s.reader.(index.Optimizable) + if ok { + return o.Optimize(kind, octx) + } + + return octx, nil +} From 71a9f6c1b36409fa34e2e4782932110a40ee4ab8 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Thu, 29 Mar 2018 16:12:08 -0700 Subject: [PATCH 339/728] scorch optimize DictionaryIterator.Next() entry reuse Optimize DictionaryIterator to reuse a index.DictEntry rather than allocating a new instance on every Next(). This follows an approach used by upsidedown's dictionary iterators to avoid mem allocations. 
--- index/scorch/segment/zap/dict.go | 17 ++++++++--------- index/scorch/snapshot_index.go | 2 +- index/scorch/snapshot_index_dict.go | 15 ++++++++------- 3 files changed, 17 insertions(+), 17 deletions(-) diff --git a/index/scorch/segment/zap/dict.go b/index/scorch/segment/zap/dict.go index 049ebb366..33240df2c 100644 --- a/index/scorch/segment/zap/dict.go +++ b/index/scorch/segment/zap/dict.go @@ -151,10 +151,11 @@ func (d *Dictionary) RangeIterator(start, end string) segment.DictionaryIterator // DictionaryIterator is an iterator for term dictionary type DictionaryIterator struct { - d *Dictionary - itr vellum.Iterator - err error - tmp PostingsList + d *Dictionary + itr vellum.Iterator + err error + tmp PostingsList + entry index.DictEntry } // Next returns the next entry in the dictionary @@ -169,10 +170,8 @@ func (i *DictionaryIterator) Next() (*index.DictEntry, error) { if i.err != nil { return nil, i.err } - rv := &index.DictEntry{ - Term: string(term), - Count: i.tmp.Count(), - } + i.entry.Term = string(term) + i.entry.Count = i.tmp.Count() i.err = i.itr.Next() - return rv, nil + return &i.entry, nil } diff --git a/index/scorch/snapshot_index.go b/index/scorch/snapshot_index.go index 800d250f6..4e22f68c5 100644 --- a/index/scorch/snapshot_index.go +++ b/index/scorch/snapshot_index.go @@ -140,7 +140,7 @@ func (i *IndexSnapshot) newIndexSnapshotFieldDict(field string, makeItr func(i s if next != nil { rv.cursors = append(rv.cursors, &segmentDictCursor{ itr: asr.dictItr, - curr: next, + curr: *next, }) } } diff --git a/index/scorch/snapshot_index_dict.go b/index/scorch/snapshot_index_dict.go index 3c902cad6..2d229ca0f 100644 --- a/index/scorch/snapshot_index_dict.go +++ b/index/scorch/snapshot_index_dict.go @@ -23,12 +23,13 @@ import ( type segmentDictCursor struct { itr segment.DictionaryIterator - curr *index.DictEntry + curr index.DictEntry } type IndexSnapshotFieldDict struct { snapshot *IndexSnapshot cursors []*segmentDictCursor + entry index.DictEntry } 
func (i *IndexSnapshotFieldDict) Len() int { return len(i.cursors) } @@ -54,7 +55,7 @@ func (i *IndexSnapshotFieldDict) Next() (*index.DictEntry, error) { if len(i.cursors) <= 0 { return nil, nil } - rv := i.cursors[0].curr + i.entry = i.cursors[0].curr next, err := i.cursors[0].itr.Next() if err != nil { return nil, err @@ -64,12 +65,12 @@ func (i *IndexSnapshotFieldDict) Next() (*index.DictEntry, error) { heap.Pop(i) } else { // modified heap, fix it - i.cursors[0].curr = next + i.cursors[0].curr = *next heap.Fix(i, 0) } // look for any other entries with the exact same term - for len(i.cursors) > 0 && i.cursors[0].curr.Term == rv.Term { - rv.Count += i.cursors[0].curr.Count + for len(i.cursors) > 0 && i.cursors[0].curr.Term == i.entry.Term { + i.entry.Count += i.cursors[0].curr.Count next, err := i.cursors[0].itr.Next() if err != nil { return nil, err @@ -79,12 +80,12 @@ func (i *IndexSnapshotFieldDict) Next() (*index.DictEntry, error) { heap.Pop(i) } else { // modified heap, fix it - i.cursors[0].curr = next + i.cursors[0].curr = *next heap.Fix(i, 0) } } - return rv, nil + return &i.entry, nil } func (i *IndexSnapshotFieldDict) Close() error { From dd6ffbcb8ab38177809a2ce489d0a2ef65c7cfed Mon Sep 17 00:00:00 2001 From: abhinavdangeti Date: Thu, 29 Mar 2018 18:36:48 -0700 Subject: [PATCH 340/728] Address data race with eligibleForRemoval WARNING: DATA RACE Read at 0x00c4201945c8 by goroutine 70: runtime.growslice() /usr/local/Cellar/go/1.9.3/libexec/src/runtime/slice.go:82 +0x0 github.com/blevesearch/bleve/index/scorch.(*Scorch).AddEligibleForRemoval() /Users/abhinavdangeti/Documents/go/src/github.com/blevesearch/bleve/index/scorch/scorch.go:485 +0x1bd Previous write at 0x00c4201945c8 by goroutine 47: github.com/blevesearch/bleve/index/scorch.(*Scorch).loadFromBolt.func1() /Users/abhinavdangeti/Documents/go/src/github.com/blevesearch/bleve/index/scorch/persister.go:476 +0x32b github.com/boltdb/bolt.(*DB).View() 
/Users/abhinavdangeti/Documents/go/src/github.com/boltdb/bolt/db.go:629 +0xc1 github.com/blevesearch/bleve/index/scorch.(*Scorch).loadFromBolt() /Users/abhinavdangeti/Documents/go/src/github.com/blevesearch/bleve/index/scorch/persister.go:462 +0xa1 github.com/blevesearch/bleve/index/scorch.(*Scorch).openBolt() /Users/abhinavdangeti/Documents/go/src/github.com/blevesearch/bleve/index/scorch/scorch.go:172 +0x9f7 github.com/blevesearch/bleve/index/scorch.(*Scorch).Open() /Users/abhinavdangeti/Documents/go/src/github.com/blevesearch/bleve/index/scorch/scorch.go:121 +0x3c github.com/blevesearch/bleve/index/scorch.TestIndexInsertThenDelete() /Users/abhinavdangeti/Documents/go/src/github.com/blevesearch/bleve/index/scorch/scorch_test.go:341 +0x18d3 testing.tRunner() /usr/local/Cellar/go/1.9.3/libexec/src/testing/testing.go:746 +0x16c --- index/scorch/persister.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/index/scorch/persister.go b/index/scorch/persister.go index 2fab53240..9a5c4ca46 100644 --- a/index/scorch/persister.go +++ b/index/scorch/persister.go @@ -473,19 +473,19 @@ func (s *Scorch) loadFromBolt() error { continue } if foundRoot { - s.eligibleForRemoval = append(s.eligibleForRemoval, snapshotEpoch) + s.AddEligibleForRemoval(snapshotEpoch) continue } snapshot := snapshots.Bucket(k) if snapshot == nil { log.Printf("snapshot key, but bucket missing %x, continuing", k) - s.eligibleForRemoval = append(s.eligibleForRemoval, snapshotEpoch) + s.AddEligibleForRemoval(snapshotEpoch) continue } indexSnapshot, err := s.loadSnapshot(snapshot) if err != nil { log.Printf("unable to load snapshot, %v, continuing", err) - s.eligibleForRemoval = append(s.eligibleForRemoval, snapshotEpoch) + s.AddEligibleForRemoval(snapshotEpoch) continue } indexSnapshot.epoch = snapshotEpoch From 0b4dadf430338183b4ce0da0ffc92f6b2c1e4e25 Mon Sep 17 00:00:00 2001 From: abhinavdangeti Date: Thu, 29 Mar 2018 18:56:16 -0700 Subject: [PATCH 341/728] Update nextSnapshotEpoch 
only within root Lock --- index/scorch/introducer.go | 9 +++++---- index/scorch/persister.go | 2 +- index/scorch/scorch.go | 2 +- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/index/scorch/introducer.go b/index/scorch/introducer.go index 627d4e4cd..b00260bbe 100644 --- a/index/scorch/introducer.go +++ b/index/scorch/introducer.go @@ -212,19 +212,20 @@ func (s *Scorch) introducePersist(persist *persistIntroduction) { atomic.AddUint64(&s.stats.TotIntroducePersistBeg, 1) defer atomic.AddUint64(&s.stats.TotIntroducePersistEnd, 1) - s.rootLock.RLock() + s.rootLock.Lock() root := s.root - s.rootLock.RUnlock() + nextSnapshotEpoch := s.nextSnapshotEpoch + s.nextSnapshotEpoch++ + s.rootLock.Unlock() newIndexSnapshot := &IndexSnapshot{ parent: s, - epoch: s.nextSnapshotEpoch, + epoch: nextSnapshotEpoch, segment: make([]*SegmentSnapshot, len(root.segment)), offsets: make([]uint64, len(root.offsets)), internal: make(map[string][]byte, len(root.internal)), refs: 1, } - s.nextSnapshotEpoch++ for i, segmentSnapshot := range root.segment { // see if this segment has been replaced diff --git a/index/scorch/persister.go b/index/scorch/persister.go index 2fab53240..14fce1f30 100644 --- a/index/scorch/persister.go +++ b/index/scorch/persister.go @@ -495,8 +495,8 @@ func (s *Scorch) loadFromBolt() error { return err } s.nextSegmentID++ - s.nextSnapshotEpoch = snapshotEpoch + 1 s.rootLock.Lock() + s.nextSnapshotEpoch = snapshotEpoch + 1 if s.root != nil { _ = s.root.DecRef() } diff --git a/index/scorch/scorch.go b/index/scorch/scorch.go index cc47cda86..13e9a4027 100644 --- a/index/scorch/scorch.go +++ b/index/scorch/scorch.go @@ -57,8 +57,8 @@ type Scorch struct { nextSnapshotEpoch uint64 eligibleForRemoval []uint64 // Index snapshot epochs that are safe to GC. ineligibleForRemoval map[string]bool // Filenames that should not be GC'ed yet. 
- numSnapshotsToKeep int + numSnapshotsToKeep int closeCh chan struct{} introductions chan *segmentIntroduction persists chan *persistIntroduction From 7e3d8ae96d16204cda4c4adc310de4699c07b20d Mon Sep 17 00:00:00 2001 From: abhinavdangeti Date: Thu, 29 Mar 2018 19:03:43 -0700 Subject: [PATCH 342/728] Address data race with size of cachedDocs WARNING: DATA RACE Read at 0x00c420901b50 by goroutine 240: github.com/blevesearch/bleve/index/scorch.(*SegmentSnapshot).Size() /Users/abhinavdangeti/Documents/go/src/github.com/blevesearch/bleve/index/scorch/snapshot_segment.go:233 +0xbb github.com/blevesearch/bleve/index/scorch.(*IndexSnapshot).updateSize() /Users/abhinavdangeti/Documents/go/src/github.com/blevesearch/bleve/index/scorch/snapshot_index.go:108 +0x102 github.com/blevesearch/bleve/index/scorch.(*Scorch).introduceMerge() /Users/abhinavdangeti/Documents/go/src/github.com/blevesearch/bleve/index/scorch/introducer.go:354 +0x131e github.com/blevesearch/bleve/index/scorch.(*Scorch).mainLoop() /Users/abhinavdangeti/Documents/go/src/github.com/blevesearch/bleve/index/scorch/introducer.go:66 +0x82d Previous write at 0x00c420901b50 by goroutine 103: github.com/blevesearch/bleve/index/scorch.(*cachedDocs).updateSizeLOCKED() /Users/abhinavdangeti/Documents/go/src/github.com/blevesearch/bleve/index/scorch/snapshot_segment.go:246 +0x203 github.com/blevesearch/bleve/index/scorch.(*cachedDocs).prepareFields() /Users/abhinavdangeti/Documents/go/src/github.com/blevesearch/bleve/index/scorch/snapshot_segment.go:226 +0x5fb github.com/blevesearch/bleve/index/scorch.(*IndexSnapshot).DocumentVisitFieldTerms.func1() /Users/abhinavdangeti/Documents/go/src/github.com/blevesearch/bleve/index/scorch/snapshot_index.go:456 +0x9f --- index/scorch/snapshot_segment.go | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/index/scorch/snapshot_segment.go b/index/scorch/snapshot_segment.go index 805e56642..c1fc9b8f3 100644 --- a/index/scorch/snapshot_segment.go +++ 
b/index/scorch/snapshot_segment.go @@ -16,6 +16,7 @@ package scorch import ( "sync" + "sync/atomic" "github.com/RoaringBitmap/roaring" "github.com/blevesearch/bleve/index/scorch/segment" @@ -230,7 +231,7 @@ func (c *cachedDocs) prepareFields(wantedFields []string, ss *SegmentSnapshot) e } func (c *cachedDocs) Size() int { - return int(c.size) + return int(atomic.LoadUint64(&c.size)) } func (c *cachedDocs) updateSizeLOCKED() { @@ -243,5 +244,5 @@ func (c *cachedDocs) updateSizeLOCKED() { } } } - c.size = uint64(sizeInBytes) + atomic.StoreUint64(&c.size, uint64(sizeInBytes)) } From ecf91a296db857b5bfd9e3b4a6a23cd864ccb825 Mon Sep 17 00:00:00 2001 From: Sreekanth Sivasankaran Date: Thu, 29 Mar 2018 12:32:56 +0530 Subject: [PATCH 343/728] removing dependency on govarint --- index/scorch/segment/zap/README.md | 6 +++--- index/scorch/segment/zap/build.go | 16 +++++++------- index/scorch/segment/zap/intcoder.go | 12 +++++------ index/scorch/segment/zap/merge.go | 13 +++++++----- index/scorch/segment/zap/new.go | 8 ++++--- index/scorch/segment/zap/posting.go | 31 ++++++++++------------------ index/scorch/segment/zap/segment.go | 16 ++++++-------- 7 files changed, 46 insertions(+), 56 deletions(-) diff --git a/index/scorch/segment/zap/README.md b/index/scorch/segment/zap/README.md index 179adceaf..41f5902a0 100644 --- a/index/scorch/segment/zap/README.md +++ b/index/scorch/segment/zap/README.md @@ -28,7 +28,7 @@ Current usage: - produce a slice of metadata bytes and data bytes - produce these slices in field id order - field value is appended to the data slice - - metadata slice is govarint encoded with the following values for each field value + - metadata slice is varint encoded with the following values for each field value - field id (uint16) - field type (byte) - field value start offset in uncompressed data slice (uint64) @@ -53,7 +53,7 @@ With this index and a known document number, we have direct access to all the st ## posting details (freq/norm) section - for each 
posting list - - produce a slice containing multiple consecutive chunks (each chunk is govarint stream) + - produce a slice containing multiple consecutive chunks (each chunk is varint stream) - produce a slice remembering offsets of where each chunk starts - preparation phase: - for each hit in the posting list @@ -71,7 +71,7 @@ If you know the doc number you're interested in, this format lets you jump to th ## posting details (location) section - for each posting list - - produce a slice containing multiple consecutive chunks (each chunk is govarint stream) + - produce a slice containing multiple consecutive chunks (each chunk is varint stream) - produce a slice remembering offsets of where each chunk starts - preparation phase: - for each hit in the posting list diff --git a/index/scorch/segment/zap/build.go b/index/scorch/segment/zap/build.go index 03e4cbae8..184b08d5b 100644 --- a/index/scorch/segment/zap/build.go +++ b/index/scorch/segment/zap/build.go @@ -18,8 +18,6 @@ import ( "bufio" "math" "os" - - "github.com/Smerity/govarint" ) const version uint32 = 8 @@ -78,37 +76,37 @@ func PersistSegmentBase(sb *SegmentBase, path string) error { func persistStoredFieldValues(fieldID int, storedFieldValues [][]byte, stf []byte, spf [][]uint64, - curr int, metaEncoder *govarint.Base128Encoder, data []byte) ( + curr int, metaEncode VarintEncoder, data []byte) ( int, []byte, error) { for i := 0; i < len(storedFieldValues); i++ { // encode field - _, err := metaEncoder.PutU64(uint64(fieldID)) + _, err := metaEncode(uint64(fieldID)) if err != nil { return 0, nil, err } // encode type - _, err = metaEncoder.PutU64(uint64(stf[i])) + _, err = metaEncode(uint64(stf[i])) if err != nil { return 0, nil, err } // encode start offset - _, err = metaEncoder.PutU64(uint64(curr)) + _, err = metaEncode(uint64(curr)) if err != nil { return 0, nil, err } // end len - _, err = metaEncoder.PutU64(uint64(len(storedFieldValues[i]))) + _, err = metaEncode(uint64(len(storedFieldValues[i]))) 
if err != nil { return 0, nil, err } // encode number of array pos - _, err = metaEncoder.PutU64(uint64(len(spf[i]))) + _, err = metaEncode(uint64(len(spf[i]))) if err != nil { return 0, nil, err } // encode all array positions for _, pos := range spf[i] { - _, err = metaEncoder.PutU64(pos) + _, err = metaEncode(pos) if err != nil { return 0, nil, err } diff --git a/index/scorch/segment/zap/intcoder.go b/index/scorch/segment/zap/intcoder.go index 81ef8bb2e..571d06edb 100644 --- a/index/scorch/segment/zap/intcoder.go +++ b/index/scorch/segment/zap/intcoder.go @@ -18,15 +18,12 @@ import ( "bytes" "encoding/binary" "io" - - "github.com/Smerity/govarint" ) type chunkedIntCoder struct { final []byte chunkSize uint64 chunkBuf bytes.Buffer - encoder *govarint.Base128Encoder chunkLens []uint64 currChunk uint64 @@ -43,7 +40,6 @@ func newChunkedIntCoder(chunkSize uint64, maxDocNum uint64) *chunkedIntCoder { chunkLens: make([]uint64, total), final: make([]byte, 0, 64), } - rv.encoder = govarint.NewU64Base128Encoder(&rv.chunkBuf) return rv } @@ -70,8 +66,13 @@ func (c *chunkedIntCoder) Add(docNum uint64, vals ...uint64) error { c.currChunk = chunk } + if len(c.buf) < binary.MaxVarintLen64 { + c.buf = make([]byte, binary.MaxVarintLen64) + } + for _, val := range vals { - _, err := c.encoder.PutU64(val) + wb := binary.PutUvarint(c.buf, val) + _, err := c.chunkBuf.Write(c.buf[:wb]) if err != nil { return err } @@ -96,7 +97,6 @@ func (c *chunkedIntCoder) AddBytes(docNum uint64, buf []byte) error { // Close indicates you are done calling Add() this allows the final chunk // to be encoded. func (c *chunkedIntCoder) Close() { - c.encoder.Close() encodingBytes := c.chunkBuf.Bytes() c.chunkLens[c.currChunk] = uint64(len(encodingBytes)) c.final = append(c.final, encodingBytes...) 
diff --git a/index/scorch/segment/zap/merge.go b/index/scorch/segment/zap/merge.go index 0d40d5f28..54c6b62cc 100644 --- a/index/scorch/segment/zap/merge.go +++ b/index/scorch/segment/zap/merge.go @@ -24,7 +24,6 @@ import ( "sort" "github.com/RoaringBitmap/roaring" - "github.com/Smerity/govarint" "github.com/couchbase/vellum" "github.com/golang/snappy" ) @@ -548,6 +547,8 @@ func writePostings(postings *roaring.Bitmap, tfEncoder, locEncoder *chunkedIntCo return postingsOffset, nil } +type VarintEncoder func(uint64) (int, error) + func mergeStoredAndRemap(segments []*SegmentBase, drops []*roaring.Bitmap, fieldsMap map[string]uint16, fieldsInv []string, fieldsSame bool, newSegDocCount uint64, w *CountHashWriter) (uint64, [][]uint64, error) { @@ -556,10 +557,13 @@ func mergeStoredAndRemap(segments []*SegmentBase, drops []*roaring.Bitmap, var newDocNum uint64 var curr int - var metaBuf bytes.Buffer var data, compressed []byte - - metaEncoder := govarint.NewU64Base128Encoder(&metaBuf) + var metaBuf bytes.Buffer + varBuf := make([]byte, binary.MaxVarintLen64) + metaEncoder := func(val uint64) (int, error) { + wb := binary.PutUvarint(varBuf, val) + return metaBuf.Write(varBuf[:wb]) + } vals := make([][][]byte, len(fieldsInv)) typs := make([][]byte, len(fieldsInv)) @@ -637,7 +641,6 @@ func mergeStoredAndRemap(segments []*SegmentBase, drops []*roaring.Bitmap, } } - metaEncoder.Close() metaBytes := metaBuf.Bytes() compressed = snappy.Encode(compressed[:cap(compressed)], data) diff --git a/index/scorch/segment/zap/new.go b/index/scorch/segment/zap/new.go index 83a0cbced..7e64eb834 100644 --- a/index/scorch/segment/zap/new.go +++ b/index/scorch/segment/zap/new.go @@ -22,7 +22,6 @@ import ( "sync" "github.com/RoaringBitmap/roaring" - "github.com/Smerity/govarint" "github.com/blevesearch/bleve/analysis" "github.com/blevesearch/bleve/document" "github.com/blevesearch/bleve/index" @@ -479,7 +478,11 @@ func (s *interim) processDocument(docNum uint64, func (s *interim) 
writeStoredFields() ( storedIndexOffset uint64, err error) { - metaEncoder := govarint.NewU64Base128Encoder(&s.metaBuf) + varBuf := make([]byte, binary.MaxVarintLen64) + metaEncoder := func(val uint64) (int, error) { + wb := binary.PutUvarint(varBuf, val) + return s.metaBuf.Write(varBuf[:wb]) + } data, compressed := s.tmp0[:0], s.tmp1[:0] defer func() { s.tmp0, s.tmp1 = data, compressed }() @@ -530,7 +533,6 @@ func (s *interim) writeStoredFields() ( } } - metaEncoder.Close() metaBytes := s.metaBuf.Bytes() compressed = snappy.Encode(compressed[:cap(compressed)], data) diff --git a/index/scorch/segment/zap/posting.go b/index/scorch/segment/zap/posting.go index c0c39571a..f96683a14 100644 --- a/index/scorch/segment/zap/posting.go +++ b/index/scorch/segment/zap/posting.go @@ -22,7 +22,6 @@ import ( "reflect" "github.com/RoaringBitmap/roaring" - "github.com/Smerity/govarint" "github.com/blevesearch/bleve/index/scorch/segment" "github.com/blevesearch/bleve/size" ) @@ -151,13 +150,11 @@ func (p *PostingsList) iterator(includeFreq, includeNorm, includeLocs bool, if freqNormReader != nil { freqNormReader.Reset([]byte(nil)) } - freqNormDecoder := rv.freqNormDecoder locReader := rv.locReader if locReader != nil { locReader.Reset([]byte(nil)) } - locDecoder := rv.locDecoder freqChunkOffsets := rv.freqChunkOffsets[:0] locChunkOffsets := rv.locChunkOffsets[:0] @@ -170,10 +167,7 @@ func (p *PostingsList) iterator(includeFreq, includeNorm, includeLocs bool, *rv = PostingsIterator{} // clear the struct rv.freqNormReader = freqNormReader - rv.freqNormDecoder = freqNormDecoder - rv.locReader = locReader - rv.locDecoder = locDecoder rv.freqChunkOffsets = freqChunkOffsets rv.locChunkOffsets = locChunkOffsets @@ -324,10 +318,9 @@ type PostingsIterator struct { currChunk uint32 currChunkFreqNorm []byte currChunkLoc []byte - freqNormDecoder *govarint.Base128Decoder - freqNormReader *bytes.Reader - locDecoder *govarint.Base128Decoder - locReader *bytes.Reader + + freqNormReader 
*bytes.Reader + locReader *bytes.Reader freqChunkOffsets []uint64 freqChunkStart uint64 @@ -379,7 +372,6 @@ func (i *PostingsIterator) loadChunk(chunk int) error { i.currChunkFreqNorm = i.postings.sb.mem[start:end] if i.freqNormReader == nil { i.freqNormReader = bytes.NewReader(i.currChunkFreqNorm) - i.freqNormDecoder = govarint.NewU64Base128Decoder(i.freqNormReader) } else { i.freqNormReader.Reset(i.currChunkFreqNorm) } @@ -398,7 +390,6 @@ func (i *PostingsIterator) loadChunk(chunk int) error { i.currChunkLoc = i.postings.sb.mem[start:end] if i.locReader == nil { i.locReader = bytes.NewReader(i.currChunkLoc) - i.locDecoder = govarint.NewU64Base128Decoder(i.locReader) } else { i.locReader.Reset(i.currChunkLoc) } @@ -413,13 +404,13 @@ func (i *PostingsIterator) readFreqNormHasLocs() (uint64, uint64, bool, error) { return 1, i.normBits1Hit, false, nil } - freqHasLocs, err := i.freqNormDecoder.GetU64() + freqHasLocs, err := binary.ReadUvarint(i.freqNormReader) if err != nil { return 0, 0, false, fmt.Errorf("error reading frequency: %v", err) } freq, hasLocs := decodeFreqHasLocs(freqHasLocs) - normBits, err := i.freqNormDecoder.GetU64() + normBits, err := binary.ReadUvarint(i.freqNormReader) if err != nil { return 0, 0, false, fmt.Errorf("error reading norm: %v", err) } @@ -447,27 +438,27 @@ func decodeFreqHasLocs(freqHasLocs uint64) (uint64, bool) { // the contents. 
func (i *PostingsIterator) readLocation(l *Location) error { // read off field - fieldID, err := i.locDecoder.GetU64() + fieldID, err := binary.ReadUvarint(i.locReader) if err != nil { return fmt.Errorf("error reading location field: %v", err) } // read off pos - pos, err := i.locDecoder.GetU64() + pos, err := binary.ReadUvarint(i.locReader) if err != nil { return fmt.Errorf("error reading location pos: %v", err) } // read off start - start, err := i.locDecoder.GetU64() + start, err := binary.ReadUvarint(i.locReader) if err != nil { return fmt.Errorf("error reading location start: %v", err) } // read off end - end, err := i.locDecoder.GetU64() + end, err := binary.ReadUvarint(i.locReader) if err != nil { return fmt.Errorf("error reading location end: %v", err) } // read off num array pos - numArrayPos, err := i.locDecoder.GetU64() + numArrayPos, err := binary.ReadUvarint(i.locReader) if err != nil { return fmt.Errorf("error reading location num array pos: %v", err) } @@ -487,7 +478,7 @@ func (i *PostingsIterator) readLocation(l *Location) error { // read off array positions for k := 0; k < int(numArrayPos); k++ { - ap, err := i.locDecoder.GetU64() + ap, err := binary.ReadUvarint(i.locReader) if err != nil { return fmt.Errorf("error reading array position: %v", err) } diff --git a/index/scorch/segment/zap/segment.go b/index/scorch/segment/zap/segment.go index 58f8bee02..95a45d86d 100644 --- a/index/scorch/segment/zap/segment.go +++ b/index/scorch/segment/zap/segment.go @@ -24,7 +24,6 @@ import ( "sync" "github.com/RoaringBitmap/roaring" - "github.com/Smerity/govarint" "github.com/blevesearch/bleve/index/scorch/segment" "github.com/blevesearch/bleve/size" "github.com/couchbase/vellum" @@ -278,14 +277,12 @@ func (sb *SegmentBase) dictionary(field string) (rv *Dictionary, err error) { type visitDocumentCtx struct { buf []byte reader bytes.Reader - decoder *govarint.Base128Decoder arrayPos []uint64 } var visitDocumentCtxPool = sync.Pool{ New: func() interface{} { reuse 
:= &visitDocumentCtx{} - reuse.decoder = govarint.NewU64Base128Decoder(&reuse.reader) return reuse }, } @@ -305,30 +302,29 @@ func (s *SegmentBase) VisitDocument(num uint64, visitor segment.DocumentFieldVal // now decode meta and process vdc.reader.Reset(meta) - decoder := vdc.decoder keepGoing := true for keepGoing { - field, err := decoder.GetU64() + field, err := binary.ReadUvarint(&vdc.reader) if err == io.EOF { break } if err != nil { return err } - typ, err := decoder.GetU64() + typ, err := binary.ReadUvarint(&vdc.reader) if err != nil { return err } - offset, err := decoder.GetU64() + offset, err := binary.ReadUvarint(&vdc.reader) if err != nil { return err } - l, err := decoder.GetU64() + l, err := binary.ReadUvarint(&vdc.reader) if err != nil { return err } - numap, err := decoder.GetU64() + numap, err := binary.ReadUvarint(&vdc.reader) if err != nil { return err } @@ -339,7 +335,7 @@ func (s *SegmentBase) VisitDocument(num uint64, visitor segment.DocumentFieldVal } arrayPos = vdc.arrayPos[:numap] for i := 0; i < int(numap); i++ { - ap, err := decoder.GetU64() + ap, err := binary.ReadUvarint(&vdc.reader) if err != nil { return err } From ff374bfb3acd87a29cd1b1d0fcf17bc82bba68ef Mon Sep 17 00:00:00 2001 From: Sreekanth Sivasankaran Date: Fri, 30 Mar 2018 15:51:51 +0530 Subject: [PATCH 344/728] fixing the function name --- index/scorch/segment/zap/build.go | 2 +- index/scorch/segment/zap/merge.go | 8 ++++---- index/scorch/segment/zap/new.go | 6 +++--- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/index/scorch/segment/zap/build.go b/index/scorch/segment/zap/build.go index 53da82f18..755808c27 100644 --- a/index/scorch/segment/zap/build.go +++ b/index/scorch/segment/zap/build.go @@ -76,7 +76,7 @@ func PersistSegmentBase(sb *SegmentBase, path string) error { func persistStoredFieldValues(fieldID int, storedFieldValues [][]byte, stf []byte, spf [][]uint64, - curr int, metaEncode VarintEncoder, data []byte) ( + curr int, metaEncode varintEncoder, 
data []byte) ( int, []byte, error) { for i := 0; i < len(storedFieldValues); i++ { // encode field diff --git a/index/scorch/segment/zap/merge.go b/index/scorch/segment/zap/merge.go index 925a41a16..f2a881856 100644 --- a/index/scorch/segment/zap/merge.go +++ b/index/scorch/segment/zap/merge.go @@ -547,7 +547,7 @@ func writePostings(postings *roaring.Bitmap, tfEncoder, locEncoder *chunkedIntCo return postingsOffset, nil } -type VarintEncoder func(uint64) (int, error) +type varintEncoder func(uint64) (int, error) func mergeStoredAndRemap(segments []*SegmentBase, drops []*roaring.Bitmap, fieldsMap map[string]uint16, fieldsInv []string, fieldsSame bool, newSegDocCount uint64, @@ -560,7 +560,7 @@ func mergeStoredAndRemap(segments []*SegmentBase, drops []*roaring.Bitmap, var data, compressed []byte var metaBuf bytes.Buffer varBuf := make([]byte, binary.MaxVarintLen64) - metaEncoder := func(val uint64) (int, error) { + metaEncode := func(val uint64) (int, error) { wb := binary.PutUvarint(varBuf, val) return metaBuf.Write(varBuf[:wb]) } @@ -628,7 +628,7 @@ func mergeStoredAndRemap(segments []*SegmentBase, drops []*roaring.Bitmap, // _id field special case optimizes ExternalID() lookups idFieldVal := vals[uint16(0)][0] - _, err = metaEncoder.PutU64(uint64(len(idFieldVal))) + _, err = metaEncode(uint64(len(idFieldVal))) if err != nil { return 0, nil, err } @@ -642,7 +642,7 @@ func mergeStoredAndRemap(segments []*SegmentBase, drops []*roaring.Bitmap, var err2 error curr, data, err2 = persistStoredFieldValues(fieldID, - storedFieldValues, stf, spf, curr, metaEncoder, data) + storedFieldValues, stf, spf, curr, metaEncode, data) if err2 != nil { return 0, nil, err2 } diff --git a/index/scorch/segment/zap/new.go b/index/scorch/segment/zap/new.go index d9f6e3409..da24988ae 100644 --- a/index/scorch/segment/zap/new.go +++ b/index/scorch/segment/zap/new.go @@ -479,7 +479,7 @@ func (s *interim) processDocument(docNum uint64, func (s *interim) writeStoredFields() ( storedIndexOffset 
uint64, err error) { varBuf := make([]byte, binary.MaxVarintLen64) - metaEncoder := func(val uint64) (int, error) { + metaEncode := func(val uint64) (int, error) { wb := binary.PutUvarint(varBuf, val) return s.metaBuf.Write(varBuf[:wb]) } @@ -523,7 +523,7 @@ func (s *interim) writeStoredFields() ( // _id field special case optimizes ExternalID() lookups idFieldVal := docStoredFields[uint16(0)].vals[0] - _, err = metaEncoder.PutU64(uint64(len(idFieldVal))) + _, err = metaEncode(uint64(len(idFieldVal))) if err != nil { return 0, err } @@ -534,7 +534,7 @@ func (s *interim) writeStoredFields() ( if exists { curr, data, err = persistStoredFieldValues( fieldID, isf.vals, isf.typs, isf.arrayposs, - curr, metaEncoder, data) + curr, metaEncode, data) if err != nil { return 0, err } From 0ba1dd6139ab00443b224c7f895bf0a4d2372ab6 Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Fri, 30 Mar 2018 16:14:00 -0400 Subject: [PATCH 345/728] code review changes based on my own feedback split interface into two changed regex to regexp finished mem segment impl removed log/time printfs --- index/index.go | 9 +- index/scorch/segment/empty.go | 2 +- index/scorch/segment/mem/dict.go | 115 ++++++++++++++++++++++---- index/scorch/segment/mem/dict_test.go | 51 ++++++++++++ index/scorch/segment/segment.go | 2 +- index/scorch/segment/zap/dict.go | 2 +- index/scorch/snapshot_index.go | 4 +- index/scorch/snapshot_segment.go | 4 +- search/searcher/search_fuzzy.go | 8 +- search/searcher/search_regexp.go | 9 +- 10 files changed, 165 insertions(+), 41 deletions(-) diff --git a/index/index.go b/index/index.go index c03796d1c..ea53ee5d6 100644 --- a/index/index.go +++ b/index/index.go @@ -96,10 +96,11 @@ type IndexReader interface { Close() error } -// IndexReaderAdv is an optional interface for advanced users -// Hope to have a better name here... 
-type IndexReaderAdv interface { - FieldDictRegex(field string, regex []byte) (FieldDict, error) +type IndexReaderRegexp interface { + FieldDictRegexp(field string, regex []byte) (FieldDict, error) +} + +type IndexReaderFuzzy interface { FieldDictFuzzy(field string, term []byte, fuzziness int) (FieldDict, error) } diff --git a/index/scorch/segment/empty.go b/index/scorch/segment/empty.go index d8ac45f47..f3315a804 100644 --- a/index/scorch/segment/empty.go +++ b/index/scorch/segment/empty.go @@ -76,7 +76,7 @@ func (e *EmptyDictionary) RangeIterator(start, end string) DictionaryIterator { return &EmptyDictionaryIterator{} } -func (e *EmptyDictionary) RegexIterator(start string) DictionaryIterator { +func (e *EmptyDictionary) RegexpIterator(start string) DictionaryIterator { return &EmptyDictionaryIterator{} } diff --git a/index/scorch/segment/mem/dict.go b/index/scorch/segment/mem/dict.go index 2877f9453..178c41cc8 100644 --- a/index/scorch/segment/mem/dict.go +++ b/index/scorch/segment/mem/dict.go @@ -15,7 +15,9 @@ package mem import ( + "math" "reflect" + "regexp" "sort" "strings" @@ -98,36 +100,42 @@ func (d *Dictionary) RangeIterator(start, end string) segment.DictionaryIterator } } -// RegexIterator returns an iterator which only visits terms matching +// RegexpIterator returns an iterator which only visits terms matching // the given regex expression. 
-// TODO complete the implementation -func (d *Dictionary) RegexIterator(regex string) segment.DictionaryIterator { - offset := sort.SearchStrings(d.segment.DictKeys[d.fieldID], regex) +func (d *Dictionary) RegexpIterator(pattern string) segment.DictionaryIterator { + regex, err := regexp.Compile(pattern) + if err != nil { + // invalid regexp, so set offset to the end + return &DictionaryIterator{ + d: d, + offset: len(d.segment.DictKeys[d.fieldID]), + } + } return &DictionaryIterator{ - d: d, - offset: offset, - prefix: regex, + d: d, + regex: regex, } } // FuzzyIterator returns an iterator which only visits terms matching // the given edit distance. -// TODO complete the implementation func (d *Dictionary) FuzzyIterator(term string, fuzziness int) segment.DictionaryIterator { - offset := sort.SearchStrings(d.segment.DictKeys[d.fieldID], term) return &DictionaryIterator{ - d: d, - offset: offset, - prefix: term, + d: d, + fuzzyTerm: term, + fuzziness: fuzziness, } } // DictionaryIterator is an iterator for term dictionary type DictionaryIterator struct { - d *Dictionary - prefix string - end string - offset int + d *Dictionary + prefix string + end string + offset int + regex *regexp.Regexp + fuzzyTerm string + fuzziness int dictEntry index.DictEntry // reused across Next()'s } @@ -146,6 +154,28 @@ func (d *DictionaryIterator) Next() (*index.DictEntry, error) { if d.end != "" && next > d.end { return nil, nil } + // check regexp + if d.regex != nil { + // keep going until we find a match, mindful of the end of the slice + for !d.regex.MatchString(next) { + d.offset++ + if d.offset > len(d.d.segment.DictKeys[d.d.fieldID])-1 { + return nil, nil + } + next = d.d.segment.DictKeys[d.d.fieldID][d.offset] + } + } + if d.fuzzyTerm != "" { + _, exceeded := LevenshteinDistanceMax(d.fuzzyTerm, next, d.fuzziness) + for exceeded { + d.offset++ + if d.offset > len(d.d.segment.DictKeys[d.d.fieldID])-1 { + return nil, nil + } + next = d.d.segment.DictKeys[d.d.fieldID][d.offset] + 
_, exceeded = LevenshteinDistanceMax(d.fuzzyTerm, next, d.fuzziness) + } + } d.offset++ postingID := d.d.segment.Dicts[d.d.fieldID][next] @@ -153,3 +183,56 @@ func (d *DictionaryIterator) Next() (*index.DictEntry, error) { d.dictEntry.Count = d.d.segment.Postings[postingID-1].GetCardinality() return &d.dictEntry, nil } + +// LevenshteinDistanceMax same as LevenshteinDistance but +// attempts to bail early once we know the distance +// will be greater than max +// in which case the first return val will be the max +// and the second will be true, indicating max was exceeded +func LevenshteinDistanceMax(a, b string, max int) (int, bool) { + la := len(a) + lb := len(b) + + ld := int(math.Abs(float64(la - lb))) + if ld > max { + return max, true + } + + d := make([]int, la+1) + var lastdiag, olddiag, temp int + + for i := 1; i <= la; i++ { + d[i] = i + } + for i := 1; i <= lb; i++ { + d[0] = i + lastdiag = i - 1 + rowmin := max + 1 + for j := 1; j <= la; j++ { + olddiag = d[j] + min := d[j] + 1 + if (d[j-1] + 1) < min { + min = d[j-1] + 1 + } + if a[j-1] == b[i-1] { + temp = 0 + } else { + temp = 1 + } + if (lastdiag + temp) < min { + min = lastdiag + temp + } + if min < rowmin { + rowmin = min + } + d[j] = min + + lastdiag = olddiag + } + // after each row if rowmin isn't less than max stop + if rowmin > max { + return max, true + } + } + return d[la], false +} diff --git a/index/scorch/segment/mem/dict_test.go b/index/scorch/segment/mem/dict_test.go index adfa4957d..e69016a81 100644 --- a/index/scorch/segment/mem/dict_test.go +++ b/index/scorch/segment/mem/dict_test.go @@ -157,4 +157,55 @@ func TestDictionary(t *testing.T) { if !reflect.DeepEqual(expected, got) { t.Errorf("expected: %v, got: %v", expected, got) } + + // test regexp iterator + expected = []string{"ball", "bat"} + got = got[:0] + itr = dict.RegexpIterator("ba.*") + next, err = itr.Next() + for next != nil && err == nil { + got = append(got, next.Term) + next, err = itr.Next() + } + if err != nil { + 
t.Fatalf("dict itr error: %v", err) + } + + if !reflect.DeepEqual(expected, got) { + t.Errorf("expected: %v, got: %v", expected, got) + } + + // test regexp iterator with invalid regexp + expected = []string{} + got = got[:0] + itr = dict.RegexpIterator(string([]byte{0xff})) + next, err = itr.Next() + for next != nil && err == nil { + got = append(got, next.Term) + next, err = itr.Next() + } + if err != nil { + t.Fatalf("dict itr error: %v", err) + } + + if !reflect.DeepEqual(expected, got) { + t.Errorf("expected: %v, got: %v", expected, got) + } + + // test fuzzy iterator + expected = []string{"bat", "cat"} + got = got[:0] + itr = dict.FuzzyIterator("vat", 1) + next, err = itr.Next() + for next != nil && err == nil { + got = append(got, next.Term) + next, err = itr.Next() + } + if err != nil { + t.Fatalf("dict itr error: %v", err) + } + + if !reflect.DeepEqual(expected, got) { + t.Errorf("expected: %v, got: %v", expected, got) + } } diff --git a/index/scorch/segment/segment.go b/index/scorch/segment/segment.go index de678e094..a79611363 100644 --- a/index/scorch/segment/segment.go +++ b/index/scorch/segment/segment.go @@ -48,7 +48,7 @@ type TermDictionary interface { Iterator() DictionaryIterator PrefixIterator(prefix string) DictionaryIterator RangeIterator(start, end string) DictionaryIterator - RegexIterator(regex string) DictionaryIterator + RegexpIterator(regex string) DictionaryIterator FuzzyIterator(term string, fuzziness int) DictionaryIterator } diff --git a/index/scorch/segment/zap/dict.go b/index/scorch/segment/zap/dict.go index fa846bf3f..4cda63be1 100644 --- a/index/scorch/segment/zap/dict.go +++ b/index/scorch/segment/zap/dict.go @@ -152,7 +152,7 @@ func (d *Dictionary) RangeIterator(start, end string) segment.DictionaryIterator // RegexIterator returns an iterator which only visits terms having the // the specified regex -func (d *Dictionary) RegexIterator(regex string) segment.DictionaryIterator { +func (d *Dictionary) RegexpIterator(regex string) 
segment.DictionaryIterator { rv := &DictionaryIterator{ d: d, } diff --git a/index/scorch/snapshot_index.go b/index/scorch/snapshot_index.go index 3e9d97f59..cdd983857 100644 --- a/index/scorch/snapshot_index.go +++ b/index/scorch/snapshot_index.go @@ -175,10 +175,10 @@ func (i *IndexSnapshot) FieldDictPrefix(field string, }) } -func (i *IndexSnapshot) FieldDictRegex(field string, +func (i *IndexSnapshot) FieldDictRegexp(field string, termRegex []byte) (index.FieldDict, error) { return i.newIndexSnapshotFieldDict(field, func(i segment.TermDictionary) segment.DictionaryIterator { - return i.RegexIterator(string(termRegex)) + return i.RegexpIterator(string(termRegex)) }) } diff --git a/index/scorch/snapshot_segment.go b/index/scorch/snapshot_segment.go index 422d1e814..ac95d450f 100644 --- a/index/scorch/snapshot_segment.go +++ b/index/scorch/snapshot_segment.go @@ -49,8 +49,8 @@ func (s *SegmentDictionarySnapshot) RangeIterator(start, end string) segment.Dic return s.d.RangeIterator(start, end) } -func (s *SegmentDictionarySnapshot) RegexIterator(regex string) segment.DictionaryIterator { - return s.d.RegexIterator(regex) +func (s *SegmentDictionarySnapshot) RegexpIterator(regex string) segment.DictionaryIterator { + return s.d.RegexpIterator(regex) } func (s *SegmentDictionarySnapshot) FuzzyIterator(term string, diff --git a/search/searcher/search_fuzzy.go b/search/searcher/search_fuzzy.go index 69aab2f73..7d165b067 100644 --- a/search/searcher/search_fuzzy.go +++ b/search/searcher/search_fuzzy.go @@ -15,9 +15,6 @@ package searcher import ( - "log" - "time" - "github.com/blevesearch/bleve/index" "github.com/blevesearch/bleve/search" ) @@ -34,10 +31,8 @@ func NewFuzzySearcher(indexReader index.IndexReader, term string, break } } - t := time.Now() candidateTerms, err := findFuzzyCandidateTerms(indexReader, term, fuzziness, field, prefixTerm) - log.Printf("time taken-> %f", time.Since(t).Seconds()) if err != nil { return nil, err } @@ -56,7 +51,7 @@ func 
findFuzzyCandidateTerms(indexReader index.IndexReader, term string, // in case of advanced reader implementations directly call // the levenshtein automaton based iterator to collect the // candidate terms - if ir, ok := indexReader.(index.IndexReaderAdv); ok { + if ir, ok := indexReader.(index.IndexReaderFuzzy); ok { fieldDict, err = ir.FieldDictFuzzy(field, []byte(term), fuzziness) if err != nil { return rv, err @@ -66,7 +61,6 @@ func findFuzzyCandidateTerms(indexReader index.IndexReader, term string, rv = append(rv, tfd.Term) tfd, err = fieldDict.Next() } - log.Printf("candidate FSA fuzzy terms: %+v", rv) return rv, nil } fieldDict, err = indexReader.FieldDict(field) diff --git a/search/searcher/search_regexp.go b/search/searcher/search_regexp.go index 806f135a3..b88eb3eb5 100644 --- a/search/searcher/search_regexp.go +++ b/search/searcher/search_regexp.go @@ -15,9 +15,7 @@ package searcher import ( - "log" "regexp" - "time" "github.com/blevesearch/bleve/index" "github.com/blevesearch/bleve/search" @@ -32,9 +30,8 @@ func NewRegexpSearcher(indexReader index.IndexReader, pattern *regexp.Regexp, field string, boost float64, options search.SearcherOptions) ( search.Searcher, error) { var candidateTerms []string - t := time.Now() - if ir, ok := indexReader.(index.IndexReaderAdv); ok { - fieldDict, err := ir.FieldDictRegex(field, []byte(pattern.String())) + if ir, ok := indexReader.(index.IndexReaderRegexp); ok { + fieldDict, err := ir.FieldDictRegexp(field, []byte(pattern.String())) if err != nil { return nil, err } @@ -50,7 +47,6 @@ func NewRegexpSearcher(indexReader index.IndexReader, pattern *regexp.Regexp, candidateTerms = append(candidateTerms, tfd.Term) tfd, err = fieldDict.Next() } - log.Printf("fsa time took-> %f", time.Since(t).Seconds()) } else { prefixTerm, complete := pattern.LiteralPrefix() if complete { @@ -64,7 +60,6 @@ func NewRegexpSearcher(indexReader index.IndexReader, pattern *regexp.Regexp, return nil, err } } - log.Printf("time took-> %f", 
time.Since(t).Seconds()) } return NewMultiTermSearcher(indexReader, candidateTerms, field, boost, From f8498ae0e903fda095ef5b15064bff4a62fd566a Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Fri, 30 Mar 2018 16:16:32 -0400 Subject: [PATCH 346/728] add comment for consistency --- index/scorch/segment/mem/dict.go | 1 + 1 file changed, 1 insertion(+) diff --git a/index/scorch/segment/mem/dict.go b/index/scorch/segment/mem/dict.go index 178c41cc8..b74872371 100644 --- a/index/scorch/segment/mem/dict.go +++ b/index/scorch/segment/mem/dict.go @@ -165,6 +165,7 @@ func (d *DictionaryIterator) Next() (*index.DictEntry, error) { next = d.d.segment.DictKeys[d.d.fieldID][d.offset] } } + // check fuzziness if d.fuzzyTerm != "" { _, exceeded := LevenshteinDistanceMax(d.fuzzyTerm, next, d.fuzziness) for exceeded { From ce15a8d22ebd51c7366066e57b3e2556f9fc0b10 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Fri, 30 Mar 2018 14:55:42 -0700 Subject: [PATCH 347/728] remove unused PhraseSearcher fields --- search/searcher/search_phrase.go | 3 --- 1 file changed, 3 deletions(-) diff --git a/search/searcher/search_phrase.go b/search/searcher/search_phrase.go index 0026794dd..85d0f6b52 100644 --- a/search/searcher/search_phrase.go +++ b/search/searcher/search_phrase.go @@ -32,11 +32,9 @@ func init() { } type PhraseSearcher struct { - indexReader index.IndexReader mustSearcher *ConjunctionSearcher queryNorm float64 currMust *search.DocumentMatch - slop int terms [][]string initialized bool } @@ -126,7 +124,6 @@ func NewMultiPhraseSearcher(indexReader index.IndexReader, terms [][]string, fie // build our searcher rv := PhraseSearcher{ - indexReader: indexReader, mustSearcher: mustSearcher, terms: terms, } From 2e36384e3d0616354d0ead765d84d86bd091b682 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Fri, 30 Mar 2018 15:13:58 -0700 Subject: [PATCH 348/728] phrase searcher returns currMust back to pool --- search/searcher/search_phrase.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git 
a/search/searcher/search_phrase.go b/search/searcher/search_phrase.go index 85d0f6b52..7591c7d76 100644 --- a/search/searcher/search_phrase.go +++ b/search/searcher/search_phrase.go @@ -160,6 +160,9 @@ func (s *PhraseSearcher) advanceNextMust(ctx *search.SearchContext) error { var err error if s.mustSearcher != nil { + if s.currMust != nil { + ctx.DocumentMatchPool.Put(s.currMust) + } s.currMust, err = s.mustSearcher.Next(ctx) if err != nil { return err @@ -227,6 +230,7 @@ func (s *PhraseSearcher) checkCurrMustMatch(ctx *search.SearchContext) *search.D if freq > 0 { // return match rv := s.currMust + s.currMust = nil rv.Locations = rvftlm return rv } From f5e01aa54e0b23a86309169e7525f06a32e50c3f Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Fri, 30 Mar 2018 15:31:40 -0700 Subject: [PATCH 349/728] remove a level of phrasePath indirection for fewer alloc's --- search/searcher/search_phrase.go | 10 +-- search/searcher/search_phrase_test.go | 96 +++++++++++++-------------- 2 files changed, 54 insertions(+), 52 deletions(-) diff --git a/search/searcher/search_phrase.go b/search/searcher/search_phrase.go index 7591c7d76..dfb7267f7 100644 --- a/search/searcher/search_phrase.go +++ b/search/searcher/search_phrase.go @@ -243,7 +243,8 @@ func (s *PhraseSearcher) checkCurrMustMatch(ctx *search.SearchContext) *search.D // constraints (possibly more than once). if so, the number of times it was // satisfied, and these locations are returned. 
otherwise 0 and either // a nil or empty TermLocationMap -func (s *PhraseSearcher) checkCurrMustMatchField(ctx *search.SearchContext, tlm search.TermLocationMap) (int, search.TermLocationMap) { +func (s *PhraseSearcher) checkCurrMustMatchField(ctx *search.SearchContext, tlm search.TermLocationMap) ( + int, search.TermLocationMap) { paths := findPhrasePaths(0, nil, s.terms, tlm, nil, 0) rv := make(search.TermLocationMap, len(s.terms)) for _, p := range paths { @@ -261,7 +262,7 @@ func (p *phrasePart) String() string { return fmt.Sprintf("[%s %v]", p.term, p.loc) } -type phrasePath []*phrasePart +type phrasePath []phrasePart func (p phrasePath) MergeInto(in search.TermLocationMap) { for _, pp := range p { @@ -282,7 +283,8 @@ func (p phrasePath) MergeInto(in search.TermLocationMap) { // this is the primary state being built during the traversal // // returns slice of paths, or nil if invocation did not find any successul paths -func findPhrasePaths(prevPos uint64, ap search.ArrayPositions, phraseTerms [][]string, tlm search.TermLocationMap, p phrasePath, remainingSlop int) []phrasePath { +func findPhrasePaths(prevPos uint64, ap search.ArrayPositions, phraseTerms [][]string, + tlm search.TermLocationMap, p phrasePath, remainingSlop int) []phrasePath { // no more terms if len(phraseTerms) < 1 { @@ -321,7 +323,7 @@ func findPhrasePaths(prevPos uint64, ap search.ArrayPositions, phraseTerms [][]s // if enough slop reamining, continue recursively if prevPos == 0 || (remainingSlop-dist) >= 0 { // this location works, add it to the path (but not for empty term) - px := append(p, &phrasePart{term: carTerm, loc: loc}) + px := append(p, phrasePart{term: carTerm, loc: loc}) rv = append(rv, findPhrasePaths(loc.Pos, loc.ArrayPositions, cdr, tlm, px, remainingSlop-dist)...) 
} } diff --git a/search/searcher/search_phrase_test.go b/search/searcher/search_phrase_test.go index 046b8d81d..a764ac2fe 100644 --- a/search/searcher/search_phrase_test.go +++ b/search/searcher/search_phrase_test.go @@ -185,8 +185,8 @@ func TestFindPhrasePaths(t *testing.T) { }, paths: []phrasePath{ phrasePath{ - &phrasePart{"cat", &search.Location{Pos: 1}}, - &phrasePart{"dog", &search.Location{Pos: 2}}, + phrasePart{"cat", &search.Location{Pos: 1}}, + phrasePart{"dog", &search.Location{Pos: 2}}, }, }, }, @@ -242,12 +242,12 @@ func TestFindPhrasePaths(t *testing.T) { }, paths: []phrasePath{ phrasePath{ - &phrasePart{"cat", &search.Location{Pos: 1}}, - &phrasePart{"dog", &search.Location{Pos: 2}}, + phrasePart{"cat", &search.Location{Pos: 1}}, + phrasePart{"dog", &search.Location{Pos: 2}}, }, phrasePath{ - &phrasePart{"cat", &search.Location{Pos: 8}}, - &phrasePart{"dog", &search.Location{Pos: 9}}, + phrasePart{"cat", &search.Location{Pos: 8}}, + phrasePart{"dog", &search.Location{Pos: 9}}, }, }, }, @@ -268,8 +268,8 @@ func TestFindPhrasePaths(t *testing.T) { }, paths: []phrasePath{ phrasePath{ - &phrasePart{"cat", &search.Location{Pos: 1}}, - &phrasePart{"dog", &search.Location{Pos: 3}}, + phrasePart{"cat", &search.Location{Pos: 1}}, + phrasePart{"dog", &search.Location{Pos: 3}}, }, }, }, @@ -290,8 +290,8 @@ func TestFindPhrasePaths(t *testing.T) { }, paths: []phrasePath{ phrasePath{ - &phrasePart{"cat", &search.Location{Pos: 2}}, - &phrasePart{"dog", &search.Location{Pos: 3}}, + phrasePart{"cat", &search.Location{Pos: 2}}, + phrasePart{"dog", &search.Location{Pos: 3}}, }, }, }, @@ -312,8 +312,8 @@ func TestFindPhrasePaths(t *testing.T) { }, paths: []phrasePath{ phrasePath{ - &phrasePart{"cat", &search.Location{Pos: 2}}, - &phrasePart{"dog", &search.Location{Pos: 3}}, + phrasePart{"cat", &search.Location{Pos: 2}}, + phrasePart{"dog", &search.Location{Pos: 3}}, }, }, }, @@ -372,8 +372,8 @@ func TestFindPhrasePathsSloppy(t *testing.T) { slop: 3, paths: 
[]phrasePath{ phrasePath{ - &phrasePart{"one", &search.Location{Pos: 1}}, - &phrasePart{"five", &search.Location{Pos: 5}}, + phrasePart{"one", &search.Location{Pos: 1}}, + phrasePart{"five", &search.Location{Pos: 5}}, }, }, }, @@ -383,8 +383,8 @@ func TestFindPhrasePathsSloppy(t *testing.T) { slop: 0, paths: []phrasePath{ phrasePath{ - &phrasePart{"four", &search.Location{Pos: 4}}, - &phrasePart{"five", &search.Location{Pos: 5}}, + phrasePart{"four", &search.Location{Pos: 4}}, + phrasePart{"five", &search.Location{Pos: 5}}, }, }, }, @@ -399,8 +399,8 @@ func TestFindPhrasePathsSloppy(t *testing.T) { slop: 1, paths: []phrasePath{ phrasePath{ - &phrasePart{"one", &search.Location{Pos: 1}}, - &phrasePart{"two", &search.Location{Pos: 2}}, + phrasePart{"one", &search.Location{Pos: 1}}, + phrasePart{"two", &search.Location{Pos: 2}}, }, }, }, @@ -415,8 +415,8 @@ func TestFindPhrasePathsSloppy(t *testing.T) { slop: 2, paths: []phrasePath{ phrasePath{ - &phrasePart{"two", &search.Location{Pos: 2}}, - &phrasePart{"one", &search.Location{Pos: 1}}, + phrasePart{"two", &search.Location{Pos: 2}}, + phrasePart{"one", &search.Location{Pos: 1}}, }, }, }, @@ -431,9 +431,9 @@ func TestFindPhrasePathsSloppy(t *testing.T) { slop: 2, paths: []phrasePath{ phrasePath{ - &phrasePart{"one", &search.Location{Pos: 1}}, - &phrasePart{"three", &search.Location{Pos: 3}}, - &phrasePart{"five", &search.Location{Pos: 5}}, + phrasePart{"one", &search.Location{Pos: 1}}, + phrasePart{"three", &search.Location{Pos: 3}}, + phrasePart{"five", &search.Location{Pos: 5}}, }, }, }, @@ -448,9 +448,9 @@ func TestFindPhrasePathsSloppy(t *testing.T) { slop: 6, paths: []phrasePath{ phrasePath{ - &phrasePart{"five", &search.Location{Pos: 5}}, - &phrasePart{"three", &search.Location{Pos: 3}}, - &phrasePart{"one", &search.Location{Pos: 1}}, + phrasePart{"five", &search.Location{Pos: 5}}, + phrasePart{"three", &search.Location{Pos: 3}}, + phrasePart{"one", &search.Location{Pos: 1}}, }, }, }, @@ -500,8 +500,8 @@ func 
TestFindPhrasePathsSloppyPalyndrome(t *testing.T) { slop: 0, paths: []phrasePath{ phrasePath{ - &phrasePart{"two", &search.Location{Pos: 2}}, - &phrasePart{"three", &search.Location{Pos: 3}}, + phrasePart{"two", &search.Location{Pos: 2}}, + phrasePart{"three", &search.Location{Pos: 3}}, }, }, }, @@ -511,12 +511,12 @@ func TestFindPhrasePathsSloppyPalyndrome(t *testing.T) { slop: 2, paths: []phrasePath{ phrasePath{ - &phrasePart{"two", &search.Location{Pos: 2}}, - &phrasePart{"three", &search.Location{Pos: 3}}, + phrasePart{"two", &search.Location{Pos: 2}}, + phrasePart{"three", &search.Location{Pos: 3}}, }, phrasePath{ - &phrasePart{"two", &search.Location{Pos: 4}}, - &phrasePart{"three", &search.Location{Pos: 3}}, + phrasePart{"two", &search.Location{Pos: 4}}, + phrasePart{"three", &search.Location{Pos: 3}}, }, }, }, @@ -526,12 +526,12 @@ func TestFindPhrasePathsSloppyPalyndrome(t *testing.T) { slop: 2, paths: []phrasePath{ phrasePath{ - &phrasePart{"three", &search.Location{Pos: 3}}, - &phrasePart{"two", &search.Location{Pos: 2}}, + phrasePart{"three", &search.Location{Pos: 3}}, + phrasePart{"two", &search.Location{Pos: 2}}, }, phrasePath{ - &phrasePart{"three", &search.Location{Pos: 3}}, - &phrasePart{"two", &search.Location{Pos: 4}}, + phrasePart{"three", &search.Location{Pos: 3}}, + phrasePart{"two", &search.Location{Pos: 4}}, }, }, }, @@ -574,8 +574,8 @@ func TestFindMultiPhrasePaths(t *testing.T) { phrase: [][]string{[]string{"cat", "rat"}, []string{"dog"}}, paths: []phrasePath{ phrasePath{ - &phrasePart{"cat", &search.Location{Pos: 1}}, - &phrasePart{"dog", &search.Location{Pos: 2}}, + phrasePart{"cat", &search.Location{Pos: 1}}, + phrasePart{"dog", &search.Location{Pos: 2}}, }, }, }, @@ -592,12 +592,12 @@ func TestFindMultiPhrasePaths(t *testing.T) { phrase: [][]string{[]string{"cat", "dog"}, []string{"dog", "frog"}}, paths: []phrasePath{ phrasePath{ - &phrasePart{"cat", &search.Location{Pos: 1}}, - &phrasePart{"dog", &search.Location{Pos: 2}}, + 
phrasePart{"cat", &search.Location{Pos: 1}}, + phrasePart{"dog", &search.Location{Pos: 2}}, }, phrasePath{ - &phrasePart{"dog", &search.Location{Pos: 2}}, - &phrasePart{"frog", &search.Location{Pos: 3}}, + phrasePart{"dog", &search.Location{Pos: 2}}, + phrasePart{"frog", &search.Location{Pos: 3}}, }, }, }, @@ -606,8 +606,8 @@ func TestFindMultiPhrasePaths(t *testing.T) { phrase: [][]string{[]string{"cat", "rat"}, []string{""}, []string{"frog"}}, paths: []phrasePath{ phrasePath{ - &phrasePart{"cat", &search.Location{Pos: 1}}, - &phrasePart{"frog", &search.Location{Pos: 3}}, + phrasePart{"cat", &search.Location{Pos: 1}}, + phrasePart{"frog", &search.Location{Pos: 3}}, }, }, }, @@ -616,8 +616,8 @@ func TestFindMultiPhrasePaths(t *testing.T) { phrase: [][]string{[]string{"cat", "rat"}, []string{}, []string{"frog"}}, paths: []phrasePath{ phrasePath{ - &phrasePart{"cat", &search.Location{Pos: 1}}, - &phrasePart{"frog", &search.Location{Pos: 3}}, + phrasePart{"cat", &search.Location{Pos: 1}}, + phrasePart{"frog", &search.Location{Pos: 3}}, }, }, }, @@ -626,8 +626,8 @@ func TestFindMultiPhrasePaths(t *testing.T) { phrase: [][]string{[]string{"cat", "rat"}, nil, []string{"frog"}}, paths: []phrasePath{ phrasePath{ - &phrasePart{"cat", &search.Location{Pos: 1}}, - &phrasePart{"frog", &search.Location{Pos: 3}}, + phrasePart{"cat", &search.Location{Pos: 1}}, + phrasePart{"frog", &search.Location{Pos: 3}}, }, }, }, From d690422f6425e599f01574b928bef7a37cd77717 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Fri, 30 Mar 2018 16:34:31 -0700 Subject: [PATCH 350/728] scorch postingToTermFieldDoc() uses a backing array This optimization is so that postingToTermFieldDoc() will hit the allocator at most only twice per call rather than on every location entry. 
--- index/scorch/snapshot_index_tfr.go | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/index/scorch/snapshot_index_tfr.go b/index/scorch/snapshot_index_tfr.go index 4c3d08edd..0f015c8db 100644 --- a/index/scorch/snapshot_index_tfr.go +++ b/index/scorch/snapshot_index_tfr.go @@ -103,14 +103,16 @@ func (i *IndexSnapshotTermFieldReader) postingToTermFieldDoc(next segment.Postin if i.includeTermVectors { locs := next.Locations() rv.Vectors = make([]*index.TermFieldVector, len(locs)) + backing := make([]index.TermFieldVector, len(locs)) for i, loc := range locs { - rv.Vectors[i] = &index.TermFieldVector{ + backing[i] = index.TermFieldVector{ Start: loc.Start(), End: loc.End(), Pos: loc.Pos(), ArrayPositions: loc.ArrayPositions(), Field: loc.Field(), } + rv.Vectors[i] = &backing[i] } } } From 01839100dcd0e40694645051057ef35a854d7768 Mon Sep 17 00:00:00 2001 From: Sreekanth Sivasankaran Date: Fri, 30 Mar 2018 16:20:27 +0530 Subject: [PATCH 351/728] persisting the zap version to bolt zap version is already a part of footer s with '#' will be ignored, and an empty message aborts the commit. 
--- index/scorch/persister.go | 20 +++++++++++++++++++- index/scorch/segment/zap/build.go | 4 +++- index/scorch/segment/zap/segment.go | 2 +- index/scorch/segment/zap/write.go | 2 +- 4 files changed, 24 insertions(+), 4 deletions(-) diff --git a/index/scorch/persister.go b/index/scorch/persister.go index 2fab53240..1279a5c2a 100644 --- a/index/scorch/persister.go +++ b/index/scorch/persister.go @@ -16,6 +16,7 @@ package scorch import ( "bytes" + "encoding/binary" "fmt" "io/ioutil" "log" @@ -317,6 +318,22 @@ func (s *Scorch) persistSnapshotDirect(snapshot *IndexSnapshot) (err error) { return err } + // persist meta values + metaBucket, err := snapshotBucket.CreateBucketIfNotExists(boltMetaDataKey) + if err != nil { + return err + } + err = metaBucket.Put([]byte("type"), []byte(zap.Type)) + if err != nil { + return err + } + buf := make([]byte, binary.MaxVarintLen32) + binary.BigEndian.PutUint32(buf, zap.Version) + err = metaBucket.Put([]byte("version"), buf) + if err != nil { + return err + } + // persist internal values internalBucket, err := snapshotBucket.CreateBucketIfNotExists(boltInternalKey) if err != nil { @@ -457,6 +474,7 @@ var boltSnapshotsBucket = []byte{'s'} var boltPathKey = []byte{'p'} var boltDeletedKey = []byte{'d'} var boltInternalKey = []byte{'i'} +var boltMetaDataKey = []byte{'m'} func (s *Scorch) loadFromBolt() error { return s.rootBolt.View(func(tx *bolt.Tx) error { @@ -551,7 +569,7 @@ func (s *Scorch) loadSnapshot(snapshot *bolt.Bucket) (*IndexSnapshot, error) { _ = rv.DecRef() return nil, err } - } else { + } else if k[0] != boltMetaDataKey[0] { segmentBucket := snapshot.Bucket(k) if segmentBucket == nil { _ = rv.DecRef() diff --git a/index/scorch/segment/zap/build.go b/index/scorch/segment/zap/build.go index e8f3499b4..f519d4db4 100644 --- a/index/scorch/segment/zap/build.go +++ b/index/scorch/segment/zap/build.go @@ -22,7 +22,9 @@ import ( "github.com/Smerity/govarint" ) -const version uint32 = 9 +const Version uint32 = 9 + +const Type 
string = "zap" const fieldNotUninverted = math.MaxUint64 diff --git a/index/scorch/segment/zap/segment.go b/index/scorch/segment/zap/segment.go index 541647abb..4cd44eb27 100644 --- a/index/scorch/segment/zap/segment.go +++ b/index/scorch/segment/zap/segment.go @@ -189,7 +189,7 @@ func (s *Segment) loadConfig() error { verOffset := crcOffset - 4 s.version = binary.BigEndian.Uint32(s.mm[verOffset : verOffset+4]) - if s.version != version { + if s.version != Version { return fmt.Errorf("unsupported version %d", s.version) } diff --git a/index/scorch/segment/zap/write.go b/index/scorch/segment/zap/write.go index 7f4f5a88b..cddaedd00 100644 --- a/index/scorch/segment/zap/write.go +++ b/index/scorch/segment/zap/write.go @@ -118,7 +118,7 @@ func persistFooter(numDocs, storedIndexOffset, fieldsIndexOffset, docValueOffset return err } // write out 32-bit version - err = binary.Write(w, binary.BigEndian, version) + err = binary.Write(w, binary.BigEndian, Version) if err != nil { return err } From 56166ea4abaef47f729feff62ab9afdcf2343b30 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Mon, 2 Apr 2018 12:20:31 -0700 Subject: [PATCH 352/728] search_phrase findPhrasePaths() fix on slice reuse With term locations laid out like "a b c d d", findPhrasePaths() on a search for "a b c d" with sloppiness of 1 ought to have returned a result like... a: 1, b: 2, c: 3, d: 4 a: 1, b: 2, c: 3, d: 5 ...but instead was incorrectly returning results like this due to a subtle slice memory reuse issue... 
a: 1, b: 2, c: 3, d: 5 a: 1, b: 2, c: 3, d: 5 --- search/searcher/search_phrase.go | 4 +-- search/searcher/search_phrase_test.go | 46 ++++++++++++++++++++++++++- 2 files changed, 47 insertions(+), 3 deletions(-) diff --git a/search/searcher/search_phrase.go b/search/searcher/search_phrase.go index dfb7267f7..7166d4884 100644 --- a/search/searcher/search_phrase.go +++ b/search/searcher/search_phrase.go @@ -288,7 +288,7 @@ func findPhrasePaths(prevPos uint64, ap search.ArrayPositions, phraseTerms [][]s // no more terms if len(phraseTerms) < 1 { - return []phrasePath{p} + return []phrasePath{append(phrasePath(nil), p...)} } car := phraseTerms[0] @@ -320,7 +320,7 @@ func findPhrasePaths(prevPos uint64, ap search.ArrayPositions, phraseTerms [][]s dist = editDistance(prevPos+1, loc.Pos) } - // if enough slop reamining, continue recursively + // if enough slop remaining, continue recursively if prevPos == 0 || (remainingSlop-dist) >= 0 { // this location works, add it to the path (but not for empty term) px := append(p, phrasePart{term: carTerm, loc: loc}) diff --git a/search/searcher/search_phrase_test.go b/search/searcher/search_phrase_test.go index a764ac2fe..d09be57df 100644 --- a/search/searcher/search_phrase_test.go +++ b/search/searcher/search_phrase_test.go @@ -360,6 +360,7 @@ func TestFindPhrasePathsSloppy(t *testing.T) { phrase [][]string paths []phrasePath slop int + tlm search.TermLocationMap }{ // no match { @@ -454,10 +455,53 @@ func TestFindPhrasePathsSloppy(t *testing.T) { }, }, }, + // test an append() related edge case, where append()'s + // current behavior needs to be called 3 times starting from a + // nil slice before it grows to a slice with extra capacity -- + // hence, 3 initial terms of ark, bat, cat + { + phrase: [][]string{ + []string{"ark"}, []string{"bat"}, []string{"cat"}, []string{"dog"}, + }, + slop: 1, + paths: []phrasePath{ + phrasePath{ + phrasePart{"ark", &search.Location{Pos: 1}}, + phrasePart{"bat", &search.Location{Pos: 2}}, + 
phrasePart{"cat", &search.Location{Pos: 3}}, + phrasePart{"dog", &search.Location{Pos: 4}}, + }, + phrasePath{ + phrasePart{"ark", &search.Location{Pos: 1}}, + phrasePart{"bat", &search.Location{Pos: 2}}, + phrasePart{"cat", &search.Location{Pos: 3}}, + phrasePart{"dog", &search.Location{Pos: 5}}, + }, + }, + tlm: search.TermLocationMap{ // ark bat cat dog dog + "ark": search.Locations{ + &search.Location{Pos: 1}, + }, + "bat": search.Locations{ + &search.Location{Pos: 2}, + }, + "cat": search.Locations{ + &search.Location{Pos: 3}, + }, + "dog": search.Locations{ + &search.Location{Pos: 4}, + &search.Location{Pos: 5}, + }, + }, + }, } for i, test := range tests { - actualPaths := findPhrasePaths(0, nil, test.phrase, tlm, nil, test.slop) + tlmToUse := test.tlm + if tlmToUse == nil { + tlmToUse = tlm + } + actualPaths := findPhrasePaths(0, nil, test.phrase, tlmToUse, nil, test.slop) if !reflect.DeepEqual(actualPaths, test.paths) { t.Fatalf("expected: %v got %v for test %d", test.paths, actualPaths, i) } From 640d3c218ccb5c65375dbe06f8c13e6c62b59349 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Mon, 2 Apr 2018 22:53:45 -0700 Subject: [PATCH 353/728] optimization for LevenshteinDistanceMaxReuseSlice() This optimization allows the levenshtein-distance-max calculation to not necessarily have to allocate memory on every invocation. This optimization is used in the prefix edge case by the fuzzy searcher during the findFuzzyCandidateTerms() inner loop. 
--- search/levenshtein.go | 17 +++++++++++++---- search/searcher/search_fuzzy.go | 5 ++++- 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/search/levenshtein.go b/search/levenshtein.go index ec033143a..687608d3f 100644 --- a/search/levenshtein.go +++ b/search/levenshtein.go @@ -57,15 +57,24 @@ func LevenshteinDistance(a, b string) int { // in which case the first return val will be the max // and the second will be true, indicating max was exceeded func LevenshteinDistanceMax(a, b string, max int) (int, bool) { + v, wasMax, _ := LevenshteinDistanceMaxReuseSlice(a, b, max, nil) + return v, wasMax +} + +func LevenshteinDistanceMaxReuseSlice(a, b string, max int, d []int) (int, bool, []int) { la := len(a) lb := len(b) ld := int(math.Abs(float64(la - lb))) if ld > max { - return max, true + return max, true, d } - d := make([]int, la+1) + if cap(d) < la+1 { + d = make([]int, la+1) + } + d = d[:la+1] + var lastdiag, olddiag, temp int for i := 1; i <= la; i++ { @@ -98,8 +107,8 @@ func LevenshteinDistanceMax(a, b string, max int) (int, bool) { } // after each row if rowmin isn't less than max stop if rowmin > max { - return max, true + return max, true, d } } - return d[la], false + return d[la], false, d } diff --git a/search/searcher/search_fuzzy.go b/search/searcher/search_fuzzy.go index 7d165b067..41ad804f1 100644 --- a/search/searcher/search_fuzzy.go +++ b/search/searcher/search_fuzzy.go @@ -72,9 +72,12 @@ func findFuzzyCandidateTerms(indexReader index.IndexReader, term string, }() // enumerate terms and check levenshtein distance + var reuse []int tfd, err := fieldDict.Next() for err == nil && tfd != nil { - ld, exceeded := search.LevenshteinDistanceMax(term, tfd.Term, fuzziness) + var ld int + var exceeded bool + ld, exceeded, reuse = search.LevenshteinDistanceMaxReuseSlice(term, tfd.Term, fuzziness, reuse) if !exceeded && ld <= fuzziness { rv = append(rv, tfd.Term) if tooManyClauses(len(rv)) { From 57e0ff8b50d723c8cd471702a9a1046a912b1a56 Mon Sep 17 
00:00:00 2001 From: Marty Schoch Date: Wed, 4 Apr 2018 11:31:19 -0400 Subject: [PATCH 354/728] Revert "TermFieldReader Advance optimization" This reverts commit 72ac35296132e4ab3da85f56237bebbf3cf2bcbe. --- index/scorch/segment/empty.go | 4 ---- index/scorch/segment/mem/posting.go | 21 ----------------- index/scorch/segment/segment.go | 4 ---- index/scorch/segment/zap/posting.go | 35 ----------------------------- index/scorch/snapshot_index_tfr.go | 35 ++++++++++++----------------- 5 files changed, 14 insertions(+), 85 deletions(-) diff --git a/index/scorch/segment/empty.go b/index/scorch/segment/empty.go index f3315a804..a0b8434c7 100644 --- a/index/scorch/segment/empty.go +++ b/index/scorch/segment/empty.go @@ -114,7 +114,3 @@ func (e *EmptyPostingsIterator) Next() (Posting, error) { func (e *EmptyPostingsIterator) Size() int { return 0 } - -func (e *EmptyPostingsIterator) Advance(uint64) (Posting, error) { - return nil, nil -} diff --git a/index/scorch/segment/mem/posting.go b/index/scorch/segment/mem/posting.go index 710f6e764..362fdb7c5 100644 --- a/index/scorch/segment/mem/posting.go +++ b/index/scorch/segment/mem/posting.go @@ -155,27 +155,6 @@ func (i *PostingsIterator) Next() (segment.Posting, error) { return &i.reuse, nil } -func (i *PostingsIterator) Advance(docNumber uint64) (segment.Posting, error) { - if i.reuse.Number() == docNumber { - return &i.reuse, nil - } - next, err := i.Next() - if err != nil || next == nil { - return next, err - } - - nnum := next.Number() - for nnum < docNumber { - next, err = i.Next() - if err != nil || next == nil { - return next, err - } - nnum = next.Number() - } - - return next, nil -} - // Posting is a single entry in a postings list type Posting struct { iterator *PostingsIterator diff --git a/index/scorch/segment/segment.go b/index/scorch/segment/segment.go index a79611363..b754e94b4 100644 --- a/index/scorch/segment/segment.go +++ b/index/scorch/segment/segment.go @@ -77,10 +77,6 @@ type PostingsIterator interface 
{ Next() (Posting, error) Size() int - - // Advance will return the respective posting of the - // sepcified doc number or its immediate follower. - Advance(docNum uint64) (Posting, error) } type Posting interface { diff --git a/index/scorch/segment/zap/posting.go b/index/scorch/segment/zap/posting.go index 593773d07..1f198df5e 100644 --- a/index/scorch/segment/zap/posting.go +++ b/index/scorch/segment/zap/posting.go @@ -590,41 +590,6 @@ func (i *PostingsIterator) nextBytes() ( return docNum, freq, normBits, bytesFreqNorm, bytesLoc, nil } -func (i *PostingsIterator) Advance(docNumber uint64) (segment.Posting, error) { - if i.postings == nil { - return nil, nil - } - - // check if we are already there - if i.next.Number() == docNumber { - return &i.next, nil - } - - nChunk := uint32(docNumber) / i.postings.sb.chunkFactor - if i.currChunk != nChunk { - err := i.loadChunk(int(nChunk)) - if err != nil { - return nil, fmt.Errorf("Advance, error loading chunk: %v", err) - } - } - - next, err := i.Next() - if err != nil || next == nil { - return nil, err - } - - nnum := next.Number() - for nnum < docNumber { - next, err = i.Next() - if err != nil || next == nil { - return next, err - } - nnum = next.Number() - } - - return next, nil -} - // nextDocNum returns the next docNum on the postings list, and also // sets up the currChunk / loc related fields of the iterator. 
func (i *PostingsIterator) nextDocNum() (uint64, bool, error) { diff --git a/index/scorch/snapshot_index_tfr.go b/index/scorch/snapshot_index_tfr.go index 0f015c8db..46e657488 100644 --- a/index/scorch/snapshot_index_tfr.go +++ b/index/scorch/snapshot_index_tfr.go @@ -117,8 +117,7 @@ func (i *IndexSnapshotTermFieldReader) postingToTermFieldDoc(next segment.Postin } } -func (i *IndexSnapshotTermFieldReader) Advance(ID index.IndexInternalID, - preAlloced *index.TermFieldDoc) (*index.TermFieldDoc, error) { +func (i *IndexSnapshotTermFieldReader) Advance(ID index.IndexInternalID, preAlloced *index.TermFieldDoc) (*index.TermFieldDoc, error) { // FIXME do something better // for now, if we need to seek backwards, then restart from the beginning if i.currPosting != nil && bytes.Compare(i.currID, ID) >= 0 { @@ -129,30 +128,24 @@ func (i *IndexSnapshotTermFieldReader) Advance(ID index.IndexInternalID, } *i = *(i2.(*IndexSnapshotTermFieldReader)) } - - num, err := docInternalToNumber(ID) + // FIXME do something better + next, err := i.Next(preAlloced) if err != nil { - return nil, nil + return nil, err } - segIndex, ldocNum := i.snapshot.segmentIndexAndLocalDocNumFromGlobal(num) - if segIndex > len(i.snapshot.segment) { + if next == nil { return nil, nil } - // skip directly to the target segment - next, err := i.iterators[segIndex].Advance(ldocNum) - if err != nil || next == nil { - return nil, err - } - - if preAlloced == nil { - preAlloced = &index.TermFieldDoc{} + for bytes.Compare(next.ID, ID) < 0 { + next, err = i.Next(preAlloced) + if err != nil { + return nil, err + } + if next == nil { + break + } } - preAlloced.ID = docNumberToBytes(preAlloced.ID, next.Number()+ - i.snapshot.offsets[segIndex]) - i.postingToTermFieldDoc(next, preAlloced) - i.currID = preAlloced.ID - i.currPosting = next - return preAlloced, nil + return next, nil } func (i *IndexSnapshotTermFieldReader) Count() uint64 { From 1bee8b51a0624219ac118ddae6e379b27a5c7508 Mon Sep 17 00:00:00 2001 From: 
Steve Yen Date: Mon, 2 Apr 2018 15:03:21 -0700 Subject: [PATCH 355/728] search_phrase findPhrasePaths() fix to avoid replays of locations With term locations that look like "a b b", findPhrasePaths() on a search for "a b b" with sloppiness of 1 ought to have a result like... a: 1, b: 2, b: 3 ...but instead incorrectly had results like this due to not ignoring previously used locations during recursion... a: 1, b: 2, b: 2 a: 1, b: 2, b: 3 --- search/searcher/search_phrase.go | 30 ++++++++++++-- search/searcher/search_phrase_test.go | 58 +++++++++++++++++++++++++++ 2 files changed, 84 insertions(+), 4 deletions(-) diff --git a/search/searcher/search_phrase.go b/search/searcher/search_phrase.go index 7166d4884..270d54dfb 100644 --- a/search/searcher/search_phrase.go +++ b/search/searcher/search_phrase.go @@ -270,6 +270,18 @@ func (p phrasePath) MergeInto(in search.TermLocationMap) { } } +func (p phrasePath) String() string { + rv := "[" + for i, pp := range p { + if i > 0 { + rv += ", " + } + rv += pp.String() + } + rv += "]" + return rv +} + // findPhrasePaths is a function to identify phase matches from a set of known // term locations. the implementation is recursive, so care must be taken // with arguments and return values. 
@@ -285,10 +297,14 @@ func (p phrasePath) MergeInto(in search.TermLocationMap) { // returns slice of paths, or nil if invocation did not find any successul paths func findPhrasePaths(prevPos uint64, ap search.ArrayPositions, phraseTerms [][]string, tlm search.TermLocationMap, p phrasePath, remainingSlop int) []phrasePath { + return findPhrasePathsRecur(prevPos, ap, phraseTerms, tlm, p, remainingSlop, nil) +} +func findPhrasePathsRecur(prevPos uint64, ap search.ArrayPositions, phraseTerms [][]string, + tlm search.TermLocationMap, p phrasePath, remainingSlop int, rv []phrasePath) []phrasePath { // no more terms if len(phraseTerms) < 1 { - return []phrasePath{append(phrasePath(nil), p...)} + return append(rv, append(phrasePath(nil), p...)) } car := phraseTerms[0] @@ -301,13 +317,13 @@ func findPhrasePaths(prevPos uint64, ap search.ArrayPositions, phraseTerms [][]s // if prevPos was 0, don't set it to 1 (as thats not a real abs pos) nextPos = 0 // don't advance nextPos if prevPos was 0 } - return findPhrasePaths(nextPos, ap, cdr, tlm, p, remainingSlop) + return findPhrasePathsRecur(nextPos, ap, cdr, tlm, p, remainingSlop, rv) } - var rv []phrasePath // locations for this term for _, carTerm := range car { locations := tlm[carTerm] + LOCATIONS_LOOP: for _, loc := range locations { if prevPos != 0 && !loc.ArrayPositions.Equals(ap) { // if the array positions are wrong, can't match, try next location @@ -322,9 +338,15 @@ func findPhrasePaths(prevPos uint64, ap search.ArrayPositions, phraseTerms [][]s // if enough slop remaining, continue recursively if prevPos == 0 || (remainingSlop-dist) >= 0 { + for _, ppart := range p { + if ppart.term == carTerm && ppart.loc == loc { + continue LOCATIONS_LOOP + } + } + // this location works, add it to the path (but not for empty term) px := append(p, phrasePart{term: carTerm, loc: loc}) - rv = append(rv, findPhrasePaths(loc.Pos, loc.ArrayPositions, cdr, tlm, px, remainingSlop-dist)...) 
+ rv = findPhrasePathsRecur(loc.Pos, loc.ArrayPositions, cdr, tlm, px, remainingSlop-dist, rv) } } } diff --git a/search/searcher/search_phrase_test.go b/search/searcher/search_phrase_test.go index d09be57df..c14b3e5f4 100644 --- a/search/searcher/search_phrase_test.go +++ b/search/searcher/search_phrase_test.go @@ -494,6 +494,64 @@ func TestFindPhrasePathsSloppy(t *testing.T) { }, }, }, + // test that we don't see multiple hits from the same location + { + phrase: [][]string{ + []string{"cat"}, []string{"dog"}, []string{"dog"}, + }, + slop: 1, + paths: []phrasePath{ + phrasePath{ + phrasePart{"cat", &search.Location{Pos: 1}}, + phrasePart{"dog", &search.Location{Pos: 2}}, + phrasePart{"dog", &search.Location{Pos: 3}}, + }, + }, + tlm: search.TermLocationMap{ // cat dog dog + "cat": search.Locations{ + &search.Location{Pos: 1}, + }, + "dog": search.Locations{ + &search.Location{Pos: 2}, + &search.Location{Pos: 3}, + }, + }, + }, + // test that we don't see multiple hits from the same location + { + phrase: [][]string{ + []string{"cat"}, []string{"dog"}, + }, + slop: 10, + paths: []phrasePath{ + phrasePath{ + phrasePart{"cat", &search.Location{Pos: 1}}, + phrasePart{"dog", &search.Location{Pos: 2}}, + }, + phrasePath{ + phrasePart{"cat", &search.Location{Pos: 1}}, + phrasePart{"dog", &search.Location{Pos: 4}}, + }, + phrasePath{ + phrasePart{"cat", &search.Location{Pos: 3}}, + phrasePart{"dog", &search.Location{Pos: 2}}, + }, + phrasePath{ + phrasePart{"cat", &search.Location{Pos: 3}}, + phrasePart{"dog", &search.Location{Pos: 4}}, + }, + }, + tlm: search.TermLocationMap{ // cat dog cat dog + "cat": search.Locations{ + &search.Location{Pos: 1}, + &search.Location{Pos: 3}, + }, + "dog": search.Locations{ + &search.Location{Pos: 2}, + &search.Location{Pos: 4}, + }, + }, + }, } for i, test := range tests { From 9d3ee754b01fbf6bc81e6b405bcc6e374754d1d0 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Wed, 4 Apr 2018 15:08:01 -0700 Subject: [PATCH 356/728] scorch introduce 
empty deleted bitmaps as nil --- index/scorch/introducer.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/index/scorch/introducer.go b/index/scorch/introducer.go index b00260bbe..23749c906 100644 --- a/index/scorch/introducer.go +++ b/index/scorch/introducer.go @@ -148,6 +148,9 @@ func (s *Scorch) introduceSegment(next *segmentIntroduction) error { } else { newss.deleted = roaring.Or(root.segment[i].deleted, delta) } + if newss.deleted.IsEmpty() { + newss.deleted = nil + } // check for live size before copying if newss.LiveSize() > 0 { From 213e3b2197c92287a180b5590f4c3f74090c1147 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Wed, 4 Apr 2018 15:07:15 -0700 Subject: [PATCH 357/728] scorch track provenance or creator of snapshots I've found that having this info helps a little with debugging. --- index/scorch/introducer.go | 10 ++++++++++ index/scorch/persister.go | 2 ++ index/scorch/scorch.go | 2 +- index/scorch/snapshot_index.go | 1 + index/scorch/snapshot_segment.go | 1 + 5 files changed, 15 insertions(+), 1 deletion(-) diff --git a/index/scorch/introducer.go b/index/scorch/introducer.go index b00260bbe..3b44e4c82 100644 --- a/index/scorch/introducer.go +++ b/index/scorch/introducer.go @@ -118,6 +118,7 @@ func (s *Scorch) introduceSegment(next *segmentIntroduction) error { offsets: make([]uint64, 0, nsegs+1), internal: make(map[string][]byte, len(root.internal)), refs: 1, + creator: "introduceSegment", } // iterate through current segments @@ -140,6 +141,7 @@ func (s *Scorch) introduceSegment(next *segmentIntroduction) error { id: root.segment[i].id, segment: root.segment[i].segment, cachedDocs: root.segment[i].cachedDocs, + creator: root.segment[i].creator, } // apply new obsoletions @@ -164,6 +166,7 @@ func (s *Scorch) introduceSegment(next *segmentIntroduction) error { id: next.id, segment: next.data, // take ownership of next.data's ref-count cachedDocs: &cachedDocs{cache: nil}, + creator: "introduceSegment", } newSnapshot.segment = 
append(newSnapshot.segment, newSegmentSnapshot) newSnapshot.offsets = append(newSnapshot.offsets, running) @@ -225,6 +228,7 @@ func (s *Scorch) introducePersist(persist *persistIntroduction) { offsets: make([]uint64, len(root.offsets)), internal: make(map[string][]byte, len(root.internal)), refs: 1, + creator: "introducePersist", } for i, segmentSnapshot := range root.segment { @@ -235,6 +239,7 @@ func (s *Scorch) introducePersist(persist *persistIntroduction) { segment: replacement, deleted: segmentSnapshot.deleted, cachedDocs: segmentSnapshot.cachedDocs, + creator: "introducePersist", } newIndexSnapshot.segment[i] = newSegmentSnapshot delete(persist.persisted, segmentSnapshot.id) @@ -278,6 +283,7 @@ func (s *Scorch) introduceMerge(nextMerge *segmentMerge) { parent: s, internal: root.internal, refs: 1, + creator: "introduceMerge", } // iterate through current segments @@ -313,6 +319,7 @@ func (s *Scorch) introduceMerge(nextMerge *segmentMerge) { segment: root.segment[i].segment, deleted: root.segment[i].deleted, cachedDocs: root.segment[i].cachedDocs, + creator: root.segment[i].creator, }) root.segment[i].segment.AddRef() newSnapshot.offsets = append(newSnapshot.offsets, running) @@ -344,6 +351,7 @@ func (s *Scorch) introduceMerge(nextMerge *segmentMerge) { segment: nextMerge.new, // take ownership for nextMerge.new's ref-count deleted: newSegmentDeleted, cachedDocs: &cachedDocs{cache: nil}, + creator: "introduceMerge", }) newSnapshot.offsets = append(newSnapshot.offsets, running) atomic.AddUint64(&s.stats.TotIntroducedSegmentsMerge, 1) @@ -391,6 +399,7 @@ func (s *Scorch) revertToSnapshot(revertTo *snapshotReversion) error { internal: revertTo.snapshot.internal, epoch: s.nextSnapshotEpoch, refs: 1, + creator: "revertToSnapshot", } s.nextSnapshotEpoch++ @@ -401,6 +410,7 @@ func (s *Scorch) revertToSnapshot(revertTo *snapshotReversion) error { segment: segmentSnapshot.segment, deleted: segmentSnapshot.deleted, cachedDocs: segmentSnapshot.cachedDocs, + creator: 
segmentSnapshot.creator, } newSnapshot.segment[i].segment.AddRef() diff --git a/index/scorch/persister.go b/index/scorch/persister.go index 6039b5097..b31686069 100644 --- a/index/scorch/persister.go +++ b/index/scorch/persister.go @@ -265,6 +265,7 @@ func (s *Scorch) persistSnapshotMaybeMerge(snapshot *IndexSnapshot) ( segment: make([]*SegmentSnapshot, 0, len(snapshot.segment)), internal: snapshot.internal, epoch: snapshot.epoch, + creator: "persistSnapshotMaybeMerge", } // copy to the equiv the segments that weren't replaced @@ -536,6 +537,7 @@ func (s *Scorch) loadSnapshot(snapshot *bolt.Bucket) (*IndexSnapshot, error) { parent: s, internal: make(map[string][]byte), refs: 1, + creator: "loadSnapshot", } var running uint64 c := snapshot.Cursor() diff --git a/index/scorch/scorch.go b/index/scorch/scorch.go index 13e9a4027..f218f1d54 100644 --- a/index/scorch/scorch.go +++ b/index/scorch/scorch.go @@ -84,7 +84,7 @@ func NewScorch(storeName string, closeCh: make(chan struct{}), ineligibleForRemoval: map[string]bool{}, } - rv.root = &IndexSnapshot{parent: rv, refs: 1} + rv.root = &IndexSnapshot{parent: rv, refs: 1, creator: "NewScorch"} ro, ok := config["read_only"].(bool) if ok { rv.readOnly = ro diff --git a/index/scorch/snapshot_index.go b/index/scorch/snapshot_index.go index 2e84b3f4b..cf6e6250d 100644 --- a/index/scorch/snapshot_index.go +++ b/index/scorch/snapshot_index.go @@ -55,6 +55,7 @@ type IndexSnapshot struct { internal map[string][]byte epoch uint64 size uint64 + creator string m sync.Mutex // Protects the fields that follow. 
refs int64 diff --git a/index/scorch/snapshot_segment.go b/index/scorch/snapshot_segment.go index ac95d450f..c29fac997 100644 --- a/index/scorch/snapshot_segment.go +++ b/index/scorch/snapshot_segment.go @@ -62,6 +62,7 @@ type SegmentSnapshot struct { id uint64 segment segment.Segment deleted *roaring.Bitmap + creator string cachedDocs *cachedDocs } From 418f98ede8854ffb5528043f035f3b762bcb9d19 Mon Sep 17 00:00:00 2001 From: abhinavdangeti Date: Mon, 2 Apr 2018 10:28:09 -0700 Subject: [PATCH 358/728] Account for memory overhead from snapshots held by merger, persister --- index/scorch/merge.go | 3 +++ index/scorch/persister.go | 3 +++ index/scorch/scorch.go | 33 ++++++++++++++++++++++++++++++++- 3 files changed, 38 insertions(+), 1 deletion(-) diff --git a/index/scorch/merge.go b/index/scorch/merge.go index 42b5e950f..73351aa4c 100644 --- a/index/scorch/merge.go +++ b/index/scorch/merge.go @@ -50,6 +50,8 @@ OUTER: s.rootLock.RLock() ourSnapshot := s.root ourSnapshot.AddRef() + atomic.StoreUint64(&s.iStats.mergeSnapshotSize, uint64(ourSnapshot.Size())) + atomic.StoreUint64(&s.iStats.mergeEpoch, ourSnapshot.epoch) s.rootLock.RUnlock() if ourSnapshot.epoch != lastEpochMergePlanned { @@ -58,6 +60,7 @@ OUTER: // lets get started err := s.planMergeAtSnapshot(ourSnapshot, mergePlannerOptions) if err != nil { + atomic.StoreUint64(&s.iStats.mergeEpoch, 0) if err == ErrClosed { // index has been closed _ = ourSnapshot.DecRef() diff --git a/index/scorch/persister.go b/index/scorch/persister.go index 6039b5097..ff6ac8f26 100644 --- a/index/scorch/persister.go +++ b/index/scorch/persister.go @@ -80,6 +80,8 @@ OUTER: ourSnapshot.AddRef() ourPersisted = s.rootPersisted s.rootPersisted = nil + atomic.StoreUint64(&s.iStats.persistSnapshotSize, uint64(ourSnapshot.Size())) + atomic.StoreUint64(&s.iStats.persistEpoch, ourSnapshot.epoch) } s.rootLock.Unlock() @@ -94,6 +96,7 @@ OUTER: close(ch) } if err != nil { + atomic.StoreUint64(&s.iStats.persistEpoch, 0) if err == ErrClosed { // 
index has been closed _ = ourSnapshot.DecRef() diff --git a/index/scorch/scorch.go b/index/scorch/scorch.go index 13e9a4027..0947e7501 100644 --- a/index/scorch/scorch.go +++ b/index/scorch/scorch.go @@ -71,6 +71,15 @@ type Scorch struct { onEvent func(event Event) onAsyncError func(err error) + + iStats internalStats +} + +type internalStats struct { + persistEpoch uint64 + persistSnapshotSize uint64 + mergeEpoch uint64 + mergeSnapshotSize uint64 } func NewScorch(storeName string, @@ -492,7 +501,29 @@ func (s *Scorch) MemoryUsed() uint64 { defer func() { _ = indexSnapshot.Close() }() - return uint64(indexSnapshot.Size()) + + // Account for current root snapshot overhead + memUsed := uint64(indexSnapshot.Size()) + + // Account for snapshot that the persister may be working on + persistEpoch := atomic.LoadUint64(&s.iStats.persistEpoch) + persistSnapshotSize := atomic.LoadUint64(&s.iStats.persistSnapshotSize) + if persistEpoch != 0 && indexSnapshot.epoch > persistEpoch { + // the snapshot that the persister is working on isn't the same as + // the current snapshot + memUsed += persistSnapshotSize + } + + // Account for snapshot that the merger may be working on + mergeEpoch := atomic.LoadUint64(&s.iStats.mergeEpoch) + mergeSnapshotSize := atomic.LoadUint64(&s.iStats.mergeSnapshotSize) + if mergeEpoch != 0 && indexSnapshot.epoch > mergeEpoch { + // the snapshot that the merger is working on isn't the same as + // the current snapshot + memUsed += mergeSnapshotSize + } + + return memUsed } func (s *Scorch) markIneligibleForRemoval(filename string) { From 7dc965232c43ecbb2a5977bcdb6ae18fc5a0bf2e Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Thu, 5 Apr 2018 14:18:53 -0700 Subject: [PATCH 359/728] improve findPhrasePaths() doc comments and call signature From feedback on a previous PR, the doc comments on findPhrasePaths() was outdated. See... 
https://github.com/blevesearch/bleve/pull/872 Additionally, the recently introduced recursive helper func allowed simplification of the params of findPhrasePaths(). --- search/searcher/search_phrase.go | 38 +++++++++++++++++++-------- search/searcher/search_phrase_test.go | 8 +++--- 2 files changed, 31 insertions(+), 15 deletions(-) diff --git a/search/searcher/search_phrase.go b/search/searcher/search_phrase.go index 270d54dfb..76d9dc23f 100644 --- a/search/searcher/search_phrase.go +++ b/search/searcher/search_phrase.go @@ -245,7 +245,7 @@ func (s *PhraseSearcher) checkCurrMustMatch(ctx *search.SearchContext) *search.D // a nil or empty TermLocationMap func (s *PhraseSearcher) checkCurrMustMatchField(ctx *search.SearchContext, tlm search.TermLocationMap) ( int, search.TermLocationMap) { - paths := findPhrasePaths(0, nil, s.terms, tlm, nil, 0) + paths := findPhrasePaths(s.terms, tlm, 0) rv := make(search.TermLocationMap, len(s.terms)) for _, p := range paths { p.MergeInto(rv) @@ -282,24 +282,39 @@ func (p phrasePath) String() string { return rv } -// findPhrasePaths is a function to identify phase matches from a set of known -// term locations. the implementation is recursive, so care must be taken -// with arguments and return values. +// findPhrasePaths is a function to identify phase matches from a set +// of known term locations. 
// -// prev - the previous location, nil on first invocation +// phraseTerms - slice containing the phrase terms, +// may contain empty string as placeholder (don't care) +// tlm - the Term Location Map containing all relevant term locations +// slop - amount of sloppiness that's allowed, which is the cummulative +// sum of the editDistances of each matching phrase part, +// where 0 means no sloppiness allowed (all editDistances must be 0) +// +// returns slice of paths, or nil if invocation did not find any successul paths +func findPhrasePaths(phraseTerms [][]string, + tlm search.TermLocationMap, slop int) []phrasePath { + return findPhrasePathsRecur(0, nil, phraseTerms, tlm, nil, slop, nil) +} + +// findPhrasePathsRecur is the recursive implementation of +// findPhrasePaths, so care must be taken with arguments and return +// values. +// +// prevPos - the previous location, 0 on first invocation +// ap - array positions of the first candidate phrase part to +// which further recursive phrase parts must match, +// nil on initial invocation or when there are no array positions // phraseTerms - slice containing the phrase terms themselves // may contain empty string as placeholder (don't care) // tlm - the Term Location Map containing all relevant term locations -// offset - the offset from the previous that this next term must match // p - the current path being explored (appended to in recursive calls) // this is the primary state being built during the traversal +// remainingSlop - decremented during recursion +// rv - the final result being appended to by all the recursive calls // // returns slice of paths, or nil if invocation did not find any successul paths -func findPhrasePaths(prevPos uint64, ap search.ArrayPositions, phraseTerms [][]string, - tlm search.TermLocationMap, p phrasePath, remainingSlop int) []phrasePath { - return findPhrasePathsRecur(prevPos, ap, phraseTerms, tlm, p, remainingSlop, nil) -} - func findPhrasePathsRecur(prevPos uint64, ap 
search.ArrayPositions, phraseTerms [][]string, tlm search.TermLocationMap, p phrasePath, remainingSlop int, rv []phrasePath) []phrasePath { // no more terms @@ -338,6 +353,7 @@ func findPhrasePathsRecur(prevPos uint64, ap search.ArrayPositions, phraseTerms // if enough slop remaining, continue recursively if prevPos == 0 || (remainingSlop-dist) >= 0 { + // skip if we've already used this term+loc already for _, ppart := range p { if ppart.term == carTerm && ppart.loc == loc { continue LOCATIONS_LOOP diff --git a/search/searcher/search_phrase_test.go b/search/searcher/search_phrase_test.go index c14b3e5f4..ea351380f 100644 --- a/search/searcher/search_phrase_test.go +++ b/search/searcher/search_phrase_test.go @@ -320,7 +320,7 @@ func TestFindPhrasePaths(t *testing.T) { } for i, test := range tests { - actualPaths := findPhrasePaths(0, nil, test.phrase, test.tlm, nil, 0) + actualPaths := findPhrasePaths(test.phrase, test.tlm, 0) if !reflect.DeepEqual(actualPaths, test.paths) { t.Fatalf("expected: %v got %v for test %d", test.paths, actualPaths, i) } @@ -559,7 +559,7 @@ func TestFindPhrasePathsSloppy(t *testing.T) { if tlmToUse == nil { tlmToUse = tlm } - actualPaths := findPhrasePaths(0, nil, test.phrase, tlmToUse, nil, test.slop) + actualPaths := findPhrasePaths(test.phrase, tlmToUse, test.slop) if !reflect.DeepEqual(actualPaths, test.paths) { t.Fatalf("expected: %v got %v for test %d", test.paths, actualPaths, i) } @@ -640,7 +640,7 @@ func TestFindPhrasePathsSloppyPalyndrome(t *testing.T) { } for i, test := range tests { - actualPaths := findPhrasePaths(0, nil, test.phrase, tlm, nil, test.slop) + actualPaths := findPhrasePaths(test.phrase, tlm, test.slop) if !reflect.DeepEqual(actualPaths, test.paths) { t.Fatalf("expected: %v got %v for test %d", test.paths, actualPaths, i) } @@ -736,7 +736,7 @@ func TestFindMultiPhrasePaths(t *testing.T) { } for i, test := range tests { - actualPaths := findPhrasePaths(0, nil, test.phrase, tlm, nil, 0) + actualPaths := 
findPhrasePaths(test.phrase, tlm, 0) if !reflect.DeepEqual(actualPaths, test.paths) { t.Fatalf("expected: %v got %v for test %d", test.paths, actualPaths, i) } From 6b448cffe55a79637b9dee0beac55904c307fdb0 Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Fri, 6 Apr 2018 14:51:29 -0400 Subject: [PATCH 360/728] add ability change default open read-only flag this change allows an alternate distribution of the bleve command-line tool to change the default behavior and always open the index read-only --- cmd/bleve/cmd/root.go | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/cmd/bleve/cmd/root.go b/cmd/bleve/cmd/root.go index f0bea29fe..8dcf165b2 100644 --- a/cmd/bleve/cmd/root.go +++ b/cmd/bleve/cmd/root.go @@ -27,6 +27,10 @@ var cfgFile string var idx bleve.Index +// DefaultOpenReadOnly allows some distributions of this command to default +// to always opening the index read-only +var DefaultOpenReadOnly = false + const canMutateBleveIndex = "canMutateBleveIndex" // CanMutateBleveIndex returns true if the command is capable @@ -52,8 +56,11 @@ var RootCmd = &cobra.Command{ if len(args) < 1 { return fmt.Errorf("must specify path to index") } + runtimeConfig := map[string]interface{}{ + "read_only": DefaultOpenReadOnly, + } var err error - idx, err = bleve.Open(args[0]) + idx, err = bleve.OpenUsing(args[0], runtimeConfig) if err != nil { return fmt.Errorf("error opening bleve index: %v", err) } From 5af813d7e5f8424f09cbbf6ab414cf13a9ed0d0e Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Thu, 5 Apr 2018 15:59:19 -0700 Subject: [PATCH 361/728] optimize findPhrasePaths with path memory reuse This optimization renames findPhrasePathsRecur() back to findPhrasePaths(). The PhraseSearcher now also calls findPhrasePaths() with preallocated / recycled phrase path slices. Microbenchmarks of bleve-query for query-string of 'text:"see also"' on a 50K en-wiki scorch index went from ~68 q/sec before this change to ~74 q/sec after this change. 
--- search/searcher/search_phrase.go | 54 +++++++++++++-------------- search/searcher/search_phrase_test.go | 8 ++-- 2 files changed, 31 insertions(+), 31 deletions(-) diff --git a/search/searcher/search_phrase.go b/search/searcher/search_phrase.go index 76d9dc23f..95d7a6b3e 100644 --- a/search/searcher/search_phrase.go +++ b/search/searcher/search_phrase.go @@ -36,6 +36,8 @@ type PhraseSearcher struct { queryNorm float64 currMust *search.DocumentMatch terms [][]string + path phrasePath + paths []phrasePath initialized bool } @@ -245,12 +247,15 @@ func (s *PhraseSearcher) checkCurrMustMatch(ctx *search.SearchContext) *search.D // a nil or empty TermLocationMap func (s *PhraseSearcher) checkCurrMustMatchField(ctx *search.SearchContext, tlm search.TermLocationMap) ( int, search.TermLocationMap) { - paths := findPhrasePaths(s.terms, tlm, 0) + if s.path == nil { + s.path = make(phrasePath, 0, len(s.terms)) + } + s.paths = findPhrasePaths(0, nil, s.terms, tlm, s.path[:0], 0, s.paths[:0]) rv := make(search.TermLocationMap, len(s.terms)) - for _, p := range paths { + for _, p := range s.paths { p.MergeInto(rv) } - return len(paths), rv + return len(s.paths), rv } type phrasePart struct { @@ -283,43 +288,38 @@ func (p phrasePath) String() string { } // findPhrasePaths is a function to identify phase matches from a set -// of known term locations. 
-// -// phraseTerms - slice containing the phrase terms, -// may contain empty string as placeholder (don't care) -// tlm - the Term Location Map containing all relevant term locations -// slop - amount of sloppiness that's allowed, which is the cummulative -// sum of the editDistances of each matching phrase part, -// where 0 means no sloppiness allowed (all editDistances must be 0) -// -// returns slice of paths, or nil if invocation did not find any successul paths -func findPhrasePaths(phraseTerms [][]string, - tlm search.TermLocationMap, slop int) []phrasePath { - return findPhrasePathsRecur(0, nil, phraseTerms, tlm, nil, slop, nil) -} - -// findPhrasePathsRecur is the recursive implementation of -// findPhrasePaths, so care must be taken with arguments and return -// values. +// of known term locations. it recursive so care must be taken with +// arguments and return values. // // prevPos - the previous location, 0 on first invocation // ap - array positions of the first candidate phrase part to // which further recursive phrase parts must match, // nil on initial invocation or when there are no array positions -// phraseTerms - slice containing the phrase terms themselves +// phraseTerms - slice containing the phrase terms, // may contain empty string as placeholder (don't care) // tlm - the Term Location Map containing all relevant term locations // p - the current path being explored (appended to in recursive calls) // this is the primary state being built during the traversal -// remainingSlop - decremented during recursion +// remainingSlop - amount of sloppiness that's allowed, which is the +// sum of the editDistances from each matching phrase part, +// where 0 means no sloppiness allowed (all editDistances must be 0), +// decremented during recursion // rv - the final result being appended to by all the recursive calls // // returns slice of paths, or nil if invocation did not find any successul paths -func findPhrasePathsRecur(prevPos uint64, ap 
search.ArrayPositions, phraseTerms [][]string, +func findPhrasePaths(prevPos uint64, ap search.ArrayPositions, phraseTerms [][]string, tlm search.TermLocationMap, p phrasePath, remainingSlop int, rv []phrasePath) []phrasePath { // no more terms if len(phraseTerms) < 1 { - return append(rv, append(phrasePath(nil), p...)) + // snapshot or copy the recursively built phrasePath p and + // append it to the rv, also optimizing by checking if next + // phrasePath item in the rv (which we're about to overwrite) + // is available for reuse + var pcopy phrasePath + if len(rv) < cap(rv) { + pcopy = rv[:len(rv)+1][len(rv)][:0] + } + return append(rv, append(pcopy, p...)) } car := phraseTerms[0] @@ -332,7 +332,7 @@ func findPhrasePathsRecur(prevPos uint64, ap search.ArrayPositions, phraseTerms // if prevPos was 0, don't set it to 1 (as thats not a real abs pos) nextPos = 0 // don't advance nextPos if prevPos was 0 } - return findPhrasePathsRecur(nextPos, ap, cdr, tlm, p, remainingSlop, rv) + return findPhrasePaths(nextPos, ap, cdr, tlm, p, remainingSlop, rv) } // locations for this term @@ -362,7 +362,7 @@ func findPhrasePathsRecur(prevPos uint64, ap search.ArrayPositions, phraseTerms // this location works, add it to the path (but not for empty term) px := append(p, phrasePart{term: carTerm, loc: loc}) - rv = findPhrasePathsRecur(loc.Pos, loc.ArrayPositions, cdr, tlm, px, remainingSlop-dist, rv) + rv = findPhrasePaths(loc.Pos, loc.ArrayPositions, cdr, tlm, px, remainingSlop-dist, rv) } } } diff --git a/search/searcher/search_phrase_test.go b/search/searcher/search_phrase_test.go index ea351380f..1c3e18a5e 100644 --- a/search/searcher/search_phrase_test.go +++ b/search/searcher/search_phrase_test.go @@ -320,7 +320,7 @@ func TestFindPhrasePaths(t *testing.T) { } for i, test := range tests { - actualPaths := findPhrasePaths(test.phrase, test.tlm, 0) + actualPaths := findPhrasePaths(0, nil, test.phrase, test.tlm, nil, 0, nil) if !reflect.DeepEqual(actualPaths, test.paths) { 
t.Fatalf("expected: %v got %v for test %d", test.paths, actualPaths, i) } @@ -559,7 +559,7 @@ func TestFindPhrasePathsSloppy(t *testing.T) { if tlmToUse == nil { tlmToUse = tlm } - actualPaths := findPhrasePaths(test.phrase, tlmToUse, test.slop) + actualPaths := findPhrasePaths(0, nil, test.phrase, tlmToUse, nil, test.slop, nil) if !reflect.DeepEqual(actualPaths, test.paths) { t.Fatalf("expected: %v got %v for test %d", test.paths, actualPaths, i) } @@ -640,7 +640,7 @@ func TestFindPhrasePathsSloppyPalyndrome(t *testing.T) { } for i, test := range tests { - actualPaths := findPhrasePaths(test.phrase, tlm, test.slop) + actualPaths := findPhrasePaths(0, nil, test.phrase, tlm, nil, test.slop, nil) if !reflect.DeepEqual(actualPaths, test.paths) { t.Fatalf("expected: %v got %v for test %d", test.paths, actualPaths, i) } @@ -736,7 +736,7 @@ func TestFindMultiPhrasePaths(t *testing.T) { } for i, test := range tests { - actualPaths := findPhrasePaths(test.phrase, tlm, 0) + actualPaths := findPhrasePaths(0, nil, test.phrase, tlm, nil, 0, nil) if !reflect.DeepEqual(actualPaths, test.paths) { t.Fatalf("expected: %v got %v for test %d", test.paths, actualPaths, i) } From 4dd4bc7d63c4b191621e930824085df35a2a5b34 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Thu, 5 Apr 2018 23:29:11 -0700 Subject: [PATCH 362/728] optimize to return nil facet results when there's no FacetBuilder --- search/collector/topn.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/search/collector/topn.go b/search/collector/topn.go index d684868cc..28f284a94 100644 --- a/search/collector/topn.go +++ b/search/collector/topn.go @@ -313,5 +313,5 @@ func (hc *TopNCollector) FacetResults() search.FacetResults { if hc.facetsBuilder != nil { return hc.facetsBuilder.Results() } - return search.FacetResults{} + return nil } From d54898da6a50532bb18396417a63ae4e237696a2 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Thu, 5 Apr 2018 23:37:11 -0700 Subject: [PATCH 363/728] optimize to return nil 
SearchResult.Errors on success --- index_impl.go | 2 -- 1 file changed, 2 deletions(-) diff --git a/index_impl.go b/index_impl.go index 4d03b78af..b5373ff0d 100644 --- a/index_impl.go +++ b/index_impl.go @@ -609,9 +609,7 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr return &SearchResult{ Status: &SearchStatus{ Total: 1, - Failed: 0, Successful: 1, - Errors: make(map[string]error), }, Request: req, Hits: hits, From baf73756eaa11b3951ed941986a168b143688bc5 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Fri, 6 Apr 2018 00:34:11 -0700 Subject: [PATCH 364/728] optimize sort cache preallocations --- search/sort.go | 29 ++++++++++------------------- 1 file changed, 10 insertions(+), 19 deletions(-) diff --git a/search/sort.go b/search/sort.go index 28705d369..6afc9789b 100644 --- a/search/sort.go +++ b/search/sort.go @@ -251,23 +251,21 @@ func (so SortOrder) Compare(cachedScoring, cachedDesc []bool, i, j *DocumentMatc } func (so SortOrder) RequiresScore() bool { - rv := false for _, soi := range so { if soi.RequiresScoring() { - rv = true + return true } } - return rv + return false } func (so SortOrder) RequiresDocID() bool { - rv := false for _, soi := range so { if soi.RequiresDocID() { - rv = true + return true } } - return rv + return false } func (so SortOrder) RequiredFields() []string { @@ -279,7 +277,7 @@ func (so SortOrder) RequiredFields() []string { } func (so SortOrder) CacheIsScore() []bool { - var rv []bool + rv := make([]bool, 0, len(so)) for _, soi := range so { rv = append(rv, soi.RequiresScoring()) } @@ -287,7 +285,7 @@ func (so SortOrder) CacheIsScore() []bool { } func (so SortOrder) CacheDescending() []bool { - var rv []bool + rv := make([]bool, 0, len(so)) for _, soi := range so { rv = append(rv, soi.Descending()) } @@ -486,8 +484,7 @@ func (s *SortField) MarshalJSON() ([]byte, error) { } func (s *SortField) Copy() SearchSort { - var rv SortField - rv = *s + rv := *s return &rv } @@ -499,7 +496,6 @@ type 
SortDocID struct { // UpdateVisitor is a no-op for SortDocID as it's value // is not dependent on any field terms func (s *SortDocID) UpdateVisitor(field string, term []byte) { - } // Value returns the sort value of the DocumentMatch @@ -529,8 +525,7 @@ func (s *SortDocID) MarshalJSON() ([]byte, error) { } func (s *SortDocID) Copy() SearchSort { - var rv SortDocID - rv = *s + rv := *s return &rv } @@ -542,7 +537,6 @@ type SortScore struct { // UpdateVisitor is a no-op for SortScore as it's value // is not dependent on any field terms func (s *SortScore) UpdateVisitor(field string, term []byte) { - } // Value returns the sort value of the DocumentMatch @@ -572,8 +566,7 @@ func (s *SortScore) MarshalJSON() ([]byte, error) { } func (s *SortScore) Copy() SearchSort { - var rv SortScore - rv = *s + rv := *s return &rv } @@ -583,7 +576,6 @@ var maxDistance = string(numeric.MustNewPrefixCodedInt64(math.MaxInt64, 0)) // their distance from the specified point. func NewSortGeoDistance(field, unit string, lon, lat float64, desc bool) ( *SortGeoDistance, error) { - rv := &SortGeoDistance{ Field: field, Desc: desc, @@ -705,7 +697,6 @@ func (s *SortGeoDistance) MarshalJSON() ([]byte, error) { } func (s *SortGeoDistance) Copy() SearchSort { - var rv SortGeoDistance - rv = *s + rv := *s return &rv } From b4ac8051d116093a3ac168e70a835e3ffae53964 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Thu, 5 Apr 2018 18:11:37 -0700 Subject: [PATCH 365/728] optimize reuse of DocMatch.FieldTermLocations & Vectors during search In this commit, a new field, FieldTermLocations is introduced into the DocumentMatch struct. The FieldTermLocations is a slice that is not JSON serialized and is more ammenable than the existing DocumentMatch.Locations field for higher performance search processing, as slices are more friendly than maps for memory reuse. 
As the search collector finalizes results, it invokes the newly introduced DocumentMatch.Complete() method on the final hits, which converts the DocumentMatch.FieldTermLocations slice into a DocumentMatch.Locations map. All scorers and searchers that used to rely on DocumentMatch.Locations are appropriately updated. Of note, the phrase searcher also invokes DocumentMatch.Complete() so that it has the Locations map available for faster phrase processing. Term vector slice capacity are now also reused as part of this change. Microbenchmarks of bleve-query for query-string of 'text:"see also"' on a 50K en-wiki scorch index went from ~74 q/sec before this change to ~89 q/sec after this change. --- index/index.go | 2 + index/scorch/snapshot_index_tfr.go | 13 +++-- search/collector/topn.go | 1 + search/scorer/scorer_conjunction.go | 11 +---- search/scorer/scorer_disjunction.go | 11 +---- search/scorer/scorer_term.go | 60 ++++++++++------------ search/scorer/scorer_term_test.go | 4 ++ search/search.go | 71 +++++++++++++++++++++++++++ search/searcher/search_phrase.go | 54 ++++++++++++-------- search/searcher/search_phrase_test.go | 1 + search/util.go | 27 ++++++++++ test/versus_test.go | 44 +++++++++++------ 12 files changed, 205 insertions(+), 94 deletions(-) diff --git a/index/index.go b/index/index.go index ea53ee5d6..861f6a2ea 100644 --- a/index/index.go +++ b/index/index.go @@ -174,10 +174,12 @@ func (tfd *TermFieldDoc) Size() int { func (tfd *TermFieldDoc) Reset() *TermFieldDoc { // remember the []byte used for the ID id := tfd.ID + vectors := tfd.Vectors // idiom to copy over from empty TermFieldDoc (0 allocations) *tfd = TermFieldDoc{} // reuse the []byte already allocated (and reset len to 0) tfd.ID = id[:0] + tfd.Vectors = vectors[:0] return tfd } diff --git a/index/scorch/snapshot_index_tfr.go b/index/scorch/snapshot_index_tfr.go index 46e657488..c111d5177 100644 --- a/index/scorch/snapshot_index_tfr.go +++ b/index/scorch/snapshot_index_tfr.go @@ -102,17 +102,22 
@@ func (i *IndexSnapshotTermFieldReader) postingToTermFieldDoc(next segment.Postin } if i.includeTermVectors { locs := next.Locations() - rv.Vectors = make([]*index.TermFieldVector, len(locs)) - backing := make([]index.TermFieldVector, len(locs)) + if cap(rv.Vectors) < len(locs) { + rv.Vectors = make([]*index.TermFieldVector, len(locs)) + backing := make([]index.TermFieldVector, len(locs)) + for i := range backing { + rv.Vectors[i] = &backing[i] + } + } + rv.Vectors = rv.Vectors[:len(locs)] for i, loc := range locs { - backing[i] = index.TermFieldVector{ + *rv.Vectors[i] = index.TermFieldVector{ Start: loc.Start(), End: loc.End(), Pos: loc.Pos(), ArrayPositions: loc.ArrayPositions(), Field: loc.Field(), } - rv.Vectors[i] = &backing[i] } } } diff --git a/search/collector/topn.go b/search/collector/topn.go index d684868cc..7a0bf9860 100644 --- a/search/collector/topn.go +++ b/search/collector/topn.go @@ -282,6 +282,7 @@ func (hc *TopNCollector) finalizeResults(r index.IndexReader) error { return err } } + doc.Complete(nil) return nil }) diff --git a/search/scorer/scorer_conjunction.go b/search/scorer/scorer_conjunction.go index b866293e0..48cdf3ae9 100644 --- a/search/scorer/scorer_conjunction.go +++ b/search/scorer/scorer_conjunction.go @@ -49,15 +49,11 @@ func (s *ConjunctionQueryScorer) Score(ctx *search.SearchContext, constituents [ childrenExplanations = make([]*search.Explanation, len(constituents)) } - locations := []search.FieldTermLocationMap{} for i, docMatch := range constituents { sum += docMatch.Score if s.options.Explain { childrenExplanations[i] = docMatch.Expl } - if docMatch.Locations != nil { - locations = append(locations, docMatch.Locations) - } } newScore := sum var newExpl *search.Explanation @@ -69,11 +65,8 @@ func (s *ConjunctionQueryScorer) Score(ctx *search.SearchContext, constituents [ rv := constituents[0] rv.Score = newScore rv.Expl = newExpl - if len(locations) == 1 { - rv.Locations = locations[0] - } else if len(locations) > 1 { - 
rv.Locations = search.MergeLocations(locations) - } + rv.FieldTermLocations = search.MergeFieldTermLocations( + rv.FieldTermLocations, constituents[1:]) return rv } diff --git a/search/scorer/scorer_disjunction.go b/search/scorer/scorer_disjunction.go index 36a601c72..7a955e168 100644 --- a/search/scorer/scorer_disjunction.go +++ b/search/scorer/scorer_disjunction.go @@ -50,15 +50,11 @@ func (s *DisjunctionQueryScorer) Score(ctx *search.SearchContext, constituents [ childrenExplanations = make([]*search.Explanation, len(constituents)) } - var locations []search.FieldTermLocationMap for i, docMatch := range constituents { sum += docMatch.Score if s.options.Explain { childrenExplanations[i] = docMatch.Expl } - if docMatch.Locations != nil { - locations = append(locations, docMatch.Locations) - } } var rawExpl *search.Explanation @@ -80,11 +76,8 @@ func (s *DisjunctionQueryScorer) Score(ctx *search.SearchContext, constituents [ rv := constituents[0] rv.Score = newScore rv.Expl = newExpl - if len(locations) == 1 { - rv.Locations = locations[0] - } else if len(locations) > 1 { - rv.Locations = search.MergeLocations(locations) - } + rv.FieldTermLocations = search.MergeFieldTermLocations( + rv.FieldTermLocations, constituents[1:]) return rv } diff --git a/search/scorer/scorer_term.go b/search/scorer/scorer_term.go index 077e38e0f..5544f2d01 100644 --- a/search/scorer/scorer_term.go +++ b/search/scorer/scorer_term.go @@ -32,7 +32,7 @@ func init() { } type TermQueryScorer struct { - queryTerm []byte + queryTerm string queryField string queryBoost float64 docTerm uint64 @@ -62,7 +62,7 @@ func (s *TermQueryScorer) Size() int { func NewTermQueryScorer(queryTerm []byte, queryField string, queryBoost float64, docTotal, docTerm uint64, options search.SearcherOptions) *TermQueryScorer { rv := TermQueryScorer{ - queryTerm: queryTerm, + queryTerm: string(queryTerm), queryField: queryField, queryBoost: queryBoost, docTerm: docTerm, @@ -106,7 +106,7 @@ func (s *TermQueryScorer) 
SetQueryNorm(qnorm float64) { } s.queryWeightExplanation = &search.Explanation{ Value: s.queryWeight, - Message: fmt.Sprintf("queryWeight(%s:%s^%f), product of:", s.queryField, string(s.queryTerm), s.queryBoost), + Message: fmt.Sprintf("queryWeight(%s:%s^%f), product of:", s.queryField, s.queryTerm, s.queryBoost), Children: childrenExplanations, } } @@ -128,7 +128,7 @@ func (s *TermQueryScorer) Score(ctx *search.SearchContext, termMatch *index.Term childrenExplanations := make([]*search.Explanation, 3) childrenExplanations[0] = &search.Explanation{ Value: tf, - Message: fmt.Sprintf("tf(termFreq(%s:%s)=%d", s.queryField, string(s.queryTerm), termMatch.Freq), + Message: fmt.Sprintf("tf(termFreq(%s:%s)=%d", s.queryField, s.queryTerm, termMatch.Freq), } childrenExplanations[1] = &search.Explanation{ Value: termMatch.Norm, @@ -137,7 +137,7 @@ func (s *TermQueryScorer) Score(ctx *search.SearchContext, termMatch *index.Term childrenExplanations[2] = s.idfExplanation scoreExplanation = &search.Explanation{ Value: score, - Message: fmt.Sprintf("fieldWeight(%s:%s in %s), product of:", s.queryField, string(s.queryTerm), termMatch.ID), + Message: fmt.Sprintf("fieldWeight(%s:%s in %s), product of:", s.queryField, s.queryTerm, termMatch.ID), Children: childrenExplanations, } } @@ -151,7 +151,7 @@ func (s *TermQueryScorer) Score(ctx *search.SearchContext, termMatch *index.Term childExplanations[1] = scoreExplanation scoreExplanation = &search.Explanation{ Value: score, - Message: fmt.Sprintf("weight(%s:%s^%f in %s), product of:", s.queryField, string(s.queryTerm), s.queryBoost, termMatch.ID), + Message: fmt.Sprintf("weight(%s:%s^%f in %s), product of:", s.queryField, s.queryTerm, s.queryBoost, termMatch.ID), Children: childExplanations, } } @@ -164,41 +164,31 @@ func (s *TermQueryScorer) Score(ctx *search.SearchContext, termMatch *index.Term rv.Expl = scoreExplanation } - if termMatch.Vectors != nil && len(termMatch.Vectors) > 0 { - locs := make([]search.Location, 
len(termMatch.Vectors)) - locsUsed := 0 - - totalPositions := 0 - for _, v := range termMatch.Vectors { - totalPositions += len(v.ArrayPositions) + if len(termMatch.Vectors) > 0 { + if cap(rv.FieldTermLocations) < len(termMatch.Vectors) { + rv.FieldTermLocations = make([]search.FieldTermLocation, 0, len(termMatch.Vectors)) } - positions := make(search.ArrayPositions, totalPositions) - positionsUsed := 0 - rv.Locations = make(search.FieldTermLocationMap) for _, v := range termMatch.Vectors { - tlm := rv.Locations[v.Field] - if tlm == nil { - tlm = make(search.TermLocationMap) - rv.Locations[v.Field] = tlm - } - - loc := &locs[locsUsed] - locsUsed++ - - loc.Pos = v.Pos - loc.Start = v.Start - loc.End = v.End - + var ap search.ArrayPositions if len(v.ArrayPositions) > 0 { - loc.ArrayPositions = positions[positionsUsed : positionsUsed+len(v.ArrayPositions)] - for i, ap := range v.ArrayPositions { - loc.ArrayPositions[i] = ap + n := len(rv.FieldTermLocations) + if n < cap(rv.FieldTermLocations) { // reuse ap slice if available + ap = rv.FieldTermLocations[:n+1][n].Location.ArrayPositions[:0] } - positionsUsed += len(v.ArrayPositions) + ap = append(ap, v.ArrayPositions...) 
} - - tlm[string(s.queryTerm)] = append(tlm[string(s.queryTerm)], loc) + rv.FieldTermLocations = + append(rv.FieldTermLocations, search.FieldTermLocation{ + Field: v.Field, + Term: s.queryTerm, + Location: search.Location{ + Pos: v.Pos, + Start: v.Start, + End: v.End, + ArrayPositions: ap, + }, + }) } } diff --git a/search/scorer/scorer_term_test.go b/search/scorer/scorer_term_test.go index bacc00295..23d449788 100644 --- a/search/scorer/scorer_term_test.go +++ b/search/scorer/scorer_term_test.go @@ -156,6 +156,10 @@ func TestTermScorer(t *testing.T) { DocumentMatchPool: search.NewDocumentMatchPool(1, 0), } actual := scorer.Score(ctx, test.termMatch) + actual.Complete(nil) + if len(actual.FieldTermLocations) <= 0 { + actual.FieldTermLocations = nil + } if !reflect.DeepEqual(actual, test.result) { t.Errorf("expected %#v got %#v for %#v", test.result, actual, test.termMatch) diff --git a/search/search.go b/search/search.go index ca030df4b..440c09571 100644 --- a/search/search.go +++ b/search/search.go @@ -77,6 +77,12 @@ func (t TermLocationMap) AddLocation(term string, location *Location) { type FieldTermLocationMap map[string]TermLocationMap +type FieldTermLocation struct { + Field string + Term string + Location Location +} + type FieldFragmentMap map[string][]string type DocumentMatch struct { @@ -99,6 +105,12 @@ type DocumentMatch struct { // used to maintain natural index order HitNumber uint64 `json:"-"` + + // used to temporarily hold field term location information during + // search processing in an efficient, recycle-friendly manner, to + // be later incorporated into the Locations map when search + // results are completed + FieldTermLocations []FieldTermLocation `json:"-"` } func (dm *DocumentMatch) AddFieldValue(name string, value interface{}) { @@ -128,12 +140,19 @@ func (dm *DocumentMatch) Reset() *DocumentMatch { indexInternalID := dm.IndexInternalID // remember the []interface{} used for sort sort := dm.Sort + // remember the FieldTermLocations 
backing array + ftls := dm.FieldTermLocations + for i := range ftls { // recycle the ArrayPositions of each location + ftls[i].Location.ArrayPositions = ftls[i].Location.ArrayPositions[:0] + } // idiom to copy over from empty DocumentMatch (0 allocations) *dm = DocumentMatch{} // reuse the []byte already allocated (and reset len to 0) dm.IndexInternalID = indexInternalID[:0] // reuse the []interface{} already allocated (and reset len to 0) dm.Sort = sort[:0] + // reuse the FieldTermLocations already allocated (and reset len to 0) + dm.FieldTermLocations = ftls[:0] return dm } @@ -183,6 +202,58 @@ func (dm *DocumentMatch) Size() int { return sizeInBytes } +// Complete performs final preparation & transformation of the +// DocumentMatch at the end of search processing, also allowing the +// caller to provide an optional preallocated locations slice +func (dm *DocumentMatch) Complete(prealloc []Location) []Location { + // transform the FieldTermLocations slice into the Locations map + nlocs := len(dm.FieldTermLocations) + if nlocs > 0 { + if cap(prealloc) < nlocs { + prealloc = make([]Location, nlocs) + } + prealloc = prealloc[:nlocs] + + var lastField string + var tlm TermLocationMap + + for i, ftl := range dm.FieldTermLocations { + if lastField != ftl.Field { + lastField = ftl.Field + + if dm.Locations == nil { + dm.Locations = make(FieldTermLocationMap) + } + + tlm = dm.Locations[ftl.Field] + if tlm == nil { + tlm = make(TermLocationMap) + dm.Locations[ftl.Field] = tlm + } + } + + loc := &prealloc[i] + *loc = ftl.Location + + if len(loc.ArrayPositions) > 0 { // copy + loc.ArrayPositions = append(ArrayPositions(nil), loc.ArrayPositions...) 
+ } + + tlm[ftl.Term] = append(tlm[ftl.Term], loc) + + dm.FieldTermLocations[i] = FieldTermLocation{ // recycle + Location: Location{ + ArrayPositions: ftl.Location.ArrayPositions[:0], + }, + } + } + } + + dm.FieldTermLocations = dm.FieldTermLocations[:0] // recycle + + return prealloc +} + func (dm *DocumentMatch) String() string { return fmt.Sprintf("[%s-%f]", string(dm.IndexInternalID), dm.Score) } diff --git a/search/searcher/search_phrase.go b/search/searcher/search_phrase.go index 95d7a6b3e..3711da063 100644 --- a/search/searcher/search_phrase.go +++ b/search/searcher/search_phrase.go @@ -38,6 +38,7 @@ type PhraseSearcher struct { terms [][]string path phrasePath paths []phrasePath + locations []search.Location initialized bool } @@ -214,48 +215,59 @@ func (s *PhraseSearcher) Next(ctx *search.SearchContext) (*search.DocumentMatch, // also satisfies the phase constraints. if so, it returns a DocumentMatch // for this document, otherwise nil func (s *PhraseSearcher) checkCurrMustMatch(ctx *search.SearchContext) *search.DocumentMatch { - rvftlm := make(search.FieldTermLocationMap, 0) - freq := 0 + s.locations = s.currMust.Complete(s.locations) + + locations := s.currMust.Locations + s.currMust.Locations = nil + + ftls := s.currMust.FieldTermLocations + // typically we would expect there to only actually be results in // one field, but we allow for this to not be the case // but, we note that phrase constraints can only be satisfied within // a single field, so we can check them each independently - for field, tlm := range s.currMust.Locations { - - f, rvtlm := s.checkCurrMustMatchField(ctx, tlm) - if f > 0 { - freq += f - rvftlm[field] = rvtlm - } + for field, tlm := range locations { + ftls = s.checkCurrMustMatchField(ctx, field, tlm, ftls) } - if freq > 0 { + if len(ftls) > 0 { // return match rv := s.currMust s.currMust = nil - rv.Locations = rvftlm + rv.FieldTermLocations = ftls return rv } return nil } -// checkCurrMustMatchField is soley concerned with 
determining if one particular -// field within the currMust DocumentMatch Locations satisfies the phase -// constraints (possibly more than once). if so, the number of times it was -// satisfied, and these locations are returned. otherwise 0 and either -// a nil or empty TermLocationMap -func (s *PhraseSearcher) checkCurrMustMatchField(ctx *search.SearchContext, tlm search.TermLocationMap) ( - int, search.TermLocationMap) { +// checkCurrMustMatchField is soley concerned with determining if one +// particular field within the currMust DocumentMatch Locations +// satisfies the phase constraints (possibly more than once). if so, +// the matching field term locations are appended to the provided +// slice +func (s *PhraseSearcher) checkCurrMustMatchField(ctx *search.SearchContext, + field string, tlm search.TermLocationMap, + ftls []search.FieldTermLocation) []search.FieldTermLocation { if s.path == nil { s.path = make(phrasePath, 0, len(s.terms)) } s.paths = findPhrasePaths(0, nil, s.terms, tlm, s.path[:0], 0, s.paths[:0]) - rv := make(search.TermLocationMap, len(s.terms)) for _, p := range s.paths { - p.MergeInto(rv) + for _, pp := range p { + ftls = append(ftls, search.FieldTermLocation{ + Field: field, + Term: pp.term, + Location: search.Location{ + Pos: pp.loc.Pos, + Start: pp.loc.Start, + End: pp.loc.End, + ArrayPositions: pp.loc.ArrayPositions, + }, + }) + } } - return len(s.paths), rv + return ftls } type phrasePart struct { diff --git a/search/searcher/search_phrase_test.go b/search/searcher/search_phrase_test.go index 1c3e18a5e..04af20f08 100644 --- a/search/searcher/search_phrase_test.go +++ b/search/searcher/search_phrase_test.go @@ -74,6 +74,7 @@ func TestPhraseSearch(t *testing.T) { next, err := test.searcher.Next(ctx) i := 0 for err == nil && next != nil { + next.Complete(nil) if i < len(test.results) { if !next.IndexInternalID.Equals(test.results[i].IndexInternalID) { t.Errorf("expected result %d to have id %s got %s for test %d\n", i, 
test.results[i].IndexInternalID, next.IndexInternalID, testIndex) diff --git a/search/util.go b/search/util.go index 83212af1f..19dd5d68b 100644 --- a/search/util.go +++ b/search/util.go @@ -40,3 +40,30 @@ func MergeTermLocationMaps(rv, other TermLocationMap) TermLocationMap { } return rv } + +func MergeFieldTermLocations(dest []FieldTermLocation, matches []*DocumentMatch) []FieldTermLocation { + n := len(dest) + for _, dm := range matches { + n += len(dm.FieldTermLocations) + } + if cap(dest) < n { + dest = append(make([]FieldTermLocation, 0, n), dest...) + } + + for _, dm := range matches { + for _, ftl := range dm.FieldTermLocations { + dest = append(dest, FieldTermLocation{ + Field: ftl.Field, + Term: ftl.Term, + Location: Location{ + Pos: ftl.Location.Pos, + Start: ftl.Location.Start, + End: ftl.Location.End, + ArrayPositions: append(ArrayPositions(nil), ftl.Location.ArrayPositions...), + }, + }) + } + } + + return dest +} diff --git a/test/versus_test.go b/test/versus_test.go index 70463a93c..dbc7dd752 100644 --- a/test/versus_test.go +++ b/test/versus_test.go @@ -316,27 +316,39 @@ func testVersusSearches(vt *VersusTest, searchTemplates []string, idxA, idxB ble // putting the hits from A & B into maps. 
hitsA := hitsById(resA) hitsB := hitsById(resB) + for id, hitA := range hitsA { + hitB := hitsB[id] + if len(hitA.FieldTermLocations) <= 0 { + hitA.FieldTermLocations = nil + } + if len(hitB.FieldTermLocations) <= 0 { + hitB.FieldTermLocations = nil + } + if !reflect.DeepEqual(hitA, hitB) { + t.Errorf("\n driving from hitsA\n hitA: %#v,\n hitB: %#v", hitA, hitB) + idx, _ := strconv.Atoi(id) + t.Errorf("\n doc: %d, body: %s", idx, strings.Join(vt.Bodies[idx], " ")) + } + } + for id, hitB := range hitsB { + hitA := hitsA[id] + if len(hitA.FieldTermLocations) <= 0 { + hitA.FieldTermLocations = nil + } + if len(hitB.FieldTermLocations) <= 0 { + hitB.FieldTermLocations = nil + } + if !reflect.DeepEqual(hitA, hitB) { + t.Errorf("\n driving from hitsB\n hitA: %#v,\n hitB: %#v", hitA, hitB) + idx, _ := strconv.Atoi(id) + t.Errorf("\n doc: %d, body: %s", idx, strings.Join(vt.Bodies[idx], " ")) + } + } if !reflect.DeepEqual(hitsA, hitsB) { t.Errorf("=========\nsearch: (%d) %s,\n res hits mismatch,\n len(hitsA): %d,\n len(hitsB): %d", i, bufBytes, len(hitsA), len(hitsB)) t.Errorf("\n hitsA: %#v,\n hitsB: %#v", hitsA, hitsB) - for id, hitA := range hitsA { - hitB := hitsB[id] - if !reflect.DeepEqual(hitA, hitB) { - t.Errorf("\n driving from hitsA\n hitA: %#v,\n hitB: %#v", hitA, hitB) - idx, _ := strconv.Atoi(id) - t.Errorf("\n doc: %d, body: %s", idx, strings.Join(vt.Bodies[idx], " ")) - } - } - for id, hitB := range hitsB { - hitA := hitsA[id] - if !reflect.DeepEqual(hitA, hitB) { - t.Errorf("\n driving from hitsB\n hitA: %#v,\n hitB: %#v", hitA, hitB) - idx, _ := strconv.Atoi(id) - t.Errorf("\n doc: %d, body: %s", idx, strings.Join(vt.Bodies[idx], " ")) - } - } } resA.Hits = nil From bdd917bb1228d67837b566da78565e87fc94bd4b Mon Sep 17 00:00:00 2001 From: abhinavdangeti Date: Sun, 8 Apr 2018 19:34:32 -0700 Subject: [PATCH 366/728] MB-28847: Account for total documents' size within a batch + Supporting APIs to fetch these stats: last added document's size and total documents' 
size. --- document/document.go | 16 ++++++++++++---- document/field.go | 2 ++ document/field_boolean.go | 16 ++++++++++++++++ document/field_composite.go | 16 ++++++++++++++++ document/field_datetime.go | 15 +++++++++++++++ document/field_geopoint.go | 15 +++++++++++++++ document/field_numeric.go | 15 +++++++++++++++ document/field_text.go | 16 ++++++++++++++++ index.go | 17 +++++++++++++++++ 9 files changed, 124 insertions(+), 4 deletions(-) diff --git a/document/document.go b/document/document.go index 921098b0b..6ac17b9ab 100644 --- a/document/document.go +++ b/document/document.go @@ -43,10 +43,18 @@ func NewDocument(id string) *Document { } func (d *Document) Size() int { - return reflectStaticSizeDocument + size.SizeOfPtr + - len(d.ID) + - len(d.Fields)*size.SizeOfPtr + - len(d.CompositeFields)*(size.SizeOfPtr+reflectStaticSizeCompositeField) + sizeInBytes := reflectStaticSizeDocument + size.SizeOfPtr + + len(d.ID) + + for _, entry := range d.Fields { + sizeInBytes += entry.Size() + } + + for _, entry := range d.CompositeFields { + sizeInBytes += entry.Size() + } + + return sizeInBytes } func (d *Document) AddField(f Field) *Document { diff --git a/document/field.go b/document/field.go index c17f81e5d..2fe916698 100644 --- a/document/field.go +++ b/document/field.go @@ -36,4 +36,6 @@ type Field interface { // that this field represents - this is a common metric for tracking // the rate of indexing NumPlainTextBytes() uint64 + + Size() int } diff --git a/document/field_boolean.go b/document/field_boolean.go index c226374c0..6864b16f4 100644 --- a/document/field_boolean.go +++ b/document/field_boolean.go @@ -16,10 +16,19 @@ package document import ( "fmt" + "reflect" "github.com/blevesearch/bleve/analysis" + "github.com/blevesearch/bleve/size" ) +var reflectStaticSizeBooleanField int + +func init() { + var f BooleanField + reflectStaticSizeBooleanField = int(reflect.TypeOf(f).Size()) +} + const DefaultBooleanIndexingOptions = StoreField | IndexField | DocValues 
type BooleanField struct { @@ -30,6 +39,13 @@ type BooleanField struct { numPlainTextBytes uint64 } +func (b *BooleanField) Size() int { + return reflectStaticSizeBooleanField + size.SizeOfPtr + + len(b.name) + + len(b.arrayPositions)*size.SizeOfUint64 + + len(b.value) +} + func (b *BooleanField) Name() string { return b.name } diff --git a/document/field_composite.go b/document/field_composite.go index e53cd4566..a8285880f 100644 --- a/document/field_composite.go +++ b/document/field_composite.go @@ -18,6 +18,7 @@ import ( "reflect" "github.com/blevesearch/bleve/analysis" + "github.com/blevesearch/bleve/size" ) var reflectStaticSizeCompositeField int @@ -63,6 +64,21 @@ func NewCompositeFieldWithIndexingOptions(name string, defaultInclude bool, incl return rv } +func (c *CompositeField) Size() int { + sizeInBytes := reflectStaticSizeCompositeField + size.SizeOfPtr + + len(c.name) + + for k, _ := range c.includedFields { + sizeInBytes += size.SizeOfString + len(k) + size.SizeOfBool + } + + for k, _ := range c.excludedFields { + sizeInBytes += size.SizeOfString + len(k) + size.SizeOfBool + } + + return sizeInBytes +} + func (c *CompositeField) Name() string { return c.name } diff --git a/document/field_datetime.go b/document/field_datetime.go index 1db068c87..583b44cde 100644 --- a/document/field_datetime.go +++ b/document/field_datetime.go @@ -17,12 +17,21 @@ package document import ( "fmt" "math" + "reflect" "time" "github.com/blevesearch/bleve/analysis" "github.com/blevesearch/bleve/numeric" + "github.com/blevesearch/bleve/size" ) +var reflectStaticSizeDateTimeField int + +func init() { + var f DateTimeField + reflectStaticSizeDateTimeField = int(reflect.TypeOf(f).Size()) +} + const DefaultDateTimeIndexingOptions = StoreField | IndexField | DocValues const DefaultDateTimePrecisionStep uint = 4 @@ -37,6 +46,12 @@ type DateTimeField struct { numPlainTextBytes uint64 } +func (n *DateTimeField) Size() int { + return reflectStaticSizeDateTimeField + size.SizeOfPtr + + 
len(n.name) + + len(n.arrayPositions)*size.SizeOfUint64 +} + func (n *DateTimeField) Name() string { return n.name } diff --git a/document/field_geopoint.go b/document/field_geopoint.go index f508b3625..91fe23f96 100644 --- a/document/field_geopoint.go +++ b/document/field_geopoint.go @@ -16,12 +16,21 @@ package document import ( "fmt" + "reflect" "github.com/blevesearch/bleve/analysis" "github.com/blevesearch/bleve/geo" "github.com/blevesearch/bleve/numeric" + "github.com/blevesearch/bleve/size" ) +var reflectStaticSizeGeoPointField int + +func init() { + var f GeoPointField + reflectStaticSizeGeoPointField = int(reflect.TypeOf(f).Size()) +} + var GeoPrecisionStep uint = 9 type GeoPointField struct { @@ -32,6 +41,12 @@ type GeoPointField struct { numPlainTextBytes uint64 } +func (n *GeoPointField) Size() int { + return reflectStaticSizeGeoPointField + size.SizeOfPtr + + len(n.name) + + len(n.arrayPositions)*size.SizeOfUint64 +} + func (n *GeoPointField) Name() string { return n.name } diff --git a/document/field_numeric.go b/document/field_numeric.go index e32993c88..46c685e84 100644 --- a/document/field_numeric.go +++ b/document/field_numeric.go @@ -16,11 +16,20 @@ package document import ( "fmt" + "reflect" "github.com/blevesearch/bleve/analysis" "github.com/blevesearch/bleve/numeric" + "github.com/blevesearch/bleve/size" ) +var reflectStaticSizeNumericField int + +func init() { + var f NumericField + reflectStaticSizeNumericField = int(reflect.TypeOf(f).Size()) +} + const DefaultNumericIndexingOptions = StoreField | IndexField | DocValues const DefaultPrecisionStep uint = 4 @@ -33,6 +42,12 @@ type NumericField struct { numPlainTextBytes uint64 } +func (n *NumericField) Size() int { + return reflectStaticSizeNumericField + size.SizeOfPtr + + len(n.name) + + len(n.arrayPositions)*size.SizeOfPtr +} + func (n *NumericField) Name() string { return n.name } diff --git a/document/field_text.go b/document/field_text.go index 5f7a3ab64..c8e871c9d 100644 --- 
a/document/field_text.go +++ b/document/field_text.go @@ -16,10 +16,19 @@ package document import ( "fmt" + "reflect" "github.com/blevesearch/bleve/analysis" + "github.com/blevesearch/bleve/size" ) +var reflectStaticSizeTextField int + +func init() { + var f TextField + reflectStaticSizeTextField = int(reflect.TypeOf(f).Size()) +} + const DefaultTextIndexingOptions = IndexField | DocValues type TextField struct { @@ -31,6 +40,13 @@ type TextField struct { numPlainTextBytes uint64 } +func (t *TextField) Size() int { + return reflectStaticSizeTextField + size.SizeOfPtr + + len(t.name) + + len(t.arrayPositions)*size.SizeOfUint64 + + len(t.value) +} + func (t *TextField) Name() string { return t.name } diff --git a/index.go b/index.go index ea7b3832a..197f4d4df 100644 --- a/index.go +++ b/index.go @@ -21,6 +21,7 @@ import ( "github.com/blevesearch/bleve/index" "github.com/blevesearch/bleve/index/store" "github.com/blevesearch/bleve/mapping" + "github.com/blevesearch/bleve/size" ) // A Batch groups together multiple Index and Delete @@ -32,6 +33,9 @@ import ( type Batch struct { index Index internal *index.Batch + + lastDocSize uint64 + totalSize uint64 } // Index adds the specified index operation to the @@ -47,9 +51,22 @@ func (b *Batch) Index(id string, data interface{}) error { return err } b.internal.Update(doc) + + b.lastDocSize = uint64(doc.Size() + + len(id) + size.SizeOfString) // overhead from internal + b.totalSize += b.lastDocSize + return nil } +func (b *Batch) LastDocSize() uint64 { + return b.lastDocSize +} + +func (b *Batch) TotalDocsSize() uint64 { + return b.totalSize +} + // IndexAdvanced adds the specified index operation to the // batch which skips the mapping. NOTE: the bleve Index is not updated // until the batch is executed. 
From b1c9ab49814da0d5738026d846b665d1137966a8 Mon Sep 17 00:00:00 2001 From: Sreekanth Sivasankaran Date: Mon, 9 Apr 2018 18:13:03 +0530 Subject: [PATCH 367/728] bumping the scorch version with meta persisence in rootbolt --- index/scorch/scorch.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/index/scorch/scorch.go b/index/scorch/scorch.go index 162e70291..14796c5e8 100644 --- a/index/scorch/scorch.go +++ b/index/scorch/scorch.go @@ -36,7 +36,7 @@ import ( const Name = "scorch" -const Version uint8 = 1 +const Version uint8 = 2 var ErrClosed = fmt.Errorf("scorch closed") From ce8f7057f0a0679dff2ff6d4dcc7a7b141fcb679 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Sun, 8 Apr 2018 22:49:26 -0700 Subject: [PATCH 368/728] scorch renamed dvIterator to dvReader The structure wasn't actually iterating, but instead allowed for random access reading by docNum of doc values. --- index/scorch/segment/zap/build.go | 4 ++-- index/scorch/segment/zap/docvalues.go | 34 +++++++++++++-------------- index/scorch/segment/zap/segment.go | 16 ++++++------- 3 files changed, 27 insertions(+), 27 deletions(-) diff --git a/index/scorch/segment/zap/build.go b/index/scorch/segment/zap/build.go index f3fa1e9fa..2c261a3eb 100644 --- a/index/scorch/segment/zap/build.go +++ b/index/scorch/segment/zap/build.go @@ -136,11 +136,11 @@ func InitSegmentBase(mem []byte, memCRC uint32, chunkFactor uint32, fieldsIndexOffset: fieldsIndexOffset, docValueOffset: docValueOffset, dictLocs: dictLocs, - fieldDvIterMap: make(map[uint16]*docValueIterator), + fieldDvReaders: make(map[uint16]*docValueReader), } sb.updateSize() - err := sb.loadDvIterators() + err := sb.loadDvReaders() if err != nil { return nil, err } diff --git a/index/scorch/segment/zap/docvalues.go b/index/scorch/segment/zap/docvalues.go index dcd2cb052..f46f2bd5b 100644 --- a/index/scorch/segment/zap/docvalues.go +++ b/index/scorch/segment/zap/docvalues.go @@ -27,14 +27,14 @@ import ( "github.com/golang/snappy" ) -var 
reflectStaticSizedocValueIterator int +var reflectStaticSizedocValueReader int func init() { - var dvi docValueIterator - reflectStaticSizedocValueIterator = int(reflect.TypeOf(dvi).Size()) + var dvi docValueReader + reflectStaticSizedocValueReader = int(reflect.TypeOf(dvi).Size()) } -type docValueIterator struct { +type docValueReader struct { field string curChunkNum uint64 numChunks uint64 @@ -45,27 +45,27 @@ type docValueIterator struct { uncompressed []byte // temp buf for snappy decompression } -func (di *docValueIterator) size() int { - return reflectStaticSizedocValueIterator + size.SizeOfPtr + +func (di *docValueReader) size() int { + return reflectStaticSizedocValueReader + size.SizeOfPtr + len(di.field) + len(di.chunkOffsets)*size.SizeOfUint64 + len(di.curChunkHeader)*reflectStaticSizeMetaData + len(di.curChunkData) } -func (di *docValueIterator) fieldName() string { +func (di *docValueReader) fieldName() string { return di.field } -func (di *docValueIterator) curChunkNumber() uint64 { +func (di *docValueReader) curChunkNumber() uint64 { return di.curChunkNum } -func (s *SegmentBase) loadFieldDocValueIterator(field string, - fieldDvLoc uint64) (*docValueIterator, error) { +func (s *SegmentBase) loadFieldDocValueReader(field string, + fieldDvLoc uint64) (*docValueReader, error) { // get the docValue offset for the given fields if fieldDvLoc == fieldNotUninverted { - return nil, fmt.Errorf("loadFieldDocValueIterator: "+ + return nil, fmt.Errorf("loadFieldDocValueReader: "+ "no docValues found for field: %s", field) } @@ -78,7 +78,7 @@ func (s *SegmentBase) loadFieldDocValueIterator(field string, } offset += uint64(read) - fdvIter := &docValueIterator{ + fdvIter := &docValueReader{ curChunkNum: math.MaxUint64, field: field, chunkOffsets: make([]uint64, int(numChunks)), @@ -96,7 +96,7 @@ func (s *SegmentBase) loadFieldDocValueIterator(field string, return fdvIter, nil } -func (di *docValueIterator) loadDvChunk(chunkNumber, +func (di *docValueReader) 
loadDvChunk(chunkNumber, localDocNum uint64, s *SegmentBase) error { // advance to the chunk where the docValues // reside for the given docNum @@ -128,7 +128,7 @@ func (di *docValueIterator) loadDvChunk(chunkNumber, return nil } -func (di *docValueIterator) visitDocValues(docNum uint64, +func (di *docValueReader) visitDocValues(docNum uint64, visitor index.DocumentFieldTermVisitor) error { // binary search the term locations for the docNum start, end := di.getDocValueLocs(docNum) @@ -157,7 +157,7 @@ func (di *docValueIterator) visitDocValues(docNum uint64, return nil } -func (di *docValueIterator) getDocValueLocs(docNum uint64) (uint64, uint64) { +func (di *docValueReader) getDocValueLocs(docNum uint64) (uint64, uint64) { i := sort.Search(len(di.curChunkHeader), func(i int) bool { return di.curChunkHeader[i].DocNum >= docNum }) @@ -180,7 +180,7 @@ func (s *SegmentBase) VisitDocumentFieldTerms(localDocNum uint64, fields []strin // find the chunkNumber where the docValues are stored docInChunk := localDocNum / uint64(s.chunkFactor) - if dvIter, exists := s.fieldDvIterMap[fieldIDPlus1-1]; exists && + if dvIter, exists := s.fieldDvReaders[fieldIDPlus1-1]; exists && dvIter != nil { // check if the chunk is already loaded if docInChunk != dvIter.curChunkNumber() { @@ -202,7 +202,7 @@ func (s *SegmentBase) VisitDocumentFieldTerms(localDocNum uint64, fields []strin func (s *Segment) VisitableDocValueFields() ([]string, error) { var rv []string for fieldID, field := range s.fieldsInv { - if dvIter, ok := s.fieldDvIterMap[uint16(fieldID)]; ok && + if dvIter, ok := s.fieldDvReaders[uint16(fieldID)]; ok && dvIter != nil { rv = append(rv, field) } diff --git a/index/scorch/segment/zap/segment.go b/index/scorch/segment/zap/segment.go index 138d88ab0..f9549416a 100644 --- a/index/scorch/segment/zap/segment.go +++ b/index/scorch/segment/zap/segment.go @@ -55,7 +55,7 @@ func Open(path string) (segment.Segment, error) { SegmentBase: SegmentBase{ mem: mm[0 : len(mm)-FooterSize], 
fieldsMap: make(map[string]uint16), - fieldDvIterMap: make(map[uint16]*docValueIterator), + fieldDvReaders: make(map[uint16]*docValueReader), }, f: f, mm: mm, @@ -76,7 +76,7 @@ func Open(path string) (segment.Segment, error) { return nil, err } - err = rv.loadDvIterators() + err = rv.loadDvReaders() if err != nil { _ = rv.Close() return nil, err @@ -98,7 +98,7 @@ type SegmentBase struct { fieldsIndexOffset uint64 docValueOffset uint64 dictLocs []uint64 - fieldDvIterMap map[uint16]*docValueIterator // naive chunk cache per field + fieldDvReaders map[uint16]*docValueReader // naive chunk cache per field size uint64 } @@ -121,8 +121,8 @@ func (sb *SegmentBase) updateSize() { } sizeInBytes += len(sb.dictLocs) * size.SizeOfUint64 - // fieldDvIterMap - for _, v := range sb.fieldDvIterMap { + // fieldDvReaders + for _, v := range sb.fieldDvReaders { sizeInBytes += size.SizeOfUint16 + size.SizeOfPtr if v != nil { sizeInBytes += v.size() @@ -480,7 +480,7 @@ func (s *Segment) DictAddr(field string) (uint64, error) { return s.dictLocs[fieldIDPlus1-1], nil } -func (s *SegmentBase) loadDvIterators() error { +func (s *SegmentBase) loadDvReaders() error { if s.docValueOffset == fieldNotUninverted { return nil } @@ -489,9 +489,9 @@ func (s *SegmentBase) loadDvIterators() error { for fieldID, field := range s.fieldsInv { fieldLoc, n := binary.Uvarint(s.mem[s.docValueOffset+read : s.docValueOffset+read+binary.MaxVarintLen64]) if n <= 0 { - return fmt.Errorf("loadDvIterators: failed to read the docvalue offsets for field %d", fieldID) + return fmt.Errorf("loadDvReaders: failed to read the docvalue offsets for field %d", fieldID) } - s.fieldDvIterMap[uint16(fieldID)], _ = s.loadFieldDocValueIterator(field, fieldLoc) + s.fieldDvReaders[uint16(fieldID)], _ = s.loadFieldDocValueReader(field, fieldLoc) read += uint64(n) } return nil From 69f797b438d06f958b7ca75f3da6c7932d3fd504 Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Mon, 9 Apr 2018 17:03:18 -0400 Subject: [PATCH 369/728] remove 
mem segment this was no longer used and becoming more work to maintain --- index/scorch/segment/mem/build.go | 347 --------- index/scorch/segment/mem/dict.go | 239 ------- index/scorch/segment/mem/dict_test.go | 211 ------ index/scorch/segment/mem/posting.go | 247 ------- index/scorch/segment/mem/segment.go | 286 -------- index/scorch/segment/mem/segment_test.go | 876 ----------------------- 6 files changed, 2206 deletions(-) delete mode 100644 index/scorch/segment/mem/build.go delete mode 100644 index/scorch/segment/mem/dict.go delete mode 100644 index/scorch/segment/mem/dict_test.go delete mode 100644 index/scorch/segment/mem/posting.go delete mode 100644 index/scorch/segment/mem/segment.go delete mode 100644 index/scorch/segment/mem/segment_test.go diff --git a/index/scorch/segment/mem/build.go b/index/scorch/segment/mem/build.go deleted file mode 100644 index 0b329704a..000000000 --- a/index/scorch/segment/mem/build.go +++ /dev/null @@ -1,347 +0,0 @@ -// Copyright (c) 2017 Couchbase, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package mem - -import ( - "math" - "sort" - - "github.com/RoaringBitmap/roaring" - "github.com/blevesearch/bleve/analysis" - "github.com/blevesearch/bleve/document" - "github.com/blevesearch/bleve/index" -) - -// NewFromAnalyzedDocs places the analyzed document mutations into a new segment -func NewFromAnalyzedDocs(results []*index.AnalysisResult) *Segment { - s := New() - - // ensure that _id field get fieldID 0 - s.getOrDefineField("_id") - - // fill Dicts/DictKeys and preallocate memory - s.initializeDict(results) - - // walk each doc - fieldLensReuse := make([]int, len(s.FieldsMap)) - docMapReuse := make([]analysis.TokenFrequencies, len(s.FieldsMap)) - for _, result := range results { - s.processDocument(result, fieldLensReuse, docMapReuse) - } - - // go back and sort the dictKeys - for _, dict := range s.DictKeys { - sort.Strings(dict) - } - - // compute memory usage of segment - s.updateSize() - - // professional debugging - // - // log.Printf("fields: %v\n", s.FieldsMap) - // log.Printf("fieldsInv: %v\n", s.FieldsInv) - // log.Printf("fieldsLoc: %v\n", s.FieldsLoc) - // log.Printf("dicts: %v\n", s.Dicts) - // log.Printf("dict keys: %v\n", s.DictKeys) - // for i, posting := range s.Postings { - // log.Printf("posting %d: %v\n", i, posting) - // } - // for i, freq := range s.Freqs { - // log.Printf("freq %d: %v\n", i, freq) - // } - // for i, norm := range s.Norms { - // log.Printf("norm %d: %v\n", i, norm) - // } - // for i, field := range s.Locfields { - // log.Printf("field %d: %v\n", i, field) - // } - // for i, start := range s.Locstarts { - // log.Printf("start %d: %v\n", i, start) - // } - // for i, end := range s.Locends { - // log.Printf("end %d: %v\n", i, end) - // } - // for i, pos := range s.Locpos { - // log.Printf("pos %d: %v\n", i, pos) - // } - // for i, apos := range s.Locarraypos { - // log.Printf("apos %d: %v\n", i, apos) - // } - // log.Printf("stored: %v\n", s.Stored) - // log.Printf("stored types: %v\n", s.StoredTypes) - // 
log.Printf("stored pos: %v\n", s.StoredPos) - - return s -} - -// fill Dicts/DictKeys and preallocate memory for postings -func (s *Segment) initializeDict(results []*index.AnalysisResult) { - var numPostingsLists int - - numTermsPerPostingsList := make([]int, 0, 64) // Keyed by postings list id. - numLocsPerPostingsList := make([]int, 0, 64) // Keyed by postings list id. - - var numTokenFrequencies int - var totLocs int - - // initial scan for all fieldID's to sort them - for _, result := range results { - for _, field := range result.Document.CompositeFields { - s.getOrDefineField(field.Name()) - } - for _, field := range result.Document.Fields { - s.getOrDefineField(field.Name()) - } - } - sort.Strings(s.FieldsInv[1:]) // keep _id as first field - s.FieldsMap = make(map[string]uint16, len(s.FieldsInv)) - for fieldID, fieldName := range s.FieldsInv { - s.FieldsMap[fieldName] = uint16(fieldID + 1) - } - - processField := func(fieldID uint16, tfs analysis.TokenFrequencies) { - dict := s.Dicts[fieldID] - dictKeys := s.DictKeys[fieldID] - for term, tf := range tfs { - pidPlus1, exists := dict[term] - if !exists { - numPostingsLists++ - pidPlus1 = uint64(numPostingsLists) - dict[term] = pidPlus1 - dictKeys = append(dictKeys, term) - numTermsPerPostingsList = append(numTermsPerPostingsList, 0) - numLocsPerPostingsList = append(numLocsPerPostingsList, 0) - } - pid := pidPlus1 - 1 - numTermsPerPostingsList[pid] += 1 - numLocsPerPostingsList[pid] += len(tf.Locations) - totLocs += len(tf.Locations) - } - numTokenFrequencies += len(tfs) - s.DictKeys[fieldID] = dictKeys - } - - for _, result := range results { - // walk each composite field - for _, field := range result.Document.CompositeFields { - fieldID := uint16(s.getOrDefineField(field.Name())) - _, tf := field.Analyze() - processField(fieldID, tf) - } - - // walk each field - for i, field := range result.Document.Fields { - fieldID := uint16(s.getOrDefineField(field.Name())) - tf := result.Analyzed[i] - 
processField(fieldID, tf) - } - } - - s.Postings = make([]*roaring.Bitmap, numPostingsLists) - for i := 0; i < numPostingsLists; i++ { - s.Postings[i] = roaring.New() - } - s.PostingsLocs = make([]*roaring.Bitmap, numPostingsLists) - for i := 0; i < numPostingsLists; i++ { - s.PostingsLocs[i] = roaring.New() - } - - // Preallocate big, contiguous backing arrays. - auint64Backing := make([][]uint64, numPostingsLists*4+totLocs) // For Freqs, Locstarts, Locends, Locpos, sub-Locarraypos. - uint64Backing := make([]uint64, numTokenFrequencies+totLocs*3) // For sub-Freqs, sub-Locstarts, sub-Locends, sub-Locpos. - float32Backing := make([]float32, numTokenFrequencies) // For sub-Norms. - uint16Backing := make([]uint16, totLocs) // For sub-Locfields. - - // Point top-level slices to the backing arrays. - s.Freqs = auint64Backing[0:numPostingsLists] - auint64Backing = auint64Backing[numPostingsLists:] - - s.Norms = make([][]float32, numPostingsLists) - - s.Locfields = make([][]uint16, numPostingsLists) - - s.Locstarts = auint64Backing[0:numPostingsLists] - auint64Backing = auint64Backing[numPostingsLists:] - - s.Locends = auint64Backing[0:numPostingsLists] - auint64Backing = auint64Backing[numPostingsLists:] - - s.Locpos = auint64Backing[0:numPostingsLists] - auint64Backing = auint64Backing[numPostingsLists:] - - s.Locarraypos = make([][][]uint64, numPostingsLists) - - // Point sub-slices to the backing arrays. 
- for pid, numTerms := range numTermsPerPostingsList { - s.Freqs[pid] = uint64Backing[0:0] - uint64Backing = uint64Backing[numTerms:] - - s.Norms[pid] = float32Backing[0:0] - float32Backing = float32Backing[numTerms:] - } - - for pid, numLocs := range numLocsPerPostingsList { - s.Locfields[pid] = uint16Backing[0:0] - uint16Backing = uint16Backing[numLocs:] - - s.Locstarts[pid] = uint64Backing[0:0] - uint64Backing = uint64Backing[numLocs:] - - s.Locends[pid] = uint64Backing[0:0] - uint64Backing = uint64Backing[numLocs:] - - s.Locpos[pid] = uint64Backing[0:0] - uint64Backing = uint64Backing[numLocs:] - - s.Locarraypos[pid] = auint64Backing[0:0] - auint64Backing = auint64Backing[numLocs:] - } -} - -func (s *Segment) processDocument(result *index.AnalysisResult, - fieldLens []int, docMap []analysis.TokenFrequencies) { - // clear the fieldLens and docMap for reuse - n := len(s.FieldsMap) - for i := 0; i < n; i++ { - fieldLens[i] = 0 - docMap[i] = nil - } - - docNum := uint64(s.addDocument()) - - processField := func(fieldID uint16, name string, l int, tf analysis.TokenFrequencies) { - fieldLens[fieldID] += l - - existingFreqs := docMap[fieldID] - if existingFreqs != nil { - existingFreqs.MergeAll(name, tf) - } else { - docMap[fieldID] = tf - } - } - - // walk each composite field - for _, field := range result.Document.CompositeFields { - fieldID := uint16(s.getOrDefineField(field.Name())) - l, tf := field.Analyze() - processField(fieldID, field.Name(), l, tf) - } - - docStored := s.Stored[docNum] - docStoredTypes := s.StoredTypes[docNum] - docStoredPos := s.StoredPos[docNum] - - // walk each field - for i, field := range result.Document.Fields { - fieldID := uint16(s.getOrDefineField(field.Name())) - l := result.Length[i] - tf := result.Analyzed[i] - processField(fieldID, field.Name(), l, tf) - if field.Options().IsStored() { - docStored[fieldID] = append(docStored[fieldID], field.Value()) - docStoredTypes[fieldID] = append(docStoredTypes[fieldID], 
encodeFieldType(field)) - docStoredPos[fieldID] = append(docStoredPos[fieldID], field.ArrayPositions()) - } - - if field.Options().IncludeDocValues() { - s.DocValueFields[fieldID] = true - } - } - - // now that its been rolled up into docMap, walk that - for fieldID, tokenFrequencies := range docMap { - dict := s.Dicts[fieldID] - norm := float32(1.0 / math.Sqrt(float64(fieldLens[fieldID]))) - for term, tokenFreq := range tokenFrequencies { - pid := dict[term] - 1 - bs := s.Postings[pid] - bs.AddInt(int(docNum)) - s.Freqs[pid] = append(s.Freqs[pid], uint64(tokenFreq.Frequency())) - s.Norms[pid] = append(s.Norms[pid], norm) - locationBS := s.PostingsLocs[pid] - if len(tokenFreq.Locations) > 0 { - locationBS.AddInt(int(docNum)) - - locfields := s.Locfields[pid] - locstarts := s.Locstarts[pid] - locends := s.Locends[pid] - locpos := s.Locpos[pid] - locarraypos := s.Locarraypos[pid] - - for _, loc := range tokenFreq.Locations { - var locf = uint16(fieldID) - if loc.Field != "" { - locf = uint16(s.getOrDefineField(loc.Field)) - } - locfields = append(locfields, locf) - locstarts = append(locstarts, uint64(loc.Start)) - locends = append(locends, uint64(loc.End)) - locpos = append(locpos, uint64(loc.Position)) - if len(loc.ArrayPositions) > 0 { - locarraypos = append(locarraypos, loc.ArrayPositions) - } else { - locarraypos = append(locarraypos, nil) - } - } - - s.Locfields[pid] = locfields - s.Locstarts[pid] = locstarts - s.Locends[pid] = locends - s.Locpos[pid] = locpos - s.Locarraypos[pid] = locarraypos - } - } - } -} - -func (s *Segment) getOrDefineField(name string) int { - fieldIDPlus1, ok := s.FieldsMap[name] - if !ok { - fieldIDPlus1 = uint16(len(s.FieldsInv) + 1) - s.FieldsMap[name] = fieldIDPlus1 - s.FieldsInv = append(s.FieldsInv, name) - s.Dicts = append(s.Dicts, make(map[string]uint64)) - s.DictKeys = append(s.DictKeys, make([]string, 0)) - } - return int(fieldIDPlus1 - 1) -} - -func (s *Segment) addDocument() int { - docNum := len(s.Stored) - s.Stored = 
append(s.Stored, map[uint16][][]byte{}) - s.StoredTypes = append(s.StoredTypes, map[uint16][]byte{}) - s.StoredPos = append(s.StoredPos, map[uint16][][]uint64{}) - return docNum -} - -func encodeFieldType(f document.Field) byte { - fieldType := byte('x') - switch f.(type) { - case *document.TextField: - fieldType = 't' - case *document.NumericField: - fieldType = 'n' - case *document.DateTimeField: - fieldType = 'd' - case *document.BooleanField: - fieldType = 'b' - case *document.GeoPointField: - fieldType = 'g' - case *document.CompositeField: - fieldType = 'c' - } - return fieldType -} diff --git a/index/scorch/segment/mem/dict.go b/index/scorch/segment/mem/dict.go deleted file mode 100644 index b74872371..000000000 --- a/index/scorch/segment/mem/dict.go +++ /dev/null @@ -1,239 +0,0 @@ -// Copyright (c) 2017 Couchbase, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package mem - -import ( - "math" - "reflect" - "regexp" - "sort" - "strings" - - "github.com/RoaringBitmap/roaring" - "github.com/blevesearch/bleve/index" - "github.com/blevesearch/bleve/index/scorch/segment" - "github.com/blevesearch/bleve/size" -) - -var reflectStaticSizeDictionary int - -func init() { - var d Dictionary - reflectStaticSizeDictionary = int(reflect.TypeOf(d).Size()) -} - -// Dictionary is the in-memory representation of the term dictionary -type Dictionary struct { - segment *Segment - field string - fieldID uint16 -} - -func (d *Dictionary) Size() int { - sizeInBytes := reflectStaticSizeDictionary + size.SizeOfPtr + - len(d.field) - - if d.segment != nil { - sizeInBytes += int(d.segment.Size()) - } - - return sizeInBytes -} - -// PostingsList returns the postings list for the specified term -func (d *Dictionary) PostingsList(term string, - except *roaring.Bitmap) (segment.PostingsList, error) { - return d.InitPostingsList(term, except, nil) -} - -func (d *Dictionary) InitPostingsList(term string, except *roaring.Bitmap, - prealloc *PostingsList) (*PostingsList, error) { - rv := prealloc - if rv == nil { - rv = &PostingsList{} - } - rv.dictionary = d - rv.term = term - rv.postingsID = d.segment.Dicts[d.fieldID][term] - rv.except = except - return rv, nil -} - -// Iterator returns an iterator for this dictionary -func (d *Dictionary) Iterator() segment.DictionaryIterator { - return &DictionaryIterator{ - d: d, - } -} - -// PrefixIterator returns an iterator which only visits terms having the -// the specified prefix -func (d *Dictionary) PrefixIterator(prefix string) segment.DictionaryIterator { - offset := sort.SearchStrings(d.segment.DictKeys[d.fieldID], prefix) - return &DictionaryIterator{ - d: d, - prefix: prefix, - offset: offset, - } -} - -// RangeIterator returns an iterator which only visits terms between the -// start and end terms. NOTE: bleve.index API specifies the end is inclusive. 
-func (d *Dictionary) RangeIterator(start, end string) segment.DictionaryIterator { - offset := sort.SearchStrings(d.segment.DictKeys[d.fieldID], start) - return &DictionaryIterator{ - d: d, - offset: offset, - end: end, - } -} - -// RegexpIterator returns an iterator which only visits terms matching -// the given regex expression. -func (d *Dictionary) RegexpIterator(pattern string) segment.DictionaryIterator { - regex, err := regexp.Compile(pattern) - if err != nil { - // invalid regexp, so set offset to the end - return &DictionaryIterator{ - d: d, - offset: len(d.segment.DictKeys[d.fieldID]), - } - } - return &DictionaryIterator{ - d: d, - regex: regex, - } -} - -// FuzzyIterator returns an iterator which only visits terms matching -// the given edit distance. -func (d *Dictionary) FuzzyIterator(term string, fuzziness int) segment.DictionaryIterator { - return &DictionaryIterator{ - d: d, - fuzzyTerm: term, - fuzziness: fuzziness, - } -} - -// DictionaryIterator is an iterator for term dictionary -type DictionaryIterator struct { - d *Dictionary - prefix string - end string - offset int - regex *regexp.Regexp - fuzzyTerm string - fuzziness int - - dictEntry index.DictEntry // reused across Next()'s -} - -// Next returns the next entry in the dictionary -func (d *DictionaryIterator) Next() (*index.DictEntry, error) { - if d.offset > len(d.d.segment.DictKeys[d.d.fieldID])-1 { - return nil, nil - } - next := d.d.segment.DictKeys[d.d.fieldID][d.offset] - // check prefix - if d.prefix != "" && !strings.HasPrefix(next, d.prefix) { - return nil, nil - } - // check end (bleve.index API demands inclusive end) - if d.end != "" && next > d.end { - return nil, nil - } - // check regexp - if d.regex != nil { - // keep going until we find a match, mindful of the end of the slice - for !d.regex.MatchString(next) { - d.offset++ - if d.offset > len(d.d.segment.DictKeys[d.d.fieldID])-1 { - return nil, nil - } - next = d.d.segment.DictKeys[d.d.fieldID][d.offset] - } - } - // 
check fuzziness - if d.fuzzyTerm != "" { - _, exceeded := LevenshteinDistanceMax(d.fuzzyTerm, next, d.fuzziness) - for exceeded { - d.offset++ - if d.offset > len(d.d.segment.DictKeys[d.d.fieldID])-1 { - return nil, nil - } - next = d.d.segment.DictKeys[d.d.fieldID][d.offset] - _, exceeded = LevenshteinDistanceMax(d.fuzzyTerm, next, d.fuzziness) - } - } - - d.offset++ - postingID := d.d.segment.Dicts[d.d.fieldID][next] - d.dictEntry.Term = next - d.dictEntry.Count = d.d.segment.Postings[postingID-1].GetCardinality() - return &d.dictEntry, nil -} - -// LevenshteinDistanceMax same as LevenshteinDistance but -// attempts to bail early once we know the distance -// will be greater than max -// in which case the first return val will be the max -// and the second will be true, indicating max was exceeded -func LevenshteinDistanceMax(a, b string, max int) (int, bool) { - la := len(a) - lb := len(b) - - ld := int(math.Abs(float64(la - lb))) - if ld > max { - return max, true - } - - d := make([]int, la+1) - var lastdiag, olddiag, temp int - - for i := 1; i <= la; i++ { - d[i] = i - } - for i := 1; i <= lb; i++ { - d[0] = i - lastdiag = i - 1 - rowmin := max + 1 - for j := 1; j <= la; j++ { - olddiag = d[j] - min := d[j] + 1 - if (d[j-1] + 1) < min { - min = d[j-1] + 1 - } - if a[j-1] == b[i-1] { - temp = 0 - } else { - temp = 1 - } - if (lastdiag + temp) < min { - min = lastdiag + temp - } - if min < rowmin { - rowmin = min - } - d[j] = min - - lastdiag = olddiag - } - // after each row if rowmin isn't less than max stop - if rowmin > max { - return max, true - } - } - return d[la], false -} diff --git a/index/scorch/segment/mem/dict_test.go b/index/scorch/segment/mem/dict_test.go deleted file mode 100644 index e69016a81..000000000 --- a/index/scorch/segment/mem/dict_test.go +++ /dev/null @@ -1,211 +0,0 @@ -// Copyright (c) 2017 Couchbase, Inc. 
-// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package mem - -import ( - "reflect" - "testing" - - "github.com/blevesearch/bleve/analysis" - "github.com/blevesearch/bleve/document" - "github.com/blevesearch/bleve/index" -) - -func TestDictionary(t *testing.T) { - doc := &document.Document{ - ID: "a", - Fields: []document.Field{ - document.NewTextFieldCustom("_id", nil, []byte("a"), document.IndexField|document.StoreField, nil), - document.NewTextFieldCustom("desc", nil, []byte("apple ball cat dog egg fish bat"), document.IndexField|document.StoreField|document.IncludeTermVectors, nil), - }, - } - - // forge analyzed docs - results := []*index.AnalysisResult{ - &index.AnalysisResult{ - Document: doc, - Analyzed: []analysis.TokenFrequencies{ - analysis.TokenFrequency(analysis.TokenStream{ - &analysis.Token{ - Start: 0, - End: 1, - Position: 1, - Term: []byte("a"), - }, - }, nil, false), - analysis.TokenFrequency(analysis.TokenStream{ - &analysis.Token{ - Start: 0, - End: 5, - Position: 1, - Term: []byte("apple"), - }, - &analysis.Token{ - Start: 6, - End: 10, - Position: 2, - Term: []byte("ball"), - }, - &analysis.Token{ - Start: 11, - End: 14, - Position: 3, - Term: []byte("cat"), - }, - &analysis.Token{ - Start: 15, - End: 18, - Position: 4, - Term: []byte("dog"), - }, - &analysis.Token{ - Start: 19, - End: 22, - Position: 5, - Term: []byte("egg"), - }, - &analysis.Token{ - Start: 20, - End: 24, - Position: 6, - Term: []byte("fish"), - }, - 
&analysis.Token{ - Start: 25, - End: 28, - Position: 7, - Term: []byte("bat"), - }, - }, nil, true), - }, - Length: []int{ - 1, - 7, - }, - }, - } - - segment := NewFromAnalyzedDocs(results) - if segment == nil { - t.Fatalf("segment nil, not expected") - } - - dict, err := segment.Dictionary("desc") - if err != nil { - t.Fatal(err) - } - - // test basic full iterator - expected := []string{"apple", "ball", "bat", "cat", "dog", "egg", "fish"} - var got []string - itr := dict.Iterator() - next, err := itr.Next() - for next != nil && err == nil { - got = append(got, next.Term) - next, err = itr.Next() - } - if err != nil { - t.Fatalf("dict itr error: %v", err) - } - - if !reflect.DeepEqual(expected, got) { - t.Errorf("expected: %v, got: %v", expected, got) - } - - // test prefix iterator - expected = []string{"ball", "bat"} - got = got[:0] - itr = dict.PrefixIterator("b") - next, err = itr.Next() - for next != nil && err == nil { - got = append(got, next.Term) - next, err = itr.Next() - } - if err != nil { - t.Fatalf("dict itr error: %v", err) - } - - if !reflect.DeepEqual(expected, got) { - t.Errorf("expected: %v, got: %v", expected, got) - } - - // test range iterator - expected = []string{"cat", "dog", "egg"} - got = got[:0] - itr = dict.RangeIterator("cat", "egg") - next, err = itr.Next() - for next != nil && err == nil { - got = append(got, next.Term) - next, err = itr.Next() - } - if err != nil { - t.Fatalf("dict itr error: %v", err) - } - - if !reflect.DeepEqual(expected, got) { - t.Errorf("expected: %v, got: %v", expected, got) - } - - // test regexp iterator - expected = []string{"ball", "bat"} - got = got[:0] - itr = dict.RegexpIterator("ba.*") - next, err = itr.Next() - for next != nil && err == nil { - got = append(got, next.Term) - next, err = itr.Next() - } - if err != nil { - t.Fatalf("dict itr error: %v", err) - } - - if !reflect.DeepEqual(expected, got) { - t.Errorf("expected: %v, got: %v", expected, got) - } - - // test regexp iterator with invalid 
regexp - expected = []string{} - got = got[:0] - itr = dict.RegexpIterator(string([]byte{0xff})) - next, err = itr.Next() - for next != nil && err == nil { - got = append(got, next.Term) - next, err = itr.Next() - } - if err != nil { - t.Fatalf("dict itr error: %v", err) - } - - if !reflect.DeepEqual(expected, got) { - t.Errorf("expected: %v, got: %v", expected, got) - } - - // test fuzzy iterator - expected = []string{"bat", "cat"} - got = got[:0] - itr = dict.FuzzyIterator("vat", 1) - next, err = itr.Next() - for next != nil && err == nil { - got = append(got, next.Term) - next, err = itr.Next() - } - if err != nil { - t.Fatalf("dict itr error: %v", err) - } - - if !reflect.DeepEqual(expected, got) { - t.Errorf("expected: %v, got: %v", expected, got) - } -} diff --git a/index/scorch/segment/mem/posting.go b/index/scorch/segment/mem/posting.go deleted file mode 100644 index 362fdb7c5..000000000 --- a/index/scorch/segment/mem/posting.go +++ /dev/null @@ -1,247 +0,0 @@ -// Copyright (c) 2017 Couchbase, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package mem - -import ( - "reflect" - - "github.com/RoaringBitmap/roaring" - "github.com/blevesearch/bleve/index/scorch/segment" - "github.com/blevesearch/bleve/size" -) - -var reflectStaticSizePostingsList int -var reflectStaticSizePostingsIterator int -var reflectStaticSizePosting int -var reflectStaticSizeLocation int - -func init() { - var pl PostingsList - reflectStaticSizePostingsList = int(reflect.TypeOf(pl).Size()) - var pi PostingsIterator - reflectStaticSizePostingsIterator = int(reflect.TypeOf(pi).Size()) - var p Posting - reflectStaticSizePosting = int(reflect.TypeOf(p).Size()) - var l Location - reflectStaticSizeLocation = int(reflect.TypeOf(l).Size()) -} - -// PostingsList is an in-memory represenation of a postings list -type PostingsList struct { - dictionary *Dictionary - term string - postingsID uint64 - except *roaring.Bitmap -} - -func (p *PostingsList) Size() int { - sizeInBytes := reflectStaticSizePostingsList + size.SizeOfPtr - - if p.dictionary != nil { - sizeInBytes += p.dictionary.Size() - } - - if p.except != nil { - sizeInBytes += int(p.except.GetSizeInBytes()) - } - - return sizeInBytes -} - -// Count returns the number of items on this postings list -func (p *PostingsList) Count() uint64 { - var rv uint64 - if p.postingsID > 0 { - rv = p.dictionary.segment.Postings[p.postingsID-1].GetCardinality() - if p.except != nil { - except := p.except.GetCardinality() - if except > rv { - // avoid underflow - except = rv - } - rv -= except - } - } - return rv -} - -// Iterator returns an iterator for this postings list -func (p *PostingsList) Iterator(includeFreq, includeNorm, includeLocations bool) segment.PostingsIterator { - return p.InitIterator(nil) -} -func (p *PostingsList) InitIterator(prealloc *PostingsIterator) *PostingsIterator { - rv := prealloc - if rv == nil { - rv = &PostingsIterator{postings: p} - } else { - *rv = PostingsIterator{postings: p} - } - - if p.postingsID > 0 { - allbits := 
p.dictionary.segment.Postings[p.postingsID-1] - rv.locations = p.dictionary.segment.PostingsLocs[p.postingsID-1] - rv.all = allbits.Iterator() - if p.except != nil { - allExcept := allbits.Clone() - allExcept.AndNot(p.except) - rv.actual = allExcept.Iterator() - } else { - rv.actual = allbits.Iterator() - } - } - - return rv -} - -// PostingsIterator provides a way to iterate through the postings list -type PostingsIterator struct { - postings *PostingsList - all roaring.IntIterable - locations *roaring.Bitmap - offset int - locoffset int - actual roaring.IntIterable - reuse Posting -} - -func (i *PostingsIterator) Size() int { - sizeInBytes := reflectStaticSizePostingsIterator + size.SizeOfPtr - - if i.locations != nil { - sizeInBytes += int(i.locations.GetSizeInBytes()) - } - - return sizeInBytes -} - -// Next returns the next posting on the postings list, or nil at the end -func (i *PostingsIterator) Next() (segment.Posting, error) { - if i.actual == nil || !i.actual.HasNext() { - return nil, nil - } - n := i.actual.Next() - allN := i.all.Next() - - // n is the next actual hit (excluding some postings) - // allN is the next hit in the full postings - // if they don't match, adjust offsets to factor in item we're skipping over - // incr the all iterator, and check again - for allN != n { - i.locoffset += int(i.postings.dictionary.segment.Freqs[i.postings.postingsID-1][i.offset]) - i.offset++ - allN = i.all.Next() - } - i.reuse = Posting{ - iterator: i, - docNum: uint64(n), - offset: i.offset, - locoffset: i.locoffset, - hasLoc: i.locations.Contains(n), - } - i.locoffset += int(i.postings.dictionary.segment.Freqs[i.postings.postingsID-1][i.offset]) - i.offset++ - return &i.reuse, nil -} - -// Posting is a single entry in a postings list -type Posting struct { - iterator *PostingsIterator - docNum uint64 - offset int - locoffset int - hasLoc bool -} - -func (p *Posting) Size() int { - sizeInBytes := reflectStaticSizePosting + size.SizeOfPtr - - if p.iterator != nil 
{ - sizeInBytes += p.iterator.Size() - } - - return sizeInBytes -} - -// Number returns the document number of this posting in this segment -func (p *Posting) Number() uint64 { - return p.docNum -} - -// Frequency returns the frequence of occurance of this term in this doc/field -func (p *Posting) Frequency() uint64 { - return p.iterator.postings.dictionary.segment.Freqs[p.iterator.postings.postingsID-1][p.offset] -} - -// Norm returns the normalization factor for this posting -func (p *Posting) Norm() float64 { - return float64(p.iterator.postings.dictionary.segment.Norms[p.iterator.postings.postingsID-1][p.offset]) -} - -// Locations returns the location information for each occurance -func (p *Posting) Locations() []segment.Location { - if !p.hasLoc { - return nil - } - freq := int(p.Frequency()) - rv := make([]segment.Location, freq) - for i := 0; i < freq; i++ { - rv[i] = &Location{ - p: p, - offset: p.locoffset + i, - } - } - return rv -} - -// Location represents the location of a single occurance -type Location struct { - p *Posting - offset int -} - -func (l *Location) Size() int { - sizeInBytes := reflectStaticSizeLocation - if l.p != nil { - sizeInBytes += l.p.Size() - } - - return sizeInBytes -} - -// Field returns the name of the field (useful in composite fields to know -// which original field the value came from) -func (l *Location) Field() string { - return l.p.iterator.postings.dictionary.segment.FieldsInv[l.p.iterator.postings.dictionary.segment.Locfields[l.p.iterator.postings.postingsID-1][l.offset]] -} - -// Start returns the start byte offset of this occurance -func (l *Location) Start() uint64 { - return l.p.iterator.postings.dictionary.segment.Locstarts[l.p.iterator.postings.postingsID-1][l.offset] -} - -// End returns the end byte offset of this occurance -func (l *Location) End() uint64 { - return l.p.iterator.postings.dictionary.segment.Locends[l.p.iterator.postings.postingsID-1][l.offset] -} - -// Pos returns the 1-based phrase position 
of this occurance -func (l *Location) Pos() uint64 { - return l.p.iterator.postings.dictionary.segment.Locpos[l.p.iterator.postings.postingsID-1][l.offset] -} - -// ArrayPositions returns the array position vector associated with this occurance -func (l *Location) ArrayPositions() []uint64 { - return l.p.iterator.postings.dictionary.segment.Locarraypos[l.p.iterator.postings.postingsID-1][l.offset] -} diff --git a/index/scorch/segment/mem/segment.go b/index/scorch/segment/mem/segment.go deleted file mode 100644 index e9c4a2730..000000000 --- a/index/scorch/segment/mem/segment.go +++ /dev/null @@ -1,286 +0,0 @@ -// Copyright (c) 2017 Couchbase, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package mem - -import ( - "fmt" - "reflect" - - "github.com/RoaringBitmap/roaring" - "github.com/blevesearch/bleve/index/scorch/segment" - "github.com/blevesearch/bleve/size" -) - -var reflectStaticSizeSegment int - -func init() { - var s Segment - reflectStaticSizeSegment = int(reflect.TypeOf(s).Size()) -} - -// _id field is always guaranteed to have fieldID of 0 -const idFieldID uint16 = 0 - -// KNOWN ISSUES -// - LIMITATION - we decided whether or not to store term vectors for a field -// at the segment level, based on the first definition of a -// field we see. in normal bleve usage this is fine, all -// instances of a field definition will be the same. however, -// advanced users may violate this and provide unique field -// definitions with each document. 
this segment does not -// support this usage. - -// TODO -// - need better testing of multiple docs, iterating freqs, locations and -// and verifying the correct results are returned - -// Segment is an in memory implementation of scorch.Segment -type Segment struct { - - // FieldsMap adds 1 to field id to avoid zero value issues - // name -> field id + 1 - FieldsMap map[string]uint16 - - // FieldsInv is the inverse of FieldsMap - // field id -> name - FieldsInv []string - - // Term dictionaries for each field - // field id -> term -> postings list id + 1 - Dicts []map[string]uint64 - - // Terms for each field, where terms are sorted ascending - // field id -> []term - DictKeys [][]string - - // Postings list - // postings list id -> bitmap by docNum - Postings []*roaring.Bitmap - - // Postings list has locations - PostingsLocs []*roaring.Bitmap - - // Term frequencies - // postings list id -> Freqs (one for each hit in bitmap) - Freqs [][]uint64 - - // Field norms - // postings list id -> Norms (one for each hit in bitmap) - Norms [][]float32 - - // Field/start/end/pos/locarraypos - // postings list id -> start/end/pos/locarraypos (one for each freq) - Locfields [][]uint16 - Locstarts [][]uint64 - Locends [][]uint64 - Locpos [][]uint64 - Locarraypos [][][]uint64 - - // Stored field values - // docNum -> field id -> slice of values (each value []byte) - Stored []map[uint16][][]byte - - // Stored field types - // docNum -> field id -> slice of types (each type byte) - StoredTypes []map[uint16][]byte - - // Stored field array positions - // docNum -> field id -> slice of array positions (each is []uint64) - StoredPos []map[uint16][][]uint64 - - // For storing the docValue persisted fields - DocValueFields map[uint16]bool - - // Footprint of the segment, updated when analyzed document mutations - // are added into the segment - sizeInBytes int -} - -// New builds a new empty Segment -func New() *Segment { - return &Segment{ - FieldsMap: map[string]uint16{}, - 
DocValueFields: map[uint16]bool{}, - } -} - -func (s *Segment) updateSize() { - sizeInBytes := reflectStaticSizeSegment - - // FieldsMap, FieldsInv - for k, _ := range s.FieldsMap { - sizeInBytes += (len(k)+size.SizeOfString)*2 + - size.SizeOfUint16 - } - - // Dicts, DictKeys - for _, entry := range s.Dicts { - for k, _ := range entry { - sizeInBytes += (len(k)+size.SizeOfString)*2 + - size.SizeOfUint64 - } - // overhead from the data structures - sizeInBytes += (size.SizeOfMap + size.SizeOfSlice) - } - - // Postings, PostingsLocs - for i := 0; i < len(s.Postings); i++ { - sizeInBytes += (int(s.Postings[i].GetSizeInBytes()) + size.SizeOfPtr) + - (int(s.PostingsLocs[i].GetSizeInBytes()) + size.SizeOfPtr) - } - - // Freqs, Norms - for i := 0; i < len(s.Freqs); i++ { - sizeInBytes += (len(s.Freqs[i])*size.SizeOfUint64 + - len(s.Norms[i])*size.SizeOfFloat32) + - (size.SizeOfSlice * 2) - } - - // Location data - for i := 0; i < len(s.Locfields); i++ { - sizeInBytes += len(s.Locfields[i])*size.SizeOfUint16 + - len(s.Locstarts[i])*size.SizeOfUint64 + - len(s.Locends[i])*size.SizeOfUint64 + - len(s.Locpos[i])*size.SizeOfUint64 - - for j := 0; j < len(s.Locarraypos[i]); j++ { - sizeInBytes += len(s.Locarraypos[i][j])*size.SizeOfUint64 + - size.SizeOfSlice - } - - sizeInBytes += (size.SizeOfSlice * 5) - } - - // Stored data - for i := 0; i < len(s.Stored); i++ { - for _, v := range s.Stored[i] { - sizeInBytes += size.SizeOfUint16 - for _, arr := range v { - sizeInBytes += len(arr) + size.SizeOfSlice - } - sizeInBytes += size.SizeOfSlice - } - - for _, v := range s.StoredTypes[i] { - sizeInBytes += size.SizeOfUint16 + len(v) + size.SizeOfSlice - } - - for _, v := range s.StoredPos[i] { - sizeInBytes += size.SizeOfUint16 - for _, arr := range v { - sizeInBytes += len(arr)*size.SizeOfUint64 + - size.SizeOfSlice - } - sizeInBytes += size.SizeOfSlice - } - - // overhead from map(s) within Stored, StoredTypes, StoredPos - sizeInBytes += (size.SizeOfMap * 3) - } - - // 
DocValueFields - sizeInBytes += len(s.DocValueFields) * (size.SizeOfUint16 + size.SizeOfBool) - - s.sizeInBytes = sizeInBytes -} - -func (s *Segment) Size() int { - return s.sizeInBytes -} - -func (s *Segment) AddRef() { -} - -func (s *Segment) DecRef() error { - return nil -} - -// Fields returns the field names used in this segment -func (s *Segment) Fields() []string { - return s.FieldsInv -} - -// VisitDocument invokes the DocFieldValueVistor for each stored field -// for the specified doc number -func (s *Segment) VisitDocument(num uint64, visitor segment.DocumentFieldValueVisitor) error { - // ensure document number exists - if int(num) > len(s.Stored)-1 { - return nil - } - docFields := s.Stored[int(num)] - st := s.StoredTypes[int(num)] - sp := s.StoredPos[int(num)] - for field, values := range docFields { - for i, value := range values { - keepGoing := visitor(s.FieldsInv[field], st[field][i], value, sp[field][i]) - if !keepGoing { - return nil - } - } - } - return nil -} - -func (s *Segment) getField(name string) (int, error) { - fieldID, ok := s.FieldsMap[name] - if !ok { - return 0, fmt.Errorf("no field named %s", name) - } - return int(fieldID - 1), nil -} - -// Dictionary returns the term dictionary for the specified field -func (s *Segment) Dictionary(field string) (segment.TermDictionary, error) { - fieldID, err := s.getField(field) - if err != nil { - // no such field, return empty dictionary - return &segment.EmptyDictionary{}, nil - } - return &Dictionary{ - segment: s, - field: field, - fieldID: uint16(fieldID), - }, nil -} - -// Count returns the number of documents in this segment -// (this has no notion of deleted docs) -func (s *Segment) Count() uint64 { - return uint64(len(s.Stored)) -} - -// DocNumbers returns a bitset corresponding to the doc numbers of all the -// provided _id strings -func (s *Segment) DocNumbers(ids []string) (*roaring.Bitmap, error) { - rv := roaring.New() - - // guard against empty segment - if len(s.FieldsMap) > 0 { 
- idDictionary := s.Dicts[idFieldID] - - for _, id := range ids { - postingID := idDictionary[id] - if postingID > 0 { - rv.Or(s.Postings[postingID-1]) - } - } - } - return rv, nil -} - -// Close releases all resources associated with this segment -func (s *Segment) Close() error { - return nil -} diff --git a/index/scorch/segment/mem/segment_test.go b/index/scorch/segment/mem/segment_test.go deleted file mode 100644 index 79a11d122..000000000 --- a/index/scorch/segment/mem/segment_test.go +++ /dev/null @@ -1,876 +0,0 @@ -// Copyright (c) 2017 Couchbase, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package mem - -import ( - "math" - "reflect" - "testing" - - "github.com/blevesearch/bleve/analysis" - "github.com/blevesearch/bleve/document" - "github.com/blevesearch/bleve/index" -) - -func TestEmpty(t *testing.T) { - - emptySegment := New() - - if emptySegment.Count() != 0 { - t.Errorf("expected count 0, got %d", emptySegment.Count()) - } - - dict, err := emptySegment.Dictionary("name") - if err != nil { - t.Fatal(err) - } - if dict == nil { - t.Fatal("got nil dict, expected non-nil") - } - - postingsList, err := dict.PostingsList("marty", nil) - if err != nil { - t.Fatal(err) - } - if postingsList == nil { - t.Fatal("got nil postings list, expected non-nil") - } - - postingsItr := postingsList.Iterator(true, true, true) - if postingsItr == nil { - t.Fatal("got nil iterator, expected non-nil") - } - - count := 0 - nextPosting, err := postingsItr.Next() - for nextPosting != nil && err == nil { - count++ - nextPosting, err = postingsItr.Next() - } - if err != nil { - t.Fatal(err) - } - - if count != 0 { - t.Errorf("expected count to be 0, got %d", count) - } - - // now try and visit a document - err = emptySegment.VisitDocument(0, func(field string, typ byte, value []byte, pos []uint64) bool { - t.Errorf("document visitor called, not expected") - return true - }) - if err != nil { - t.Fatal(err) - } -} - -func TestSingle(t *testing.T) { - - doc := &document.Document{ - ID: "a", - Fields: []document.Field{ - document.NewTextFieldCustom("_id", nil, []byte("a"), document.IndexField|document.StoreField, nil), - document.NewTextFieldCustom("name", nil, []byte("wow"), document.IndexField|document.StoreField|document.IncludeTermVectors, nil), - document.NewTextFieldCustom("desc", nil, []byte("some thing"), document.IndexField|document.StoreField|document.IncludeTermVectors, nil), - document.NewTextFieldCustom("tag", []uint64{0}, []byte("cold"), document.IndexField|document.StoreField|document.IncludeTermVectors, nil), - document.NewTextFieldCustom("tag", []uint64{1}, 
[]byte("dark"), document.IndexField|document.StoreField|document.IncludeTermVectors, nil), - }, - CompositeFields: []*document.CompositeField{ - document.NewCompositeField("_all", true, nil, nil), - }, - } - - // forge analyzed docs - results := []*index.AnalysisResult{ - &index.AnalysisResult{ - Document: doc, - Analyzed: []analysis.TokenFrequencies{ - analysis.TokenFrequency(analysis.TokenStream{ - &analysis.Token{ - Start: 0, - End: 1, - Position: 1, - Term: []byte("a"), - }, - }, nil, false), - analysis.TokenFrequency(analysis.TokenStream{ - &analysis.Token{ - Start: 0, - End: 3, - Position: 1, - Term: []byte("wow"), - }, - }, nil, true), - analysis.TokenFrequency(analysis.TokenStream{ - &analysis.Token{ - Start: 0, - End: 4, - Position: 1, - Term: []byte("some"), - }, - &analysis.Token{ - Start: 5, - End: 10, - Position: 2, - Term: []byte("thing"), - }, - }, nil, true), - analysis.TokenFrequency(analysis.TokenStream{ - &analysis.Token{ - Start: 0, - End: 4, - Position: 1, - Term: []byte("cold"), - }, - }, []uint64{0}, true), - analysis.TokenFrequency(analysis.TokenStream{ - &analysis.Token{ - Start: 0, - End: 4, - Position: 1, - Term: []byte("dark"), - }, - }, []uint64{1}, true), - }, - Length: []int{ - 1, - 1, - 2, - 1, - 1, - }, - }, - } - - // fix up composite fields - for _, ar := range results { - for i, f := range ar.Document.Fields { - for _, cf := range ar.Document.CompositeFields { - cf.Compose(f.Name(), ar.Length[i], ar.Analyzed[i]) - } - } - } - - segment := NewFromAnalyzedDocs(results) - if segment == nil { - t.Fatalf("segment nil, not expected") - } - - if segment.Size() <= 0 { - t.Fatalf("segment size not updated") - } - - expectFields := map[string]struct{}{ - "_id": struct{}{}, - "_all": struct{}{}, - "name": struct{}{}, - "desc": struct{}{}, - "tag": struct{}{}, - } - fields := segment.Fields() - if len(fields) != len(expectFields) { - t.Errorf("expected %d fields, only got %d", len(expectFields), len(fields)) - } - for _, field := range 
fields { - if _, ok := expectFields[field]; !ok { - t.Errorf("got unexpected field: %s", field) - } - } - - if segment.Count() != 1 { - t.Errorf("expected count 1, got %d", segment.Count()) - } - - // check the _id field - dict, err := segment.Dictionary("_id") - if err != nil { - t.Fatal(err) - } - if dict == nil { - t.Fatal("got nil dict, expected non-nil") - } - - postingsList, err := dict.PostingsList("a", nil) - if err != nil { - t.Fatal(err) - } - if postingsList == nil { - t.Fatal("got nil postings list, expected non-nil") - } - - postingsItr := postingsList.Iterator(true, true, true) - if postingsItr == nil { - t.Fatal("got nil iterator, expected non-nil") - } - - count := 0 - nextPosting, err := postingsItr.Next() - for nextPosting != nil && err == nil { - count++ - if nextPosting.Frequency() != 1 { - t.Errorf("expected frequency 1, got %d", nextPosting.Frequency()) - } - if nextPosting.Number() != 0 { - t.Errorf("expected doc number 0, got %d", nextPosting.Number()) - } - if nextPosting.Norm() != 1.0 { - t.Errorf("expected norm 1.0, got %f", nextPosting.Norm()) - } - - nextPosting, err = postingsItr.Next() - } - if err != nil { - t.Fatal(err) - } - - if count != 1 { - t.Errorf("expected count to be 1, got %d", count) - } - - // check the name field - dict, err = segment.Dictionary("name") - if err != nil { - t.Fatal(err) - } - if dict == nil { - t.Fatal("got nil dict, expected non-nil") - } - - postingsList, err = dict.PostingsList("wow", nil) - if err != nil { - t.Fatal(err) - } - if postingsList == nil { - t.Fatal("got nil postings list, expected non-nil") - } - - postingsItr = postingsList.Iterator(true, true, true) - if postingsItr == nil { - t.Fatal("got nil iterator, expected non-nil") - } - - count = 0 - nextPosting, err = postingsItr.Next() - for nextPosting != nil && err == nil { - count++ - if nextPosting.Frequency() != 1 { - t.Errorf("expected frequency 1, got %d", nextPosting.Frequency()) - } - if nextPosting.Number() != 0 { - 
t.Errorf("expected doc number 0, got %d", nextPosting.Number()) - } - if nextPosting.Norm() != 1.0 { - t.Errorf("expected norm 1.0, got %f", nextPosting.Norm()) - } - var numLocs uint64 - for _, loc := range nextPosting.Locations() { - numLocs++ - if loc.Field() != "name" { - t.Errorf("expected loc field to be 'name', got '%s'", loc.Field()) - } - if loc.Start() != 0 { - t.Errorf("expected loc start to be 0, got %d", loc.Start()) - } - if loc.End() != 3 { - t.Errorf("expected loc end to be 3, got %d", loc.End()) - } - if loc.Pos() != 1 { - t.Errorf("expected loc pos to be 1, got %d", loc.Pos()) - } - if loc.ArrayPositions() != nil { - t.Errorf("expect loc array pos to be nil, got %v", loc.ArrayPositions()) - } - } - if numLocs != nextPosting.Frequency() { - t.Errorf("expected %d locations, got %d", nextPosting.Frequency(), numLocs) - } - - nextPosting, err = postingsItr.Next() - } - if err != nil { - t.Fatal(err) - } - - if count != 1 { - t.Errorf("expected count to be 1, got %d", count) - } - - // check the _all field (composite) - dict, err = segment.Dictionary("_all") - if err != nil { - t.Fatal(err) - } - if dict == nil { - t.Fatal("got nil dict, expected non-nil") - } - - postingsList, err = dict.PostingsList("wow", nil) - if err != nil { - t.Fatal(err) - } - if postingsList == nil { - t.Fatal("got nil postings list, expected non-nil") - } - - postingsItr = postingsList.Iterator(true, true, true) - if postingsItr == nil { - t.Fatal("got nil iterator, expected non-nil") - } - - count = 0 - nextPosting, err = postingsItr.Next() - for nextPosting != nil && err == nil { - count++ - if nextPosting.Frequency() != 1 { - t.Errorf("expected frequency 1, got %d", nextPosting.Frequency()) - } - if nextPosting.Number() != 0 { - t.Errorf("expected doc number 0, got %d", nextPosting.Number()) - } - expectedNorm := float32(1.0 / math.Sqrt(float64(6))) - if nextPosting.Norm() != float64(expectedNorm) { - t.Errorf("expected norm %f, got %f", expectedNorm, nextPosting.Norm()) - 
} - var numLocs uint64 - for _, loc := range nextPosting.Locations() { - numLocs++ - if loc.Field() != "name" { - t.Errorf("expected loc field to be 'name', got '%s'", loc.Field()) - } - if loc.Start() != 0 { - t.Errorf("expected loc start to be 0, got %d", loc.Start()) - } - if loc.End() != 3 { - t.Errorf("expected loc end to be 3, got %d", loc.End()) - } - if loc.Pos() != 1 { - t.Errorf("expected loc pos to be 1, got %d", loc.Pos()) - } - if loc.ArrayPositions() != nil { - t.Errorf("expect loc array pos to be nil, got %v", loc.ArrayPositions()) - } - } - if numLocs != nextPosting.Frequency() { - t.Errorf("expected %d locations, got %d", nextPosting.Frequency(), numLocs) - } - - nextPosting, err = postingsItr.Next() - } - if err != nil { - t.Fatal(err) - } - - if count != 1 { - t.Errorf("expected count to be 1, got %d", count) - } - - // now try a field with array positions - dict, err = segment.Dictionary("tag") - if err != nil { - t.Fatal(err) - } - if dict == nil { - t.Fatal("got nil dict, expected non-nil") - } - - postingsList, err = dict.PostingsList("dark", nil) - if err != nil { - t.Fatal(err) - } - if postingsList == nil { - t.Fatal("got nil postings list, expected non-nil") - } - - postingsItr = postingsList.Iterator(true, true, true) - if postingsItr == nil { - t.Fatal("got nil iterator, expected non-nil") - } - - nextPosting, err = postingsItr.Next() - for nextPosting != nil && err == nil { - - if nextPosting.Frequency() != 1 { - t.Errorf("expected frequency 1, got %d", nextPosting.Frequency()) - } - if nextPosting.Number() != 0 { - t.Errorf("expected doc number 0, got %d", nextPosting.Number()) - } - var numLocs uint64 - for _, loc := range nextPosting.Locations() { - numLocs++ - if loc.Field() != "tag" { - t.Errorf("expected loc field to be 'name', got '%s'", loc.Field()) - } - if loc.Start() != 0 { - t.Errorf("expected loc start to be 0, got %d", loc.Start()) - } - if loc.End() != 4 { - t.Errorf("expected loc end to be 3, got %d", loc.End()) - } - 
if loc.Pos() != 1 { - t.Errorf("expected loc pos to be 1, got %d", loc.Pos()) - } - expectArrayPos := []uint64{1} - if !reflect.DeepEqual(loc.ArrayPositions(), expectArrayPos) { - t.Errorf("expect loc array pos to be %v, got %v", expectArrayPos, loc.ArrayPositions()) - } - } - if numLocs != nextPosting.Frequency() { - t.Errorf("expected %d locations, got %d", nextPosting.Frequency(), numLocs) - } - - nextPosting, err = postingsItr.Next() - } - if err != nil { - t.Fatal(err) - } - - // now try and visit a document - var fieldValuesSeen int - err = segment.VisitDocument(0, func(field string, typ byte, value []byte, pos []uint64) bool { - fieldValuesSeen++ - return true - }) - if err != nil { - t.Fatal(err) - } - if fieldValuesSeen != 5 { - t.Errorf("expected 5 field values, got %d", fieldValuesSeen) - } - -} - -func TestMultiple(t *testing.T) { - - doc := &document.Document{ - ID: "a", - Fields: []document.Field{ - document.NewTextFieldCustom("_id", nil, []byte("a"), document.IndexField|document.StoreField, nil), - document.NewTextFieldCustom("name", nil, []byte("wow"), document.IndexField|document.StoreField|document.IncludeTermVectors, nil), - document.NewTextFieldCustom("desc", nil, []byte("some thing"), document.IndexField|document.StoreField|document.IncludeTermVectors, nil), - document.NewTextFieldCustom("tag", []uint64{0}, []byte("cold"), document.IndexField|document.StoreField|document.IncludeTermVectors, nil), - document.NewTextFieldCustom("tag", []uint64{1}, []byte("dark"), document.IndexField|document.StoreField|document.IncludeTermVectors, nil), - }, - CompositeFields: []*document.CompositeField{ - document.NewCompositeField("_all", true, nil, nil), - }, - } - - doc2 := &document.Document{ - ID: "b", - Fields: []document.Field{ - document.NewTextFieldCustom("_id", nil, []byte("b"), document.IndexField|document.StoreField, nil), - document.NewTextFieldCustom("name", nil, []byte("who"), document.IndexField|document.StoreField|document.IncludeTermVectors, 
nil), - document.NewTextFieldCustom("desc", nil, []byte("some thing"), document.IndexField|document.StoreField|document.IncludeTermVectors, nil), - document.NewTextFieldCustom("tag", []uint64{0}, []byte("cold"), document.IndexField|document.StoreField|document.IncludeTermVectors, nil), - document.NewTextFieldCustom("tag", []uint64{1}, []byte("dark"), document.IndexField|document.StoreField|document.IncludeTermVectors, nil), - }, - CompositeFields: []*document.CompositeField{ - document.NewCompositeField("_all", true, nil, nil), - }, - } - - // forge analyzed docs - results := []*index.AnalysisResult{ - &index.AnalysisResult{ - Document: doc, - Analyzed: []analysis.TokenFrequencies{ - analysis.TokenFrequency(analysis.TokenStream{ - &analysis.Token{ - Start: 0, - End: 1, - Position: 1, - Term: []byte("a"), - }, - }, nil, false), - analysis.TokenFrequency(analysis.TokenStream{ - &analysis.Token{ - Start: 0, - End: 3, - Position: 1, - Term: []byte("wow"), - }, - }, nil, true), - analysis.TokenFrequency(analysis.TokenStream{ - &analysis.Token{ - Start: 0, - End: 4, - Position: 1, - Term: []byte("some"), - }, - &analysis.Token{ - Start: 5, - End: 10, - Position: 2, - Term: []byte("thing"), - }, - }, nil, true), - analysis.TokenFrequency(analysis.TokenStream{ - &analysis.Token{ - Start: 0, - End: 4, - Position: 1, - Term: []byte("cold"), - }, - }, []uint64{0}, true), - analysis.TokenFrequency(analysis.TokenStream{ - &analysis.Token{ - Start: 0, - End: 4, - Position: 1, - Term: []byte("dark"), - }, - }, []uint64{1}, true), - }, - Length: []int{ - 1, - 1, - 2, - 1, - 1, - }, - }, - &index.AnalysisResult{ - Document: doc2, - Analyzed: []analysis.TokenFrequencies{ - analysis.TokenFrequency(analysis.TokenStream{ - &analysis.Token{ - Start: 0, - End: 1, - Position: 1, - Term: []byte("b"), - }, - }, nil, false), - analysis.TokenFrequency(analysis.TokenStream{ - &analysis.Token{ - Start: 0, - End: 3, - Position: 1, - Term: []byte("who"), - }, - }, nil, true), - 
analysis.TokenFrequency(analysis.TokenStream{ - &analysis.Token{ - Start: 0, - End: 4, - Position: 1, - Term: []byte("some"), - }, - &analysis.Token{ - Start: 5, - End: 10, - Position: 2, - Term: []byte("thing"), - }, - }, nil, true), - analysis.TokenFrequency(analysis.TokenStream{ - &analysis.Token{ - Start: 0, - End: 4, - Position: 1, - Term: []byte("cold"), - }, - }, []uint64{0}, true), - analysis.TokenFrequency(analysis.TokenStream{ - &analysis.Token{ - Start: 0, - End: 4, - Position: 1, - Term: []byte("dark"), - }, - }, []uint64{1}, true), - }, - Length: []int{ - 1, - 1, - 2, - 1, - 1, - }, - }, - } - - // fix up composite fields - for _, ar := range results { - for i, f := range ar.Document.Fields { - for _, cf := range ar.Document.CompositeFields { - cf.Compose(f.Name(), ar.Length[i], ar.Analyzed[i]) - } - } - } - - segment := NewFromAnalyzedDocs(results) - if segment == nil { - t.Fatalf("segment nil, not expected") - } - - if segment.Count() != 2 { - t.Errorf("expected count 2, got %d", segment.Count()) - } - - // check the desc field - dict, err := segment.Dictionary("desc") - if err != nil { - t.Fatal(err) - } - if dict == nil { - t.Fatal("got nil dict, expected non-nil") - } - - postingsList, err := dict.PostingsList("thing", nil) - if err != nil { - t.Fatal(err) - } - if postingsList == nil { - t.Fatal("got nil postings list, expected non-nil") - } - - postingsItr := postingsList.Iterator(true, true, true) - if postingsItr == nil { - t.Fatal("got nil iterator, expected non-nil") - } - - count := 0 - nextPosting, err := postingsItr.Next() - for nextPosting != nil && err == nil { - count++ - nextPosting, err = postingsItr.Next() - } - if err != nil { - t.Fatal(err) - } - - if count != 2 { - t.Errorf("expected count to be 2, got %d", count) - } - - // get docnum of a - exclude, err := segment.DocNumbers([]string{"a"}) - if err != nil { - t.Fatal(err) - } - - // look for term 'thing' excluding doc 'a' - postingsListExcluding, err := 
dict.PostingsList("thing", exclude) - if err != nil { - t.Fatal(err) - } - if postingsList == nil { - t.Fatal("got nil postings list, expected non-nil") - } - - postingsListExcludingCount := postingsListExcluding.Count() - if postingsListExcludingCount != 1 { - t.Errorf("expected count from postings list to be 1, got %d", postingsListExcludingCount) - } - - postingsItrExcluding := postingsListExcluding.Iterator(true, true, true) - if postingsItr == nil { - t.Fatal("got nil iterator, expected non-nil") - } - - count = 0 - nextPosting, err = postingsItrExcluding.Next() - for nextPosting != nil && err == nil { - count++ - nextPosting, err = postingsItrExcluding.Next() - } - if err != nil { - t.Fatal(err) - } - - if count != 1 { - t.Errorf("expected count to be 1, got %d", count) - } - -} - -func TestMultipleWithNonOverlappingFields(t *testing.T) { - doc1 := &document.Document{ - ID: "a", - Fields: []document.Field{ - document.NewTextField("_id", []uint64{}, []byte("a")), - document.NewTextField("name", []uint64{}, []byte("ABC")), - document.NewTextField("dept", []uint64{}, []byte("ABC dept")), - document.NewTextField("manages.id", []uint64{}, []byte("XYZ")), - document.NewTextField("manages.count", []uint64{}, []byte("1")), - }, - CompositeFields: []*document.CompositeField{ - document.NewCompositeField("_all", true, nil, []string{"_id"}), - }, - } - - doc2 := &document.Document{ - ID: "b", - Fields: []document.Field{ - document.NewTextField("_id", []uint64{}, []byte("b")), - document.NewTextField("name", []uint64{}, []byte("XYZ")), - document.NewTextField("dept", []uint64{}, []byte("ABC dept")), - document.NewTextField("reportsTo.id", []uint64{}, []byte("ABC")), - }, - CompositeFields: []*document.CompositeField{ - document.NewCompositeField("_all", true, nil, []string{"_id"}), - }, - } - - results := []*index.AnalysisResult{ - &index.AnalysisResult{ - Document: doc1, - Analyzed: []analysis.TokenFrequencies{ - analysis.TokenFrequency(analysis.TokenStream{ - 
&analysis.Token{ - Start: 0, - End: 1, - Position: 1, - Term: []byte("a"), - }, - }, nil, false), - analysis.TokenFrequency(analysis.TokenStream{ - &analysis.Token{ - Start: 0, - End: 3, - Position: 1, - Term: []byte("ABC"), - }, - }, nil, true), - analysis.TokenFrequency(analysis.TokenStream{ - &analysis.Token{ - Start: 0, - End: 3, - Position: 1, - Term: []byte("ABC"), - }, - &analysis.Token{ - Start: 4, - End: 8, - Position: 2, - Term: []byte("dept"), - }, - }, nil, true), - analysis.TokenFrequency(analysis.TokenStream{ - &analysis.Token{ - Start: 0, - End: 3, - Position: 1, - Term: []byte("XYZ"), - }, - }, []uint64{0}, true), - analysis.TokenFrequency(analysis.TokenStream{ - &analysis.Token{ - Start: 0, - End: 1, - Position: 1, - Term: []byte("1"), - }, - }, []uint64{1}, true), - }, - Length: []int{ - 1, - 1, - 2, - 1, - 1, - }, - }, - &index.AnalysisResult{ - Document: doc2, - Analyzed: []analysis.TokenFrequencies{ - analysis.TokenFrequency(analysis.TokenStream{ - &analysis.Token{ - Start: 0, - End: 1, - Position: 1, - Term: []byte("b"), - }, - }, nil, false), - analysis.TokenFrequency(analysis.TokenStream{ - &analysis.Token{ - Start: 0, - End: 3, - Position: 1, - Term: []byte("XYZ"), - }, - }, nil, true), - analysis.TokenFrequency(analysis.TokenStream{ - &analysis.Token{ - Start: 0, - End: 3, - Position: 1, - Term: []byte("ABC"), - }, - &analysis.Token{ - Start: 4, - End: 8, - Position: 2, - Term: []byte("dept"), - }, - }, nil, true), - analysis.TokenFrequency(analysis.TokenStream{ - &analysis.Token{ - Start: 0, - End: 3, - Position: 1, - Term: []byte("ABC"), - }, - }, []uint64{0}, true), - }, - Length: []int{ - 1, - 1, - 2, - 1, - }, - }, - } - - // fix up composite fields - for _, ar := range results { - for i, f := range ar.Document.Fields { - for _, cf := range ar.Document.CompositeFields { - cf.Compose(f.Name(), ar.Length[i], ar.Analyzed[i]) - } - } - } - - segment := NewFromAnalyzedDocs(results) - if segment == nil { - t.Fatalf("segment nil, not 
expected") - } - - if segment.Count() != 2 { - t.Errorf("expected count 2, got %d", segment.Count()) - } - - expectFields := map[string]struct{}{ - "_id": struct{}{}, - "_all": struct{}{}, - "name": struct{}{}, - "dept": struct{}{}, - "manages.id": struct{}{}, - "manages.count": struct{}{}, - "reportsTo.id": struct{}{}, - } - - fields := segment.Fields() - if len(fields) != len(expectFields) { - t.Errorf("expected %d fields, only got %d", len(expectFields), len(fields)) - } - for _, field := range fields { - if _, ok := expectFields[field]; !ok { - t.Errorf("got unexpected field: %s", field) - } - } -} From 5fea87cf9d52de20448e3b454e137120e6acef43 Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Mon, 9 Apr 2018 17:05:35 -0400 Subject: [PATCH 370/728] new optional interface, access dictionary just for some terms this interface is useful when attempting to filter a larger list of terms which may or may not be in the dictionary this interface also allows you to not include the count in your dictionary iterator traversal --- index/index.go | 4 ++ index/scorch/segment/empty.go | 5 ++ index/scorch/segment/segment.go | 1 + index/scorch/segment/zap/dict.go | 65 +++++++++++++++++++++---- index/scorch/snapshot_index.go | 7 +++ index/scorch/snapshot_segment.go | 5 ++ search/searcher/search_numeric_range.go | 19 ++++++++ 7 files changed, 96 insertions(+), 10 deletions(-) diff --git a/index/index.go b/index/index.go index 861f6a2ea..42a452cde 100644 --- a/index/index.go +++ b/index/index.go @@ -104,6 +104,10 @@ type IndexReaderFuzzy interface { FieldDictFuzzy(field string, term []byte, fuzziness int) (FieldDict, error) } +type IndexReaderOnly interface { + FieldDictOnly(field string, onlyTerms [][]byte, includeCount bool) (FieldDict, error) +} + // FieldTerms contains the terms used by a document, keyed by field type FieldTerms map[string][]string diff --git a/index/scorch/segment/empty.go b/index/scorch/segment/empty.go index a0b8434c7..17749f1bf 100644 --- 
a/index/scorch/segment/empty.go +++ b/index/scorch/segment/empty.go @@ -85,6 +85,11 @@ func (e *EmptyDictionary) FuzzyIterator(term string, return &EmptyDictionaryIterator{} } +func (e *EmptyDictionary) OnlyIterator(onlyTerms [][]byte, + includeCount bool) DictionaryIterator { + return &EmptyDictionaryIterator{} +} + type EmptyDictionaryIterator struct{} func (e *EmptyDictionaryIterator) Next() (*index.DictEntry, error) { diff --git a/index/scorch/segment/segment.go b/index/scorch/segment/segment.go index b754e94b4..62b8a29b2 100644 --- a/index/scorch/segment/segment.go +++ b/index/scorch/segment/segment.go @@ -50,6 +50,7 @@ type TermDictionary interface { RangeIterator(start, end string) DictionaryIterator RegexpIterator(regex string) DictionaryIterator FuzzyIterator(term string, fuzziness int) DictionaryIterator + OnlyIterator(onlyTerms [][]byte, includeCount bool) DictionaryIterator } type DictionaryIterator interface { diff --git a/index/scorch/segment/zap/dict.go b/index/scorch/segment/zap/dict.go index b06cd7f6c..7cf3364bc 100644 --- a/index/scorch/segment/zap/dict.go +++ b/index/scorch/segment/zap/dict.go @@ -15,6 +15,7 @@ package zap import ( + "bytes" "fmt" "github.com/RoaringBitmap/roaring" @@ -191,13 +192,55 @@ func (d *Dictionary) FuzzyIterator(term string, return rv } +func (d *Dictionary) OnlyIterator(onlyTerms [][]byte, + includeCount bool) segment.DictionaryIterator { + + rv := &DictionaryIterator{ + d: d, + omitCount: !includeCount, + } + + var buf bytes.Buffer + builder, err := vellum.New(&buf, nil) + if err != nil { + rv.err = err + return rv + } + for _, term := range onlyTerms { + err = builder.Insert(term, 0) + if err != nil { + rv.err = err + return rv + } + } + err = builder.Close() + if err != nil { + rv.err = err + return rv + } + + onlyFST, err := vellum.Load(buf.Bytes()) + if err != nil { + rv.err = err + return rv + } + + itr, err := d.fst.Search(onlyFST, nil, nil) + if err == nil { + rv.itr = itr + } + + return rv +} + // 
DictionaryIterator is an iterator for term dictionary type DictionaryIterator struct { - d *Dictionary - itr vellum.Iterator - err error - tmp PostingsList - entry index.DictEntry + d *Dictionary + itr vellum.Iterator + err error + tmp PostingsList + entry index.DictEntry + omitCount bool } // Next returns the next entry in the dictionary @@ -208,12 +251,14 @@ func (i *DictionaryIterator) Next() (*index.DictEntry, error) { return nil, i.err } term, postingsOffset := i.itr.Current() - i.err = i.tmp.read(postingsOffset, i.d) - if i.err != nil { - return nil, i.err - } i.entry.Term = string(term) - i.entry.Count = i.tmp.Count() + if !i.omitCount { + i.err = i.tmp.read(postingsOffset, i.d) + if i.err != nil { + return nil, i.err + } + i.entry.Count = i.tmp.Count() + } i.err = i.itr.Next() return &i.entry, nil } diff --git a/index/scorch/snapshot_index.go b/index/scorch/snapshot_index.go index cf6e6250d..f4c75fdea 100644 --- a/index/scorch/snapshot_index.go +++ b/index/scorch/snapshot_index.go @@ -190,6 +190,13 @@ func (i *IndexSnapshot) FieldDictFuzzy(field string, }) } +func (i *IndexSnapshot) FieldDictOnly(field string, + onlyTerms [][]byte, includeCount bool) (index.FieldDict, error) { + return i.newIndexSnapshotFieldDict(field, func(i segment.TermDictionary) segment.DictionaryIterator { + return i.OnlyIterator(onlyTerms, includeCount) + }) +} + func (i *IndexSnapshot) DocIDReaderAll() (index.DocIDReader, error) { results := make(chan *asynchSegmentResult) for index, segment := range i.segment { diff --git a/index/scorch/snapshot_segment.go b/index/scorch/snapshot_segment.go index c29fac997..39539364c 100644 --- a/index/scorch/snapshot_segment.go +++ b/index/scorch/snapshot_segment.go @@ -58,6 +58,11 @@ func (s *SegmentDictionarySnapshot) FuzzyIterator(term string, return s.d.FuzzyIterator(term, fuzziness) } +func (s *SegmentDictionarySnapshot) OnlyIterator(onlyTerms [][]byte, + includeCount bool) segment.DictionaryIterator { + return s.d.OnlyIterator(onlyTerms, 
includeCount) +} + type SegmentSnapshot struct { id uint64 segment segment.Segment diff --git a/search/searcher/search_numeric_range.go b/search/searcher/search_numeric_range.go index 7f42d7250..1eae7a5ec 100644 --- a/search/searcher/search_numeric_range.go +++ b/search/searcher/search_numeric_range.go @@ -77,6 +77,25 @@ func NewNumericRangeSearcher(indexReader index.IndexReader, func filterCandidateTerms(indexReader index.IndexReader, terms [][]byte, field string) (rv [][]byte, err error) { + + if ir, ok := indexReader.(index.IndexReaderOnly); ok { + fieldDict, err := ir.FieldDictOnly(field, terms, false) + if err != nil { + return nil, err + } + // enumerate the terms (no need to check them again) + tfd, err := fieldDict.Next() + for err == nil && tfd != nil { + rv = append(rv, []byte(tfd.Term)) + tfd, err = fieldDict.Next() + } + if cerr := fieldDict.Close(); cerr != nil && err == nil { + err = cerr + } + + return rv, err + } + fieldDict, err := indexReader.FieldDictRange(field, terms[0], terms[len(terms)-1]) if err != nil { return nil, err From 6aadf2b305c0758f6e170f7ad2bccb55ac89ce2b Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Mon, 9 Apr 2018 17:27:47 -0400 Subject: [PATCH 371/728] make error handing inside iterator creation mroe consistent --- index/scorch/segment/zap/dict.go | 28 +++++++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-) diff --git a/index/scorch/segment/zap/dict.go b/index/scorch/segment/zap/dict.go index 7cf3364bc..68c062c43 100644 --- a/index/scorch/segment/zap/dict.go +++ b/index/scorch/segment/zap/dict.go @@ -100,6 +100,8 @@ func (d *Dictionary) Iterator() segment.DictionaryIterator { itr, err := d.fst.Iterator(nil, nil) if err == nil { rv.itr = itr + } else if err != nil && err != vellum.ErrIteratorDone { + rv.err = err } } @@ -119,7 +121,11 @@ func (d *Dictionary) PrefixIterator(prefix string) segment.DictionaryIterator { itr, err := d.fst.Search(r, nil, nil) if err == nil { rv.itr = itr + } else if err != nil 
&& err != vellum.ErrIteratorDone { + rv.err = err } + } else { + rv.err = err } } @@ -145,13 +151,15 @@ func (d *Dictionary) RangeIterator(start, end string) segment.DictionaryIterator itr, err := d.fst.Iterator([]byte(start), endBytes) if err == nil { rv.itr = itr + } else if err != nil && err != vellum.ErrIteratorDone { + rv.err = err } } return rv } -// RegexIterator returns an iterator which only visits terms having the +// RegexpIterator returns an iterator which only visits terms having the // the specified regex func (d *Dictionary) RegexpIterator(regex string) segment.DictionaryIterator { rv := &DictionaryIterator{ @@ -161,10 +169,14 @@ func (d *Dictionary) RegexpIterator(regex string) segment.DictionaryIterator { if d.fst != nil { r, err := regexp.New(regex) if err == nil { - itr, err := d.fst.Search(r, nil, nil) - if err == nil { + itr, err2 := d.fst.Search(r, nil, nil) + if err2 == nil { rv.itr = itr + } else if err2 != nil && err2 != vellum.ErrIteratorDone { + rv.err = err2 } + } else { + rv.err = err } } @@ -182,10 +194,14 @@ func (d *Dictionary) FuzzyIterator(term string, if d.fst != nil { la, err := levenshtein.New(term, fuzziness) if err == nil { - itr, err := d.fst.Search(la, nil, nil) - if err == nil { + itr, err2 := d.fst.Search(la, nil, nil) + if err2 == nil { rv.itr = itr + } else if err2 != nil && err2 != vellum.ErrIteratorDone { + rv.err = err2 } + } else { + rv.err = err } } @@ -228,6 +244,8 @@ func (d *Dictionary) OnlyIterator(onlyTerms [][]byte, itr, err := d.fst.Search(onlyFST, nil, nil) if err == nil { rv.itr = itr + } else if err != nil && err != vellum.ErrIteratorDone { + rv.err = err } return rv From 901d847f49b419a30cc2dab79fc8abf28d176291 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Mon, 9 Apr 2018 16:04:08 -0700 Subject: [PATCH 372/728] scorch optimize loadSegment() to treat empty deleted bitmaps as nil In this optimization, loadSegment() will convert an empty 'deleted' roaring bitmap to nil, so that later codepaths that test for 
nil'ness can avoid roaring operations like AndNot() -- which helps in the case when there's no mutations / insertions-only. --- index/scorch/persister.go | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/index/scorch/persister.go b/index/scorch/persister.go index 758b342f2..76fd746d5 100644 --- a/index/scorch/persister.go +++ b/index/scorch/persister.go @@ -622,7 +622,9 @@ func (s *Scorch) loadSegment(segmentBucket *bolt.Bucket) (*SegmentSnapshot, erro _ = segment.Close() return nil, fmt.Errorf("error reading deleted bytes: %v", err) } - rv.deleted = deletedBitmap + if !deletedBitmap.IsEmpty() { + rv.deleted = deletedBitmap + } } return rv, nil From 02c6befe408658e5d03bc548b96ddf3120fd1245 Mon Sep 17 00:00:00 2001 From: Sreekanth Sivasankaran Date: Wed, 11 Apr 2018 15:46:33 +0530 Subject: [PATCH 373/728] avoiding redundant snappy decode calls in dv --- index/scorch/segment/zap/docvalues.go | 21 +++++++++++++++------ index/scorch/snapshot_index.go | 2 +- 2 files changed, 16 insertions(+), 7 deletions(-) diff --git a/index/scorch/segment/zap/docvalues.go b/index/scorch/segment/zap/docvalues.go index f46f2bd5b..13f41bd2d 100644 --- a/index/scorch/segment/zap/docvalues.go +++ b/index/scorch/segment/zap/docvalues.go @@ -125,6 +125,7 @@ func (di *docValueReader) loadDvChunk(chunkNumber, dataLength := curChunkEnd - compressedDataLoc di.curChunkData = s.mem[compressedDataLoc : compressedDataLoc+dataLength] di.curChunkNum = chunkNumber + di.uncompressed = di.uncompressed[:0] return nil } @@ -135,12 +136,20 @@ func (di *docValueReader) visitDocValues(docNum uint64, if start == math.MaxUint64 || end == math.MaxUint64 { return nil } - // uncompress the already loaded data - uncompressed, err := snappy.Decode(di.uncompressed[:cap(di.uncompressed)], di.curChunkData) - if err != nil { - return err + + var uncompressed []byte + var err error + // use the uncompressed copy if available + if len(di.uncompressed) > 0 { + uncompressed = di.uncompressed + } else { + 
// uncompress the already loaded data + uncompressed, err = snappy.Decode(di.uncompressed[:cap(di.uncompressed)], di.curChunkData) + if err != nil { + return err + } + di.uncompressed = uncompressed } - di.uncompressed = uncompressed // pick the terms for the given docNum uncompressed = uncompressed[start:end] @@ -200,7 +209,7 @@ func (s *SegmentBase) VisitDocumentFieldTerms(localDocNum uint64, fields []strin // persisted doc value terms ready to be visitable using the // VisitDocumentFieldTerms method. func (s *Segment) VisitableDocValueFields() ([]string, error) { - var rv []string + rv := make([]string, 0, len(s.fieldDvReaders)) for fieldID, field := range s.fieldsInv { if dvIter, ok := s.fieldDvReaders[uint16(fieldID)]; ok && dvIter != nil { diff --git a/index/scorch/snapshot_index.go b/index/scorch/snapshot_index.go index f4c75fdea..888e95fb9 100644 --- a/index/scorch/snapshot_index.go +++ b/index/scorch/snapshot_index.go @@ -532,7 +532,7 @@ func visitDocumentFieldCacheTerms(localDocNum uint64, fields []string, } func extractDvPendingFields(requestedFields, persistedFields []string) []string { - removeMap := map[string]struct{}{} + removeMap := make(map[string]struct{}, len(persistedFields)) for _, str := range persistedFields { removeMap[str] = struct{}{} } From 4c642cffdc2af390030c87314586beeb89b80739 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Wed, 4 Apr 2018 08:59:11 -0700 Subject: [PATCH 374/728] scorch optimize TermFieldReaders via Dict/PostingsList/Iterator reuse This optimization is helpful for low-frequency term searches. The scorch IndexSnapshot struct now tracks a mutable slice of recycled TermFieldReader instances, to avoid TFR allocations. The TermFieldReader.Close() method recycles the instance back onto the IndexSnapshot. 
This commit also adds an optional 'prealloc' param to the TermDictionary.PostingsList() and PostingsList.Iterator() methods, which allows the TermFieldReader to avoid allocations by reusing recycled postings list and postings iterators. --- index/scorch/segment/empty.go | 5 +- index/scorch/segment/segment.go | 4 +- index/scorch/segment/zap/dict.go | 10 +++- index/scorch/segment/zap/merge_test.go | 4 +- index/scorch/segment/zap/posting.go | 14 ++++- index/scorch/segment/zap/segment_test.go | 32 +++++----- index/scorch/snapshot_index.go | 76 +++++++++++++++++++----- index/scorch/snapshot_index_tfr.go | 2 + index/scorch/snapshot_segment.go | 13 ++-- 9 files changed, 114 insertions(+), 46 deletions(-) diff --git a/index/scorch/segment/empty.go b/index/scorch/segment/empty.go index 17749f1bf..9a64f223e 100644 --- a/index/scorch/segment/empty.go +++ b/index/scorch/segment/empty.go @@ -60,7 +60,7 @@ func (e *EmptySegment) DecRef() error { type EmptyDictionary struct{} func (e *EmptyDictionary) PostingsList(term string, - except *roaring.Bitmap) (PostingsList, error) { + except *roaring.Bitmap, prealloc PostingsList) (PostingsList, error) { return &EmptyPostingsList{}, nil } @@ -98,7 +98,8 @@ func (e *EmptyDictionaryIterator) Next() (*index.DictEntry, error) { type EmptyPostingsList struct{} -func (e *EmptyPostingsList) Iterator(includeFreq, includeNorm, includeLocations bool) PostingsIterator { +func (e *EmptyPostingsList) Iterator(includeFreq, includeNorm, includeLocations bool, + prealloc PostingsIterator) PostingsIterator { return &EmptyPostingsIterator{} } diff --git a/index/scorch/segment/segment.go b/index/scorch/segment/segment.go index 62b8a29b2..ff0b5124e 100644 --- a/index/scorch/segment/segment.go +++ b/index/scorch/segment/segment.go @@ -43,7 +43,7 @@ type Segment interface { } type TermDictionary interface { - PostingsList(term string, except *roaring.Bitmap) (PostingsList, error) + PostingsList(term string, except *roaring.Bitmap, prealloc PostingsList) 
(PostingsList, error) Iterator() DictionaryIterator PrefixIterator(prefix string) DictionaryIterator @@ -58,7 +58,7 @@ type DictionaryIterator interface { } type PostingsList interface { - Iterator(includeFreq, includeNorm, includeLocations bool) PostingsIterator + Iterator(includeFreq, includeNorm, includeLocations bool, prealloc PostingsIterator) PostingsIterator Size() int diff --git a/index/scorch/segment/zap/dict.go b/index/scorch/segment/zap/dict.go index 68c062c43..3010fe6dc 100644 --- a/index/scorch/segment/zap/dict.go +++ b/index/scorch/segment/zap/dict.go @@ -35,8 +35,14 @@ type Dictionary struct { } // PostingsList returns the postings list for the specified term -func (d *Dictionary) PostingsList(term string, except *roaring.Bitmap) (segment.PostingsList, error) { - return d.postingsList([]byte(term), except, nil) +func (d *Dictionary) PostingsList(term string, except *roaring.Bitmap, + prealloc segment.PostingsList) (segment.PostingsList, error) { + var preallocPL *PostingsList + pl, ok := prealloc.(*PostingsList) + if ok && pl != nil { + preallocPL = pl + } + return d.postingsList([]byte(term), except, preallocPL) } func (d *Dictionary) postingsList(term []byte, except *roaring.Bitmap, rv *PostingsList) (*PostingsList, error) { diff --git a/index/scorch/segment/zap/merge_test.go b/index/scorch/segment/zap/merge_test.go index 2675bf838..6b168c907 100644 --- a/index/scorch/segment/zap/merge_test.go +++ b/index/scorch/segment/zap/merge_test.go @@ -332,8 +332,8 @@ func compareSegments(a, b *Segment) string { fieldName, next.Term, aplist.Count(), bplist.Count())) } - apitr := aplist.Iterator(true, true, true) - bpitr := bplist.Iterator(true, true, true) + apitr := aplist.Iterator(true, true, true, nil) + bpitr := bplist.Iterator(true, true, true, nil) if (apitr != nil) != (bpitr != nil) { rv = append(rv, fmt.Sprintf("field %s, term: %s, postingsList.Iterator() results different: %v %v", fieldName, next.Term, apitr, bpitr)) diff --git 
a/index/scorch/segment/zap/posting.go b/index/scorch/segment/zap/posting.go index 1f198df5e..cc8bfefee 100644 --- a/index/scorch/segment/zap/posting.go +++ b/index/scorch/segment/zap/posting.go @@ -133,12 +133,22 @@ func (p *PostingsList) OrInto(receiver *roaring.Bitmap) { } // Iterator returns an iterator for this postings list -func (p *PostingsList) Iterator(includeFreq, includeNorm, includeLocs bool) segment.PostingsIterator { +func (p *PostingsList) Iterator(includeFreq, includeNorm, includeLocs bool, + prealloc segment.PostingsIterator) segment.PostingsIterator { if p.normBits1Hit == 0 && p.postings == nil { return emptyPostingsIterator } - return p.iterator(includeFreq, includeNorm, includeLocs, nil) + var preallocPI *PostingsIterator + pi, ok := prealloc.(*PostingsIterator) + if ok && pi != nil { + preallocPI = pi + } + if preallocPI == emptyPostingsIterator { + preallocPI = nil + } + + return p.iterator(includeFreq, includeNorm, includeLocs, preallocPI) } func (p *PostingsList) iterator(includeFreq, includeNorm, includeLocs bool, diff --git a/index/scorch/segment/zap/segment_test.go b/index/scorch/segment/zap/segment_test.go index 00ae1c2f6..a6698e054 100644 --- a/index/scorch/segment/zap/segment_test.go +++ b/index/scorch/segment/zap/segment_test.go @@ -76,7 +76,7 @@ func TestOpen(t *testing.T) { t.Fatal("got nil dict, expected non-nil") } - postingsList, err := dict.PostingsList("a", nil) + postingsList, err := dict.PostingsList("a", nil, nil) if err != nil { t.Fatal(err) } @@ -84,7 +84,7 @@ func TestOpen(t *testing.T) { t.Fatal("got nil postings list, expected non-nil") } - postingsItr := postingsList.Iterator(true, true, true) + postingsItr := postingsList.Iterator(true, true, true, nil) if postingsItr == nil { t.Fatal("got nil iterator, expected non-nil") } @@ -122,7 +122,7 @@ func TestOpen(t *testing.T) { t.Fatal("got nil dict, expected non-nil") } - postingsList, err = dict.PostingsList("wow", nil) + postingsList, err = dict.PostingsList("wow", nil, 
nil) if err != nil { t.Fatal(err) } @@ -130,7 +130,7 @@ func TestOpen(t *testing.T) { t.Fatal("got nil postings list, expected non-nil") } - postingsItr = postingsList.Iterator(true, true, true) + postingsItr = postingsList.Iterator(true, true, true, nil) if postingsItr == nil { t.Fatal("got nil iterator, expected non-nil") } @@ -190,7 +190,7 @@ func TestOpen(t *testing.T) { t.Fatal("got nil dict, expected non-nil") } - postingsList, err = dict.PostingsList("wow", nil) + postingsList, err = dict.PostingsList("wow", nil, nil) if err != nil { t.Fatal(err) } @@ -198,7 +198,7 @@ func TestOpen(t *testing.T) { t.Fatal("got nil postings list, expected non-nil") } - postingsItr = postingsList.Iterator(true, true, true) + postingsItr = postingsList.Iterator(true, true, true, nil) if postingsItr == nil { t.Fatal("got nil iterator, expected non-nil") } @@ -259,7 +259,7 @@ func TestOpen(t *testing.T) { t.Fatal("got nil dict, expected non-nil") } - postingsList, err = dict.PostingsList("dark", nil) + postingsList, err = dict.PostingsList("dark", nil, nil) if err != nil { t.Fatal(err) } @@ -267,7 +267,7 @@ func TestOpen(t *testing.T) { t.Fatal("got nil postings list, expected non-nil") } - postingsItr = postingsList.Iterator(true, true, true) + postingsItr = postingsList.Iterator(true, true, true, nil) if postingsItr == nil { t.Fatal("got nil iterator, expected non-nil") } @@ -358,7 +358,7 @@ func TestOpenMulti(t *testing.T) { t.Fatal("got nil dict, expected non-nil") } - postingsList, err := dict.PostingsList("thing", nil) + postingsList, err := dict.PostingsList("thing", nil, nil) if err != nil { t.Fatal(err) } @@ -366,7 +366,7 @@ func TestOpenMulti(t *testing.T) { t.Fatal("got nil postings list, expected non-nil") } - postingsItr := postingsList.Iterator(true, true, true) + postingsItr := postingsList.Iterator(true, true, true, nil) if postingsItr == nil { t.Fatal("got nil iterator, expected non-nil") } @@ -392,7 +392,7 @@ func TestOpenMulti(t *testing.T) { } // look for term 
'thing' excluding doc 'a' - postingsListExcluding, err := dict.PostingsList("thing", exclude) + postingsListExcluding, err := dict.PostingsList("thing", exclude, nil) if err != nil { t.Fatal(err) } @@ -405,7 +405,7 @@ func TestOpenMulti(t *testing.T) { t.Errorf("expected count from postings list to be 1, got %d", postingsListExcludingCount) } - postingsItrExcluding := postingsListExcluding.Iterator(true, true, true) + postingsItrExcluding := postingsListExcluding.Iterator(true, true, true, nil) if postingsItr == nil { t.Fatal("got nil iterator, expected non-nil") } @@ -458,7 +458,7 @@ func TestOpenMultiWithTwoChunks(t *testing.T) { t.Fatal("got nil dict, expected non-nil") } - postingsList, err := dict.PostingsList("thing", nil) + postingsList, err := dict.PostingsList("thing", nil, nil) if err != nil { t.Fatal(err) } @@ -466,7 +466,7 @@ func TestOpenMultiWithTwoChunks(t *testing.T) { t.Fatal("got nil postings list, expected non-nil") } - postingsItr := postingsList.Iterator(true, true, true) + postingsItr := postingsList.Iterator(true, true, true, nil) if postingsItr == nil { t.Fatal("got nil iterator, expected non-nil") } @@ -492,7 +492,7 @@ func TestOpenMultiWithTwoChunks(t *testing.T) { } // look for term 'thing' excluding doc 'a' - postingsListExcluding, err := dict.PostingsList("thing", exclude) + postingsListExcluding, err := dict.PostingsList("thing", exclude, nil) if err != nil { t.Fatal(err) } @@ -500,7 +500,7 @@ func TestOpenMultiWithTwoChunks(t *testing.T) { t.Fatal("got nil postings list, expected non-nil") } - postingsItrExcluding := postingsListExcluding.Iterator(true, true, true) + postingsItrExcluding := postingsListExcluding.Iterator(true, true, true, nil) if postingsItr == nil { t.Fatal("got nil iterator, expected non-nil") } diff --git a/index/scorch/snapshot_index.go b/index/scorch/snapshot_index.go index 888e95fb9..f98dab24c 100644 --- a/index/scorch/snapshot_index.go +++ b/index/scorch/snapshot_index.go @@ -59,6 +59,9 @@ type IndexSnapshot 
struct { m sync.Mutex // Protects the fields that follow. refs int64 + + tfrsm sync.Mutex // Protects the fields that follow. + tfrs map[string][]*IndexSnapshotTermFieldReader // keyed by field, recycled TFR's } func (i *IndexSnapshot) Segments() []*SegmentSnapshot { @@ -394,34 +397,75 @@ func (i *IndexSnapshot) InternalID(id string) (rv index.IndexInternalID, err err } func (i *IndexSnapshot) TermFieldReader(term []byte, field string, includeFreq, - includeNorm, includeTermVectors bool) (index.TermFieldReader, error) { + includeNorm, includeTermVectors bool) (tfr index.TermFieldReader, err error) { + rv := i.allocTermFieldReader(field) + rv.term = term + rv.field = field + rv.snapshot = i + if rv.dicts == nil { + rv.dicts = make([]segment.TermDictionary, len(i.segment)) + } + if rv.postings == nil { + rv.postings = make([]segment.PostingsList, len(i.segment)) + } + if rv.iterators == nil { + rv.iterators = make([]segment.PostingsIterator, len(i.segment)) + } + rv.segmentOffset = 0 + rv.includeFreq = includeFreq + rv.includeNorm = includeNorm + rv.includeTermVectors = includeTermVectors + rv.currPosting = nil + rv.currID = rv.currID[:0] + termStr := string(term) - rv := &IndexSnapshotTermFieldReader{ - term: term, - field: field, - snapshot: i, - postings: make([]segment.PostingsList, len(i.segment)), - iterators: make([]segment.PostingsIterator, len(i.segment)), - includeFreq: includeFreq, - includeNorm: includeNorm, - includeTermVectors: includeTermVectors, - } + for i, segment := range i.segment { - dict, err := segment.Dictionary(field) - if err != nil { - return nil, err + dict := rv.dicts[i] + if dict == nil { + dict, err = segment.Dictionary(field) + if err != nil { + return nil, err + } + rv.dicts[i] = dict } - pl, err := dict.PostingsList(termStr, nil) + pl, err := dict.PostingsList(termStr, nil, rv.postings[i]) if err != nil { return nil, err } rv.postings[i] = pl - rv.iterators[i] = pl.Iterator(includeFreq, includeNorm, includeTermVectors) + 
rv.iterators[i] = pl.Iterator(includeFreq, includeNorm, includeTermVectors, rv.iterators[i]) } atomic.AddUint64(&i.parent.stats.TotTermSearchersStarted, uint64(1)) return rv, nil } +func (i *IndexSnapshot) allocTermFieldReader(field string) *IndexSnapshotTermFieldReader { + i.tfrsm.Lock() + if i.tfrs != nil { + tfrs := i.tfrs[field] + last := len(tfrs) - 1 + if last >= 0 { + rv := tfrs[last] + tfrs[last] = nil + i.tfrs[field] = tfrs[:last] + i.tfrsm.Unlock() + return rv + } + } + i.tfrsm.Unlock() + return &IndexSnapshotTermFieldReader{} +} + +func (i *IndexSnapshot) recycleTermFieldReader(tfr *IndexSnapshotTermFieldReader) { + i.tfrsm.Lock() + if i.tfrs == nil { + i.tfrs = map[string][]*IndexSnapshotTermFieldReader{} + } + i.tfrs[tfr.field] = append(i.tfrs[tfr.field], tfr) + i.tfrsm.Unlock() +} + func docNumberToBytes(buf []byte, in uint64) []byte { if len(buf) != 8 { if cap(buf) >= 8 { diff --git a/index/scorch/snapshot_index_tfr.go b/index/scorch/snapshot_index_tfr.go index c111d5177..11f1283ec 100644 --- a/index/scorch/snapshot_index_tfr.go +++ b/index/scorch/snapshot_index_tfr.go @@ -35,6 +35,7 @@ type IndexSnapshotTermFieldReader struct { term []byte field string snapshot *IndexSnapshot + dicts []segment.TermDictionary postings []segment.PostingsList iterators []segment.PostingsIterator segmentOffset int @@ -164,6 +165,7 @@ func (i *IndexSnapshotTermFieldReader) Count() uint64 { func (i *IndexSnapshotTermFieldReader) Close() error { if i.snapshot != nil { atomic.AddUint64(&i.snapshot.parent.stats.TotTermSearchersFinished, uint64(1)) + i.snapshot.recycleTermFieldReader(i) } return nil } diff --git a/index/scorch/snapshot_segment.go b/index/scorch/snapshot_segment.go index 39539364c..247131ae5 100644 --- a/index/scorch/snapshot_segment.go +++ b/index/scorch/snapshot_segment.go @@ -32,9 +32,10 @@ type SegmentDictionarySnapshot struct { d segment.TermDictionary } -func (s *SegmentDictionarySnapshot) PostingsList(term string, except *roaring.Bitmap) 
(segment.PostingsList, error) { +func (s *SegmentDictionarySnapshot) PostingsList(term string, except *roaring.Bitmap, + prealloc segment.PostingsList) (segment.PostingsList, error) { // TODO: if except is non-nil, perhaps need to OR it with s.s.deleted? - return s.d.PostingsList(term, s.s.deleted) + return s.d.PostingsList(term, s.s.deleted, prealloc) } func (s *SegmentDictionarySnapshot) Iterator() segment.DictionaryIterator { @@ -171,17 +172,21 @@ func (cfd *cachedFieldDocs) prepareFields(field string, ss *SegmentSnapshot) { return } + var postings segment.PostingsList + var postingsItr segment.PostingsIterator + dictItr := dict.Iterator() next, err := dictItr.Next() for err == nil && next != nil { - postings, err1 := dict.PostingsList(next.Term, nil) + var err1 error + postings, err1 = dict.PostingsList(next.Term, nil, postings) if err1 != nil { cfd.err = err1 return } cfd.size += uint64(size.SizeOfUint64) /* map key */ - postingsItr := postings.Iterator(false, false, false) + postingsItr = postings.Iterator(false, false, false, postingsItr) nextPosting, err2 := postingsItr.Next() for err2 == nil && nextPosting != nil { docNum := nextPosting.Number() From 8a2250b3ee7add726ca4e770537d0fbd24e70947 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Wed, 4 Apr 2018 15:36:55 -0700 Subject: [PATCH 375/728] scorch optimize TermFieldReader to reuse dict even more A single scorch TermDictionary can be shared by all TermFieldReaders. --- index/scorch/snapshot_index.go | 67 ++++++++++++++++++++-------------- 1 file changed, 39 insertions(+), 28 deletions(-) diff --git a/index/scorch/snapshot_index.go b/index/scorch/snapshot_index.go index f98dab24c..4e80a7156 100644 --- a/index/scorch/snapshot_index.go +++ b/index/scorch/snapshot_index.go @@ -60,8 +60,9 @@ type IndexSnapshot struct { m sync.Mutex // Protects the fields that follow. refs int64 - tfrsm sync.Mutex // Protects the fields that follow. 
- tfrs map[string][]*IndexSnapshotTermFieldReader // keyed by field, recycled TFR's + m2 sync.Mutex // Protects the fields that follow. + fieldTFRs map[string][]*IndexSnapshotTermFieldReader // keyed by field, recycled TFR's + fieldDicts map[string][]segment.TermDictionary // keyed by field, recycled dicts } func (i *IndexSnapshot) Segments() []*SegmentSnapshot { @@ -398,13 +399,11 @@ func (i *IndexSnapshot) InternalID(id string) (rv index.IndexInternalID, err err func (i *IndexSnapshot) TermFieldReader(term []byte, field string, includeFreq, includeNorm, includeTermVectors bool) (tfr index.TermFieldReader, err error) { - rv := i.allocTermFieldReader(field) + rv, dicts := i.allocTermFieldReaderDicts(field) + rv.term = term rv.field = field rv.snapshot = i - if rv.dicts == nil { - rv.dicts = make([]segment.TermDictionary, len(i.segment)) - } if rv.postings == nil { rv.postings = make([]segment.PostingsList, len(i.segment)) } @@ -418,18 +417,22 @@ func (i *IndexSnapshot) TermFieldReader(term []byte, field string, includeFreq, rv.currPosting = nil rv.currID = rv.currID[:0] - termStr := string(term) - - for i, segment := range i.segment { - dict := rv.dicts[i] - if dict == nil { - dict, err = segment.Dictionary(field) + if dicts == nil { + dicts = make([]segment.TermDictionary, len(i.segment)) + for i, segment := range i.segment { + dict, err := segment.Dictionary(field) if err != nil { return nil, err } - rv.dicts[i] = dict + dicts[i] = dict } - pl, err := dict.PostingsList(termStr, nil, rv.postings[i]) + } + rv.dicts = dicts + + termStr := string(term) + + for i := range i.segment { + pl, err := dicts[i].PostingsList(termStr, nil, rv.postings[i]) if err != nil { return nil, err } @@ -440,30 +443,38 @@ func (i *IndexSnapshot) TermFieldReader(term []byte, field string, includeFreq, return rv, nil } -func (i *IndexSnapshot) allocTermFieldReader(field string) *IndexSnapshotTermFieldReader { - i.tfrsm.Lock() - if i.tfrs != nil { - tfrs := i.tfrs[field] +func (i 
*IndexSnapshot) allocTermFieldReaderDicts(field string) ( + tfr *IndexSnapshotTermFieldReader, dicts []segment.TermDictionary) { + i.m2.Lock() + if i.fieldDicts != nil { + dicts = i.fieldDicts[field] + } + if i.fieldTFRs != nil { + tfrs := i.fieldTFRs[field] last := len(tfrs) - 1 if last >= 0 { rv := tfrs[last] tfrs[last] = nil - i.tfrs[field] = tfrs[:last] - i.tfrsm.Unlock() - return rv + i.fieldTFRs[field] = tfrs[:last] + i.m2.Unlock() + return rv, dicts } } - i.tfrsm.Unlock() - return &IndexSnapshotTermFieldReader{} + i.m2.Unlock() + return &IndexSnapshotTermFieldReader{}, dicts } func (i *IndexSnapshot) recycleTermFieldReader(tfr *IndexSnapshotTermFieldReader) { - i.tfrsm.Lock() - if i.tfrs == nil { - i.tfrs = map[string][]*IndexSnapshotTermFieldReader{} + i.m2.Lock() + if i.fieldTFRs == nil { + i.fieldTFRs = map[string][]*IndexSnapshotTermFieldReader{} + } + i.fieldTFRs[tfr.field] = append(i.fieldTFRs[tfr.field], tfr) + if i.fieldDicts == nil { + i.fieldDicts = map[string][]segment.TermDictionary{} } - i.tfrs[tfr.field] = append(i.tfrs[tfr.field], tfr) - i.tfrsm.Unlock() + i.fieldDicts[tfr.field] = tfr.dicts + i.m2.Unlock() } func docNumberToBytes(buf []byte, in uint64) []byte { From bdc868c07c2ae08fb67826e33d38edcffe5211da Mon Sep 17 00:00:00 2001 From: abhinavdangeti Date: Wed, 11 Apr 2018 19:20:57 -0700 Subject: [PATCH 376/728] Remove unused parameter for docValueReader API: loadDvChunk(..) 
--- index/scorch/segment/zap/docvalues.go | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/index/scorch/segment/zap/docvalues.go b/index/scorch/segment/zap/docvalues.go index 13f41bd2d..220897853 100644 --- a/index/scorch/segment/zap/docvalues.go +++ b/index/scorch/segment/zap/docvalues.go @@ -96,8 +96,7 @@ func (s *SegmentBase) loadFieldDocValueReader(field string, return fdvIter, nil } -func (di *docValueReader) loadDvChunk(chunkNumber, - localDocNum uint64, s *SegmentBase) error { +func (di *docValueReader) loadDvChunk(chunkNumber uint64, s *SegmentBase) error { // advance to the chunk where the docValues // reside for the given docNum destChunkDataLoc, curChunkEnd := di.dvDataLoc, di.dvDataLoc @@ -193,7 +192,7 @@ func (s *SegmentBase) VisitDocumentFieldTerms(localDocNum uint64, fields []strin dvIter != nil { // check if the chunk is already loaded if docInChunk != dvIter.curChunkNumber() { - err := dvIter.loadDvChunk(docInChunk, localDocNum, s) + err := dvIter.loadDvChunk(docInChunk, s) if err != nil { continue } From e1cf8b5b8df482033fbf73bf73b92d10a3414f25 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Mon, 9 Apr 2018 16:53:51 -0700 Subject: [PATCH 377/728] scorch PostingsList() takes []byte, not string --- index/scorch/segment/empty.go | 2 +- index/scorch/segment/segment.go | 2 +- index/scorch/segment/zap/dict.go | 4 ++-- index/scorch/segment/zap/segment_test.go | 16 ++++++++-------- index/scorch/snapshot_index.go | 4 +--- index/scorch/snapshot_segment.go | 4 ++-- 6 files changed, 15 insertions(+), 17 deletions(-) diff --git a/index/scorch/segment/empty.go b/index/scorch/segment/empty.go index 9a64f223e..839d4983c 100644 --- a/index/scorch/segment/empty.go +++ b/index/scorch/segment/empty.go @@ -59,7 +59,7 @@ func (e *EmptySegment) DecRef() error { type EmptyDictionary struct{} -func (e *EmptyDictionary) PostingsList(term string, +func (e *EmptyDictionary) PostingsList(term []byte, except *roaring.Bitmap, prealloc PostingsList) 
(PostingsList, error) { return &EmptyPostingsList{}, nil } diff --git a/index/scorch/segment/segment.go b/index/scorch/segment/segment.go index ff0b5124e..a3f215804 100644 --- a/index/scorch/segment/segment.go +++ b/index/scorch/segment/segment.go @@ -43,7 +43,7 @@ type Segment interface { } type TermDictionary interface { - PostingsList(term string, except *roaring.Bitmap, prealloc PostingsList) (PostingsList, error) + PostingsList(term []byte, except *roaring.Bitmap, prealloc PostingsList) (PostingsList, error) Iterator() DictionaryIterator PrefixIterator(prefix string) DictionaryIterator diff --git a/index/scorch/segment/zap/dict.go b/index/scorch/segment/zap/dict.go index 3010fe6dc..b0664534d 100644 --- a/index/scorch/segment/zap/dict.go +++ b/index/scorch/segment/zap/dict.go @@ -35,14 +35,14 @@ type Dictionary struct { } // PostingsList returns the postings list for the specified term -func (d *Dictionary) PostingsList(term string, except *roaring.Bitmap, +func (d *Dictionary) PostingsList(term []byte, except *roaring.Bitmap, prealloc segment.PostingsList) (segment.PostingsList, error) { var preallocPL *PostingsList pl, ok := prealloc.(*PostingsList) if ok && pl != nil { preallocPL = pl } - return d.postingsList([]byte(term), except, preallocPL) + return d.postingsList(term, except, preallocPL) } func (d *Dictionary) postingsList(term []byte, except *roaring.Bitmap, rv *PostingsList) (*PostingsList, error) { diff --git a/index/scorch/segment/zap/segment_test.go b/index/scorch/segment/zap/segment_test.go index a6698e054..27f87faff 100644 --- a/index/scorch/segment/zap/segment_test.go +++ b/index/scorch/segment/zap/segment_test.go @@ -76,7 +76,7 @@ func TestOpen(t *testing.T) { t.Fatal("got nil dict, expected non-nil") } - postingsList, err := dict.PostingsList("a", nil, nil) + postingsList, err := dict.PostingsList([]byte("a"), nil, nil) if err != nil { t.Fatal(err) } @@ -122,7 +122,7 @@ func TestOpen(t *testing.T) { t.Fatal("got nil dict, expected non-nil") } 
- postingsList, err = dict.PostingsList("wow", nil, nil) + postingsList, err = dict.PostingsList([]byte("wow"), nil, nil) if err != nil { t.Fatal(err) } @@ -190,7 +190,7 @@ func TestOpen(t *testing.T) { t.Fatal("got nil dict, expected non-nil") } - postingsList, err = dict.PostingsList("wow", nil, nil) + postingsList, err = dict.PostingsList([]byte("wow"), nil, nil) if err != nil { t.Fatal(err) } @@ -259,7 +259,7 @@ func TestOpen(t *testing.T) { t.Fatal("got nil dict, expected non-nil") } - postingsList, err = dict.PostingsList("dark", nil, nil) + postingsList, err = dict.PostingsList([]byte("dark"), nil, nil) if err != nil { t.Fatal(err) } @@ -358,7 +358,7 @@ func TestOpenMulti(t *testing.T) { t.Fatal("got nil dict, expected non-nil") } - postingsList, err := dict.PostingsList("thing", nil, nil) + postingsList, err := dict.PostingsList([]byte("thing"), nil, nil) if err != nil { t.Fatal(err) } @@ -392,7 +392,7 @@ func TestOpenMulti(t *testing.T) { } // look for term 'thing' excluding doc 'a' - postingsListExcluding, err := dict.PostingsList("thing", exclude, nil) + postingsListExcluding, err := dict.PostingsList([]byte("thing"), exclude, nil) if err != nil { t.Fatal(err) } @@ -458,7 +458,7 @@ func TestOpenMultiWithTwoChunks(t *testing.T) { t.Fatal("got nil dict, expected non-nil") } - postingsList, err := dict.PostingsList("thing", nil, nil) + postingsList, err := dict.PostingsList([]byte("thing"), nil, nil) if err != nil { t.Fatal(err) } @@ -492,7 +492,7 @@ func TestOpenMultiWithTwoChunks(t *testing.T) { } // look for term 'thing' excluding doc 'a' - postingsListExcluding, err := dict.PostingsList("thing", exclude, nil) + postingsListExcluding, err := dict.PostingsList([]byte("thing"), exclude, nil) if err != nil { t.Fatal(err) } diff --git a/index/scorch/snapshot_index.go b/index/scorch/snapshot_index.go index 4e80a7156..2664fe425 100644 --- a/index/scorch/snapshot_index.go +++ b/index/scorch/snapshot_index.go @@ -429,10 +429,8 @@ func (i *IndexSnapshot) 
TermFieldReader(term []byte, field string, includeFreq, } rv.dicts = dicts - termStr := string(term) - for i := range i.segment { - pl, err := dicts[i].PostingsList(termStr, nil, rv.postings[i]) + pl, err := dicts[i].PostingsList(term, nil, rv.postings[i]) if err != nil { return nil, err } diff --git a/index/scorch/snapshot_segment.go b/index/scorch/snapshot_segment.go index 247131ae5..44aafa523 100644 --- a/index/scorch/snapshot_segment.go +++ b/index/scorch/snapshot_segment.go @@ -32,7 +32,7 @@ type SegmentDictionarySnapshot struct { d segment.TermDictionary } -func (s *SegmentDictionarySnapshot) PostingsList(term string, except *roaring.Bitmap, +func (s *SegmentDictionarySnapshot) PostingsList(term []byte, except *roaring.Bitmap, prealloc segment.PostingsList) (segment.PostingsList, error) { // TODO: if except is non-nil, perhaps need to OR it with s.s.deleted? return s.d.PostingsList(term, s.s.deleted, prealloc) @@ -179,7 +179,7 @@ func (cfd *cachedFieldDocs) prepareFields(field string, ss *SegmentSnapshot) { next, err := dictItr.Next() for err == nil && next != nil { var err1 error - postings, err1 = dict.PostingsList(next.Term, nil, postings) + postings, err1 = dict.PostingsList([]byte(next.Term), nil, postings) if err1 != nil { cfd.err = err1 return From 293403a012f26f26956ac75e080d1bab7bc3c707 Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Fri, 13 Apr 2018 09:42:16 -0400 Subject: [PATCH 378/728] improve scorch Advance implementation advance will jump directly to the expected segment then it will skip directly to the expectec chunk --- index/scorch/reader_test.go | 8 +++--- index/scorch/segment/empty.go | 4 +++ index/scorch/segment/segment.go | 6 +++++ index/scorch/segment/zap/posting.go | 29 +++++++++++++++++++--- index/scorch/snapshot_index_tfr.go | 38 ++++++++++++++++++++--------- 5 files changed, 67 insertions(+), 18 deletions(-) diff --git a/index/scorch/reader_test.go b/index/scorch/reader_test.go index 4eb9b5fb9..8414cbdc1 100644 --- 
a/index/scorch/reader_test.go +++ b/index/scorch/reader_test.go @@ -15,6 +15,7 @@ package scorch import ( + "encoding/binary" "reflect" "testing" @@ -171,9 +172,10 @@ func TestIndexReader(t *testing.T) { if !match.ID.Equals(internalID2) { t.Errorf("Expected ID '2', got '%s'", match.ID) } - // NOTE: no point in changing this to internal id 3, there is no id 3 - // the test is looking for something that doens't exist and this doesn't - match, err = reader.Advance(index.IndexInternalID("3"), nil) + // have to manually construct bogus id, because it doesn't exist + internalID3 := make([]byte, 8) + binary.BigEndian.PutUint64(internalID3, 3) + match, err = reader.Advance(index.IndexInternalID(internalID3), nil) if err != nil { t.Errorf("unexpected error: %v", err) } diff --git a/index/scorch/segment/empty.go b/index/scorch/segment/empty.go index 9a64f223e..0350267b4 100644 --- a/index/scorch/segment/empty.go +++ b/index/scorch/segment/empty.go @@ -96,6 +96,10 @@ func (e *EmptyDictionaryIterator) Next() (*index.DictEntry, error) { return nil, nil } +func (e *EmptyPostingsIterator) Advance(uint64) (Posting, error) { + return nil, nil +} + type EmptyPostingsList struct{} func (e *EmptyPostingsList) Iterator(includeFreq, includeNorm, includeLocations bool, diff --git a/index/scorch/segment/segment.go b/index/scorch/segment/segment.go index ff0b5124e..3b4a263a7 100644 --- a/index/scorch/segment/segment.go +++ b/index/scorch/segment/segment.go @@ -77,6 +77,12 @@ type PostingsIterator interface { // allocations. Next() (Posting, error) + // Advance will return the posting with the specified doc number + // or if there is no such posting, the next posting. + // Callers MUST NOT attempt to pass a docNum that is less than or + // equal to the currently visited posting doc Num. 
+ Advance(docNum uint64) (Posting, error) + Size() int } diff --git a/index/scorch/segment/zap/posting.go b/index/scorch/segment/zap/posting.go index cc8bfefee..f709920fc 100644 --- a/index/scorch/segment/zap/posting.go +++ b/index/scorch/segment/zap/posting.go @@ -504,7 +504,18 @@ func (i *PostingsIterator) readLocation(l *Location) error { // Next returns the next posting on the postings list, or nil at the end func (i *PostingsIterator) Next() (segment.Posting, error) { - docNum, exists, err := i.nextDocNum() + return i.nextAtOrAfter(0) +} + +// Advance returns the posting at the specified docNum or it is not present +// the next posting, or if the end is reached, nil +func (i *PostingsIterator) Advance(docNum uint64) (segment.Posting, error) { + return i.nextAtOrAfter(docNum) +} + +// Next returns the next posting on the postings list, or nil at the end +func (i *PostingsIterator) nextAtOrAfter(atOrAfter uint64) (segment.Posting, error) { + docNum, exists, err := i.nextDocNumAtOrAfter(atOrAfter) if err != nil || !exists { return nil, err } @@ -557,7 +568,7 @@ var freqHasLocs1Hit = encodeFreqHasLocs(1, false) func (i *PostingsIterator) nextBytes() ( docNumOut uint64, freq uint64, normBits uint64, bytesFreqNorm []byte, bytesLoc []byte, err error) { - docNum, exists, err := i.nextDocNum() + docNum, exists, err := i.nextDocNumAtOrAfter(0) if err != nil || !exists { return 0, 0, 0, nil, nil, err } @@ -602,11 +613,16 @@ func (i *PostingsIterator) nextBytes() ( // nextDocNum returns the next docNum on the postings list, and also // sets up the currChunk / loc related fields of the iterator. 
-func (i *PostingsIterator) nextDocNum() (uint64, bool, error) { +func (i *PostingsIterator) nextDocNumAtOrAfter(atOrAfter uint64) (uint64, bool, error) { if i.normBits1Hit != 0 { if i.docNum1Hit == docNum1HitFinished { return 0, false, nil } + if i.docNum1Hit < atOrAfter { + // advanced past our 1-hit + i.docNum1Hit = docNum1HitFinished // consume our 1-hit docNum + return 0, false, nil + } docNum := i.docNum1Hit i.docNum1Hit = docNum1HitFinished // consume our 1-hit docNum return docNum, true, nil @@ -617,6 +633,13 @@ func (i *PostingsIterator) nextDocNum() (uint64, bool, error) { } n := i.Actual.Next() + for uint64(n) < atOrAfter && i.Actual.HasNext() { + n = i.Actual.Next() + } + if uint64(n) < atOrAfter { + // couldn't find anything + return 0, false, nil + } allN := i.all.Next() nChunk := n / i.postings.sb.chunkFactor diff --git a/index/scorch/snapshot_index_tfr.go b/index/scorch/snapshot_index_tfr.go index 11f1283ec..89af3be4c 100644 --- a/index/scorch/snapshot_index_tfr.go +++ b/index/scorch/snapshot_index_tfr.go @@ -16,6 +16,7 @@ package scorch import ( "bytes" + "fmt" "reflect" "sync/atomic" @@ -134,24 +135,37 @@ func (i *IndexSnapshotTermFieldReader) Advance(ID index.IndexInternalID, preAllo } *i = *(i2.(*IndexSnapshotTermFieldReader)) } - // FIXME do something better - next, err := i.Next(preAlloced) + num, err := docInternalToNumber(ID) + if err != nil { + return nil, fmt.Errorf("error converting to doc number % x - %v", ID, err) + } + segIndex, ldocNum := i.snapshot.segmentIndexAndLocalDocNumFromGlobal(num) + if segIndex >= len(i.snapshot.segment) { + return nil, fmt.Errorf("computed segment index %d out of bounds %d", + segIndex, len(i.snapshot.segment)) + } + // skip directly to the target segment + i.segmentOffset = segIndex + next, err := i.iterators[i.segmentOffset].Advance(ldocNum) if err != nil { return nil, err } if next == nil { - return nil, nil + // we jumped directly to the segment that should have contained it + // but it wasn't there, so 
reuse Next() which should correctly + // get the next hit after it (we moved i.segmentOffset) + return i.Next(preAlloced) } - for bytes.Compare(next.ID, ID) < 0 { - next, err = i.Next(preAlloced) - if err != nil { - return nil, err - } - if next == nil { - break - } + + if preAlloced == nil { + preAlloced = &index.TermFieldDoc{} } - return next, nil + preAlloced.ID = docNumberToBytes(preAlloced.ID, next.Number()+ + i.snapshot.offsets[segIndex]) + i.postingToTermFieldDoc(next, preAlloced) + i.currID = preAlloced.ID + i.currPosting = next + return preAlloced, nil } func (i *IndexSnapshotTermFieldReader) Count() uint64 { From a4954aa8577f7da5e543f3c101b2f516eafeea05 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Fri, 13 Apr 2018 08:38:56 -0700 Subject: [PATCH 379/728] fixes #895 - scorch zap file format updated w/ # bytes of locations NOTE: This is a zap file format change. In the case of a composite field (i.e., _all field), the component fields might have differing configurations for IncludeTermVectors. This can mean the freq can sometimes be > numberOfLocations, where the previous code incorrectly assumed freq == numberOfLocations. By encoding the # of bytes used for locations data, the loops performing readLocation() will be able to read the correct # of locations. And, encoding the # of bytes used for locations data instead of the count of locations allows for an optimization of no longer reading unneeded locations data when skipping ahead. This fixes issue https://github.com/blevesearch/bleve/issues/895 reported by @xeizmendi. 
--- index/scorch/scorch_test.go | 55 +++++++++++++++++++++++++++++ index/scorch/segment/zap/build.go | 2 +- index/scorch/segment/zap/merge.go | 21 ++++++++--- index/scorch/segment/zap/new.go | 51 +++++++++++++++++++++----- index/scorch/segment/zap/posting.go | 52 +++++++++++++++++++-------- 5 files changed, 152 insertions(+), 29 deletions(-) diff --git a/index/scorch/scorch_test.go b/index/scorch/scorch_test.go index 3be52cb1f..cf784755d 100644 --- a/index/scorch/scorch_test.go +++ b/index/scorch/scorch_test.go @@ -25,9 +25,12 @@ import ( "time" "github.com/blevesearch/bleve/analysis" + "github.com/blevesearch/bleve/analysis/analyzer/keyword" + "github.com/blevesearch/bleve/analysis/analyzer/standard" regexpTokenizer "github.com/blevesearch/bleve/analysis/tokenizer/regexp" "github.com/blevesearch/bleve/document" "github.com/blevesearch/bleve/index" + "github.com/blevesearch/bleve/mapping" ) func DestroyTest() error { @@ -1707,3 +1710,55 @@ func TestIndexDocumentVisitFieldTermsWithMultipleFieldOptions(t *testing.T) { } } + +func TestAllFieldWithDifferentTermVectorsEnabled(t *testing.T) { + // Based on https://github.com/blevesearch/bleve/issues/895 from xeizmendi + mp := mapping.NewIndexMapping() + + keywordMapping := mapping.NewTextFieldMapping() + keywordMapping.Analyzer = keyword.Name + keywordMapping.IncludeTermVectors = false + keywordMapping.IncludeInAll = true + + textMapping := mapping.NewTextFieldMapping() + textMapping.Analyzer = standard.Name + textMapping.IncludeTermVectors = true + textMapping.IncludeInAll = true + + docMapping := mapping.NewDocumentStaticMapping() + docMapping.AddFieldMappingsAt("keyword", keywordMapping) + docMapping.AddFieldMappingsAt("text", textMapping) + + mp.DefaultMapping = docMapping + + _ = os.RemoveAll(testConfig["path"].(string)) + analysisQueue := index.NewAnalysisQueue(1) + idx, err := NewScorch("storeName", testConfig, analysisQueue) + if err != nil { + log.Fatalln(err) + } + err = idx.Open() + if err != nil { + 
t.Errorf("error opening index: %v", err) + } + defer func() { + _ = idx.Close() + _ = os.RemoveAll(testConfig["path"].(string)) + }() + + data := map[string]string{ + "keyword": "something", + "text": "A sentence that includes something within.", + } + + doc := document.NewDocument("1") + err = mp.MapDocument(doc, data) + if err != nil { + t.Errorf("error mapping doc: %v", err) + } + + err = idx.Update(doc) + if err != nil { + t.Errorf("Error updating index: %v", err) + } +} diff --git a/index/scorch/segment/zap/build.go b/index/scorch/segment/zap/build.go index 2c261a3eb..ae460cef1 100644 --- a/index/scorch/segment/zap/build.go +++ b/index/scorch/segment/zap/build.go @@ -20,7 +20,7 @@ import ( "os" ) -const Version uint32 = 9 +const Version uint32 = 10 const Type string = "zap" diff --git a/index/scorch/segment/zap/merge.go b/index/scorch/segment/zap/merge.go index f2a881856..9a7041b07 100644 --- a/index/scorch/segment/zap/merge.go +++ b/index/scorch/segment/zap/merge.go @@ -427,17 +427,30 @@ func mergeTermFreqNormLocs(fieldsMap map[string]uint16, term []byte, postItr *Po } if len(locs) > 0 { + numBytesLocs := 0 for _, loc := range locs { - if cap(bufLoc) < 5+len(loc.ArrayPositions()) { - bufLoc = make([]uint64, 0, 5+len(loc.ArrayPositions())) + ap := loc.ArrayPositions() + numBytesLocs += totalUvarintBytes(uint64(fieldsMap[loc.Field()]-1), + loc.Pos(), loc.Start(), loc.End(), uint64(len(ap)), ap) + } + + err = locEncoder.Add(hitNewDocNum, uint64(numBytesLocs)) + if err != nil { + return 0, 0, 0, nil, err + } + + for _, loc := range locs { + ap := loc.ArrayPositions() + if cap(bufLoc) < 5+len(ap) { + bufLoc = make([]uint64, 0, 5+len(ap)) } args := bufLoc[0:5] args[0] = uint64(fieldsMap[loc.Field()] - 1) args[1] = loc.Pos() args[2] = loc.Start() args[3] = loc.End() - args[4] = uint64(len(loc.ArrayPositions())) - args = append(args, loc.ArrayPositions()...) + args[4] = uint64(len(ap)) + args = append(args, ap...) err = locEncoder.Add(hitNewDocNum, args...) 
if err != nil { return 0, 0, 0, nil, err diff --git a/index/scorch/segment/zap/new.go b/index/scorch/segment/zap/new.go index da24988ae..4d56b0694 100644 --- a/index/scorch/segment/zap/new.go +++ b/index/scorch/segment/zap/new.go @@ -190,7 +190,7 @@ type interimStoredField struct { type interimFreqNorm struct { freq uint64 norm float32 - hasLocs bool + numLocs int } type interimLoc struct { @@ -446,7 +446,7 @@ func (s *interim) processDocument(docNum uint64, interimFreqNorm{ freq: uint64(tf.Frequency()), norm: norm, - hasLocs: len(tf.Locations) > 0, + numLocs: len(tf.Locations), }) if len(tf.Locations) > 0 { @@ -632,18 +632,28 @@ func (s *interim) writeDicts() (fdvIndexOffset uint64, dictOffsets []uint64, err freqNorm := freqNorms[freqNormOffset] err = tfEncoder.Add(docNum, - encodeFreqHasLocs(freqNorm.freq, freqNorm.hasLocs), + encodeFreqHasLocs(freqNorm.freq, freqNorm.numLocs > 0), uint64(math.Float32bits(freqNorm.norm))) if err != nil { return 0, nil, err } - for i := uint64(0); i < freqNorm.freq; i++ { - if len(locs) > 0 { - loc := locs[locOffset] + if freqNorm.numLocs > 0 { + numBytesLocs := 0 + for _, loc := range locs[locOffset : locOffset+freqNorm.numLocs] { + numBytesLocs += totalUvarintBytes( + uint64(loc.fieldID), loc.pos, loc.start, loc.end, + uint64(len(loc.arrayposs)), loc.arrayposs) + } + + err = locEncoder.Add(docNum, uint64(numBytesLocs)) + if err != nil { + return 0, nil, err + } - err = locEncoder.Add(docNum, uint64(loc.fieldID), - loc.pos, loc.start, loc.end, + for _, loc := range locs[locOffset : locOffset+freqNorm.numLocs] { + err = locEncoder.Add(docNum, + uint64(loc.fieldID), loc.pos, loc.start, loc.end, uint64(len(loc.arrayposs))) if err != nil { return 0, nil, err @@ -655,7 +665,7 @@ func (s *interim) writeDicts() (fdvIndexOffset uint64, dictOffsets []uint64, err } } - locOffset++ + locOffset += freqNorm.numLocs } freqNormOffset++ @@ -775,3 +785,26 @@ func encodeFieldType(f document.Field) byte { } return fieldType } + +// returns the 
total # of bytes needed to encode the given uint64's +// into binary.PutUVarint() encoding +func totalUvarintBytes(a, b, c, d, e uint64, more []uint64) (n int) { + n = numUvarintBytes(a) + n += numUvarintBytes(b) + n += numUvarintBytes(c) + n += numUvarintBytes(d) + n += numUvarintBytes(e) + for _, v := range more { + n += numUvarintBytes(v) + } + return n +} + +// returns # of bytes needed to encode x in binary.PutUvarint() encoding +func numUvarintBytes(x uint64) (n int) { + for x >= 0x80 { + x >>= 7 + n++ + } + return n + 1 +} diff --git a/index/scorch/segment/zap/posting.go b/index/scorch/segment/zap/posting.go index f709920fc..7bfbe0bef 100644 --- a/index/scorch/segment/zap/posting.go +++ b/index/scorch/segment/zap/posting.go @@ -18,6 +18,7 @@ import ( "bytes" "encoding/binary" "fmt" + "io" "math" "reflect" @@ -539,7 +540,10 @@ func (i *PostingsIterator) nextAtOrAfter(atOrAfter uint64) (segment.Posting, err rv.norm = math.Float32frombits(uint32(normBits)) if i.includeLocs && hasLocs { - // read off 'freq' locations, into reused slices + // prepare locations into reused slices, where we assume + // rv.freq >= "number of locs", since in a composite field, + // some component fields might have their IncludeTermVector + // flags disabled while other component fields are enabled if cap(i.nextLocs) >= int(rv.freq) { i.nextLocs = i.nextLocs[0:rv.freq] } else { @@ -548,13 +552,22 @@ func (i *PostingsIterator) nextAtOrAfter(atOrAfter uint64) (segment.Posting, err if cap(i.nextSegmentLocs) < int(rv.freq) { i.nextSegmentLocs = make([]segment.Location, rv.freq, rv.freq*2) } - rv.locs = i.nextSegmentLocs[0:rv.freq] - for j := 0; j < int(rv.freq); j++ { + rv.locs = i.nextSegmentLocs[:0] + + numLocsBytes, err := binary.ReadUvarint(i.locReader) + if err != nil { + return nil, fmt.Errorf("error reading location numLocsBytes: %v", err) + } + + j := 0 + startBytesRemaining := i.locReader.Len() // # bytes remaining in the locReader + for startBytesRemaining-i.locReader.Len() < 
int(numLocsBytes) { err := i.readLocation(&i.nextLocs[j]) if err != nil { return nil, err } - rv.locs[j] = &i.nextLocs[j] + rv.locs = append(rv.locs, &i.nextLocs[j]) + j++ } } @@ -597,11 +610,16 @@ func (i *PostingsIterator) nextBytes() ( if hasLocs { startLoc := len(i.currChunkLoc) - i.locReader.Len() - for j := uint64(0); j < freq; j++ { - err := i.readLocation(nil) - if err != nil { - return 0, 0, 0, nil, nil, err - } + numLocsBytes, err := binary.ReadUvarint(i.locReader) + if err != nil { + return 0, 0, 0, nil, nil, + fmt.Errorf("error reading location nextBytes numLocs: %v", err) + } + + // skip over all the location bytes + _, err = i.locReader.Seek(int64(numLocsBytes), io.SeekCurrent) + if err != nil { + return 0, 0, 0, nil, nil, err } endLoc := len(i.currChunkLoc) - i.locReader.Len() @@ -659,17 +677,21 @@ func (i *PostingsIterator) nextDocNumAtOrAfter(atOrAfter uint64) (uint64, bool, } // read off freq/offsets even though we don't care about them - freq, _, hasLocs, err := i.readFreqNormHasLocs() + _, _, hasLocs, err := i.readFreqNormHasLocs() if err != nil { return 0, false, err } if i.includeLocs && hasLocs { - for j := 0; j < int(freq); j++ { - err := i.readLocation(nil) - if err != nil { - return 0, false, err - } + numLocsBytes, err := binary.ReadUvarint(i.locReader) + if err != nil { + return 0, false, fmt.Errorf("error reading location numLocsBytes: %v", err) + } + + // skip over all the location bytes + _, err = i.locReader.Seek(int64(numLocsBytes), io.SeekCurrent) + if err != nil { + return 0, false, err } } } From dc6c1f80c138263ea2c22bf81f745974e5bb7b80 Mon Sep 17 00:00:00 2001 From: abhinavdangeti Date: Tue, 10 Apr 2018 21:07:15 -0700 Subject: [PATCH 380/728] [1/3] Optimization for reducing merger overhead from doc-values Part1: Changing the format in which chunkedContentCoder writes, essentially reversing the way data is written. *This commit changes the zap file format, bumping up the version. 
--- index/scorch/segment/zap/build.go | 2 +- index/scorch/segment/zap/contentcoder.go | 40 +++++++++++++++---- index/scorch/segment/zap/contentcoder_test.go | 13 ++++-- index/scorch/segment/zap/docvalues.go | 29 +++++++++----- index/scorch/segment/zap/merge.go | 19 ++++++--- index/scorch/segment/zap/new.go | 19 ++++++--- index/scorch/segment/zap/segment.go | 14 +++++-- 7 files changed, 100 insertions(+), 36 deletions(-) diff --git a/index/scorch/segment/zap/build.go b/index/scorch/segment/zap/build.go index ae460cef1..91bfd4e24 100644 --- a/index/scorch/segment/zap/build.go +++ b/index/scorch/segment/zap/build.go @@ -20,7 +20,7 @@ import ( "os" ) -const Version uint32 = 10 +const Version uint32 = 11 const Type string = "zap" diff --git a/index/scorch/segment/zap/contentcoder.go b/index/scorch/segment/zap/contentcoder.go index 2148d1d45..6630a59ce 100644 --- a/index/scorch/segment/zap/contentcoder.go +++ b/index/scorch/segment/zap/contentcoder.go @@ -146,33 +146,59 @@ func (c *chunkedContentCoder) Add(docNum uint64, vals []byte) error { } // Write commits all the encoded chunked contents to the provided writer. +// +// | ..... data ..... 
| chunk offsets (varints) +// | position of chunk offsets (uint64) | number of offsets (uint64) | +// func (c *chunkedContentCoder) Write(w io.Writer) (int, error) { var tw int - buf := make([]byte, binary.MaxVarintLen64) - // write out the number of chunks - n := binary.PutUvarint(buf, uint64(len(c.chunkLens))) - nw, err := w.Write(buf[:n]) + + // write out the data section first + nw, err := w.Write(c.final) tw += nw if err != nil { return tw, err } + chunkOffsetsStart := uint64(tw) + + if cap(c.final) < binary.MaxVarintLen64 { + c.final = make([]byte, binary.MaxVarintLen64) + } else { + c.final = c.final[0:binary.MaxVarintLen64] + } chunkOffsets := modifyLengthsToEndOffsets(c.chunkLens) // write out the chunk offsets for _, chunkOffset := range chunkOffsets { - n := binary.PutUvarint(buf, chunkOffset) - nw, err = w.Write(buf[:n]) + n := binary.PutUvarint(c.final, chunkOffset) + nw, err = w.Write(c.final[:n]) tw += nw if err != nil { return tw, err } } - // write out the data + + chunkOffsetsLen := uint64(tw) - chunkOffsetsStart + + c.final = c.final[0:8] + // write out the length of chunk offsets + binary.BigEndian.PutUint64(c.final, chunkOffsetsLen) nw, err = w.Write(c.final) tw += nw if err != nil { return tw, err } + + // write out the number of chunks + binary.BigEndian.PutUint64(c.final, uint64(len(c.chunkLens))) + nw, err = w.Write(c.final) + tw += nw + if err != nil { + return tw, err + } + + c.final = c.final[:0] + return tw, nil } diff --git a/index/scorch/segment/zap/contentcoder_test.go b/index/scorch/segment/zap/contentcoder_test.go index ff26138a6..4392a4046 100644 --- a/index/scorch/segment/zap/contentcoder_test.go +++ b/index/scorch/segment/zap/contentcoder_test.go @@ -35,7 +35,10 @@ func TestChunkContentCoder(t *testing.T) { docNums: []uint64{0}, vals: [][]byte{[]byte("bleve")}, // 1 chunk, chunk-0 length 11(b), value - expected: string([]byte{0x1, 0xa, 0x1, 0x0, 0x05, 0x05, 0x10, 0x62, 0x6c, 0x65, 0x76, 0x65}), + expected: string([]byte{0x1, 
0x0, 0x5, 0x5, 0x10, 0x62, 0x6c, 0x65, 0x76, 0x65, + 0xa, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1}), }, { maxDocNum: 1, @@ -46,9 +49,11 @@ func TestChunkContentCoder(t *testing.T) { []byte("scorch"), }, - expected: string([]byte{0x02, 0x0b, 0x16, 0x01, 0x00, 0x06, 0x06, 0x14, - 0x75, 0x70, 0x73, 0x69, 0x64, 0x65, 0x01, 0x01, 0x06, 0x06, - 0x14, 0x73, 0x63, 0x6f, 0x72, 0x63, 0x68}), + expected: string([]byte{0x1, 0x0, 0x6, 0x6, 0x14, 0x75, 0x70, 0x73, 0x69, 0x64, + 0x65, 0x1, 0x1, 0x6, 0x6, 0x14, 0x73, 0x63, 0x6f, 0x72, 0x63, 0x68, + 0xb, 0x16, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x2, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x2}), }, } diff --git a/index/scorch/segment/zap/docvalues.go b/index/scorch/segment/zap/docvalues.go index 220897853..c87d920b5 100644 --- a/index/scorch/segment/zap/docvalues.go +++ b/index/scorch/segment/zap/docvalues.go @@ -62,29 +62,34 @@ func (di *docValueReader) curChunkNumber() uint64 { } func (s *SegmentBase) loadFieldDocValueReader(field string, - fieldDvLoc uint64) (*docValueReader, error) { + fieldDvLocStart, fieldDvLocEnd uint64) (*docValueReader, error) { // get the docValue offset for the given fields - if fieldDvLoc == fieldNotUninverted { + if fieldDvLocStart == fieldNotUninverted { return nil, fmt.Errorf("loadFieldDocValueReader: "+ "no docValues found for field: %s", field) } - // read the number of chunks, chunk lengths - var offset, loc uint64 - numChunks, read := binary.Uvarint(s.mem[fieldDvLoc : fieldDvLoc+binary.MaxVarintLen64]) - if read <= 0 { - return nil, fmt.Errorf("failed to read the field "+ - "doc values for field %s", field) + // read the number of chunks, and chunk offsets position + var numChunks, chunkOffsetsPosition uint64 + + if fieldDvLocEnd-fieldDvLocStart > 16 { + numChunks = binary.BigEndian.Uint64(s.mem[fieldDvLocEnd-8 : fieldDvLocEnd]) + // read the length of chunk offsets + chunkOffsetsLen := binary.BigEndian.Uint64(s.mem[fieldDvLocEnd-16 : fieldDvLocEnd-8]) + 
// acquire position of chunk offsets + chunkOffsetsPosition = (fieldDvLocEnd - 16) - chunkOffsetsLen } - offset += uint64(read) fdvIter := &docValueReader{ curChunkNum: math.MaxUint64, field: field, chunkOffsets: make([]uint64, int(numChunks)), } + + // read the chunk offsets + var offset uint64 for i := 0; i < int(numChunks); i++ { - loc, read = binary.Uvarint(s.mem[fieldDvLoc+offset : fieldDvLoc+offset+binary.MaxVarintLen64]) + loc, read := binary.Uvarint(s.mem[chunkOffsetsPosition+offset : chunkOffsetsPosition+offset+binary.MaxVarintLen64]) if read <= 0 { return nil, fmt.Errorf("corrupted chunk offset during segment load") } @@ -92,7 +97,9 @@ func (s *SegmentBase) loadFieldDocValueReader(field string, offset += uint64(read) } - fdvIter.dvDataLoc = fieldDvLoc + offset + // set the data offset + fdvIter.dvDataLoc = fieldDvLocStart + return fdvIter, nil } diff --git a/index/scorch/segment/zap/merge.go b/index/scorch/segment/zap/merge.go index 9a7041b07..fb64a5a26 100644 --- a/index/scorch/segment/zap/merge.go +++ b/index/scorch/segment/zap/merge.go @@ -169,7 +169,8 @@ func persistMergedRest(segments []*SegmentBase, dropsIn []*roaring.Bitmap, var postItr *PostingsIterator rv := make([]uint64, len(fieldsInv)) - fieldDvLocs := make([]uint64, len(fieldsInv)) + fieldDvLocsStart := make([]uint64, len(fieldsInv)) + fieldDvLocsEnd := make([]uint64, len(fieldsInv)) tfEncoder := newChunkedIntCoder(uint64(chunkFactor), newSegDocCount-1) locEncoder := newChunkedIntCoder(uint64(chunkFactor), newSegDocCount-1) @@ -370,8 +371,8 @@ func persistMergedRest(segments []*SegmentBase, dropsIn []*roaring.Bitmap, return nil, 0, err } - // get the field doc value offset - fieldDvLocs[fieldID] = uint64(w.Count()) + // get the field doc value offset (start) + fieldDvLocsStart[fieldID] = uint64(w.Count()) // persist the doc value details for this field _, err = fdvEncoder.Write(w) @@ -379,6 +380,9 @@ func persistMergedRest(segments []*SegmentBase, dropsIn []*roaring.Bitmap, return nil, 0, err 
} + // get the field doc value offset (end) + fieldDvLocsEnd[fieldID] = uint64(w.Count()) + // reset vellum buffer and vellum builder vellumBuf.Reset() err = newVellum.Reset(&vellumBuf) @@ -390,12 +394,17 @@ func persistMergedRest(segments []*SegmentBase, dropsIn []*roaring.Bitmap, fieldDvLocsOffset := uint64(w.Count()) buf := bufMaxVarintLen64 - for _, offset := range fieldDvLocs { - n := binary.PutUvarint(buf, uint64(offset)) + for i := 0; i < len(fieldDvLocsStart); i++ { + n := binary.PutUvarint(buf, fieldDvLocsStart[i]) _, err := w.Write(buf[:n]) if err != nil { return nil, 0, err } + n = binary.PutUvarint(buf, fieldDvLocsEnd[i]) + _, err = w.Write(buf[:n]) + if err != nil { + return nil, 0, err + } } return rv, fieldDvLocsOffset, nil diff --git a/index/scorch/segment/zap/new.go b/index/scorch/segment/zap/new.go index 4d56b0694..7b022474e 100644 --- a/index/scorch/segment/zap/new.go +++ b/index/scorch/segment/zap/new.go @@ -585,7 +585,8 @@ func (s *interim) writeStoredFields() ( func (s *interim) writeDicts() (fdvIndexOffset uint64, dictOffsets []uint64, err error) { dictOffsets = make([]uint64, len(s.FieldsInv)) - fdvOffsets := make([]uint64, len(s.FieldsInv)) + fdvOffsetsStart := make([]uint64, len(s.FieldsInv)) + fdvOffsetsEnd := make([]uint64, len(s.FieldsInv)) buf := s.grabBuf(binary.MaxVarintLen64) @@ -741,27 +742,35 @@ func (s *interim) writeDicts() (fdvIndexOffset uint64, dictOffsets []uint64, err return 0, nil, err } - fdvOffsets[fieldID] = uint64(s.w.Count()) + fdvOffsetsStart[fieldID] = uint64(s.w.Count()) _, err = fdvEncoder.Write(s.w) if err != nil { return 0, nil, err } + fdvOffsetsEnd[fieldID] = uint64(s.w.Count()) + fdvEncoder.Reset() } else { - fdvOffsets[fieldID] = fieldNotUninverted + fdvOffsetsStart[fieldID] = fieldNotUninverted + fdvOffsetsEnd[fieldID] = fieldNotUninverted } } fdvIndexOffset = uint64(s.w.Count()) - for _, fdvOffset := range fdvOffsets { - n := binary.PutUvarint(buf, fdvOffset) + for i := 0; i < len(fdvOffsetsStart); i++ { + 
n := binary.PutUvarint(buf, fdvOffsetsStart[i]) _, err := s.w.Write(buf[:n]) if err != nil { return 0, nil, err } + n = binary.PutUvarint(buf, fdvOffsetsEnd[i]) + _, err = s.w.Write(buf[:n]) + if err != nil { + return 0, nil, err + } } return fdvIndexOffset, dictOffsets, nil diff --git a/index/scorch/segment/zap/segment.go b/index/scorch/segment/zap/segment.go index f9549416a..3bae48ddb 100644 --- a/index/scorch/segment/zap/segment.go +++ b/index/scorch/segment/zap/segment.go @@ -487,12 +487,20 @@ func (s *SegmentBase) loadDvReaders() error { var read uint64 for fieldID, field := range s.fieldsInv { - fieldLoc, n := binary.Uvarint(s.mem[s.docValueOffset+read : s.docValueOffset+read+binary.MaxVarintLen64]) + var fieldLocStart, fieldLocEnd uint64 + var n int + fieldLocStart, n = binary.Uvarint(s.mem[s.docValueOffset+read : s.docValueOffset+read+binary.MaxVarintLen64]) if n <= 0 { - return fmt.Errorf("loadDvReaders: failed to read the docvalue offsets for field %d", fieldID) + return fmt.Errorf("loadDvReaders: failed to read the docvalue offset start for field %d", fieldID) } - s.fieldDvReaders[uint16(fieldID)], _ = s.loadFieldDocValueReader(field, fieldLoc) read += uint64(n) + fieldLocEnd, n = binary.Uvarint(s.mem[s.docValueOffset+read : s.docValueOffset+read+binary.MaxVarintLen64]) + if n <= 0 { + return fmt.Errorf("loadDvReaders: failed to read the docvalue offset end for field %d", fieldID) + } + read += uint64(n) + + s.fieldDvReaders[uint16(fieldID)], _ = s.loadFieldDocValueReader(field, fieldLocStart, fieldLocEnd) } return nil } From e3f2a721e979911bc004b40cf4a29e4a70c4b98c Mon Sep 17 00:00:00 2001 From: abhinavdangeti Date: Wed, 11 Apr 2018 11:29:59 -0700 Subject: [PATCH 381/728] [2/3] Optimization for reducing merger overhead from doc-values Part2: Introduce a new flag to the chunkedContentCoder to write out to the output buffer in chunks. 
*Added unit tests to compare behavior --- index/scorch/segment/zap/contentcoder.go | 53 +++++++++----- index/scorch/segment/zap/contentcoder_test.go | 69 +++++++++++++++---- index/scorch/segment/zap/merge.go | 10 +-- index/scorch/segment/zap/new.go | 4 +- 4 files changed, 99 insertions(+), 37 deletions(-) diff --git a/index/scorch/segment/zap/contentcoder.go b/index/scorch/segment/zap/contentcoder.go index 6630a59ce..b9ff8179b 100644 --- a/index/scorch/segment/zap/contentcoder.go +++ b/index/scorch/segment/zap/contentcoder.go @@ -34,10 +34,14 @@ var termSeparator byte = 0xff var termSeparatorSplitSlice = []byte{termSeparator} type chunkedContentCoder struct { - final []byte - chunkSize uint64 - currChunk uint64 - chunkLens []uint64 + final []byte + chunkSize uint64 + currChunk uint64 + chunkLens []uint64 + + w io.Writer + progressiveWrite bool + chunkMetaBuf bytes.Buffer chunkBuf bytes.Buffer @@ -55,13 +59,15 @@ type MetaData struct { // newChunkedContentCoder returns a new chunk content coder which // packs data into chunks based on the provided chunkSize -func newChunkedContentCoder(chunkSize uint64, - maxDocNum uint64) *chunkedContentCoder { +func newChunkedContentCoder(chunkSize uint64, maxDocNum uint64, + w io.Writer, progressiveWrite bool) *chunkedContentCoder { total := maxDocNum/chunkSize + 1 rv := &chunkedContentCoder{ - chunkSize: chunkSize, - chunkLens: make([]uint64, total), - chunkMeta: make([]MetaData, 0, total), + chunkSize: chunkSize, + chunkLens: make([]uint64, total), + chunkMeta: make([]MetaData, 0, total), + w: w, + progressiveWrite: progressiveWrite, } return rv @@ -111,6 +117,15 @@ func (c *chunkedContentCoder) flushContents() error { c.final = append(c.final, c.compressed...) 
c.chunkLens[c.currChunk] = uint64(len(c.compressed) + len(metaData)) + + if c.progressiveWrite { + _, err := c.w.Write(c.final) + if err != nil { + return err + } + c.final = c.final[:0] + } + return nil } @@ -150,14 +165,16 @@ func (c *chunkedContentCoder) Add(docNum uint64, vals []byte) error { // | ..... data ..... | chunk offsets (varints) // | position of chunk offsets (uint64) | number of offsets (uint64) | // -func (c *chunkedContentCoder) Write(w io.Writer) (int, error) { +func (c *chunkedContentCoder) Write() (int, error) { var tw int - // write out the data section first - nw, err := w.Write(c.final) - tw += nw - if err != nil { - return tw, err + if c.final != nil { + // write out the data section first + nw, err := c.w.Write(c.final) + tw += nw + if err != nil { + return tw, err + } } chunkOffsetsStart := uint64(tw) @@ -171,7 +188,7 @@ func (c *chunkedContentCoder) Write(w io.Writer) (int, error) { // write out the chunk offsets for _, chunkOffset := range chunkOffsets { n := binary.PutUvarint(c.final, chunkOffset) - nw, err = w.Write(c.final[:n]) + nw, err := c.w.Write(c.final[:n]) tw += nw if err != nil { return tw, err @@ -183,7 +200,7 @@ func (c *chunkedContentCoder) Write(w io.Writer) (int, error) { c.final = c.final[0:8] // write out the length of chunk offsets binary.BigEndian.PutUint64(c.final, chunkOffsetsLen) - nw, err = w.Write(c.final) + nw, err := c.w.Write(c.final) tw += nw if err != nil { return tw, err @@ -191,7 +208,7 @@ func (c *chunkedContentCoder) Write(w io.Writer) (int, error) { // write out the number of chunks binary.BigEndian.PutUint64(c.final, uint64(len(c.chunkLens))) - nw, err = w.Write(c.final) + nw, err = c.w.Write(c.final) tw += nw if err != nil { return tw, err diff --git a/index/scorch/segment/zap/contentcoder_test.go b/index/scorch/segment/zap/contentcoder_test.go index 4392a4046..62ffde413 100644 --- a/index/scorch/segment/zap/contentcoder_test.go +++ b/index/scorch/segment/zap/contentcoder_test.go @@ -16,18 +16,17 @@ 
package zap import ( "bytes" - "reflect" "testing" ) -func TestChunkContentCoder(t *testing.T) { +func TestChunkedContentCoder(t *testing.T) { tests := []struct { maxDocNum uint64 chunkSize uint64 docNums []uint64 vals [][]byte - expected string + expected []byte }{ { maxDocNum: 0, @@ -35,10 +34,10 @@ func TestChunkContentCoder(t *testing.T) { docNums: []uint64{0}, vals: [][]byte{[]byte("bleve")}, // 1 chunk, chunk-0 length 11(b), value - expected: string([]byte{0x1, 0x0, 0x5, 0x5, 0x10, 0x62, 0x6c, 0x65, 0x76, 0x65, + expected: []byte{0x1, 0x0, 0x5, 0x5, 0x10, 0x62, 0x6c, 0x65, 0x76, 0x65, 0xa, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1, - 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1}), + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1}, }, { maxDocNum: 1, @@ -49,17 +48,18 @@ func TestChunkContentCoder(t *testing.T) { []byte("scorch"), }, - expected: string([]byte{0x1, 0x0, 0x6, 0x6, 0x14, 0x75, 0x70, 0x73, 0x69, 0x64, + expected: []byte{0x1, 0x0, 0x6, 0x6, 0x14, 0x75, 0x70, 0x73, 0x69, 0x64, 0x65, 0x1, 0x1, 0x6, 0x6, 0x14, 0x73, 0x63, 0x6f, 0x72, 0x63, 0x68, 0xb, 0x16, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x2, - 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x2}), + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x2}, }, } for _, test := range tests { - cic := newChunkedContentCoder(test.chunkSize, test.maxDocNum) + var actual bytes.Buffer + cic := newChunkedContentCoder(test.chunkSize, test.maxDocNum, &actual, false) for i, docNum := range test.docNums { err := cic.Add(docNum, test.vals[i]) if err != nil { @@ -67,14 +67,59 @@ func TestChunkContentCoder(t *testing.T) { } } _ = cic.Close() - var actual bytes.Buffer - _, err := cic.Write(&actual) + _, err := cic.Write() if err != nil { t.Fatalf("error writing: %v", err) } - if !reflect.DeepEqual(test.expected, string(actual.Bytes())) { - t.Errorf("got:%s, expected:%s", string(actual.Bytes()), test.expected) + if !bytes.Equal(test.expected, actual.Bytes()) { + t.Errorf("got:%s, expected:%s", string(actual.Bytes()), string(test.expected)) } } } + +func 
TestChunkedContentCoders(t *testing.T) { + maxDocNum := uint64(5) + chunkSize := uint64(1) + docNums := []uint64{0, 1, 2, 3, 4, 5} + vals := [][]byte{ + []byte("scorch"), + []byte("does"), + []byte("better"), + []byte("than"), + []byte("upside"), + []byte("down"), + } + + var actual1, actual2 bytes.Buffer + // chunkedContentCoder that writes out at the end + cic1 := newChunkedContentCoder(chunkSize, maxDocNum, &actual1, false) + // chunkedContentCoder that writes out in chunks + cic2 := newChunkedContentCoder(chunkSize, maxDocNum, &actual2, true) + + for i, docNum := range docNums { + err := cic1.Add(docNum, vals[i]) + if err != nil { + t.Fatalf("error adding to intcoder: %v", err) + } + err = cic2.Add(docNum, vals[i]) + if err != nil { + t.Fatalf("error adding to intcoder: %v", err) + } + } + _ = cic1.Close() + _ = cic2.Close() + + _, err := cic1.Write() + if err != nil { + t.Fatalf("error writing: %v", err) + } + _, err = cic2.Write() + if err != nil { + t.Fatalf("error writing: %v", err) + } + + if !bytes.Equal(actual1.Bytes(), actual2.Bytes()) { + t.Errorf("%s != %s", string(actual1.Bytes()), string(actual2.Bytes())) + } +} diff --git a/index/scorch/segment/zap/merge.go b/index/scorch/segment/zap/merge.go index fb64a5a26..cd3e76199 100644 --- a/index/scorch/segment/zap/merge.go +++ b/index/scorch/segment/zap/merge.go @@ -356,8 +356,11 @@ func persistMergedRest(segments []*SegmentBase, dropsIn []*roaring.Bitmap, rv[fieldID] = dictOffset + // get the field doc value offset (start) + fieldDvLocsStart[fieldID] = uint64(w.Count()) + // update the field doc values - fdvEncoder := newChunkedContentCoder(uint64(chunkFactor), newSegDocCount-1) + fdvEncoder := newChunkedContentCoder(uint64(chunkFactor), newSegDocCount-1, w, true) for docNum, docTerms := range docTermMap { if len(docTerms) > 0 { err = fdvEncoder.Add(uint64(docNum), docTerms) @@ -371,11 +374,8 @@ func persistMergedRest(segments []*SegmentBase, dropsIn []*roaring.Bitmap, return nil, 0, err } - // get the 
field doc value offset (start) - fieldDvLocsStart[fieldID] = uint64(w.Count()) - // persist the doc value details for this field - _, err = fdvEncoder.Write(w) + _, err = fdvEncoder.Write() if err != nil { return nil, 0, err } diff --git a/index/scorch/segment/zap/new.go b/index/scorch/segment/zap/new.go index 7b022474e..7471cf277 100644 --- a/index/scorch/segment/zap/new.go +++ b/index/scorch/segment/zap/new.go @@ -592,7 +592,7 @@ func (s *interim) writeDicts() (fdvIndexOffset uint64, dictOffsets []uint64, err tfEncoder := newChunkedIntCoder(uint64(s.chunkFactor), uint64(len(s.results)-1)) locEncoder := newChunkedIntCoder(uint64(s.chunkFactor), uint64(len(s.results)-1)) - fdvEncoder := newChunkedContentCoder(uint64(s.chunkFactor), uint64(len(s.results)-1)) + fdvEncoder := newChunkedContentCoder(uint64(s.chunkFactor), uint64(len(s.results)-1), s.w, false) var docTermMap [][]byte @@ -744,7 +744,7 @@ func (s *interim) writeDicts() (fdvIndexOffset uint64, dictOffsets []uint64, err fdvOffsetsStart[fieldID] = uint64(s.w.Count()) - _, err = fdvEncoder.Write(s.w) + _, err = fdvEncoder.Write() if err != nil { return 0, nil, err } From 3079fdc1a715643b7cf597cd61bf8a2ef6cfcd6a Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Thu, 19 Apr 2018 07:46:08 -0700 Subject: [PATCH 382/728] fixes #893 - clone zap docValueReader before using --- index/scorch/segment/zap/docvalues.go | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/index/scorch/segment/zap/docvalues.go b/index/scorch/segment/zap/docvalues.go index c87d920b5..bc5d1cac6 100644 --- a/index/scorch/segment/zap/docvalues.go +++ b/index/scorch/segment/zap/docvalues.go @@ -53,6 +53,22 @@ func (di *docValueReader) size() int { len(di.curChunkData) } +func (di *docValueReader) cloneInto(rv *docValueReader) *docValueReader { + if rv == nil { + rv = &docValueReader{} + } + + rv.field = di.field + rv.curChunkNum = math.MaxUint64 + rv.chunkOffsets = di.chunkOffsets // immutable, so it's 
sharable + rv.dvDataLoc = di.dvDataLoc + rv.curChunkHeader = nil + rv.curChunkData = nil + rv.uncompressed = nil + + return rv +} + func (di *docValueReader) fieldName() string { return di.field } @@ -186,6 +202,7 @@ func (di *docValueReader) getDocValueLocs(docNum uint64) (uint64, uint64) { // DocumentFieldTermVisitable interface func (s *SegmentBase) VisitDocumentFieldTerms(localDocNum uint64, fields []string, visitor index.DocumentFieldTermVisitor) error { + var dvIterClone *docValueReader fieldIDPlus1 := uint16(0) ok := true for _, field := range fields { @@ -197,15 +214,17 @@ func (s *SegmentBase) VisitDocumentFieldTerms(localDocNum uint64, fields []strin if dvIter, exists := s.fieldDvReaders[fieldIDPlus1-1]; exists && dvIter != nil { + dvIterClone = dvIter.cloneInto(dvIterClone) + // check if the chunk is already loaded - if docInChunk != dvIter.curChunkNumber() { - err := dvIter.loadDvChunk(docInChunk, s) + if docInChunk != dvIterClone.curChunkNumber() { + err := dvIterClone.loadDvChunk(docInChunk, s) if err != nil { continue } } - _ = dvIter.visitDocValues(localDocNum, visitor) + _ = dvIterClone.visitDocValues(localDocNum, visitor) } } return nil From 13ddd75be803d5b6c493c86b603f61c84c09f479 Mon Sep 17 00:00:00 2001 From: abhinavdangeti Date: Wed, 18 Apr 2018 17:05:18 -0700 Subject: [PATCH 383/728] [3/3] Optimization for reducing merger overhead from doc-values Part3: Getting rid of docTermMap during the merge operation, instead fetch all valid docNums and their corresponding values from the segmentBases via docValueReaders. 
--- index/scorch/segment/zap/docvalues.go | 31 +++++++++- index/scorch/segment/zap/merge.go | 83 ++++++++++++++------------- 2 files changed, 73 insertions(+), 41 deletions(-) diff --git a/index/scorch/segment/zap/docvalues.go b/index/scorch/segment/zap/docvalues.go index bc5d1cac6..c73c01215 100644 --- a/index/scorch/segment/zap/docvalues.go +++ b/index/scorch/segment/zap/docvalues.go @@ -34,10 +34,11 @@ func init() { reflectStaticSizedocValueReader = int(reflect.TypeOf(dvi).Size()) } +type docNumTermsVisitor func(docNum uint64, terms []byte) error + type docValueReader struct { field string curChunkNum uint64 - numChunks uint64 chunkOffsets []uint64 dvDataLoc uint64 curChunkHeader []MetaData @@ -151,6 +152,34 @@ func (di *docValueReader) loadDvChunk(chunkNumber uint64, s *SegmentBase) error return nil } +func (di *docValueReader) iterateAllDocValues(s *SegmentBase, visitor docNumTermsVisitor) error { + for i := 0; i < len(di.chunkOffsets); i++ { + err := di.loadDvChunk(uint64(i), s) + if err != nil { + return err + } + + // uncompress the already loaded data + uncompressed, err := snappy.Decode(di.uncompressed[:cap(di.uncompressed)], di.curChunkData) + if err != nil { + return err + } + di.uncompressed = uncompressed + + start := uint64(0) + for _, entry := range di.curChunkHeader { + err = visitor(entry.DocNum, uncompressed[start:entry.DocDvOffset]) + if err != nil { + return err + } + + start = entry.DocDvOffset + } + } + + return nil +} + func (di *docValueReader) visitDocValues(docNum uint64, visitor index.DocumentFieldTermVisitor) error { // binary search the term locations for the docNum diff --git a/index/scorch/segment/zap/merge.go b/index/scorch/segment/zap/merge.go index cd3e76199..7f8f23f77 100644 --- a/index/scorch/segment/zap/merge.go +++ b/index/scorch/segment/zap/merge.go @@ -175,12 +175,6 @@ func persistMergedRest(segments []*SegmentBase, dropsIn []*roaring.Bitmap, tfEncoder := newChunkedIntCoder(uint64(chunkFactor), newSegDocCount-1) locEncoder 
:= newChunkedIntCoder(uint64(chunkFactor), newSegDocCount-1) - // docTermMap is keyed by docNum, where the array impl provides - // better memory usage behavior than a sparse-friendlier hashmap - // for when docs have much structural similarity (i.e., every doc - // has a given field) - var docTermMap [][]byte - var vellumBuf bytes.Buffer newVellum, err := vellum.New(&vellumBuf, nil) if err != nil { @@ -198,6 +192,8 @@ func persistMergedRest(segments []*SegmentBase, dropsIn []*roaring.Bitmap, var dicts []*Dictionary var itrs []vellum.Iterator + var segmentsInFocus []*SegmentBase + for segmentI, segment := range segments { dict, err2 := segment.dictionary(fieldName) if err2 != nil { @@ -217,19 +213,11 @@ func persistMergedRest(segments []*SegmentBase, dropsIn []*roaring.Bitmap, } dicts = append(dicts, dict) itrs = append(itrs, itr) + segmentsInFocus = append(segmentsInFocus, segment) } } } - if uint64(cap(docTermMap)) < newSegDocCount { - docTermMap = make([][]byte, newSegDocCount) - } else { - docTermMap = docTermMap[0:newSegDocCount] - for docNum := range docTermMap { // reset the docTermMap - docTermMap[docNum] = docTermMap[docNum][:0] - } - } - var prevTerm []byte newRoaring.Clear() @@ -309,11 +297,11 @@ func persistMergedRest(segments []*SegmentBase, dropsIn []*roaring.Bitmap, // can optimize by copying freq/norm/loc bytes directly lastDocNum, lastFreq, lastNorm, err = mergeTermFreqNormLocsByCopying( term, postItr, newDocNums[itrI], newRoaring, - tfEncoder, locEncoder, docTermMap) + tfEncoder, locEncoder) } else { lastDocNum, lastFreq, lastNorm, bufLoc, err = mergeTermFreqNormLocs( fieldsMap, term, postItr, newDocNums[itrI], newRoaring, - tfEncoder, locEncoder, docTermMap, bufLoc) + tfEncoder, locEncoder, bufLoc) } if err != nil { return nil, 0, err @@ -361,27 +349,49 @@ func persistMergedRest(segments []*SegmentBase, dropsIn []*roaring.Bitmap, // update the field doc values fdvEncoder := newChunkedContentCoder(uint64(chunkFactor), newSegDocCount-1, w, true) - 
for docNum, docTerms := range docTermMap { - if len(docTerms) > 0 { - err = fdvEncoder.Add(uint64(docNum), docTerms) + + fdvReadersAvailable := false + var dvIterClone *docValueReader + for segmentI, segment := range segmentsInFocus { + fieldIDPlus1 := uint16(segment.fieldsMap[fieldName]) + if dvIter, exists := segment.fieldDvReaders[fieldIDPlus1-1]; exists && + dvIter != nil { + fdvReadersAvailable = true + dvIterClone = dvIter.cloneInto(dvIterClone) + err = dvIterClone.iterateAllDocValues(segment, func(docNum uint64, terms []byte) error { + if newDocNums[segmentI][docNum] == docDropped { + return nil + } + err := fdvEncoder.Add(newDocNums[segmentI][docNum], terms) + if err != nil { + return err + } + return nil + }) if err != nil { return nil, 0, err } } } - err = fdvEncoder.Close() - if err != nil { - return nil, 0, err - } - // persist the doc value details for this field - _, err = fdvEncoder.Write() - if err != nil { - return nil, 0, err - } + if fdvReadersAvailable { + err = fdvEncoder.Close() + if err != nil { + return nil, 0, err + } - // get the field doc value offset (end) - fieldDvLocsEnd[fieldID] = uint64(w.Count()) + // persist the doc value details for this field + _, err = fdvEncoder.Write() + if err != nil { + return nil, 0, err + } + + // get the field doc value offset (end) + fieldDvLocsEnd[fieldID] = uint64(w.Count()) + } else { + fieldDvLocsStart[fieldID] = fieldNotUninverted + fieldDvLocsEnd[fieldID] = fieldNotUninverted + } // reset vellum buffer and vellum builder vellumBuf.Reset() @@ -412,8 +422,7 @@ func persistMergedRest(segments []*SegmentBase, dropsIn []*roaring.Bitmap, func mergeTermFreqNormLocs(fieldsMap map[string]uint16, term []byte, postItr *PostingsIterator, newDocNums []uint64, newRoaring *roaring.Bitmap, - tfEncoder *chunkedIntCoder, locEncoder *chunkedIntCoder, docTermMap [][]byte, - bufLoc []uint64) ( + tfEncoder *chunkedIntCoder, locEncoder *chunkedIntCoder, bufLoc []uint64) ( lastDocNum uint64, lastFreq uint64, lastNorm 
uint64, bufLocOut []uint64, err error) { next, err := postItr.Next() for next != nil && err == nil { @@ -467,9 +476,6 @@ func mergeTermFreqNormLocs(fieldsMap map[string]uint16, term []byte, postItr *Po } } - docTermMap[hitNewDocNum] = - append(append(docTermMap[hitNewDocNum], term...), termSeparator) - lastDocNum = hitNewDocNum lastFreq = nextFreq lastNorm = nextNorm @@ -482,7 +488,7 @@ func mergeTermFreqNormLocs(fieldsMap map[string]uint16, term []byte, postItr *Po func mergeTermFreqNormLocsByCopying(term []byte, postItr *PostingsIterator, newDocNums []uint64, newRoaring *roaring.Bitmap, - tfEncoder *chunkedIntCoder, locEncoder *chunkedIntCoder, docTermMap [][]byte) ( + tfEncoder *chunkedIntCoder, locEncoder *chunkedIntCoder) ( lastDocNum uint64, lastFreq uint64, lastNorm uint64, err error) { nextDocNum, nextFreq, nextNorm, nextFreqNormBytes, nextLocBytes, err := postItr.nextBytes() @@ -505,9 +511,6 @@ func mergeTermFreqNormLocsByCopying(term []byte, postItr *PostingsIterator, } } - docTermMap[hitNewDocNum] = - append(append(docTermMap[hitNewDocNum], term...), termSeparator) - lastDocNum = hitNewDocNum lastFreq = nextFreq lastNorm = nextNorm From 542efc16c6d8eb59685cd24ed302969b23a8d17d Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Thu, 19 Apr 2018 10:18:40 -0400 Subject: [PATCH 384/728] alternate disjunction searcher impl using heap introduce an alternate disjunction searcher implementation that uses a heap to track searchers. when you have a disjunction of many other searchers, this should be more efficient by performing much fewer key comparisions. 
currently a cut-over from slice to heap is set at 10, this can be adjusted by setting the package-level variable named DisjunctionHeapTakeover --- search/searcher/search_disjunction.go | 302 ++--------------- search/searcher/search_disjunction_heap.go | 342 ++++++++++++++++++++ search/searcher/search_disjunction_slice.go | 298 +++++++++++++++++ 3 files changed, 660 insertions(+), 282 deletions(-) create mode 100644 search/searcher/search_disjunction_heap.go create mode 100644 search/searcher/search_disjunction_slice.go diff --git a/search/searcher/search_disjunction.go b/search/searcher/search_disjunction.go index b75041371..16df7ca36 100644 --- a/search/searcher/search_disjunction.go +++ b/search/searcher/search_disjunction.go @@ -1,4 +1,4 @@ -// Copyright (c) 2014 Couchbase, Inc. +// Copyright (c) 2018 Couchbase, Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -16,308 +16,46 @@ package searcher import ( "fmt" - "math" - "reflect" - "sort" "github.com/blevesearch/bleve/index" "github.com/blevesearch/bleve/search" - "github.com/blevesearch/bleve/search/scorer" - "github.com/blevesearch/bleve/size" ) -var reflectStaticSizeDisjunctionSearcher int - -func init() { - var ds DisjunctionSearcher - reflectStaticSizeDisjunctionSearcher = int(reflect.TypeOf(ds).Size()) -} - // DisjunctionMaxClauseCount is a compile time setting that applications can // adjust to non-zero value to cause the DisjunctionSearcher to return an // error instead of exeucting searches when the size exceeds this value. 
var DisjunctionMaxClauseCount = 0 -type DisjunctionSearcher struct { - indexReader index.IndexReader - searchers OrderedSearcherList - numSearchers int - queryNorm float64 - currs []*search.DocumentMatch - scorer *scorer.DisjunctionQueryScorer - min int - matching []*search.DocumentMatch - matchingIdxs []int - initialized bool -} - -func tooManyClauses(count int) bool { - if DisjunctionMaxClauseCount != 0 && count > DisjunctionMaxClauseCount { - return true - } - return false -} - -func tooManyClausesErr() error { - return fmt.Errorf("TooManyClauses[maxClauseCount is set to %d]", - DisjunctionMaxClauseCount) -} +// DisjunctionHeapTakeover is a compile time setting that applications can +// adjust to control when the DisjunctionSearcher will switch from a simple +// slice implementation to a heap implementation. +var DisjunctionHeapTakeover = 10 func NewDisjunctionSearcher(indexReader index.IndexReader, qsearchers []search.Searcher, min float64, options search.SearcherOptions) ( - *DisjunctionSearcher, error) { - return newDisjunctionSearcher(indexReader, qsearchers, min, options, - true) + search.Searcher, error) { + return newDisjunctionSearcher(indexReader, qsearchers, min, options, true) } func newDisjunctionSearcher(indexReader index.IndexReader, qsearchers []search.Searcher, min float64, options search.SearcherOptions, - limit bool) ( - *DisjunctionSearcher, error) { - if limit && tooManyClauses(len(qsearchers)) { - return nil, tooManyClausesErr() - } - // build the downstream searchers - searchers := make(OrderedSearcherList, len(qsearchers)) - for i, searcher := range qsearchers { - searchers[i] = searcher - } - // sort the searchers - sort.Sort(sort.Reverse(searchers)) - // build our searcher - rv := DisjunctionSearcher{ - indexReader: indexReader, - searchers: searchers, - numSearchers: len(searchers), - currs: make([]*search.DocumentMatch, len(searchers)), - scorer: scorer.NewDisjunctionQueryScorer(options), - min: int(min), - matching: 
make([]*search.DocumentMatch, len(searchers)), - matchingIdxs: make([]int, len(searchers)), - } - rv.computeQueryNorm() - return &rv, nil -} - -func (s *DisjunctionSearcher) Size() int { - sizeInBytes := reflectStaticSizeDisjunctionSearcher + size.SizeOfPtr + - s.scorer.Size() - - for _, entry := range s.searchers { - sizeInBytes += entry.Size() - } - - for _, entry := range s.currs { - if entry != nil { - sizeInBytes += entry.Size() - } - } - - for _, entry := range s.matching { - if entry != nil { - sizeInBytes += entry.Size() - } - } - - sizeInBytes += len(s.matchingIdxs) * size.SizeOfInt - - return sizeInBytes -} - -func (s *DisjunctionSearcher) computeQueryNorm() { - // first calculate sum of squared weights - sumOfSquaredWeights := 0.0 - for _, searcher := range s.searchers { - sumOfSquaredWeights += searcher.Weight() - } - // now compute query norm from this - s.queryNorm = 1.0 / math.Sqrt(sumOfSquaredWeights) - // finally tell all the downstream searchers the norm - for _, searcher := range s.searchers { - searcher.SetQueryNorm(s.queryNorm) - } -} - -func (s *DisjunctionSearcher) initSearchers(ctx *search.SearchContext) error { - var err error - // get all searchers pointing at their first match - for i, searcher := range s.searchers { - if s.currs[i] != nil { - ctx.DocumentMatchPool.Put(s.currs[i]) - } - s.currs[i], err = searcher.Next(ctx) - if err != nil { - return err - } - } - - err = s.updateMatches() - if err != nil { - return err - } - - s.initialized = true - return nil -} - -func (s *DisjunctionSearcher) updateMatches() error { - matching := s.matching[:0] - matchingIdxs := s.matchingIdxs[:0] - - for i := 0; i < len(s.currs); i++ { - curr := s.currs[i] - if curr == nil { - continue - } - - if len(matching) > 0 { - cmp := curr.IndexInternalID.Compare(matching[0].IndexInternalID) - if cmp > 0 { - continue - } - - if cmp < 0 { - matching = matching[:0] - matchingIdxs = matchingIdxs[:0] - } - } - - matching = append(matching, curr) - matchingIdxs = 
append(matchingIdxs, i) - } - - s.matching = matching - s.matchingIdxs = matchingIdxs - - return nil -} - -func (s *DisjunctionSearcher) Weight() float64 { - var rv float64 - for _, searcher := range s.searchers { - rv += searcher.Weight() - } - return rv -} - -func (s *DisjunctionSearcher) SetQueryNorm(qnorm float64) { - for _, searcher := range s.searchers { - searcher.SetQueryNorm(qnorm) - } -} - -func (s *DisjunctionSearcher) Next(ctx *search.SearchContext) ( - *search.DocumentMatch, error) { - if !s.initialized { - err := s.initSearchers(ctx) - if err != nil { - return nil, err - } - } - var err error - var rv *search.DocumentMatch - - found := false - for !found && len(s.matching) > 0 { - if len(s.matching) >= s.min { - found = true - // score this match - rv = s.scorer.Score(ctx, s.matching, len(s.matching), s.numSearchers) - } - - // invoke next on all the matching searchers - for _, i := range s.matchingIdxs { - searcher := s.searchers[i] - if s.currs[i] != rv { - ctx.DocumentMatchPool.Put(s.currs[i]) - } - s.currs[i], err = searcher.Next(ctx) - if err != nil { - return nil, err - } - } - - err = s.updateMatches() - if err != nil { - return nil, err - } - } - return rv, nil -} - -func (s *DisjunctionSearcher) Advance(ctx *search.SearchContext, - ID index.IndexInternalID) (*search.DocumentMatch, error) { - if !s.initialized { - err := s.initSearchers(ctx) - if err != nil { - return nil, err - } - } - // get all searchers pointing at their first match - var err error - for i, searcher := range s.searchers { - if s.currs[i] != nil { - if s.currs[i].IndexInternalID.Compare(ID) >= 0 { - continue - } - ctx.DocumentMatchPool.Put(s.currs[i]) - } - s.currs[i], err = searcher.Advance(ctx, ID) - if err != nil { - return nil, err - } + limit bool) (search.Searcher, error) { + if len(qsearchers) > DisjunctionHeapTakeover { + return newDisjunctionHeapSearcher(indexReader, qsearchers, min, options, + true) } - - err = s.updateMatches() - if err != nil { - return nil, err 
- } - - return s.Next(ctx) -} - -func (s *DisjunctionSearcher) Count() uint64 { - // for now return a worst case - var sum uint64 - for _, searcher := range s.searchers { - sum += searcher.Count() - } - return sum -} - -func (s *DisjunctionSearcher) Close() (rv error) { - for _, searcher := range s.searchers { - err := searcher.Close() - if err != nil && rv == nil { - rv = err - } - } - return rv -} - -func (s *DisjunctionSearcher) Min() int { - return s.min + return newDisjunctionSliceSearcher(indexReader, qsearchers, min, options, + true) } -func (s *DisjunctionSearcher) DocumentMatchPoolSize() int { - rv := len(s.currs) - for _, s := range s.searchers { - rv += s.DocumentMatchPoolSize() +func tooManyClauses(count int) bool { + if DisjunctionMaxClauseCount != 0 && count > DisjunctionMaxClauseCount { + return true } - return rv + return false } -// a disjunction searcher implements the index.Optimizable interface -// but only activates on an edge case where the disjunction is a -// wrapper around a single Optimizable child searcher -func (s *DisjunctionSearcher) Optimize(kind string, octx index.OptimizableContext) ( - index.OptimizableContext, error) { - if len(s.searchers) == 1 { - o, ok := s.searchers[0].(index.Optimizable) - if ok { - return o.Optimize(kind, octx) - } - } - - return octx, nil +func tooManyClausesErr() error { + return fmt.Errorf("TooManyClauses[maxClauseCount is set to %d]", + DisjunctionMaxClauseCount) } diff --git a/search/searcher/search_disjunction_heap.go b/search/searcher/search_disjunction_heap.go new file mode 100644 index 000000000..6414da2cc --- /dev/null +++ b/search/searcher/search_disjunction_heap.go @@ -0,0 +1,342 @@ +// Copyright (c) 2018 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package searcher + +import ( + "bytes" + "container/heap" + "math" + "reflect" + + "github.com/blevesearch/bleve/index" + "github.com/blevesearch/bleve/search" + "github.com/blevesearch/bleve/search/scorer" + "github.com/blevesearch/bleve/size" +) + +var reflectStaticSizeDisjunctionHeapSearcher int +var reflectStaticSizeSearcherCurr int + +func init() { + var dhs DisjunctionHeapSearcher + reflectStaticSizeDisjunctionHeapSearcher = int(reflect.TypeOf(dhs).Size()) + + var sc SearcherCurr + reflectStaticSizeSearcherCurr = int(reflect.TypeOf(sc).Size()) +} + +type SearcherCurr struct { + searcher search.Searcher + curr *search.DocumentMatch +} + +type DisjunctionHeapSearcher struct { + indexReader index.IndexReader + + numSearchers int + scorer *scorer.DisjunctionQueryScorer + min int + queryNorm float64 + initialized bool + searchers []search.Searcher + heap []*SearcherCurr + + matching []*search.DocumentMatch + matchingCurrs []*SearcherCurr +} + +func newDisjunctionHeapSearcher(indexReader index.IndexReader, + searchers []search.Searcher, min float64, options search.SearcherOptions, + limit bool) ( + *DisjunctionHeapSearcher, error) { + if limit && tooManyClauses(len(searchers)) { + return nil, tooManyClausesErr() + } + + // build our searcher + rv := DisjunctionHeapSearcher{ + indexReader: indexReader, + searchers: searchers, + numSearchers: len(searchers), + scorer: scorer.NewDisjunctionQueryScorer(options), + min: int(min), + matching: make([]*search.DocumentMatch, len(searchers)), + matchingCurrs: make([]*SearcherCurr, len(searchers)), + heap: 
make([]*SearcherCurr, 0, len(searchers)), + } + rv.computeQueryNorm() + return &rv, nil +} + +func (s *DisjunctionHeapSearcher) Size() int { + sizeInBytes := reflectStaticSizeDisjunctionHeapSearcher + size.SizeOfPtr + + s.scorer.Size() + + for _, entry := range s.searchers { + sizeInBytes += entry.Size() + } + + for _, entry := range s.matching { + if entry != nil { + sizeInBytes += entry.Size() + } + } + + // for matchingCurrs and heap, just use static size * len + // since searchers and document matches already counted above + sizeInBytes += len(s.matchingCurrs) * reflectStaticSizeSearcherCurr + sizeInBytes += len(s.heap) * reflectStaticSizeSearcherCurr + + return sizeInBytes +} + +func (s *DisjunctionHeapSearcher) computeQueryNorm() { + // first calculate sum of squared weights + sumOfSquaredWeights := 0.0 + for _, searcher := range s.searchers { + sumOfSquaredWeights += searcher.Weight() + } + // now compute query norm from this + s.queryNorm = 1.0 / math.Sqrt(sumOfSquaredWeights) + // finally tell all the downstream searchers the norm + for _, searcher := range s.searchers { + searcher.SetQueryNorm(s.queryNorm) + } +} + +func (s *DisjunctionHeapSearcher) initSearchers(ctx *search.SearchContext) error { + // alloc a single block of SearcherCurrs + block := make([]SearcherCurr, len(s.searchers)) + + // get all searchers pointing at their first match + for i, searcher := range s.searchers { + curr, err := searcher.Next(ctx) + if err != nil { + return err + } + if curr != nil { + block[i].searcher = searcher + block[i].curr = curr + heap.Push(s, &block[i]) + } + } + + err := s.updateMatches() + if err != nil { + return err + } + s.initialized = true + return nil +} + +func (s *DisjunctionHeapSearcher) updateMatches() error { + matching := s.matching[:0] + matchingCurrs := s.matchingCurrs[:0] + + if len(s.heap) > 0 { + + // top of the heap is our next hit + next := heap.Pop(s).(*SearcherCurr) + matching = append(matching, next.curr) + matchingCurrs = 
append(matchingCurrs, next) + + // now as long as top of heap matches, keep popping + for len(s.heap) > 0 && bytes.Compare(next.curr.IndexInternalID, s.heap[0].curr.IndexInternalID) == 0 { + next = heap.Pop(s).(*SearcherCurr) + matching = append(matching, next.curr) + matchingCurrs = append(matchingCurrs, next) + } + } + + s.matching = matching + s.matchingCurrs = matchingCurrs + + return nil +} + +func (s *DisjunctionHeapSearcher) Weight() float64 { + var rv float64 + for _, searcher := range s.searchers { + rv += searcher.Weight() + } + return rv +} + +func (s *DisjunctionHeapSearcher) SetQueryNorm(qnorm float64) { + for _, searcher := range s.searchers { + searcher.SetQueryNorm(qnorm) + } +} + +func (s *DisjunctionHeapSearcher) Next(ctx *search.SearchContext) ( + *search.DocumentMatch, error) { + if !s.initialized { + err := s.initSearchers(ctx) + if err != nil { + return nil, err + } + } + + var rv *search.DocumentMatch + found := false + for !found && len(s.matching) > 0 { + if len(s.matching) >= s.min { + found = true + // score this match + rv = s.scorer.Score(ctx, s.matching, len(s.matching), s.numSearchers) + } + + // invoke next on all the matching searchers + for _, matchingCurr := range s.matchingCurrs { + if matchingCurr.curr != rv { + ctx.DocumentMatchPool.Put(matchingCurr.curr) + } + curr, err := matchingCurr.searcher.Next(ctx) + if err != nil { + return nil, err + } + if curr != nil { + matchingCurr.curr = curr + heap.Push(s, matchingCurr) + } + } + + err := s.updateMatches() + if err != nil { + return nil, err + } + } + + return rv, nil +} + +func (s *DisjunctionHeapSearcher) Advance(ctx *search.SearchContext, + ID index.IndexInternalID) (*search.DocumentMatch, error) { + if !s.initialized { + err := s.initSearchers(ctx) + if err != nil { + return nil, err + } + } + + // if there is anything in matching, toss it back onto the heap + for _, matchingCurr := range s.matchingCurrs { + heap.Push(s, matchingCurr) + } + s.matching = s.matching[:0] + 
s.matchingCurrs = s.matchingCurrs[:0] + + // get all searchers pointing at their first match + for i, searcherCurr := range s.heap { + if searcherCurr.searcher != nil { + if searcherCurr.curr.IndexInternalID.Compare(ID) >= 0 { + continue + } + ctx.DocumentMatchPool.Put(searcherCurr.curr) + } + curr, err := searcherCurr.searcher.Advance(ctx, ID) + if err != nil { + return nil, err + } + searcherCurr.curr = curr + heap.Fix(s, i) + } + // now remove any nil values (at top of heap) + for len(s.heap) > 0 && s.heap[0].curr == nil { + heap.Pop(s) + } + + err := s.updateMatches() + if err != nil { + return nil, err + } + + return s.Next(ctx) +} + +func (s *DisjunctionHeapSearcher) Count() uint64 { + // for now return a worst case + var sum uint64 + for _, searcher := range s.searchers { + sum += searcher.Count() + } + return sum +} + +func (s *DisjunctionHeapSearcher) Close() (rv error) { + for _, searcher := range s.searchers { + err := searcher.Close() + if err != nil && rv == nil { + rv = err + } + } + return rv +} + +func (s *DisjunctionHeapSearcher) Min() int { + return s.min +} + +func (s *DisjunctionHeapSearcher) DocumentMatchPoolSize() int { + rv := len(s.searchers) + for _, s := range s.searchers { + rv += s.DocumentMatchPoolSize() + } + return rv +} + +// a disjunction searcher implements the index.Optimizable interface +// but only activates on an edge case where the disjunction is a +// wrapper around a single Optimizable child searcher +func (s *DisjunctionHeapSearcher) Optimize(kind string, octx index.OptimizableContext) ( + index.OptimizableContext, error) { + if len(s.searchers) == 1 { + o, ok := s.searchers[0].(index.Optimizable) + if ok { + return o.Optimize(kind, octx) + } + } + + return octx, nil +} + +// heap impl + +func (s *DisjunctionHeapSearcher) Len() int { return len(s.heap) } + +func (s *DisjunctionHeapSearcher) Less(i, j int) bool { + if s.heap[i].curr == nil { + return true + } else if s.heap[j].curr == nil { + return false + } + return 
bytes.Compare(s.heap[i].curr.IndexInternalID, s.heap[j].curr.IndexInternalID) < 0 +} + +func (s *DisjunctionHeapSearcher) Swap(i, j int) { + s.heap[i], s.heap[j] = s.heap[j], s.heap[i] +} + +func (s *DisjunctionHeapSearcher) Push(x interface{}) { + s.heap = append(s.heap, x.(*SearcherCurr)) +} + +func (s *DisjunctionHeapSearcher) Pop() interface{} { + old := s.heap + n := len(old) + x := old[n-1] + s.heap = old[0 : n-1] + return x +} diff --git a/search/searcher/search_disjunction_slice.go b/search/searcher/search_disjunction_slice.go new file mode 100644 index 000000000..e3efdf2a7 --- /dev/null +++ b/search/searcher/search_disjunction_slice.go @@ -0,0 +1,298 @@ +// Copyright (c) 2018 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package searcher + +import ( + "math" + "reflect" + "sort" + + "github.com/blevesearch/bleve/index" + "github.com/blevesearch/bleve/search" + "github.com/blevesearch/bleve/search/scorer" + "github.com/blevesearch/bleve/size" +) + +var reflectStaticSizeDisjunctionSliceSearcher int + +func init() { + var ds DisjunctionSliceSearcher + reflectStaticSizeDisjunctionSliceSearcher = int(reflect.TypeOf(ds).Size()) +} + +type DisjunctionSliceSearcher struct { + indexReader index.IndexReader + searchers OrderedSearcherList + numSearchers int + queryNorm float64 + currs []*search.DocumentMatch + scorer *scorer.DisjunctionQueryScorer + min int + matching []*search.DocumentMatch + matchingIdxs []int + initialized bool +} + +func newDisjunctionSliceSearcher(indexReader index.IndexReader, + qsearchers []search.Searcher, min float64, options search.SearcherOptions, + limit bool) ( + *DisjunctionSliceSearcher, error) { + if limit && tooManyClauses(len(qsearchers)) { + return nil, tooManyClausesErr() + } + // build the downstream searchers + searchers := make(OrderedSearcherList, len(qsearchers)) + for i, searcher := range qsearchers { + searchers[i] = searcher + } + // sort the searchers + sort.Sort(sort.Reverse(searchers)) + // build our searcher + rv := DisjunctionSliceSearcher{ + indexReader: indexReader, + searchers: searchers, + numSearchers: len(searchers), + currs: make([]*search.DocumentMatch, len(searchers)), + scorer: scorer.NewDisjunctionQueryScorer(options), + min: int(min), + matching: make([]*search.DocumentMatch, len(searchers)), + matchingIdxs: make([]int, len(searchers)), + } + rv.computeQueryNorm() + return &rv, nil +} + +func (s *DisjunctionSliceSearcher) Size() int { + sizeInBytes := reflectStaticSizeDisjunctionSliceSearcher + size.SizeOfPtr + + s.scorer.Size() + + for _, entry := range s.searchers { + sizeInBytes += entry.Size() + } + + for _, entry := range s.currs { + if entry != nil { + sizeInBytes += entry.Size() + } + } + + for _, entry := range 
s.matching { + if entry != nil { + sizeInBytes += entry.Size() + } + } + + sizeInBytes += len(s.matchingIdxs) * size.SizeOfInt + + return sizeInBytes +} + +func (s *DisjunctionSliceSearcher) computeQueryNorm() { + // first calculate sum of squared weights + sumOfSquaredWeights := 0.0 + for _, searcher := range s.searchers { + sumOfSquaredWeights += searcher.Weight() + } + // now compute query norm from this + s.queryNorm = 1.0 / math.Sqrt(sumOfSquaredWeights) + // finally tell all the downstream searchers the norm + for _, searcher := range s.searchers { + searcher.SetQueryNorm(s.queryNorm) + } +} + +func (s *DisjunctionSliceSearcher) initSearchers(ctx *search.SearchContext) error { + var err error + // get all searchers pointing at their first match + for i, searcher := range s.searchers { + if s.currs[i] != nil { + ctx.DocumentMatchPool.Put(s.currs[i]) + } + s.currs[i], err = searcher.Next(ctx) + if err != nil { + return err + } + } + + err = s.updateMatches() + if err != nil { + return err + } + + s.initialized = true + return nil +} + +func (s *DisjunctionSliceSearcher) updateMatches() error { + matching := s.matching[:0] + matchingIdxs := s.matchingIdxs[:0] + + for i := 0; i < len(s.currs); i++ { + curr := s.currs[i] + if curr == nil { + continue + } + + if len(matching) > 0 { + cmp := curr.IndexInternalID.Compare(matching[0].IndexInternalID) + if cmp > 0 { + continue + } + + if cmp < 0 { + matching = matching[:0] + matchingIdxs = matchingIdxs[:0] + } + } + + matching = append(matching, curr) + matchingIdxs = append(matchingIdxs, i) + } + + s.matching = matching + s.matchingIdxs = matchingIdxs + + return nil +} + +func (s *DisjunctionSliceSearcher) Weight() float64 { + var rv float64 + for _, searcher := range s.searchers { + rv += searcher.Weight() + } + return rv +} + +func (s *DisjunctionSliceSearcher) SetQueryNorm(qnorm float64) { + for _, searcher := range s.searchers { + searcher.SetQueryNorm(qnorm) + } +} + +func (s *DisjunctionSliceSearcher) Next(ctx 
*search.SearchContext) ( + *search.DocumentMatch, error) { + if !s.initialized { + err := s.initSearchers(ctx) + if err != nil { + return nil, err + } + } + var err error + var rv *search.DocumentMatch + + found := false + for !found && len(s.matching) > 0 { + if len(s.matching) >= s.min { + found = true + // score this match + rv = s.scorer.Score(ctx, s.matching, len(s.matching), s.numSearchers) + } + + // invoke next on all the matching searchers + for _, i := range s.matchingIdxs { + searcher := s.searchers[i] + if s.currs[i] != rv { + ctx.DocumentMatchPool.Put(s.currs[i]) + } + s.currs[i], err = searcher.Next(ctx) + if err != nil { + return nil, err + } + } + + err = s.updateMatches() + if err != nil { + return nil, err + } + } + return rv, nil +} + +func (s *DisjunctionSliceSearcher) Advance(ctx *search.SearchContext, + ID index.IndexInternalID) (*search.DocumentMatch, error) { + if !s.initialized { + err := s.initSearchers(ctx) + if err != nil { + return nil, err + } + } + // get all searchers pointing at their first match + var err error + for i, searcher := range s.searchers { + if s.currs[i] != nil { + if s.currs[i].IndexInternalID.Compare(ID) >= 0 { + continue + } + ctx.DocumentMatchPool.Put(s.currs[i]) + } + s.currs[i], err = searcher.Advance(ctx, ID) + if err != nil { + return nil, err + } + } + + err = s.updateMatches() + if err != nil { + return nil, err + } + + return s.Next(ctx) +} + +func (s *DisjunctionSliceSearcher) Count() uint64 { + // for now return a worst case + var sum uint64 + for _, searcher := range s.searchers { + sum += searcher.Count() + } + return sum +} + +func (s *DisjunctionSliceSearcher) Close() (rv error) { + for _, searcher := range s.searchers { + err := searcher.Close() + if err != nil && rv == nil { + rv = err + } + } + return rv +} + +func (s *DisjunctionSliceSearcher) Min() int { + return s.min +} + +func (s *DisjunctionSliceSearcher) DocumentMatchPoolSize() int { + rv := len(s.currs) + for _, s := range s.searchers { + rv 
+= s.DocumentMatchPoolSize() + } + return rv +} + +// a disjunction searcher implements the index.Optimizable interface +// but only activates on an edge case where the disjunction is a +// wrapper around a single Optimizable child searcher +func (s *DisjunctionSliceSearcher) Optimize(kind string, octx index.OptimizableContext) ( + index.OptimizableContext, error) { + if len(s.searchers) == 1 { + o, ok := s.searchers[0].(index.Optimizable) + if ok { + return o.Optimize(kind, octx) + } + } + + return octx, nil +} From 4bf27cb44d758427a24a9851afdd78aa60820264 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Fri, 20 Apr 2018 11:42:26 -0700 Subject: [PATCH 385/728] persistSnapshotMaybeMerge() merges to directly to file The persister goroutine can decide to merge its incoming input segments before persistence... and before this commit, that merging would have been entirely in-memory, leading to a lot of memory usage. After this change, the persistSnapshotMaybeMerge(), which calls mergeSegmentBases(), performs the merging directly to file. 
--- index/scorch/merge.go | 48 ++++++++++--------------------- index/scorch/persister.go | 2 +- index/scorch/segment/zap/merge.go | 15 ++++++---- 3 files changed, 26 insertions(+), 39 deletions(-) diff --git a/index/scorch/merge.go b/index/scorch/merge.go index 73351aa4c..41b734aaf 100644 --- a/index/scorch/merge.go +++ b/index/scorch/merge.go @@ -15,7 +15,6 @@ package scorch import ( - "bytes" "encoding/json" "fmt" "os" @@ -195,7 +194,7 @@ func (s *Scorch) planMergeAtSnapshot(ourSnapshot *IndexSnapshot, fileMergeZapStartTime := time.Now() atomic.AddUint64(&s.stats.TotFileMergeZapBeg, 1) - newDocNums, nBytes, err := zap.Merge(segmentsToMerge, docsToDrop, path, 1024) + newDocNums, nBytes, err := zap.Merge(segmentsToMerge, docsToDrop, path, DefaultChunkFactor) atomic.AddUint64(&s.stats.TotFileMergeZapEnd, 1) atomic.AddUint64(&s.stats.TotFileMergeWrittenBytes, nBytes) @@ -274,19 +273,20 @@ type segmentMerge struct { // into the root func (s *Scorch) mergeSegmentBases(snapshot *IndexSnapshot, sbs []*zap.SegmentBase, sbsDrops []*roaring.Bitmap, sbsIndexes []int, - chunkFactor uint32) (uint64, *IndexSnapshot, uint64, error) { + chunkFactor uint32) (*IndexSnapshot, uint64, error) { atomic.AddUint64(&s.stats.TotMemMergeBeg, 1) - var br bytes.Buffer - - cr := zap.NewCountHashWriter(&br) - memMergeZapStartTime := time.Now() atomic.AddUint64(&s.stats.TotMemMergeZapBeg, 1) - newDocNums, numDocs, storedIndexOffset, fieldsIndexOffset, - docValueOffset, dictLocs, fieldsInv, fieldsMap, err := - zap.MergeToWriter(sbs, sbsDrops, chunkFactor, cr) + + newSegmentID := atomic.AddUint64(&s.nextSegmentID, 1) + filename := zapFileName(newSegmentID) + path := s.path + string(os.PathSeparator) + filename + + newDocNums, _, err := + zap.MergeSegmentBases(sbs, sbsDrops, path, chunkFactor) + atomic.AddUint64(&s.stats.TotMemMergeZapEnd, 1) memMergeZapTime := uint64(time.Since(memMergeZapStartTime)) @@ -297,31 +297,13 @@ func (s *Scorch) mergeSegmentBases(snapshot *IndexSnapshot, if err != nil { 
atomic.AddUint64(&s.stats.TotMemMergeErr, 1) - return 0, nil, 0, err - } - - sb, err := zap.InitSegmentBase(br.Bytes(), cr.Sum32(), chunkFactor, - fieldsMap, fieldsInv, numDocs, storedIndexOffset, fieldsIndexOffset, - docValueOffset, dictLocs) - if err != nil { - atomic.AddUint64(&s.stats.TotMemMergeErr, 1) - return 0, nil, 0, err - } - - newSegmentID := atomic.AddUint64(&s.nextSegmentID, 1) - - filename := zapFileName(newSegmentID) - path := s.path + string(os.PathSeparator) + filename - err = zap.PersistSegmentBase(sb, path) - if err != nil { - atomic.AddUint64(&s.stats.TotMemMergeErr, 1) - return 0, nil, 0, err + return nil, 0, err } segment, err := zap.Open(path) if err != nil { atomic.AddUint64(&s.stats.TotMemMergeErr, 1) - return 0, nil, 0, err + return nil, 0, err } // update persisted stats @@ -345,16 +327,16 @@ func (s *Scorch) mergeSegmentBases(snapshot *IndexSnapshot, select { // send to introducer case <-s.closeCh: _ = segment.DecRef() - return 0, nil, 0, ErrClosed + return nil, 0, ErrClosed case s.merges <- sm: } select { // wait for introduction to complete case <-s.closeCh: - return 0, nil, 0, ErrClosed + return nil, 0, ErrClosed case newSnapshot := <-sm.notify: atomic.AddUint64(&s.stats.TotMemMergeSegments, uint64(len(sbs))) atomic.AddUint64(&s.stats.TotMemMergeDone, 1) - return numDocs, newSnapshot, newSegmentID, nil + return newSnapshot, newSegmentID, nil } } diff --git a/index/scorch/persister.go b/index/scorch/persister.go index 76fd746d5..cbc24cdb7 100644 --- a/index/scorch/persister.go +++ b/index/scorch/persister.go @@ -244,7 +244,7 @@ func (s *Scorch) persistSnapshotMaybeMerge(snapshot *IndexSnapshot) ( return false, nil } - _, newSnapshot, newSegmentID, err := s.mergeSegmentBases( + newSnapshot, newSegmentID, err := s.mergeSegmentBases( snapshot, sbs, sbsDrops, sbsIndexes, DefaultChunkFactor) if err != nil { return false, err diff --git a/index/scorch/segment/zap/merge.go b/index/scorch/segment/zap/merge.go index 7f8f23f77..dab09f6b3 100644 
--- a/index/scorch/segment/zap/merge.go +++ b/index/scorch/segment/zap/merge.go @@ -37,6 +37,16 @@ const docDropped = math.MaxUint64 // sentinel docNum to represent a deleted doc // remaining data. This new segment is built at the specified path, // with the provided chunkFactor. func Merge(segments []*Segment, drops []*roaring.Bitmap, path string, + chunkFactor uint32) ([][]uint64, uint64, error) { + segmentBases := make([]*SegmentBase, len(segments)) + for segmenti, segment := range segments { + segmentBases[segmenti] = &segment.SegmentBase + } + + return MergeSegmentBases(segmentBases, drops, path, chunkFactor) +} + +func MergeSegmentBases(segmentBases []*SegmentBase, drops []*roaring.Bitmap, path string, chunkFactor uint32) ([][]uint64, uint64, error) { flag := os.O_RDWR | os.O_CREATE @@ -50,11 +60,6 @@ func Merge(segments []*Segment, drops []*roaring.Bitmap, path string, _ = os.Remove(path) } - segmentBases := make([]*SegmentBase, len(segments)) - for segmenti, segment := range segments { - segmentBases[segmenti] = &segment.SegmentBase - } - // buffer the output br := bufio.NewWriterSize(f, DefaultFileMergerBufferSize) From 5d592a302fd6caa682740d17b1c01e971ae7406f Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Fri, 20 Apr 2018 14:36:28 -0700 Subject: [PATCH 386/728] Revert "Merge pull request #900 from abhinavdangeti/merger-overhead-doc-values" This reverts commit 2d5ae21650e05d26a3876b318b860546ab970a9b, reversing changes made to cd840e0996e29962922be75c47014fad6e857078. 
panic: runtime error: slice bounds out of range github.com/blevesearch/bleve/index/scorch/segment/zap.(*docValueReader).loadDvChunk /Users/steveyen/dev/couchbase-server.vulcan/godeps/src/github.com/blevesearch/bleve/index/scorch/segment/zap/docvalues.go:149 github.com/blevesearch/bleve/index/scorch/segment/zap.(*docValueReader).iterateAllDocValues /Users/steveyen/dev/couchbase-server.vulcan/godeps/src/github.com/blevesearch/bleve/index/scorch/segment/zap/docvalues.go:157 --- index/scorch/segment/zap/docvalues.go | 31 +--------- index/scorch/segment/zap/merge.go | 83 +++++++++++++-------------- 2 files changed, 41 insertions(+), 73 deletions(-) diff --git a/index/scorch/segment/zap/docvalues.go b/index/scorch/segment/zap/docvalues.go index c73c01215..bc5d1cac6 100644 --- a/index/scorch/segment/zap/docvalues.go +++ b/index/scorch/segment/zap/docvalues.go @@ -34,11 +34,10 @@ func init() { reflectStaticSizedocValueReader = int(reflect.TypeOf(dvi).Size()) } -type docNumTermsVisitor func(docNum uint64, terms []byte) error - type docValueReader struct { field string curChunkNum uint64 + numChunks uint64 chunkOffsets []uint64 dvDataLoc uint64 curChunkHeader []MetaData @@ -152,34 +151,6 @@ func (di *docValueReader) loadDvChunk(chunkNumber uint64, s *SegmentBase) error return nil } -func (di *docValueReader) iterateAllDocValues(s *SegmentBase, visitor docNumTermsVisitor) error { - for i := 0; i < len(di.chunkOffsets); i++ { - err := di.loadDvChunk(uint64(i), s) - if err != nil { - return err - } - - // uncompress the already loaded data - uncompressed, err := snappy.Decode(di.uncompressed[:cap(di.uncompressed)], di.curChunkData) - if err != nil { - return err - } - di.uncompressed = uncompressed - - start := uint64(0) - for _, entry := range di.curChunkHeader { - err = visitor(entry.DocNum, uncompressed[start:entry.DocDvOffset]) - if err != nil { - return err - } - - start = entry.DocDvOffset - } - } - - return nil -} - func (di *docValueReader) visitDocValues(docNum uint64, 
visitor index.DocumentFieldTermVisitor) error { // binary search the term locations for the docNum diff --git a/index/scorch/segment/zap/merge.go b/index/scorch/segment/zap/merge.go index dab09f6b3..faeb9768c 100644 --- a/index/scorch/segment/zap/merge.go +++ b/index/scorch/segment/zap/merge.go @@ -180,6 +180,12 @@ func persistMergedRest(segments []*SegmentBase, dropsIn []*roaring.Bitmap, tfEncoder := newChunkedIntCoder(uint64(chunkFactor), newSegDocCount-1) locEncoder := newChunkedIntCoder(uint64(chunkFactor), newSegDocCount-1) + // docTermMap is keyed by docNum, where the array impl provides + // better memory usage behavior than a sparse-friendlier hashmap + // for when docs have much structural similarity (i.e., every doc + // has a given field) + var docTermMap [][]byte + var vellumBuf bytes.Buffer newVellum, err := vellum.New(&vellumBuf, nil) if err != nil { @@ -197,8 +203,6 @@ func persistMergedRest(segments []*SegmentBase, dropsIn []*roaring.Bitmap, var dicts []*Dictionary var itrs []vellum.Iterator - var segmentsInFocus []*SegmentBase - for segmentI, segment := range segments { dict, err2 := segment.dictionary(fieldName) if err2 != nil { @@ -218,11 +222,19 @@ func persistMergedRest(segments []*SegmentBase, dropsIn []*roaring.Bitmap, } dicts = append(dicts, dict) itrs = append(itrs, itr) - segmentsInFocus = append(segmentsInFocus, segment) } } } + if uint64(cap(docTermMap)) < newSegDocCount { + docTermMap = make([][]byte, newSegDocCount) + } else { + docTermMap = docTermMap[0:newSegDocCount] + for docNum := range docTermMap { // reset the docTermMap + docTermMap[docNum] = docTermMap[docNum][:0] + } + } + var prevTerm []byte newRoaring.Clear() @@ -302,11 +314,11 @@ func persistMergedRest(segments []*SegmentBase, dropsIn []*roaring.Bitmap, // can optimize by copying freq/norm/loc bytes directly lastDocNum, lastFreq, lastNorm, err = mergeTermFreqNormLocsByCopying( term, postItr, newDocNums[itrI], newRoaring, - tfEncoder, locEncoder) + tfEncoder, locEncoder, 
docTermMap) } else { lastDocNum, lastFreq, lastNorm, bufLoc, err = mergeTermFreqNormLocs( fieldsMap, term, postItr, newDocNums[itrI], newRoaring, - tfEncoder, locEncoder, bufLoc) + tfEncoder, locEncoder, docTermMap, bufLoc) } if err != nil { return nil, 0, err @@ -354,50 +366,28 @@ func persistMergedRest(segments []*SegmentBase, dropsIn []*roaring.Bitmap, // update the field doc values fdvEncoder := newChunkedContentCoder(uint64(chunkFactor), newSegDocCount-1, w, true) - - fdvReadersAvailable := false - var dvIterClone *docValueReader - for segmentI, segment := range segmentsInFocus { - fieldIDPlus1 := uint16(segment.fieldsMap[fieldName]) - if dvIter, exists := segment.fieldDvReaders[fieldIDPlus1-1]; exists && - dvIter != nil { - fdvReadersAvailable = true - dvIterClone = dvIter.cloneInto(dvIterClone) - err = dvIterClone.iterateAllDocValues(segment, func(docNum uint64, terms []byte) error { - if newDocNums[segmentI][docNum] == docDropped { - return nil - } - err := fdvEncoder.Add(newDocNums[segmentI][docNum], terms) - if err != nil { - return err - } - return nil - }) + for docNum, docTerms := range docTermMap { + if len(docTerms) > 0 { + err = fdvEncoder.Add(uint64(docNum), docTerms) if err != nil { return nil, 0, err } } } + err = fdvEncoder.Close() + if err != nil { + return nil, 0, err + } - if fdvReadersAvailable { - err = fdvEncoder.Close() - if err != nil { - return nil, 0, err - } - - // persist the doc value details for this field - _, err = fdvEncoder.Write() - if err != nil { - return nil, 0, err - } - - // get the field doc value offset (end) - fieldDvLocsEnd[fieldID] = uint64(w.Count()) - } else { - fieldDvLocsStart[fieldID] = fieldNotUninverted - fieldDvLocsEnd[fieldID] = fieldNotUninverted + // persist the doc value details for this field + _, err = fdvEncoder.Write() + if err != nil { + return nil, 0, err } + // get the field doc value offset (end) + fieldDvLocsEnd[fieldID] = uint64(w.Count()) + // reset vellum buffer and vellum builder 
vellumBuf.Reset() err = newVellum.Reset(&vellumBuf) @@ -427,7 +417,8 @@ func persistMergedRest(segments []*SegmentBase, dropsIn []*roaring.Bitmap, func mergeTermFreqNormLocs(fieldsMap map[string]uint16, term []byte, postItr *PostingsIterator, newDocNums []uint64, newRoaring *roaring.Bitmap, - tfEncoder *chunkedIntCoder, locEncoder *chunkedIntCoder, bufLoc []uint64) ( + tfEncoder *chunkedIntCoder, locEncoder *chunkedIntCoder, docTermMap [][]byte, + bufLoc []uint64) ( lastDocNum uint64, lastFreq uint64, lastNorm uint64, bufLocOut []uint64, err error) { next, err := postItr.Next() for next != nil && err == nil { @@ -481,6 +472,9 @@ func mergeTermFreqNormLocs(fieldsMap map[string]uint16, term []byte, postItr *Po } } + docTermMap[hitNewDocNum] = + append(append(docTermMap[hitNewDocNum], term...), termSeparator) + lastDocNum = hitNewDocNum lastFreq = nextFreq lastNorm = nextNorm @@ -493,7 +487,7 @@ func mergeTermFreqNormLocs(fieldsMap map[string]uint16, term []byte, postItr *Po func mergeTermFreqNormLocsByCopying(term []byte, postItr *PostingsIterator, newDocNums []uint64, newRoaring *roaring.Bitmap, - tfEncoder *chunkedIntCoder, locEncoder *chunkedIntCoder) ( + tfEncoder *chunkedIntCoder, locEncoder *chunkedIntCoder, docTermMap [][]byte) ( lastDocNum uint64, lastFreq uint64, lastNorm uint64, err error) { nextDocNum, nextFreq, nextNorm, nextFreqNormBytes, nextLocBytes, err := postItr.nextBytes() @@ -516,6 +510,9 @@ func mergeTermFreqNormLocsByCopying(term []byte, postItr *PostingsIterator, } } + docTermMap[hitNewDocNum] = + append(append(docTermMap[hitNewDocNum], term...), termSeparator) + lastDocNum = hitNewDocNum lastFreq = nextFreq lastNorm = nextNorm From 3e4c63a620243a5ea963597de9ffcbdd8d37d901 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Sat, 21 Apr 2018 12:45:04 -0700 Subject: [PATCH 387/728] Revert "Revert "Merge pull request #900 from abhinavdangeti/merger-overhead-doc-values"" This reverts commit 5d592a302fd6caa682740d17b1c01e971ae7406f. 
--- index/scorch/segment/zap/docvalues.go | 31 +++++++++- index/scorch/segment/zap/merge.go | 83 ++++++++++++++------------- 2 files changed, 73 insertions(+), 41 deletions(-) diff --git a/index/scorch/segment/zap/docvalues.go b/index/scorch/segment/zap/docvalues.go index bc5d1cac6..c73c01215 100644 --- a/index/scorch/segment/zap/docvalues.go +++ b/index/scorch/segment/zap/docvalues.go @@ -34,10 +34,11 @@ func init() { reflectStaticSizedocValueReader = int(reflect.TypeOf(dvi).Size()) } +type docNumTermsVisitor func(docNum uint64, terms []byte) error + type docValueReader struct { field string curChunkNum uint64 - numChunks uint64 chunkOffsets []uint64 dvDataLoc uint64 curChunkHeader []MetaData @@ -151,6 +152,34 @@ func (di *docValueReader) loadDvChunk(chunkNumber uint64, s *SegmentBase) error return nil } +func (di *docValueReader) iterateAllDocValues(s *SegmentBase, visitor docNumTermsVisitor) error { + for i := 0; i < len(di.chunkOffsets); i++ { + err := di.loadDvChunk(uint64(i), s) + if err != nil { + return err + } + + // uncompress the already loaded data + uncompressed, err := snappy.Decode(di.uncompressed[:cap(di.uncompressed)], di.curChunkData) + if err != nil { + return err + } + di.uncompressed = uncompressed + + start := uint64(0) + for _, entry := range di.curChunkHeader { + err = visitor(entry.DocNum, uncompressed[start:entry.DocDvOffset]) + if err != nil { + return err + } + + start = entry.DocDvOffset + } + } + + return nil +} + func (di *docValueReader) visitDocValues(docNum uint64, visitor index.DocumentFieldTermVisitor) error { // binary search the term locations for the docNum diff --git a/index/scorch/segment/zap/merge.go b/index/scorch/segment/zap/merge.go index faeb9768c..dab09f6b3 100644 --- a/index/scorch/segment/zap/merge.go +++ b/index/scorch/segment/zap/merge.go @@ -180,12 +180,6 @@ func persistMergedRest(segments []*SegmentBase, dropsIn []*roaring.Bitmap, tfEncoder := newChunkedIntCoder(uint64(chunkFactor), newSegDocCount-1) locEncoder 
:= newChunkedIntCoder(uint64(chunkFactor), newSegDocCount-1) - // docTermMap is keyed by docNum, where the array impl provides - // better memory usage behavior than a sparse-friendlier hashmap - // for when docs have much structural similarity (i.e., every doc - // has a given field) - var docTermMap [][]byte - var vellumBuf bytes.Buffer newVellum, err := vellum.New(&vellumBuf, nil) if err != nil { @@ -203,6 +197,8 @@ func persistMergedRest(segments []*SegmentBase, dropsIn []*roaring.Bitmap, var dicts []*Dictionary var itrs []vellum.Iterator + var segmentsInFocus []*SegmentBase + for segmentI, segment := range segments { dict, err2 := segment.dictionary(fieldName) if err2 != nil { @@ -222,19 +218,11 @@ func persistMergedRest(segments []*SegmentBase, dropsIn []*roaring.Bitmap, } dicts = append(dicts, dict) itrs = append(itrs, itr) + segmentsInFocus = append(segmentsInFocus, segment) } } } - if uint64(cap(docTermMap)) < newSegDocCount { - docTermMap = make([][]byte, newSegDocCount) - } else { - docTermMap = docTermMap[0:newSegDocCount] - for docNum := range docTermMap { // reset the docTermMap - docTermMap[docNum] = docTermMap[docNum][:0] - } - } - var prevTerm []byte newRoaring.Clear() @@ -314,11 +302,11 @@ func persistMergedRest(segments []*SegmentBase, dropsIn []*roaring.Bitmap, // can optimize by copying freq/norm/loc bytes directly lastDocNum, lastFreq, lastNorm, err = mergeTermFreqNormLocsByCopying( term, postItr, newDocNums[itrI], newRoaring, - tfEncoder, locEncoder, docTermMap) + tfEncoder, locEncoder) } else { lastDocNum, lastFreq, lastNorm, bufLoc, err = mergeTermFreqNormLocs( fieldsMap, term, postItr, newDocNums[itrI], newRoaring, - tfEncoder, locEncoder, docTermMap, bufLoc) + tfEncoder, locEncoder, bufLoc) } if err != nil { return nil, 0, err @@ -366,27 +354,49 @@ func persistMergedRest(segments []*SegmentBase, dropsIn []*roaring.Bitmap, // update the field doc values fdvEncoder := newChunkedContentCoder(uint64(chunkFactor), newSegDocCount-1, w, true) - 
for docNum, docTerms := range docTermMap { - if len(docTerms) > 0 { - err = fdvEncoder.Add(uint64(docNum), docTerms) + + fdvReadersAvailable := false + var dvIterClone *docValueReader + for segmentI, segment := range segmentsInFocus { + fieldIDPlus1 := uint16(segment.fieldsMap[fieldName]) + if dvIter, exists := segment.fieldDvReaders[fieldIDPlus1-1]; exists && + dvIter != nil { + fdvReadersAvailable = true + dvIterClone = dvIter.cloneInto(dvIterClone) + err = dvIterClone.iterateAllDocValues(segment, func(docNum uint64, terms []byte) error { + if newDocNums[segmentI][docNum] == docDropped { + return nil + } + err := fdvEncoder.Add(newDocNums[segmentI][docNum], terms) + if err != nil { + return err + } + return nil + }) if err != nil { return nil, 0, err } } } - err = fdvEncoder.Close() - if err != nil { - return nil, 0, err - } - // persist the doc value details for this field - _, err = fdvEncoder.Write() - if err != nil { - return nil, 0, err - } + if fdvReadersAvailable { + err = fdvEncoder.Close() + if err != nil { + return nil, 0, err + } - // get the field doc value offset (end) - fieldDvLocsEnd[fieldID] = uint64(w.Count()) + // persist the doc value details for this field + _, err = fdvEncoder.Write() + if err != nil { + return nil, 0, err + } + + // get the field doc value offset (end) + fieldDvLocsEnd[fieldID] = uint64(w.Count()) + } else { + fieldDvLocsStart[fieldID] = fieldNotUninverted + fieldDvLocsEnd[fieldID] = fieldNotUninverted + } // reset vellum buffer and vellum builder vellumBuf.Reset() @@ -417,8 +427,7 @@ func persistMergedRest(segments []*SegmentBase, dropsIn []*roaring.Bitmap, func mergeTermFreqNormLocs(fieldsMap map[string]uint16, term []byte, postItr *PostingsIterator, newDocNums []uint64, newRoaring *roaring.Bitmap, - tfEncoder *chunkedIntCoder, locEncoder *chunkedIntCoder, docTermMap [][]byte, - bufLoc []uint64) ( + tfEncoder *chunkedIntCoder, locEncoder *chunkedIntCoder, bufLoc []uint64) ( lastDocNum uint64, lastFreq uint64, lastNorm 
uint64, bufLocOut []uint64, err error) { next, err := postItr.Next() for next != nil && err == nil { @@ -472,9 +481,6 @@ func mergeTermFreqNormLocs(fieldsMap map[string]uint16, term []byte, postItr *Po } } - docTermMap[hitNewDocNum] = - append(append(docTermMap[hitNewDocNum], term...), termSeparator) - lastDocNum = hitNewDocNum lastFreq = nextFreq lastNorm = nextNorm @@ -487,7 +493,7 @@ func mergeTermFreqNormLocs(fieldsMap map[string]uint16, term []byte, postItr *Po func mergeTermFreqNormLocsByCopying(term []byte, postItr *PostingsIterator, newDocNums []uint64, newRoaring *roaring.Bitmap, - tfEncoder *chunkedIntCoder, locEncoder *chunkedIntCoder, docTermMap [][]byte) ( + tfEncoder *chunkedIntCoder, locEncoder *chunkedIntCoder) ( lastDocNum uint64, lastFreq uint64, lastNorm uint64, err error) { nextDocNum, nextFreq, nextNorm, nextFreqNormBytes, nextLocBytes, err := postItr.nextBytes() @@ -510,9 +516,6 @@ func mergeTermFreqNormLocsByCopying(term []byte, postItr *PostingsIterator, } } - docTermMap[hitNewDocNum] = - append(append(docTermMap[hitNewDocNum], term...), termSeparator) - lastDocNum = hitNewDocNum lastFreq = nextFreq lastNorm = nextNorm From c623c57df87b51b47fb42f92470b10f4be555c52 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Sat, 21 Apr 2018 12:49:42 -0700 Subject: [PATCH 388/728] fixes #906 - docvalues handles empty chunk --- index/scorch/segment/zap/docvalues.go | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/index/scorch/segment/zap/docvalues.go b/index/scorch/segment/zap/docvalues.go index c73c01215..971b78b78 100644 --- a/index/scorch/segment/zap/docvalues.go +++ b/index/scorch/segment/zap/docvalues.go @@ -125,6 +125,14 @@ func (di *docValueReader) loadDvChunk(chunkNumber uint64, s *SegmentBase) error // reside for the given docNum destChunkDataLoc, curChunkEnd := di.dvDataLoc, di.dvDataLoc start, end := readChunkBoundary(int(chunkNumber), di.chunkOffsets) + if start >= end { + di.curChunkHeader = di.curChunkHeader[:0] 
+ di.curChunkData = nil + di.curChunkNum = chunkNumber + di.uncompressed = di.uncompressed[:0] + return nil + } + destChunkDataLoc += start curChunkEnd += end @@ -158,6 +166,9 @@ func (di *docValueReader) iterateAllDocValues(s *SegmentBase, visitor docNumTerm if err != nil { return err } + if di.curChunkData == nil || len(di.curChunkHeader) <= 0 { + continue + } // uncompress the already loaded data uncompressed, err := snappy.Decode(di.uncompressed[:cap(di.uncompressed)], di.curChunkData) @@ -184,7 +195,7 @@ func (di *docValueReader) visitDocValues(docNum uint64, visitor index.DocumentFieldTermVisitor) error { // binary search the term locations for the docNum start, end := di.getDocValueLocs(docNum) - if start == math.MaxUint64 || end == math.MaxUint64 { + if start == math.MaxUint64 || end == math.MaxUint64 || start == end { return nil } From 76584c7038dee90b805dda1f8c7b0a3e127be310 Mon Sep 17 00:00:00 2001 From: abhinavdangeti Date: Fri, 20 Apr 2018 16:55:43 -0700 Subject: [PATCH 389/728] MB-28847: Account for bytes of an allocated buffer while building new segment Account for these bytes in the memory used by the scorch index. 
--- index/scorch/scorch.go | 18 +++++++++++++----- index/scorch/segment/zap/build_test.go | 12 ++++++------ index/scorch/segment/zap/dict_test.go | 4 ++-- index/scorch/segment/zap/merge_test.go | 22 +++++++++++----------- index/scorch/segment/zap/new.go | 6 +++--- index/scorch/segment/zap/segment_test.go | 14 +++++++------- 6 files changed, 42 insertions(+), 34 deletions(-) diff --git a/index/scorch/scorch.go b/index/scorch/scorch.go index 14796c5e8..398451570 100644 --- a/index/scorch/scorch.go +++ b/index/scorch/scorch.go @@ -76,10 +76,12 @@ type Scorch struct { } type internalStats struct { - persistEpoch uint64 - persistSnapshotSize uint64 - mergeEpoch uint64 - mergeSnapshotSize uint64 + persistEpoch uint64 + persistSnapshotSize uint64 + mergeEpoch uint64 + mergeSnapshotSize uint64 + newSegBufBytesAdded uint64 + newSegBufBytesRemoved uint64 } func NewScorch(storeName string, @@ -308,11 +310,13 @@ func (s *Scorch) Batch(batch *index.Batch) (err error) { s.fireEvent(EventKindBatchIntroductionStart, 0) var newSegment segment.Segment + var bufBytes uint64 if len(analysisResults) > 0 { - newSegment, err = zap.AnalysisResultsToSegmentBase(analysisResults, DefaultChunkFactor) + newSegment, bufBytes, err = zap.AnalysisResultsToSegmentBase(analysisResults, DefaultChunkFactor) if err != nil { return err } + atomic.AddUint64(&s.iStats.newSegBufBytesAdded, bufBytes) } else { atomic.AddUint64(&s.stats.TotBatchesEmpty, 1) } @@ -330,6 +334,7 @@ func (s *Scorch) Batch(batch *index.Batch) (err error) { atomic.AddUint64(&s.stats.TotIndexedPlainTextBytes, numPlainTextBytes) } + atomic.AddUint64(&s.iStats.newSegBufBytesRemoved, bufBytes) atomic.AddUint64(&s.stats.TotIndexTime, uint64(time.Since(indexStart))) return err @@ -523,6 +528,9 @@ func (s *Scorch) MemoryUsed() uint64 { memUsed += mergeSnapshotSize } + memUsed += (atomic.LoadUint64(&s.iStats.newSegBufBytesAdded) - + atomic.LoadUint64(&s.iStats.newSegBufBytesRemoved)) + return memUsed } diff --git 
a/index/scorch/segment/zap/build_test.go b/index/scorch/segment/zap/build_test.go index 5b6ffd559..9d1b584f2 100644 --- a/index/scorch/segment/zap/build_test.go +++ b/index/scorch/segment/zap/build_test.go @@ -26,7 +26,7 @@ import ( func TestBuild(t *testing.T) { _ = os.RemoveAll("/tmp/scorch.zap") - sb, err := buildTestSegment() + sb, _, err := buildTestSegment() if err != nil { t.Fatal(err) } @@ -36,7 +36,7 @@ func TestBuild(t *testing.T) { } } -func buildTestSegment() (*SegmentBase, error) { +func buildTestSegment() (*SegmentBase, uint64, error) { doc := &document.Document{ ID: "a", Fields: []document.Field{ @@ -125,19 +125,19 @@ func buildTestSegment() (*SegmentBase, error) { return AnalysisResultsToSegmentBase(results, 1024) } -func buildTestSegmentMulti() (*SegmentBase, error) { +func buildTestSegmentMulti() (*SegmentBase, uint64, error) { results := buildTestAnalysisResultsMulti() return AnalysisResultsToSegmentBase(results, 1024) } -func buildTestSegmentMultiWithChunkFactor(chunkFactor uint32) (*SegmentBase, error) { +func buildTestSegmentMultiWithChunkFactor(chunkFactor uint32) (*SegmentBase, uint64, error) { results := buildTestAnalysisResultsMulti() return AnalysisResultsToSegmentBase(results, chunkFactor) } -func buildTestSegmentMultiWithDifferentFields(includeDocA, includeDocB bool) (*SegmentBase, error) { +func buildTestSegmentMultiWithDifferentFields(includeDocA, includeDocB bool) (*SegmentBase, uint64, error) { results := buildTestAnalysisResultsMultiWithDifferentFields(includeDocA, includeDocB) return AnalysisResultsToSegmentBase(results, 1024) @@ -550,7 +550,7 @@ func buildTestSegmentWithDefaultFieldMapping(chunkFactor uint32) ( } } - sb, err := AnalysisResultsToSegmentBase(results, chunkFactor) + sb, _, err := AnalysisResultsToSegmentBase(results, chunkFactor) return sb, fields, err } diff --git a/index/scorch/segment/zap/dict_test.go b/index/scorch/segment/zap/dict_test.go index b70f2adf7..1a8ce22c5 100644 --- 
a/index/scorch/segment/zap/dict_test.go +++ b/index/scorch/segment/zap/dict_test.go @@ -24,7 +24,7 @@ import ( "github.com/blevesearch/bleve/index" ) -func buildTestSegmentForDict() (*SegmentBase, error) { +func buildTestSegmentForDict() (*SegmentBase, uint64, error) { doc := &document.Document{ ID: "a", Fields: []document.Field{ @@ -105,7 +105,7 @@ func TestDictionary(t *testing.T) { _ = os.RemoveAll("/tmp/scorch.zap") - testSeg, _ := buildTestSegmentForDict() + testSeg, _, _ := buildTestSegmentForDict() err := PersistSegmentBase(testSeg, "/tmp/scorch.zap") if err != nil { t.Fatalf("error persisting segment: %v", err) diff --git a/index/scorch/segment/zap/merge_test.go b/index/scorch/segment/zap/merge_test.go index 6b168c907..cd21ecb66 100644 --- a/index/scorch/segment/zap/merge_test.go +++ b/index/scorch/segment/zap/merge_test.go @@ -33,13 +33,13 @@ func TestMerge(t *testing.T) { _ = os.RemoveAll("/tmp/scorch2.zap") _ = os.RemoveAll("/tmp/scorch3.zap") - testSeg, _ := buildTestSegmentMulti() + testSeg, _, _ := buildTestSegmentMulti() err := PersistSegmentBase(testSeg, "/tmp/scorch.zap") if err != nil { t.Fatal(err) } - testSeg2, _ := buildTestSegmentMulti2() + testSeg2, _, _ := buildTestSegmentMulti2() err = PersistSegmentBase(testSeg2, "/tmp/scorch2.zap") if err != nil { t.Fatal(err) @@ -120,7 +120,7 @@ func TestMergeWithEmptySegmentsFirst(t *testing.T) { func testMergeWithEmptySegments(t *testing.T, before bool, numEmptySegments int) { _ = os.RemoveAll("/tmp/scorch.zap") - testSeg, _ := buildTestSegmentMulti() + testSeg, _, _ := buildTestSegmentMulti() err := PersistSegmentBase(testSeg, "/tmp/scorch.zap") if err != nil { t.Fatal(err) @@ -147,7 +147,7 @@ func testMergeWithEmptySegments(t *testing.T, before bool, numEmptySegments int) _ = os.RemoveAll("/tmp/" + fname) - emptySegment, _ := AnalysisResultsToSegmentBase([]*index.AnalysisResult{}, 1024) + emptySegment, _, _ := AnalysisResultsToSegmentBase([]*index.AnalysisResult{}, 1024) err = 
PersistSegmentBase(emptySegment, "/tmp/"+fname) if err != nil { t.Fatal(err) @@ -461,7 +461,7 @@ func testMergeAndDrop(t *testing.T, docsToDrop []*roaring.Bitmap) { _ = os.RemoveAll("/tmp/scorch.zap") _ = os.RemoveAll("/tmp/scorch2.zap") - testSeg, _ := buildTestSegmentMulti() + testSeg, _, _ := buildTestSegmentMulti() err := PersistSegmentBase(testSeg, "/tmp/scorch.zap") if err != nil { t.Fatal(err) @@ -477,7 +477,7 @@ func testMergeAndDrop(t *testing.T, docsToDrop []*roaring.Bitmap) { } }() - testSeg2, _ := buildTestSegmentMulti2() + testSeg2, _, _ := buildTestSegmentMulti2() err = PersistSegmentBase(testSeg2, "/tmp/scorch2.zap") if err != nil { t.Fatal(err) @@ -564,7 +564,7 @@ func testMergeWithUpdates(t *testing.T, segmentDocIds [][]string, docsToDrop []* _ = os.RemoveAll("/tmp/" + fname) - testSeg, _ := buildTestSegmentMultiHelper(docIds) + testSeg, _, _ := buildTestSegmentMultiHelper(docIds) err := PersistSegmentBase(testSeg, "/tmp/"+fname) if err != nil { t.Fatal(err) @@ -615,11 +615,11 @@ func testMergeAndDropSegments(t *testing.T, segsToMerge []*Segment, docsToDrop [ testMergeWithSelf(t, segm.(*Segment), expectedNumDocs) } -func buildTestSegmentMulti2() (*SegmentBase, error) { +func buildTestSegmentMulti2() (*SegmentBase, uint64, error) { return buildTestSegmentMultiHelper([]string{"c", "d"}) } -func buildTestSegmentMultiHelper(docIds []string) (*SegmentBase, error) { +func buildTestSegmentMultiHelper(docIds []string) (*SegmentBase, uint64, error) { doc := &document.Document{ ID: "c", Fields: []document.Field{ @@ -785,13 +785,13 @@ func TestMergeBytesWritten(t *testing.T) { _ = os.RemoveAll("/tmp/scorch2.zap") _ = os.RemoveAll("/tmp/scorch3.zap") - testSeg, _ := buildTestSegmentMulti() + testSeg, _, _ := buildTestSegmentMulti() err := PersistSegmentBase(testSeg, "/tmp/scorch.zap") if err != nil { t.Fatal(err) } - testSeg2, _ := buildTestSegmentMulti2() + testSeg2, _, _ := buildTestSegmentMulti2() err = PersistSegmentBase(testSeg2, "/tmp/scorch2.zap") if 
err != nil { t.Fatal(err) diff --git a/index/scorch/segment/zap/new.go b/index/scorch/segment/zap/new.go index 7471cf277..a76a8f6af 100644 --- a/index/scorch/segment/zap/new.go +++ b/index/scorch/segment/zap/new.go @@ -32,7 +32,7 @@ import ( // AnalysisResultsToSegmentBase produces an in-memory zap-encoded // SegmentBase from analysis results func AnalysisResultsToSegmentBase(results []*index.AnalysisResult, - chunkFactor uint32) (*SegmentBase, error) { + chunkFactor uint32) (*SegmentBase, uint64, error) { s := interimPool.Get().(*interim) var br bytes.Buffer @@ -52,7 +52,7 @@ func AnalysisResultsToSegmentBase(results []*index.AnalysisResult, storedIndexOffset, fieldsIndexOffset, fdvIndexOffset, dictOffsets, err := s.convert() if err != nil { - return nil, err + return nil, uint64(0), err } sb, err := InitSegmentBase(br.Bytes(), s.w.Sum32(), chunkFactor, @@ -65,7 +65,7 @@ func AnalysisResultsToSegmentBase(results []*index.AnalysisResult, interimPool.Put(s) } - return sb, err + return sb, uint64(len(br.Bytes())), err } var interimPool = sync.Pool{New: func() interface{} { return &interim{} }} diff --git a/index/scorch/segment/zap/segment_test.go b/index/scorch/segment/zap/segment_test.go index 27f87faff..be7a39e14 100644 --- a/index/scorch/segment/zap/segment_test.go +++ b/index/scorch/segment/zap/segment_test.go @@ -28,7 +28,7 @@ import ( func TestOpen(t *testing.T) { _ = os.RemoveAll("/tmp/scorch.zap") - testSeg, _ := buildTestSegment() + testSeg, _, _ := buildTestSegment() err := PersistSegmentBase(testSeg, "/tmp/scorch.zap") if err != nil { t.Fatalf("error persisting segment: %v", err) @@ -328,7 +328,7 @@ func TestOpen(t *testing.T) { func TestOpenMulti(t *testing.T) { _ = os.RemoveAll("/tmp/scorch.zap") - testSeg, _ := buildTestSegmentMulti() + testSeg, _, _ := buildTestSegmentMulti() err := PersistSegmentBase(testSeg, "/tmp/scorch.zap") if err != nil { t.Fatalf("error persisting segment: %v", err) @@ -428,7 +428,7 @@ func TestOpenMulti(t *testing.T) { func 
TestOpenMultiWithTwoChunks(t *testing.T) { _ = os.RemoveAll("/tmp/scorch.zap") - testSeg, _ := buildTestSegmentMultiWithChunkFactor(1) + testSeg, _, _ := buildTestSegmentMultiWithChunkFactor(1) err := PersistSegmentBase(testSeg, "/tmp/scorch.zap") if err != nil { t.Fatalf("error persisting segment: %v", err) @@ -523,7 +523,7 @@ func TestOpenMultiWithTwoChunks(t *testing.T) { func TestSegmentVisitableDocValueFieldsList(t *testing.T) { _ = os.RemoveAll("/tmp/scorch.zap") - testSeg, _ := buildTestSegmentMultiWithChunkFactor(1) + testSeg, _, _ := buildTestSegmentMultiWithChunkFactor(1) err := PersistSegmentBase(testSeg, "/tmp/scorch.zap") if err != nil { t.Fatalf("error persisting segment: %v", err) @@ -603,7 +603,7 @@ func TestSegmentVisitableDocValueFieldsList(t *testing.T) { func TestSegmentDocsWithNonOverlappingFields(t *testing.T) { _ = os.RemoveAll("/tmp/scorch.zap") - testSeg, err := buildTestSegmentMultiWithDifferentFields(true, true) + testSeg, _, err := buildTestSegmentMultiWithDifferentFields(true, true) if err != nil { t.Fatalf("error building segment: %v", err) } @@ -653,13 +653,13 @@ func TestMergedSegmentDocsWithNonOverlappingFields(t *testing.T) { _ = os.RemoveAll("/tmp/scorch2.zap") _ = os.RemoveAll("/tmp/scorch3.zap") - testSeg1, _ := buildTestSegmentMultiWithDifferentFields(true, false) + testSeg1, _, _ := buildTestSegmentMultiWithDifferentFields(true, false) err := PersistSegmentBase(testSeg1, "/tmp/scorch1.zap") if err != nil { t.Fatalf("error persisting segment: %v", err) } - testSeg2, _ := buildTestSegmentMultiWithDifferentFields(false, true) + testSeg2, _, _ := buildTestSegmentMultiWithDifferentFields(false, true) err = PersistSegmentBase(testSeg2, "/tmp/scorch2.zap") if err != nil { t.Fatalf("error persisting segment: %v", err) From e12b653120e6cd702e65fa28ab8a70791219e5a0 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Tue, 24 Apr 2018 10:16:11 -0700 Subject: [PATCH 390/728] more knobs for estimating new segment buffer size --- 
index/scorch/segment/zap/new.go | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/index/scorch/segment/zap/new.go b/index/scorch/segment/zap/new.go index a76a8f6af..22b69913e 100644 --- a/index/scorch/segment/zap/new.go +++ b/index/scorch/segment/zap/new.go @@ -29,6 +29,10 @@ import ( "github.com/golang/snappy" ) +var NewSegmentBufferNumResultsBump int = 100 +var NewSegmentBufferNumResultsFactor float64 = 1.0 +var NewSegmentBufferAvgBytesPerDocFactor float64 = 1.0 + // AnalysisResultsToSegmentBase produces an in-memory zap-encoded // SegmentBase from analysis results func AnalysisResultsToSegmentBase(results []*index.AnalysisResult, @@ -41,8 +45,11 @@ func AnalysisResultsToSegmentBase(results []*index.AnalysisResult, // size, but note that the interim instance comes from a // global interimPool, so multiple scorch instances indexing // different docs can lead to low quality estimates - avgBytesPerDoc := s.lastOutSize / s.lastNumDocs - br.Grow(avgBytesPerDoc * (len(results) + 1)) + estimateAvgBytesPerDoc := int(float64(s.lastOutSize/s.lastNumDocs) * + NewSegmentBufferNumResultsFactor) + estimateNumResults := int(float64(len(results)+NewSegmentBufferNumResultsBump) * + NewSegmentBufferAvgBytesPerDocFactor) + br.Grow(estimateAvgBytesPerDoc * estimateNumResults) } s.results = results From 414274b49b0e96dfe9470868992f836ea10e5da1 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Tue, 24 Apr 2018 10:16:57 -0700 Subject: [PATCH 391/728] go fmt --- index/scorch/segment/zap/docvalues.go | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/index/scorch/segment/zap/docvalues.go b/index/scorch/segment/zap/docvalues.go index 971b78b78..9c3815068 100644 --- a/index/scorch/segment/zap/docvalues.go +++ b/index/scorch/segment/zap/docvalues.go @@ -125,13 +125,13 @@ func (di *docValueReader) loadDvChunk(chunkNumber uint64, s *SegmentBase) error // reside for the given docNum destChunkDataLoc, curChunkEnd := di.dvDataLoc, 
di.dvDataLoc start, end := readChunkBoundary(int(chunkNumber), di.chunkOffsets) - if start >= end { - di.curChunkHeader = di.curChunkHeader[:0] - di.curChunkData = nil - di.curChunkNum = chunkNumber - di.uncompressed = di.uncompressed[:0] - return nil - } + if start >= end { + di.curChunkHeader = di.curChunkHeader[:0] + di.curChunkData = nil + di.curChunkNum = chunkNumber + di.uncompressed = di.uncompressed[:0] + return nil + } destChunkDataLoc += start curChunkEnd += end @@ -166,9 +166,9 @@ func (di *docValueReader) iterateAllDocValues(s *SegmentBase, visitor docNumTerm if err != nil { return err } - if di.curChunkData == nil || len(di.curChunkHeader) <= 0 { - continue - } + if di.curChunkData == nil || len(di.curChunkHeader) <= 0 { + continue + } // uncompress the already loaded data uncompressed, err := snappy.Decode(di.uncompressed[:cap(di.uncompressed)], di.curChunkData) @@ -195,7 +195,7 @@ func (di *docValueReader) visitDocValues(docNum uint64, visitor index.DocumentFieldTermVisitor) error { // binary search the term locations for the docNum start, end := di.getDocValueLocs(docNum) - if start == math.MaxUint64 || end == math.MaxUint64 || start == end { + if start == math.MaxUint64 || end == math.MaxUint64 || start == end { return nil } From 06f7c58a3562345b50a2874e2575ebfc579b427e Mon Sep 17 00:00:00 2001 From: abhinavdangeti Date: Tue, 24 Apr 2018 12:16:25 -0700 Subject: [PATCH 392/728] Include missing initialization for size entry of integers --- size/sizes.go | 46 ++++++++++++++++++++++++---------------------- 1 file changed, 24 insertions(+), 22 deletions(-) diff --git a/size/sizes.go b/size/sizes.go index 4ba544a71..0990bf86e 100644 --- a/size/sizes.go +++ b/size/sizes.go @@ -19,28 +19,30 @@ import ( ) func init() { - var a bool - SizeOfBool = int(reflect.TypeOf(a).Size()) - var b float32 - SizeOfFloat32 = int(reflect.TypeOf(b).Size()) - var c float64 - SizeOfFloat64 = int(reflect.TypeOf(c).Size()) - var d map[int]int - SizeOfMap = 
int(reflect.TypeOf(d).Size()) - var e *int - SizeOfPtr = int(reflect.TypeOf(e).Size()) - var f []int - SizeOfSlice = int(reflect.TypeOf(f).Size()) - var g string - SizeOfString = int(reflect.TypeOf(g).Size()) - var h uint8 - SizeOfUint8 = int(reflect.TypeOf(h).Size()) - var i uint16 - SizeOfUint16 = int(reflect.TypeOf(i).Size()) - var j uint32 - SizeOfUint32 = int(reflect.TypeOf(j).Size()) - var k uint64 - SizeOfUint64 = int(reflect.TypeOf(k).Size()) + var b bool + SizeOfBool = int(reflect.TypeOf(b).Size()) + var f32 float32 + SizeOfFloat32 = int(reflect.TypeOf(f32).Size()) + var f64 float64 + SizeOfFloat64 = int(reflect.TypeOf(f64).Size()) + var i int + SizeOfInt = int(reflect.TypeOf(i).Size()) + var m map[int]int + SizeOfMap = int(reflect.TypeOf(m).Size()) + var ptr *int + SizeOfPtr = int(reflect.TypeOf(ptr).Size()) + var slice []int + SizeOfSlice = int(reflect.TypeOf(slice).Size()) + var str string + SizeOfString = int(reflect.TypeOf(str).Size()) + var u8 uint8 + SizeOfUint8 = int(reflect.TypeOf(u8).Size()) + var u16 uint16 + SizeOfUint16 = int(reflect.TypeOf(u16).Size()) + var u32 uint32 + SizeOfUint32 = int(reflect.TypeOf(u32).Size()) + var u64 uint64 + SizeOfUint64 = int(reflect.TypeOf(u64).Size()) } var SizeOfBool int From 3c30bc2d476083802ed11044c004edb44f19eaa4 Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Tue, 24 Apr 2018 15:24:14 -0400 Subject: [PATCH 393/728] account for memory allocated during text analysis --- analysis/freq.go | 41 +++++++++++++++++++++++++++++++++++++++++ index/analysis.go | 19 +++++++++++++++++++ index/scorch/scorch.go | 10 ++++++++++ 3 files changed, 70 insertions(+) diff --git a/analysis/freq.go b/analysis/freq.go index e1ca2cd6f..198c149b2 100644 --- a/analysis/freq.go +++ b/analysis/freq.go @@ -14,6 +14,22 @@ package analysis +import ( + "reflect" + + "github.com/blevesearch/bleve/size" +) + +var reflectStaticSizeTokenLocation int +var reflectStaticSizeTokenFreq int + +func init() { + var tl TokenLocation + 
reflectStaticSizeTokenLocation = int(reflect.TypeOf(tl).Size()) + var tf TokenFreq + reflectStaticSizeTokenFreq = int(reflect.TypeOf(tf).Size()) +} + // TokenLocation represents one occurrence of a term at a particular location in // a field. Start, End and Position have the same meaning as in analysis.Token. // Field and ArrayPositions identify the field value in the source document. @@ -26,6 +42,12 @@ type TokenLocation struct { Position int } +func (tl *TokenLocation) Size() int { + rv := reflectStaticSizeTokenLocation + rv += len(tl.ArrayPositions) * size.SizeOfUint64 + return rv +} + // TokenFreq represents all the occurrences of a term in all fields of a // document. type TokenFreq struct { @@ -34,6 +56,15 @@ type TokenFreq struct { frequency int } +func (tf *TokenFreq) Size() int { + rv := reflectStaticSizeTokenFreq + rv += len(tf.Term) + for _, loc := range tf.Locations { + rv += loc.Size() + } + return rv +} + func (tf *TokenFreq) Frequency() int { return tf.frequency } @@ -42,6 +73,16 @@ func (tf *TokenFreq) Frequency() int { // fields. 
type TokenFrequencies map[string]*TokenFreq +func (tfs TokenFrequencies) Size() int { + rv := size.SizeOfMap + rv += len(tfs) * (size.SizeOfString + size.SizeOfPtr) + for k, v := range tfs { + rv += len(k) + rv += v.Size() + } + return rv +} + func (tfs TokenFrequencies) MergeAll(remoteField string, other TokenFrequencies) { // walk the new token frequencies for tfk, tf := range other { diff --git a/index/analysis.go b/index/analysis.go index 840dad97a..82883af01 100644 --- a/index/analysis.go +++ b/index/analysis.go @@ -15,10 +15,20 @@ package index import ( + "reflect" + "github.com/blevesearch/bleve/analysis" "github.com/blevesearch/bleve/document" + "github.com/blevesearch/bleve/size" ) +var reflectStaticSizeAnalysisResult int + +func init() { + var ar AnalysisResult + reflectStaticSizeAnalysisResult = int(reflect.TypeOf(ar).Size()) +} + type IndexRow interface { KeySize() int KeyTo([]byte) (int, error) @@ -39,6 +49,15 @@ type AnalysisResult struct { Length []int } +func (a *AnalysisResult) Size() int { + rv := reflectStaticSizeAnalysisResult + for _, analyzedI := range a.Analyzed { + rv += analyzedI.Size() + } + rv += len(a.Length) * size.SizeOfInt + return rv +} + type AnalysisWork struct { i Index d *document.Document diff --git a/index/scorch/scorch.go b/index/scorch/scorch.go index 398451570..31d31642a 100644 --- a/index/scorch/scorch.go +++ b/index/scorch/scorch.go @@ -82,6 +82,8 @@ type internalStats struct { mergeSnapshotSize uint64 newSegBufBytesAdded uint64 newSegBufBytesRemoved uint64 + analysisBytesAdded uint64 + analysisBytesRemoved uint64 } func NewScorch(storeName string, @@ -295,12 +297,17 @@ func (s *Scorch) Batch(batch *index.Batch) (err error) { // wait for analysis result analysisResults := make([]*index.AnalysisResult, int(numUpdates)) var itemsDeQueued uint64 + var totalAnalysisSize int for itemsDeQueued < numUpdates { result := <-resultChan + resultSize := result.Size() + atomic.AddUint64(&s.iStats.analysisBytesAdded, uint64(resultSize)) 
+ totalAnalysisSize += resultSize analysisResults[itemsDeQueued] = result itemsDeQueued++ } close(resultChan) + defer atomic.AddUint64(&s.iStats.analysisBytesRemoved, uint64(totalAnalysisSize)) atomic.AddUint64(&s.stats.TotAnalysisTime, uint64(time.Since(start))) @@ -531,6 +538,9 @@ func (s *Scorch) MemoryUsed() uint64 { memUsed += (atomic.LoadUint64(&s.iStats.newSegBufBytesAdded) - atomic.LoadUint64(&s.iStats.newSegBufBytesRemoved)) + memUsed += (atomic.LoadUint64(&s.iStats.analysisBytesAdded) - + atomic.LoadUint64(&s.iStats.analysisBytesRemoved)) + return memUsed } From d6e1f4557305a24c23888267ce3bac039f7977b5 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Tue, 24 Apr 2018 14:41:14 -0700 Subject: [PATCH 394/728] use cap(mem) for SegmentBase.Size() memory accounting --- index/scorch/segment/zap/segment.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/index/scorch/segment/zap/segment.go b/index/scorch/segment/zap/segment.go index 3bae48ddb..4a8a37988 100644 --- a/index/scorch/segment/zap/segment.go +++ b/index/scorch/segment/zap/segment.go @@ -108,7 +108,7 @@ func (sb *SegmentBase) Size() int { func (sb *SegmentBase) updateSize() { sizeInBytes := reflectStaticSizeSegmentBase + - len(sb.mem) + cap(sb.mem) // fieldsMap for k, _ := range sb.fieldsMap { @@ -163,7 +163,7 @@ func (s *Segment) Size() int { sizeInBytes += 16 // do not include the mmap'ed part - return sizeInBytes + s.SegmentBase.Size() - len(s.mem) + return sizeInBytes + s.SegmentBase.Size() - cap(s.mem) } func (s *Segment) AddRef() { From fca187c89cfa96e848141c449b7c2d83cc7d9d59 Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Thu, 26 Apr 2018 08:39:55 -0400 Subject: [PATCH 395/728] disable disjunction heap optimization due to reported problems --- search/searcher/search_disjunction.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/search/searcher/search_disjunction.go b/search/searcher/search_disjunction.go index 16df7ca36..9c1f1d1f9 100644 --- 
a/search/searcher/search_disjunction.go +++ b/search/searcher/search_disjunction.go @@ -16,6 +16,7 @@ package searcher import ( "fmt" + "math" "github.com/blevesearch/bleve/index" "github.com/blevesearch/bleve/search" @@ -29,7 +30,7 @@ var DisjunctionMaxClauseCount = 0 // DisjunctionHeapTakeover is a compile time setting that applications can // adjust to control when the DisjunctionSearcher will switch from a simple // slice implementation to a heap implementation. -var DisjunctionHeapTakeover = 10 +var DisjunctionHeapTakeover = math.MaxInt64 func NewDisjunctionSearcher(indexReader index.IndexReader, qsearchers []search.Searcher, min float64, options search.SearcherOptions) ( From 8b92794f4c08934629e86a805e8f49b0a069f74f Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Thu, 26 Apr 2018 18:18:23 -0400 Subject: [PATCH 396/728] fix disjunction heap implementation and re-enable previously we incorreclty attempted to heap.Fix while traversing the heap in Advance() the fix also eliminates a full traversal of the heap performing comparisons, by taking advantage of the fact that heap elements are already ordered. so in addition to being correct, this should also be faster. --- search/searcher/search_disjunction.go | 3 +-- search/searcher/search_disjunction_heap.go | 27 +++++++++++----------- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/search/searcher/search_disjunction.go b/search/searcher/search_disjunction.go index 9c1f1d1f9..16df7ca36 100644 --- a/search/searcher/search_disjunction.go +++ b/search/searcher/search_disjunction.go @@ -16,7 +16,6 @@ package searcher import ( "fmt" - "math" "github.com/blevesearch/bleve/index" "github.com/blevesearch/bleve/search" @@ -30,7 +29,7 @@ var DisjunctionMaxClauseCount = 0 // DisjunctionHeapTakeover is a compile time setting that applications can // adjust to control when the DisjunctionSearcher will switch from a simple // slice implementation to a heap implementation. 
-var DisjunctionHeapTakeover = math.MaxInt64 +var DisjunctionHeapTakeover = 10 func NewDisjunctionSearcher(indexReader index.IndexReader, qsearchers []search.Searcher, min float64, options search.SearcherOptions) ( diff --git a/search/searcher/search_disjunction_heap.go b/search/searcher/search_disjunction_heap.go index 6414da2cc..ffa373d2d 100644 --- a/search/searcher/search_disjunction_heap.go +++ b/search/searcher/search_disjunction_heap.go @@ -238,25 +238,26 @@ func (s *DisjunctionHeapSearcher) Advance(ctx *search.SearchContext, s.matching = s.matching[:0] s.matchingCurrs = s.matchingCurrs[:0] - // get all searchers pointing at their first match - for i, searcherCurr := range s.heap { - if searcherCurr.searcher != nil { - if searcherCurr.curr.IndexInternalID.Compare(ID) >= 0 { - continue - } - ctx.DocumentMatchPool.Put(searcherCurr.curr) - } + // find all searchers that actually need to be advanced + // advance them, using s.matchingCurrs as temp storage + for len(s.heap) > 0 && bytes.Compare(s.heap[0].curr.IndexInternalID, ID) < 0 { + searcherCurr := heap.Pop(s).(*SearcherCurr) + ctx.DocumentMatchPool.Put(searcherCurr.curr) curr, err := searcherCurr.searcher.Advance(ctx, ID) if err != nil { return nil, err } - searcherCurr.curr = curr - heap.Fix(s, i) + if curr != nil { + searcherCurr.curr = curr + s.matchingCurrs = append(s.matchingCurrs, searcherCurr) + } } - // now remove any nil values (at top of heap) - for len(s.heap) > 0 && s.heap[0].curr == nil { - heap.Pop(s) + // now all of the searchers that we advanced have to be pushed back + for _, matchingCurr := range s.matchingCurrs { + heap.Push(s, matchingCurr) } + // reset our temp space + s.matchingCurrs = s.matchingCurrs[:0] err := s.updateMatches() if err != nil { From 96dad7fda05b25b250b4ffe0e593d4e293b96e21 Mon Sep 17 00:00:00 2001 From: Sreekanth Sivasankaran Date: Fri, 27 Apr 2018 15:47:31 +0530 Subject: [PATCH 397/728] dvReader improvement -attempt to improve the reuse of dvReaders -avoid the 
redundant dvCache prep checks --- index/index.go | 6 ++ index/scorch/segment/segment.go | 20 +++++- index/scorch/segment/zap/docvalues.go | 52 +++++++++++++--- index/scorch/segment/zap/segment_test.go | 4 +- index/scorch/snapshot_index.go | 78 ++++++++++++++++++++---- index/upsidedown/index_reader.go | 14 +++++ search/collector/search_test.go | 13 ++++ search/collector/topn.go | 20 ++++-- 8 files changed, 177 insertions(+), 30 deletions(-) diff --git a/index/index.go b/index/index.go index 42a452cde..7c4edcb8d 100644 --- a/index/index.go +++ b/index/index.go @@ -80,6 +80,8 @@ type IndexReader interface { Document(id string) (*document.Document, error) DocumentVisitFieldTerms(id IndexInternalID, fields []string, visitor DocumentFieldTermVisitor) error + DocValueReader(fields []string) (DocValueReader, error) + Fields() ([]string, error) GetInternal(key []byte) ([]byte, error) @@ -302,3 +304,7 @@ type OptimizableContext interface { // finished or completed via the Finish() method. Finish() error } + +type DocValueReader interface { + VisitDocValues(id IndexInternalID, visitor DocumentFieldTermVisitor) error +} diff --git a/index/scorch/segment/segment.go b/index/scorch/segment/segment.go index 1dd89b763..c2d7ce913 100644 --- a/index/scorch/segment/segment.go +++ b/index/scorch/segment/segment.go @@ -111,10 +111,28 @@ type Location interface { // postings or other indexed values. type DocumentFieldTermVisitable interface { VisitDocumentFieldTerms(localDocNum uint64, fields []string, - visitor index.DocumentFieldTermVisitor) error + visitor index.DocumentFieldTermVisitor, optional DocVisitState) (DocVisitState, error) // VisitableDocValueFields implementation should return // the list of fields which are document value persisted and // therefore visitable by the above VisitDocumentFieldTerms method. 
VisitableDocValueFields() ([]string, error) } + +type DocVisitState interface { + State() *FieldDocValueState + SetState(*FieldDocValueState) +} + +// FieldDocValueState represents the state details, +// which intents to save the redundant dvCache preparations +type FieldDocValueState struct { + DvFieldsAllPersisted bool + DvFieldsPending []string + DvCachePrepared bool + DvSegment DocumentFieldTermVisitable +} + +func (fdvs *FieldDocValueState) CurrentSegment() DocumentFieldTermVisitable { + return fdvs.DvSegment +} diff --git a/index/scorch/segment/zap/docvalues.go b/index/scorch/segment/zap/docvalues.go index 9c3815068..f37185152 100644 --- a/index/scorch/segment/zap/docvalues.go +++ b/index/scorch/segment/zap/docvalues.go @@ -23,6 +23,7 @@ import ( "sort" "github.com/blevesearch/bleve/index" + "github.com/blevesearch/bleve/index/scorch/segment" "github.com/blevesearch/bleve/size" "github.com/golang/snappy" ) @@ -36,6 +37,19 @@ func init() { type docNumTermsVisitor func(docNum uint64, terms []byte) error +type docVisitState struct { + dvrs map[uint16]*docValueReader + state *segment.FieldDocValueState +} + +func (dvs *docVisitState) SetState(state *segment.FieldDocValueState) { + dvs.state = state +} + +func (dvs *docVisitState) State() *segment.FieldDocValueState { + return dvs.state +} + type docValueReader struct { field string curChunkNum uint64 @@ -238,13 +252,29 @@ func (di *docValueReader) getDocValueLocs(docNum uint64) (uint64, uint64) { return math.MaxUint64, math.MaxUint64 } +func (s *Segment) CurrentSegment() segment.DocumentFieldTermVisitable { + return s +} + // VisitDocumentFieldTerms is an implementation of the // DocumentFieldTermVisitable interface -func (s *SegmentBase) VisitDocumentFieldTerms(localDocNum uint64, fields []string, - visitor index.DocumentFieldTermVisitor) error { - var dvIterClone *docValueReader - fieldIDPlus1 := uint16(0) - ok := true +func (s *Segment) VisitDocumentFieldTerms(localDocNum uint64, fields []string, + visitor 
index.DocumentFieldTermVisitor, dvsIn segment.DocVisitState) ( + segment.DocVisitState, error) { + dvs, ok := dvsIn.(*docVisitState) + if !ok || dvs == nil { + dvs = &docVisitState{ + dvrs: make(map[uint16]*docValueReader, len(fields)), + state: &segment.FieldDocValueState{DvSegment: s}, + } + } else { + if dvs.state.DvSegment != s { + dvs.state = &segment.FieldDocValueState{DvSegment: s} + dvs.dvrs = make(map[uint16]*docValueReader, len(fields)) + } + } + + var fieldIDPlus1 uint16 for _, field := range fields { if fieldIDPlus1, ok = s.fieldsMap[field]; !ok { continue @@ -254,20 +284,22 @@ func (s *SegmentBase) VisitDocumentFieldTerms(localDocNum uint64, fields []strin if dvIter, exists := s.fieldDvReaders[fieldIDPlus1-1]; exists && dvIter != nil { - dvIterClone = dvIter.cloneInto(dvIterClone) + if _, ok := dvs.dvrs[fieldIDPlus1-1]; !ok { + dvs.dvrs[fieldIDPlus1-1] = dvIter.cloneInto(dvs.dvrs[fieldIDPlus1-1]) + } // check if the chunk is already loaded - if docInChunk != dvIterClone.curChunkNumber() { - err := dvIterClone.loadDvChunk(docInChunk, s) + if docInChunk != dvs.dvrs[fieldIDPlus1-1].curChunkNumber() { + err := dvs.dvrs[fieldIDPlus1-1].loadDvChunk(docInChunk, &s.SegmentBase) if err != nil { continue } } - _ = dvIterClone.visitDocValues(localDocNum, visitor) + _ = dvs.dvrs[fieldIDPlus1-1].visitDocValues(localDocNum, visitor) } } - return nil + return dvs, nil } // VisitableDocValueFields returns the list of fields with diff --git a/index/scorch/segment/zap/segment_test.go b/index/scorch/segment/zap/segment_test.go index be7a39e14..22b8af23f 100644 --- a/index/scorch/segment/zap/segment_test.go +++ b/index/scorch/segment/zap/segment_test.go @@ -581,9 +581,9 @@ func TestSegmentVisitableDocValueFieldsList(t *testing.T) { } fieldTerms := make(index.FieldTerms) - err = zaps.VisitDocumentFieldTerms(0, fields, func(field string, term []byte) { + _, err = zaps.VisitDocumentFieldTerms(0, fields, func(field string, term []byte) { fieldTerms[field] = 
append(fieldTerms[field], string(term)) - }) + }, nil) if err != nil { t.Error(err) } diff --git a/index/scorch/snapshot_index.go b/index/scorch/snapshot_index.go index 2664fe425..ea73811d4 100644 --- a/index/scorch/snapshot_index.go +++ b/index/scorch/snapshot_index.go @@ -496,30 +496,64 @@ func docInternalToNumber(in index.IndexInternalID) (uint64, error) { func (i *IndexSnapshot) DocumentVisitFieldTerms(id index.IndexInternalID, fields []string, visitor index.DocumentFieldTermVisitor) error { + _, err := i.documentVisitFieldTerms(id, fields, visitor, nil) + return err +} + +func (i *IndexSnapshot) documentVisitFieldTerms(id index.IndexInternalID, + fields []string, visitor index.DocumentFieldTermVisitor, dvs segment.DocVisitState) ( + segment.DocVisitState, error) { docNum, err := docInternalToNumber(id) if err != nil { - return err + return nil, err } segmentIndex, localDocNum := i.segmentIndexAndLocalDocNumFromGlobal(docNum) if segmentIndex >= len(i.segment) { - return nil + return nil, nil } ss := i.segment[segmentIndex] if zaps, ok := ss.segment.(segment.DocumentFieldTermVisitable); ok { + var dvState *segment.FieldDocValueState + if dvs == nil { + dvState = &segment.FieldDocValueState{} + } else { + dvState = dvs.State() + // for a new segment, need to recheck the dvCache preparations + if zaps != dvState.CurrentSegment() { + dvState = &segment.FieldDocValueState{} + } + } + + // if all fields are dv persisted + if dvState.DvFieldsAllPersisted { + return zaps.VisitDocumentFieldTerms(localDocNum, fields, visitor, dvs) + } + + // if the dvCache is already prepared for pending fields + if dvState.DvCachePrepared { + visitDocumentFieldCacheTerms(localDocNum, dvState.DvFieldsPending, ss, visitor) + return zaps.VisitDocumentFieldTerms(localDocNum, fields, visitor, dvs) + } + // get the list of doc value persisted fields pFields, err := zaps.VisitableDocValueFields() if err != nil { - return err + return nil, err } // assort the fields for which terms look up have 
to // be performed runtime dvPendingFields := extractDvPendingFields(fields, pFields) + // all fields are doc value persisted if len(dvPendingFields) == 0 { - // all fields are doc value persisted - return zaps.VisitDocumentFieldTerms(localDocNum, fields, visitor) + dvs, err = zaps.VisitDocumentFieldTerms(localDocNum, fields, visitor, dvs) + state := dvs.State() + state.DvFieldsAllPersisted = true + state.DvFieldsPending = nil + dvs.SetState(state) + return dvs, err } // concurrently trigger the runtime doc value preparations for @@ -528,29 +562,33 @@ func (i *IndexSnapshot) DocumentVisitFieldTerms(id index.IndexInternalID, go func() { defer close(errCh) - err := ss.cachedDocs.prepareFields(fields, ss) + err := ss.cachedDocs.prepareFields(dvPendingFields, ss) if err != nil { errCh <- err } }() - // visit the persisted dv while the cache preparation is in progress - err = zaps.VisitDocumentFieldTerms(localDocNum, fields, visitor) + // visit the requested persisted dv while the cache preparation in progress + dvs, err = zaps.VisitDocumentFieldTerms(localDocNum, fields, visitor, dvs) if err != nil { - return err + return nil, err } // err out if fieldCache preparation failed err = <-errCh if err != nil { - return err + return nil, err } + state := dvs.State() + state.DvCachePrepared = true + state.DvFieldsPending = dvPendingFields + dvs.SetState(state) visitDocumentFieldCacheTerms(localDocNum, dvPendingFields, ss, visitor) - return nil + return dvs, nil } - return prepareCacheVisitDocumentFieldTerms(localDocNum, fields, ss, visitor) + return dvs, prepareCacheVisitDocumentFieldTerms(localDocNum, fields, ss, visitor) } func prepareCacheVisitDocumentFieldTerms(localDocNum uint64, fields []string, @@ -599,6 +637,22 @@ func extractDvPendingFields(requestedFields, persistedFields []string) []string return rv } +func (i *IndexSnapshot) DocValueReader(fields []string) (index.DocValueReader, error) { + return &DocValueReader{i: i, fields: fields}, nil +} + +type DocValueReader 
struct { + i *IndexSnapshot + fields []string + dvs segment.DocVisitState +} + +func (dvr *DocValueReader) VisitDocValues(id index.IndexInternalID, + visitor index.DocumentFieldTermVisitor) (err error) { + dvr.dvs, err = dvr.i.documentVisitFieldTerms(id, dvr.fields, visitor, dvr.dvs) + return err +} + func (i *IndexSnapshot) DumpAll() chan interface{} { rv := make(chan interface{}) go func() { diff --git a/index/upsidedown/index_reader.go b/index/upsidedown/index_reader.go index e045f67c7..ea7243eaa 100644 --- a/index/upsidedown/index_reader.go +++ b/index/upsidedown/index_reader.go @@ -210,3 +210,17 @@ func incrementBytes(in []byte) []byte { } return rv } + +func (i *IndexReader) DocValueReader(fields []string) (index.DocValueReader, error) { + return &DocValueReader{i: i, fields: fields}, nil +} + +type DocValueReader struct { + i *IndexReader + fields []string +} + +func (dvr *DocValueReader) VisitDocValues(id index.IndexInternalID, + visitor index.DocumentFieldTermVisitor) error { + return dvr.i.DocumentVisitFieldTerms(id, dvr.fields, visitor) +} diff --git a/search/collector/search_test.go b/search/collector/search_test.go index 3ba71c1d1..233bc9711 100644 --- a/search/collector/search_test.go +++ b/search/collector/search_test.go @@ -161,3 +161,16 @@ func (sr *stubReader) DumpFields() chan interface{} { func (sr *stubReader) Close() error { return nil } + +func (sr *stubReader) DocValueReader(fields []string) (index.DocValueReader, error) { + return &DocValueReader{i: sr, fields: fields}, nil +} + +type DocValueReader struct { + i *stubReader + fields []string +} + +func (dvr *DocValueReader) VisitDocValues(id index.IndexInternalID, visitor index.DocumentFieldTermVisitor) error { + return dvr.i.DocumentVisitFieldTerms(id, dvr.fields, visitor) +} diff --git a/search/collector/topn.go b/search/collector/topn.go index 4efe3ef24..8d63685cf 100644 --- a/search/collector/topn.go +++ b/search/collector/topn.go @@ -67,6 +67,8 @@ type TopNCollector struct { cachedDesc 
[]bool lowestMatchOutsideResults *search.DocumentMatch + updateFieldVisitor index.DocumentFieldTermVisitor + dvReader index.DocValueReader } // CheckDoneEvery controls how frequently we check the context deadline @@ -140,6 +142,11 @@ func (hc *TopNCollector) Collect(ctx context.Context, searcher search.Searcher, DocumentMatchPool: search.NewDocumentMatchPool(backingSize+searcher.DocumentMatchPoolSize(), len(hc.sort)), } + hc.dvReader, err = reader.DocValueReader(hc.neededFields) + if err != nil { + return err + } + select { case <-ctx.Done(): return ctx.Err() @@ -248,13 +255,16 @@ func (hc *TopNCollector) visitFieldTerms(reader index.IndexReader, d *search.Doc hc.facetsBuilder.StartDoc() } - err := reader.DocumentVisitFieldTerms(d.IndexInternalID, hc.neededFields, func(field string, term []byte) { - if hc.facetsBuilder != nil { - hc.facetsBuilder.UpdateVisitor(field, term) + if hc.updateFieldVisitor == nil { + hc.updateFieldVisitor = func(field string, term []byte) { + if hc.facetsBuilder != nil { + hc.facetsBuilder.UpdateVisitor(field, term) + } + hc.sort.UpdateVisitor(field, term) } - hc.sort.UpdateVisitor(field, term) - }) + } + err := hc.dvReader.VisitDocValues(d.IndexInternalID, hc.updateFieldVisitor) if hc.facetsBuilder != nil { hc.facetsBuilder.EndDoc() } From 14008f445a82c20a916df57a9cbc4e4f6d5075c5 Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Fri, 27 Apr 2018 20:39:21 -0400 Subject: [PATCH 398/728] fix geo searches hitting max clause limit geo queries are supposed to execute without considering the max disjunction clauses limit, but a recent refactoring introduced this bug, causing them to run with the limit enforced. 
--- search/searcher/search_disjunction.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/search/searcher/search_disjunction.go b/search/searcher/search_disjunction.go index 16df7ca36..bbf7b4bbc 100644 --- a/search/searcher/search_disjunction.go +++ b/search/searcher/search_disjunction.go @@ -42,10 +42,10 @@ func newDisjunctionSearcher(indexReader index.IndexReader, limit bool) (search.Searcher, error) { if len(qsearchers) > DisjunctionHeapTakeover { return newDisjunctionHeapSearcher(indexReader, qsearchers, min, options, - true) + limit) } return newDisjunctionSliceSearcher(indexReader, qsearchers, min, options, - true) + limit) } func tooManyClauses(count int) bool { From 285ec7a656f30335eea577e13fa3293866fff417 Mon Sep 17 00:00:00 2001 From: Cesar Alvernaz Date: Sun, 29 Apr 2018 17:52:23 +0100 Subject: [PATCH 399/728] use filepath.Join instead, safer and cross-platform --- index_meta.go | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/index_meta.go b/index_meta.go index 95592a65d..2614292b7 100644 --- a/index_meta.go +++ b/index_meta.go @@ -18,7 +18,8 @@ import ( "encoding/json" "io/ioutil" "os" - + "path/filepath" + "github.com/blevesearch/bleve/index/upsidedown" ) @@ -92,5 +93,5 @@ func (i *indexMeta) Save(path string) (err error) { } func indexMetaPath(path string) string { - return path + string(os.PathSeparator) + metaFilename + return filepath.Join(path, metaFilename) } From 0a3f3e44224c1fc8f8b0e8777b7554abb8113d3b Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Mon, 9 Apr 2018 18:40:08 -0700 Subject: [PATCH 400/728] scorch optimize via vellum.FST.Reader() API --- index/scorch/segment/zap/dict.go | 13 +++++++------ index/scorch/segment/zap/segment.go | 4 ++++ 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/index/scorch/segment/zap/dict.go b/index/scorch/segment/zap/dict.go index b0664534d..a6fe58cdd 100644 --- a/index/scorch/segment/zap/dict.go +++ b/index/scorch/segment/zap/dict.go @@ -28,10 +28,11 
@@ import ( // Dictionary is the zap representation of the term dictionary type Dictionary struct { - sb *SegmentBase - field string - fieldID uint16 - fst *vellum.FST + sb *SegmentBase + field string + fieldID uint16 + fst *vellum.FST + fstReader *vellum.Reader } // PostingsList returns the postings list for the specified term @@ -46,14 +47,14 @@ func (d *Dictionary) PostingsList(term []byte, except *roaring.Bitmap, } func (d *Dictionary) postingsList(term []byte, except *roaring.Bitmap, rv *PostingsList) (*PostingsList, error) { - if d.fst == nil { + if d.fstReader == nil { if rv == nil || rv == emptyPostingsList { return emptyPostingsList, nil } return d.postingsListInit(rv, except), nil } - postingsOffset, exists, err := d.fst.Get(term) + postingsOffset, exists, err := d.fstReader.Get(term) if err != nil { return nil, fmt.Errorf("vellum err: %v", err) } diff --git a/index/scorch/segment/zap/segment.go b/index/scorch/segment/zap/segment.go index 4a8a37988..7d25d5f87 100644 --- a/index/scorch/segment/zap/segment.go +++ b/index/scorch/segment/zap/segment.go @@ -265,6 +265,10 @@ func (sb *SegmentBase) dictionary(field string) (rv *Dictionary, err error) { if err != nil { return nil, fmt.Errorf("dictionary field %s vellum err: %v", field, err) } + rv.fstReader, err = rv.fst.Reader() + if err != nil { + return nil, fmt.Errorf("dictionary field %s vellum Reader err: %v", field, err) + } } } } From 23aaeb730497c01ec0dfab0f05cc6adb62097b1e Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Mon, 9 Apr 2018 18:56:51 -0700 Subject: [PATCH 401/728] scorch optimize ExternalID() with Segment.DocID() API The previous implementation of ExternalID(), which used the generic VisitDocument() API to access the _id field (with extra closure allocations), has now been replaced in this optimization by invoking a new Segment.DocID() method. 
--- index/scorch/segment/empty.go | 4 ++++ index/scorch/segment/segment.go | 3 +++ index/scorch/segment/zap/segment.go | 24 ++++++++++++++++++++++++ index/scorch/snapshot_index.go | 19 +++++-------------- index/scorch/snapshot_segment.go | 4 ++++ 5 files changed, 40 insertions(+), 14 deletions(-) diff --git a/index/scorch/segment/empty.go b/index/scorch/segment/empty.go index 95f6d8bc8..0489c8218 100644 --- a/index/scorch/segment/empty.go +++ b/index/scorch/segment/empty.go @@ -29,6 +29,10 @@ func (e *EmptySegment) VisitDocument(num uint64, visitor DocumentFieldValueVisit return nil } +func (e *EmptySegment) DocID(num uint64) ([]byte, error) { + return nil, nil +} + func (e *EmptySegment) Count() uint64 { return 0 } diff --git a/index/scorch/segment/segment.go b/index/scorch/segment/segment.go index 1dd89b763..1620530a4 100644 --- a/index/scorch/segment/segment.go +++ b/index/scorch/segment/segment.go @@ -28,6 +28,9 @@ type Segment interface { Dictionary(field string) (TermDictionary, error) VisitDocument(num uint64, visitor DocumentFieldValueVisitor) error + + DocID(num uint64) ([]byte, error) + Count() uint64 DocNumbers([]string) (*roaring.Bitmap, error) diff --git a/index/scorch/segment/zap/segment.go b/index/scorch/segment/zap/segment.go index 4a8a37988..874a9df66 100644 --- a/index/scorch/segment/zap/segment.go +++ b/index/scorch/segment/zap/segment.go @@ -368,6 +368,30 @@ func (s *SegmentBase) VisitDocument(num uint64, visitor segment.DocumentFieldVal return nil } +// DocID returns the value of the _id field for the given docNum +func (s *SegmentBase) DocID(num uint64) ([]byte, error) { + if num >= s.numDocs { + return nil, nil + } + + vdc := visitDocumentCtxPool.Get().(*visitDocumentCtx) + + meta, compressed := s.getDocStoredMetaAndCompressed(num) + + vdc.reader.Reset(meta) + + // handle _id field special case + idFieldValLen, err := binary.ReadUvarint(&vdc.reader) + if err != nil { + return nil, err + } + idFieldVal := compressed[:idFieldValLen] + + 
visitDocumentCtxPool.Put(vdc) + + return idFieldVal, nil +} + // Count returns the number of documents in this segment. func (s *SegmentBase) Count() uint64 { return s.numDocs diff --git a/index/scorch/snapshot_index.go b/index/scorch/snapshot_index.go index 2664fe425..d9251ca5e 100644 --- a/index/scorch/snapshot_index.go +++ b/index/scorch/snapshot_index.go @@ -357,24 +357,15 @@ func (i *IndexSnapshot) ExternalID(id index.IndexInternalID) (string, error) { } segmentIndex, localDocNum := i.segmentIndexAndLocalDocNumFromGlobal(docNum) - var found bool - var rv string - err = i.segment[segmentIndex].VisitDocument(localDocNum, func(field string, typ byte, value []byte, pos []uint64) bool { - if field == "_id" { - found = true - rv = string(value) - return false - } - return true - }) + v, err := i.segment[segmentIndex].DocID(localDocNum) if err != nil { return "", err } - - if found { - return rv, nil + if v == nil { + return "", fmt.Errorf("document number %d not found", docNum) } - return "", fmt.Errorf("document number %d not found", docNum) + + return string(v), nil } func (i *IndexSnapshot) InternalID(id string) (rv index.IndexInternalID, err error) { diff --git a/index/scorch/snapshot_segment.go b/index/scorch/snapshot_segment.go index 44aafa523..4053244da 100644 --- a/index/scorch/snapshot_segment.go +++ b/index/scorch/snapshot_segment.go @@ -101,6 +101,10 @@ func (s *SegmentSnapshot) VisitDocument(num uint64, visitor segment.DocumentFiel return s.segment.VisitDocument(num, visitor) } +func (s *SegmentSnapshot) DocID(num uint64) ([]byte, error) { + return s.segment.DocID(num) +} + func (s *SegmentSnapshot) Count() uint64 { rv := s.segment.Count() From 7c996711e7dadf65200295244305ba3a695be1c5 Mon Sep 17 00:00:00 2001 From: abhinavdangeti Date: Mon, 30 Apr 2018 14:00:58 -0700 Subject: [PATCH 402/728] Micro optimizations while estimating mem needed for search --- index_impl.go | 39 +++++++++++++++++++++++++-------------- 1 file changed, 25 insertions(+), 14 
deletions(-) diff --git a/index_impl.go b/index_impl.go index b5373ff0d..c969f3758 100644 --- a/index_impl.go +++ b/index_impl.go @@ -368,6 +368,25 @@ func (i *indexImpl) Search(req *SearchRequest) (sr *SearchResult, err error) { return i.SearchInContext(context.Background(), req) } +var documentMatchEmptySize int +var searchContextEmptySize int +var facetResultEmptySize int +var documentEmptySize int + +func init() { + var dm search.DocumentMatch + documentMatchEmptySize = dm.Size() + + var sc search.SearchContext + searchContextEmptySize = sc.Size() + + var fr search.FacetResult + facetResultEmptySize = fr.Size() + + var d document.Document + documentEmptySize = d.Size() +} + // memNeededForSearch is a helper function that returns an estimate of RAM // needed to execute a search request. func memNeededForSearch(req *SearchRequest, @@ -385,35 +404,27 @@ func memNeededForSearch(req *SearchRequest, // overhead, size in bytes from collector estimate += topnCollector.Size() - var dm search.DocumentMatch - sizeOfDocumentMatch := dm.Size() - // pre-allocing DocumentMatchPool - var sc search.SearchContext - estimate += sc.Size() + numDocMatches*sizeOfDocumentMatch + estimate += searchContextEmptySize + numDocMatches*documentMatchEmptySize // searcher overhead estimate += searcher.Size() // overhead from results, lowestMatchOutsideResults - estimate += (numDocMatches + 1) * sizeOfDocumentMatch + estimate += (numDocMatches + 1) * documentMatchEmptySize // additional overhead from SearchResult - var sr SearchResult - estimate += sr.Size() + estimate += reflectStaticSizeSearchResult + reflectStaticSizeSearchStatus // overhead from facet results if req.Facets != nil { - var fr search.FacetResult - estimate += len(req.Facets) * fr.Size() + estimate += len(req.Facets) * facetResultEmptySize } // highlighting, store - var d document.Document if len(req.Fields) > 0 || req.Highlight != nil { - for i := 0; i < (req.Size + req.From); i++ { // size + from => number of hits - estimate 
+= (req.Size + req.From) * d.Size() - } + // Size + From => number of hits + estimate += (req.Size + req.From) * documentEmptySize } return uint64(estimate) From a8bd3e641e1ead048ded267094922f69b68ca44b Mon Sep 17 00:00:00 2001 From: Sreekanth Sivasankaran Date: Tue, 1 May 2018 13:45:34 +0530 Subject: [PATCH 403/728] refactoring the dvReader reuse --- index/scorch/segment/segment.go | 15 ------- index/scorch/segment/zap/docvalues.go | 63 ++++++++++++--------------- index/scorch/snapshot_index.go | 33 +------------- search/collector/topn.go | 16 +++---- 4 files changed, 37 insertions(+), 90 deletions(-) diff --git a/index/scorch/segment/segment.go b/index/scorch/segment/segment.go index c2d7ce913..9b8d2aa47 100644 --- a/index/scorch/segment/segment.go +++ b/index/scorch/segment/segment.go @@ -120,19 +120,4 @@ type DocumentFieldTermVisitable interface { } type DocVisitState interface { - State() *FieldDocValueState - SetState(*FieldDocValueState) -} - -// FieldDocValueState represents the state details, -// which intents to save the redundant dvCache preparations -type FieldDocValueState struct { - DvFieldsAllPersisted bool - DvFieldsPending []string - DvCachePrepared bool - DvSegment DocumentFieldTermVisitable -} - -func (fdvs *FieldDocValueState) CurrentSegment() DocumentFieldTermVisitable { - return fdvs.DvSegment } diff --git a/index/scorch/segment/zap/docvalues.go b/index/scorch/segment/zap/docvalues.go index f37185152..d28964978 100644 --- a/index/scorch/segment/zap/docvalues.go +++ b/index/scorch/segment/zap/docvalues.go @@ -38,16 +38,8 @@ func init() { type docNumTermsVisitor func(docNum uint64, terms []byte) error type docVisitState struct { - dvrs map[uint16]*docValueReader - state *segment.FieldDocValueState -} - -func (dvs *docVisitState) SetState(state *segment.FieldDocValueState) { - dvs.state = state -} - -func (dvs *docVisitState) State() *segment.FieldDocValueState { - return dvs.state + dvrs map[uint16]*docValueReader + segment *Segment } type 
docValueReader struct { @@ -252,10 +244,6 @@ func (di *docValueReader) getDocValueLocs(docNum uint64) (uint64, uint64) { return math.MaxUint64, math.MaxUint64 } -func (s *Segment) CurrentSegment() segment.DocumentFieldTermVisitable { - return s -} - // VisitDocumentFieldTerms is an implementation of the // DocumentFieldTermVisitable interface func (s *Segment) VisitDocumentFieldTerms(localDocNum uint64, fields []string, @@ -263,40 +251,47 @@ func (s *Segment) VisitDocumentFieldTerms(localDocNum uint64, fields []string, segment.DocVisitState, error) { dvs, ok := dvsIn.(*docVisitState) if !ok || dvs == nil { - dvs = &docVisitState{ - dvrs: make(map[uint16]*docValueReader, len(fields)), - state: &segment.FieldDocValueState{DvSegment: s}, - } + dvs = &docVisitState{} } else { - if dvs.state.DvSegment != s { - dvs.state = &segment.FieldDocValueState{DvSegment: s} - dvs.dvrs = make(map[uint16]*docValueReader, len(fields)) + if dvs.segment != s { + dvs.segment = s + dvs.dvrs = nil } } var fieldIDPlus1 uint16 + if dvs.dvrs == nil { + dvs.dvrs = make(map[uint16]*docValueReader, len(fields)) + for _, field := range fields { + if fieldIDPlus1, ok = s.fieldsMap[field]; !ok { + continue + } + fieldID := fieldIDPlus1 - 1 + if dvIter, exists := s.fieldDvReaders[fieldID]; exists && + dvIter != nil { + dvs.dvrs[fieldID] = dvIter.cloneInto(dvs.dvrs[fieldID]) + } + } + } + + // find the chunkNumber where the docValues are stored + docInChunk := localDocNum / uint64(s.chunkFactor) + var dvr *docValueReader for _, field := range fields { if fieldIDPlus1, ok = s.fieldsMap[field]; !ok { continue } - // find the chunkNumber where the docValues are stored - docInChunk := localDocNum / uint64(s.chunkFactor) - - if dvIter, exists := s.fieldDvReaders[fieldIDPlus1-1]; exists && - dvIter != nil { - if _, ok := dvs.dvrs[fieldIDPlus1-1]; !ok { - dvs.dvrs[fieldIDPlus1-1] = dvIter.cloneInto(dvs.dvrs[fieldIDPlus1-1]) - } - + fieldID := fieldIDPlus1 - 1 + if dvr, ok = dvs.dvrs[fieldID]; ok && dvr != 
nil { // check if the chunk is already loaded - if docInChunk != dvs.dvrs[fieldIDPlus1-1].curChunkNumber() { - err := dvs.dvrs[fieldIDPlus1-1].loadDvChunk(docInChunk, &s.SegmentBase) + if docInChunk != dvr.curChunkNumber() { + err := dvr.loadDvChunk(docInChunk, &s.SegmentBase) if err != nil { - continue + return dvs, err } } - _ = dvs.dvrs[fieldIDPlus1-1].visitDocValues(localDocNum, visitor) + _ = dvr.visitDocValues(localDocNum, visitor) } } return dvs, nil diff --git a/index/scorch/snapshot_index.go b/index/scorch/snapshot_index.go index ea73811d4..6de5e14fd 100644 --- a/index/scorch/snapshot_index.go +++ b/index/scorch/snapshot_index.go @@ -516,28 +516,6 @@ func (i *IndexSnapshot) documentVisitFieldTerms(id index.IndexInternalID, ss := i.segment[segmentIndex] if zaps, ok := ss.segment.(segment.DocumentFieldTermVisitable); ok { - var dvState *segment.FieldDocValueState - if dvs == nil { - dvState = &segment.FieldDocValueState{} - } else { - dvState = dvs.State() - // for a new segment, need to recheck the dvCache preparations - if zaps != dvState.CurrentSegment() { - dvState = &segment.FieldDocValueState{} - } - } - - // if all fields are dv persisted - if dvState.DvFieldsAllPersisted { - return zaps.VisitDocumentFieldTerms(localDocNum, fields, visitor, dvs) - } - - // if the dvCache is already prepared for pending fields - if dvState.DvCachePrepared { - visitDocumentFieldCacheTerms(localDocNum, dvState.DvFieldsPending, ss, visitor) - return zaps.VisitDocumentFieldTerms(localDocNum, fields, visitor, dvs) - } - // get the list of doc value persisted fields pFields, err := zaps.VisitableDocValueFields() if err != nil { @@ -548,12 +526,7 @@ func (i *IndexSnapshot) documentVisitFieldTerms(id index.IndexInternalID, dvPendingFields := extractDvPendingFields(fields, pFields) // all fields are doc value persisted if len(dvPendingFields) == 0 { - dvs, err = zaps.VisitDocumentFieldTerms(localDocNum, fields, visitor, dvs) - state := dvs.State() - state.DvFieldsAllPersisted = 
true - state.DvFieldsPending = nil - dvs.SetState(state) - return dvs, err + return zaps.VisitDocumentFieldTerms(localDocNum, fields, visitor, dvs) } // concurrently trigger the runtime doc value preparations for @@ -579,10 +552,6 @@ func (i *IndexSnapshot) documentVisitFieldTerms(id index.IndexInternalID, if err != nil { return nil, err } - state := dvs.State() - state.DvCachePrepared = true - state.DvFieldsPending = dvPendingFields - dvs.SetState(state) visitDocumentFieldCacheTerms(localDocNum, dvPendingFields, ss, visitor) return dvs, nil diff --git a/search/collector/topn.go b/search/collector/topn.go index 8d63685cf..4b2682da0 100644 --- a/search/collector/topn.go +++ b/search/collector/topn.go @@ -147,6 +147,13 @@ func (hc *TopNCollector) Collect(ctx context.Context, searcher search.Searcher, return err } + hc.updateFieldVisitor = func(field string, term []byte) { + if hc.facetsBuilder != nil { + hc.facetsBuilder.UpdateVisitor(field, term) + } + hc.sort.UpdateVisitor(field, term) + } + select { case <-ctx.Done(): return ctx.Err() @@ -255,15 +262,6 @@ func (hc *TopNCollector) visitFieldTerms(reader index.IndexReader, d *search.Doc hc.facetsBuilder.StartDoc() } - if hc.updateFieldVisitor == nil { - hc.updateFieldVisitor = func(field string, term []byte) { - if hc.facetsBuilder != nil { - hc.facetsBuilder.UpdateVisitor(field, term) - } - hc.sort.UpdateVisitor(field, term) - } - } - err := hc.dvReader.VisitDocValues(d.IndexInternalID, hc.updateFieldVisitor) if hc.facetsBuilder != nil { hc.facetsBuilder.EndDoc() From ebdbbffd06d86d411f1ba4fbbc10c94370e6ddb4 Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Thu, 3 May 2018 12:54:36 -0400 Subject: [PATCH 404/728] limit fuzzy edit distance to 2 --- search/searcher/search_fuzzy.go | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/search/searcher/search_fuzzy.go b/search/searcher/search_fuzzy.go index 41ad804f1..575e3c140 100644 --- a/search/searcher/search_fuzzy.go +++ b/search/searcher/search_fuzzy.go @@ 
-15,13 +15,22 @@ package searcher import ( + "fmt" + "github.com/blevesearch/bleve/index" "github.com/blevesearch/bleve/search" ) +var MaxFuzziness = 2 + func NewFuzzySearcher(indexReader index.IndexReader, term string, prefix, fuzziness int, field string, boost float64, options search.SearcherOptions) (search.Searcher, error) { + + if fuzziness > MaxFuzziness { + return nil, fmt.Errorf("fuzziness exceeds max (%d)", MaxFuzziness) + } + // Note: we don't byte slice the term for a prefix because of runes. prefixTerm := "" for i, r := range term { From e52660b99369b0e79f3de17434f5c7594aa61986 Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Thu, 3 May 2018 13:32:06 -0400 Subject: [PATCH 405/728] correctly return dictionary iterator creation errors in Next() --- index/scorch/segment/zap/dict.go | 6 ++-- index/scorch/segment/zap/dict_test.go | 40 +++++++++++++++++++++++++++ 2 files changed, 43 insertions(+), 3 deletions(-) diff --git a/index/scorch/segment/zap/dict.go b/index/scorch/segment/zap/dict.go index a6fe58cdd..736fa59f6 100644 --- a/index/scorch/segment/zap/dict.go +++ b/index/scorch/segment/zap/dict.go @@ -270,10 +270,10 @@ type DictionaryIterator struct { // Next returns the next entry in the dictionary func (i *DictionaryIterator) Next() (*index.DictEntry, error) { - if i.itr == nil || i.err == vellum.ErrIteratorDone { - return nil, nil - } else if i.err != nil { + if i.err != nil && i.err != vellum.ErrIteratorDone { return nil, i.err + } else if i.itr == nil || i.err == vellum.ErrIteratorDone { + return nil, nil } term, postingsOffset := i.itr.Current() i.entry.Term = string(term) diff --git a/index/scorch/segment/zap/dict_test.go b/index/scorch/segment/zap/dict_test.go index 1a8ce22c5..8cbd5710b 100644 --- a/index/scorch/segment/zap/dict_test.go +++ b/index/scorch/segment/zap/dict_test.go @@ -178,3 +178,43 @@ func TestDictionary(t *testing.T) { t.Errorf("expected: %v, got: %v", expected, got) } } + +func TestDictionaryError(t *testing.T) { + + _ = 
os.RemoveAll("/tmp/scorch.zap") + + testSeg, _, _ := buildTestSegmentForDict() + err := PersistSegmentBase(testSeg, "/tmp/scorch.zap") + if err != nil { + t.Fatalf("error persisting segment: %v", err) + } + + segment, err := Open("/tmp/scorch.zap") + if err != nil { + t.Fatalf("error opening segment: %v", err) + } + defer func() { + cerr := segment.Close() + if cerr != nil { + t.Fatalf("error closing segment: %v", err) + } + }() + + dict, err := segment.Dictionary("desc") + if err != nil { + t.Fatal(err) + } + + itr := dict.FuzzyIterator("summer", 5) + if itr == nil { + t.Fatalf("got nil itr") + } + nxt, err := itr.Next() + if nxt != nil { + t.Fatalf("expected nil next") + } + if err == nil { + t.Fatalf("expected error from iterator") + } + +} From b1b570b3d9b0e359833ea7f0819bd7ab87a08490 Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Thu, 3 May 2018 14:18:13 -0400 Subject: [PATCH 406/728] return err and close properly for fuzzy and regexp search --- search/searcher/search_fuzzy.go | 7 ++++++- search/searcher/search_regexp.go | 3 +++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/search/searcher/search_fuzzy.go b/search/searcher/search_fuzzy.go index 575e3c140..b812f4840 100644 --- a/search/searcher/search_fuzzy.go +++ b/search/searcher/search_fuzzy.go @@ -65,12 +65,17 @@ func findFuzzyCandidateTerms(indexReader index.IndexReader, term string, if err != nil { return rv, err } + defer func() { + if cerr := fieldDict.Close(); cerr != nil && err == nil { + err = cerr + } + }() tfd, err := fieldDict.Next() for err == nil && tfd != nil { rv = append(rv, tfd.Term) tfd, err = fieldDict.Next() } - return rv, nil + return rv, err } fieldDict, err = indexReader.FieldDict(field) } diff --git a/search/searcher/search_regexp.go b/search/searcher/search_regexp.go index b88eb3eb5..ad417a056 100644 --- a/search/searcher/search_regexp.go +++ b/search/searcher/search_regexp.go @@ -47,6 +47,9 @@ func NewRegexpSearcher(indexReader index.IndexReader, pattern 
*regexp.Regexp, candidateTerms = append(candidateTerms, tfd.Term) tfd, err = fieldDict.Next() } + if err != nil { + return nil, err + } } else { prefixTerm, complete := pattern.LiteralPrefix() if complete { From 4d0e0fe826886e61a76221aed6fd9bad082386b2 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Fri, 4 May 2018 10:55:02 -0700 Subject: [PATCH 407/728] scorch refactor visitDocumentFieldCacheTerms for lock protection This change refactors visitDocumentFieldCacheTerms() into a method of cachedDocs so that it can acquire the appropriate locks. --- index/scorch/snapshot_index.go | 25 ++----------------------- index/scorch/snapshot_segment.go | 24 ++++++++++++++++++++++++ 2 files changed, 26 insertions(+), 23 deletions(-) diff --git a/index/scorch/snapshot_index.go b/index/scorch/snapshot_index.go index edb986d0a..be71836d0 100644 --- a/index/scorch/snapshot_index.go +++ b/index/scorch/snapshot_index.go @@ -15,7 +15,6 @@ package scorch import ( - "bytes" "container/heap" "encoding/binary" "fmt" @@ -544,7 +543,7 @@ func (i *IndexSnapshot) documentVisitFieldTerms(id index.IndexInternalID, return nil, err } - visitDocumentFieldCacheTerms(localDocNum, dvPendingFields, ss, visitor) + ss.cachedDocs.visitDoc(localDocNum, dvPendingFields, visitor) return dvs, nil } @@ -558,30 +557,10 @@ func prepareCacheVisitDocumentFieldTerms(localDocNum uint64, fields []string, return err } - visitDocumentFieldCacheTerms(localDocNum, fields, ss, visitor) + ss.cachedDocs.visitDoc(localDocNum, fields, visitor) return nil } -func visitDocumentFieldCacheTerms(localDocNum uint64, fields []string, - ss *SegmentSnapshot, visitor index.DocumentFieldTermVisitor) { - - for _, field := range fields { - if cachedFieldDocs, exists := ss.cachedDocs.cache[field]; exists { - if tlist, exists := cachedFieldDocs.docs[localDocNum]; exists { - for { - i := bytes.Index(tlist, TermSeparatorSplitSlice) - if i < 0 { - break - } - visitor(field, tlist[0:i]) - tlist = tlist[i+1:] - } - } - } - } - -} - func 
extractDvPendingFields(requestedFields, persistedFields []string) []string { removeMap := make(map[string]struct{}, len(persistedFields)) for _, str := range persistedFields { diff --git a/index/scorch/snapshot_segment.go b/index/scorch/snapshot_segment.go index 4053244da..7f7300109 100644 --- a/index/scorch/snapshot_segment.go +++ b/index/scorch/snapshot_segment.go @@ -15,10 +15,12 @@ package scorch import ( + "bytes" "sync" "sync/atomic" "github.com/RoaringBitmap/roaring" + "github.com/blevesearch/bleve/index" "github.com/blevesearch/bleve/index/scorch/segment" "github.com/blevesearch/bleve/size" ) @@ -270,3 +272,25 @@ func (c *cachedDocs) updateSizeLOCKED() { } atomic.StoreUint64(&c.size, uint64(sizeInBytes)) } + +func (c *cachedDocs) visitDoc(localDocNum uint64, + fields []string, visitor index.DocumentFieldTermVisitor) { + c.m.Lock() + + for _, field := range fields { + if cachedFieldDocs, exists := c.cache[field]; exists { + if tlist, exists := cachedFieldDocs.docs[localDocNum]; exists { + for { + i := bytes.Index(tlist, TermSeparatorSplitSlice) + if i < 0 { + break + } + visitor(field, tlist[0:i]) + tlist = tlist[i+1:] + } + } + } + } + + c.m.Unlock() +} From f8373bd328b9aa44d827c244ed268d66fab2dbe0 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Fri, 4 May 2018 13:04:13 -0700 Subject: [PATCH 408/728] rename cachedFieldDocs.prepareField() to singular --- index/scorch/snapshot_segment.go | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/index/scorch/snapshot_segment.go b/index/scorch/snapshot_segment.go index 7f7300109..4e05f5153 100644 --- a/index/scorch/snapshot_segment.go +++ b/index/scorch/snapshot_segment.go @@ -168,7 +168,7 @@ type cachedFieldDocs struct { size uint64 } -func (cfd *cachedFieldDocs) prepareFields(field string, ss *SegmentSnapshot) { +func (cfd *cachedFieldDocs) prepareField(field string, ss *SegmentSnapshot) { defer close(cfd.readyCh) cfd.size += uint64(size.SizeOfUint64) /* size field */ @@ -224,6 +224,7 @@ type 
cachedDocs struct { func (c *cachedDocs) prepareFields(wantedFields []string, ss *SegmentSnapshot) error { c.m.Lock() + if c.cache == nil { c.cache = make(map[string]*cachedFieldDocs, len(ss.Fields())) } @@ -236,7 +237,7 @@ func (c *cachedDocs) prepareFields(wantedFields []string, ss *SegmentSnapshot) e docs: make(map[uint64][]byte), } - go c.cache[field].prepareFields(field, ss) + go c.cache[field].prepareField(field, ss) } } @@ -250,6 +251,7 @@ func (c *cachedDocs) prepareFields(wantedFields []string, ss *SegmentSnapshot) e } c.m.Lock() } + c.updateSizeLOCKED() c.m.Unlock() From 7ec3ad9e8ae005b3a123a0cd08b9f6484ea3259f Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Fri, 4 May 2018 14:04:11 -0700 Subject: [PATCH 409/728] scorch documentVisitFieldTerms() avoids unneeded prepareFields() This optimization checks whether the cachedDocs already has all the wanted fields cached already (from a previous invocation) before spawning a goroutine to prepareFields(). --- index/scorch/snapshot_index.go | 94 +++++++++++++++----------------- index/scorch/snapshot_segment.go | 18 +++++- 2 files changed, 61 insertions(+), 51 deletions(-) diff --git a/index/scorch/snapshot_index.go b/index/scorch/snapshot_index.go index be71836d0..2525f9899 100644 --- a/index/scorch/snapshot_index.go +++ b/index/scorch/snapshot_index.go @@ -491,13 +491,13 @@ func (i *IndexSnapshot) DocumentVisitFieldTerms(id index.IndexInternalID, } func (i *IndexSnapshot) documentVisitFieldTerms(id index.IndexInternalID, - fields []string, visitor index.DocumentFieldTermVisitor, dvs segment.DocVisitState) ( - segment.DocVisitState, error) { - + fields []string, visitor index.DocumentFieldTermVisitor, + dvs segment.DocVisitState) (segment.DocVisitState, error) { docNum, err := docInternalToNumber(id) if err != nil { return nil, err } + segmentIndex, localDocNum := i.segmentIndexAndLocalDocNumFromGlobal(docNum) if segmentIndex >= len(i.segment) { return nil, nil @@ -505,75 +505,50 @@ func (i *IndexSnapshot) 
documentVisitFieldTerms(id index.IndexInternalID, ss := i.segment[segmentIndex] - if zaps, ok := ss.segment.(segment.DocumentFieldTermVisitable); ok { - // get the list of doc value persisted fields - pFields, err := zaps.VisitableDocValueFields() + var vFields []string // fields that are visitable via the segment + + ssv, ssvOk := ss.segment.(segment.DocumentFieldTermVisitable) + if ssvOk && ssv != nil { + vFields, err = ssv.VisitableDocValueFields() if err != nil { return nil, err } - // assort the fields for which terms look up have to - // be performed runtime - dvPendingFields := extractDvPendingFields(fields, pFields) - // all fields are doc value persisted - if len(dvPendingFields) == 0 { - return zaps.VisitDocumentFieldTerms(localDocNum, fields, visitor, dvs) - } + } + + // cFields represents the fields that we'll need from the cachedDocs + cFields := subtractStrings(fields, vFields) - // concurrently trigger the runtime doc value preparations for - // pending fields as well as the visit of the persisted doc values - errCh := make(chan error, 1) + var errCh chan error + + if !ss.cachedDocs.hasFields(cFields) { + errCh = make(chan error, 1) go func() { - defer close(errCh) - err := ss.cachedDocs.prepareFields(dvPendingFields, ss) + err := ss.cachedDocs.prepareFields(cFields, ss) if err != nil { errCh <- err } + close(errCh) }() + } - // visit the requested persisted dv while the cache preparation in progress - dvs, err = zaps.VisitDocumentFieldTerms(localDocNum, fields, visitor, dvs) + if ssvOk && ssv != nil && len(vFields) > 0 { + dvs, err = ssv.VisitDocumentFieldTerms(localDocNum, fields, visitor, dvs) if err != nil { return nil, err } + } - // err out if fieldCache preparation failed + if errCh != nil { err = <-errCh if err != nil { return nil, err } - - ss.cachedDocs.visitDoc(localDocNum, dvPendingFields, visitor) - return dvs, nil } - return dvs, prepareCacheVisitDocumentFieldTerms(localDocNum, fields, ss, visitor) -} + 
ss.cachedDocs.visitDoc(localDocNum, cFields, visitor) -func prepareCacheVisitDocumentFieldTerms(localDocNum uint64, fields []string, - ss *SegmentSnapshot, visitor index.DocumentFieldTermVisitor) error { - err := ss.cachedDocs.prepareFields(fields, ss) - if err != nil { - return err - } - - ss.cachedDocs.visitDoc(localDocNum, fields, visitor) - return nil -} - -func extractDvPendingFields(requestedFields, persistedFields []string) []string { - removeMap := make(map[string]struct{}, len(persistedFields)) - for _, str := range persistedFields { - removeMap[str] = struct{}{} - } - - rv := make([]string, 0, len(requestedFields)) - for _, s := range requestedFields { - if _, ok := removeMap[s]; !ok { - rv = append(rv, s) - } - } - return rv + return dvs, nil } func (i *IndexSnapshot) DocValueReader(fields []string) (index.DocValueReader, error) { @@ -615,3 +590,22 @@ func (i *IndexSnapshot) DumpFields() chan interface{} { }() return rv } + +// subtractStrings returns set a minus elements of set b. 
+func subtractStrings(a, b []string) []string { + if len(b) <= 0 { + return a + } + + rv := make([]string, 0, len(a)) +OUTER: + for _, as := range a { + for _, bs := range b { + if as == bs { + continue OUTER + } + } + rv = append(rv, as) + } + return rv +} diff --git a/index/scorch/snapshot_segment.go b/index/scorch/snapshot_segment.go index 4e05f5153..7ee43fcf9 100644 --- a/index/scorch/snapshot_segment.go +++ b/index/scorch/snapshot_segment.go @@ -108,7 +108,6 @@ func (s *SegmentSnapshot) DocID(num uint64) ([]byte, error) { } func (s *SegmentSnapshot) Count() uint64 { - rv := s.segment.Count() if s.deleted != nil { rv -= s.deleted.GetCardinality() @@ -258,6 +257,23 @@ func (c *cachedDocs) prepareFields(wantedFields []string, ss *SegmentSnapshot) e return nil } +// hasFields returns true if the cache has all the given fields +func (c *cachedDocs) hasFields(fields []string) bool { + c.m.Lock() +OUTER: + for _, field := range fields { + for f := range c.cache { + if f == field { + continue OUTER + } + } + c.m.Unlock() + return false // found a field not in cache + } + c.m.Unlock() + return true +} + func (c *cachedDocs) Size() int { return int(atomic.LoadUint64(&c.size)) } From 3afbaf3731b6d3d7cd136b59944772d8eebc0e16 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Fri, 4 May 2018 14:14:30 -0700 Subject: [PATCH 410/728] scorch optimize zap segment.VisitableDocValueFields() --- index/scorch/segment/zap/docvalues.go | 9 +-------- index/scorch/segment/zap/segment.go | 8 +++++++- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/index/scorch/segment/zap/docvalues.go b/index/scorch/segment/zap/docvalues.go index d28964978..96dd5ab1a 100644 --- a/index/scorch/segment/zap/docvalues.go +++ b/index/scorch/segment/zap/docvalues.go @@ -301,12 +301,5 @@ func (s *Segment) VisitDocumentFieldTerms(localDocNum uint64, fields []string, // persisted doc value terms ready to be visitable using the // VisitDocumentFieldTerms method. 
func (s *Segment) VisitableDocValueFields() ([]string, error) { - rv := make([]string, 0, len(s.fieldDvReaders)) - for fieldID, field := range s.fieldsInv { - if dvIter, ok := s.fieldDvReaders[uint16(fieldID)]; ok && - dvIter != nil { - rv = append(rv, field) - } - } - return rv, nil + return s.fieldDvNames, nil } diff --git a/index/scorch/segment/zap/segment.go b/index/scorch/segment/zap/segment.go index 5f9a562f5..ed09d149d 100644 --- a/index/scorch/segment/zap/segment.go +++ b/index/scorch/segment/zap/segment.go @@ -99,6 +99,7 @@ type SegmentBase struct { docValueOffset uint64 dictLocs []uint64 fieldDvReaders map[uint16]*docValueReader // naive chunk cache per field + fieldDvNames []string // field names cached in fieldDvReaders size uint64 } @@ -528,7 +529,12 @@ func (s *SegmentBase) loadDvReaders() error { } read += uint64(n) - s.fieldDvReaders[uint16(fieldID)], _ = s.loadFieldDocValueReader(field, fieldLocStart, fieldLocEnd) + fieldDvReader, _ := s.loadFieldDocValueReader(field, fieldLocStart, fieldLocEnd) + if fieldDvReader != nil { + s.fieldDvReaders[uint16(fieldID)] = fieldDvReader + s.fieldDvNames = append(s.fieldDvNames, field) + } } + return nil } From 03dcd2e2f5b06016668b37d7f61b838d2d2c62a6 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Fri, 4 May 2018 14:28:08 -0700 Subject: [PATCH 411/728] scorch zap optimize docValueReader curChunkHeader slice reuse --- index/scorch/segment/zap/docvalues.go | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/index/scorch/segment/zap/docvalues.go b/index/scorch/segment/zap/docvalues.go index 96dd5ab1a..72ce1248f 100644 --- a/index/scorch/segment/zap/docvalues.go +++ b/index/scorch/segment/zap/docvalues.go @@ -69,9 +69,9 @@ func (di *docValueReader) cloneInto(rv *docValueReader) *docValueReader { rv.curChunkNum = math.MaxUint64 rv.chunkOffsets = di.chunkOffsets // immutable, so it's sharable rv.dvDataLoc = di.dvDataLoc - rv.curChunkHeader = nil + rv.curChunkHeader = rv.curChunkHeader[:0] 
rv.curChunkData = nil - rv.uncompressed = nil + rv.uncompressed = rv.uncompressed[:0] return rv } @@ -150,7 +150,11 @@ func (di *docValueReader) loadDvChunk(chunkNumber uint64, s *SegmentBase) error chunkMetaLoc := destChunkDataLoc + uint64(read) offset := uint64(0) - di.curChunkHeader = make([]MetaData, int(numDocs)) + if cap(di.curChunkHeader) < int(numDocs) { + di.curChunkHeader = make([]MetaData, int(numDocs)) + } else { + di.curChunkHeader = di.curChunkHeader[:int(numDocs)] + } for i := 0; i < int(numDocs); i++ { di.curChunkHeader[i].DocNum, read = binary.Uvarint(s.mem[chunkMetaLoc+offset : chunkMetaLoc+offset+binary.MaxVarintLen64]) offset += uint64(read) From a0c5b7fc401620fb071a64dcaa9f9ecbc7d82957 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Fri, 4 May 2018 15:41:52 -0700 Subject: [PATCH 412/728] optimize cachedDocs.hasFields() Hat tip to Abhinav D. for optimization idea! --- index/scorch/snapshot_segment.go | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/index/scorch/snapshot_segment.go b/index/scorch/snapshot_segment.go index 7ee43fcf9..98084a980 100644 --- a/index/scorch/snapshot_segment.go +++ b/index/scorch/snapshot_segment.go @@ -260,15 +260,11 @@ func (c *cachedDocs) prepareFields(wantedFields []string, ss *SegmentSnapshot) e // hasFields returns true if the cache has all the given fields func (c *cachedDocs) hasFields(fields []string) bool { c.m.Lock() -OUTER: for _, field := range fields { - for f := range c.cache { - if f == field { - continue OUTER - } + if _, exists := c.cache[field]; !exists { + c.m.Unlock() + return false // found a field not in cache } - c.m.Unlock() - return false // found a field not in cache } c.m.Unlock() return true From c3a911da80c2cdeebc2e219be8bf868d07b2ca34 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Fri, 4 May 2018 16:01:08 -0700 Subject: [PATCH 413/728] scorch optimize DocValueReader as its fields are stable Since the wanted fields are immutable during the lifetime of a 
DocValueReader, we can optimize as the cached fields doesn't need to be recomputed on every document that's visited. --- index/scorch/snapshot_index.go | 74 +++++++++++++++++++++++++--------- 1 file changed, 55 insertions(+), 19 deletions(-) diff --git a/index/scorch/snapshot_index.go b/index/scorch/snapshot_index.go index 2525f9899..4d5cfc7ba 100644 --- a/index/scorch/snapshot_index.go +++ b/index/scorch/snapshot_index.go @@ -503,6 +503,16 @@ func (i *IndexSnapshot) documentVisitFieldTerms(id index.IndexInternalID, return nil, nil } + _, dvs, err = i.documentVisitFieldTermsOnSegment( + segmentIndex, localDocNum, fields, nil, visitor, dvs) + + return dvs, err +} + +func (i *IndexSnapshot) documentVisitFieldTermsOnSegment( + segmentIndex int, localDocNum uint64, fields []string, cFields []string, + visitor index.DocumentFieldTermVisitor, dvs segment.DocVisitState) ( + cFieldsOut []string, dvsOut segment.DocVisitState, err error) { ss := i.segment[segmentIndex] var vFields []string // fields that are visitable via the segment @@ -511,59 +521,85 @@ func (i *IndexSnapshot) documentVisitFieldTerms(id index.IndexInternalID, if ssvOk && ssv != nil { vFields, err = ssv.VisitableDocValueFields() if err != nil { - return nil, err + return nil, nil, err } } - // cFields represents the fields that we'll need from the cachedDocs - cFields := subtractStrings(fields, vFields) - var errCh chan error - if !ss.cachedDocs.hasFields(cFields) { - errCh = make(chan error, 1) + // cFields represents the fields that we'll need from the + // cachedDocs, and might be optionally be provided by the caller, + // if the caller happens to know we're on the same segmentIndex + // from a previous invocation + if cFields == nil { + cFields = subtractStrings(fields, vFields) - go func() { - err := ss.cachedDocs.prepareFields(cFields, ss) - if err != nil { - errCh <- err - } - close(errCh) - }() + if !ss.cachedDocs.hasFields(cFields) { + errCh = make(chan error, 1) + + go func() { + err := 
ss.cachedDocs.prepareFields(cFields, ss) + if err != nil { + errCh <- err + } + close(errCh) + }() + } } if ssvOk && ssv != nil && len(vFields) > 0 { dvs, err = ssv.VisitDocumentFieldTerms(localDocNum, fields, visitor, dvs) if err != nil { - return nil, err + return nil, nil, err } } if errCh != nil { err = <-errCh if err != nil { - return nil, err + return nil, nil, err } } ss.cachedDocs.visitDoc(localDocNum, cFields, visitor) - return dvs, nil + return cFields, dvs, nil } -func (i *IndexSnapshot) DocValueReader(fields []string) (index.DocValueReader, error) { - return &DocValueReader{i: i, fields: fields}, nil +func (i *IndexSnapshot) DocValueReader(fields []string) ( + index.DocValueReader, error) { + return &DocValueReader{i: i, fields: fields, currSegmentIndex: -1}, nil } type DocValueReader struct { i *IndexSnapshot fields []string dvs segment.DocVisitState + + currSegmentIndex int + currCachedFields []string } func (dvr *DocValueReader) VisitDocValues(id index.IndexInternalID, visitor index.DocumentFieldTermVisitor) (err error) { - dvr.dvs, err = dvr.i.documentVisitFieldTerms(id, dvr.fields, visitor, dvr.dvs) + docNum, err := docInternalToNumber(id) + if err != nil { + return err + } + + segmentIndex, localDocNum := dvr.i.segmentIndexAndLocalDocNumFromGlobal(docNum) + if segmentIndex >= len(dvr.i.segment) { + return nil + } + + if dvr.currSegmentIndex != segmentIndex { + dvr.currSegmentIndex = segmentIndex + dvr.currCachedFields = nil + } + + dvr.currCachedFields, dvr.dvs, err = dvr.i.documentVisitFieldTermsOnSegment( + dvr.currSegmentIndex, localDocNum, dvr.fields, dvr.currCachedFields, visitor, dvr.dvs) + return err } From 46cdfad30f9c7247d2cd49861752618cf006a529 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Fri, 4 May 2018 19:02:54 -0700 Subject: [PATCH 414/728] scorch invokes cachedDocs.visitDoc() only if needed Hat tip to opimization feedback from Sreekanth S. 
--- index/scorch/snapshot_index.go | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/index/scorch/snapshot_index.go b/index/scorch/snapshot_index.go index 4d5cfc7ba..0f8c21371 100644 --- a/index/scorch/snapshot_index.go +++ b/index/scorch/snapshot_index.go @@ -561,7 +561,9 @@ func (i *IndexSnapshot) documentVisitFieldTermsOnSegment( } } - ss.cachedDocs.visitDoc(localDocNum, cFields, visitor) + if len(cFields) > 0 { + ss.cachedDocs.visitDoc(localDocNum, cFields, visitor) + } return cFields, dvs, nil } From 0f9eebda37f2011c7b9996160a7d2c90a79ea98e Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Sat, 5 May 2018 11:21:00 -0700 Subject: [PATCH 415/728] optimize sort via values slice reuse --- search/sort.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/search/sort.go b/search/sort.go index 6afc9789b..70fb969be 100644 --- a/search/sort.go +++ b/search/sort.go @@ -359,7 +359,7 @@ func (s *SortField) UpdateVisitor(field string, term []byte) { func (s *SortField) Value(i *DocumentMatch) string { iTerms := s.filterTermsByType(s.values) iTerm := s.filterTermsByMode(iTerms) - s.values = nil + s.values = s.values[:0] return iTerm } @@ -619,7 +619,7 @@ func (s *SortGeoDistance) UpdateVisitor(field string, term []byte) { func (s *SortGeoDistance) Value(i *DocumentMatch) string { iTerms := s.filterTermsByType(s.values) iTerm := s.filterTermsByMode(iTerms) - s.values = nil + s.values = s.values[:0] if iTerm == "" { return maxDistance From 1408317e35f590736fbcaf81cc2693c1a3b6d60b Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Sat, 5 May 2018 11:36:21 -0700 Subject: [PATCH 416/728] optimize SortField by delaying string conversion --- numeric/prefix_coded.go | 4 ++++ search/sort.go | 33 ++++++++++++++++++++------------- 2 files changed, 24 insertions(+), 13 deletions(-) diff --git a/numeric/prefix_coded.go b/numeric/prefix_coded.go index 4200c23bb..76ea001ba 100644 --- a/numeric/prefix_coded.go +++ b/numeric/prefix_coded.go @@ -77,6 +77,10 @@ 
func (p PrefixCoded) Int64() (int64, error) { } func ValidPrefixCodedTerm(p string) (bool, int) { + return ValidPrefixCodedTermBytes([]byte(p)) +} + +func ValidPrefixCodedTermBytes(p []byte) (bool, int) { if len(p) > 0 { if p[0] < ShiftStartInt64 || p[0] > ShiftStartInt64+63 { return false, 0 diff --git a/search/sort.go b/search/sort.go index 70fb969be..78d38d857 100644 --- a/search/sort.go +++ b/search/sort.go @@ -15,6 +15,7 @@ package search import ( + "bytes" "encoding/json" "fmt" "math" @@ -342,14 +343,14 @@ type SortField struct { Type SortFieldType Mode SortFieldMode Missing SortFieldMissing - values []string + values [][]byte } // UpdateVisitor notifies this sort field that in this document // this field has the specified term func (s *SortField) UpdateVisitor(field string, term []byte) { if field == s.Field { - s.values = append(s.values, string(term)) + s.values = append(s.values, term) } } @@ -368,17 +369,17 @@ func (s *SortField) Descending() bool { return s.Desc } -func (s *SortField) filterTermsByMode(terms []string) string { +func (s *SortField) filterTermsByMode(terms [][]byte) string { if len(terms) == 1 || (len(terms) > 1 && s.Mode == SortFieldDefault) { - return terms[0] + return string(terms[0]) } else if len(terms) > 1 { switch s.Mode { case SortFieldMin: - sort.Strings(terms) - return terms[0] + sort.Sort(BytesSlice(terms)) + return string(terms[0]) case SortFieldMax: - sort.Strings(terms) - return terms[len(terms)-1] + sort.Sort(BytesSlice(terms)) + return string(terms[len(terms)-1]) } } @@ -400,13 +401,13 @@ func (s *SortField) filterTermsByMode(terms []string) string { // return only the terms which had shift of 0 // if we are in explicit number or date mode, return only valid // prefix coded numbers with shift of 0 -func (s *SortField) filterTermsByType(terms []string) []string { +func (s *SortField) filterTermsByType(terms [][]byte) [][]byte { stype := s.Type if stype == SortFieldAuto { allTermsPrefixCoded := true - var termsWithShiftZero 
[]string + var termsWithShiftZero [][]byte for _, term := range terms { - valid, shift := numeric.ValidPrefixCodedTerm(term) + valid, shift := numeric.ValidPrefixCodedTermBytes(term) if valid && shift == 0 { termsWithShiftZero = append(termsWithShiftZero, term) } else if !valid { @@ -417,9 +418,9 @@ func (s *SortField) filterTermsByType(terms []string) []string { terms = termsWithShiftZero } } else if stype == SortFieldAsNumber || stype == SortFieldAsDate { - var termsWithShiftZero []string + var termsWithShiftZero [][]byte for _, term := range terms { - valid, shift := numeric.ValidPrefixCodedTerm(term) + valid, shift := numeric.ValidPrefixCodedTermBytes(term) if valid && shift == 0 { termsWithShiftZero = append(termsWithShiftZero, term) } @@ -700,3 +701,9 @@ func (s *SortGeoDistance) Copy() SearchSort { rv := *s return &rv } + +type BytesSlice [][]byte + +func (p BytesSlice) Len() int { return len(p) } +func (p BytesSlice) Less(i, j int) bool { return bytes.Compare(p[i], p[j]) < 0 } +func (p BytesSlice) Swap(i, j int) { p[i], p[j] = p[j], p[i] } From 44e0a2957167084db62fda268afc9c86992dc47e Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Sat, 5 May 2018 13:29:59 -0700 Subject: [PATCH 417/728] optimize SortField termsWithShiftZero to reuse slice --- search/sort.go | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/search/sort.go b/search/sort.go index 78d38d857..e17f70787 100644 --- a/search/sort.go +++ b/search/sort.go @@ -344,6 +344,7 @@ type SortField struct { Mode SortFieldMode Missing SortFieldMissing values [][]byte + tmp [][]byte } // UpdateVisitor notifies this sort field that in this document @@ -405,7 +406,7 @@ func (s *SortField) filterTermsByType(terms [][]byte) [][]byte { stype := s.Type if stype == SortFieldAuto { allTermsPrefixCoded := true - var termsWithShiftZero [][]byte + termsWithShiftZero := s.tmp[:0] for _, term := range terms { valid, shift := numeric.ValidPrefixCodedTermBytes(term) if valid && shift == 0 { @@ -416,9 
+417,10 @@ func (s *SortField) filterTermsByType(terms [][]byte) [][]byte { } if allTermsPrefixCoded { terms = termsWithShiftZero + s.tmp = termsWithShiftZero[:0] } } else if stype == SortFieldAsNumber || stype == SortFieldAsDate { - var termsWithShiftZero [][]byte + termsWithShiftZero := s.tmp[:0] for _, term := range terms { valid, shift := numeric.ValidPrefixCodedTermBytes(term) if valid && shift == 0 { @@ -426,6 +428,7 @@ func (s *SortField) filterTermsByType(terms [][]byte) [][]byte { } } terms = termsWithShiftZero + s.tmp = termsWithShiftZero[:0] } return terms } From 1387c024723b947a7bd22828e59dea2d1fc4333b Mon Sep 17 00:00:00 2001 From: abhinavdangeti Date: Mon, 7 May 2018 16:57:07 -0700 Subject: [PATCH 418/728] MB-29554: Handle case when MemoryUsed is invoked after index is closed T1: runtime.sigpanic() /home/couchbase/.cbdepscache/exploded/x86_64/go-1.9.6/go/src/runtime/signal_unix.go:367 +0x17c fp=0xc4315bc928 sp=0xc4315bc8d8 pc=0x44432c github.com/blevesearch/bleve/index/scorch.(*IndexSnapshot).AddRef(0x0) /home/couchbase/jenkins/workspace/couchbase-server-unix/godeps/src/github.com/blevesearch/bleve/index/scorch/snapshot_index.go:77 +0x22 fp=0xc4315bc948 sp=0xc4315bc928 pc=0x6012c2 github.com/blevesearch/bleve/index/scorch.(*Scorch).currentSnapshot(0xc420364380, 0xc4314ca180) /home/couchbase/jenkins/workspace/couchbase-server-unix/godeps/src/github.com/blevesearch/bleve/index/scorch/scorch.go:427 +0x53 fp=0xc4315bc970 sp=0xc4315bc948 pc=0x5ffc03 github.com/blevesearch/bleve/index/scorch.(*Scorch).MemoryUsed(0xc420364380, 0x0) /home/couchbase/jenkins/workspace/couchbase-server-unix/godeps/src/github.com/blevesearch/bleve/index/scorch/scorch.go:512 +0x38 fp=0xc4315bc9a0 sp=0xc4315bc970 pc=0x600dd8 main.scorchSize(0xdbf6c0, 0xc420364380, 0x6812d7) /home/couchbase/jenkins/workspace/couchbase-server-unix/goproj/src/github.com/couchbase/cbft/cmd/cbft/app_herder.go:296 +0x3c fp=0xc4315bc9c8 sp=0xc4315bc9a0 pc=0xbc97cc 
main.(*appHerder).indexingMemoryLOCKED(0xc42023a690, 0xc42023a6b4) /home/couchbase/jenkins/workspace/couchbase-server-unix/goproj/src/github.com/couchbase/cbft/cmd/cbft/app_herder.go:122 +0x9f fp=0xc4315bca60 sp=0xc4315bc9c8 pc=0xbc87bf T2: sync.runtime_SemacquireMutex(0xc42023a6b4, 0x0) /home/couchbase/.cbdepscache/exploded/x86_64/go-1.9.6/go/src/runtime/sema.go:71 +0x3d fp=0xc420339600 sp=0xc4203395d8 pc=0x44062d sync.(*Mutex).Lock(0xc42023a6b0) /home/couchbase/.cbdepscache/exploded/x86_64/go-1.9.6/go/src/sync/mutex.go:134 +0xee fp=0xc420339638 sp=0xc420339600 pc=0x466f7e main.(*appHerder).onClose(0xc42023a690, 0xdbf6c0, 0xc420364380) /home/couchbase/jenkins/workspace/couchbase-server-unix/goproj/src/github.com/couchbase/cbft/cmd/cbft/app_herder.go:83 +0x3a fp=0xc420339698 sp=0xc420339638 pc=0xbc844a main.(*appHerder).onScorchEvent(0xc42023a690, 0x2, 0xc420364380, 0xd06f1a6) /home/couchbase/jenkins/workspace/couchbase-server-unix/goproj/src/github.com/couchbase/cbft/cmd/cbft/app_herder.go:302 +0xc2 fp=0xc4203396c8 sp=0xc420339698 pc=0xbc98d2 main.(*appHerder).ScorchHerderOnEvent.func1(0x2, 0xc420364380, 0xd06f1a6) /home/couchbase/jenkins/workspace/couchbase-server-unix/goproj/src/github.com/couchbase/cbft/cmd/cbft/app_herder.go:292 +0x48 fp=0xc4203396f8 sp=0xc4203396c8 pc=0xbd9a98 github.com/blevesearch/bleve/index/scorch.(*Scorch).fireEvent(0xc420364380, 0x2, 0xd06f1a6) /home/couchbase/jenkins/workspace/couchbase-server-unix/godeps/src/github.com/blevesearch/bleve/index/scorch/scorch.go:122 +0x55 fp=0xc420339720 sp=0xc4203396f8 pc=0x5fd9b5 github.com/blevesearch/bleve/index/scorch.(*Scorch).Close.func1(0xc420364380, 0xbeb41277d6e9dc06, 0x29ace50f52, 0x144bc40) /home/couchbase/jenkins/workspace/couchbase-server-unix/godeps/src/github.com/blevesearch/bleve/index/scorch/scorch.go:224 +0x63 fp=0xc420339750 sp=0xc420339720 pc=0x60b253 github.com/blevesearch/bleve/index/scorch.(*Scorch).Close(0xc420364380, 0x0, 0x0) 
/home/couchbase/jenkins/workspace/couchbase-server-unix/godeps/src/github.com/blevesearch/bleve/index/scorch/scorch.go:244 +0xde fp=0xc4203397a0 sp=0xc420339750 pc=0x5fe3ae --- index/scorch/scorch.go | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/index/scorch/scorch.go b/index/scorch/scorch.go index 31d31642a..fe4f71b87 100644 --- a/index/scorch/scorch.go +++ b/index/scorch/scorch.go @@ -424,7 +424,9 @@ func (s *Scorch) Reader() (index.IndexReader, error) { func (s *Scorch) currentSnapshot() *IndexSnapshot { s.rootLock.RLock() rv := s.root - rv.AddRef() + if rv != nil { + rv.AddRef() + } s.rootLock.RUnlock() return rv } @@ -508,14 +510,18 @@ func (s *Scorch) AddEligibleForRemoval(epoch uint64) { s.rootLock.Unlock() } -func (s *Scorch) MemoryUsed() uint64 { +func (s *Scorch) MemoryUsed() (memUsed uint64) { indexSnapshot := s.currentSnapshot() + if indexSnapshot == nil { + return + } + defer func() { _ = indexSnapshot.Close() }() // Account for current root snapshot overhead - memUsed := uint64(indexSnapshot.Size()) + memUsed += uint64(indexSnapshot.Size()) // Account for snapshot that the persister may be working on persistEpoch := atomic.LoadUint64(&s.iStats.persistEpoch) From 5f3c372f877f06ac695d7a22e112e3906aa7b3b9 Mon Sep 17 00:00:00 2001 From: abhinavdangeti Date: Thu, 10 May 2018 17:19:04 -0700 Subject: [PATCH 419/728] Fix bug in bleve's zap stored command, formatting + Handle the _id field special case + Format the output for better readability --- cmd/bleve/cmd/scorch/info.go | 2 +- cmd/bleve/cmd/scorch/snapshot.go | 2 +- cmd/bleve/cmd/zap/dict.go | 6 +++--- cmd/bleve/cmd/zap/explore.go | 2 +- cmd/bleve/cmd/zap/stored.go | 9 +++++++-- 5 files changed, 13 insertions(+), 8 deletions(-) diff --git a/cmd/bleve/cmd/scorch/info.go b/cmd/bleve/cmd/scorch/info.go index 2b4674f06..31e6481f9 100644 --- a/cmd/bleve/cmd/scorch/info.go +++ b/cmd/bleve/cmd/scorch/info.go @@ -37,7 +37,7 @@ var infoCmd = &cobra.Command{ return err } - 
fmt.Printf("count: %d\n", count) + fmt.Printf("doc count: %d\n", count) // var numSnapshots int // var rootSnapshot uint64 diff --git a/cmd/bleve/cmd/scorch/snapshot.go b/cmd/bleve/cmd/scorch/snapshot.go index bb035ce59..df13d4901 100644 --- a/cmd/bleve/cmd/scorch/snapshot.go +++ b/cmd/bleve/cmd/scorch/snapshot.go @@ -35,7 +35,7 @@ var snapshotCmd = &cobra.Command{ return err } for _, snapshotEpoch := range snapshotEpochs { - fmt.Printf("%d\n", snapshotEpoch) + fmt.Printf("snapshot epoch: %d\n", snapshotEpoch) } } else if len(args) < 3 { snapshotEpoch, err := strconv.ParseUint(args[1], 10, 64) diff --git a/cmd/bleve/cmd/zap/dict.go b/cmd/bleve/cmd/zap/dict.go index e80be3601..35952f5f5 100644 --- a/cmd/bleve/cmd/zap/dict.go +++ b/cmd/bleve/cmd/zap/dict.go @@ -45,8 +45,8 @@ var dictCmd = &cobra.Command{ vellumLen, read := binary.Uvarint(data[addr : addr+binary.MaxVarintLen64]) fmt.Printf("vellum length: %d\n", vellumLen) fstBytes := data[addr+uint64(read) : addr+uint64(read)+vellumLen] - fmt.Printf("raw vellum data % x\n", fstBytes) - fmt.Printf("dictionary:\n\n") + fmt.Printf("raw vellum data:\n % x\n", fstBytes) + fmt.Printf("dictionary:\n") if fstBytes != nil { fst, err := vellum.Load(fstBytes) if err != nil { @@ -63,7 +63,7 @@ var dictCmd = &cobra.Command{ extra = fmt.Sprintf("-- docNum: %d, norm: %f", docNum, norm) } - fmt.Printf("%s - %d (%x) %s\n", currTerm, currVal, currVal, extra) + fmt.Printf(" %s - %d (%x) %s\n", currTerm, currVal, currVal, extra) err = itr.Next() } if err != nil && err != vellum.ErrIteratorDone { diff --git a/cmd/bleve/cmd/zap/explore.go b/cmd/bleve/cmd/zap/explore.go index 0c2471edc..d22fa8ce1 100644 --- a/cmd/bleve/cmd/zap/explore.go +++ b/cmd/bleve/cmd/zap/explore.go @@ -46,7 +46,7 @@ var exploreCmd = &cobra.Command{ vellumLen, read := binary.Uvarint(data[addr : addr+binary.MaxVarintLen64]) fmt.Printf("vellum length: %d\n", vellumLen) fstBytes := data[addr+uint64(read) : addr+uint64(read)+vellumLen] - fmt.Printf("raw vellum data % 
x\n", fstBytes) + fmt.Printf("raw vellum data:\n % x\n", fstBytes) if len(args) >= 3 { if fstBytes != nil { diff --git a/cmd/bleve/cmd/zap/stored.go b/cmd/bleve/cmd/zap/stored.go index ba1143cb1..28d62c0cb 100644 --- a/cmd/bleve/cmd/zap/stored.go +++ b/cmd/bleve/cmd/zap/stored.go @@ -58,11 +58,16 @@ var storedCmd = &cobra.Command{ fmt.Printf("Raw meta: % x\n", meta) raw := data[storedStartAddr+n+metaLen : storedStartAddr+n+metaLen+dataLen] fmt.Printf("Raw data (len %d): % x\n", len(raw), raw) - uncompressed, err := snappy.Decode(nil, raw) + + // handle _id field special case + idFieldValLen, _ := binary.Uvarint(meta) + fmt.Printf("Raw _id (len %d): % x\n", idFieldValLen, raw[:idFieldValLen]) + fmt.Printf("Raw fields (len %d): % x\n", dataLen-idFieldValLen, raw[idFieldValLen:]) + uncompressed, err := snappy.Decode(nil, raw[idFieldValLen:]) if err != nil { panic(err) } - fmt.Printf("Uncompressed data (len %d): % x\n", len(uncompressed), uncompressed) + fmt.Printf("Uncompressed fields (len %d): % x\n", len(uncompressed), uncompressed) return nil }, From f9bb6c0575e4a494638e61ac4af9c9cd07e2ba9b Mon Sep 17 00:00:00 2001 From: Sreekanth Sivasankaran Date: Wed, 16 May 2018 15:46:48 +0530 Subject: [PATCH 420/728] MB-29516 - geo search adopts DocValueReader migrate geo search use of DocumentVisitFieldTerms to DocValueReader --- search/searcher/search_geoboundingbox.go | 36 ++++++++++++---------- search/searcher/search_geopointdistance.go | 35 ++++++++++++--------- 2 files changed, 40 insertions(+), 31 deletions(-) diff --git a/search/searcher/search_geoboundingbox.go b/search/searcher/search_geoboundingbox.go index f8b1b4cf7..289e41678 100644 --- a/search/searcher/search_geoboundingbox.go +++ b/search/searcher/search_geoboundingbox.go @@ -40,6 +40,11 @@ func NewGeoBoundingBoxSearcher(indexReader index.IndexReader, minLon, minLat, minLon, minLat, maxLon, maxLat, checkBoundaries) var onBoundarySearcher search.Searcher + dvReader, err := 
indexReader.DocValueReader([]string{field}) + if err != nil { + return nil, err + } + if len(onBoundaryTerms) > 0 { rawOnBoundarySearcher, err := NewMultiTermSearcherBytes(indexReader, onBoundaryTerms, field, boost, options, false) @@ -48,7 +53,7 @@ func NewGeoBoundingBoxSearcher(indexReader index.IndexReader, minLon, minLat, } // add filter to check points near the boundary onBoundarySearcher = NewFilteringSearcher(rawOnBoundarySearcher, - buildRectFilter(indexReader, field, minLon, minLat, maxLon, maxLat)) + buildRectFilter(dvReader, field, minLon, minLat, maxLon, maxLat)) openedSearchers = append(openedSearchers, onBoundarySearcher) } @@ -144,26 +149,25 @@ func relateAndRecurse(start, end uint64, res uint, return nil, nil } -func buildRectFilter(indexReader index.IndexReader, field string, +func buildRectFilter(dvReader index.DocValueReader, field string, minLon, minLat, maxLon, maxLat float64) FilterFunc { return func(d *search.DocumentMatch) bool { var lon, lat float64 var found bool - err := indexReader.DocumentVisitFieldTerms(d.IndexInternalID, - []string{field}, func(field string, term []byte) { - // only consider the values which are shifted 0 - prefixCoded := numeric.PrefixCoded(term) - shift, err := prefixCoded.Shift() - if err == nil && shift == 0 { - var i64 int64 - i64, err = prefixCoded.Int64() - if err == nil { - lon = geo.MortonUnhashLon(uint64(i64)) - lat = geo.MortonUnhashLat(uint64(i64)) - found = true - } + err := dvReader.VisitDocValues(d.IndexInternalID, func(field string, term []byte) { + // only consider the values which are shifted 0 + prefixCoded := numeric.PrefixCoded(term) + shift, err := prefixCoded.Shift() + if err == nil && shift == 0 { + var i64 int64 + i64, err = prefixCoded.Int64() + if err == nil { + lon = geo.MortonUnhashLon(uint64(i64)) + lat = geo.MortonUnhashLat(uint64(i64)) + found = true } - }) + } + }) if err == nil && found { return geo.BoundingBoxContains(lon, lat, minLon, minLat, maxLon, maxLat) diff --git 
a/search/searcher/search_geopointdistance.go b/search/searcher/search_geopointdistance.go index fd559766f..a15c194e8 100644 --- a/search/searcher/search_geopointdistance.go +++ b/search/searcher/search_geopointdistance.go @@ -39,9 +39,14 @@ func NewGeoPointDistanceSearcher(indexReader index.IndexReader, centerLon, return nil, err } + dvReader, err := indexReader.DocValueReader([]string{field}) + if err != nil { + return nil, err + } + // wrap it in a filtering searcher which checks the actual distance return NewFilteringSearcher(boxSearcher, - buildDistFilter(indexReader, field, centerLon, centerLat, dist)), nil + buildDistFilter(dvReader, field, centerLon, centerLat, dist)), nil } // boxSearcher builds a searcher for the described bounding box @@ -87,25 +92,25 @@ func boxSearcher(indexReader index.IndexReader, return boxSearcher, nil } -func buildDistFilter(indexReader index.IndexReader, field string, +func buildDistFilter(dvReader index.DocValueReader, field string, centerLon, centerLat, maxDist float64) FilterFunc { return func(d *search.DocumentMatch) bool { var lon, lat float64 var found bool - err := indexReader.DocumentVisitFieldTerms(d.IndexInternalID, - []string{field}, func(field string, term []byte) { - // only consider the values which are shifted 0 - prefixCoded := numeric.PrefixCoded(term) - shift, err := prefixCoded.Shift() - if err == nil && shift == 0 { - i64, err := prefixCoded.Int64() - if err == nil { - lon = geo.MortonUnhashLon(uint64(i64)) - lat = geo.MortonUnhashLat(uint64(i64)) - found = true - } + + err := dvReader.VisitDocValues(d.IndexInternalID, func(field string, term []byte) { + // only consider the values which are shifted 0 + prefixCoded := numeric.PrefixCoded(term) + shift, err := prefixCoded.Shift() + if err == nil && shift == 0 { + i64, err := prefixCoded.Int64() + if err == nil { + lon = geo.MortonUnhashLon(uint64(i64)) + lat = geo.MortonUnhashLat(uint64(i64)) + found = true } - }) + } + }) if err == nil && found { dist := 
geo.Haversin(lon, lat, centerLon, centerLat) if dist <= maxDist/1000 { From a009a463cfa800b0ef8ee46b9a436e23e2bbde45 Mon Sep 17 00:00:00 2001 From: abhinavdangeti Date: Thu, 17 May 2018 11:43:18 -0700 Subject: [PATCH 421/728] errCheck to ignore fmt pkg during checks that travis-ci performs --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 775fed3a9..934e86268 100644 --- a/.travis.yml +++ b/.travis.yml @@ -16,7 +16,7 @@ script: - gvt restore - go test -v $(go list ./... | grep -v vendor/) - go vet $(go list ./... | grep -v vendor/) - - errcheck $(go list ./... | grep -v vendor/) + - errcheck -ignorepkg fmt $(go list ./... | grep -v vendor/) - docs/project-code-coverage.sh - docs/build_children.sh From 02405fd47bb60f1d633678c38c988eba80a63dd1 Mon Sep 17 00:00:00 2001 From: abhinavdangeti Date: Tue, 15 May 2018 19:55:37 -0700 Subject: [PATCH 422/728] MB-29654: Copy value, arrayPositions into the interim structure(s) Copy value, array positions in the VisitDocument callback to preserve them beyond the scope of it's callback. 
--- index/scorch/segment/zap/merge.go | 10 ++++++++-- index/scorch/segment/zap/merge_test.go | 8 ++++---- index/scorch/segment/zap/segment.go | 10 ++++++++-- index/scorch/snapshot_index.go | 17 +++++++++++------ 4 files changed, 31 insertions(+), 14 deletions(-) diff --git a/index/scorch/segment/zap/merge.go b/index/scorch/segment/zap/merge.go index dab09f6b3..0e6e52962 100644 --- a/index/scorch/segment/zap/merge.go +++ b/index/scorch/segment/zap/merge.go @@ -601,6 +601,9 @@ func mergeStoredAndRemap(segments []*SegmentBase, drops []*roaring.Bitmap, docNumOffsets := make([]uint64, newSegDocCount) + vdc := visitDocumentCtxPool.Get().(*visitDocumentCtx) + defer visitDocumentCtxPool.Put(vdc) + // for each segment for segI, segment := range segments { segNewDocNums := make([]uint64, segment.numDocs) @@ -645,11 +648,14 @@ func mergeStoredAndRemap(segments []*SegmentBase, drops []*roaring.Bitmap, typs[i] = typs[i][:0] poss[i] = poss[i][:0] } - err := segment.VisitDocument(docNum, func(field string, typ byte, value []byte, pos []uint64) bool { + err := segment.visitDocument(vdc, docNum, func(field string, typ byte, value []byte, pos []uint64) bool { fieldID := int(fieldsMap[field]) - 1 vals[fieldID] = append(vals[fieldID], value) typs[fieldID] = append(typs[fieldID], typ) - poss[fieldID] = append(poss[fieldID], pos) + + // MB-29654: copy array positions to preserve them beyond the scope of this callback + poss[fieldID] = append(poss[fieldID], append([]uint64(nil), pos...)) + return true }) if err != nil { diff --git a/index/scorch/segment/zap/merge_test.go b/index/scorch/segment/zap/merge_test.go index cd21ecb66..163663a49 100644 --- a/index/scorch/segment/zap/merge_test.go +++ b/index/scorch/segment/zap/merge_test.go @@ -406,8 +406,8 @@ func compareSegments(a, b *Segment) string { err = a.VisitDocument(apitrn.Number(), func(field string, typ byte, value []byte, pos []uint64) bool { afields[field+"-typ"] = typ - afields[field+"-value"] = value - afields[field+"-pos"] = pos 
+ afields[field+"-value"] = append([]byte(nil), value...) + afields[field+"-pos"] = append([]uint64(nil), pos...) return true }) if err != nil { @@ -417,8 +417,8 @@ func compareSegments(a, b *Segment) string { err = b.VisitDocument(bpitrn.Number(), func(field string, typ byte, value []byte, pos []uint64) bool { bfields[field+"-typ"] = typ - bfields[field+"-value"] = value - bfields[field+"-pos"] = pos + bfields[field+"-value"] = append([]byte(nil), value...) + bfields[field+"-pos"] = append([]uint64(nil), pos...) return true }) if err != nil { diff --git a/index/scorch/segment/zap/segment.go b/index/scorch/segment/zap/segment.go index ed09d149d..08d714f39 100644 --- a/index/scorch/segment/zap/segment.go +++ b/index/scorch/segment/zap/segment.go @@ -295,10 +295,17 @@ var visitDocumentCtxPool = sync.Pool{ // VisitDocument invokes the DocFieldValueVistor for each stored field // for the specified doc number func (s *SegmentBase) VisitDocument(num uint64, visitor segment.DocumentFieldValueVisitor) error { - // first make sure this is a valid number in this segment if num < s.numDocs { vdc := visitDocumentCtxPool.Get().(*visitDocumentCtx) + defer visitDocumentCtxPool.Put(vdc) + return s.visitDocument(vdc, num, visitor) + } + return nil +} +func (s *SegmentBase) visitDocument(vdc *visitDocumentCtx, num uint64, visitor segment.DocumentFieldValueVisitor) error { + // first make sure this is a valid number in this segment + if num < s.numDocs { meta, compressed := s.getDocStoredMetaAndCompressed(num) vdc.reader.Reset(meta) @@ -368,7 +375,6 @@ func (s *SegmentBase) VisitDocument(num uint64, visitor segment.DocumentFieldVal } vdc.buf = uncompressed - visitDocumentCtxPool.Put(vdc) } return nil } diff --git a/index/scorch/snapshot_index.go b/index/scorch/snapshot_index.go index 0f8c21371..da1558593 100644 --- a/index/scorch/snapshot_index.go +++ b/index/scorch/snapshot_index.go @@ -313,21 +313,26 @@ func (i *IndexSnapshot) Document(id string) (rv *document.Document, err error) 
{ segmentIndex, localDocNum := i.segmentIndexAndLocalDocNumFromGlobal(docNum) rv = document.NewDocument(id) - err = i.segment[segmentIndex].VisitDocument(localDocNum, func(name string, typ byte, value []byte, pos []uint64) bool { + err = i.segment[segmentIndex].VisitDocument(localDocNum, func(name string, typ byte, val []byte, pos []uint64) bool { if name == "_id" { return true } + + // MB-29654: copy value, array positions to preserve them beyond the scope of this callback + value := append([]byte(nil), val...) + arrayPos := append([]uint64(nil), pos...) + switch typ { case 't': - rv.AddField(document.NewTextField(name, pos, value)) + rv.AddField(document.NewTextField(name, arrayPos, value)) case 'n': - rv.AddField(document.NewNumericFieldFromBytes(name, pos, value)) + rv.AddField(document.NewNumericFieldFromBytes(name, arrayPos, value)) case 'd': - rv.AddField(document.NewDateTimeFieldFromBytes(name, pos, value)) + rv.AddField(document.NewDateTimeFieldFromBytes(name, arrayPos, value)) case 'b': - rv.AddField(document.NewBooleanFieldFromBytes(name, pos, value)) + rv.AddField(document.NewBooleanFieldFromBytes(name, arrayPos, value)) case 'g': - rv.AddField(document.NewGeoPointFieldFromBytes(name, pos, value)) + rv.AddField(document.NewGeoPointFieldFromBytes(name, arrayPos, value)) } return true From 4bd452b1db9d0113c9be1b6585fea5fdaccf7b73 Mon Sep 17 00:00:00 2001 From: abhinavdangeti Date: Wed, 16 May 2018 18:14:51 -0700 Subject: [PATCH 423/728] Avoid going to the allocator everytime for accomodating array positions During the merge operation create a buffer to copy array positions into, and re-use this buffer for all fields of all documents in all segments. 
--- index/scorch/segment/zap/merge.go | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/index/scorch/segment/zap/merge.go b/index/scorch/segment/zap/merge.go index 0e6e52962..04bf75399 100644 --- a/index/scorch/segment/zap/merge.go +++ b/index/scorch/segment/zap/merge.go @@ -599,6 +599,8 @@ func mergeStoredAndRemap(segments []*SegmentBase, drops []*roaring.Bitmap, typs := make([][]byte, len(fieldsInv)) poss := make([][][]uint64, len(fieldsInv)) + var posBuf []uint64 + docNumOffsets := make([]uint64, newSegDocCount) vdc := visitDocumentCtxPool.Get().(*visitDocumentCtx) @@ -642,6 +644,8 @@ func mergeStoredAndRemap(segments []*SegmentBase, drops []*roaring.Bitmap, metaBuf.Reset() data = data[:0] + posTemp := posBuf + // collect all the data for i := 0; i < len(fieldsInv); i++ { vals[i] = vals[i][:0] @@ -654,7 +658,17 @@ func mergeStoredAndRemap(segments []*SegmentBase, drops []*roaring.Bitmap, typs[fieldID] = append(typs[fieldID], typ) // MB-29654: copy array positions to preserve them beyond the scope of this callback - poss[fieldID] = append(poss[fieldID], append([]uint64(nil), pos...)) + var curPos []uint64 + if len(pos) > 0 { + if cap(posTemp) < len(pos) { + posBuf = make([]uint64, len(pos)*len(fieldsInv)) + posTemp = posBuf + } + curPos = posTemp[0:len(pos)] + copy(curPos, pos) + posTemp = posTemp[len(pos):] + } + poss[fieldID] = append(poss[fieldID], curPos) return true }) From 21a00e203ec3278f3787aecdaf57828e8a59844b Mon Sep 17 00:00:00 2001 From: abhinavdangeti Date: Thu, 17 May 2018 11:00:20 -0700 Subject: [PATCH 424/728] Remove extra check within zap's VisitDocument(..) This public API invokes the internal API: visitDocument(..) which performs the check for the validity of the docNum. 
--- index/scorch/segment/zap/merge.go | 2 +- index/scorch/segment/zap/segment.go | 12 +++++------- index/scorch/snapshot_index.go | 2 +- 3 files changed, 7 insertions(+), 9 deletions(-) diff --git a/index/scorch/segment/zap/merge.go b/index/scorch/segment/zap/merge.go index 04bf75399..c735caad3 100644 --- a/index/scorch/segment/zap/merge.go +++ b/index/scorch/segment/zap/merge.go @@ -657,7 +657,7 @@ func mergeStoredAndRemap(segments []*SegmentBase, drops []*roaring.Bitmap, vals[fieldID] = append(vals[fieldID], value) typs[fieldID] = append(typs[fieldID], typ) - // MB-29654: copy array positions to preserve them beyond the scope of this callback + // copy array positions to preserve them beyond the scope of this callback var curPos []uint64 if len(pos) > 0 { if cap(posTemp) < len(pos) { diff --git a/index/scorch/segment/zap/segment.go b/index/scorch/segment/zap/segment.go index 08d714f39..80798093d 100644 --- a/index/scorch/segment/zap/segment.go +++ b/index/scorch/segment/zap/segment.go @@ -295,15 +295,13 @@ var visitDocumentCtxPool = sync.Pool{ // VisitDocument invokes the DocFieldValueVistor for each stored field // for the specified doc number func (s *SegmentBase) VisitDocument(num uint64, visitor segment.DocumentFieldValueVisitor) error { - if num < s.numDocs { - vdc := visitDocumentCtxPool.Get().(*visitDocumentCtx) - defer visitDocumentCtxPool.Put(vdc) - return s.visitDocument(vdc, num, visitor) - } - return nil + vdc := visitDocumentCtxPool.Get().(*visitDocumentCtx) + defer visitDocumentCtxPool.Put(vdc) + return s.visitDocument(vdc, num, visitor) } -func (s *SegmentBase) visitDocument(vdc *visitDocumentCtx, num uint64, visitor segment.DocumentFieldValueVisitor) error { +func (s *SegmentBase) visitDocument(vdc *visitDocumentCtx, num uint64, + visitor segment.DocumentFieldValueVisitor) error { // first make sure this is a valid number in this segment if num < s.numDocs { meta, compressed := s.getDocStoredMetaAndCompressed(num) diff --git 
a/index/scorch/snapshot_index.go b/index/scorch/snapshot_index.go index da1558593..6b615cd17 100644 --- a/index/scorch/snapshot_index.go +++ b/index/scorch/snapshot_index.go @@ -318,7 +318,7 @@ func (i *IndexSnapshot) Document(id string) (rv *document.Document, err error) { return true } - // MB-29654: copy value, array positions to preserve them beyond the scope of this callback + // copy value, array positions to preserve them beyond the scope of this callback value := append([]byte(nil), val...) arrayPos := append([]uint64(nil), pos...) From 9f8b8787616228a716bf424cfa99b6e8b5d4056f Mon Sep 17 00:00:00 2001 From: Sreekanth Sivasankaran Date: Fri, 18 May 2018 21:05:27 +0530 Subject: [PATCH 425/728] MB-29576 - Indexing blocked on scorch More stats to keep track of last merged, persisted and current root epoch. --- index/scorch/introducer.go | 5 +++++ index/scorch/merge.go | 2 ++ index/scorch/persister.go | 2 ++ index/scorch/stats.go | 4 ++++ 4 files changed, 13 insertions(+) diff --git a/index/scorch/introducer.go b/index/scorch/introducer.go index fb6afd5de..6989bbc9d 100644 --- a/index/scorch/introducer.go +++ b/index/scorch/introducer.go @@ -202,6 +202,7 @@ func (s *Scorch) introduceSegment(next *segmentIntroduction) error { s.nextSnapshotEpoch++ rootPrev := s.root s.root = newSnapshot + atomic.StoreUint64(&s.stats.CurRootEpoch, s.root.epoch) // release lock s.rootLock.Unlock() @@ -265,6 +266,7 @@ func (s *Scorch) introducePersist(persist *persistIntroduction) { s.rootLock.Lock() rootPrev := s.root s.root = newIndexSnapshot + atomic.StoreUint64(&s.stats.CurRootEpoch, s.root.epoch) s.rootLock.Unlock() if rootPrev != nil { @@ -369,6 +371,7 @@ func (s *Scorch) introduceMerge(nextMerge *segmentMerge) { s.nextSnapshotEpoch++ rootPrev := s.root s.root = newSnapshot + atomic.StoreUint64(&s.stats.CurRootEpoch, s.root.epoch) // release lock s.rootLock.Unlock() @@ -430,6 +433,8 @@ func (s *Scorch) revertToSnapshot(revertTo *snapshotReversion) error { // swap in new 
snapshot rootPrev := s.root s.root = newSnapshot + + atomic.StoreUint64(&s.stats.CurRootEpoch, s.root.epoch) // release lock s.rootLock.Unlock() diff --git a/index/scorch/merge.go b/index/scorch/merge.go index 41b734aaf..171f33ae8 100644 --- a/index/scorch/merge.go +++ b/index/scorch/merge.go @@ -72,6 +72,8 @@ OUTER: } lastEpochMergePlanned = ourSnapshot.epoch + atomic.StoreUint64(&s.stats.LastMergedEpoch, ourSnapshot.epoch) + s.fireEvent(EventKindMergerProgress, time.Since(startTime)) } _ = ourSnapshot.DecRef() diff --git a/index/scorch/persister.go b/index/scorch/persister.go index cbc24cdb7..c822ad0b5 100644 --- a/index/scorch/persister.go +++ b/index/scorch/persister.go @@ -109,6 +109,8 @@ OUTER: continue OUTER } + atomic.StoreUint64(&s.stats.LastPersistedEpoch, ourSnapshot.epoch) + lastPersistedEpoch = ourSnapshot.epoch for _, ew := range persistWatchers { close(ew.notifyCh) diff --git a/index/scorch/stats.go b/index/scorch/stats.go index e9bcd91d2..d4e07f6b4 100644 --- a/index/scorch/stats.go +++ b/index/scorch/stats.go @@ -33,6 +33,10 @@ type Stats struct { TotBatchIntroTime uint64 MaxBatchIntroTime uint64 + CurRootEpoch uint64 + LastPersistedEpoch uint64 + LastMergedEpoch uint64 + TotOnErrors uint64 TotAnalysisTime uint64 From 477688c147c679e43f471d36286c1e462295554b Mon Sep 17 00:00:00 2001 From: abhinavdangeti Date: Mon, 21 May 2018 12:37:22 -0700 Subject: [PATCH 426/728] MB-29763: Revert "scorch optimize via vellum.FST.Reader() API" This reverts commit 0a3f3e44224c1fc8f8b0e8777b7554abb8113d3b. 
--- index/scorch/segment/zap/dict.go | 13 ++++++------- index/scorch/segment/zap/segment.go | 4 ---- 2 files changed, 6 insertions(+), 11 deletions(-) diff --git a/index/scorch/segment/zap/dict.go b/index/scorch/segment/zap/dict.go index 736fa59f6..c73cc6e5b 100644 --- a/index/scorch/segment/zap/dict.go +++ b/index/scorch/segment/zap/dict.go @@ -28,11 +28,10 @@ import ( // Dictionary is the zap representation of the term dictionary type Dictionary struct { - sb *SegmentBase - field string - fieldID uint16 - fst *vellum.FST - fstReader *vellum.Reader + sb *SegmentBase + field string + fieldID uint16 + fst *vellum.FST } // PostingsList returns the postings list for the specified term @@ -47,14 +46,14 @@ func (d *Dictionary) PostingsList(term []byte, except *roaring.Bitmap, } func (d *Dictionary) postingsList(term []byte, except *roaring.Bitmap, rv *PostingsList) (*PostingsList, error) { - if d.fstReader == nil { + if d.fst == nil { if rv == nil || rv == emptyPostingsList { return emptyPostingsList, nil } return d.postingsListInit(rv, except), nil } - postingsOffset, exists, err := d.fstReader.Get(term) + postingsOffset, exists, err := d.fst.Get(term) if err != nil { return nil, fmt.Errorf("vellum err: %v", err) } diff --git a/index/scorch/segment/zap/segment.go b/index/scorch/segment/zap/segment.go index 80798093d..0fd4e57c4 100644 --- a/index/scorch/segment/zap/segment.go +++ b/index/scorch/segment/zap/segment.go @@ -266,10 +266,6 @@ func (sb *SegmentBase) dictionary(field string) (rv *Dictionary, err error) { if err != nil { return nil, fmt.Errorf("dictionary field %s vellum err: %v", field, err) } - rv.fstReader, err = rv.fst.Reader() - if err != nil { - return nil, fmt.Errorf("dictionary field %s vellum Reader err: %v", field, err) - } } } } From 6638c5ccff18af37d8be33c011265fbd2b45e625 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Fri, 25 May 2018 10:05:54 -0700 Subject: [PATCH 427/728] cachedDocs.visitDoc() check whether docs map is ready for use Before this 
change, the cachedDocs.visitDoc() method was accessing the docs map before the readyCh was closed. Without this check, another goroutine that's executing the cachedFieldDocs.prepareField() method might be concurrently populating and modifying the docs map. See also: https://issues.couchbase.com/browse/MB-29844 --- index/scorch/snapshot_segment.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/index/scorch/snapshot_segment.go b/index/scorch/snapshot_segment.go index 98084a980..90dbcb494 100644 --- a/index/scorch/snapshot_segment.go +++ b/index/scorch/snapshot_segment.go @@ -293,6 +293,10 @@ func (c *cachedDocs) visitDoc(localDocNum uint64, for _, field := range fields { if cachedFieldDocs, exists := c.cache[field]; exists { + c.m.Unlock() + <-cachedFieldDocs.readyCh + c.m.Lock() + if tlist, exists := cachedFieldDocs.docs[localDocNum]; exists { for { i := bytes.Index(tlist, TermSeparatorSplitSlice) From 323f83aed5898ba819bcc210249fca7bba877e28 Mon Sep 17 00:00:00 2001 From: Ben Gadbois Date: Wed, 30 May 2018 21:51:02 +0200 Subject: [PATCH 428/728] Fix spelling and typo mistakes --- analysis/token/snowball/snowball.go | 4 ++-- analysis/token/unique/unique.go | 2 +- cmd/bleve/cmd/zap/dict.go | 2 +- cmd/bleve/cmd/zap/explore.go | 2 +- geo/parse_test.go | 2 +- index/scorch/README.md | 2 +- index/scorch/segment/zap/README.md | 2 +- index/scorch/segment/zap/posting.go | 16 ++++++++-------- index/scorch/segment/zap/segment.go | 2 +- index_alias_impl_test.go | 6 +++--- index_test.go | 2 +- mapping/document.go | 2 +- mapping/mapping_test.go | 6 +++--- numeric/bin.go | 2 +- search/searcher/search_phrase.go | 4 ++-- search/searcher/search_term_range_test.go | 4 ++-- 16 files changed, 30 insertions(+), 30 deletions(-) diff --git a/analysis/token/snowball/snowball.go b/analysis/token/snowball/snowball.go index ae876137a..99dbf4202 100644 --- a/analysis/token/snowball/snowball.go +++ b/analysis/token/snowball/snowball.go @@ -26,12 +26,12 @@ import ( const Name = 
"stemmer_snowball" type SnowballStemmer struct { - langauge string + language string } func NewSnowballStemmer(language string) *SnowballStemmer { return &SnowballStemmer{ - langauge: language, + language: language, } } diff --git a/analysis/token/unique/unique.go b/analysis/token/unique/unique.go index f0d96c504..c60e8c979 100644 --- a/analysis/token/unique/unique.go +++ b/analysis/token/unique/unique.go @@ -21,7 +21,7 @@ import ( const Name = "unique" -// UniqueTermFilter retains only the tokens which mark the first occurence of +// UniqueTermFilter retains only the tokens which mark the first occurrence of // a term. Tokens whose term appears in a preceding token are dropped. type UniqueTermFilter struct{} diff --git a/cmd/bleve/cmd/zap/dict.go b/cmd/bleve/cmd/zap/dict.go index 35952f5f5..2c60d31da 100644 --- a/cmd/bleve/cmd/zap/dict.go +++ b/cmd/bleve/cmd/zap/dict.go @@ -38,7 +38,7 @@ var dictCmd = &cobra.Command{ addr, err := segment.DictAddr(args[1]) if err != nil { - return fmt.Errorf("error determing address: %v", err) + return fmt.Errorf("error determining address: %v", err) } fmt.Printf("dictionary for field starts at %d (%x)\n", addr, addr) diff --git a/cmd/bleve/cmd/zap/explore.go b/cmd/bleve/cmd/zap/explore.go index d22fa8ce1..deac086cb 100644 --- a/cmd/bleve/cmd/zap/explore.go +++ b/cmd/bleve/cmd/zap/explore.go @@ -39,7 +39,7 @@ var exploreCmd = &cobra.Command{ addr, err := segment.DictAddr(args[1]) if err != nil { - return fmt.Errorf("error determing address: %v", err) + return fmt.Errorf("error determining address: %v", err) } fmt.Printf("dictionary for field starts at %d (%x)\n", addr, addr) diff --git a/geo/parse_test.go b/geo/parse_test.go index 4d4a36d5d..4cbf66dde 100644 --- a/geo/parse_test.go +++ b/geo/parse_test.go @@ -87,7 +87,7 @@ func TestExtractGeoPoint(t *testing.T) { lat: 7.5, success: true, }, - // struct with lng alterante + // struct with lng alternate { in: struct { Lng float64 diff --git a/index/scorch/README.md 
b/index/scorch/README.md index 861335a1b..9794aed70 100644 --- a/index/scorch/README.md +++ b/index/scorch/README.md @@ -302,7 +302,7 @@ Map local bitsets into global number space (global meaning cross-segment but sti IndexSnapshot already should have mapping something like: 0 - Offset 0 1 - Offset 3 (because segment 0 had 3 docs) -2 - Offset 4 (becuase segment 1 had 1 doc) +2 - Offset 4 (because segment 1 had 1 doc) This maps to search result bitset: diff --git a/index/scorch/segment/zap/README.md b/index/scorch/segment/zap/README.md index 41f5902a0..872e86c84 100644 --- a/index/scorch/segment/zap/README.md +++ b/index/scorch/segment/zap/README.md @@ -109,7 +109,7 @@ If you know the doc number you're interested in, this format lets you jump to th - remember the start position for this posting list - write freq/norm details offset (remembered from previous, as varint uint64) - write location details offset (remembered from previous, as varint uint64) - - write location bitmap offset (remembered from pervious, as varint uint64) + - write location bitmap offset (remembered from previous, as varint uint64) - write length of encoded roaring bitmap - write the serialized roaring bitmap data diff --git a/index/scorch/segment/zap/posting.go b/index/scorch/segment/zap/posting.go index 7bfbe0bef..71d41a826 100644 --- a/index/scorch/segment/zap/posting.go +++ b/index/scorch/segment/zap/posting.go @@ -94,7 +94,7 @@ func under32Bits(x uint64) bool { const docNum1HitFinished = math.MaxUint64 -// PostingsList is an in-memory represenation of a postings list +// PostingsList is an in-memory representation of a postings list type PostingsList struct { sb *SegmentBase postingsOffset uint64 @@ -733,7 +733,7 @@ func (p *Posting) Number() uint64 { return p.docNum } -// Frequency returns the frequence of occurance of this term in this doc/field +// Frequency returns the frequencies of occurrence of this term in this doc/field func (p *Posting) Frequency() uint64 { return p.freq } @@ 
-743,12 +743,12 @@ func (p *Posting) Norm() float64 { return float64(p.norm) } -// Locations returns the location information for each occurance +// Locations returns the location information for each occurrence func (p *Posting) Locations() []segment.Location { return p.locs } -// Location represents the location of a single occurance +// Location represents the location of a single occurrence type Location struct { field string pos uint64 @@ -769,22 +769,22 @@ func (l *Location) Field() string { return l.field } -// Start returns the start byte offset of this occurance +// Start returns the start byte offset of this occurrence func (l *Location) Start() uint64 { return l.start } -// End returns the end byte offset of this occurance +// End returns the end byte offset of this occurrence func (l *Location) End() uint64 { return l.end } -// Pos returns the 1-based phrase position of this occurance +// Pos returns the 1-based phrase position of this occurrence func (l *Location) Pos() uint64 { return l.pos } -// ArrayPositions returns the array position vector associated with this occurance +// ArrayPositions returns the array position vector associated with this occurrence func (l *Location) ArrayPositions() []uint64 { return l.ap } diff --git a/index/scorch/segment/zap/segment.go b/index/scorch/segment/zap/segment.go index 0fd4e57c4..62933daf3 100644 --- a/index/scorch/segment/zap/segment.go +++ b/index/scorch/segment/zap/segment.go @@ -211,7 +211,7 @@ func (s *Segment) loadConfig() error { } func (s *SegmentBase) loadFields() error { - // NOTE for now we assume the fields index immediately preceeds + // NOTE for now we assume the fields index immediately precedes // the footer, and if this changes, need to adjust accordingly (or // store explicit length), where s.mem was sliced from s.mm in Open(). 
fieldsIndexEnd := uint64(len(s.mem)) diff --git a/index_alias_impl_test.go b/index_alias_impl_test.go index 9599b89d6..6e25157de 100644 --- a/index_alias_impl_test.go +++ b/index_alias_impl_test.go @@ -515,7 +515,7 @@ func TestIndexAliasMulti(t *testing.T) { if err != nil { t.Error(err) } - // cheat and ensure that Took field matches since it invovles time + // cheat and ensure that Took field matches since it involves time expected.Took = results.Took if !reflect.DeepEqual(results, expected) { t.Errorf("expected %#v, got %#v", expected, results) @@ -599,7 +599,7 @@ func TestMultiSearchNoError(t *testing.T) { if err != nil { t.Error(err) } - // cheat and ensure that Took field matches since it invovles time + // cheat and ensure that Took field matches since it involves time expected.Took = results.Took if !reflect.DeepEqual(results, expected) { t.Errorf("expected %#v, got %#v", expected, results) @@ -1229,7 +1229,7 @@ func TestMultiSearchCustomSort(t *testing.T) { if err != nil { t.Error(err) } - // cheat and ensure that Took field matches since it invovles time + // cheat and ensure that Took field matches since it involves time expected.Took = results.Took if !reflect.DeepEqual(results, expected) { t.Errorf("expected %v, got %v", expected, results) diff --git a/index_test.go b/index_test.go index 69ca61a98..604328a41 100644 --- a/index_test.go +++ b/index_test.go @@ -1509,7 +1509,7 @@ func TestSearchTimeout(t *testing.T) { } }() - // first run a search with an absurdly long timeout (should succeeed) + // first run a search with an absurdly long timeout (should succeed) ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) defer cancel() query := NewTermQuery("water") diff --git a/mapping/document.go b/mapping/document.go index 6ec0c66bb..0ebeb5856 100644 --- a/mapping/document.go +++ b/mapping/document.go @@ -324,7 +324,7 @@ func (dm *DocumentMapping) defaultAnalyzerName(path []string) string { } func (dm *DocumentMapping) walkDocument(data 
interface{}, path []string, indexes []uint64, context *walkContext) { - // allow default "json" tag to be overriden + // allow default "json" tag to be overridden structTagKey := dm.StructTagKey if structTagKey == "" { structTagKey = "json" diff --git a/mapping/mapping_test.go b/mapping/mapping_test.go index 1a7709049..a13a90b8b 100644 --- a/mapping/mapping_test.go +++ b/mapping/mapping_test.go @@ -950,7 +950,7 @@ func TestMappingForTextMarshaler(t *testing.T) { }, } - // first verify that when using a mapping that doesn't explicity + // first verify that when using a mapping that doesn't explicitly // map the stuct field as text, then we traverse inside the struct // and do our best m := NewIndexMapping() @@ -970,7 +970,7 @@ func TestMappingForTextMarshaler(t *testing.T) { t.Errorf("expected field value to be '%s', got: '%s'", tm.Marshalable.Extra, string(doc.Fields[0].Value())) } - // now verify that when a mapping explicity + // now verify that when a mapping explicitly m = NewIndexMapping() txt := NewTextFieldMapping() m.DefaultMapping.AddFieldMappingsAt("Marshalable", txt) @@ -1004,7 +1004,7 @@ func TestMappingForNilTextMarshaler(t *testing.T) { Marshalable: nil, } - // now verify that when a mapping explicity + // now verify that when a mapping explicitly m := NewIndexMapping() txt := NewTextFieldMapping() m.DefaultMapping.AddFieldMappingsAt("Marshalable", txt) diff --git a/numeric/bin.go b/numeric/bin.go index cd71392dc..368952a2c 100644 --- a/numeric/bin.go +++ b/numeric/bin.go @@ -14,7 +14,7 @@ var interleaveShift = []uint{1, 2, 4, 8, 16} // Interleave the first 32 bits of each uint64 // apdated from org.apache.lucene.util.BitUtil -// whcih was adapted from: +// which was adapted from: // http://graphics.stanford.edu/~seander/bithacks.html#InterleaveBMN func Interleave(v1, v2 uint64) uint64 { v1 = (v1 | (v1 << interleaveShift[4])) & interleaveMagic[4] diff --git a/search/searcher/search_phrase.go b/search/searcher/search_phrase.go index 
3711da063..08eb13338 100644 --- a/search/searcher/search_phrase.go +++ b/search/searcher/search_phrase.go @@ -210,7 +210,7 @@ func (s *PhraseSearcher) Next(ctx *search.SearchContext) (*search.DocumentMatch, return nil, nil } -// checkCurrMustMatch is soley concerned with determining if the DocumentMatch +// checkCurrMustMatch is solely concerned with determining if the DocumentMatch // pointed to by s.currMust (which satisifies the pre-condition searcher) // also satisfies the phase constraints. if so, it returns a DocumentMatch // for this document, otherwise nil @@ -241,7 +241,7 @@ func (s *PhraseSearcher) checkCurrMustMatch(ctx *search.SearchContext) *search.D return nil } -// checkCurrMustMatchField is soley concerned with determining if one +// checkCurrMustMatchField is solely concerned with determining if one // particular field within the currMust DocumentMatch Locations // satisfies the phase constraints (possibly more than once). if so, // the matching field term locations are appended to the provided diff --git a/search/searcher/search_term_range_test.go b/search/searcher/search_term_range_test.go index f84e28902..cd4e89114 100644 --- a/search/searcher/search_term_range_test.go +++ b/search/searcher/search_term_range_test.go @@ -94,7 +94,7 @@ func TestTermRangeSearch(t *testing.T) { inclusiveMax: true, want: nil, }, - // max nil sees everyting after marty + // max nil sees everything after marty { min: []byte("marty"), max: nil, @@ -103,7 +103,7 @@ func TestTermRangeSearch(t *testing.T) { inclusiveMax: true, want: []string{"1", "2", "4"}, }, - // min nil sees everyting before ravi + // min nil sees everything before ravi { min: nil, max: []byte("ravi"), From 193c43ecddce54283db96db0c103131f0fc558e3 Mon Sep 17 00:00:00 2001 From: Ben Gadbois Date: Wed, 30 May 2018 22:04:55 +0200 Subject: [PATCH 429/728] Fix another spelling for CI --- analysis/token/snowball/snowball.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/analysis/token/snowball/snowball.go b/analysis/token/snowball/snowball.go index 99dbf4202..6a672c2b2 100644 --- a/analysis/token/snowball/snowball.go +++ b/analysis/token/snowball/snowball.go @@ -39,7 +39,7 @@ func (s *SnowballStemmer) Filter(input analysis.TokenStream) analysis.TokenStrea for _, token := range input { // if it is not a protected keyword, stem it if !token.KeyWord { - stemmed, _ := snowball.Stem(string(token.Term), s.langauge, true) + stemmed, _ := snowball.Stem(string(token.Term), s.language, true) token.Term = []byte(stemmed) } } From d5224d90871d1edc7528cba5092f24010860509a Mon Sep 17 00:00:00 2001 From: abhinavdangeti Date: Fri, 25 May 2018 11:40:05 -0700 Subject: [PATCH 430/728] Scorch optimize via vellum.FST.Reader() API + This change sets up a vellum.Reader (which carries a prealloc'ed fstState) for the term dictionaries. + However the Reader cannot be used concurrently, so rather than re-using a fieldDicts initialized for the IndexSnapshot, re-use one that is set up for TermFieldReaders which are recycled. 
--- index/scorch/segment/zap/dict.go | 13 ++++++------ index/scorch/segment/zap/segment.go | 4 ++++ index/scorch/snapshot_index.go | 32 ++++++++++------------------- 3 files changed, 22 insertions(+), 27 deletions(-) diff --git a/index/scorch/segment/zap/dict.go b/index/scorch/segment/zap/dict.go index c73cc6e5b..736fa59f6 100644 --- a/index/scorch/segment/zap/dict.go +++ b/index/scorch/segment/zap/dict.go @@ -28,10 +28,11 @@ import ( // Dictionary is the zap representation of the term dictionary type Dictionary struct { - sb *SegmentBase - field string - fieldID uint16 - fst *vellum.FST + sb *SegmentBase + field string + fieldID uint16 + fst *vellum.FST + fstReader *vellum.Reader } // PostingsList returns the postings list for the specified term @@ -46,14 +47,14 @@ func (d *Dictionary) PostingsList(term []byte, except *roaring.Bitmap, } func (d *Dictionary) postingsList(term []byte, except *roaring.Bitmap, rv *PostingsList) (*PostingsList, error) { - if d.fst == nil { + if d.fstReader == nil { if rv == nil || rv == emptyPostingsList { return emptyPostingsList, nil } return d.postingsListInit(rv, except), nil } - postingsOffset, exists, err := d.fst.Get(term) + postingsOffset, exists, err := d.fstReader.Get(term) if err != nil { return nil, fmt.Errorf("vellum err: %v", err) } diff --git a/index/scorch/segment/zap/segment.go b/index/scorch/segment/zap/segment.go index 62933daf3..8c6de211a 100644 --- a/index/scorch/segment/zap/segment.go +++ b/index/scorch/segment/zap/segment.go @@ -266,6 +266,10 @@ func (sb *SegmentBase) dictionary(field string) (rv *Dictionary, err error) { if err != nil { return nil, fmt.Errorf("dictionary field %s vellum err: %v", field, err) } + rv.fstReader, err = rv.fst.Reader() + if err != nil { + return nil, fmt.Errorf("dictionary field %s vellum reader err: %v", field, err) + } } } } diff --git a/index/scorch/snapshot_index.go b/index/scorch/snapshot_index.go index 6b615cd17..21e19123d 100644 --- a/index/scorch/snapshot_index.go +++ 
b/index/scorch/snapshot_index.go @@ -59,9 +59,8 @@ type IndexSnapshot struct { m sync.Mutex // Protects the fields that follow. refs int64 - m2 sync.Mutex // Protects the fields that follow. - fieldTFRs map[string][]*IndexSnapshotTermFieldReader // keyed by field, recycled TFR's - fieldDicts map[string][]segment.TermDictionary // keyed by field, recycled dicts + m2 sync.Mutex // Protects the fields that follow. + fieldTFRs map[string][]*IndexSnapshotTermFieldReader // keyed by field, recycled TFR's } func (i *IndexSnapshot) Segments() []*SegmentSnapshot { @@ -394,7 +393,7 @@ func (i *IndexSnapshot) InternalID(id string) (rv index.IndexInternalID, err err func (i *IndexSnapshot) TermFieldReader(term []byte, field string, includeFreq, includeNorm, includeTermVectors bool) (tfr index.TermFieldReader, err error) { - rv, dicts := i.allocTermFieldReaderDicts(field) + rv := i.allocTermFieldReaderDicts(field) rv.term = term rv.field = field @@ -412,20 +411,19 @@ func (i *IndexSnapshot) TermFieldReader(term []byte, field string, includeFreq, rv.currPosting = nil rv.currID = rv.currID[:0] - if dicts == nil { - dicts = make([]segment.TermDictionary, len(i.segment)) + if rv.dicts == nil { + rv.dicts = make([]segment.TermDictionary, len(i.segment)) for i, segment := range i.segment { dict, err := segment.Dictionary(field) if err != nil { return nil, err } - dicts[i] = dict + rv.dicts[i] = dict } } - rv.dicts = dicts for i := range i.segment { - pl, err := dicts[i].PostingsList(term, nil, rv.postings[i]) + pl, err := rv.dicts[i].PostingsList(term, nil, rv.postings[i]) if err != nil { return nil, err } @@ -436,25 +434,21 @@ func (i *IndexSnapshot) TermFieldReader(term []byte, field string, includeFreq, return rv, nil } -func (i *IndexSnapshot) allocTermFieldReaderDicts(field string) ( - tfr *IndexSnapshotTermFieldReader, dicts []segment.TermDictionary) { +func (i *IndexSnapshot) allocTermFieldReaderDicts(field string) (tfr *IndexSnapshotTermFieldReader) { i.m2.Lock() - if 
i.fieldDicts != nil { - dicts = i.fieldDicts[field] - } if i.fieldTFRs != nil { tfrs := i.fieldTFRs[field] last := len(tfrs) - 1 if last >= 0 { - rv := tfrs[last] + tfr = tfrs[last] tfrs[last] = nil i.fieldTFRs[field] = tfrs[:last] i.m2.Unlock() - return rv, dicts + return } } i.m2.Unlock() - return &IndexSnapshotTermFieldReader{}, dicts + return &IndexSnapshotTermFieldReader{} } func (i *IndexSnapshot) recycleTermFieldReader(tfr *IndexSnapshotTermFieldReader) { @@ -463,10 +457,6 @@ func (i *IndexSnapshot) recycleTermFieldReader(tfr *IndexSnapshotTermFieldReader i.fieldTFRs = map[string][]*IndexSnapshotTermFieldReader{} } i.fieldTFRs[tfr.field] = append(i.fieldTFRs[tfr.field], tfr) - if i.fieldDicts == nil { - i.fieldDicts = map[string][]segment.TermDictionary{} - } - i.fieldDicts[tfr.field] = tfr.dicts i.m2.Unlock() } From 74b27a75a730249856522c91be962f2ea68658f6 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Tue, 26 Jun 2018 08:51:37 -0700 Subject: [PATCH 431/728] NewTermSearcher converts term to bytes only once --- search/searcher/search_term.go | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/search/searcher/search_term.go b/search/searcher/search_term.go index 4fee58bbf..97b7dbb90 100644 --- a/search/searcher/search_term.go +++ b/search/searcher/search_term.go @@ -38,7 +38,8 @@ type TermSearcher struct { } func NewTermSearcher(indexReader index.IndexReader, term string, field string, boost float64, options search.SearcherOptions) (*TermSearcher, error) { - reader, err := indexReader.TermFieldReader([]byte(term), field, true, true, options.IncludeTermVectors) + termBytes := []byte(term) + reader, err := indexReader.TermFieldReader(termBytes, field, true, true, options.IncludeTermVectors) if err != nil { return nil, err } @@ -47,7 +48,7 @@ func NewTermSearcher(indexReader index.IndexReader, term string, field string, b _ = reader.Close() return nil, err } - scorer := scorer.NewTermQueryScorer([]byte(term), field, boost, count, 
reader.Count(), options) + scorer := scorer.NewTermQueryScorer(termBytes, field, boost, count, reader.Count(), options) return &TermSearcher{ indexReader: indexReader, reader: reader, From 027187d999549bb29f41edeb3d8a139ff4e381a8 Mon Sep 17 00:00:00 2001 From: abhinavdangeti Date: Tue, 26 Jun 2018 16:03:11 -0700 Subject: [PATCH 432/728] MB-30252: Omit default_analyzer from the json encoding when unset --- mapping/document.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mapping/document.go b/mapping/document.go index 6ec0c66bb..ec4b8bf64 100644 --- a/mapping/document.go +++ b/mapping/document.go @@ -42,7 +42,7 @@ type DocumentMapping struct { Dynamic bool `json:"dynamic"` Properties map[string]*DocumentMapping `json:"properties,omitempty"` Fields []*FieldMapping `json:"fields,omitempty"` - DefaultAnalyzer string `json:"default_analyzer"` + DefaultAnalyzer string `json:"default_analyzer,omitempty"` // StructTagKey overrides "json" when looking for field names in struct tags StructTagKey string `json:"struct_tag_key,omitempty"` From ba11b6df3810344e592ceb2cbc3f745ab706b96d Mon Sep 17 00:00:00 2001 From: abhinavdangeti Date: Tue, 10 Jul 2018 15:02:52 -0700 Subject: [PATCH 433/728] Fix to boolean searcher's Advance() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit + Advance nested searchers within a boolean searcher only if the cursor is trailing the ID being looked up. + Unit test + Fixes https://github.com/blevesearch/bleve/issues/954 Here's an example bug scenario with nested boolean searchers .. conjunctionSearcher [a] => booleanSearcher [b] Must => conjunctionSearcher [d] => disjunctionSearcher [f] => disjunctionSearcher => termSearcher => disjunctionSearcher => termSearcher => booleanSearcher [c] Must => conjunctionSearcher [e] => termSearcher Consider there to be docs ranging from 1 to 100 in the index and these are the expected matches, for the query: 1, 11, 16, 21,.. 
On the first Next() call by the collector .. [d] - initSearchers .. sets local cursor of [f] to 1 - moves local cursor of disjunction cursor to 11 [b] - init: local cursor of [d] set to 1 - moves [d]’s local cursor of [f] to 12 - moves local cursor of [d] to 11 [c] - init: local cursor of [e] set to 1 - moves local cursor of [e] to 11 [a] - initSearchers: sets local cursors of [b], [c] to 1 - moves [b]’s local cursor of [d] to 12 - moves [d]’s local cursor of [f] to 16 - moves [c]’s local cursor of [e] to 16 - updates local cursor of [b] to 11 - updates local cursor of [c] to 11 (Note that at this point [b]’s local cursor to [d] and [c]’s local cursor to [e] are pointing to different documents, owing to the different nesting of searchers underneath) On the second Next() call by the collector .. [a] .. maxID: 11 - moves [b]’s local cursor of [d] to 16 - moves [d]’s local cursor of [f] to 17 - moves [c]’s local cursor of [e] to 21 - updates local cursor of [b] to 12 - updates local cursor of [c] to 16 On the third Next() call by the collector .. [a] .. maxID: 16 - sees that maxID 16 > 12 (cursor position of [b]) - invokes advanceChild([b]) .. --> this causes [b]’s local cursor to move from 16 to 17 and this prevents the match:16 from being collected. 
--- search/searcher/search_boolean.go | 43 +++++----- search_test.go | 128 ++++++++++++++++++++++++++++++ 2 files changed, 153 insertions(+), 18 deletions(-) diff --git a/search/searcher/search_boolean.go b/search/searcher/search_boolean.go index f7ee2cd83..f9684af29 100644 --- a/search/searcher/search_boolean.go +++ b/search/searcher/search_boolean.go @@ -332,31 +332,38 @@ func (s *BooleanSearcher) Advance(ctx *search.SearchContext, ID index.IndexInter } var err error + // Advance nested searcher(s) only if the cursor is trailing the lookup ID if s.mustSearcher != nil { - if s.currMust != nil { - ctx.DocumentMatchPool.Put(s.currMust) - } - s.currMust, err = s.mustSearcher.Advance(ctx, ID) - if err != nil { - return nil, err + if s.currMust == nil || s.currMust.IndexInternalID.Compare(ID) < 0 { + if s.currMust != nil { + ctx.DocumentMatchPool.Put(s.currMust) + } + s.currMust, err = s.mustSearcher.Advance(ctx, ID) + if err != nil { + return nil, err + } } } if s.shouldSearcher != nil { - if s.currShould != nil { - ctx.DocumentMatchPool.Put(s.currShould) - } - s.currShould, err = s.shouldSearcher.Advance(ctx, ID) - if err != nil { - return nil, err + if s.currShould == nil || s.currShould.IndexInternalID.Compare(ID) < 0 { + if s.currShould != nil { + ctx.DocumentMatchPool.Put(s.currShould) + } + s.currShould, err = s.shouldSearcher.Advance(ctx, ID) + if err != nil { + return nil, err + } } } if s.mustNotSearcher != nil { - if s.currMustNot != nil { - ctx.DocumentMatchPool.Put(s.currMustNot) - } - s.currMustNot, err = s.mustNotSearcher.Advance(ctx, ID) - if err != nil { - return nil, err + if s.currMustNot == nil || s.currMustNot.IndexInternalID.Compare(ID) < 0 { + if s.currMustNot != nil { + ctx.DocumentMatchPool.Put(s.currMustNot) + } + s.currMustNot, err = s.mustNotSearcher.Advance(ctx, ID) + if err != nil { + return nil, err + } } } diff --git a/search_test.go b/search_test.go index 87a718285..7fcc71d6e 100644 --- a/search_test.go +++ b/search_test.go @@ -17,12 
+17,21 @@ package bleve import ( "encoding/json" "fmt" + "os" "reflect" + "strconv" "strings" "testing" "time" + "github.com/blevesearch/bleve/analysis" + "github.com/blevesearch/bleve/analysis/analyzer/custom" + "github.com/blevesearch/bleve/analysis/token/lowercase" + "github.com/blevesearch/bleve/analysis/tokenizer/single" + "github.com/blevesearch/bleve/analysis/tokenizer/whitespace" + "github.com/blevesearch/bleve/document" "github.com/blevesearch/bleve/search" + "github.com/blevesearch/bleve/search/query" ) func TestSearchResultString(t *testing.T) { @@ -414,3 +423,122 @@ func TestMemoryNeededForSearchResult(t *testing.T) { t.Errorf("estimate not what is expected: %v != %v", estimate, expect) } } + +// https://github.com/blevesearch/bleve/issues/954 +func TestNestedBooleanSearchers(t *testing.T) { + defer func() { + err := os.RemoveAll("testidx") + if err != nil { + t.Fatal(err) + } + }() + + // create an index with a custom analyzer + idxMapping := NewIndexMapping() + if err := idxMapping.AddCustomAnalyzer("3xbla", map[string]interface{}{ + "type": custom.Name, + "tokenizer": whitespace.Name, + "token_filters": []interface{}{lowercase.Name, "stop_en"}, + }); err != nil { + t.Fatal(err) + } + + idxMapping.DefaultAnalyzer = "3xbla" + idx, err := New("testidx", idxMapping) + if err != nil { + t.Fatal(err) + } + + // create and insert documents as a batch + batch := idx.NewBatch() + matches := 0 + for i := 0; i < 100; i++ { + hostname := fmt.Sprintf("planner_hostname_%d", i%5) + metadata := map[string]string{"region": fmt.Sprintf("planner_us-east-%d", i%5)} + + // Expected matches + if (hostname == "planner_hostname_1" || hostname == "planner_hostname_2") && + metadata["region"] == "planner_us-east-1" { + matches++ + } + + doc := document.NewDocument(strconv.Itoa(i)) + doc.Fields = []document.Field{ + document.NewTextFieldCustom("hostname", []uint64{}, []byte(hostname), + document.IndexField, + &analysis.Analyzer{ + Tokenizer: single.NewSingleTokenTokenizer(), + 
TokenFilters: []analysis.TokenFilter{ + lowercase.NewLowerCaseFilter(), + }, + }, + ), + } + for k, v := range metadata { + doc.AddField(document.NewTextFieldWithIndexingOptions( + fmt.Sprintf("metadata.%s", k), []uint64{}, []byte(v), document.IndexField)) + } + doc.CompositeFields = []*document.CompositeField{ + document.NewCompositeFieldWithIndexingOptions( + "_all", true, []string{"text"}, []string{}, + document.IndexField|document.IncludeTermVectors), + } + + if err = batch.IndexAdvanced(doc); err != nil { + t.Fatal(err) + } + } + + if err = idx.Batch(batch); err != nil { + t.Fatal(err) + } + + que, err := query.ParseQuery([]byte( + `{ + "conjuncts": [ + { + "must": { + "conjuncts": [ + { + "disjuncts": [ + { + "match": "planner_hostname_1", + "field": "hostname" + }, + { + "match": "planner_hostname_2", + "field": "hostname" + } + ] + } + ] + } + }, + { + "must": { + "conjuncts": [ + { + "match": "planner_us-east-1", + "field": "metadata.region" + } + ] + } + } + ] + }`, + )) + if err != nil { + t.Fatal(err) + } + + req := NewSearchRequest(que) + req.Size = 100 + req.Fields = []string{"hostname", "metadata.region"} + searchResults, err := idx.Search(req) + if err != nil { + t.Fatal(err) + } + if matches != len(searchResults.Hits) { + t.Fatalf("Unexpected result set, %v != %v", matches, len(searchResults.Hits)) + } +} From 17f59588a39d25babbf4e04fe3bc79fc183a26e5 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Thu, 12 Jul 2018 15:09:38 -0700 Subject: [PATCH 434/728] optimize scorch FieldDictFuzzy to check for tooManyClauses earlier This should not affect functionality, as the fuzzy searcher uses a disjunction searcher underneath the hood, and the disjunction searcher later on checks for the tooManyClauses() case. But, the idea is that the tooManyClauses() check should be handled the same whether or not the IndexReaderFuzzy interface is implemented by the index-reader, and if we can bail earlier, then we waste less resources. 
Also, this commit also checks for error more consistently. --- search/searcher/search_fuzzy.go | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/search/searcher/search_fuzzy.go b/search/searcher/search_fuzzy.go index b812f4840..1ce3ba71d 100644 --- a/search/searcher/search_fuzzy.go +++ b/search/searcher/search_fuzzy.go @@ -53,6 +53,7 @@ func NewFuzzySearcher(indexReader index.IndexReader, term string, func findFuzzyCandidateTerms(indexReader index.IndexReader, term string, fuzziness int, field, prefixTerm string) (rv []string, err error) { rv = make([]string, 0) + var fieldDict index.FieldDict if len(prefixTerm) > 0 { fieldDict, err = indexReader.FieldDictPrefix(field, []byte(prefixTerm)) @@ -63,7 +64,7 @@ func findFuzzyCandidateTerms(indexReader index.IndexReader, term string, if ir, ok := indexReader.(index.IndexReaderFuzzy); ok { fieldDict, err = ir.FieldDictFuzzy(field, []byte(term), fuzziness) if err != nil { - return rv, err + return nil, err } defer func() { if cerr := fieldDict.Close(); cerr != nil && err == nil { @@ -73,12 +74,19 @@ func findFuzzyCandidateTerms(indexReader index.IndexReader, term string, tfd, err := fieldDict.Next() for err == nil && tfd != nil { rv = append(rv, tfd.Term) + if tooManyClauses(len(rv)) { + return nil, tooManyClausesErr() + } tfd, err = fieldDict.Next() } return rv, err } + fieldDict, err = indexReader.FieldDict(field) } + if err != nil { + return nil, err + } defer func() { if cerr := fieldDict.Close(); cerr != nil && err == nil { err = cerr @@ -95,7 +103,7 @@ func findFuzzyCandidateTerms(indexReader index.IndexReader, term string, if !exceeded && ld <= fuzziness { rv = append(rv, tfd.Term) if tooManyClauses(len(rv)) { - return rv, tooManyClausesErr() + return nil, tooManyClausesErr() } } tfd, err = fieldDict.Next() From 269785dea09a720fc5c6acd6f120d77cb58a403d Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Fri, 13 Jul 2018 15:09:45 -0700 Subject: [PATCH 435/728] scorch root Add/DecRef() 
codepath scrubbing --- index/scorch/introducer.go | 9 +++++++++ index/scorch/merge.go | 4 ++-- index/scorch/persister.go | 9 ++++++--- index/scorch/scorch.go | 4 ++-- 4 files changed, 19 insertions(+), 7 deletions(-) diff --git a/index/scorch/introducer.go b/index/scorch/introducer.go index 6989bbc9d..9f42a5b36 100644 --- a/index/scorch/introducer.go +++ b/index/scorch/introducer.go @@ -107,8 +107,11 @@ func (s *Scorch) introduceSegment(next *segmentIntroduction) error { s.rootLock.RLock() root := s.root + root.AddRef() s.rootLock.RUnlock() + defer func() { _ = root.DecRef() }() + nsegs := len(root.segment) // prepare new index snapshot @@ -221,10 +224,13 @@ func (s *Scorch) introducePersist(persist *persistIntroduction) { s.rootLock.Lock() root := s.root + root.AddRef() nextSnapshotEpoch := s.nextSnapshotEpoch s.nextSnapshotEpoch++ s.rootLock.Unlock() + defer func() { _ = root.DecRef() }() + newIndexSnapshot := &IndexSnapshot{ parent: s, epoch: nextSnapshotEpoch, @@ -282,8 +288,11 @@ func (s *Scorch) introduceMerge(nextMerge *segmentMerge) { s.rootLock.RLock() root := s.root + root.AddRef() s.rootLock.RUnlock() + defer func() { _ = root.DecRef() }() + newSnapshot := &IndexSnapshot{ parent: s, internal: root.internal, diff --git a/index/scorch/merge.go b/index/scorch/merge.go index 171f33ae8..38646bf0b 100644 --- a/index/scorch/merge.go +++ b/index/scorch/merge.go @@ -46,12 +46,12 @@ OUTER: default: // check to see if there is a new snapshot to persist - s.rootLock.RLock() + s.rootLock.Lock() ourSnapshot := s.root ourSnapshot.AddRef() atomic.StoreUint64(&s.iStats.mergeSnapshotSize, uint64(ourSnapshot.Size())) atomic.StoreUint64(&s.iStats.mergeEpoch, ourSnapshot.epoch) - s.rootLock.RUnlock() + s.rootLock.Unlock() if ourSnapshot.epoch != lastEpochMergePlanned { startTime := time.Now() diff --git a/index/scorch/persister.go b/index/scorch/persister.go index c822ad0b5..bb40c33f8 100644 --- a/index/scorch/persister.go +++ b/index/scorch/persister.go @@ -521,11 +521,14 
@@ func (s *Scorch) loadFromBolt() error { s.nextSegmentID++ s.rootLock.Lock() s.nextSnapshotEpoch = snapshotEpoch + 1 - if s.root != nil { - _ = s.root.DecRef() - } + rootPrev := s.root s.root = indexSnapshot s.rootLock.Unlock() + + if rootPrev != nil { + _ = rootPrev.DecRef() + } + foundRoot = true } return nil diff --git a/index/scorch/scorch.go b/index/scorch/scorch.go index fe4f71b87..357836524 100644 --- a/index/scorch/scorch.go +++ b/index/scorch/scorch.go @@ -370,6 +370,8 @@ func (s *Scorch) prepareSegment(newSegment segment.Segment, ids []string, root.AddRef() s.rootLock.RUnlock() + defer func() { _ = root.DecRef() }() + for _, seg := range root.segment { delta, err := seg.segment.DocNumbers(ids) if err != nil { @@ -378,8 +380,6 @@ func (s *Scorch) prepareSegment(newSegment segment.Segment, ids []string, introduction.obsoletes[seg.id] = delta } - _ = root.DecRef() - introStartTime := time.Now() s.introductions <- introduction From a899d29c1b2baa769d836511352993c54ec0b04f Mon Sep 17 00:00:00 2001 From: Sreekanth Sivasankaran Date: Mon, 16 Jul 2018 13:25:24 +0530 Subject: [PATCH 436/728] MB-30342 - ref_count leak from FieldDict Attempt to fix the ref_count leak by closing the field dictionary for term_prefix and term_range searches --- search/searcher/search_term_prefix.go | 8 ++++++++ search/searcher/search_term_range.go | 6 ++++++ 2 files changed, 14 insertions(+) diff --git a/search/searcher/search_term_prefix.go b/search/searcher/search_term_prefix.go index 05d092249..c49788c71 100644 --- a/search/searcher/search_term_prefix.go +++ b/search/searcher/search_term_prefix.go @@ -27,6 +27,11 @@ func NewTermPrefixSearcher(indexReader index.IndexReader, prefix string, if err != nil { return nil, err } + defer func() { + if cerr := fieldDict.Close(); cerr != nil && err == nil { + err = cerr + } + }() var terms []string tfd, err := fieldDict.Next() @@ -34,6 +39,9 @@ func NewTermPrefixSearcher(indexReader index.IndexReader, prefix string, terms = append(terms, 
tfd.Term) tfd, err = fieldDict.Next() } + if err != nil { + return nil, err + } return NewMultiTermSearcher(indexReader, terms, field, boost, options, true) } diff --git a/search/searcher/search_term_range.go b/search/searcher/search_term_range.go index 267c681b4..90be1e11a 100644 --- a/search/searcher/search_term_range.go +++ b/search/searcher/search_term_range.go @@ -48,6 +48,12 @@ func NewTermRangeSearcher(indexReader index.IndexReader, return nil, err } + defer func() { + if cerr := fieldDict.Close(); cerr != nil && err == nil { + err = cerr + } + }() + var terms []string tfd, err := fieldDict.Next() for err == nil && tfd != nil { From 84502bf2c73fd9a85477d7018220e0af6a69374e Mon Sep 17 00:00:00 2001 From: Sreekanth Sivasankaran Date: Wed, 11 Jul 2018 14:40:35 +0530 Subject: [PATCH 437/728] MB-29923 - high memory consumption in scorch The fix aims to loosen the persister-merger lock stepping and also introduces a configurable wait in the persister work loop to favour healthier in-memory segment merges. --- index/scorch/persister.go | 75 ++++++++++++++++++++++++++++++------- index/scorch/scorch.go | 18 ++++++--- index/scorch/scorch_test.go | 5 +++ test/versus_test.go | 6 +++ 4 files changed, 85 insertions(+), 19 deletions(-) diff --git a/index/scorch/persister.go b/index/scorch/persister.go index c822ad0b5..e7f591671 100644 --- a/index/scorch/persister.go +++ b/index/scorch/persister.go @@ -17,6 +17,7 @@ package scorch import ( "bytes" "encoding/binary" + "encoding/json" "fmt" "io/ioutil" "log" @@ -35,16 +36,22 @@ import ( var DefaultChunkFactor uint32 = 1024 -// Arbitrary number, need to make it configurable. -// Lower values like 10/making persister really slow -// doesn't work well as it is creating more files to -// persist for in next persist iteration and spikes the # FDs. -// Ideal value should let persister also proceed at -// an optimum pace so that the merger can skip -// many intermediate snapshots. -// This needs to be based on empirical data. 
-// TODO - may need to revisit this approach/value. -var epochDistance = uint64(5) +var DefaultPersisterNapTimeMSec int = 2000 // ms + +var DefaultPersisterNapUnderNumFiles int = 1000 + +type persisterOptions struct { + // PersisterNapTimeMSec controls the wait/delay injected into + // persistence workloop to improve the chances for + // a healthier and heavier in-memory merging + PersisterNapTimeMSec int + + // PersisterNapTimeMSec > 0, and the number of files is less than + // PersisterNapUnderNumFiles, then the persister will sleep + // PersisterNapTimeMSec amount of time to improve the chances for + // a healthier and heavier in-memory merging + PersisterNapUnderNumFiles int +} type notificationChan chan struct{} @@ -54,6 +61,13 @@ func (s *Scorch) persisterLoop() { var persistWatchers []*epochWatcher var lastPersistedEpoch, lastMergedEpoch uint64 var ew *epochWatcher + po, err := s.parsePersisterOptions() + if err != nil { + s.fireAsyncError(fmt.Errorf("persisterOptions json parsing err: %v", err)) + s.asyncTasks.Done() + return + } + OUTER: for { atomic.AddUint64(&s.stats.TotPersistLoopBeg, 1) @@ -69,7 +83,7 @@ OUTER: lastMergedEpoch = ew.epoch } lastMergedEpoch, persistWatchers = s.pausePersisterForMergerCatchUp(lastPersistedEpoch, - lastMergedEpoch, persistWatchers) + lastMergedEpoch, persistWatchers, po) var ourSnapshot *IndexSnapshot var ourPersisted []chan error @@ -180,14 +194,26 @@ func notifyMergeWatchers(lastPersistedEpoch uint64, } func (s *Scorch) pausePersisterForMergerCatchUp(lastPersistedEpoch uint64, lastMergedEpoch uint64, - persistWatchers []*epochWatcher) (uint64, []*epochWatcher) { + persistWatchers []*epochWatcher, po *persisterOptions) (uint64, []*epochWatcher) { // first, let the watchers proceed if they lag behind persistWatchers = notifyMergeWatchers(lastPersistedEpoch, persistWatchers) + // check the merger lag by counting the segment files on disk, + // On finding fewer files on disk, persister takes a short pause + // for sufficient 
in-memory segments to pile up for the next + // memory merge cum persist loop. + // On finding too many files on disk, persister pause until the merger + // catches up to reduce the segment file count under the threshold. + numFilesOnDisk, _ := s.diskFileStats() + if numFilesOnDisk < uint64(po.PersisterNapUnderNumFiles) && + po.PersisterNapTimeMSec > 0 { + time.Sleep(time.Millisecond * time.Duration(po.PersisterNapTimeMSec)) + return lastMergedEpoch, persistWatchers + } + OUTER: - // check for slow merger and await until the merger catch up - for lastPersistedEpoch > lastMergedEpoch+epochDistance { + for numFilesOnDisk > uint64(po.PersisterNapUnderNumFiles) { atomic.AddUint64(&s.stats.TotPersisterSlowMergerPause, 1) select { @@ -202,11 +228,32 @@ OUTER: // let the watchers proceed if they lag behind persistWatchers = notifyMergeWatchers(lastPersistedEpoch, persistWatchers) + + numFilesOnDisk, _ = s.diskFileStats() } return lastMergedEpoch, persistWatchers } +func (s *Scorch) parsePersisterOptions() (*persisterOptions, error) { + po := persisterOptions{ + PersisterNapTimeMSec: DefaultPersisterNapTimeMSec, + PersisterNapUnderNumFiles: DefaultPersisterNapUnderNumFiles, + } + if v, ok := s.config["scorchPersisterOptions"]; ok { + b, err := json.Marshal(v) + if err != nil { + return &po, err + } + + err = json.Unmarshal(b, &po) + if err != nil { + return &po, err + } + } + return &po, nil +} + func (s *Scorch) persistSnapshot(snapshot *IndexSnapshot) error { persisted, err := s.persistSnapshotMaybeMerge(snapshot) if err != nil { diff --git a/index/scorch/scorch.go b/index/scorch/scorch.go index fe4f71b87..50208311d 100644 --- a/index/scorch/scorch.go +++ b/index/scorch/scorch.go @@ -434,9 +434,9 @@ func (s *Scorch) currentSnapshot() *IndexSnapshot { func (s *Scorch) Stats() json.Marshaler { return &s.stats } -func (s *Scorch) StatsMap() map[string]interface{} { - m := s.stats.ToMap() +func (s *Scorch) diskFileStats() (uint64, uint64) { + var numFilesOnDisk, 
numBytesUsedDisk uint64 if s.path != "" { finfos, err := ioutil.ReadDir(s.path) if err == nil { @@ -447,11 +447,19 @@ func (s *Scorch) StatsMap() map[string]interface{} { numFilesOnDisk++ } } - - m["CurOnDiskBytes"] = numBytesUsedDisk - m["CurOnDiskFiles"] = numFilesOnDisk } } + return numFilesOnDisk, numBytesUsedDisk +} + +func (s *Scorch) StatsMap() map[string]interface{} { + m := s.stats.ToMap() + + numFilesOnDisk, numBytesUsedDisk := s.diskFileStats() + if numFilesOnDisk > 0 || numBytesUsedDisk > 0 { + m["CurOnDiskBytes"] = numBytesUsedDisk + m["CurOnDiskFiles"] = numFilesOnDisk + } // TODO: consider one day removing these backwards compatible // names for apps using the old names diff --git a/index/scorch/scorch_test.go b/index/scorch/scorch_test.go index cf784755d..adcabd22f 100644 --- a/index/scorch/scorch_test.go +++ b/index/scorch/scorch_test.go @@ -33,6 +33,11 @@ import ( "github.com/blevesearch/bleve/mapping" ) +func init() { + // override for tests + DefaultPersisterNapTimeMSec = 1 +} + func DestroyTest() error { return os.RemoveAll("/tmp/bleve-scorch-test") } diff --git a/test/versus_test.go b/test/versus_test.go index dbc7dd752..10faa311e 100644 --- a/test/versus_test.go +++ b/test/versus_test.go @@ -41,6 +41,12 @@ import ( // go test -v -run TestScorchVersusUpsideDownBolt ./test // VERBOSE=1 FOCUS=Trista go test -v -run TestScorchVersusUpsideDownBolt ./test // + +func init() { + // override for tests + scorch.DefaultPersisterNapTimeMSec = 1 +} + func TestScorchVersusUpsideDownBoltAll(t *testing.T) { (&VersusTest{ t: t, From a7e64a5289dbffe898ab09ac5fa28b7c8535b90f Mon Sep 17 00:00:00 2001 From: abhinavdangeti Date: Wed, 25 Jul 2018 16:09:56 -0700 Subject: [PATCH 438/728] [Scorch] Fix bug in estimating: numFilesOnDisk, numBytesUsedDisk --- index/scorch/scorch.go | 1 - 1 file changed, 1 deletion(-) diff --git a/index/scorch/scorch.go b/index/scorch/scorch.go index c22b8d401..644a5ea3d 100644 --- a/index/scorch/scorch.go +++ b/index/scorch/scorch.go @@ 
-440,7 +440,6 @@ func (s *Scorch) diskFileStats() (uint64, uint64) { if s.path != "" { finfos, err := ioutil.ReadDir(s.path) if err == nil { - var numFilesOnDisk, numBytesUsedDisk uint64 for _, finfo := range finfos { if !finfo.IsDir() { numBytesUsedDisk += uint64(finfo.Size()) From e4b290d350a7bcb636f4498e828ef42de4b3d98d Mon Sep 17 00:00:00 2001 From: Sreekanth Sivasankaran Date: Sat, 28 Jul 2018 12:19:37 +0530 Subject: [PATCH 439/728] MB-30694: reflect.Value.Type on zero Value panics Adding checks to validate the geo point values while parsing --- geo/parse.go | 3 +++ mapping/document.go | 4 ++++ mapping/reflect.go | 3 +++ 3 files changed, 10 insertions(+) diff --git a/geo/parse.go b/geo/parse.go index 04a57538d..703ff6718 100644 --- a/geo/parse.go +++ b/geo/parse.go @@ -113,6 +113,9 @@ func ExtractGeoPoint(thing interface{}) (lon, lat float64, success bool) { // extract numeric value (if possible) and returns a float64 func extractNumericVal(v interface{}) (float64, bool) { val := reflect.ValueOf(v) + if !val.IsValid() { + return 0, false + } typ := val.Type() switch typ.Kind() { case reflect.Float32, reflect.Float64: diff --git a/mapping/document.go b/mapping/document.go index 6898e54e8..cc3582cad 100644 --- a/mapping/document.go +++ b/mapping/document.go @@ -331,6 +331,10 @@ func (dm *DocumentMapping) walkDocument(data interface{}, path []string, indexes } val := reflect.ValueOf(data) + if !val.IsValid() { + return + } + typ := val.Type() switch typ.Kind() { case reflect.Map: diff --git a/mapping/reflect.go b/mapping/reflect.go index 3068b1906..6500a7059 100644 --- a/mapping/reflect.go +++ b/mapping/reflect.go @@ -35,6 +35,9 @@ func lookupPropertyPath(data interface{}, path string) interface{} { func lookupPropertyPathPart(data interface{}, part string) interface{} { val := reflect.ValueOf(data) + if !val.IsValid() { + return nil + } typ := val.Type() switch typ.Kind() { case reflect.Map: From aa949f448b5442a2d329c64baf887310b2ee5157 Mon Sep 17 00:00:00 2001 
From: Sreekanth Sivasankaran Date: Sun, 29 Jul 2018 18:56:54 +0530 Subject: [PATCH 440/728] adding UTs and extra checks --- geo/parse.go | 8 ++++++-- geo/parse_test.go | 17 +++++++++++++++++ 2 files changed, 23 insertions(+), 2 deletions(-) diff --git a/geo/parse.go b/geo/parse.go index 703ff6718..8dfc6eed2 100644 --- a/geo/parse.go +++ b/geo/parse.go @@ -36,10 +36,14 @@ func ExtractGeoPoint(thing interface{}) (lon, lat float64, success bool) { var foundLon, foundLat bool thingVal := reflect.ValueOf(thing) + if !thingVal.IsValid() { + return lon, lat, false + } + thingTyp := thingVal.Type() // is it a slice - if thingVal.IsValid() && thingVal.Kind() == reflect.Slice { + if thingVal.Kind() == reflect.Slice { // must be length 2 if thingVal.Len() == 2 { first := thingVal.Index(0) @@ -68,7 +72,7 @@ func ExtractGeoPoint(thing interface{}) (lon, lat float64, success bool) { } // now try reflection on struct fields - if thingVal.IsValid() && thingVal.Kind() == reflect.Struct { + if thingVal.Kind() == reflect.Struct { for i := 0; i < thingVal.NumField(); i++ { fieldName := thingTyp.Field(i).Name if strings.HasPrefix(strings.ToLower(fieldName), "lon") { diff --git a/geo/parse_test.go b/geo/parse_test.go index 4cbf66dde..c7d088d93 100644 --- a/geo/parse_test.go +++ b/geo/parse_test.go @@ -141,6 +141,23 @@ func TestExtractGeoPoint(t *testing.T) { lat: 5.9, success: true, }, + // values are nil (not supported) + { + in: map[string]interface{}{ + "lat": nil, + "lon": nil, + }, + lon: 0, + lat: 0, + success: false, + }, + // input is nil + { + in: nil, + lon: 0, + lat: 0, + success: false, + }, } for _, test := range tests { From 1d7c871429c3e77f2532f4d59071e492abf523fb Mon Sep 17 00:00:00 2001 From: abhinavdangeti Date: Mon, 30 Jul 2018 10:43:16 -0700 Subject: [PATCH 441/728] MB-30616: Revert "MB-29923 - high memory consumption in scorch" Noticed operational deadlocks by stalling the persister to allow more in-memory merges. 
This reverts commit 84502bf2c73fd9a85477d7018220e0af6a69374e. --- index/scorch/persister.go | 75 +++++++------------------------------ index/scorch/scorch.go | 19 +++------- index/scorch/scorch_test.go | 5 --- test/versus_test.go | 6 --- 4 files changed, 20 insertions(+), 85 deletions(-) diff --git a/index/scorch/persister.go b/index/scorch/persister.go index 96b4c2ec8..bb40c33f8 100644 --- a/index/scorch/persister.go +++ b/index/scorch/persister.go @@ -17,7 +17,6 @@ package scorch import ( "bytes" "encoding/binary" - "encoding/json" "fmt" "io/ioutil" "log" @@ -36,22 +35,16 @@ import ( var DefaultChunkFactor uint32 = 1024 -var DefaultPersisterNapTimeMSec int = 2000 // ms - -var DefaultPersisterNapUnderNumFiles int = 1000 - -type persisterOptions struct { - // PersisterNapTimeMSec controls the wait/delay injected into - // persistence workloop to improve the chances for - // a healthier and heavier in-memory merging - PersisterNapTimeMSec int - - // PersisterNapTimeMSec > 0, and the number of files is less than - // PersisterNapUnderNumFiles, then the persister will sleep - // PersisterNapTimeMSec amount of time to improve the chances for - // a healthier and heavier in-memory merging - PersisterNapUnderNumFiles int -} +// Arbitrary number, need to make it configurable. +// Lower values like 10/making persister really slow +// doesn't work well as it is creating more files to +// persist for in next persist iteration and spikes the # FDs. +// Ideal value should let persister also proceed at +// an optimum pace so that the merger can skip +// many intermediate snapshots. +// This needs to be based on empirical data. +// TODO - may need to revisit this approach/value. 
+var epochDistance = uint64(5) type notificationChan chan struct{} @@ -61,13 +54,6 @@ func (s *Scorch) persisterLoop() { var persistWatchers []*epochWatcher var lastPersistedEpoch, lastMergedEpoch uint64 var ew *epochWatcher - po, err := s.parsePersisterOptions() - if err != nil { - s.fireAsyncError(fmt.Errorf("persisterOptions json parsing err: %v", err)) - s.asyncTasks.Done() - return - } - OUTER: for { atomic.AddUint64(&s.stats.TotPersistLoopBeg, 1) @@ -83,7 +69,7 @@ OUTER: lastMergedEpoch = ew.epoch } lastMergedEpoch, persistWatchers = s.pausePersisterForMergerCatchUp(lastPersistedEpoch, - lastMergedEpoch, persistWatchers, po) + lastMergedEpoch, persistWatchers) var ourSnapshot *IndexSnapshot var ourPersisted []chan error @@ -194,26 +180,14 @@ func notifyMergeWatchers(lastPersistedEpoch uint64, } func (s *Scorch) pausePersisterForMergerCatchUp(lastPersistedEpoch uint64, lastMergedEpoch uint64, - persistWatchers []*epochWatcher, po *persisterOptions) (uint64, []*epochWatcher) { + persistWatchers []*epochWatcher) (uint64, []*epochWatcher) { // first, let the watchers proceed if they lag behind persistWatchers = notifyMergeWatchers(lastPersistedEpoch, persistWatchers) - // check the merger lag by counting the segment files on disk, - // On finding fewer files on disk, persister takes a short pause - // for sufficient in-memory segments to pile up for the next - // memory merge cum persist loop. - // On finding too many files on disk, persister pause until the merger - // catches up to reduce the segment file count under the threshold. 
- numFilesOnDisk, _ := s.diskFileStats() - if numFilesOnDisk < uint64(po.PersisterNapUnderNumFiles) && - po.PersisterNapTimeMSec > 0 { - time.Sleep(time.Millisecond * time.Duration(po.PersisterNapTimeMSec)) - return lastMergedEpoch, persistWatchers - } - OUTER: - for numFilesOnDisk > uint64(po.PersisterNapUnderNumFiles) { + // check for slow merger and await until the merger catch up + for lastPersistedEpoch > lastMergedEpoch+epochDistance { atomic.AddUint64(&s.stats.TotPersisterSlowMergerPause, 1) select { @@ -228,32 +202,11 @@ OUTER: // let the watchers proceed if they lag behind persistWatchers = notifyMergeWatchers(lastPersistedEpoch, persistWatchers) - - numFilesOnDisk, _ = s.diskFileStats() } return lastMergedEpoch, persistWatchers } -func (s *Scorch) parsePersisterOptions() (*persisterOptions, error) { - po := persisterOptions{ - PersisterNapTimeMSec: DefaultPersisterNapTimeMSec, - PersisterNapUnderNumFiles: DefaultPersisterNapUnderNumFiles, - } - if v, ok := s.config["scorchPersisterOptions"]; ok { - b, err := json.Marshal(v) - if err != nil { - return &po, err - } - - err = json.Unmarshal(b, &po) - if err != nil { - return &po, err - } - } - return &po, nil -} - func (s *Scorch) persistSnapshot(snapshot *IndexSnapshot) error { persisted, err := s.persistSnapshotMaybeMerge(snapshot) if err != nil { diff --git a/index/scorch/scorch.go b/index/scorch/scorch.go index 644a5ea3d..357836524 100644 --- a/index/scorch/scorch.go +++ b/index/scorch/scorch.go @@ -434,30 +434,23 @@ func (s *Scorch) currentSnapshot() *IndexSnapshot { func (s *Scorch) Stats() json.Marshaler { return &s.stats } +func (s *Scorch) StatsMap() map[string]interface{} { + m := s.stats.ToMap() -func (s *Scorch) diskFileStats() (uint64, uint64) { - var numFilesOnDisk, numBytesUsedDisk uint64 if s.path != "" { finfos, err := ioutil.ReadDir(s.path) if err == nil { + var numFilesOnDisk, numBytesUsedDisk uint64 for _, finfo := range finfos { if !finfo.IsDir() { numBytesUsedDisk += 
uint64(finfo.Size()) numFilesOnDisk++ } } - } - } - return numFilesOnDisk, numBytesUsedDisk -} -func (s *Scorch) StatsMap() map[string]interface{} { - m := s.stats.ToMap() - - numFilesOnDisk, numBytesUsedDisk := s.diskFileStats() - if numFilesOnDisk > 0 || numBytesUsedDisk > 0 { - m["CurOnDiskBytes"] = numBytesUsedDisk - m["CurOnDiskFiles"] = numFilesOnDisk + m["CurOnDiskBytes"] = numBytesUsedDisk + m["CurOnDiskFiles"] = numFilesOnDisk + } } // TODO: consider one day removing these backwards compatible diff --git a/index/scorch/scorch_test.go b/index/scorch/scorch_test.go index adcabd22f..cf784755d 100644 --- a/index/scorch/scorch_test.go +++ b/index/scorch/scorch_test.go @@ -33,11 +33,6 @@ import ( "github.com/blevesearch/bleve/mapping" ) -func init() { - // override for tests - DefaultPersisterNapTimeMSec = 1 -} - func DestroyTest() error { return os.RemoveAll("/tmp/bleve-scorch-test") } diff --git a/test/versus_test.go b/test/versus_test.go index 10faa311e..dbc7dd752 100644 --- a/test/versus_test.go +++ b/test/versus_test.go @@ -41,12 +41,6 @@ import ( // go test -v -run TestScorchVersusUpsideDownBolt ./test // VERBOSE=1 FOCUS=Trista go test -v -run TestScorchVersusUpsideDownBolt ./test // - -func init() { - // override for tests - scorch.DefaultPersisterNapTimeMSec = 1 -} - func TestScorchVersusUpsideDownBoltAll(t *testing.T) { (&VersusTest{ t: t, From 6795aad1ccc6feedc1ee40c2005bcee228364a83 Mon Sep 17 00:00:00 2001 From: Sreekanth Sivasankaran Date: Tue, 31 Jul 2018 16:36:25 +0530 Subject: [PATCH 442/728] replacing maps with slice for faster lookups --- search/facets_builder.go | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/search/facets_builder.go b/search/facets_builder.go index c5d41e2d3..7fc0bedf3 100644 --- a/search/facets_builder.go +++ b/search/facets_builder.go @@ -54,14 +54,14 @@ type FacetBuilder interface { type FacetsBuilder struct { indexReader index.IndexReader - facets map[string]FacetBuilder + facetNames 
[]string + facets []FacetBuilder fields []string } func NewFacetsBuilder(indexReader index.IndexReader) *FacetsBuilder { return &FacetsBuilder{ indexReader: indexReader, - facets: make(map[string]FacetBuilder, 0), } } @@ -69,8 +69,7 @@ func (fb *FacetsBuilder) Size() int { sizeInBytes := reflectStaticSizeFacetsBuilder + size.SizeOfPtr for k, v := range fb.facets { - sizeInBytes += size.SizeOfString + len(k) + - v.Size() + sizeInBytes += size.SizeOfString + v.Size() + len(fb.facetNames[k]) } for _, entry := range fb.fields { @@ -81,7 +80,8 @@ func (fb *FacetsBuilder) Size() int { } func (fb *FacetsBuilder) Add(name string, facetBuilder FacetBuilder) { - fb.facets[name] = facetBuilder + fb.facetNames = append(fb.facetNames, name) + fb.facets = append(fb.facets, facetBuilder) fb.fields = append(fb.fields, facetBuilder.Field()) } @@ -333,9 +333,9 @@ func (fr FacetResults) Fixup(name string, size int) { func (fb *FacetsBuilder) Results() FacetResults { fr := make(FacetResults) - for facetName, facetBuilder := range fb.facets { + for i, facetBuilder := range fb.facets { facetResult := facetBuilder.Result() - fr[facetName] = facetResult + fr[fb.facetNames[i]] = facetResult } return fr } From a05fcc02e479be66289bf20324f314121064d02a Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Wed, 8 Aug 2018 18:28:40 -0700 Subject: [PATCH 443/728] optimize scorch/zap dict PrefixIterator() This optimization avoids creating a regexp for the scorch/zap dictionary prefix iterator. 
See also: https://issues.couchbase.com/browse/MB-30263 --- index/scorch/segment/zap/dict.go | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/index/scorch/segment/zap/dict.go b/index/scorch/segment/zap/dict.go index 736fa59f6..8e1a03f9b 100644 --- a/index/scorch/segment/zap/dict.go +++ b/index/scorch/segment/zap/dict.go @@ -122,16 +122,14 @@ func (d *Dictionary) PrefixIterator(prefix string) segment.DictionaryIterator { d: d, } + kBeg := []byte(prefix) + kEnd := incrementBytes(kBeg) + if d.fst != nil { - r, err := regexp.New(prefix + ".*") + itr, err := d.fst.Iterator(kBeg, kEnd) if err == nil { - itr, err := d.fst.Search(r, nil, nil) - if err == nil { - rv.itr = itr - } else if err != nil && err != vellum.ErrIteratorDone { - rv.err = err - } - } else { + rv.itr = itr + } else if err != nil && err != vellum.ErrIteratorDone { rv.err = err } } @@ -139,6 +137,18 @@ func (d *Dictionary) PrefixIterator(prefix string) segment.DictionaryIterator { return rv } +func incrementBytes(in []byte) []byte { + rv := make([]byte, len(in)) + copy(rv, in) + for i := len(rv) - 1; i >= 0; i-- { + rv[i] = rv[i] + 1 + if rv[i] != 0 { + return rv // didn't overflow, so stop + } + } + return nil // overflowed +} + // RangeIterator returns an iterator which only visits terms between the // start and end terms. NOTE: bleve.index API specifies the end is inclusive. func (d *Dictionary) RangeIterator(start, end string) segment.DictionaryIterator { From 0453284a97c2c0b75b5ac609edb2d3f491a47b56 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Fri, 10 Aug 2018 10:21:27 -0700 Subject: [PATCH 444/728] RegexpSearcher takes index.Regexp interface instead of *regexp.Regexp This level of indirection allows for alternate regexp implementations. 
--- index/index.go | 11 +++++++++++ search/searcher/search_regexp.go | 6 ++---- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/index/index.go b/index/index.go index 7c4edcb8d..dd5271cf7 100644 --- a/index/index.go +++ b/index/index.go @@ -98,6 +98,17 @@ type IndexReader interface { Close() error } +// The Regexp interface defines the subset of the regexp.Regexp API +// methods that are used by bleve indexes, allowing callers to pass in +// alternate implementations. +type Regexp interface { + FindStringIndex(s string) (loc []int) + + LiteralPrefix() (prefix string, complete bool) + + String() string +} + type IndexReaderRegexp interface { FieldDictRegexp(field string, regex []byte) (FieldDict, error) } diff --git a/search/searcher/search_regexp.go b/search/searcher/search_regexp.go index ad417a056..a55909bbc 100644 --- a/search/searcher/search_regexp.go +++ b/search/searcher/search_regexp.go @@ -15,8 +15,6 @@ package searcher import ( - "regexp" - "github.com/blevesearch/bleve/index" "github.com/blevesearch/bleve/search" ) @@ -26,7 +24,7 @@ import ( // matching the entire term. The provided regexp SHOULD NOT start with ^ // or end with $ as this can intefere with the implementation. Separately, // matches will be checked to ensure they match the entire term. 
-func NewRegexpSearcher(indexReader index.IndexReader, pattern *regexp.Regexp, +func NewRegexpSearcher(indexReader index.IndexReader, pattern index.Regexp, field string, boost float64, options search.SearcherOptions) ( search.Searcher, error) { var candidateTerms []string @@ -70,7 +68,7 @@ func NewRegexpSearcher(indexReader index.IndexReader, pattern *regexp.Regexp, } func findRegexpCandidateTerms(indexReader index.IndexReader, - pattern *regexp.Regexp, field, prefixTerm string) (rv []string, err error) { + pattern index.Regexp, field, prefixTerm string) (rv []string, err error) { rv = make([]string, 0) var fieldDict index.FieldDict if len(prefixTerm) > 0 { From 96657413a75bb36fb12dc30400c1042a97b5b670 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Fri, 10 Aug 2018 10:38:21 -0700 Subject: [PATCH 445/728] index API & scorch uses index.Regexp instead string'ified regexp In this optimization and bleve "non-porcelain" index API change, the index.IndexReaderRegexp API is changed to accept an index.Regexp instance instead of the string representation of a regexp. This allows scorch to leverage the LiteralPrefix() information of the regexp instance (which is not implemented by the vellum.regexp API), so that the FST dictionary searches can be more selective by invoking... d.fst.Search(r, prefixBeg, prefixEnd) instead of the previous... 
d.fst.Search(r, nil, nil) See also: https://issues.couchbase.com/browse/MB-30264 --- index/index.go | 2 +- index/scorch/segment/empty.go | 2 +- index/scorch/segment/segment.go | 2 +- index/scorch/segment/zap/dict.go | 17 ++++++++++++++--- index/scorch/snapshot_index.go | 4 ++-- index/scorch/snapshot_segment.go | 2 +- index_meta.go | 2 +- search/searcher/search_regexp.go | 2 +- 8 files changed, 22 insertions(+), 11 deletions(-) diff --git a/index/index.go b/index/index.go index dd5271cf7..d734c1db2 100644 --- a/index/index.go +++ b/index/index.go @@ -110,7 +110,7 @@ type Regexp interface { } type IndexReaderRegexp interface { - FieldDictRegexp(field string, regex []byte) (FieldDict, error) + FieldDictRegexp(field string, regex Regexp) (FieldDict, error) } type IndexReaderFuzzy interface { diff --git a/index/scorch/segment/empty.go b/index/scorch/segment/empty.go index 0489c8218..59b2af4f0 100644 --- a/index/scorch/segment/empty.go +++ b/index/scorch/segment/empty.go @@ -80,7 +80,7 @@ func (e *EmptyDictionary) RangeIterator(start, end string) DictionaryIterator { return &EmptyDictionaryIterator{} } -func (e *EmptyDictionary) RegexpIterator(start string) DictionaryIterator { +func (e *EmptyDictionary) RegexpIterator(r index.Regexp) DictionaryIterator { return &EmptyDictionaryIterator{} } diff --git a/index/scorch/segment/segment.go b/index/scorch/segment/segment.go index 3fc315995..bc85aebc0 100644 --- a/index/scorch/segment/segment.go +++ b/index/scorch/segment/segment.go @@ -51,7 +51,7 @@ type TermDictionary interface { Iterator() DictionaryIterator PrefixIterator(prefix string) DictionaryIterator RangeIterator(start, end string) DictionaryIterator - RegexpIterator(regex string) DictionaryIterator + RegexpIterator(regex index.Regexp) DictionaryIterator FuzzyIterator(term string, fuzziness int) DictionaryIterator OnlyIterator(onlyTerms [][]byte, includeCount bool) DictionaryIterator } diff --git a/index/scorch/segment/zap/dict.go b/index/scorch/segment/zap/dict.go 
index 8e1a03f9b..ab4c8311e 100644 --- a/index/scorch/segment/zap/dict.go +++ b/index/scorch/segment/zap/dict.go @@ -178,15 +178,26 @@ func (d *Dictionary) RangeIterator(start, end string) segment.DictionaryIterator // RegexpIterator returns an iterator which only visits terms having the // the specified regex -func (d *Dictionary) RegexpIterator(regex string) segment.DictionaryIterator { +func (d *Dictionary) RegexpIterator(rIn index.Regexp) segment.DictionaryIterator { + prefixTerm, complete := rIn.LiteralPrefix() + if complete { + return d.PrefixIterator(prefixTerm) + } + rv := &DictionaryIterator{ d: d, } if d.fst != nil { - r, err := regexp.New(regex) + r, err := regexp.New(rIn.String()) if err == nil { - itr, err2 := d.fst.Search(r, nil, nil) + var prefixBeg, prefixEnd []byte + if prefixTerm != "" { + prefixBeg = []byte(prefixTerm) + prefixEnd = incrementBytes(prefixEnd) + } + + itr, err2 := d.fst.Search(r, prefixBeg, prefixEnd) if err2 == nil { rv.itr = itr } else if err2 != nil && err2 != vellum.ErrIteratorDone { diff --git a/index/scorch/snapshot_index.go b/index/scorch/snapshot_index.go index 21e19123d..dc2ace54c 100644 --- a/index/scorch/snapshot_index.go +++ b/index/scorch/snapshot_index.go @@ -179,9 +179,9 @@ func (i *IndexSnapshot) FieldDictPrefix(field string, } func (i *IndexSnapshot) FieldDictRegexp(field string, - termRegex []byte) (index.FieldDict, error) { + termRegex index.Regexp) (index.FieldDict, error) { return i.newIndexSnapshotFieldDict(field, func(i segment.TermDictionary) segment.DictionaryIterator { - return i.RegexpIterator(string(termRegex)) + return i.RegexpIterator(termRegex) }) } diff --git a/index/scorch/snapshot_segment.go b/index/scorch/snapshot_segment.go index 90dbcb494..5b51981c1 100644 --- a/index/scorch/snapshot_segment.go +++ b/index/scorch/snapshot_segment.go @@ -52,7 +52,7 @@ func (s *SegmentDictionarySnapshot) RangeIterator(start, end string) segment.Dic return s.d.RangeIterator(start, end) } -func (s 
*SegmentDictionarySnapshot) RegexpIterator(regex string) segment.DictionaryIterator { +func (s *SegmentDictionarySnapshot) RegexpIterator(regex index.Regexp) segment.DictionaryIterator { return s.d.RegexpIterator(regex) } diff --git a/index_meta.go b/index_meta.go index 2614292b7..d814799a8 100644 --- a/index_meta.go +++ b/index_meta.go @@ -19,7 +19,7 @@ import ( "io/ioutil" "os" "path/filepath" - + "github.com/blevesearch/bleve/index/upsidedown" ) diff --git a/search/searcher/search_regexp.go b/search/searcher/search_regexp.go index a55909bbc..dc8573148 100644 --- a/search/searcher/search_regexp.go +++ b/search/searcher/search_regexp.go @@ -29,7 +29,7 @@ func NewRegexpSearcher(indexReader index.IndexReader, pattern index.Regexp, search.Searcher, error) { var candidateTerms []string if ir, ok := indexReader.(index.IndexReaderRegexp); ok { - fieldDict, err := ir.FieldDictRegexp(field, []byte(pattern.String())) + fieldDict, err := ir.FieldDictRegexp(field, pattern) if err != nil { return nil, err } From 44023bec88c9a74603abb766d4d8c5c5e51e84a2 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Fri, 10 Aug 2018 16:14:45 -0700 Subject: [PATCH 446/728] fixed 1-hit scorch optimization freq/norm & added scorch regexp test The "1-hit" encoding optimization for scorch was incorrectly returning from the postings list iterator constructor too early, before setting the includeFreqNorm & includeLocs flags. This was caught while refactoring the existing regexp searcher unit test from upside-down to also test scorch. 
--- index/scorch/segment/zap/posting.go | 5 ++- search/searcher/base_test.go | 31 +++++++++++++-- search/searcher/search_regexp_test.go | 54 +++++++++++++++++++-------- 3 files changed, 70 insertions(+), 20 deletions(-) diff --git a/index/scorch/segment/zap/posting.go b/index/scorch/segment/zap/posting.go index 71d41a826..0ac7938e1 100644 --- a/index/scorch/segment/zap/posting.go +++ b/index/scorch/segment/zap/posting.go @@ -188,7 +188,10 @@ func (p *PostingsList) iterator(includeFreq, includeNorm, includeLocs bool, rv.buf = buf } + rv.postings = p + rv.includeFreqNorm = includeFreq || includeNorm + rv.includeLocs = includeLocs if p.normBits1Hit != 0 { // "1-hit" encoding @@ -211,7 +214,6 @@ func (p *PostingsList) iterator(includeFreq, includeNorm, includeLocs bool, var read int // prepare the freq chunk details - rv.includeFreqNorm = includeFreq || includeNorm if rv.includeFreqNorm { var numFreqChunks uint64 numFreqChunks, read = binary.Uvarint(p.sb.mem[p.freqOffset+n : p.freqOffset+n+binary.MaxVarintLen64]) @@ -229,7 +231,6 @@ func (p *PostingsList) iterator(includeFreq, includeNorm, includeLocs bool, } // prepare the loc chunk details - rv.includeLocs = includeLocs if rv.includeLocs { n = 0 var numLocChunks uint64 diff --git a/search/searcher/base_test.go b/search/searcher/base_test.go index c47cd18c1..425d6703c 100644 --- a/search/searcher/base_test.go +++ b/search/searcher/base_test.go @@ -15,6 +15,7 @@ package searcher import ( + "io/ioutil" "math" "regexp" @@ -22,6 +23,7 @@ import ( regexpTokenizer "github.com/blevesearch/bleve/analysis/tokenizer/regexp" "github.com/blevesearch/bleve/document" "github.com/blevesearch/bleve/index" + "github.com/blevesearch/bleve/index/scorch" "github.com/blevesearch/bleve/index/store/gtreap" "github.com/blevesearch/bleve/index/upsidedown" ) @@ -29,9 +31,12 @@ import ( var twoDocIndex index.Index //= upside_down.NewUpsideDownCouch(inmem.MustOpen()) func init() { + twoDocIndex = initTwoDocUpsideDown() +} + +func 
initTwoDocUpsideDown() index.Index { analysisQueue := index.NewAnalysisQueue(1) - var err error - twoDocIndex, err = upsidedown.NewUpsideDownCouch( + twoDocIndex, err := upsidedown.NewUpsideDownCouch( gtreap.Name, map[string]interface{}{ "path": "", @@ -39,7 +44,27 @@ func init() { if err != nil { panic(err) } - err = twoDocIndex.Open() + initTwoDocs(twoDocIndex) + return twoDocIndex +} + +func initTwoDocScorch() index.Index { + analysisQueue := index.NewAnalysisQueue(1) + dir, _ := ioutil.TempDir("", "scorchTwoDoc") + twoDocIndex, err := scorch.NewScorch( + scorch.Name, + map[string]interface{}{ + "path": dir, + }, analysisQueue) + if err != nil { + panic(err) + } + initTwoDocs(twoDocIndex) + return twoDocIndex +} + +func initTwoDocs(twoDocIndex index.Index) { + err := twoDocIndex.Open() if err != nil { panic(err) } diff --git a/search/searcher/search_regexp_test.go b/search/searcher/search_regexp_test.go index daa5d9c14..2de1162a5 100644 --- a/search/searcher/search_regexp_test.go +++ b/search/searcher/search_regexp_test.go @@ -15,6 +15,8 @@ package searcher import ( + "encoding/binary" + "fmt" "regexp" "testing" @@ -22,8 +24,28 @@ import ( "github.com/blevesearch/bleve/search" ) -func TestRegexpSearch(t *testing.T) { +func TestRegexpSearchUpsideDown(t *testing.T) { + twoDocIndex := initTwoDocUpsideDown() + testRegexpSearch(t, twoDocIndex, + func(id int) index.IndexInternalID { + return index.IndexInternalID(fmt.Sprintf("%d", id)) + }) + _ = twoDocIndex.Close() +} + +func TestRegexpSearchScorch(t *testing.T) { + twoDocIndex := initTwoDocScorch() + testRegexpSearch(t, twoDocIndex, + func(id int) index.IndexInternalID { + buf := make([]byte, 8) + binary.BigEndian.PutUint64(buf, uint64(id)) + return index.IndexInternalID(buf) + }) + _ = twoDocIndex.Close() +} +func testRegexpSearch(t *testing.T, twoDocIndex index.Index, + internalIDMaker func(int) index.IndexInternalID) { twoDocIndexReader, err := twoDocIndex.Reader() if err != nil { t.Error(err) @@ -58,27 +80,27 @@ 
func TestRegexpSearch(t *testing.T) { } tests := []struct { - searcher search.Searcher - results []*search.DocumentMatch + searcher search.Searcher + expecteds []*search.DocumentMatch }{ { searcher: regexpSearcher, - results: []*search.DocumentMatch{ + expecteds: []*search.DocumentMatch{ { - IndexInternalID: index.IndexInternalID("1"), + IndexInternalID: internalIDMaker(1), Score: 1.916290731874155, }, }, }, { searcher: regexpSearcherCo, - results: []*search.DocumentMatch{ + expecteds: []*search.DocumentMatch{ { - IndexInternalID: index.IndexInternalID("2"), + IndexInternalID: internalIDMaker(2), Score: 0.33875554280828685, }, { - IndexInternalID: index.IndexInternalID("3"), + IndexInternalID: internalIDMaker(3), Score: 0.33875554280828685, }, }, @@ -99,12 +121,14 @@ func TestRegexpSearch(t *testing.T) { next, err := test.searcher.Next(ctx) i := 0 for err == nil && next != nil { - if i < len(test.results) { - if !next.IndexInternalID.Equals(test.results[i].IndexInternalID) { - t.Errorf("expected result %d to have id %s got %s for test %d", i, test.results[i].IndexInternalID, next.IndexInternalID, testIndex) + if i < len(test.expecteds) { + if !next.IndexInternalID.Equals(test.expecteds[i].IndexInternalID) { + t.Errorf("test %d, expected result %d to have id %s got %s, next: %#v", + testIndex, i, test.expecteds[i].IndexInternalID, next.IndexInternalID, next) } - if next.Score != test.results[i].Score { - t.Errorf("expected result %d to have score %v got %v for test %d", i, test.results[i].Score, next.Score, testIndex) + if next.Score != test.expecteds[i].Score { + t.Errorf("test %d, expected result %d to have score %v got %v,next: %#v", + testIndex, i, test.expecteds[i].Score, next.Score, next) t.Logf("scoring explanation: %s", next.Expl) } } @@ -115,8 +139,8 @@ func TestRegexpSearch(t *testing.T) { if err != nil { t.Fatalf("error iterating searcher: %v for test %d", err, testIndex) } - if len(test.results) != i { - t.Errorf("expected %d results got %d for test %d", 
len(test.results), i, testIndex) + if len(test.expecteds) != i { + t.Errorf("expected %d results got %d for test %d", len(test.expecteds), i, testIndex) } } } From d4c0a3e0221c5530ed8b9c772243360e26294942 Mon Sep 17 00:00:00 2001 From: Sreekanth Sivasankaran Date: Mon, 13 Aug 2018 16:11:50 +0530 Subject: [PATCH 447/728] MB-29923 - high memory usage in scorch This change includes, -persister nap to favour in-memory merging -skip in-memory merging on memory pressure -skip persister pause on memory pressure -checks during persister wait to guard against an already advanced merger --- index/scorch/persister.go | 95 +++++++++++++++++++++++++++++-------- index/scorch/scorch.go | 43 ++++++++++++++--- index/scorch/scorch_test.go | 5 ++ test/versus_test.go | 6 +++ 4 files changed, 123 insertions(+), 26 deletions(-) diff --git a/index/scorch/persister.go b/index/scorch/persister.go index bb40c33f8..0de78c41b 100644 --- a/index/scorch/persister.go +++ b/index/scorch/persister.go @@ -17,6 +17,7 @@ package scorch import ( "bytes" "encoding/binary" + "encoding/json" "fmt" "io/ioutil" "log" @@ -35,16 +36,22 @@ import ( var DefaultChunkFactor uint32 = 1024 -// Arbitrary number, need to make it configurable. -// Lower values like 10/making persister really slow -// doesn't work well as it is creating more files to -// persist for in next persist iteration and spikes the # FDs. -// Ideal value should let persister also proceed at -// an optimum pace so that the merger can skip -// many intermediate snapshots. -// This needs to be based on empirical data. -// TODO - may need to revisit this approach/value. 
-var epochDistance = uint64(5) +var DefaultPersisterNapTimeMSec int = 2000 // ms + +var DefaultPersisterNapUnderNumFiles int = 1000 + +type persisterOptions struct { + // PersisterNapTimeMSec controls the wait/delay injected into + // persistence workloop to improve the chances for + // a healthier and heavier in-memory merging + PersisterNapTimeMSec int + + // PersisterNapTimeMSec > 0, and the number of files is less than + // PersisterNapUnderNumFiles, then the persister will sleep + // PersisterNapTimeMSec amount of time to improve the chances for + // a healthier and heavier in-memory merging + PersisterNapUnderNumFiles int +} type notificationChan chan struct{} @@ -54,6 +61,13 @@ func (s *Scorch) persisterLoop() { var persistWatchers []*epochWatcher var lastPersistedEpoch, lastMergedEpoch uint64 var ew *epochWatcher + po, err := s.parsePersisterOptions() + if err != nil { + s.fireAsyncError(fmt.Errorf("persisterOptions json parsing err: %v", err)) + s.asyncTasks.Done() + return + } + OUTER: for { atomic.AddUint64(&s.stats.TotPersistLoopBeg, 1) @@ -69,7 +83,7 @@ OUTER: lastMergedEpoch = ew.epoch } lastMergedEpoch, persistWatchers = s.pausePersisterForMergerCatchUp(lastPersistedEpoch, - lastMergedEpoch, persistWatchers) + lastMergedEpoch, persistWatchers, po) var ourSnapshot *IndexSnapshot var ourPersisted []chan error @@ -180,14 +194,31 @@ func notifyMergeWatchers(lastPersistedEpoch uint64, } func (s *Scorch) pausePersisterForMergerCatchUp(lastPersistedEpoch uint64, lastMergedEpoch uint64, - persistWatchers []*epochWatcher) (uint64, []*epochWatcher) { + persistWatchers []*epochWatcher, po *persisterOptions) (uint64, []*epochWatcher) { // first, let the watchers proceed if they lag behind persistWatchers = notifyMergeWatchers(lastPersistedEpoch, persistWatchers) + // check the merger lag by counting the segment files on disk, + // On finding fewer files on disk, persister takes a short pause + // for sufficient in-memory segments to pile up for the next + // 
memory merge cum persist loop. + // On finding too many files on disk, persister pause until the merger + // catches up to reduce the segment file count under the threshold. + // But if there is a memory pressue, then skip this sleep maneuver. + numFilesOnDisk, _ := s.diskFileStats() + if numFilesOnDisk < uint64(po.PersisterNapUnderNumFiles) && + po.PersisterNapTimeMSec > 0 && s.paused() == 0 { + select { + case <-s.closeCh: + case <-time.After(time.Millisecond * time.Duration(po.PersisterNapTimeMSec)): + } + return lastMergedEpoch, persistWatchers + } + OUTER: - // check for slow merger and await until the merger catch up - for lastPersistedEpoch > lastMergedEpoch+epochDistance { + for numFilesOnDisk > uint64(po.PersisterNapUnderNumFiles) && + lastMergedEpoch < lastPersistedEpoch { atomic.AddUint64(&s.stats.TotPersisterSlowMergerPause, 1) select { @@ -202,18 +233,42 @@ OUTER: // let the watchers proceed if they lag behind persistWatchers = notifyMergeWatchers(lastPersistedEpoch, persistWatchers) + + numFilesOnDisk, _ = s.diskFileStats() } return lastMergedEpoch, persistWatchers } -func (s *Scorch) persistSnapshot(snapshot *IndexSnapshot) error { - persisted, err := s.persistSnapshotMaybeMerge(snapshot) - if err != nil { - return err +func (s *Scorch) parsePersisterOptions() (*persisterOptions, error) { + po := persisterOptions{ + PersisterNapTimeMSec: DefaultPersisterNapTimeMSec, + PersisterNapUnderNumFiles: DefaultPersisterNapUnderNumFiles, } - if persisted { - return nil + if v, ok := s.config["scorchPersisterOptions"]; ok { + b, err := json.Marshal(v) + if err != nil { + return &po, err + } + + err = json.Unmarshal(b, &po) + if err != nil { + return &po, err + } + } + return &po, nil +} + +func (s *Scorch) persistSnapshot(snapshot *IndexSnapshot) error { + // perform in-memory merging only when there is no memory pressure + if s.paused() == 0 { + persisted, err := s.persistSnapshotMaybeMerge(snapshot) + if err != nil { + return err + } + if persisted { + return 
nil + } } return s.persistSnapshotDirect(snapshot) diff --git a/index/scorch/scorch.go b/index/scorch/scorch.go index 357836524..3980a8a82 100644 --- a/index/scorch/scorch.go +++ b/index/scorch/scorch.go @@ -73,6 +73,10 @@ type Scorch struct { onAsyncError func(err error) iStats internalStats + + pauseLock sync.RWMutex + + pauseCount uint64 } type internalStats struct { @@ -117,9 +121,30 @@ func NewScorch(storeName string, return rv, nil } +func (s *Scorch) paused() uint64 { + s.pauseLock.Lock() + pc := s.pauseCount + s.pauseLock.Unlock() + return pc +} + +func (s *Scorch) incrPause() { + s.pauseLock.Lock() + s.pauseCount++ + s.pauseLock.Unlock() +} + +func (s *Scorch) decrPause() { + s.pauseLock.Lock() + s.pauseCount-- + s.pauseLock.Unlock() +} + func (s *Scorch) fireEvent(kind EventKind, dur time.Duration) { if s.onEvent != nil { + s.incrPause() s.onEvent(Event{Kind: kind, Scorch: s, Duration: dur}) + s.decrPause() } } @@ -434,24 +459,30 @@ func (s *Scorch) currentSnapshot() *IndexSnapshot { func (s *Scorch) Stats() json.Marshaler { return &s.stats } -func (s *Scorch) StatsMap() map[string]interface{} { - m := s.stats.ToMap() +func (s *Scorch) diskFileStats() (uint64, uint64) { + var numFilesOnDisk, numBytesUsedDisk uint64 if s.path != "" { finfos, err := ioutil.ReadDir(s.path) if err == nil { - var numFilesOnDisk, numBytesUsedDisk uint64 for _, finfo := range finfos { if !finfo.IsDir() { numBytesUsedDisk += uint64(finfo.Size()) numFilesOnDisk++ } } - - m["CurOnDiskBytes"] = numBytesUsedDisk - m["CurOnDiskFiles"] = numFilesOnDisk } } + return numFilesOnDisk, numBytesUsedDisk +} + +func (s *Scorch) StatsMap() map[string]interface{} { + m := s.stats.ToMap() + + numFilesOnDisk, numBytesUsedDisk := s.diskFileStats() + + m["CurOnDiskBytes"] = numBytesUsedDisk + m["CurOnDiskFiles"] = numFilesOnDisk // TODO: consider one day removing these backwards compatible // names for apps using the old names diff --git a/index/scorch/scorch_test.go b/index/scorch/scorch_test.go 
index cf784755d..adcabd22f 100644 --- a/index/scorch/scorch_test.go +++ b/index/scorch/scorch_test.go @@ -33,6 +33,11 @@ import ( "github.com/blevesearch/bleve/mapping" ) +func init() { + // override for tests + DefaultPersisterNapTimeMSec = 1 +} + func DestroyTest() error { return os.RemoveAll("/tmp/bleve-scorch-test") } diff --git a/test/versus_test.go b/test/versus_test.go index dbc7dd752..10faa311e 100644 --- a/test/versus_test.go +++ b/test/versus_test.go @@ -41,6 +41,12 @@ import ( // go test -v -run TestScorchVersusUpsideDownBolt ./test // VERBOSE=1 FOCUS=Trista go test -v -run TestScorchVersusUpsideDownBolt ./test // + +func init() { + // override for tests + scorch.DefaultPersisterNapTimeMSec = 1 +} + func TestScorchVersusUpsideDownBoltAll(t *testing.T) { (&VersusTest{ t: t, From 11596bf28dd099f05b4854ffa642d16f116a2d8f Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Mon, 13 Aug 2018 12:54:03 -0700 Subject: [PATCH 448/728] NewRegexpStringSearcher() API allows scorch to parse regexp string The additional NewRegexpStringSearcher() API is introduced in this optimization, where this new constructor takes the raw regexp string as input, allowing the raw regexp string to be passed down to advanced index implementations (like scorch), to avoid extra regexp string parsings. Lower-level index interfaces are also correspondingly modified to take a regexp string as input. Of note, there are some future TODO's found as part of this commit... * syntax.Regexp supports a Simplify() API that should be explored. * the LiteralPrefix() optimization is no longer supported with this commit and should be revisited. From the golang regexp source code, the LiteralPrefix() computation given an input *syntax.Regexp appears to be non-trivial. 
See also: https://issues.couchbase.com/browse/MB-30264 --- index/index.go | 2 +- index/scorch/segment/empty.go | 2 +- index/scorch/segment/segment.go | 2 +- index/scorch/segment/zap/dict.go | 27 ++++++++--- index/scorch/snapshot_index.go | 2 +- index/scorch/snapshot_segment.go | 2 +- search/searcher/search_regexp.go | 79 ++++++++++++++++++++------------ 7 files changed, 75 insertions(+), 41 deletions(-) diff --git a/index/index.go b/index/index.go index d734c1db2..2b577e2e1 100644 --- a/index/index.go +++ b/index/index.go @@ -110,7 +110,7 @@ type Regexp interface { } type IndexReaderRegexp interface { - FieldDictRegexp(field string, regex Regexp) (FieldDict, error) + FieldDictRegexp(field string, regex string) (FieldDict, error) } type IndexReaderFuzzy interface { diff --git a/index/scorch/segment/empty.go b/index/scorch/segment/empty.go index 59b2af4f0..83968a11d 100644 --- a/index/scorch/segment/empty.go +++ b/index/scorch/segment/empty.go @@ -80,7 +80,7 @@ func (e *EmptyDictionary) RangeIterator(start, end string) DictionaryIterator { return &EmptyDictionaryIterator{} } -func (e *EmptyDictionary) RegexpIterator(r index.Regexp) DictionaryIterator { +func (e *EmptyDictionary) RegexpIterator(r string) DictionaryIterator { return &EmptyDictionaryIterator{} } diff --git a/index/scorch/segment/segment.go b/index/scorch/segment/segment.go index bc85aebc0..3fc315995 100644 --- a/index/scorch/segment/segment.go +++ b/index/scorch/segment/segment.go @@ -51,7 +51,7 @@ type TermDictionary interface { Iterator() DictionaryIterator PrefixIterator(prefix string) DictionaryIterator RangeIterator(start, end string) DictionaryIterator - RegexpIterator(regex index.Regexp) DictionaryIterator + RegexpIterator(regex string) DictionaryIterator FuzzyIterator(term string, fuzziness int) DictionaryIterator OnlyIterator(onlyTerms [][]byte, includeCount bool) DictionaryIterator } diff --git a/index/scorch/segment/zap/dict.go b/index/scorch/segment/zap/dict.go index ab4c8311e..522731fb3 
100644 --- a/index/scorch/segment/zap/dict.go +++ b/index/scorch/segment/zap/dict.go @@ -17,6 +17,7 @@ package zap import ( "bytes" "fmt" + "regexp/syntax" "github.com/RoaringBitmap/roaring" "github.com/blevesearch/bleve/index" @@ -178,18 +179,30 @@ func (d *Dictionary) RangeIterator(start, end string) segment.DictionaryIterator // RegexpIterator returns an iterator which only visits terms having the // the specified regex -func (d *Dictionary) RegexpIterator(rIn index.Regexp) segment.DictionaryIterator { - prefixTerm, complete := rIn.LiteralPrefix() - if complete { - return d.PrefixIterator(prefixTerm) - } - +func (d *Dictionary) RegexpIterator(expr string) segment.DictionaryIterator { rv := &DictionaryIterator{ d: d, } + parsed, err := syntax.Parse(expr, syntax.Perl) + if err != nil { + rv.err = err + return rv + } + + // TODO: potential optimization where syntax.Regexp supports a Simplify() API? + + var prefixTerm string + /* TODO: potential optimization of (re-)supporting LiteralPrefix(), + even perhaps a brute-force, naive prefix detection? 
+ prefixTerm, complete := rIn.LiteralPrefix() + if complete { + return d.PrefixIterator(prefixTerm) + } + */ + if d.fst != nil { - r, err := regexp.New(rIn.String()) + r, err := regexp.NewParsedWithLimit(expr, parsed, regexp.DefaultLimit) if err == nil { var prefixBeg, prefixEnd []byte if prefixTerm != "" { diff --git a/index/scorch/snapshot_index.go b/index/scorch/snapshot_index.go index dc2ace54c..48fa1faa1 100644 --- a/index/scorch/snapshot_index.go +++ b/index/scorch/snapshot_index.go @@ -179,7 +179,7 @@ func (i *IndexSnapshot) FieldDictPrefix(field string, } func (i *IndexSnapshot) FieldDictRegexp(field string, - termRegex index.Regexp) (index.FieldDict, error) { + termRegex string) (index.FieldDict, error) { return i.newIndexSnapshotFieldDict(field, func(i segment.TermDictionary) segment.DictionaryIterator { return i.RegexpIterator(termRegex) }) diff --git a/index/scorch/snapshot_segment.go b/index/scorch/snapshot_segment.go index 5b51981c1..90dbcb494 100644 --- a/index/scorch/snapshot_segment.go +++ b/index/scorch/snapshot_segment.go @@ -52,7 +52,7 @@ func (s *SegmentDictionarySnapshot) RangeIterator(start, end string) segment.Dic return s.d.RangeIterator(start, end) } -func (s *SegmentDictionarySnapshot) RegexpIterator(regex index.Regexp) segment.DictionaryIterator { +func (s *SegmentDictionarySnapshot) RegexpIterator(regex string) segment.DictionaryIterator { return s.d.RegexpIterator(regex) } diff --git a/search/searcher/search_regexp.go b/search/searcher/search_regexp.go index dc8573148..299d9cdbe 100644 --- a/search/searcher/search_regexp.go +++ b/search/searcher/search_regexp.go @@ -15,10 +15,52 @@ package searcher import ( + "regexp" + "github.com/blevesearch/bleve/index" "github.com/blevesearch/bleve/search" ) +// NewRegexpStringSearcher is similar to NewRegexpSearcher, but +// additionally optimizes for index readers that handle regexp's. 
+func NewRegexpStringSearcher(indexReader index.IndexReader, pattern string, + field string, boost float64, options search.SearcherOptions) ( + search.Searcher, error) { + ir, ok := indexReader.(index.IndexReaderRegexp) + if !ok { + r, err := regexp.Compile(pattern) + if err != nil { + return nil, err + } + + return NewRegexpSearcher(indexReader, r, field, boost, options) + } + + fieldDict, err := ir.FieldDictRegexp(field, pattern) + if err != nil { + return nil, err + } + defer func() { + if cerr := fieldDict.Close(); cerr != nil && err == nil { + err = cerr + } + }() + + var candidateTerms []string + + tfd, err := fieldDict.Next() + for err == nil && tfd != nil { + candidateTerms = append(candidateTerms, tfd.Term) + tfd, err = fieldDict.Next() + } + if err != nil { + return nil, err + } + + return NewMultiTermSearcher(indexReader, candidateTerms, field, boost, + options, true) +} + // NewRegexpSearcher creates a searcher which will match documents that // contain terms which match the pattern regexp. The match must be EXACT // matching the entire term. 
The provided regexp SHOULD NOT start with ^ @@ -28,39 +70,18 @@ func NewRegexpSearcher(indexReader index.IndexReader, pattern index.Regexp, field string, boost float64, options search.SearcherOptions) ( search.Searcher, error) { var candidateTerms []string - if ir, ok := indexReader.(index.IndexReaderRegexp); ok { - fieldDict, err := ir.FieldDictRegexp(field, pattern) - if err != nil { - return nil, err - } - defer func() { - if cerr := fieldDict.Close(); cerr != nil && err == nil { - err = cerr - } - }() - // enumerate the terms and check against regexp - tfd, err := fieldDict.Next() - for err == nil && tfd != nil { - candidateTerms = append(candidateTerms, tfd.Term) - tfd, err = fieldDict.Next() - } + prefixTerm, complete := pattern.LiteralPrefix() + if complete { + // there is no pattern + candidateTerms = []string{prefixTerm} + } else { + var err error + candidateTerms, err = findRegexpCandidateTerms(indexReader, pattern, field, + prefixTerm) if err != nil { return nil, err } - } else { - prefixTerm, complete := pattern.LiteralPrefix() - if complete { - // there is no pattern - candidateTerms = []string{prefixTerm} - } else { - var err error - candidateTerms, err = findRegexpCandidateTerms(indexReader, pattern, field, - prefixTerm) - if err != nil { - return nil, err - } - } } return NewMultiTermSearcher(indexReader, candidateTerms, field, boost, From 538612dbad353165f839631ae558e64ca928cc40 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Mon, 13 Aug 2018 13:15:30 -0700 Subject: [PATCH 449/728] regexp & wildcard queries use NewRegexpStringSearcher() API This commit is mainly a scorch targetted optimization, moving regexp string parsing from the regexp/wildcard query tier to the index tier for scorch. Additionally, the upside_down index backend should not have any regexp or wildcard query performance impact with this change. 
However, there is one behavior change, in that regexp input validation is now delayed until the searcher constructor, turning Validate() until a no-op. See also: https://issues.couchbase.com/browse/MB-30264 --- search/query/regexp.go | 37 +++++++++++-------------------------- search/query/wildcard.go | 23 +++++------------------ 2 files changed, 16 insertions(+), 44 deletions(-) diff --git a/search/query/regexp.go b/search/query/regexp.go index 09544fcf1..0c87a6f92 100644 --- a/search/query/regexp.go +++ b/search/query/regexp.go @@ -15,7 +15,6 @@ package query import ( - "regexp" "strings" "github.com/blevesearch/bleve/index" @@ -28,7 +27,6 @@ type RegexpQuery struct { Regexp string `json:"regexp"` FieldVal string `json:"field,omitempty"` BoostVal *Boost `json:"boost,omitempty"` - compiled *regexp.Regexp } // NewRegexpQuery creates a new Query which finds @@ -64,33 +62,20 @@ func (q *RegexpQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, opti if q.FieldVal == "" { field = m.DefaultSearchField() } - err := q.compile() - if err != nil { - return nil, err + + // require that pattern NOT be anchored to start and end of term. 
+ // do not attempt to remove trailing $, its presence is not + // known to interfere with LiteralPrefix() the way ^ does + // and removing $ introduces possible ambiguities with escaped \$, \\$, etc + actualRegexp := q.Regexp + if strings.HasPrefix(actualRegexp, "^") { + actualRegexp = actualRegexp[1:] // remove leading ^ } - return searcher.NewRegexpSearcher(i, q.compiled, field, q.BoostVal.Value(), options) + return searcher.NewRegexpStringSearcher(i, actualRegexp, field, + q.BoostVal.Value(), options) } func (q *RegexpQuery) Validate() error { - return q.compile() -} - -func (q *RegexpQuery) compile() error { - if q.compiled == nil { - // require that pattern NOT be anchored to start and end of term - actualRegexp := q.Regexp - if strings.HasPrefix(actualRegexp, "^") { - actualRegexp = actualRegexp[1:] // remove leading ^ - } - // do not attempt to remove trailing $, it's presence is not - // known to interfere with LiteralPrefix() the way ^ does - // and removing $ introduces possible ambiguities with escaped \$, \\$, etc - var err error - q.compiled, err = regexp.Compile(actualRegexp) - if err != nil { - return err - } - } - return nil + return nil // real validation delayed until searcher constructor } diff --git a/search/query/wildcard.go b/search/query/wildcard.go index 7fd7482c4..747dfe76f 100644 --- a/search/query/wildcard.go +++ b/search/query/wildcard.go @@ -15,7 +15,6 @@ package query import ( - "regexp" "strings" "github.com/blevesearch/bleve/index" @@ -47,7 +46,6 @@ type WildcardQuery struct { Wildcard string `json:"wildcard"` FieldVal string `json:"field,omitempty"` BoostVal *Boost `json:"boost,omitempty"` - compiled *regexp.Regexp } // NewWildcardQuery creates a new Query which finds @@ -83,24 +81,13 @@ func (q *WildcardQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, op if q.FieldVal == "" { field = m.DefaultSearchField() } - if q.compiled == nil { - var err error - q.compiled, err = q.convertToRegexp() - if err != nil { - return nil, 
err - } - } - return searcher.NewRegexpSearcher(i, q.compiled, field, q.BoostVal.Value(), options) -} + regexpString := wildcardRegexpReplacer.Replace(q.Wildcard) -func (q *WildcardQuery) Validate() error { - var err error - q.compiled, err = q.convertToRegexp() - return err + return searcher.NewRegexpStringSearcher(i, regexpString, field, + q.BoostVal.Value(), options) } -func (q *WildcardQuery) convertToRegexp() (*regexp.Regexp, error) { - regexpString := wildcardRegexpReplacer.Replace(q.Wildcard) - return regexp.Compile(regexpString) +func (q *WildcardQuery) Validate() error { + return nil // real validation delayed until searcher constructor } From aeb6995a8b4d4adab2e7bfd0c74bd275eca86f98 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Mon, 13 Aug 2018 15:07:30 -0700 Subject: [PATCH 450/728] added unit test for NewRegexpStringSearcher() See also: https://issues.couchbase.com/browse/MB-30264 --- search/searcher/search_regexp_test.go | 77 +++++++++++++++++---------- 1 file changed, 49 insertions(+), 28 deletions(-) diff --git a/search/searcher/search_regexp_test.go b/search/searcher/search_regexp_test.go index 2de1162a5..0c29068bd 100644 --- a/search/searcher/search_regexp_test.go +++ b/search/searcher/search_regexp_test.go @@ -26,58 +26,79 @@ import ( func TestRegexpSearchUpsideDown(t *testing.T) { twoDocIndex := initTwoDocUpsideDown() - testRegexpSearch(t, twoDocIndex, - func(id int) index.IndexInternalID { - return index.IndexInternalID(fmt.Sprintf("%d", id)) - }) + testRegexpSearch(t, twoDocIndex, internalIDMakerUpsideDown, searcherMaker) + _ = twoDocIndex.Close() +} + +func TestRegexpStringSearchUpsideDown(t *testing.T) { + twoDocIndex := initTwoDocUpsideDown() + testRegexpSearch(t, twoDocIndex, internalIDMakerUpsideDown, searcherStringMaker) _ = twoDocIndex.Close() } func TestRegexpSearchScorch(t *testing.T) { twoDocIndex := initTwoDocScorch() - testRegexpSearch(t, twoDocIndex, - func(id int) index.IndexInternalID { - buf := make([]byte, 8) - 
binary.BigEndian.PutUint64(buf, uint64(id)) - return index.IndexInternalID(buf) - }) + testRegexpSearch(t, twoDocIndex, internalIDMakerScorch, searcherMaker) _ = twoDocIndex.Close() } -func testRegexpSearch(t *testing.T, twoDocIndex index.Index, - internalIDMaker func(int) index.IndexInternalID) { - twoDocIndexReader, err := twoDocIndex.Reader() - if err != nil { - t.Error(err) - } - defer func() { - err := twoDocIndexReader.Close() - if err != nil { - t.Fatal(err) - } - }() +func TestRegexpStringSearchScorch(t *testing.T) { + twoDocIndex := initTwoDocScorch() + testRegexpSearch(t, twoDocIndex, internalIDMakerScorch, searcherStringMaker) + _ = twoDocIndex.Close() +} - explainTrue := search.SearcherOptions{Explain: true} +func internalIDMakerUpsideDown(id int) index.IndexInternalID { + return index.IndexInternalID(fmt.Sprintf("%d", id)) +} + +func internalIDMakerScorch(id int) index.IndexInternalID { + buf := make([]byte, 8) + binary.BigEndian.PutUint64(buf, uint64(id)) + return index.IndexInternalID(buf) +} - pattern, err := regexp.Compile("ma.*") +func searcherMaker(t *testing.T, ir index.IndexReader, re, field string) search.Searcher { + pattern, err := regexp.Compile(re) if err != nil { t.Fatal(err) } - regexpSearcher, err := NewRegexpSearcher(twoDocIndexReader, pattern, "name", 1.0, explainTrue) + regexpSearcher, err := NewRegexpSearcher(ir, pattern, field, 1.0, + search.SearcherOptions{Explain: true}) if err != nil { t.Fatal(err) } - patternCo, err := regexp.Compile("co.*") + return regexpSearcher +} + +func searcherStringMaker(t *testing.T, ir index.IndexReader, re, field string) search.Searcher { + regexpSearcher, err := NewRegexpStringSearcher(ir, re, field, 1.0, + search.SearcherOptions{Explain: true}) if err != nil { t.Fatal(err) } - regexpSearcherCo, err := NewRegexpSearcher(twoDocIndexReader, patternCo, "desc", 1.0, explainTrue) + return regexpSearcher +} + +func testRegexpSearch(t *testing.T, twoDocIndex index.Index, + internalIDMaker func(int) 
index.IndexInternalID, + searcherMaker func(t *testing.T, ir index.IndexReader, re, field string) search.Searcher) { + twoDocIndexReader, err := twoDocIndex.Reader() if err != nil { - t.Fatal(err) + t.Error(err) } + defer func() { + err := twoDocIndexReader.Close() + if err != nil { + t.Fatal(err) + } + }() + + regexpSearcher := searcherMaker(t, twoDocIndexReader, "ma.*", "name") + regexpSearcherCo := searcherMaker(t, twoDocIndexReader, "co.*", "desc") tests := []struct { searcher search.Searcher From e7a78b6e98cbe18203c20ecb970f45282686c00f Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Mon, 13 Aug 2018 15:57:44 -0700 Subject: [PATCH 451/728] scorch regexp leverages naive regexp literal prefix This commit re-introduces a literal prefix optimization to the scorch regexp dictionary iterator, albeit with only a simplified detection of a literal prefix from the regexp syntax parse tree. In contrast, the golang standard library's implementation of the literal prefix detection first converts the regexp syntax parse tree to a state machine (or "prog"), performs one-pass conversions/optimizations, and then examines that for any potential literal prefix in a more advanced approach. See also: https://issues.couchbase.com/browse/MB-30264 --- index/scorch/segment/zap/dict.go | 11 ++--- index/scorch/segment/zap/regexp.go | 38 +++++++++++++++++ index/scorch/segment/zap/regexp_test.go | 56 +++++++++++++++++++++++++ 3 files changed, 97 insertions(+), 8 deletions(-) create mode 100644 index/scorch/segment/zap/regexp.go create mode 100644 index/scorch/segment/zap/regexp_test.go diff --git a/index/scorch/segment/zap/dict.go b/index/scorch/segment/zap/dict.go index 522731fb3..3a9945fd4 100644 --- a/index/scorch/segment/zap/dict.go +++ b/index/scorch/segment/zap/dict.go @@ -191,15 +191,10 @@ func (d *Dictionary) RegexpIterator(expr string) segment.DictionaryIterator { } // TODO: potential optimization where syntax.Regexp supports a Simplify() API? 
+ // TODO: potential optimization where the literal prefix represents the, + // entire regexp, allowing us to use PrefixIterator(prefixTerm)? - var prefixTerm string - /* TODO: potential optimization of (re-)supporting LiteralPrefix(), - even perhaps a brute-force, naive prefix detection? - prefixTerm, complete := rIn.LiteralPrefix() - if complete { - return d.PrefixIterator(prefixTerm) - } - */ + prefixTerm := LiteralPrefix(parsed) if d.fst != nil { r, err := regexp.NewParsedWithLimit(expr, parsed, regexp.DefaultLimit) diff --git a/index/scorch/segment/zap/regexp.go b/index/scorch/segment/zap/regexp.go new file mode 100644 index 000000000..65c1beeb5 --- /dev/null +++ b/index/scorch/segment/zap/regexp.go @@ -0,0 +1,38 @@ +// Copyright (c) 2018 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package zap + +import ( + "regexp/syntax" +) + +// Returns the literal prefix given the parse tree for a regexp +func LiteralPrefix(s *syntax.Regexp) string { + // traverse the left-most branch in the parse tree as long as the + // node represents a concatenation + for s != nil && s.Op == syntax.OpConcat { + if len(s.Sub) < 1 { + return "" + } + + s = s.Sub[0] + } + + if s.Op == syntax.OpLiteral { + return string(s.Rune) + } + + return "" // no literal prefix +} diff --git a/index/scorch/segment/zap/regexp_test.go b/index/scorch/segment/zap/regexp_test.go new file mode 100644 index 000000000..83e1388dc --- /dev/null +++ b/index/scorch/segment/zap/regexp_test.go @@ -0,0 +1,56 @@ +// Copyright (c) 2018 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package zap + +import ( + "regexp/syntax" + "testing" +) + +func TestLiteralPrefix(t *testing.T) { + tests := []struct { + input, expected string + }{ + {"", ""}, + {"hello", "hello"}, + {"hello.?", "hello"}, + {"hello$", "hello"}, + {`[h][e][l][l][o].*world`, "hello"}, + {`[h-h][e-e][l-l][l-l][o-o].*world`, "hello"}, + {".*", ""}, + {"h.*", "h"}, + {"h.?", "h"}, + {"h[a-z]", "h"}, + {`h\s`, "h"}, + {`(hello)world`, ""}, + {`日本語`, "日本語"}, + {`日本語\w`, "日本語"}, + {`^hello`, ""}, + {`^`, ""}, + {`$`, ""}, + } + + for i, test := range tests { + s, err := syntax.Parse(test.input, syntax.Perl) + if err != nil { + t.Fatalf("expected no syntax.Parse error, got: %v", err) + } + + got := LiteralPrefix(s) + if test.expected != got { + t.Fatalf("test: %d, %+v, got: %s", i, test, got) + } + } +} From 7ec9fe0868ab735d605e49ebe11f86d25e22c427 Mon Sep 17 00:00:00 2001 From: abhinavdangeti Date: Tue, 14 Aug 2018 12:18:53 -0700 Subject: [PATCH 452/728] Update vendor/manifest to include vellum --- vendor/manifest | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/vendor/manifest b/vendor/manifest index 1883de76e..d45734e9e 100644 --- a/vendor/manifest +++ b/vendor/manifest @@ -132,6 +132,14 @@ "branch": "master", "path": "/unicode/norm", "notests": true + }, + { + "importpath": "github.com/couchbase/vellum", + "repository": "https://github.com/couchbase/vellum", + "vcs": "git", + "revision": "dc6110ee42850a6553b6469bf0dc03383900353d", + "branch": "master", + "notests": true } ] } From 0ba6248ccd51804e61ad79349ad4047469c1ee70 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Wed, 15 Aug 2018 00:08:50 -0700 Subject: [PATCH 453/728] optimize scorch regexp & fuzzy parsing across multiple segments The recent, previous optimization for regexp searches for scorch took 1 step forwards and N steps backwards, in that although it removed an extra regexp/syntax re-parsing, it inefficiently pushed the work of regexp/syntax and vellum/regexp handling too far down to the zap layer. 
That matters especially in the case when there's a large number (N) of zap segments in an index snapshot, meaning that conversion from regexp/syntax to vellum/regexp would happen (inefficiently) N times! That is, each zap segment would be independently re-performing the same conversion work. Instead, this commit more efficiently performs that conversion just once, at the index/scorch level, and reuses that resulting automaton across all N segments of the index snapshot. The literal prefix computation is similarly handled now just once and reused across all N segments of the index snapshot. Finally, the fuzzy levenshtein dict iteration follows the same pattern as regexp, so this optimization approach is now also used to handle levenshtein automaton iteration. Because both regexp and levenshtein are automatons, the previous two methods of RegexpIterator() and FuzzyIterator() have now been collapsed to a single segment/Segment.AutomatonIterator() method. --- index/scorch/segment/empty.go | 9 +-- index/scorch/segment/{zap => }/regexp.go | 39 +++++++++- index/scorch/segment/{zap => }/regexp_test.go | 2 +- index/scorch/segment/segment.go | 5 +- index/scorch/segment/zap/dict.go | 78 ++----------------- index/scorch/segment/zap/dict_test.go | 67 +++++++++++++++- index/scorch/snapshot_index.go | 18 ++++- index/scorch/snapshot_segment.go | 11 +-- 8 files changed, 137 insertions(+), 92 deletions(-) rename index/scorch/segment/{zap => }/regexp.go (53%) rename index/scorch/segment/{zap => }/regexp_test.go (98%) diff --git a/index/scorch/segment/empty.go b/index/scorch/segment/empty.go index 83968a11d..af50d0aaf 100644 --- a/index/scorch/segment/empty.go +++ b/index/scorch/segment/empty.go @@ -17,6 +17,7 @@ package segment import ( "github.com/RoaringBitmap/roaring" "github.com/blevesearch/bleve/index" + "github.com/couchbase/vellum" ) type EmptySegment struct{} @@ -80,12 +81,8 @@ func (e *EmptyDictionary) RangeIterator(start, end string) DictionaryIterator { return 
&EmptyDictionaryIterator{} } -func (e *EmptyDictionary) RegexpIterator(r string) DictionaryIterator { - return &EmptyDictionaryIterator{} -} - -func (e *EmptyDictionary) FuzzyIterator(term string, - fuzziness int) DictionaryIterator { +func (e *EmptyDictionary) AutomatonIterator(a vellum.Automaton, + startKeyInclusive, endKeyExclusive []byte) DictionaryIterator { return &EmptyDictionaryIterator{} } diff --git a/index/scorch/segment/zap/regexp.go b/index/scorch/segment/regexp.go similarity index 53% rename from index/scorch/segment/zap/regexp.go rename to index/scorch/segment/regexp.go index 65c1beeb5..3aa151d64 100644 --- a/index/scorch/segment/zap/regexp.go +++ b/index/scorch/segment/regexp.go @@ -12,12 +12,37 @@ // See the License for the specific language governing permissions and // limitations under the License. -package zap +package segment import ( "regexp/syntax" + + "github.com/couchbase/vellum/regexp" ) +func ParseRegexp(pattern string) (a *regexp.Regexp, prefixBeg, prefixEnd []byte, err error) { + // TODO: potential optimization where syntax.Regexp supports a Simplify() API? 
+ + parsed, err := syntax.Parse(pattern, syntax.Perl) + if err != nil { + return nil, nil, nil, err + } + + re, err := regexp.NewParsedWithLimit(pattern, parsed, regexp.DefaultLimit) + if err != nil { + return nil, nil, nil, err + } + + prefix := LiteralPrefix(parsed) + if prefix != "" { + prefixBeg := []byte(prefix) + prefixEnd := IncrementBytes(prefixBeg) + return re, prefixBeg, prefixEnd, nil + } + + return re, nil, nil, nil +} + // Returns the literal prefix given the parse tree for a regexp func LiteralPrefix(s *syntax.Regexp) string { // traverse the left-most branch in the parse tree as long as the @@ -36,3 +61,15 @@ func LiteralPrefix(s *syntax.Regexp) string { return "" // no literal prefix } + +func IncrementBytes(in []byte) []byte { + rv := make([]byte, len(in)) + copy(rv, in) + for i := len(rv) - 1; i >= 0; i-- { + rv[i] = rv[i] + 1 + if rv[i] != 0 { + return rv // didn't overflow, so stop + } + } + return nil // overflowed +} diff --git a/index/scorch/segment/zap/regexp_test.go b/index/scorch/segment/regexp_test.go similarity index 98% rename from index/scorch/segment/zap/regexp_test.go rename to index/scorch/segment/regexp_test.go index 83e1388dc..b4731d6b8 100644 --- a/index/scorch/segment/zap/regexp_test.go +++ b/index/scorch/segment/regexp_test.go @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-package zap +package segment import ( "regexp/syntax" diff --git a/index/scorch/segment/segment.go b/index/scorch/segment/segment.go index 3fc315995..28a879949 100644 --- a/index/scorch/segment/segment.go +++ b/index/scorch/segment/segment.go @@ -17,6 +17,7 @@ package segment import ( "github.com/RoaringBitmap/roaring" "github.com/blevesearch/bleve/index" + "github.com/couchbase/vellum" ) // DocumentFieldValueVisitor defines a callback to be visited for each @@ -51,8 +52,8 @@ type TermDictionary interface { Iterator() DictionaryIterator PrefixIterator(prefix string) DictionaryIterator RangeIterator(start, end string) DictionaryIterator - RegexpIterator(regex string) DictionaryIterator - FuzzyIterator(term string, fuzziness int) DictionaryIterator + AutomatonIterator(a vellum.Automaton, + startKeyInclusive, endKeyExclusive []byte) DictionaryIterator OnlyIterator(onlyTerms [][]byte, includeCount bool) DictionaryIterator } diff --git a/index/scorch/segment/zap/dict.go b/index/scorch/segment/zap/dict.go index 3a9945fd4..219bf1526 100644 --- a/index/scorch/segment/zap/dict.go +++ b/index/scorch/segment/zap/dict.go @@ -17,14 +17,11 @@ package zap import ( "bytes" "fmt" - "regexp/syntax" "github.com/RoaringBitmap/roaring" "github.com/blevesearch/bleve/index" "github.com/blevesearch/bleve/index/scorch/segment" "github.com/couchbase/vellum" - "github.com/couchbase/vellum/levenshtein" - "github.com/couchbase/vellum/regexp" ) // Dictionary is the zap representation of the term dictionary @@ -124,7 +121,7 @@ func (d *Dictionary) PrefixIterator(prefix string) segment.DictionaryIterator { } kBeg := []byte(prefix) - kEnd := incrementBytes(kBeg) + kEnd := segment.IncrementBytes(kBeg) if d.fst != nil { itr, err := d.fst.Iterator(kBeg, kEnd) @@ -138,18 +135,6 @@ func (d *Dictionary) PrefixIterator(prefix string) segment.DictionaryIterator { return rv } -func incrementBytes(in []byte) []byte { - rv := make([]byte, len(in)) - copy(rv, in) - for i := len(rv) - 1; i >= 0; i-- { - rv[i] 
= rv[i] + 1 - if rv[i] != 0 { - return rv // didn't overflow, so stop - } - } - return nil // overflowed -} - // RangeIterator returns an iterator which only visits terms between the // start and end terms. NOTE: bleve.index API specifies the end is inclusive. func (d *Dictionary) RangeIterator(start, end string) segment.DictionaryIterator { @@ -177,66 +162,19 @@ func (d *Dictionary) RangeIterator(start, end string) segment.DictionaryIterator return rv } -// RegexpIterator returns an iterator which only visits terms having the -// the specified regex -func (d *Dictionary) RegexpIterator(expr string) segment.DictionaryIterator { - rv := &DictionaryIterator{ - d: d, - } - - parsed, err := syntax.Parse(expr, syntax.Perl) - if err != nil { - rv.err = err - return rv - } - - // TODO: potential optimization where syntax.Regexp supports a Simplify() API? - // TODO: potential optimization where the literal prefix represents the, - // entire regexp, allowing us to use PrefixIterator(prefixTerm)? 
- - prefixTerm := LiteralPrefix(parsed) - - if d.fst != nil { - r, err := regexp.NewParsedWithLimit(expr, parsed, regexp.DefaultLimit) - if err == nil { - var prefixBeg, prefixEnd []byte - if prefixTerm != "" { - prefixBeg = []byte(prefixTerm) - prefixEnd = incrementBytes(prefixEnd) - } - - itr, err2 := d.fst.Search(r, prefixBeg, prefixEnd) - if err2 == nil { - rv.itr = itr - } else if err2 != nil && err2 != vellum.ErrIteratorDone { - rv.err = err2 - } - } else { - rv.err = err - } - } - - return rv -} - -// FuzzyIterator returns an iterator which only visits terms having the -// the specified edit/levenshtein distance -func (d *Dictionary) FuzzyIterator(term string, - fuzziness int) segment.DictionaryIterator { +// AutomatonIterator returns an iterator which only visits terms +// having the the vellum automaton and start/end key range +func (d *Dictionary) AutomatonIterator(a vellum.Automaton, + startKeyInclusive, endKeyExclusive []byte) segment.DictionaryIterator { rv := &DictionaryIterator{ d: d, } if d.fst != nil { - la, err := levenshtein.New(term, fuzziness) + itr, err := d.fst.Search(a, startKeyInclusive, endKeyExclusive) if err == nil { - itr, err2 := d.fst.Search(la, nil, nil) - if err2 == nil { - rv.itr = itr - } else if err2 != nil && err2 != vellum.ErrIteratorDone { - rv.err = err2 - } - } else { + rv.itr = itr + } else if err != nil && err != vellum.ErrIteratorDone { rv.err = err } } diff --git a/index/scorch/segment/zap/dict_test.go b/index/scorch/segment/zap/dict_test.go index 8cbd5710b..b654bf45f 100644 --- a/index/scorch/segment/zap/dict_test.go +++ b/index/scorch/segment/zap/dict_test.go @@ -22,6 +22,7 @@ import ( "github.com/blevesearch/bleve/analysis" "github.com/blevesearch/bleve/document" "github.com/blevesearch/bleve/index" + "github.com/couchbase/vellum/levenshtein" ) func buildTestSegmentForDict() (*SegmentBase, uint64, error) { @@ -205,7 +206,11 @@ func TestDictionaryError(t *testing.T) { t.Fatal(err) } - itr := 
dict.FuzzyIterator("summer", 5) + a, err := levenshtein.New("summer", 2) + if err != nil { + t.Fatal(err) + } + itr := dict.AutomatonIterator(a, nil, nil) if itr == nil { t.Fatalf("got nil itr") } @@ -213,8 +218,64 @@ func TestDictionaryError(t *testing.T) { if nxt != nil { t.Fatalf("expected nil next") } - if err == nil { - t.Fatalf("expected error from iterator") + if err != nil { + t.Fatalf("expected nil error from iterator, got: %v", err) } + a, err = levenshtein.New("cat", 1) // cat & bat + if err != nil { + t.Fatal(err) + } + itr = dict.AutomatonIterator(a, nil, nil) + if itr == nil { + t.Fatalf("got nil itr") + } + for i := 0; i < 2; i++ { + nxt, err = itr.Next() + if nxt == nil || err != nil { + t.Fatalf("expected non-nil next and nil err, got: %v, %v", nxt, err) + } + } + nxt, err = itr.Next() + if nxt != nil || err != nil { + t.Fatalf("expected nil next and nil err, got: %v, %v", nxt, err) + } + + a, err = levenshtein.New("cat", 2) // cat & bat + if err != nil { + t.Fatal(err) + } + itr = dict.AutomatonIterator(a, nil, nil) + if itr == nil { + t.Fatalf("got nil itr") + } + for i := 0; i < 2; i++ { + nxt, err = itr.Next() + if nxt == nil || err != nil { + t.Fatalf("expected non-nil next and nil err, got: %v, %v", nxt, err) + } + } + nxt, err = itr.Next() + if nxt != nil || err != nil { + t.Fatalf("expected nil next and nil err, got: %v, %v", nxt, err) + } + + a, err = levenshtein.New("cat", 3) + if err != nil { + t.Fatal(err) + } + itr = dict.AutomatonIterator(a, nil, nil) + if itr == nil { + t.Fatalf("got nil itr") + } + for i := 0; i < 5; i++ { + nxt, err = itr.Next() + if nxt == nil || err != nil { + t.Fatalf("expected non-nil next and nil err, got: %v, %v", nxt, err) + } + } + nxt, err = itr.Next() + if nxt != nil || err != nil { + t.Fatalf("expected nil next and nil err, got: %v, %v", nxt, err) + } } diff --git a/index/scorch/snapshot_index.go b/index/scorch/snapshot_index.go index 48fa1faa1..4722d9559 100644 --- a/index/scorch/snapshot_index.go +++ 
b/index/scorch/snapshot_index.go @@ -27,6 +27,7 @@ import ( "github.com/blevesearch/bleve/document" "github.com/blevesearch/bleve/index" "github.com/blevesearch/bleve/index/scorch/segment" + "github.com/couchbase/vellum/levenshtein" ) type asynchSegmentResult struct { @@ -180,15 +181,28 @@ func (i *IndexSnapshot) FieldDictPrefix(field string, func (i *IndexSnapshot) FieldDictRegexp(field string, termRegex string) (index.FieldDict, error) { + // TODO: potential optimization where the literal prefix represents the, + // entire regexp, allowing us to use PrefixIterator(prefixTerm)? + + a, prefixBeg, prefixEnd, err := segment.ParseRegexp(termRegex) + if err != nil { + return nil, err + } + return i.newIndexSnapshotFieldDict(field, func(i segment.TermDictionary) segment.DictionaryIterator { - return i.RegexpIterator(termRegex) + return i.AutomatonIterator(a, prefixBeg, prefixEnd) }) } func (i *IndexSnapshot) FieldDictFuzzy(field string, term []byte, fuzziness int) (index.FieldDict, error) { + a, err := levenshtein.New(string(term), fuzziness) + if err != nil { + return nil, err + } + return i.newIndexSnapshotFieldDict(field, func(i segment.TermDictionary) segment.DictionaryIterator { - return i.FuzzyIterator(string(term), fuzziness) + return i.AutomatonIterator(a, nil, nil) }) } diff --git a/index/scorch/snapshot_segment.go b/index/scorch/snapshot_segment.go index 90dbcb494..1cc8b76c2 100644 --- a/index/scorch/snapshot_segment.go +++ b/index/scorch/snapshot_segment.go @@ -23,6 +23,7 @@ import ( "github.com/blevesearch/bleve/index" "github.com/blevesearch/bleve/index/scorch/segment" "github.com/blevesearch/bleve/size" + "github.com/couchbase/vellum" ) var TermSeparator byte = 0xff @@ -52,13 +53,9 @@ func (s *SegmentDictionarySnapshot) RangeIterator(start, end string) segment.Dic return s.d.RangeIterator(start, end) } -func (s *SegmentDictionarySnapshot) RegexpIterator(regex string) segment.DictionaryIterator { - return s.d.RegexpIterator(regex) -} - -func (s 
*SegmentDictionarySnapshot) FuzzyIterator(term string, - fuzziness int) segment.DictionaryIterator { - return s.d.FuzzyIterator(term, fuzziness) +func (s *SegmentDictionarySnapshot) AutomatonIterator(a vellum.Automaton, + startKeyInclusive, endKeyExclusive []byte) segment.DictionaryIterator { + return s.d.AutomatonIterator(a, startKeyInclusive, endKeyExclusive) } func (s *SegmentDictionarySnapshot) OnlyIterator(onlyTerms [][]byte, From 3cb6fd4c12ca387ddf319efdadeb4bfbeae007ee Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Wed, 15 Aug 2018 11:17:43 -0700 Subject: [PATCH 454/728] optimize scorch fuzzy if prefix is available In this commit, the advanced/optional FieldDictFuzzy() method is modified to propagate the prefix string, if available. See also: https://issues.couchbase.com/browse/MB-30456 --- index/index.go | 2 +- index/scorch/snapshot_index.go | 12 ++++++--- search/searcher/search_fuzzy.go | 48 ++++++++++++++++----------------- 3 files changed, 34 insertions(+), 28 deletions(-) diff --git a/index/index.go b/index/index.go index 2b577e2e1..62128c3c2 100644 --- a/index/index.go +++ b/index/index.go @@ -114,7 +114,7 @@ type IndexReaderRegexp interface { } type IndexReaderFuzzy interface { - FieldDictFuzzy(field string, term []byte, fuzziness int) (FieldDict, error) + FieldDictFuzzy(field string, term string, fuzziness int, prefix string) (FieldDict, error) } type IndexReaderOnly interface { diff --git a/index/scorch/snapshot_index.go b/index/scorch/snapshot_index.go index 4722d9559..97266a900 100644 --- a/index/scorch/snapshot_index.go +++ b/index/scorch/snapshot_index.go @@ -195,14 +195,20 @@ func (i *IndexSnapshot) FieldDictRegexp(field string, } func (i *IndexSnapshot) FieldDictFuzzy(field string, - term []byte, fuzziness int) (index.FieldDict, error) { - a, err := levenshtein.New(string(term), fuzziness) + term string, fuzziness int, prefix string) (index.FieldDict, error) { + a, err := levenshtein.New(term, fuzziness) if err != nil { return nil, err } + var 
prefixBeg, prefixEnd []byte + if prefix != "" { + prefixBeg = []byte(prefix) + prefixEnd = segment.IncrementBytes(prefixBeg) + } + return i.newIndexSnapshotFieldDict(field, func(i segment.TermDictionary) segment.DictionaryIterator { - return i.AutomatonIterator(a, nil, nil) + return i.AutomatonIterator(a, prefixBeg, prefixEnd) }) } diff --git a/search/searcher/search_fuzzy.go b/search/searcher/search_fuzzy.go index 1ce3ba71d..b99528af4 100644 --- a/search/searcher/search_fuzzy.go +++ b/search/searcher/search_fuzzy.go @@ -54,34 +54,34 @@ func findFuzzyCandidateTerms(indexReader index.IndexReader, term string, fuzziness int, field, prefixTerm string) (rv []string, err error) { rv = make([]string, 0) - var fieldDict index.FieldDict - if len(prefixTerm) > 0 { - fieldDict, err = indexReader.FieldDictPrefix(field, []byte(prefixTerm)) - } else { - // in case of advanced reader implementations directly call - // the levenshtein automaton based iterator to collect the - // candidate terms - if ir, ok := indexReader.(index.IndexReaderFuzzy); ok { - fieldDict, err = ir.FieldDictFuzzy(field, []byte(term), fuzziness) - if err != nil { - return nil, err + // in case of advanced reader implementations directly call + // the levenshtein automaton based iterator to collect the + // candidate terms + if ir, ok := indexReader.(index.IndexReaderFuzzy); ok { + fieldDict, err := ir.FieldDictFuzzy(field, term, fuzziness, prefixTerm) + if err != nil { + return nil, err + } + defer func() { + if cerr := fieldDict.Close(); cerr != nil && err == nil { + err = cerr } - defer func() { - if cerr := fieldDict.Close(); cerr != nil && err == nil { - err = cerr - } - }() - tfd, err := fieldDict.Next() - for err == nil && tfd != nil { - rv = append(rv, tfd.Term) - if tooManyClauses(len(rv)) { - return nil, tooManyClausesErr() - } - tfd, err = fieldDict.Next() + }() + tfd, err := fieldDict.Next() + for err == nil && tfd != nil { + rv = append(rv, tfd.Term) + if tooManyClauses(len(rv)) { + return nil, 
tooManyClausesErr() } - return rv, err + tfd, err = fieldDict.Next() } + return rv, err + } + var fieldDict index.FieldDict + if len(prefixTerm) > 0 { + fieldDict, err = indexReader.FieldDictPrefix(field, []byte(prefixTerm)) + } else { fieldDict, err = indexReader.FieldDict(field) } if err != nil { From cdd8ff3d30e8250af22d2cc3c68cce02d917e58c Mon Sep 17 00:00:00 2001 From: abhinavdangeti Date: Tue, 14 Aug 2018 18:33:27 -0700 Subject: [PATCH 455/728] MB-30776: [Scorch] Merger's enumerator to not skip empty terms Fixes: https://github.com/blevesearch/bleve/issues/967 --- index/scorch/segment/zap/enumerator.go | 16 ++++--- index/scorch/segment/zap/merge.go | 17 +++---- search_test.go | 63 ++++++++++++++++++++++++++ 3 files changed, 78 insertions(+), 18 deletions(-) diff --git a/index/scorch/segment/zap/enumerator.go b/index/scorch/segment/zap/enumerator.go index 3c708dd57..cd6ff73c7 100644 --- a/index/scorch/segment/zap/enumerator.go +++ b/index/scorch/segment/zap/enumerator.go @@ -46,26 +46,27 @@ func newEnumerator(itrs []vellum.Iterator) (*enumerator, error) { for i, itr := range rv.itrs { rv.currKs[i], rv.currVs[i] = itr.Current() } - rv.updateMatches() - if rv.lowK == nil { + rv.updateMatches(false) + if rv.lowK == nil && len(rv.lowIdxs) == 0 { return rv, vellum.ErrIteratorDone } return rv, nil } // updateMatches maintains the low key matches based on the currKs -func (m *enumerator) updateMatches() { +func (m *enumerator) updateMatches(skipEmptyKey bool) { m.lowK = nil m.lowIdxs = m.lowIdxs[:0] m.lowCurr = 0 for i, key := range m.currKs { - if key == nil { + if (key == nil && m.currVs[i] == 0) || // in case of empty iterator + (len(key) == 0 && skipEmptyKey) { // skip empty keys continue } cmp := bytes.Compare(key, m.lowK) - if cmp < 0 || m.lowK == nil { + if cmp < 0 || len(m.lowIdxs) == 0 { // reached a new low m.lowK = key m.lowIdxs = m.lowIdxs[:0] @@ -102,9 +103,10 @@ func (m *enumerator) Next() error { } m.currKs[vi], m.currVs[vi] = m.itrs[vi].Current() } - 
m.updateMatches() + // can skip any empty keys encountered at this point + m.updateMatches(true) } - if m.lowK == nil { + if m.lowK == nil && len(m.lowIdxs) == 0 { return vellum.ErrIteratorDone } return nil diff --git a/index/scorch/segment/zap/merge.go b/index/scorch/segment/zap/merge.go index c735caad3..37e391bab 100644 --- a/index/scorch/segment/zap/merge.go +++ b/index/scorch/segment/zap/merge.go @@ -243,10 +243,6 @@ func persistMergedRest(segments []*SegmentBase, dropsIn []*roaring.Bitmap, } finishTerm := func(term []byte) error { - if term == nil { - return nil - } - tfEncoder.Close() locEncoder.Close() @@ -283,17 +279,16 @@ func persistMergedRest(segments []*SegmentBase, dropsIn []*roaring.Bitmap, if !bytes.Equal(prevTerm, term) { // if the term changed, write out the info collected // for the previous term - err2 := finishTerm(prevTerm) - if err2 != nil { - return nil, 0, err2 + err = finishTerm(prevTerm) + if err != nil { + return nil, 0, err } } - var err2 error - postings, err2 = dicts[itrI].postingsListFromOffset( + postings, err = dicts[itrI].postingsListFromOffset( postingsOffset, drops[itrI], postings) - if err2 != nil { - return nil, 0, err2 + if err != nil { + return nil, 0, err } postItr = postings.iterator(true, true, true, postItr) diff --git a/search_test.go b/search_test.go index 7fcc71d6e..e7a878653 100644 --- a/search_test.go +++ b/search_test.go @@ -26,10 +26,14 @@ import ( "github.com/blevesearch/bleve/analysis" "github.com/blevesearch/bleve/analysis/analyzer/custom" + "github.com/blevesearch/bleve/analysis/analyzer/keyword" + "github.com/blevesearch/bleve/analysis/analyzer/standard" "github.com/blevesearch/bleve/analysis/token/lowercase" "github.com/blevesearch/bleve/analysis/tokenizer/single" "github.com/blevesearch/bleve/analysis/tokenizer/whitespace" "github.com/blevesearch/bleve/document" + "github.com/blevesearch/bleve/index/scorch" + "github.com/blevesearch/bleve/mapping" "github.com/blevesearch/bleve/search" 
"github.com/blevesearch/bleve/search/query" ) @@ -542,3 +546,62 @@ func TestNestedBooleanSearchers(t *testing.T) { t.Fatalf("Unexpected result set, %v != %v", matches, len(searchResults.Hits)) } } + +func TestSearchScorchOverEmptyKeyword(t *testing.T) { + defaultIndexType := Config.DefaultIndexType + + defer func() { + err := os.RemoveAll("testidx") + if err != nil { + t.Fatal(err) + } + Config.DefaultIndexType = defaultIndexType + }() + + Config.DefaultIndexType = scorch.Name + + dmap := mapping.NewDocumentMapping() + dmap.DefaultAnalyzer = standard.Name + + fm := mapping.NewTextFieldMapping() + fm.Analyzer = keyword.Name + + fm1 := mapping.NewTextFieldMapping() + fm1.Analyzer = standard.Name + + dmap.AddFieldMappingsAt("id", fm) + dmap.AddFieldMappingsAt("name", fm1) + + imap := mapping.NewIndexMapping() + imap.DefaultMapping = dmap + imap.DefaultAnalyzer = standard.Name + + idx, err := New("testidx", imap) + if err != nil { + t.Fatal(err) + } + for i := 0; i < 10; i++ { + err = idx.Index(fmt.Sprint(i), map[string]string{"name": fmt.Sprintf("test%d", i), "id": ""}) + if err != nil { + t.Fatal(err) + } + } + + count, err := idx.DocCount() + if err != nil { + t.Fatal(err) + } + if count != 10 { + t.Fatalf("Unexpected doc count: %v, expected 10", count) + } + + q := query.NewWildcardQuery("test*") + sr := NewSearchRequestOptions(q, 40, 0, false) + res, err := idx.Search(sr) + if err != nil { + t.Fatal(err) + } + if res.Total != 10 { + t.Fatalf("Unexpected search hits: %v, expected 10", res.Total) + } +} From af1fc320ab846bf2154db8c1a97928a1c946808a Mon Sep 17 00:00:00 2001 From: Sreekanth Sivasankaran Date: Thu, 16 Aug 2018 13:31:05 +0530 Subject: [PATCH 456/728] unblock merger while persister in nap --- index/scorch/persister.go | 13 +++++++++++-- index/scorch/stats.go | 3 +++ 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/index/scorch/persister.go b/index/scorch/persister.go index 0de78c41b..5e9450189 100644 --- a/index/scorch/persister.go +++ 
b/index/scorch/persister.go @@ -205,19 +205,28 @@ func (s *Scorch) pausePersisterForMergerCatchUp(lastPersistedEpoch uint64, lastM // memory merge cum persist loop. // On finding too many files on disk, persister pause until the merger // catches up to reduce the segment file count under the threshold. - // But if there is a memory pressue, then skip this sleep maneuver. + // But if there is memory pressure, then skip this sleep maneuvers. numFilesOnDisk, _ := s.diskFileStats() if numFilesOnDisk < uint64(po.PersisterNapUnderNumFiles) && po.PersisterNapTimeMSec > 0 && s.paused() == 0 { select { case <-s.closeCh: case <-time.After(time.Millisecond * time.Duration(po.PersisterNapTimeMSec)): + atomic.AddUint64(&s.stats.TotPersisterNapPauseCompleted, 1) + + case ew := <-s.persisterNotifier: + // unblock the merger in meantime + persistWatchers = append(persistWatchers, ew) + lastMergedEpoch = ew.epoch + persistWatchers = notifyMergeWatchers(lastPersistedEpoch, persistWatchers) + atomic.AddUint64(&s.stats.TotPersisterMergerNapBreak, 1) } return lastMergedEpoch, persistWatchers } OUTER: - for numFilesOnDisk > uint64(po.PersisterNapUnderNumFiles) && + for po.PersisterNapUnderNumFiles > 0 && + numFilesOnDisk >= uint64(po.PersisterNapUnderNumFiles) && lastMergedEpoch < lastPersistedEpoch { atomic.AddUint64(&s.stats.TotPersisterSlowMergerPause, 1) diff --git a/index/scorch/stats.go b/index/scorch/stats.go index d4e07f6b4..353641370 100644 --- a/index/scorch/stats.go +++ b/index/scorch/stats.go @@ -74,6 +74,9 @@ type Stats struct { TotPersisterSlowMergerPause uint64 TotPersisterSlowMergerResume uint64 + TotPersisterNapPauseCompleted uint64 + TotPersisterMergerNapBreak uint64 + TotFileMergeLoopBeg uint64 TotFileMergeLoopErr uint64 TotFileMergeLoopEnd uint64 From 92d330cd555c971a7acf1356376f1c3ccf85a19d Mon Sep 17 00:00:00 2001 From: Sreekanth Sivasankaran Date: Sat, 18 Aug 2018 23:51:33 +0530 Subject: [PATCH 457/728] MB-30943 - incorrect num_recs_to_persist stat Fixing the 
num_recs_to_persist stat --- index/scorch/introducer.go | 37 ++++++++++++++++++++++++++++++++++++- index/scorch/scorch.go | 1 + index/scorch/stats.go | 2 ++ 3 files changed, 39 insertions(+), 1 deletion(-) diff --git a/index/scorch/introducer.go b/index/scorch/introducer.go index 9f42a5b36..dae45f331 100644 --- a/index/scorch/introducer.go +++ b/index/scorch/introducer.go @@ -20,6 +20,7 @@ import ( "github.com/RoaringBitmap/roaring" "github.com/blevesearch/bleve/index/scorch/segment" + "github.com/blevesearch/bleve/index/scorch/segment/zap" ) type segmentIntroduction struct { @@ -126,6 +127,7 @@ func (s *Scorch) introduceSegment(next *segmentIntroduction) error { // iterate through current segments var running uint64 + var docsToPersistCount uint64 for i := range root.segment { // see if optimistic work included this segment delta, ok := next.obsoletes[root.segment[i].id] @@ -164,8 +166,14 @@ func (s *Scorch) introduceSegment(next *segmentIntroduction) error { newSnapshot.offsets = append(newSnapshot.offsets, running) running += newss.segment.Count() } + + if isMemorySegment(root.segment[i]) { + docsToPersistCount += root.segment[i].Count() + } } + atomic.StoreUint64(&s.stats.TotItemsToPersist, docsToPersistCount) + // append new segment, if any, to end of the new index snapshot if next.data != nil { newSegmentSnapshot := &SegmentSnapshot{ @@ -241,6 +249,7 @@ func (s *Scorch) introducePersist(persist *persistIntroduction) { creator: "introducePersist", } + var docsToPersistCount uint64 for i, segmentSnapshot := range root.segment { // see if this segment has been replaced if replacement, ok := persist.persisted[segmentSnapshot.id]; ok { @@ -260,6 +269,10 @@ func (s *Scorch) introducePersist(persist *persistIntroduction) { } else { newIndexSnapshot.segment[i] = root.segment[i] newIndexSnapshot.segment[i].segment.AddRef() + + if isMemorySegment(root.segment[i]) { + docsToPersistCount += root.segment[i].Count() + } } newIndexSnapshot.offsets[i] = root.offsets[i] } @@ 
-268,6 +281,8 @@ func (s *Scorch) introducePersist(persist *persistIntroduction) { newIndexSnapshot.internal[k] = v } + atomic.StoreUint64(&s.stats.TotItemsToPersist, docsToPersistCount) + newIndexSnapshot.updateSize() s.rootLock.Lock() rootPrev := s.root @@ -302,7 +317,7 @@ func (s *Scorch) introduceMerge(nextMerge *segmentMerge) { // iterate through current segments newSegmentDeleted := roaring.NewBitmap() - var running uint64 + var running, docsToPersistCount uint64 for i := range root.segment { segmentID := root.segment[i].id if segSnapAtMerge, ok := nextMerge.old[segmentID]; ok { @@ -338,7 +353,12 @@ func (s *Scorch) introduceMerge(nextMerge *segmentMerge) { root.segment[i].segment.AddRef() newSnapshot.offsets = append(newSnapshot.offsets, running) running += root.segment[i].segment.Count() + + if isMemorySegment(root.segment[i]) { + docsToPersistCount += root.segment[i].Count() + } } + } // before the newMerge introduction, need to clean the newly @@ -369,8 +389,15 @@ func (s *Scorch) introduceMerge(nextMerge *segmentMerge) { }) newSnapshot.offsets = append(newSnapshot.offsets, running) atomic.AddUint64(&s.stats.TotIntroducedSegmentsMerge, 1) + + switch nextMerge.new.(type) { + case *zap.SegmentBase: + docsToPersistCount += nextMerge.new.Count() - newSegmentDeleted.GetCardinality() + } } + atomic.StoreUint64(&s.stats.TotItemsToPersist, docsToPersistCount) + newSnapshot.AddRef() // 1 ref for the nextMerge.notify response newSnapshot.updateSize() @@ -393,6 +420,14 @@ func (s *Scorch) introduceMerge(nextMerge *segmentMerge) { close(nextMerge.notify) } +func isMemorySegment(s *SegmentSnapshot) bool { + switch s.segment.(type) { + case *zap.SegmentBase: + return true + } + return false +} + func (s *Scorch) revertToSnapshot(revertTo *snapshotReversion) error { atomic.AddUint64(&s.stats.TotIntroduceRevertBeg, 1) defer atomic.AddUint64(&s.stats.TotIntroduceRevertEnd, 1) diff --git a/index/scorch/scorch.go b/index/scorch/scorch.go index 3980a8a82..f182e83c4 100644 
--- a/index/scorch/scorch.go +++ b/index/scorch/scorch.go @@ -497,6 +497,7 @@ func (s *Scorch) StatsMap() map[string]interface{} { m["num_plain_text_bytes_indexed"] = m["TotIndexedPlainTextBytes"] m["num_items_introduced"] = m["TotIntroducedItems"] m["num_items_persisted"] = m["TotPersistedItems"] + m["num_recs_to_persist"] = m["TotItemsToPersist"] m["num_bytes_used_disk"] = m["CurOnDiskBytes"] m["num_files_on_disk"] = m["CurOnDiskFiles"] m["total_compaction_written_bytes"] = m["TotFileMergeWrittenBytes"] diff --git a/index/scorch/stats.go b/index/scorch/stats.go index 353641370..eb6d946be 100644 --- a/index/scorch/stats.go +++ b/index/scorch/stats.go @@ -61,6 +61,8 @@ type Stats struct { TotIntroducedSegmentsBatch uint64 TotIntroducedSegmentsMerge uint64 + TotItemsToPersist uint64 + TotPersistLoopBeg uint64 TotPersistLoopErr uint64 TotPersistLoopProgress uint64 From 7f2f365802df79d4babd43542c7de2b85d7845ed Mon Sep 17 00:00:00 2001 From: Sreekanth Sivasankaran Date: Sun, 19 Aug 2018 15:26:27 +0530 Subject: [PATCH 458/728] more stats for root segments adding file,mem segment stats for currrent root --- index/scorch/introducer.go | 54 +++++++++++++++++++++++++++++--------- index/scorch/scorch.go | 2 ++ index/scorch/stats.go | 21 ++++++++------- 3 files changed, 55 insertions(+), 22 deletions(-) diff --git a/index/scorch/introducer.go b/index/scorch/introducer.go index dae45f331..f17b3bc5b 100644 --- a/index/scorch/introducer.go +++ b/index/scorch/introducer.go @@ -127,7 +127,7 @@ func (s *Scorch) introduceSegment(next *segmentIntroduction) error { // iterate through current segments var running uint64 - var docsToPersistCount uint64 + var docsToPersistCount, memSegments, fileSegments uint64 for i := range root.segment { // see if optimistic work included this segment delta, ok := next.obsoletes[root.segment[i].id] @@ -169,10 +169,15 @@ func (s *Scorch) introduceSegment(next *segmentIntroduction) error { if isMemorySegment(root.segment[i]) { docsToPersistCount += 
root.segment[i].Count() + memSegments++ + } else { + fileSegments++ } } atomic.StoreUint64(&s.stats.TotItemsToPersist, docsToPersistCount) + atomic.StoreUint64(&s.stats.TotMemorySegmentsAtRoot, memSegments) + atomic.StoreUint64(&s.stats.TotFileSegmentsAtRoot, fileSegments) // append new segment, if any, to end of the new index snapshot if next.data != nil { @@ -249,7 +254,7 @@ func (s *Scorch) introducePersist(persist *persistIntroduction) { creator: "introducePersist", } - var docsToPersistCount uint64 + var docsToPersistCount, memSegments, fileSegments uint64 for i, segmentSnapshot := range root.segment { // see if this segment has been replaced if replacement, ok := persist.persisted[segmentSnapshot.id]; ok { @@ -266,12 +271,16 @@ func (s *Scorch) introducePersist(persist *persistIntroduction) { // update items persisted incase of a new segment snapshot atomic.AddUint64(&s.stats.TotPersistedItems, newSegmentSnapshot.Count()) atomic.AddUint64(&s.stats.TotPersistedSegments, 1) + fileSegments++ } else { newIndexSnapshot.segment[i] = root.segment[i] newIndexSnapshot.segment[i].segment.AddRef() if isMemorySegment(root.segment[i]) { docsToPersistCount += root.segment[i].Count() + memSegments++ + } else { + fileSegments++ } } newIndexSnapshot.offsets[i] = root.offsets[i] @@ -282,7 +291,8 @@ func (s *Scorch) introducePersist(persist *persistIntroduction) { } atomic.StoreUint64(&s.stats.TotItemsToPersist, docsToPersistCount) - + atomic.StoreUint64(&s.stats.TotMemorySegmentsAtRoot, memSegments) + atomic.StoreUint64(&s.stats.TotFileSegmentsAtRoot, fileSegments) newIndexSnapshot.updateSize() s.rootLock.Lock() rootPrev := s.root @@ -317,7 +327,7 @@ func (s *Scorch) introduceMerge(nextMerge *segmentMerge) { // iterate through current segments newSegmentDeleted := roaring.NewBitmap() - var running, docsToPersistCount uint64 + var running, docsToPersistCount, memSegments, fileSegments uint64 for i := range root.segment { segmentID := root.segment[i].id if segSnapAtMerge, ok := 
nextMerge.old[segmentID]; ok { @@ -356,6 +366,9 @@ func (s *Scorch) introduceMerge(nextMerge *segmentMerge) { if isMemorySegment(root.segment[i]) { docsToPersistCount += root.segment[i].Count() + memSegments++ + } else { + fileSegments++ } } @@ -393,10 +406,15 @@ func (s *Scorch) introduceMerge(nextMerge *segmentMerge) { switch nextMerge.new.(type) { case *zap.SegmentBase: docsToPersistCount += nextMerge.new.Count() - newSegmentDeleted.GetCardinality() + memSegments++ + case *zap.Segment: + fileSegments++ } } atomic.StoreUint64(&s.stats.TotItemsToPersist, docsToPersistCount) + atomic.StoreUint64(&s.stats.TotMemorySegmentsAtRoot, memSegments) + atomic.StoreUint64(&s.stats.TotFileSegmentsAtRoot, fileSegments) newSnapshot.AddRef() // 1 ref for the nextMerge.notify response @@ -420,14 +438,6 @@ func (s *Scorch) introduceMerge(nextMerge *segmentMerge) { close(nextMerge.notify) } -func isMemorySegment(s *SegmentSnapshot) bool { - switch s.segment.(type) { - case *zap.SegmentBase: - return true - } - return false -} - func (s *Scorch) revertToSnapshot(revertTo *snapshotReversion) error { atomic.AddUint64(&s.stats.TotIntroduceRevertBeg, 1) defer atomic.AddUint64(&s.stats.TotIntroduceRevertEnd, 1) @@ -453,6 +463,7 @@ func (s *Scorch) revertToSnapshot(revertTo *snapshotReversion) error { } s.nextSnapshotEpoch++ + var docsToPersistCount, memSegments, fileSegments uint64 // iterate through segments for i, segmentSnapshot := range revertTo.snapshot.segment { newSnapshot.segment[i] = &SegmentSnapshot{ @@ -467,8 +478,19 @@ func (s *Scorch) revertToSnapshot(revertTo *snapshotReversion) error { // remove segment from ineligibleForRemoval map filename := zapFileName(segmentSnapshot.id) delete(s.ineligibleForRemoval, filename) + + if isMemorySegment(segmentSnapshot) { + docsToPersistCount += segmentSnapshot.Count() + memSegments++ + } else { + fileSegments++ + } } + atomic.StoreUint64(&s.stats.TotItemsToPersist, docsToPersistCount) + 
atomic.StoreUint64(&s.stats.TotMemorySegmentsAtRoot, memSegments) + atomic.StoreUint64(&s.stats.TotFileSegmentsAtRoot, fileSegments) + if revertTo.persisted != nil { s.rootPersisted = append(s.rootPersisted, revertTo.persisted) } @@ -490,3 +512,11 @@ func (s *Scorch) revertToSnapshot(revertTo *snapshotReversion) error { return nil } + +func isMemorySegment(s *SegmentSnapshot) bool { + switch s.segment.(type) { + case *zap.SegmentBase: + return true + } + return false +} diff --git a/index/scorch/scorch.go b/index/scorch/scorch.go index f182e83c4..39832eb88 100644 --- a/index/scorch/scorch.go +++ b/index/scorch/scorch.go @@ -214,6 +214,8 @@ func (s *Scorch) openBolt() error { } } + atomic.StoreUint64(&s.stats.TotFileSegmentsAtRoot, uint64(len(s.root.segment))) + s.introductions = make(chan *segmentIntroduction) s.persists = make(chan *persistIntroduction) s.merges = make(chan *segmentMerge) diff --git a/index/scorch/stats.go b/index/scorch/stats.go index eb6d946be..bc1ca4bd8 100644 --- a/index/scorch/stats.go +++ b/index/scorch/stats.go @@ -61,8 +61,6 @@ type Stats struct { TotIntroducedSegmentsBatch uint64 TotIntroducedSegmentsMerge uint64 - TotItemsToPersist uint64 - TotPersistLoopBeg uint64 TotPersistLoopErr uint64 TotPersistLoopProgress uint64 @@ -71,6 +69,7 @@ type Stats struct { TotPersistLoopEnd uint64 TotPersistedItems uint64 + TotItemsToPersist uint64 TotPersistedSegments uint64 TotPersisterSlowMergerPause uint64 @@ -96,6 +95,7 @@ type Stats struct { TotFileMergeSegmentsEmpty uint64 TotFileMergeSegments uint64 + TotFileSegmentsAtRoot uint64 TotFileMergeWrittenBytes uint64 TotFileMergeZapBeg uint64 @@ -106,14 +106,15 @@ type Stats struct { TotFileMergeIntroductions uint64 TotFileMergeIntroductionsDone uint64 - TotMemMergeBeg uint64 - TotMemMergeErr uint64 - TotMemMergeDone uint64 - TotMemMergeZapBeg uint64 - TotMemMergeZapEnd uint64 - TotMemMergeZapTime uint64 - MaxMemMergeZapTime uint64 - TotMemMergeSegments uint64 + TotMemMergeBeg uint64 + TotMemMergeErr 
uint64 + TotMemMergeDone uint64 + TotMemMergeZapBeg uint64 + TotMemMergeZapEnd uint64 + TotMemMergeZapTime uint64 + MaxMemMergeZapTime uint64 + TotMemMergeSegments uint64 + TotMemorySegmentsAtRoot uint64 } // atomically populates the returned map From 0da7b8fc580c5e430b793c769fc89a5e64f4e4d4 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Wed, 22 Aug 2018 17:07:53 -0700 Subject: [PATCH 459/728] remove scorch SegmentDictionarySnapshot wrapper The SegmentDictionarySnapshot wrapper didn't provide a lot of extra functionality, mainly to help construct PostingsLists with the right deleted bitmaps. Removing it helps optimize a little bit on memory allocations. See also: https://issues.couchbase.com/browse/MB-21318 --- index/scorch/snapshot_index.go | 8 +++--- index/scorch/snapshot_segment.go | 45 -------------------------------- 2 files changed, 4 insertions(+), 49 deletions(-) diff --git a/index/scorch/snapshot_index.go b/index/scorch/snapshot_index.go index 97266a900..b42f142ba 100644 --- a/index/scorch/snapshot_index.go +++ b/index/scorch/snapshot_index.go @@ -118,7 +118,7 @@ func (i *IndexSnapshot) newIndexSnapshotFieldDict(field string, makeItr func(i s results := make(chan *asynchSegmentResult) for index, segment := range i.segment { go func(index int, segment *SegmentSnapshot) { - dict, err := segment.Dictionary(field) + dict, err := segment.segment.Dictionary(field) if err != nil { results <- &asynchSegmentResult{err: err} } else { @@ -434,7 +434,7 @@ func (i *IndexSnapshot) TermFieldReader(term []byte, field string, includeFreq, if rv.dicts == nil { rv.dicts = make([]segment.TermDictionary, len(i.segment)) for i, segment := range i.segment { - dict, err := segment.Dictionary(field) + dict, err := segment.segment.Dictionary(field) if err != nil { return nil, err } @@ -442,8 +442,8 @@ func (i *IndexSnapshot) TermFieldReader(term []byte, field string, includeFreq, } } - for i := range i.segment { - pl, err := rv.dicts[i].PostingsList(term, nil, rv.postings[i]) + for 
i, segment := range i.segment { + pl, err := rv.dicts[i].PostingsList(term, segment.deleted, rv.postings[i]) if err != nil { return nil, err } diff --git a/index/scorch/snapshot_segment.go b/index/scorch/snapshot_segment.go index 1cc8b76c2..7672e853b 100644 --- a/index/scorch/snapshot_segment.go +++ b/index/scorch/snapshot_segment.go @@ -23,46 +23,12 @@ import ( "github.com/blevesearch/bleve/index" "github.com/blevesearch/bleve/index/scorch/segment" "github.com/blevesearch/bleve/size" - "github.com/couchbase/vellum" ) var TermSeparator byte = 0xff var TermSeparatorSplitSlice = []byte{TermSeparator} -type SegmentDictionarySnapshot struct { - s *SegmentSnapshot - d segment.TermDictionary -} - -func (s *SegmentDictionarySnapshot) PostingsList(term []byte, except *roaring.Bitmap, - prealloc segment.PostingsList) (segment.PostingsList, error) { - // TODO: if except is non-nil, perhaps need to OR it with s.s.deleted? - return s.d.PostingsList(term, s.s.deleted, prealloc) -} - -func (s *SegmentDictionarySnapshot) Iterator() segment.DictionaryIterator { - return s.d.Iterator() -} - -func (s *SegmentDictionarySnapshot) PrefixIterator(prefix string) segment.DictionaryIterator { - return s.d.PrefixIterator(prefix) -} - -func (s *SegmentDictionarySnapshot) RangeIterator(start, end string) segment.DictionaryIterator { - return s.d.RangeIterator(start, end) -} - -func (s *SegmentDictionarySnapshot) AutomatonIterator(a vellum.Automaton, - startKeyInclusive, endKeyExclusive []byte) segment.DictionaryIterator { - return s.d.AutomatonIterator(a, startKeyInclusive, endKeyExclusive) -} - -func (s *SegmentDictionarySnapshot) OnlyIterator(onlyTerms [][]byte, - includeCount bool) segment.DictionaryIterator { - return s.d.OnlyIterator(onlyTerms, includeCount) -} - type SegmentSnapshot struct { id uint64 segment segment.Segment @@ -112,17 +78,6 @@ func (s *SegmentSnapshot) Count() uint64 { return rv } -func (s *SegmentSnapshot) Dictionary(field string) (segment.TermDictionary, error) { - 
d, err := s.segment.Dictionary(field) - if err != nil { - return nil, err - } - return &SegmentDictionarySnapshot{ - s: s, - d: d, - }, nil -} - func (s *SegmentSnapshot) DocNumbers(docIDs []string) (*roaring.Bitmap, error) { rv, err := s.segment.DocNumbers(docIDs) if err != nil { From c0335041e896d4f4868d401b9192505c54fe9c8b Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Thu, 23 Aug 2018 08:12:59 -0700 Subject: [PATCH 460/728] optimize scorch to only recycle current TermFieldReaders When scorch is finished using a TermFieldReader, if the indexSnapshot that it belongs to is obsolete (not the current indexSnapshot), then don't recycle the TermFieldReader so that GC can take care of it sooner. This can matter in a scenario of ongoing mutations (the current indexSnapshot keeps on changing) when there are searches (like prefix & wildcard/regexp) that might involve lots (hundreds or more) of terms per search request, meaning lots of TermFieldReaders can be in play. See also: https://issues.couchbase.com/browse/MB-29923 --- index/scorch/snapshot_index.go | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/index/scorch/snapshot_index.go b/index/scorch/snapshot_index.go index b42f142ba..285412cf3 100644 --- a/index/scorch/snapshot_index.go +++ b/index/scorch/snapshot_index.go @@ -472,6 +472,14 @@ func (i *IndexSnapshot) allocTermFieldReaderDicts(field string) (tfr *IndexSnaps } func (i *IndexSnapshot) recycleTermFieldReader(tfr *IndexSnapshotTermFieldReader) { + i.parent.rootLock.RLock() + obsolete := i.parent.root != i + i.parent.rootLock.RUnlock() + if obsolete { + // if we're not the current root (mutations happened), don't bother recycling + return + } + i.m2.Lock() if i.fieldTFRs == nil { i.fieldTFRs = map[string][]*IndexSnapshotTermFieldReader{} From 816b0c6aaea4fa4b9a192f0c630035e774cc8c23 Mon Sep 17 00:00:00 2001 From: Sreekanth Sivasankaran Date: Sun, 26 Aug 2018 23:53:29 +0530 Subject: [PATCH 461/728] adding new batch merge api --- index.go | 10 ++++ 
index/index.go | 20 +++++++ index_test.go | 156 +++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 186 insertions(+) diff --git a/index.go b/index.go index 197f4d4df..f9462a41d 100644 --- a/index.go +++ b/index.go @@ -119,6 +119,16 @@ func (b *Batch) Reset() { b.internal.Reset() } +func (b *Batch) Merge(o *Batch) { + if o != nil && o.internal != nil { + b.internal.Merge(o.internal) + if o.LastDocSize() > 0 { + b.lastDocSize = o.LastDocSize() + } + b.totalSize = uint64(b.internal.TotalDocSize()) + } +} + // An Index implements all the indexing and searching // capabilities of bleve. An Index can be created // using the New() and Open() methods. diff --git a/index/index.go b/index/index.go index 62128c3c2..a44046134 100644 --- a/index/index.go +++ b/index/index.go @@ -300,6 +300,26 @@ func (b *Batch) Reset() { b.InternalOps = make(map[string][]byte) } +func (b *Batch) Merge(o *Batch) { + for k, v := range o.IndexOps { + b.IndexOps[k] = v + } + for k, v := range o.InternalOps { + b.InternalOps[k] = v + } +} + +func (b *Batch) TotalDocSize() int { + var s int + for k, v := range b.IndexOps { + if v != nil { + s += v.Size() + size.SizeOfString + } + s += len(k) + } + return s +} + // Optimizable represents an optional interface that implementable by // optimizable resources (e.g., TermFieldReaders, Searchers). 
These // optimizable resources are provided the same OptimizableContext diff --git a/index_test.go b/index_test.go index 604328a41..0d81eba40 100644 --- a/index_test.go +++ b/index_test.go @@ -1917,3 +1917,159 @@ func TestSearchQueryCallback(t *testing.T) { t.Fatalf("Expected: %v, Got: %v", expErr, err) } } + +func TestBatchMerge(t *testing.T) { + defer func() { + err := os.RemoveAll("testidx") + if err != nil { + t.Fatal(err) + } + }() + + index, err := New("testidx", NewIndexMapping()) + if err != nil { + t.Fatal(err) + } + doca := map[string]interface{}{ + "name": "scorch", + "desc": "gophercon india", + "nation": "india", + } + + batchA := index.NewBatch() + err = batchA.Index("a", doca) + if err != nil { + t.Error(err) + } + batchA.SetInternal([]byte("batchkA"), []byte("batchvA")) + + docb := map[string]interface{}{ + "name": "moss", + "desc": "gophercon MV", + } + + batchB := index.NewBatch() + err = batchB.Index("b", docb) + if err != nil { + t.Error(err) + } + batchB.SetInternal([]byte("batchkB"), []byte("batchvB")) + + docC := map[string]interface{}{ + "name": "blahblah", + "desc": "inProgress", + "country": "usa", + } + + batchC := index.NewBatch() + err = batchC.Index("c", docC) + if err != nil { + t.Error(err) + } + batchC.SetInternal([]byte("batchkC"), []byte("batchvC")) + batchC.SetInternal([]byte("batchkB"), []byte("batchvBNew")) + batchC.Delete("a") + batchC.DeleteInternal([]byte("batchkA")) + + batchA.Merge(batchB) + + if batchA.Size() != 4 { + t.Errorf("expected batch size 4, got %d", batchA.Size()) + } + + batchA.Merge(batchC) + + if batchA.Size() != 6 { + t.Errorf("expected batch size 6, got %d", batchA.Size()) + } + + err = index.Batch(batchA) + if err != nil { + t.Fatal(err) + } + + // close the index, open it again, and try some more things + err = index.Close() + if err != nil { + t.Fatal(err) + } + + index, err = Open("testidx") + if err != nil { + t.Fatal(err) + } + defer func() { + err := index.Close() + if err != nil { + t.Fatal(err) + } 
+ }() + + count, err := index.DocCount() + if err != nil { + t.Fatal(err) + } + if count != 2 { + t.Errorf("expected doc count 2, got %d", count) + } + + doc, err := index.Document("c") + if err != nil { + t.Fatal(err) + } + if doc == nil { + t.Errorf("expected doc not nil, got nil") + } + + val, err := index.GetInternal([]byte("batchkB")) + if err != nil { + t.Fatal(err) + } + if val == nil || string(val) != "batchvBNew" { + t.Errorf("expected val: batchvBNew , got %s", val) + } + + val, err = index.GetInternal([]byte("batchkA")) + if err != nil { + t.Fatal(err) + } + if val != nil { + t.Errorf("expected nil, got %s", val) + } + + foundNameField := false + for _, field := range doc.Fields { + if field.Name() == "name" && string(field.Value()) == "blahblah" { + foundNameField = true + } + } + if !foundNameField { + t.Errorf("expected to find field named 'name' with value 'blahblah'") + } + + fields, err := index.Fields() + if err != nil { + t.Fatal(err) + } + + expectedFields := map[string]bool{ + "_all": false, + "name": false, + "desc": false, + "country": false, + } + if len(fields) < len(expectedFields) { + t.Fatalf("expected %d fields got %d", len(expectedFields), len(fields)) + } + + for _, f := range fields { + expectedFields[f] = true + } + + for ef, efp := range expectedFields { + if !efp { + t.Errorf("field %s is missing", ef) + } + } + +} From bbeefaf3518cc9d8975f963771af33737bef12c3 Mon Sep 17 00:00:00 2001 From: Sreekanth Sivasankaran Date: Wed, 29 Aug 2018 14:24:04 +0530 Subject: [PATCH 462/728] exposing segment,persister stats --- index/scorch/scorch.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/index/scorch/scorch.go b/index/scorch/scorch.go index 39832eb88..5e56c49b0 100644 --- a/index/scorch/scorch.go +++ b/index/scorch/scorch.go @@ -502,6 +502,10 @@ func (s *Scorch) StatsMap() map[string]interface{} { m["num_recs_to_persist"] = m["TotItemsToPersist"] m["num_bytes_used_disk"] = m["CurOnDiskBytes"] m["num_files_on_disk"] = 
m["CurOnDiskFiles"] + m["num_root_memorysegments"] = m["TotMemorySegmentsAtRoot"] + m["num_root_filesegments"] = m["TotFileSegmentsAtRoot"] + m["num_persister_nap_pause_completed"] = m["TotPersisterNapPauseCompleted"] + m["num_persister_nap_merger_break"] = m["TotPersisterMergerNapBreak"] m["total_compaction_written_bytes"] = m["TotFileMergeWrittenBytes"] return m From aefc7de851b095f845be56dadb1bea7ed1a38a9c Mon Sep 17 00:00:00 2001 From: Oleg Kovalov Date: Sat, 1 Sep 2018 13:25:28 +0200 Subject: [PATCH 463/728] minor code cleanups --- analysis/token/camelcase/parser.go | 8 ++++---- index/scorch/introducer.go | 3 ++- search/collector/heap.go | 4 ++-- search/collector/list.go | 5 ++--- search/collector/slice.go | 4 ++-- search/query/query.go | 12 ++++-------- 6 files changed, 16 insertions(+), 20 deletions(-) diff --git a/analysis/token/camelcase/parser.go b/analysis/token/camelcase/parser.go index d691e5646..ff4ce2fea 100644 --- a/analysis/token/camelcase/parser.go +++ b/analysis/token/camelcase/parser.go @@ -46,11 +46,11 @@ type Parser struct { index int } -func NewParser(len, position, index int) *Parser { +func NewParser(length, position, index int) *Parser { return &Parser{ - bufferLen: len, - buffer: make([]rune, 0, len), - tokens: make([]*analysis.Token, 0, len), + bufferLen: length, + buffer: make([]rune, 0, length), + tokens: make([]*analysis.Token, 0, length), position: position, index: index, } diff --git a/index/scorch/introducer.go b/index/scorch/introducer.go index f17b3bc5b..12f27af66 100644 --- a/index/scorch/introducer.go +++ b/index/scorch/introducer.go @@ -517,6 +517,7 @@ func isMemorySegment(s *SegmentSnapshot) bool { switch s.segment.(type) { case *zap.SegmentBase: return true + default: + return false } - return false } diff --git a/search/collector/heap.go b/search/collector/heap.go index bdf72eade..05502d5df 100644 --- a/search/collector/heap.go +++ b/search/collector/heap.go @@ -25,9 +25,9 @@ type collectStoreHeap struct { compare 
collectorCompare } -func newStoreHeap(cap int, compare collectorCompare) *collectStoreHeap { +func newStoreHeap(capacity int, compare collectorCompare) *collectStoreHeap { rv := &collectStoreHeap{ - heap: make(search.DocumentMatchCollection, 0, cap), + heap: make(search.DocumentMatchCollection, 0, capacity), compare: compare, } heap.Init(rv) diff --git a/search/collector/list.go b/search/collector/list.go index ec2f69cb8..f01d205c9 100644 --- a/search/collector/list.go +++ b/search/collector/list.go @@ -25,7 +25,7 @@ type collectStoreList struct { compare collectorCompare } -func newStoreList(cap int, compare collectorCompare) *collectStoreList { +func newStoreList(capacity int, compare collectorCompare) *collectStoreList { rv := &collectStoreList{ results: list.New(), compare: compare, @@ -34,8 +34,7 @@ func newStoreList(cap int, compare collectorCompare) *collectStoreList { return rv } -func (c *collectStoreList) AddNotExceedingSize(doc *search.DocumentMatch, - size int) *search.DocumentMatch { +func (c *collectStoreList) AddNotExceedingSize(doc *search.DocumentMatch, size int) *search.DocumentMatch { c.add(doc) if c.len() > size { return c.removeLast() diff --git a/search/collector/slice.go b/search/collector/slice.go index 32cb86244..85fe73c40 100644 --- a/search/collector/slice.go +++ b/search/collector/slice.go @@ -21,9 +21,9 @@ type collectStoreSlice struct { compare collectorCompare } -func newStoreSlice(cap int, compare collectorCompare) *collectStoreSlice { +func newStoreSlice(capacity int, compare collectorCompare) *collectStoreSlice { rv := &collectStoreSlice{ - slice: make(search.DocumentMatchCollection, 0, cap), + slice: make(search.DocumentMatchCollection, 0, capacity), compare: compare, } return rv diff --git a/search/query/query.go b/search/query/query.go index 1b0d94c01..c7c1eefb8 100644 --- a/search/query/query.go +++ b/search/query/query.go @@ -296,32 +296,28 @@ func expandQuery(m mapping.IndexMapping, query Query) (Query, error) { } expand = 
func(query Query) (Query, error) { - switch query.(type) { + switch q := query.(type) { case *QueryStringQuery: - q := query.(*QueryStringQuery) parsed, err := parseQuerySyntax(q.Query) if err != nil { return nil, fmt.Errorf("could not parse '%s': %s", q.Query, err) } return expand(parsed) case *ConjunctionQuery: - q := *query.(*ConjunctionQuery) children, err := expandSlice(q.Conjuncts) if err != nil { return nil, err } q.Conjuncts = children - return &q, nil + return q, nil case *DisjunctionQuery: - q := *query.(*DisjunctionQuery) children, err := expandSlice(q.Disjuncts) if err != nil { return nil, err } q.Disjuncts = children - return &q, nil + return q, nil case *BooleanQuery: - q := *query.(*BooleanQuery) var err error q.Must, err = expand(q.Must) if err != nil { @@ -335,7 +331,7 @@ func expandQuery(m mapping.IndexMapping, query Query) (Query, error) { if err != nil { return nil, err } - return &q, nil + return q, nil default: return query, nil } From f65b131da12d6a66ed77d4dd561123e96692df09 Mon Sep 17 00:00:00 2001 From: Oleg Kovalov Date: Sat, 1 Sep 2018 13:36:29 +0200 Subject: [PATCH 464/728] minor code cleanups --- index/scorch/mergeplan/merge_plan.go | 4 ++-- index/scorch/persister.go | 2 +- index/scorch/segment/zap/docvalues.go | 2 +- index/scorch/snapshot_index.go | 2 +- index/scorch/snapshot_index_dict.go | 2 +- index/store/metrics/metrics_test.go | 2 +- index/upsidedown/row.go | 2 +- index/upsidedown/upsidedown.go | 2 +- search/scorer/scorer_term_test.go | 2 +- test/versus_test.go | 10 +++++----- 10 files changed, 15 insertions(+), 15 deletions(-) diff --git a/index/scorch/mergeplan/merge_plan.go b/index/scorch/mergeplan/merge_plan.go index b09e5381e..c2a0d3c64 100644 --- a/index/scorch/mergeplan/merge_plan.go +++ b/index/scorch/mergeplan/merge_plan.go @@ -217,14 +217,14 @@ func plan(segmentsIn []Segment, o *MergePlanOptions) (*MergePlan, error) { if len(roster) > 0 { rosterScore := scoreSegments(roster, o) - if len(bestRoster) <= 0 || rosterScore < 
bestRosterScore { + if len(bestRoster) == 0 || rosterScore < bestRosterScore { bestRoster = roster bestRosterScore = rosterScore } } } - if len(bestRoster) <= 0 { + if len(bestRoster) == 0 { return rv, nil } diff --git a/index/scorch/persister.go b/index/scorch/persister.go index 5e9450189..fa6b249f5 100644 --- a/index/scorch/persister.go +++ b/index/scorch/persister.go @@ -757,7 +757,7 @@ func (s *Scorch) removeOldBoltSnapshots() (numRemoved int, err error) { s.eligibleForRemoval = newEligible s.rootLock.Unlock() - if len(epochsToRemove) <= 0 { + if len(epochsToRemove) == 0 { return 0, nil } diff --git a/index/scorch/segment/zap/docvalues.go b/index/scorch/segment/zap/docvalues.go index 72ce1248f..bcc0f9472 100644 --- a/index/scorch/segment/zap/docvalues.go +++ b/index/scorch/segment/zap/docvalues.go @@ -176,7 +176,7 @@ func (di *docValueReader) iterateAllDocValues(s *SegmentBase, visitor docNumTerm if err != nil { return err } - if di.curChunkData == nil || len(di.curChunkHeader) <= 0 { + if di.curChunkData == nil || len(di.curChunkHeader) == 0 { continue } diff --git a/index/scorch/snapshot_index.go b/index/scorch/snapshot_index.go index 285412cf3..0d312fcca 100644 --- a/index/scorch/snapshot_index.go +++ b/index/scorch/snapshot_index.go @@ -654,7 +654,7 @@ func (i *IndexSnapshot) DumpFields() chan interface{} { // subtractStrings returns set a minus elements of set b. 
func subtractStrings(a, b []string) []string { - if len(b) <= 0 { + if len(b) == 0 { return a } diff --git a/index/scorch/snapshot_index_dict.go b/index/scorch/snapshot_index_dict.go index 2d229ca0f..abd3bde8c 100644 --- a/index/scorch/snapshot_index_dict.go +++ b/index/scorch/snapshot_index_dict.go @@ -52,7 +52,7 @@ func (i *IndexSnapshotFieldDict) Pop() interface{} { } func (i *IndexSnapshotFieldDict) Next() (*index.DictEntry, error) { - if len(i.cursors) <= 0 { + if len(i.cursors) == 0 { return nil, nil } i.entry = i.cursors[0].curr diff --git a/index/store/metrics/metrics_test.go b/index/store/metrics/metrics_test.go index cda7a2aef..fb78514bf 100644 --- a/index/store/metrics/metrics_test.go +++ b/index/store/metrics/metrics_test.go @@ -57,7 +57,7 @@ func TestMetricsStore(t *testing.T) { if err != nil { t.Errorf("expected WriteJSON to be unmarshallable") } - if len(m) <= 0 { + if len(m) == 0 { t.Errorf("expected some entries") } diff --git a/index/upsidedown/row.go b/index/upsidedown/row.go index ba50314cd..531e0a0d3 100644 --- a/index/upsidedown/row.go +++ b/index/upsidedown/row.go @@ -584,7 +584,7 @@ func (tfr *TermFrequencyRow) parseK(key []byte) error { func (tfr *TermFrequencyRow) parseKDoc(key []byte, term []byte) error { tfr.doc = key[3+len(term)+1:] - if len(tfr.doc) <= 0 { + if len(tfr.doc) == 0 { return fmt.Errorf("invalid term frequency key, empty docid") } diff --git a/index/upsidedown/upsidedown.go b/index/upsidedown/upsidedown.go index 70e6e457f..6d3738539 100644 --- a/index/upsidedown/upsidedown.go +++ b/index/upsidedown/upsidedown.go @@ -775,7 +775,7 @@ func (udc *UpsideDownCouch) termVectorsFromTokenFreq(field uint16, tf *analysis. 
} func (udc *UpsideDownCouch) termFieldVectorsFromTermVectors(in []*TermVector) []*index.TermFieldVector { - if len(in) <= 0 { + if len(in) == 0 { return nil } diff --git a/search/scorer/scorer_term_test.go b/search/scorer/scorer_term_test.go index 23d449788..b491ee00c 100644 --- a/search/scorer/scorer_term_test.go +++ b/search/scorer/scorer_term_test.go @@ -157,7 +157,7 @@ func TestTermScorer(t *testing.T) { } actual := scorer.Score(ctx, test.termMatch) actual.Complete(nil) - if len(actual.FieldTermLocations) <= 0 { + if len(actual.FieldTermLocations) == 0 { actual.FieldTermLocations = nil } diff --git a/test/versus_test.go b/test/versus_test.go index 10faa311e..20a85d161 100644 --- a/test/versus_test.go +++ b/test/versus_test.go @@ -253,7 +253,7 @@ func testVersusSearches(vt *VersusTest, searchTemplates []string, idxA, idxB ble // definitely find at least one document. "bodyWord": func(i int) string { body := vt.Bodies[vt.CurAttempt%len(vt.Bodies)] - if len(body) <= 0 { + if len(body) == 0 { return "" } return body[i%len(body)] @@ -324,10 +324,10 @@ func testVersusSearches(vt *VersusTest, searchTemplates []string, idxA, idxB ble hitsB := hitsById(resB) for id, hitA := range hitsA { hitB := hitsB[id] - if len(hitA.FieldTermLocations) <= 0 { + if len(hitA.FieldTermLocations) == 0 { hitA.FieldTermLocations = nil } - if len(hitB.FieldTermLocations) <= 0 { + if len(hitB.FieldTermLocations) == 0 { hitB.FieldTermLocations = nil } if !reflect.DeepEqual(hitA, hitB) { @@ -338,10 +338,10 @@ func testVersusSearches(vt *VersusTest, searchTemplates []string, idxA, idxB ble } for id, hitB := range hitsB { hitA := hitsA[id] - if len(hitA.FieldTermLocations) <= 0 { + if len(hitA.FieldTermLocations) == 0 { hitA.FieldTermLocations = nil } - if len(hitB.FieldTermLocations) <= 0 { + if len(hitB.FieldTermLocations) == 0 { hitB.FieldTermLocations = nil } if !reflect.DeepEqual(hitA, hitB) { From 38f239cf70326c6983e7e09a22a4a3769969f6a6 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Thu, 
23 Aug 2018 08:59:55 -0700 Subject: [PATCH 465/728] optimize scorch with sync.Pool of zap Dict/PostingsList/Iterator Some searches like prefix searches can generate many term field readers per search request, and in the face of ongoing mutations, scorch will not recycle those term field readers. This change adds sync pooling or recycling of the constituent parts of a scorch TermFieldReader, which hopefully reduces some garbage and takes some pressure off the memory allocator. --- index/scorch/segment/empty.go | 6 ++ index/scorch/segment/zap/dict.go | 13 ++++- index/scorch/segment/zap/posting.go | 87 +++++++++++++++++++---------- index/scorch/segment/zap/segment.go | 9 ++- index/scorch/snapshot_index.go | 6 +- index/scorch/snapshot_index_tfr.go | 39 ++++++++++++- index/scorch/stats.go | 7 +++ 7 files changed, 126 insertions(+), 41 deletions(-) diff --git a/index/scorch/segment/empty.go b/index/scorch/segment/empty.go index af50d0aaf..a0fe74555 100644 --- a/index/scorch/segment/empty.go +++ b/index/scorch/segment/empty.go @@ -91,6 +91,8 @@ func (e *EmptyDictionary) OnlyIterator(onlyTerms [][]byte, return &EmptyDictionaryIterator{} } +func (e *EmptyDictionary) Recycle() {} + type EmptyDictionaryIterator struct{} func (e *EmptyDictionaryIterator) Next() (*index.DictEntry, error) { @@ -116,6 +118,8 @@ func (e *EmptyPostingsList) Count() uint64 { return 0 } +func (e *EmptyPostingsList) Recycle() {} + type EmptyPostingsIterator struct{} func (e *EmptyPostingsIterator) Next() (Posting, error) { @@ -125,3 +129,5 @@ func (e *EmptyPostingsIterator) Next() (Posting, error) { func (e *EmptyPostingsIterator) Size() int { return 0 } + +func (e *EmptyPostingsIterator) Recycle() {} diff --git a/index/scorch/segment/zap/dict.go b/index/scorch/segment/zap/dict.go index 219bf1526..4af29409a 100644 --- a/index/scorch/segment/zap/dict.go +++ b/index/scorch/segment/zap/dict.go @@ -17,6 +17,7 @@ package zap import ( "bytes" "fmt" + "sync" "github.com/RoaringBitmap/roaring" 
"github.com/blevesearch/bleve/index" @@ -33,6 +34,8 @@ type Dictionary struct { fstReader *vellum.Reader } +var dictionaryPool = sync.Pool{New: func() interface{} { return &Dictionary{} }} + // PostingsList returns the postings list for the specified term func (d *Dictionary) PostingsList(term []byte, except *roaring.Bitmap, prealloc segment.PostingsList) (segment.PostingsList, error) { @@ -79,7 +82,7 @@ func (d *Dictionary) postingsListFromOffset(postingsOffset uint64, except *roari func (d *Dictionary) postingsListInit(rv *PostingsList, except *roaring.Bitmap) *PostingsList { if rv == nil || rv == emptyPostingsList { - rv = &PostingsList{} + rv = postingsListPool.Get().(*PostingsList) } else { postings := rv.postings if postings != nil { @@ -225,6 +228,14 @@ func (d *Dictionary) OnlyIterator(onlyTerms [][]byte, return rv } +func (d *Dictionary) Recycle() { + *d = Dictionary{} // clear fields + + // TODO: need vellum API's to allow for recycled or prealloc'ed FST's + + dictionaryPool.Put(d) +} + // DictionaryIterator is an iterator for term dictionary type DictionaryIterator struct { d *Dictionary diff --git a/index/scorch/segment/zap/posting.go b/index/scorch/segment/zap/posting.go index 0ac7938e1..64f223896 100644 --- a/index/scorch/segment/zap/posting.go +++ b/index/scorch/segment/zap/posting.go @@ -21,6 +21,7 @@ import ( "io" "math" "reflect" + "sync" "github.com/RoaringBitmap/roaring" "github.com/blevesearch/bleve/index/scorch/segment" @@ -112,6 +113,8 @@ type PostingsList struct { // represents an immutable, empty postings list var emptyPostingsList = &PostingsList{} +var postingsListPool = sync.Pool{New: func() interface{} { return &PostingsList{} }} + func (p *PostingsList) Size() int { sizeInBytes := reflectStaticSizePostingsList + size.SizeOfPtr @@ -155,38 +158,9 @@ func (p *PostingsList) Iterator(includeFreq, includeNorm, includeLocs bool, func (p *PostingsList) iterator(includeFreq, includeNorm, includeLocs bool, rv *PostingsIterator) *PostingsIterator 
{ if rv == nil { - rv = &PostingsIterator{} + rv = postingsIteratorPool.Get().(*PostingsIterator) } else { - freqNormReader := rv.freqNormReader - if freqNormReader != nil { - freqNormReader.Reset([]byte(nil)) - } - - locReader := rv.locReader - if locReader != nil { - locReader.Reset([]byte(nil)) - } - - freqChunkOffsets := rv.freqChunkOffsets[:0] - locChunkOffsets := rv.locChunkOffsets[:0] - - nextLocs := rv.nextLocs[:0] - nextSegmentLocs := rv.nextSegmentLocs[:0] - - buf := rv.buf - - *rv = PostingsIterator{} // clear the struct - - rv.freqNormReader = freqNormReader - rv.locReader = locReader - - rv.freqChunkOffsets = freqChunkOffsets - rv.locChunkOffsets = locChunkOffsets - - rv.nextLocs = nextLocs - rv.nextSegmentLocs = nextSegmentLocs - - rv.buf = buf + rv.Clear() } rv.postings = p @@ -322,6 +296,14 @@ func (rv *PostingsList) init1Hit(fstVal uint64) error { return nil } +func (rv *PostingsList) Recycle() { + if rv != emptyPostingsList { + *rv = PostingsList{} + // TODO: can we also recycle the roaring bitmaps? 
+ postingsListPool.Put(rv) + } +} + // PostingsIterator provides a way to iterate through the postings list type PostingsIterator struct { postings *PostingsList @@ -357,6 +339,8 @@ type PostingsIterator struct { var emptyPostingsIterator = &PostingsIterator{} +var postingsIteratorPool = sync.Pool{New: func() interface{} { return &PostingsIterator{} }} + func (i *PostingsIterator) Size() int { sizeInBytes := reflectStaticSizePostingsIterator + size.SizeOfPtr + len(i.currChunkFreqNorm) + @@ -711,6 +695,47 @@ func (i *PostingsIterator) nextDocNumAtOrAfter(atOrAfter uint64) (uint64, bool, return uint64(n), true, nil } +func (rv *PostingsIterator) Clear() { + freqNormReader := rv.freqNormReader + if freqNormReader != nil { + freqNormReader.Reset([]byte(nil)) + } + + locReader := rv.locReader + if locReader != nil { + locReader.Reset([]byte(nil)) + } + + freqChunkOffsets := rv.freqChunkOffsets[:0] + locChunkOffsets := rv.locChunkOffsets[:0] + + nextLocs := rv.nextLocs[:0] + nextSegmentLocs := rv.nextSegmentLocs[:0] + + buf := rv.buf + + *rv = PostingsIterator{} // clear the struct + + rv.freqNormReader = freqNormReader + rv.locReader = locReader + + rv.freqChunkOffsets = freqChunkOffsets + rv.locChunkOffsets = locChunkOffsets + + rv.nextLocs = nextLocs + rv.nextSegmentLocs = nextSegmentLocs + + rv.buf = buf +} + +func (i *PostingsIterator) Recycle() { + if i != emptyPostingsIterator { + i.Clear() + + postingsIteratorPool.Put(i) + } +} + // Posting is a single entry in a postings list type Posting struct { docNum uint64 diff --git a/index/scorch/segment/zap/segment.go b/index/scorch/segment/zap/segment.go index 8c6de211a..b9a328ad4 100644 --- a/index/scorch/segment/zap/segment.go +++ b/index/scorch/segment/zap/segment.go @@ -250,11 +250,10 @@ func (s *SegmentBase) Dictionary(field string) (segment.TermDictionary, error) { func (sb *SegmentBase) dictionary(field string) (rv *Dictionary, err error) { fieldIDPlus1 := sb.fieldsMap[field] if fieldIDPlus1 > 0 { - rv = 
&Dictionary{ - sb: sb, - field: field, - fieldID: fieldIDPlus1 - 1, - } + rv = dictionaryPool.Get().(*Dictionary) + rv.sb = sb + rv.field = field + rv.fieldID = fieldIDPlus1 - 1 dictStart := sb.dictLocs[rv.fieldID] if dictStart > 0 { diff --git a/index/scorch/snapshot_index.go b/index/scorch/snapshot_index.go index 285412cf3..7dab97599 100644 --- a/index/scorch/snapshot_index.go +++ b/index/scorch/snapshot_index.go @@ -471,13 +471,13 @@ func (i *IndexSnapshot) allocTermFieldReaderDicts(field string) (tfr *IndexSnaps return &IndexSnapshotTermFieldReader{} } -func (i *IndexSnapshot) recycleTermFieldReader(tfr *IndexSnapshotTermFieldReader) { +func (i *IndexSnapshot) recycleTermFieldReader(tfr *IndexSnapshotTermFieldReader) bool { i.parent.rootLock.RLock() obsolete := i.parent.root != i i.parent.rootLock.RUnlock() if obsolete { // if we're not the current root (mutations happened), don't bother recycling - return + return false } i.m2.Lock() @@ -486,6 +486,8 @@ func (i *IndexSnapshot) recycleTermFieldReader(tfr *IndexSnapshotTermFieldReader } i.fieldTFRs[tfr.field] = append(i.fieldTFRs[tfr.field], tfr) i.m2.Unlock() + + return true } func docNumberToBytes(buf []byte, in uint64) []byte { diff --git a/index/scorch/snapshot_index_tfr.go b/index/scorch/snapshot_index_tfr.go index 89af3be4c..229869537 100644 --- a/index/scorch/snapshot_index_tfr.go +++ b/index/scorch/snapshot_index_tfr.go @@ -178,8 +178,43 @@ func (i *IndexSnapshotTermFieldReader) Count() uint64 { func (i *IndexSnapshotTermFieldReader) Close() error { if i.snapshot != nil { - atomic.AddUint64(&i.snapshot.parent.stats.TotTermSearchersFinished, uint64(1)) - i.snapshot.recycleTermFieldReader(i) + stats := &i.snapshot.parent.stats + + atomic.AddUint64(&stats.TotTermSearchersFinished, uint64(1)) + + if i.snapshot.recycleTermFieldReader(i) { + atomic.AddUint64(&stats.TotTermFieldReadersRecycled, uint64(1)) + } else { + atomic.AddUint64(&stats.TotTermFieldReadersGarbaged, uint64(1)) + + // the snapshot couldn't 
recycle the TFR (it was too + // obsolete), so recycle its constituent parts + + for _, x := range i.iterators { + recycle(x) + } + atomic.AddUint64(&stats.TotPostingsIteratorsRecycled, uint64(len(i.iterators))) + + for _, x := range i.postings { + recycle(x) + } + atomic.AddUint64(&stats.TotPostingsListsRecycled, uint64(len(i.postings))) + + for _, x := range i.dicts { + recycle(x) + } + atomic.AddUint64(&stats.TotTermDictionariesRecycled, uint64(len(i.dicts))) + } } return nil } + +type Recycler interface { + Recycle() +} + +func recycle(x interface{}) { + if r, ok := x.(Recycler); ok { + r.Recycle() + } +} diff --git a/index/scorch/stats.go b/index/scorch/stats.go index bc1ca4bd8..ba497de99 100644 --- a/index/scorch/stats.go +++ b/index/scorch/stats.go @@ -115,6 +115,13 @@ type Stats struct { MaxMemMergeZapTime uint64 TotMemMergeSegments uint64 TotMemorySegmentsAtRoot uint64 + + TotTermFieldReadersGarbaged uint64 + TotTermFieldReadersRecycled uint64 + + TotTermDictionariesRecycled uint64 + TotPostingsListsRecycled uint64 + TotPostingsIteratorsRecycled uint64 } // atomically populates the returned map From 15e5f6d3059089149b59cd47c5a5b069d3e21d59 Mon Sep 17 00:00:00 2001 From: abhinavdangeti Date: Thu, 6 Sep 2018 11:28:37 -0700 Subject: [PATCH 466/728] Invvoke fst/reader's Close() to reuse via sync pool on recycle --- index/scorch/segment/zap/dict.go | 10 +++++++--- vendor/manifest | 2 +- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/index/scorch/segment/zap/dict.go b/index/scorch/segment/zap/dict.go index 4af29409a..cfbbd2397 100644 --- a/index/scorch/segment/zap/dict.go +++ b/index/scorch/segment/zap/dict.go @@ -229,10 +229,14 @@ func (d *Dictionary) OnlyIterator(onlyTerms [][]byte, } func (d *Dictionary) Recycle() { - *d = Dictionary{} // clear fields - - // TODO: need vellum API's to allow for recycled or prealloc'ed FST's + if d.fst != nil { + _ = d.fst.Close() + } + if d.fstReader != nil { + d.fstReader.Close() + } + *d = Dictionary{} // 
clear fields dictionaryPool.Put(d) } diff --git a/vendor/manifest b/vendor/manifest index d45734e9e..46755ffd8 100644 --- a/vendor/manifest +++ b/vendor/manifest @@ -137,7 +137,7 @@ "importpath": "github.com/couchbase/vellum", "repository": "https://github.com/couchbase/vellum", "vcs": "git", - "revision": "dc6110ee42850a6553b6469bf0dc03383900353d", + "revision": "7b31f610cdce81be8b91a93cdcb0d6a151c6f8fd", "branch": "master", "notests": true } From cc4fd7137ccfcc34fcf5ae766b0b6cf9fc47f482 Mon Sep 17 00:00:00 2001 From: abhinavdangeti Date: Thu, 6 Sep 2018 12:14:28 -0700 Subject: [PATCH 467/728] Check for TooManyClauses as early as possible where applicable Context: + MultiTermSearcher + TermPrefixSearcher --- search/searcher/search_multi_term.go | 8 ++++++++ search/searcher/search_term_prefix.go | 3 +++ 2 files changed, 11 insertions(+) diff --git a/search/searcher/search_multi_term.go b/search/searcher/search_multi_term.go index b469beadb..a723aedc5 100644 --- a/search/searcher/search_multi_term.go +++ b/search/searcher/search_multi_term.go @@ -22,6 +22,10 @@ import ( func NewMultiTermSearcher(indexReader index.IndexReader, terms []string, field string, boost float64, options search.SearcherOptions, limit bool) ( search.Searcher, error) { + if limit && tooManyClauses(len(terms)) { + return nil, tooManyClausesErr() + } + qsearchers := make([]search.Searcher, len(terms)) qsearchersClose := func() { for _, searcher := range qsearchers { @@ -46,6 +50,10 @@ func NewMultiTermSearcher(indexReader index.IndexReader, terms []string, func NewMultiTermSearcherBytes(indexReader index.IndexReader, terms [][]byte, field string, boost float64, options search.SearcherOptions, limit bool) ( search.Searcher, error) { + if limit && tooManyClauses(len(terms)) { + return nil, tooManyClausesErr() + } + qsearchers := make([]search.Searcher, len(terms)) qsearchersClose := func() { for _, searcher := range qsearchers { diff --git a/search/searcher/search_term_prefix.go 
b/search/searcher/search_term_prefix.go index c49788c71..59db93101 100644 --- a/search/searcher/search_term_prefix.go +++ b/search/searcher/search_term_prefix.go @@ -37,6 +37,9 @@ func NewTermPrefixSearcher(indexReader index.IndexReader, prefix string, tfd, err := fieldDict.Next() for err == nil && tfd != nil { terms = append(terms, tfd.Term) + if tooManyClauses(len(terms)) { + return nil, tooManyClausesErr() + } tfd, err = fieldDict.Next() } if err != nil { From 6f6636037e5344564da3eaa75054010440357ceb Mon Sep 17 00:00:00 2001 From: abhinavdangeti Date: Thu, 6 Sep 2018 13:05:22 -0700 Subject: [PATCH 468/728] Update vendor/manifest revision of vellum Point to: 35d9e73 Prateek Rungta | Re-work pooling to have a fixed memory cost --- vendor/manifest | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vendor/manifest b/vendor/manifest index 46755ffd8..5d1fb4717 100644 --- a/vendor/manifest +++ b/vendor/manifest @@ -137,7 +137,7 @@ "importpath": "github.com/couchbase/vellum", "repository": "https://github.com/couchbase/vellum", "vcs": "git", - "revision": "7b31f610cdce81be8b91a93cdcb0d6a151c6f8fd", + "revision": "35d9e7346a69d2499623063cd39f305599fc7eae", "branch": "master", "notests": true } From 504bcd3a1da5f1001032052a580efd33ce3b2676 Mon Sep 17 00:00:00 2001 From: abhinavdangeti Date: Mon, 10 Sep 2018 10:45:06 -0700 Subject: [PATCH 469/728] Revert "Invvoke fst/reader's Close() to reuse via sync pool on recycle" This reverts commit 15e5f6d3059089149b59cd47c5a5b069d3e21d59. 
--- index/scorch/segment/zap/dict.go | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/index/scorch/segment/zap/dict.go b/index/scorch/segment/zap/dict.go index cfbbd2397..4af29409a 100644 --- a/index/scorch/segment/zap/dict.go +++ b/index/scorch/segment/zap/dict.go @@ -229,14 +229,10 @@ func (d *Dictionary) OnlyIterator(onlyTerms [][]byte, } func (d *Dictionary) Recycle() { - if d.fst != nil { - _ = d.fst.Close() - } - if d.fstReader != nil { - d.fstReader.Close() - } - *d = Dictionary{} // clear fields + + // TODO: need vellum API's to allow for recycled or prealloc'ed FST's + dictionaryPool.Put(d) } From 13d37d5a469b69a534ddd6096111d242a816eaa9 Mon Sep 17 00:00:00 2001 From: abhinavdangeti Date: Mon, 10 Sep 2018 11:45:21 -0700 Subject: [PATCH 470/728] Revert "optimize scorch with sync.Pool of zap Dict/PostingsList/Iterator" + Observed an overall performace regression with this change. This reverts commit 38f239cf70326c6983e7e09a22a4a3769969f6a6. --- index/scorch/segment/empty.go | 6 -- index/scorch/segment/zap/dict.go | 13 +---- index/scorch/segment/zap/posting.go | 87 ++++++++++------------------- index/scorch/segment/zap/segment.go | 9 +-- index/scorch/snapshot_index.go | 6 +- index/scorch/snapshot_index_tfr.go | 39 +------------ index/scorch/stats.go | 7 --- 7 files changed, 41 insertions(+), 126 deletions(-) diff --git a/index/scorch/segment/empty.go b/index/scorch/segment/empty.go index a0fe74555..af50d0aaf 100644 --- a/index/scorch/segment/empty.go +++ b/index/scorch/segment/empty.go @@ -91,8 +91,6 @@ func (e *EmptyDictionary) OnlyIterator(onlyTerms [][]byte, return &EmptyDictionaryIterator{} } -func (e *EmptyDictionary) Recycle() {} - type EmptyDictionaryIterator struct{} func (e *EmptyDictionaryIterator) Next() (*index.DictEntry, error) { @@ -118,8 +116,6 @@ func (e *EmptyPostingsList) Count() uint64 { return 0 } -func (e *EmptyPostingsList) Recycle() {} - type EmptyPostingsIterator struct{} func (e *EmptyPostingsIterator) 
Next() (Posting, error) { @@ -129,5 +125,3 @@ func (e *EmptyPostingsIterator) Next() (Posting, error) { func (e *EmptyPostingsIterator) Size() int { return 0 } - -func (e *EmptyPostingsIterator) Recycle() {} diff --git a/index/scorch/segment/zap/dict.go b/index/scorch/segment/zap/dict.go index 4af29409a..219bf1526 100644 --- a/index/scorch/segment/zap/dict.go +++ b/index/scorch/segment/zap/dict.go @@ -17,7 +17,6 @@ package zap import ( "bytes" "fmt" - "sync" "github.com/RoaringBitmap/roaring" "github.com/blevesearch/bleve/index" @@ -34,8 +33,6 @@ type Dictionary struct { fstReader *vellum.Reader } -var dictionaryPool = sync.Pool{New: func() interface{} { return &Dictionary{} }} - // PostingsList returns the postings list for the specified term func (d *Dictionary) PostingsList(term []byte, except *roaring.Bitmap, prealloc segment.PostingsList) (segment.PostingsList, error) { @@ -82,7 +79,7 @@ func (d *Dictionary) postingsListFromOffset(postingsOffset uint64, except *roari func (d *Dictionary) postingsListInit(rv *PostingsList, except *roaring.Bitmap) *PostingsList { if rv == nil || rv == emptyPostingsList { - rv = postingsListPool.Get().(*PostingsList) + rv = &PostingsList{} } else { postings := rv.postings if postings != nil { @@ -228,14 +225,6 @@ func (d *Dictionary) OnlyIterator(onlyTerms [][]byte, return rv } -func (d *Dictionary) Recycle() { - *d = Dictionary{} // clear fields - - // TODO: need vellum API's to allow for recycled or prealloc'ed FST's - - dictionaryPool.Put(d) -} - // DictionaryIterator is an iterator for term dictionary type DictionaryIterator struct { d *Dictionary diff --git a/index/scorch/segment/zap/posting.go b/index/scorch/segment/zap/posting.go index 64f223896..0ac7938e1 100644 --- a/index/scorch/segment/zap/posting.go +++ b/index/scorch/segment/zap/posting.go @@ -21,7 +21,6 @@ import ( "io" "math" "reflect" - "sync" "github.com/RoaringBitmap/roaring" "github.com/blevesearch/bleve/index/scorch/segment" @@ -113,8 +112,6 @@ type 
PostingsList struct { // represents an immutable, empty postings list var emptyPostingsList = &PostingsList{} -var postingsListPool = sync.Pool{New: func() interface{} { return &PostingsList{} }} - func (p *PostingsList) Size() int { sizeInBytes := reflectStaticSizePostingsList + size.SizeOfPtr @@ -158,9 +155,38 @@ func (p *PostingsList) Iterator(includeFreq, includeNorm, includeLocs bool, func (p *PostingsList) iterator(includeFreq, includeNorm, includeLocs bool, rv *PostingsIterator) *PostingsIterator { if rv == nil { - rv = postingsIteratorPool.Get().(*PostingsIterator) + rv = &PostingsIterator{} } else { - rv.Clear() + freqNormReader := rv.freqNormReader + if freqNormReader != nil { + freqNormReader.Reset([]byte(nil)) + } + + locReader := rv.locReader + if locReader != nil { + locReader.Reset([]byte(nil)) + } + + freqChunkOffsets := rv.freqChunkOffsets[:0] + locChunkOffsets := rv.locChunkOffsets[:0] + + nextLocs := rv.nextLocs[:0] + nextSegmentLocs := rv.nextSegmentLocs[:0] + + buf := rv.buf + + *rv = PostingsIterator{} // clear the struct + + rv.freqNormReader = freqNormReader + rv.locReader = locReader + + rv.freqChunkOffsets = freqChunkOffsets + rv.locChunkOffsets = locChunkOffsets + + rv.nextLocs = nextLocs + rv.nextSegmentLocs = nextSegmentLocs + + rv.buf = buf } rv.postings = p @@ -296,14 +322,6 @@ func (rv *PostingsList) init1Hit(fstVal uint64) error { return nil } -func (rv *PostingsList) Recycle() { - if rv != emptyPostingsList { - *rv = PostingsList{} - // TODO: can we also recycle the roaring bitmaps? 
- postingsListPool.Put(rv) - } -} - // PostingsIterator provides a way to iterate through the postings list type PostingsIterator struct { postings *PostingsList @@ -339,8 +357,6 @@ type PostingsIterator struct { var emptyPostingsIterator = &PostingsIterator{} -var postingsIteratorPool = sync.Pool{New: func() interface{} { return &PostingsIterator{} }} - func (i *PostingsIterator) Size() int { sizeInBytes := reflectStaticSizePostingsIterator + size.SizeOfPtr + len(i.currChunkFreqNorm) + @@ -695,47 +711,6 @@ func (i *PostingsIterator) nextDocNumAtOrAfter(atOrAfter uint64) (uint64, bool, return uint64(n), true, nil } -func (rv *PostingsIterator) Clear() { - freqNormReader := rv.freqNormReader - if freqNormReader != nil { - freqNormReader.Reset([]byte(nil)) - } - - locReader := rv.locReader - if locReader != nil { - locReader.Reset([]byte(nil)) - } - - freqChunkOffsets := rv.freqChunkOffsets[:0] - locChunkOffsets := rv.locChunkOffsets[:0] - - nextLocs := rv.nextLocs[:0] - nextSegmentLocs := rv.nextSegmentLocs[:0] - - buf := rv.buf - - *rv = PostingsIterator{} // clear the struct - - rv.freqNormReader = freqNormReader - rv.locReader = locReader - - rv.freqChunkOffsets = freqChunkOffsets - rv.locChunkOffsets = locChunkOffsets - - rv.nextLocs = nextLocs - rv.nextSegmentLocs = nextSegmentLocs - - rv.buf = buf -} - -func (i *PostingsIterator) Recycle() { - if i != emptyPostingsIterator { - i.Clear() - - postingsIteratorPool.Put(i) - } -} - // Posting is a single entry in a postings list type Posting struct { docNum uint64 diff --git a/index/scorch/segment/zap/segment.go b/index/scorch/segment/zap/segment.go index b9a328ad4..8c6de211a 100644 --- a/index/scorch/segment/zap/segment.go +++ b/index/scorch/segment/zap/segment.go @@ -250,10 +250,11 @@ func (s *SegmentBase) Dictionary(field string) (segment.TermDictionary, error) { func (sb *SegmentBase) dictionary(field string) (rv *Dictionary, err error) { fieldIDPlus1 := sb.fieldsMap[field] if fieldIDPlus1 > 0 { - rv = 
dictionaryPool.Get().(*Dictionary) - rv.sb = sb - rv.field = field - rv.fieldID = fieldIDPlus1 - 1 + rv = &Dictionary{ + sb: sb, + field: field, + fieldID: fieldIDPlus1 - 1, + } dictStart := sb.dictLocs[rv.fieldID] if dictStart > 0 { diff --git a/index/scorch/snapshot_index.go b/index/scorch/snapshot_index.go index 7dab97599..285412cf3 100644 --- a/index/scorch/snapshot_index.go +++ b/index/scorch/snapshot_index.go @@ -471,13 +471,13 @@ func (i *IndexSnapshot) allocTermFieldReaderDicts(field string) (tfr *IndexSnaps return &IndexSnapshotTermFieldReader{} } -func (i *IndexSnapshot) recycleTermFieldReader(tfr *IndexSnapshotTermFieldReader) bool { +func (i *IndexSnapshot) recycleTermFieldReader(tfr *IndexSnapshotTermFieldReader) { i.parent.rootLock.RLock() obsolete := i.parent.root != i i.parent.rootLock.RUnlock() if obsolete { // if we're not the current root (mutations happened), don't bother recycling - return false + return } i.m2.Lock() @@ -486,8 +486,6 @@ func (i *IndexSnapshot) recycleTermFieldReader(tfr *IndexSnapshotTermFieldReader } i.fieldTFRs[tfr.field] = append(i.fieldTFRs[tfr.field], tfr) i.m2.Unlock() - - return true } func docNumberToBytes(buf []byte, in uint64) []byte { diff --git a/index/scorch/snapshot_index_tfr.go b/index/scorch/snapshot_index_tfr.go index 229869537..89af3be4c 100644 --- a/index/scorch/snapshot_index_tfr.go +++ b/index/scorch/snapshot_index_tfr.go @@ -178,43 +178,8 @@ func (i *IndexSnapshotTermFieldReader) Count() uint64 { func (i *IndexSnapshotTermFieldReader) Close() error { if i.snapshot != nil { - stats := &i.snapshot.parent.stats - - atomic.AddUint64(&stats.TotTermSearchersFinished, uint64(1)) - - if i.snapshot.recycleTermFieldReader(i) { - atomic.AddUint64(&stats.TotTermFieldReadersRecycled, uint64(1)) - } else { - atomic.AddUint64(&stats.TotTermFieldReadersGarbaged, uint64(1)) - - // the snapshot couldn't recycle the TFR (it was too - // obsolete), so recycle its constituent parts - - for _, x := range i.iterators { - 
recycle(x) - } - atomic.AddUint64(&stats.TotPostingsIteratorsRecycled, uint64(len(i.iterators))) - - for _, x := range i.postings { - recycle(x) - } - atomic.AddUint64(&stats.TotPostingsListsRecycled, uint64(len(i.postings))) - - for _, x := range i.dicts { - recycle(x) - } - atomic.AddUint64(&stats.TotTermDictionariesRecycled, uint64(len(i.dicts))) - } + atomic.AddUint64(&i.snapshot.parent.stats.TotTermSearchersFinished, uint64(1)) + i.snapshot.recycleTermFieldReader(i) } return nil } - -type Recycler interface { - Recycle() -} - -func recycle(x interface{}) { - if r, ok := x.(Recycler); ok { - r.Recycle() - } -} diff --git a/index/scorch/stats.go b/index/scorch/stats.go index ba497de99..bc1ca4bd8 100644 --- a/index/scorch/stats.go +++ b/index/scorch/stats.go @@ -115,13 +115,6 @@ type Stats struct { MaxMemMergeZapTime uint64 TotMemMergeSegments uint64 TotMemorySegmentsAtRoot uint64 - - TotTermFieldReadersGarbaged uint64 - TotTermFieldReadersRecycled uint64 - - TotTermDictionariesRecycled uint64 - TotPostingsListsRecycled uint64 - TotPostingsIteratorsRecycled uint64 } // atomically populates the returned map From 424c56dbfe8301f7338e70cdc01c5755827d3c21 Mon Sep 17 00:00:00 2001 From: abhinavdangeti Date: Wed, 12 Sep 2018 12:15:58 -0700 Subject: [PATCH 471/728] Update vellum's revision in the vendor/manifest --- vendor/manifest | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vendor/manifest b/vendor/manifest index 5d1fb4717..72f0b6d7c 100644 --- a/vendor/manifest +++ b/vendor/manifest @@ -137,7 +137,7 @@ "importpath": "github.com/couchbase/vellum", "repository": "https://github.com/couchbase/vellum", "vcs": "git", - "revision": "35d9e7346a69d2499623063cd39f305599fc7eae", + "revision": "01d5c56e609533acd717717c8acc0d2dea6bfb89", "branch": "master", "notests": true } From 478805bc7f6790f6f088c4d49ca0bee9e443ca10 Mon Sep 17 00:00:00 2001 From: abhinavdangeti Date: Mon, 17 Sep 2018 16:46:22 -0700 Subject: [PATCH 472/728] Add support for GeoSpatial 
points in string format + Following specification used in elastic search: https://www.elastic.co/guide/en/elasticsearch/reference/current/geo-point.html + String formats allowed: - "lat,lon" - " lat, lon " - "geohash" + All the necessary code to decode geohashes is obtained from: - https://github.com/mmcloughlin/geohash + Also see: https://issues.couchbase.com/browse/MB-30542 --- geo/geohash.go | 174 ++++++++++++++++++++++++++++++++++++++++ geo/parse.go | 32 ++++++++ mapping/document.go | 6 +- mapping/mapping_test.go | 78 ++++++++++++++++-- 4 files changed, 283 insertions(+), 7 deletions(-) create mode 100644 geo/geohash.go diff --git a/geo/geohash.go b/geo/geohash.go new file mode 100644 index 000000000..35db720c0 --- /dev/null +++ b/geo/geohash.go @@ -0,0 +1,174 @@ +// The code here was obtained from: +// https://github.com/mmcloughlin/geohash + +// The MIT License (MIT) +// Copyright (c) 2015 Michael McLoughlin +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: + +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. + +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +package geo + +import ( + "math" +) + +// encoding encapsulates an encoding defined by a given base32 alphabet. +type encoding struct { + enc string + dec [256]byte +} + +// newEncoding constructs a new encoding defined by the given alphabet, +// which must be a 32-byte string. +func newEncoding(encoder string) *encoding { + e := new(encoding) + e.enc = encoder + for i := 0; i < len(e.dec); i++ { + e.dec[i] = 0xff + } + for i := 0; i < len(encoder); i++ { + e.dec[encoder[i]] = byte(i) + } + return e +} + +// Decode string into bits of a 64-bit word. The string s may be at most 12 +// characters. +func (e *encoding) decode(s string) uint64 { + x := uint64(0) + for i := 0; i < len(s); i++ { + x = (x << 5) | uint64(e.dec[s[i]]) + } + return x +} + +// Encode bits of 64-bit word into a string. +func (e *encoding) encode(x uint64) string { + b := [12]byte{} + for i := 0; i < 12; i++ { + b[11-i] = e.enc[x&0x1f] + x >>= 5 + } + return string(b[:]) +} + +// Base32Encoding with the Geohash alphabet. +var base32encoding = newEncoding("0123456789bcdefghjkmnpqrstuvwxyz") + +// BoundingBox returns the region encoded by the given string geohash. +func geoBoundingBox(hash string) geoBox { + bits := uint(5 * len(hash)) + inthash := base32encoding.decode(hash) + return geoBoundingBoxIntWithPrecision(inthash, bits) +} + +// Box represents a rectangle in latitude/longitude space. +type geoBox struct { + minLat float64 + maxLat float64 + minLng float64 + maxLng float64 +} + +// Round returns a point inside the box, making an effort to round to minimal +// precision. 
+func (b geoBox) round() (lat, lng float64) { + x := maxDecimalPower(b.maxLat - b.minLat) + lat = math.Ceil(b.minLat/x) * x + x = maxDecimalPower(b.maxLng - b.minLng) + lng = math.Ceil(b.minLng/x) * x + return +} + +// precalculated for performance +var exp232 = math.Exp2(32) + +// errorWithPrecision returns the error range in latitude and longitude for in +// integer geohash with bits of precision. +func errorWithPrecision(bits uint) (latErr, lngErr float64) { + b := int(bits) + latBits := b / 2 + lngBits := b - latBits + latErr = math.Ldexp(180.0, -latBits) + lngErr = math.Ldexp(360.0, -lngBits) + return +} + +// minDecimalPlaces returns the minimum number of decimal places such that +// there must exist an number with that many places within any range of width +// r. This is intended for returning minimal precision coordinates inside a +// box. +func maxDecimalPower(r float64) float64 { + m := int(math.Floor(math.Log10(r))) + return math.Pow10(m) +} + +// Encode the position of x within the range -r to +r as a 32-bit integer. +func encodeRange(x, r float64) uint32 { + p := (x + r) / (2 * r) + return uint32(p * exp232) +} + +// Decode the 32-bit range encoding X back to a value in the range -r to +r. +func decodeRange(X uint32, r float64) float64 { + p := float64(X) / exp232 + x := 2*r*p - r + return x +} + +// Squash the even bitlevels of X into a 32-bit word. Odd bitlevels of X are +// ignored, and may take any value. +func squash(X uint64) uint32 { + X &= 0x5555555555555555 + X = (X | (X >> 1)) & 0x3333333333333333 + X = (X | (X >> 2)) & 0x0f0f0f0f0f0f0f0f + X = (X | (X >> 4)) & 0x00ff00ff00ff00ff + X = (X | (X >> 8)) & 0x0000ffff0000ffff + X = (X | (X >> 16)) & 0x00000000ffffffff + return uint32(X) +} + +// Deinterleave the bits of X into 32-bit words containing the even and odd +// bitlevels of X, respectively. 
+func deinterleave(X uint64) (uint32, uint32) { + return squash(X), squash(X >> 1) +} + +// BoundingBoxIntWithPrecision returns the region encoded by the integer +// geohash with the specified precision. +func geoBoundingBoxIntWithPrecision(hash uint64, bits uint) geoBox { + fullHash := hash << (64 - bits) + latInt, lngInt := deinterleave(fullHash) + lat := decodeRange(latInt, 90) + lng := decodeRange(lngInt, 180) + latErr, lngErr := errorWithPrecision(bits) + return geoBox{ + minLat: lat, + maxLat: lat + latErr, + minLng: lng, + maxLng: lng + lngErr, + } +} + +// ---------------------------------------------------------------------- + +// Decode the string geohash to a (lat, lng) point. +func GeoHashDecode(hash string) (lat, lng float64) { + box := geoBoundingBox(hash) + return box.round() +} diff --git a/geo/parse.go b/geo/parse.go index 8dfc6eed2..0511fea7b 100644 --- a/geo/parse.go +++ b/geo/parse.go @@ -16,6 +16,7 @@ package geo import ( "reflect" + "strconv" "strings" ) @@ -24,6 +25,8 @@ import ( // Container: // slice length 2 (GeoJSON) // first element lon, second element lat +// string (coordinates separated by comma, or a geohash) +// first element lat, second element lon // map[string]interface{} // exact keys lat and lon or lng // struct @@ -59,6 +62,35 @@ func ExtractGeoPoint(thing interface{}) (lon, lat float64, success bool) { } } + // is it a string + if thingVal.Kind() == reflect.String { + geoStr := thingVal.Interface().(string) + if strings.Contains(geoStr, ",") { + // geo point with coordinates split by comma + points := strings.Split(geoStr, ",") + for i, point := range points { + // trim any leading or trailing white spaces + points[i] = strings.TrimSpace(point) + } + if len(points) == 2 { + var err error + lat, err = strconv.ParseFloat(points[0], 64) + if err == nil { + foundLat = true + } + lon, err = strconv.ParseFloat(points[1], 64) + if err == nil { + foundLon = true + } + } + } else { + // geohash + lat, lon = GeoHashDecode(geoStr) + 
foundLat = true + foundLon = true + } + } + // is it a map if l, ok := thing.(map[string]interface{}); ok { if lval, ok := l["lon"]; ok { diff --git a/mapping/document.go b/mapping/document.go index cc3582cad..f950b59be 100644 --- a/mapping/document.go +++ b/mapping/document.go @@ -424,7 +424,11 @@ func (dm *DocumentMapping) processProperty(property interface{}, path []string, if subDocMapping != nil { // index by explicit mapping for _, fieldMapping := range subDocMapping.Fields { - fieldMapping.processString(propertyValueString, pathString, path, indexes, context) + if fieldMapping.Type == "geopoint" { + fieldMapping.processGeoPoint(property, pathString, path, indexes, context) + } else { + fieldMapping.processString(propertyValueString, pathString, path, indexes, context) + } } } else if closestDocMapping.Dynamic { // automatic indexing behavior diff --git a/mapping/mapping_test.go b/mapping/mapping_test.go index a13a90b8b..498fdb4ec 100644 --- a/mapping/mapping_test.go +++ b/mapping/mapping_test.go @@ -18,13 +18,13 @@ import ( "encoding/json" "fmt" "reflect" + "strconv" "testing" "time" "github.com/blevesearch/bleve/analysis/tokenizer/exception" "github.com/blevesearch/bleve/analysis/tokenizer/regexp" "github.com/blevesearch/bleve/document" - "github.com/blevesearch/bleve/numeric" ) var mappingSource = []byte(`{ @@ -870,6 +870,7 @@ func TestMappingForGeo(t *testing.T) { mapping.DefaultMapping = thingMapping geopoints := []interface{}{} + expect := [][]float64{} // to contain expected [lon,lat] for geopoints // geopoint as a struct geopoints = append(geopoints, struct { @@ -882,6 +883,7 @@ func TestMappingForGeo(t *testing.T) { Lat: -90, }, }) + expect = append(expect, []float64{-180, -90}) // geopoint as a map geopoints = append(geopoints, struct { @@ -894,8 +896,9 @@ func TestMappingForGeo(t *testing.T) { "lat": -90, }, }) + expect = append(expect, []float64{-180, -90}) - // geopoint as a slice + // geopoint as a slice, format: {lon, lat} geopoints = 
append(geopoints, struct { Name string `json:"name"` Location []interface{} `json:"location"` @@ -905,6 +908,55 @@ func TestMappingForGeo(t *testing.T) { -180, -90, }, }) + expect = append(expect, []float64{-180, -90}) + + // geopoint as a string, format: "lat,lon" + geopoints = append(geopoints, struct { + Name string `json:"name"` + Location []interface{} `json:"location"` + }{ + Name: "string", + Location: []interface{}{ + "-90,-180", + }, + }) + expect = append(expect, []float64{-180, -90}) + + // geopoint as a string, format: "lat , lon" with leading/trailing whitespaces + geopoints = append(geopoints, struct { + Name string `json:"name"` + Location []interface{} `json:"location"` + }{ + Name: "string", + Location: []interface{}{ + "-90 , -180", + }, + }) + expect = append(expect, []float64{-180, -90}) + + // geopoint as a string - geohash + geopoints = append(geopoints, struct { + Name string `json:"name"` + Location []interface{} `json:"location"` + }{ + Name: "string", + Location: []interface{}{ + "000000000000", + }, + }) + expect = append(expect, []float64{-180, -90}) + + // geopoint as a string - geohash + geopoints = append(geopoints, struct { + Name string `json:"name"` + Location []interface{} `json:"location"` + }{ + Name: "string", + Location: []interface{}{ + "drm3btev3e86", + }, + }) + expect = append(expect, []float64{-71.34, 41.12}) for i, geopoint := range geopoints { doc := document.NewDocument(string(i)) @@ -917,10 +969,24 @@ func TestMappingForGeo(t *testing.T) { for _, f := range doc.Fields { if f.Name() == "location" { foundGeo = true - got := f.Value() - expect := []byte(numeric.MustNewPrefixCodedInt64(0, 0)) - if !reflect.DeepEqual(got, expect) { - t.Errorf("expected geo value: %v, got %v", expect, got) + geoF, ok := f.(*document.GeoPointField) + if !ok { + t.Errorf("expected a geopoint field!") + } + lon, err := geoF.Lon() + if err != nil { + t.Errorf("error in fetching lon, err: %v", err) + } + lat, err := geoF.Lat() + if err != nil { 
+ t.Errorf("error in fetching lat, err: %v", err) + } + // round obtained lon, lat to 2 decimal places + roundLon, _ := strconv.ParseFloat(fmt.Sprintf("%.2f", lon), 64) + roundLat, _ := strconv.ParseFloat(fmt.Sprintf("%.2f", lat), 64) + if roundLon != expect[i][0] || roundLat != expect[i][1] { + t.Errorf("expected geo point: {%v, %v}, got {%v, %v}", + expect[i][0], expect[i][1], lon, lat) } } } From 2e631fb90052bd251a388944111cd0da102ed4cc Mon Sep 17 00:00:00 2001 From: Sreekanth Sivasankaran Date: Mon, 15 Oct 2018 11:22:13 +0530 Subject: [PATCH 473/728] MB-31405 - higher disk usage in DGM Always perform in-memory segment merging before persisting segments, esp even when the memory pressure is applied. --- index/scorch/persister.go | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/index/scorch/persister.go b/index/scorch/persister.go index 5e9450189..60c2c4ef1 100644 --- a/index/scorch/persister.go +++ b/index/scorch/persister.go @@ -269,15 +269,12 @@ func (s *Scorch) parsePersisterOptions() (*persisterOptions, error) { } func (s *Scorch) persistSnapshot(snapshot *IndexSnapshot) error { - // perform in-memory merging only when there is no memory pressure - if s.paused() == 0 { - persisted, err := s.persistSnapshotMaybeMerge(snapshot) - if err != nil { - return err - } - if persisted { - return nil - } + persisted, err := s.persistSnapshotMaybeMerge(snapshot) + if err != nil { + return err + } + if persisted { + return nil } return s.persistSnapshotDirect(snapshot) From f92653698f2903d2e066c3f22794efdc079839c5 Mon Sep 17 00:00:00 2001 From: abhinavdangeti Date: Mon, 15 Oct 2018 12:57:53 -0700 Subject: [PATCH 474/728] Fix Boolean searcher's Advance + Relates to: https://github.com/blevesearch/bleve/pull/955 + Advance the boolean searcher's cursor only if necessary, the boolean searcher's cursor is tracked by it's currentID. + With this in place, we shouldn't be further checking the tracked cursor positions of the nested searchers. 
+ Unit test + Fixes: https://github.com/blevesearch/bleve/issues/1021 --- search/searcher/search_boolean.go | 31 ++++---- search_test.go | 123 ++++++++++++++++++++++++++++++ 2 files changed, 137 insertions(+), 17 deletions(-) diff --git a/search/searcher/search_boolean.go b/search/searcher/search_boolean.go index f9684af29..a6f3a150b 100644 --- a/search/searcher/search_boolean.go +++ b/search/searcher/search_boolean.go @@ -331,10 +331,10 @@ func (s *BooleanSearcher) Advance(ctx *search.SearchContext, ID index.IndexInter } } - var err error - // Advance nested searcher(s) only if the cursor is trailing the lookup ID - if s.mustSearcher != nil { - if s.currMust == nil || s.currMust.IndexInternalID.Compare(ID) < 0 { + // Advance the searcher only if the cursor is trailing the lookup ID + if s.currentID == nil || s.currentID.Compare(ID) < 0 { + var err error + if s.mustSearcher != nil { if s.currMust != nil { ctx.DocumentMatchPool.Put(s.currMust) } @@ -343,9 +343,7 @@ func (s *BooleanSearcher) Advance(ctx *search.SearchContext, ID index.IndexInter return nil, err } } - } - if s.shouldSearcher != nil { - if s.currShould == nil || s.currShould.IndexInternalID.Compare(ID) < 0 { + if s.shouldSearcher != nil { if s.currShould != nil { ctx.DocumentMatchPool.Put(s.currShould) } @@ -354,9 +352,8 @@ func (s *BooleanSearcher) Advance(ctx *search.SearchContext, ID index.IndexInter return nil, err } } - } - if s.mustNotSearcher != nil { - if s.currMustNot == nil || s.currMustNot.IndexInternalID.Compare(ID) < 0 { + + if s.mustNotSearcher != nil { if s.currMustNot != nil { ctx.DocumentMatchPool.Put(s.currMustNot) } @@ -365,14 +362,14 @@ func (s *BooleanSearcher) Advance(ctx *search.SearchContext, ID index.IndexInter return nil, err } } - } - if s.mustSearcher != nil && s.currMust != nil { - s.currentID = s.currMust.IndexInternalID - } else if s.mustSearcher == nil && s.currShould != nil { - s.currentID = s.currShould.IndexInternalID - } else { - s.currentID = nil + if s.mustSearcher 
!= nil && s.currMust != nil { + s.currentID = s.currMust.IndexInternalID + } else if s.mustSearcher == nil && s.currShould != nil { + s.currentID = s.currShould.IndexInternalID + } else { + s.currentID = nil + } } return s.Next(ctx) diff --git a/search_test.go b/search_test.go index e7a878653..3c9da5f35 100644 --- a/search_test.go +++ b/search_test.go @@ -547,6 +547,129 @@ func TestNestedBooleanSearchers(t *testing.T) { } } +func TestNestedBooleanMustNotSearcher(t *testing.T) { + defer func() { + err := os.RemoveAll("testidx") + if err != nil { + t.Fatal(err) + } + }() + + // create an index with default settings + idxMapping := NewIndexMapping() + idx, err := New("testidx", idxMapping) + if err != nil { + t.Fatal(err) + } + + // create and insert documents as a batch + batch := idx.NewBatch() + + docs := []struct { + id string + hasRole bool + investigationId string + }{ + { + id: "1@1", + hasRole: true, + investigationId: "1", + }, + { + id: "1@2", + hasRole: false, + investigationId: "2", + }, + { + id: "2@1", + hasRole: true, + investigationId: "1", + }, + { + id: "2@2", + hasRole: false, + investigationId: "2", + }, + { + id: "3@1", + hasRole: true, + investigationId: "1", + }, + { + id: "3@2", + hasRole: false, + investigationId: "2", + }, + { + id: "4@1", + hasRole: true, + investigationId: "1", + }, + { + id: "5@1", + hasRole: true, + investigationId: "1", + }, + { + id: "6@1", + hasRole: true, + investigationId: "1", + }, + { + id: "7@1", + hasRole: true, + investigationId: "1", + }, + } + + for i := 0; i < len(docs); i++ { + doc := document.NewDocument(docs[i].id) + doc.Fields = []document.Field{ + document.NewTextField("id", []uint64{}, []byte(docs[i].id)), + document.NewBooleanField("hasRole", []uint64{}, docs[i].hasRole), + document.NewTextField("investigationId", []uint64{}, []byte(docs[i].investigationId)), + } + + doc.CompositeFields = []*document.CompositeField{ + document.NewCompositeFieldWithIndexingOptions( + "_all", true, []string{"text"}, 
[]string{}, + document.IndexField|document.IncludeTermVectors), + } + + if err = batch.IndexAdvanced(doc); err != nil { + t.Fatal(err) + } + } + + if err = idx.Batch(batch); err != nil { + t.Fatal(err) + } + + tq := NewTermQuery("1") + tq.SetField("investigationId") + // using must not, for cases that the field did not exists at all + hasRole := NewBoolFieldQuery(true) + hasRole.SetField("hasRole") + noRole := NewBooleanQuery() + noRole.AddMustNot(hasRole) + oneRolesOrNoRoles := NewBooleanQuery() + oneRolesOrNoRoles.AddShould(noRole) + oneRolesOrNoRoles.SetMinShould(1) + q := NewConjunctionQuery(tq, oneRolesOrNoRoles) + + sr := NewSearchRequestOptions(q, 100, 0, false) + sr.Fields = []string{"hasRole"} + sr.Highlight = NewHighlight() + + res, err := idx.Search(sr) + if err != nil { + t.Fatal(err) + } + if res.Total != 0 { + t.Fatalf("Unexpected result, %v != 0", res.Total) + } +} + func TestSearchScorchOverEmptyKeyword(t *testing.T) { defaultIndexType := Config.DefaultIndexType From de7c7204188a403d73fb2360f0f1a66ea44845d9 Mon Sep 17 00:00:00 2001 From: Sreekanth Sivasankaran Date: Fri, 19 Oct 2018 10:28:25 +0530 Subject: [PATCH 475/728] MB-31660 - Scorch file leaks Faster zap file cleanup on index deletion --- index/scorch/merge.go | 35 ++++++++------- index/scorch/persister.go | 8 ++-- index/scorch/segment/segment.go | 4 ++ index/scorch/segment/zap/merge.go | 54 ++++++++++++++++++++---- index/scorch/segment/zap/merge_test.go | 10 ++--- index/scorch/segment/zap/segment_test.go | 2 +- index/scorch/stats.go | 5 ++- 7 files changed, 81 insertions(+), 37 deletions(-) diff --git a/index/scorch/merge.go b/index/scorch/merge.go index 38646bf0b..61abe6951 100644 --- a/index/scorch/merge.go +++ b/index/scorch/merge.go @@ -60,7 +60,7 @@ OUTER: err := s.planMergeAtSnapshot(ourSnapshot, mergePlannerOptions) if err != nil { atomic.StoreUint64(&s.iStats.mergeEpoch, 0) - if err == ErrClosed { + if err == segment.ErrClosed { // index has been closed _ = ourSnapshot.DecRef() break 
OUTER @@ -187,7 +187,7 @@ func (s *Scorch) planMergeAtSnapshot(ourSnapshot *IndexSnapshot, } var oldNewDocNums map[uint64][]uint64 - var segment segment.Segment + var seg segment.Segment if len(segmentsToMerge) > 0 { filename := zapFileName(newSegmentID) s.markIneligibleForRemoval(filename) @@ -196,7 +196,8 @@ func (s *Scorch) planMergeAtSnapshot(ourSnapshot *IndexSnapshot, fileMergeZapStartTime := time.Now() atomic.AddUint64(&s.stats.TotFileMergeZapBeg, 1) - newDocNums, nBytes, err := zap.Merge(segmentsToMerge, docsToDrop, path, DefaultChunkFactor) + newDocNums, nBytes, err := zap.Merge(segmentsToMerge, docsToDrop, path, + DefaultChunkFactor, s.closeCh) atomic.AddUint64(&s.stats.TotFileMergeZapEnd, 1) atomic.AddUint64(&s.stats.TotFileMergeWrittenBytes, nBytes) @@ -209,10 +210,13 @@ func (s *Scorch) planMergeAtSnapshot(ourSnapshot *IndexSnapshot, if err != nil { s.unmarkIneligibleForRemoval(filename) atomic.AddUint64(&s.stats.TotFileMergePlanTasksErr, 1) + if err == segment.ErrClosed { + return err + } return fmt.Errorf("merging failed: %v", err) } - segment, err = zap.Open(path) + seg, err = zap.Open(path) if err != nil { s.unmarkIneligibleForRemoval(filename) atomic.AddUint64(&s.stats.TotFileMergePlanTasksErr, 1) @@ -230,7 +234,7 @@ func (s *Scorch) planMergeAtSnapshot(ourSnapshot *IndexSnapshot, id: newSegmentID, old: oldMap, oldNewDocNums: oldNewDocNums, - new: segment, + new: seg, notify: make(chan *IndexSnapshot, 1), } notifications = append(notifications, sm.notify) @@ -238,8 +242,8 @@ func (s *Scorch) planMergeAtSnapshot(ourSnapshot *IndexSnapshot, // give it to the introducer select { case <-s.closeCh: - _ = segment.Close() - return ErrClosed + _ = seg.Close() + return segment.ErrClosed case s.merges <- sm: atomic.AddUint64(&s.stats.TotFileMergeIntroductions, 1) } @@ -250,7 +254,8 @@ func (s *Scorch) planMergeAtSnapshot(ourSnapshot *IndexSnapshot, for _, notification := range notifications { select { case <-s.closeCh: - return ErrClosed + 
atomic.AddUint64(&s.stats.TotFileMergeIntroductionsSkipped, 1) + return segment.ErrClosed case newSnapshot := <-notification: atomic.AddUint64(&s.stats.TotFileMergeIntroductionsDone, 1) if newSnapshot != nil { @@ -287,7 +292,7 @@ func (s *Scorch) mergeSegmentBases(snapshot *IndexSnapshot, path := s.path + string(os.PathSeparator) + filename newDocNums, _, err := - zap.MergeSegmentBases(sbs, sbsDrops, path, chunkFactor) + zap.MergeSegmentBases(sbs, sbsDrops, path, chunkFactor, s.closeCh) atomic.AddUint64(&s.stats.TotMemMergeZapEnd, 1) @@ -302,21 +307,21 @@ func (s *Scorch) mergeSegmentBases(snapshot *IndexSnapshot, return nil, 0, err } - segment, err := zap.Open(path) + seg, err := zap.Open(path) if err != nil { atomic.AddUint64(&s.stats.TotMemMergeErr, 1) return nil, 0, err } // update persisted stats - atomic.AddUint64(&s.stats.TotPersistedItems, segment.Count()) + atomic.AddUint64(&s.stats.TotPersistedItems, seg.Count()) atomic.AddUint64(&s.stats.TotPersistedSegments, 1) sm := &segmentMerge{ id: newSegmentID, old: make(map[uint64]*SegmentSnapshot), oldNewDocNums: make(map[uint64][]uint64), - new: segment, + new: seg, notify: make(chan *IndexSnapshot, 1), } @@ -328,14 +333,14 @@ func (s *Scorch) mergeSegmentBases(snapshot *IndexSnapshot, select { // send to introducer case <-s.closeCh: - _ = segment.DecRef() - return nil, 0, ErrClosed + _ = seg.DecRef() + return nil, 0, segment.ErrClosed case s.merges <- sm: } select { // wait for introduction to complete case <-s.closeCh: - return nil, 0, ErrClosed + return nil, 0, segment.ErrClosed case newSnapshot := <-sm.notify: atomic.AddUint64(&s.stats.TotMemMergeSegments, uint64(len(sbs))) atomic.AddUint64(&s.stats.TotMemMergeDone, 1) diff --git a/index/scorch/persister.go b/index/scorch/persister.go index 59e84d42f..01102c2f2 100644 --- a/index/scorch/persister.go +++ b/index/scorch/persister.go @@ -112,7 +112,7 @@ OUTER: } if err != nil { atomic.StoreUint64(&s.iStats.persistEpoch, 0) - if err == ErrClosed { + if err == 
segment.ErrClosed { // index has been closed _ = ourSnapshot.DecRef() break OUTER @@ -497,15 +497,13 @@ func (s *Scorch) persistSnapshotDirect(snapshot *IndexSnapshot) (err error) { select { case <-s.closeCh: - err = ErrClosed - return err + return segment.ErrClosed case s.persists <- persist: } select { case <-s.closeCh: - err = ErrClosed - return err + return segment.ErrClosed case <-persist.applied: } } diff --git a/index/scorch/segment/segment.go b/index/scorch/segment/segment.go index 28a879949..be9142c40 100644 --- a/index/scorch/segment/segment.go +++ b/index/scorch/segment/segment.go @@ -15,11 +15,15 @@ package segment import ( + "fmt" + "github.com/RoaringBitmap/roaring" "github.com/blevesearch/bleve/index" "github.com/couchbase/vellum" ) +var ErrClosed = fmt.Errorf("index closed") + // DocumentFieldValueVisitor defines a callback to be visited for each // stored field value. The return value determines if the visitor // should keep going. Returning true continues visiting, false stops. diff --git a/index/scorch/segment/zap/merge.go b/index/scorch/segment/zap/merge.go index 37e391bab..901115898 100644 --- a/index/scorch/segment/zap/merge.go +++ b/index/scorch/segment/zap/merge.go @@ -24,6 +24,7 @@ import ( "sort" "github.com/RoaringBitmap/roaring" + seg "github.com/blevesearch/bleve/index/scorch/segment" "github.com/couchbase/vellum" "github.com/golang/snappy" ) @@ -37,17 +38,17 @@ const docDropped = math.MaxUint64 // sentinel docNum to represent a deleted doc // remaining data. This new segment is built at the specified path, // with the provided chunkFactor. 
func Merge(segments []*Segment, drops []*roaring.Bitmap, path string, - chunkFactor uint32) ([][]uint64, uint64, error) { + chunkFactor uint32, closeCh chan struct{}) ([][]uint64, uint64, error) { segmentBases := make([]*SegmentBase, len(segments)) for segmenti, segment := range segments { segmentBases[segmenti] = &segment.SegmentBase } - return MergeSegmentBases(segmentBases, drops, path, chunkFactor) + return MergeSegmentBases(segmentBases, drops, path, chunkFactor, closeCh) } func MergeSegmentBases(segmentBases []*SegmentBase, drops []*roaring.Bitmap, path string, - chunkFactor uint32) ([][]uint64, uint64, error) { + chunkFactor uint32, closeCh chan struct{}) ([][]uint64, uint64, error) { flag := os.O_RDWR | os.O_CREATE f, err := os.OpenFile(path, flag, 0600) @@ -67,7 +68,7 @@ func MergeSegmentBases(segmentBases []*SegmentBase, drops []*roaring.Bitmap, pat cr := NewCountHashWriter(br) newDocNums, numDocs, storedIndexOffset, fieldsIndexOffset, docValueOffset, _, _, _, err := - MergeToWriter(segmentBases, drops, chunkFactor, cr) + MergeToWriter(segmentBases, drops, chunkFactor, cr, closeCh) if err != nil { cleanup() return nil, 0, err @@ -102,7 +103,7 @@ func MergeSegmentBases(segmentBases []*SegmentBase, drops []*roaring.Bitmap, pat } func MergeToWriter(segments []*SegmentBase, drops []*roaring.Bitmap, - chunkFactor uint32, cr *CountHashWriter) ( + chunkFactor uint32, cr *CountHashWriter, closeCh chan struct{}) ( newDocNums [][]uint64, numDocs, storedIndexOffset, fieldsIndexOffset, docValueOffset uint64, dictLocs []uint64, fieldsInv []string, fieldsMap map[string]uint16, @@ -114,16 +115,21 @@ func MergeToWriter(segments []*SegmentBase, drops []*roaring.Bitmap, fieldsMap = mapFields(fieldsInv) numDocs = computeNewDocCount(segments, drops) + + if isClosed(closeCh) { + return nil, 0, 0, 0, 0, nil, nil, nil, seg.ErrClosed + } + if numDocs > 0 { storedIndexOffset, newDocNums, err = mergeStoredAndRemap(segments, drops, - fieldsMap, fieldsInv, fieldsSame, numDocs, cr) + 
fieldsMap, fieldsInv, fieldsSame, numDocs, cr, closeCh) if err != nil { return nil, 0, 0, 0, 0, nil, nil, nil, err } dictLocs, docValueOffset, err = persistMergedRest(segments, drops, fieldsInv, fieldsMap, fieldsSame, - newDocNums, numDocs, chunkFactor, cr) + newDocNums, numDocs, chunkFactor, cr, closeCh) if err != nil { return nil, 0, 0, 0, 0, nil, nil, nil, err } @@ -165,7 +171,7 @@ func computeNewDocCount(segments []*SegmentBase, drops []*roaring.Bitmap) uint64 func persistMergedRest(segments []*SegmentBase, dropsIn []*roaring.Bitmap, fieldsInv []string, fieldsMap map[string]uint16, fieldsSame bool, newDocNumsIn [][]uint64, newSegDocCount uint64, chunkFactor uint32, - w *CountHashWriter) ([]uint64, uint64, error) { + w *CountHashWriter, closeCh chan struct{}) ([]uint64, uint64, error) { var bufMaxVarintLen64 []byte = make([]byte, binary.MaxVarintLen64) var bufLoc []uint64 @@ -200,6 +206,12 @@ func persistMergedRest(segments []*SegmentBase, dropsIn []*roaring.Bitmap, var segmentsInFocus []*SegmentBase for segmentI, segment := range segments { + + // check for the closure in meantime + if isClosed(closeCh) { + return nil, 0, seg.ErrClosed + } + dict, err2 := segment.dictionary(fieldName) if err2 != nil { return nil, 0, err2 @@ -277,6 +289,11 @@ func persistMergedRest(segments []*SegmentBase, dropsIn []*roaring.Bitmap, term, itrI, postingsOffset := enumerator.Current() if !bytes.Equal(prevTerm, term) { + // check for the closure in meantime + if isClosed(closeCh) { + return nil, 0, seg.ErrClosed + } + // if the term changed, write out the info collected // for the previous term err = finishTerm(prevTerm) @@ -353,6 +370,11 @@ func persistMergedRest(segments []*SegmentBase, dropsIn []*roaring.Bitmap, fdvReadersAvailable := false var dvIterClone *docValueReader for segmentI, segment := range segmentsInFocus { + // check for the closure in meantime + if isClosed(closeCh) { + return nil, 0, seg.ErrClosed + } + fieldIDPlus1 := uint16(segment.fieldsMap[fieldName]) if 
dvIter, exists := segment.fieldDvReaders[fieldIDPlus1-1]; exists && dvIter != nil { @@ -576,7 +598,7 @@ type varintEncoder func(uint64) (int, error) func mergeStoredAndRemap(segments []*SegmentBase, drops []*roaring.Bitmap, fieldsMap map[string]uint16, fieldsInv []string, fieldsSame bool, newSegDocCount uint64, - w *CountHashWriter) (uint64, [][]uint64, error) { + w *CountHashWriter, closeCh chan struct{}) (uint64, [][]uint64, error) { var rv [][]uint64 // The remapped or newDocNums for each segment. var newDocNum uint64 @@ -603,6 +625,11 @@ func mergeStoredAndRemap(segments []*SegmentBase, drops []*roaring.Bitmap, // for each segment for segI, segment := range segments { + // check for the closure in meantime + if isClosed(closeCh) { + return 0, nil, seg.ErrClosed + } + segNewDocNums := make([]uint64, segment.numDocs) dropsI := drops[segI] @@ -814,3 +841,12 @@ func mergeFields(segments []*SegmentBase) (bool, []string) { return fieldsSame, rv } + +func isClosed(closeCh chan struct{}) bool { + select { + case <-closeCh: + return true + default: + return false + } +} diff --git a/index/scorch/segment/zap/merge_test.go b/index/scorch/segment/zap/merge_test.go index 163663a49..276259c71 100644 --- a/index/scorch/segment/zap/merge_test.go +++ b/index/scorch/segment/zap/merge_test.go @@ -71,7 +71,7 @@ func TestMerge(t *testing.T) { segsToMerge[0] = segment.(*Segment) segsToMerge[1] = segment2.(*Segment) - _, _, err = Merge(segsToMerge, []*roaring.Bitmap{nil, nil}, "/tmp/scorch3.zap", 1024) + _, _, err = Merge(segsToMerge, []*roaring.Bitmap{nil, nil}, "/tmp/scorch3.zap", 1024, nil) if err != nil { t.Fatal(err) } @@ -175,7 +175,7 @@ func testMergeWithEmptySegments(t *testing.T, before bool, numEmptySegments int) drops := make([]*roaring.Bitmap, len(segsToMerge)) - _, _, err = Merge(segsToMerge, drops, "/tmp/scorch3.zap", 1024) + _, _, err = Merge(segsToMerge, drops, "/tmp/scorch3.zap", 1024, nil) if err != nil { t.Fatal(err) } @@ -217,7 +217,7 @@ func testMergeWithSelf(t 
*testing.T, segCur *Segment, expectedCount uint64) { segsToMerge := make([]*Segment, 1) segsToMerge[0] = segCur - _, _, err := Merge(segsToMerge, []*roaring.Bitmap{nil, nil}, "/tmp/"+fname, 1024) + _, _, err := Merge(segsToMerge, []*roaring.Bitmap{nil, nil}, "/tmp/"+fname, 1024, nil) if err != nil { t.Fatal(err) } @@ -589,7 +589,7 @@ func testMergeWithUpdates(t *testing.T, segmentDocIds [][]string, docsToDrop []* func testMergeAndDropSegments(t *testing.T, segsToMerge []*Segment, docsToDrop []*roaring.Bitmap, expectedNumDocs uint64) { _ = os.RemoveAll("/tmp/scorch-merged.zap") - _, _, err := Merge(segsToMerge, docsToDrop, "/tmp/scorch-merged.zap", 1024) + _, _, err := Merge(segsToMerge, docsToDrop, "/tmp/scorch-merged.zap", 1024, nil) if err != nil { t.Fatal(err) } @@ -823,7 +823,7 @@ func TestMergeBytesWritten(t *testing.T) { segsToMerge[0] = segment.(*Segment) segsToMerge[1] = segment2.(*Segment) - _, nBytes, err := Merge(segsToMerge, []*roaring.Bitmap{nil, nil}, "/tmp/scorch3.zap", 1024) + _, nBytes, err := Merge(segsToMerge, []*roaring.Bitmap{nil, nil}, "/tmp/scorch3.zap", 1024, nil) if err != nil { t.Fatal(err) } diff --git a/index/scorch/segment/zap/segment_test.go b/index/scorch/segment/zap/segment_test.go index 22b8af23f..623198c63 100644 --- a/index/scorch/segment/zap/segment_test.go +++ b/index/scorch/segment/zap/segment_test.go @@ -691,7 +691,7 @@ func TestMergedSegmentDocsWithNonOverlappingFields(t *testing.T) { segsToMerge[0] = segment1.(*Segment) segsToMerge[1] = segment2.(*Segment) - _, nBytes, err := Merge(segsToMerge, []*roaring.Bitmap{nil, nil}, "/tmp/scorch3.zap", 1024) + _, nBytes, err := Merge(segsToMerge, []*roaring.Bitmap{nil, nil}, "/tmp/scorch3.zap", 1024, nil) if err != nil { t.Fatal(err) } diff --git a/index/scorch/stats.go b/index/scorch/stats.go index bc1ca4bd8..2eb832f2c 100644 --- a/index/scorch/stats.go +++ b/index/scorch/stats.go @@ -103,8 +103,9 @@ type Stats struct { TotFileMergeZapTime uint64 MaxFileMergeZapTime uint64 - 
TotFileMergeIntroductions uint64 - TotFileMergeIntroductionsDone uint64 + TotFileMergeIntroductions uint64 + TotFileMergeIntroductionsDone uint64 + TotFileMergeIntroductionsSkipped uint64 TotMemMergeBeg uint64 TotMemMergeErr uint64 From 8b0d5dc7031dc4ee955b89b24f2c74578ebec47c Mon Sep 17 00:00:00 2001 From: abhinavdangeti Date: Thu, 25 Oct 2018 11:57:05 -0700 Subject: [PATCH 476/728] [zap cli] Handle case when read chunk is 0 bytes --- cmd/bleve/cmd/zap/docvalue.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/cmd/bleve/cmd/zap/docvalue.go b/cmd/bleve/cmd/zap/docvalue.go index dcfa58de1..065f078e4 100644 --- a/cmd/bleve/cmd/zap/docvalue.go +++ b/cmd/bleve/cmd/zap/docvalue.go @@ -196,6 +196,10 @@ var docvalueCmd = &cobra.Command{ } curChunkSize := chunkLens[docInChunk] + if curChunkSize == 0 { + return nil + } + // read the number of docs reside in the chunk numDocs := uint64(0) numDocs, nread = binary.Uvarint(data[destChunkDataLoc : destChunkDataLoc+binary.MaxVarintLen64]) From 0954b3af441c3fd7ad1f216e82cb15bc4ca27d6d Mon Sep 17 00:00:00 2001 From: Sreekanth Sivasankaran Date: Sat, 27 Oct 2018 12:46:15 +0530 Subject: [PATCH 477/728] configurable merge of mem segments This change makes the memory merge operations of persister configuable. 
--- index/scorch/persister.go | 37 +++++++++++++++++++++++++++---------- 1 file changed, 27 insertions(+), 10 deletions(-) diff --git a/index/scorch/persister.go b/index/scorch/persister.go index 01102c2f2..45bab7ab0 100644 --- a/index/scorch/persister.go +++ b/index/scorch/persister.go @@ -21,6 +21,7 @@ import ( "fmt" "io/ioutil" "log" + "math" "os" "path/filepath" "strconv" @@ -40,6 +41,8 @@ var DefaultPersisterNapTimeMSec int = 2000 // ms var DefaultPersisterNapUnderNumFiles int = 1000 +var DefaultMemoryPressurePauseThreshold uint64 = math.MaxUint64 + type persisterOptions struct { // PersisterNapTimeMSec controls the wait/delay injected into // persistence workloop to improve the chances for @@ -51,6 +54,13 @@ type persisterOptions struct { // PersisterNapTimeMSec amount of time to improve the chances for // a healthier and heavier in-memory merging PersisterNapUnderNumFiles int + + // MemoryPressurePauseThreshold let persister to have a better leeway + // for prudently performing the memory merge of segments on a memory + // pressure situation. Here the config value is an upper threshold + // for the number of paused application threads. The default value would + // be a very high number to always favour the merging of memory segments. 
+ MemoryPressurePauseThreshold uint64 } type notificationChan chan struct{} @@ -103,7 +113,7 @@ OUTER: if ourSnapshot != nil { startTime := time.Now() - err := s.persistSnapshot(ourSnapshot) + err := s.persistSnapshot(ourSnapshot, po) for _, ch := range ourPersisted { if err != nil { ch <- err @@ -251,8 +261,9 @@ OUTER: func (s *Scorch) parsePersisterOptions() (*persisterOptions, error) { po := persisterOptions{ - PersisterNapTimeMSec: DefaultPersisterNapTimeMSec, - PersisterNapUnderNumFiles: DefaultPersisterNapUnderNumFiles, + PersisterNapTimeMSec: DefaultPersisterNapTimeMSec, + PersisterNapUnderNumFiles: DefaultPersisterNapUnderNumFiles, + MemoryPressurePauseThreshold: DefaultMemoryPressurePauseThreshold, } if v, ok := s.config["scorchPersisterOptions"]; ok { b, err := json.Marshal(v) @@ -268,13 +279,19 @@ func (s *Scorch) parsePersisterOptions() (*persisterOptions, error) { return &po, nil } -func (s *Scorch) persistSnapshot(snapshot *IndexSnapshot) error { - persisted, err := s.persistSnapshotMaybeMerge(snapshot) - if err != nil { - return err - } - if persisted { - return nil +func (s *Scorch) persistSnapshot(snapshot *IndexSnapshot, + po *persisterOptions) error { + // Perform in-memory segment merging only when the memory pressure is + // below the configured threshold, else the persister performs the + // direct persistence of segments. + if s.paused() < po.MemoryPressurePauseThreshold { + persisted, err := s.persistSnapshotMaybeMerge(snapshot) + if err != nil { + return err + } + if persisted { + return nil + } } return s.persistSnapshotDirect(snapshot) From e1479789acb991022bf323547f3ecf6a40ed35c2 Mon Sep 17 00:00:00 2001 From: abhinavdangeti Date: Sun, 28 Oct 2018 16:57:08 -0700 Subject: [PATCH 478/728] Prevent boolean must not cursor from incorrectly advancing The currMustNot cursor of the boolean searcher isn't tracked by the currentID. 
This means that during Advance(), we'll need to additionally check to prevent updating the cached cursor ID for the must not searcher. Fixes https://github.com/blevesearch/bleve/issues/1029 --- search/searcher/search_boolean.go | 19 ++-- search_test.go | 141 ++++++++++++++++++++++++++++++ 2 files changed, 154 insertions(+), 6 deletions(-) diff --git a/search/searcher/search_boolean.go b/search/searcher/search_boolean.go index a6f3a150b..0a223bb0b 100644 --- a/search/searcher/search_boolean.go +++ b/search/searcher/search_boolean.go @@ -319,6 +319,7 @@ func (s *BooleanSearcher) Next(ctx *search.SearchContext) (*search.DocumentMatch return nil, err } } + return rv, nil } @@ -343,6 +344,7 @@ func (s *BooleanSearcher) Advance(ctx *search.SearchContext, ID index.IndexInter return nil, err } } + if s.shouldSearcher != nil { if s.currShould != nil { ctx.DocumentMatchPool.Put(s.currShould) @@ -354,12 +356,17 @@ func (s *BooleanSearcher) Advance(ctx *search.SearchContext, ID index.IndexInter } if s.mustNotSearcher != nil { - if s.currMustNot != nil { - ctx.DocumentMatchPool.Put(s.currMustNot) - } - s.currMustNot, err = s.mustNotSearcher.Advance(ctx, ID) - if err != nil { - return nil, err + // Additional check for mustNotSearcher whose cursor isn't tracked by + // currentID to prevent it from moving when the searcher's already + // where it should be. 
+ if s.currMustNot == nil || !s.currMustNot.IndexInternalID.Equals(ID) { + if s.currMustNot != nil { + ctx.DocumentMatchPool.Put(s.currMustNot) + } + s.currMustNot, err = s.mustNotSearcher.Advance(ctx, ID) + if err != nil { + return nil, err + } } } diff --git a/search_test.go b/search_test.go index 3c9da5f35..75aeac713 100644 --- a/search_test.go +++ b/search_test.go @@ -728,3 +728,144 @@ func TestSearchScorchOverEmptyKeyword(t *testing.T) { t.Fatalf("Unexpected search hits: %v, expected 10", res.Total) } } + +func TestMultipleNestedBooleanMustNotSearchersOnScorch(t *testing.T) { + defaultIndexType := Config.DefaultIndexType + + defer func() { + err := os.RemoveAll("testidx") + if err != nil { + t.Fatal(err) + } + Config.DefaultIndexType = defaultIndexType + }() + + Config.DefaultIndexType = scorch.Name + + // create an index with default settings + idxMapping := NewIndexMapping() + idx, err := New("testidx", idxMapping) + if err != nil { + t.Fatal(err) + } + + // create and insert documents as a batch + batch := idx.NewBatch() + + doc := document.NewDocument("1-child-0") + doc.Fields = []document.Field{ + document.NewTextField("id", []uint64{}, []byte("1-child-0")), + document.NewBooleanField("hasRole", []uint64{}, false), + document.NewTextField("roles", []uint64{}, []byte("R1")), + document.NewNumericField("type", []uint64{}, 0), + } + doc.CompositeFields = []*document.CompositeField{ + document.NewCompositeFieldWithIndexingOptions( + "_all", true, []string{"text"}, []string{}, + document.IndexField|document.IncludeTermVectors), + } + + if err = batch.IndexAdvanced(doc); err != nil { + t.Fatal(err) + } + + docs := []struct { + id string + hasRole bool + typ int + }{ + { + id: "16d6fa37-48fd-4dea-8b3d-a52bddf73951", + hasRole: false, + typ: 9, + }, + { + id: "18fa9eb2-8b1f-46f0-8b56-b4c551213f78", + hasRole: false, + typ: 9, + }, + { + id: "3085855b-d74b-474a-86c3-9bf3e4504382", + hasRole: false, + typ: 9, + }, + { + id: "38ef5d28-0f85-4fb0-8a94-dd20751c3364", + 
hasRole: false, + typ: 9, + }, + } + + for i := 0; i < len(docs); i++ { + doc := document.NewDocument(docs[i].id) + doc.Fields = []document.Field{ + document.NewTextField("id", []uint64{}, []byte(docs[i].id)), + document.NewBooleanField("hasRole", []uint64{}, docs[i].hasRole), + document.NewNumericField("type", []uint64{}, float64(docs[i].typ)), + } + + doc.CompositeFields = []*document.CompositeField{ + document.NewCompositeFieldWithIndexingOptions( + "_all", true, []string{"text"}, []string{}, + document.IndexField|document.IncludeTermVectors), + } + + if err = batch.IndexAdvanced(doc); err != nil { + t.Fatal(err) + } + } + + if err = idx.Batch(batch); err != nil { + t.Fatal(err) + } + + batch = idx.NewBatch() + + // Update 1st doc + doc = document.NewDocument("1-child-0") + doc.Fields = []document.Field{ + document.NewTextField("id", []uint64{}, []byte("1-child-0")), + document.NewBooleanField("hasRole", []uint64{}, false), + document.NewNumericField("type", []uint64{}, 0), + } + doc.CompositeFields = []*document.CompositeField{ + document.NewCompositeFieldWithIndexingOptions( + "_all", true, []string{"text"}, []string{}, + document.IndexField|document.IncludeTermVectors), + } + + if err = batch.IndexAdvanced(doc); err != nil { + t.Fatal(err) + } + + if err = idx.Batch(batch); err != nil { + t.Fatal(err) + } + + inclusive := true + val := float64(9) + q := query.NewNumericRangeInclusiveQuery(&val, &val, &inclusive, &inclusive) + q.SetField("type") + initialQuery := query.NewBooleanQuery(nil, nil, []query.Query{q}) + + // using must not, for cases that the field did not exists at all + hasRole := NewBoolFieldQuery(true) + hasRole.SetField("hasRole") + noRole := NewBooleanQuery() + noRole.AddMustNot(hasRole) + + rq := query.NewBooleanQuery([]query.Query{initialQuery, noRole}, nil, nil) + + sr := NewSearchRequestOptions(rq, 100, 0, false) + sr.Fields = []string{"id", "hasRole", "type"} + sr.Highlight = NewHighlight() + + res, err := idx.Search(sr) + if err != nil { 
+ t.Fatal(err) + } + + if res.Total != 1 { + t.Fatalf("Unexpected result, %v != 1", res.Total) + } +} From 7e1adf70140b9e5686fae9e721eff141ab2d63c8 Mon Sep 17 00:00:00 2001 From: abhinavdangeti Date: Mon, 5 Nov 2018 15:22:09 -0800 Subject: [PATCH 479/728] Log number of clauses that caused the TooManyClauses error --- search/searcher/search_disjunction.go | 6 +++--- search/searcher/search_disjunction_heap.go | 2 +- search/searcher/search_disjunction_slice.go | 2 +- search/searcher/search_fuzzy.go | 4 ++-- search/searcher/search_multi_term.go | 4 ++-- search/searcher/search_numeric_range.go | 2 +- search/searcher/search_regexp.go | 2 +- search/searcher/search_term_prefix.go | 2 +- 8 files changed, 12 insertions(+), 12 deletions(-) diff --git a/search/searcher/search_disjunction.go b/search/searcher/search_disjunction.go index bbf7b4bbc..882b02ccb 100644 --- a/search/searcher/search_disjunction.go +++ b/search/searcher/search_disjunction.go @@ -55,7 +55,7 @@ func tooManyClauses(count int) bool { return false } -func tooManyClausesErr() error { - return fmt.Errorf("TooManyClauses[maxClauseCount is set to %d]", - DisjunctionMaxClauseCount) +func tooManyClausesErr(count int) error { + return fmt.Errorf("TooManyClauses[%d > maxClauseCount, which is set to %d]", + count, DisjunctionMaxClauseCount) } diff --git a/search/searcher/search_disjunction_heap.go b/search/searcher/search_disjunction_heap.go index ffa373d2d..ec133f1f8 100644 --- a/search/searcher/search_disjunction_heap.go +++ b/search/searcher/search_disjunction_heap.go @@ -62,7 +62,7 @@ func newDisjunctionHeapSearcher(indexReader index.IndexReader, limit bool) ( *DisjunctionHeapSearcher, error) { if limit && tooManyClauses(len(searchers)) { - return nil, tooManyClausesErr() + return nil, tooManyClausesErr(len(searchers)) } // build our searcher diff --git a/search/searcher/search_disjunction_slice.go b/search/searcher/search_disjunction_slice.go index e3efdf2a7..e47f39ad0 100644 --- 
a/search/searcher/search_disjunction_slice.go +++ b/search/searcher/search_disjunction_slice.go @@ -50,7 +50,7 @@ func newDisjunctionSliceSearcher(indexReader index.IndexReader, limit bool) ( *DisjunctionSliceSearcher, error) { if limit && tooManyClauses(len(qsearchers)) { - return nil, tooManyClausesErr() + return nil, tooManyClausesErr(len(qsearchers)) } // build the downstream searchers searchers := make(OrderedSearcherList, len(qsearchers)) diff --git a/search/searcher/search_fuzzy.go b/search/searcher/search_fuzzy.go index b99528af4..668d11afc 100644 --- a/search/searcher/search_fuzzy.go +++ b/search/searcher/search_fuzzy.go @@ -71,7 +71,7 @@ func findFuzzyCandidateTerms(indexReader index.IndexReader, term string, for err == nil && tfd != nil { rv = append(rv, tfd.Term) if tooManyClauses(len(rv)) { - return nil, tooManyClausesErr() + return nil, tooManyClausesErr(len(rv)) } tfd, err = fieldDict.Next() } @@ -103,7 +103,7 @@ func findFuzzyCandidateTerms(indexReader index.IndexReader, term string, if !exceeded && ld <= fuzziness { rv = append(rv, tfd.Term) if tooManyClauses(len(rv)) { - return nil, tooManyClausesErr() + return nil, tooManyClausesErr(len(rv)) } } tfd, err = fieldDict.Next() diff --git a/search/searcher/search_multi_term.go b/search/searcher/search_multi_term.go index a723aedc5..c48366ee2 100644 --- a/search/searcher/search_multi_term.go +++ b/search/searcher/search_multi_term.go @@ -23,7 +23,7 @@ func NewMultiTermSearcher(indexReader index.IndexReader, terms []string, field string, boost float64, options search.SearcherOptions, limit bool) ( search.Searcher, error) { if limit && tooManyClauses(len(terms)) { - return nil, tooManyClausesErr() + return nil, tooManyClausesErr(len(terms)) } qsearchers := make([]search.Searcher, len(terms)) @@ -51,7 +51,7 @@ func NewMultiTermSearcherBytes(indexReader index.IndexReader, terms [][]byte, field string, boost float64, options search.SearcherOptions, limit bool) ( search.Searcher, error) { if limit && 
tooManyClauses(len(terms)) { - return nil, tooManyClausesErr() + return nil, tooManyClausesErr(len(terms)) } qsearchers := make([]search.Searcher, len(terms)) diff --git a/search/searcher/search_numeric_range.go b/search/searcher/search_numeric_range.go index 1eae7a5ec..e52ef9a82 100644 --- a/search/searcher/search_numeric_range.go +++ b/search/searcher/search_numeric_range.go @@ -68,7 +68,7 @@ func NewNumericRangeSearcher(indexReader index.IndexReader, return nil, err } if tooManyClauses(len(terms)) { - return nil, tooManyClausesErr() + return nil, tooManyClausesErr(len(terms)) } return NewMultiTermSearcherBytes(indexReader, terms, field, boost, options, diff --git a/search/searcher/search_regexp.go b/search/searcher/search_regexp.go index 299d9cdbe..4def832c4 100644 --- a/search/searcher/search_regexp.go +++ b/search/searcher/search_regexp.go @@ -110,7 +110,7 @@ func findRegexpCandidateTerms(indexReader index.IndexReader, if matchPos != nil && matchPos[0] == 0 && matchPos[1] == len(tfd.Term) { rv = append(rv, tfd.Term) if tooManyClauses(len(rv)) { - return rv, tooManyClausesErr() + return rv, tooManyClausesErr(len(rv)) } } tfd, err = fieldDict.Next() diff --git a/search/searcher/search_term_prefix.go b/search/searcher/search_term_prefix.go index 59db93101..b5af4631f 100644 --- a/search/searcher/search_term_prefix.go +++ b/search/searcher/search_term_prefix.go @@ -38,7 +38,7 @@ func NewTermPrefixSearcher(indexReader index.IndexReader, prefix string, for err == nil && tfd != nil { terms = append(terms, tfd.Term) if tooManyClauses(len(terms)) { - return nil, tooManyClausesErr() + return nil, tooManyClausesErr(len(terms)) } tfd, err = fieldDict.Next() } From b5e94b7320f3e7e3a4311b5c9447528254683f9f Mon Sep 17 00:00:00 2001 From: Sreekanth Sivasankaran Date: Thu, 8 Nov 2018 12:07:34 +0530 Subject: [PATCH 480/728] persister nap regression with safe batches Re-initialising the default persister nap configs to favour direct persistence without any naps. 
--- index/scorch/persister.go | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/index/scorch/persister.go b/index/scorch/persister.go index 45bab7ab0..8d54b3a70 100644 --- a/index/scorch/persister.go +++ b/index/scorch/persister.go @@ -37,8 +37,26 @@ import ( var DefaultChunkFactor uint32 = 1024 -var DefaultPersisterNapTimeMSec int = 2000 // ms - +// DefaultPersisterNapTimeMSec is kept to zero as this helps in direct +// persistence of segments with the default safe batch option. +// If the default safe batch option results in high number of +// files on disk, then users may initialise this configuration parameter +// with higher values so that the persister will nap a bit within it's +// work loop to favour better in-memory merging of segments to result +// in fewer segment files on disk. But that may come with an indexing +// performance overhead. +// Unsafe batch users are advised to override this to higher value +// for better performance especially with high data density. +var DefaultPersisterNapTimeMSec int = 0 // ms + +// DefaultPersisterNapUnderNumFiles helps in controlling the pace of +// persister. At times of a slow merger progress with heavy file merging +// operations, its better to pace down the persister for letting the merger +// to catch up within a range defined by this parameter. +// Fewer files on disk (as per the merge plan) would result in keeping the +// file handle usage under limit, faster disk merger and a healthier index. +// Its been observed that such a loosely sync'ed introducer-persister-merger +// trio results in better overall performance. var DefaultPersisterNapUnderNumFiles int = 1000 var DefaultMemoryPressurePauseThreshold uint64 = math.MaxUint64 From d39180b2f0a10c6a0d83c5c865b76147eee024ff Mon Sep 17 00:00:00 2001 From: Dmitriy Kalugin-Balashov Date: Tue, 13 Nov 2018 16:21:33 -0800 Subject: [PATCH 481/728] Improving ZAP File Format Documentation. 
--- index/scorch/segment/zap/zap.md | 177 ++++++++++++++++++++++++++++++++ 1 file changed, 177 insertions(+) create mode 100644 index/scorch/segment/zap/zap.md diff --git a/index/scorch/segment/zap/zap.md b/index/scorch/segment/zap/zap.md new file mode 100644 index 000000000..bc54af92d --- /dev/null +++ b/index/scorch/segment/zap/zap.md @@ -0,0 +1,177 @@ +# ZAP File Format + +## Legend + +### Sections + + |========| + | | section + |========| + +### Fixed-size fields + + |--------| |----| |--| |-| + | | uint64 | | uint32 | | uint16 | | uint8 + |--------| |----| |--| |-| + +### Varints + + |~~~~~~~~| + | | varint(up to uint64) + |~~~~~~~~| + +### Arbitary-length fields + + |--------...---| + | | arbitrary-length field (string, vellum, roaring bitmap) + |--------...---| + +### Chunked data + + [--------] + [ ] + [--------] + +## Overview + +Footer sectrion describes the configuration of particular ZAP file. The format of footer is version-dependent, so it is necessary to check `V` field before the parsing. + + |==================================================| + | Stored Fields | + |==================================================| + |-----> | Stored Fields Index | + | |==================================================| + | | Dictionaries + Postings + DocValues | + | |==================================================| + | |---> | DocValues Index | + | | |==================================================| + | | | Fields | + | | |==================================================| + | | |-> | Fields Index | + | | | |========|========|========|========|====|====|====| + | | | | D# | SF | F | FDV | CF | V | CC | (Footer) + | | | |========|====|===|====|===|====|===|====|====|====| + | | | | | | + |-+-+-----------------| | | + | |--------------------------| | + |-------------------------------------| + + D#. Number of Docs. + SF. Stored Fields Index Offset. + F. Field Index Offset. + FDV. Field DocValue Offset. + CF. Chunk Factor. + V. Version. + CC. CRC32. 
+ +## Stored Fields + +Stored Fields Index is `D#` consecutive 64-bit unsigned integers - offsets, where relevant Stored Fields Data records are located. + + 0 [SF] [SF + D# * 8] + | Stored Fields | Stored Fields Index | + |================================|==================================| + | | | + | |--------------------| ||--------|--------|. . .|--------|| + | |-> | Stored Fields Data | || 0 | 1 | | D# - 1 || + | | |--------------------| ||--------|----|---|. . .|--------|| + | | | | | + |===|============================|==============|===================| + | | + |-------------------------------------------| + +Stored Fields Data is an arbitrary size record, which consists of metadata and [Snappy](https://github.com/golang/snappy)-compressed data. + + Stored Fields Data + |~~~~~~~~|~~~~~~~~|~~~~~~~~...~~~~~~~~|~~~~~~~~...~~~~~~~~| + | MDS | CDS | MD | CD | + |~~~~~~~~|~~~~~~~~|~~~~~~~~...~~~~~~~~|~~~~~~~~...~~~~~~~~| + + MDS. Metadata size. + CDS. Compressed data size. + MD. Metadata. + CD. Snappy-compressed data. + +## Fields + +Fields Index section located between addresses `F` and `len(file) - len(footer)` and consist of `uint64` values (`F1`, `F2`, ...) which are offsets to records in Fields section. We have `F# = (len(file) - len(footer) - F) / sizeof(uint64)` fields. + + + (...) [F] [F + F#] + | Fields | Fields Index. | + |================================|================================| + | | | + | |~~~~~~~~|~~~~~~~~|---...---|||--------|--------|...|--------|| + ||->| Dict | Length | Name ||| 0 | 1 | | F# - 1 || + || |~~~~~~~~|~~~~~~~~|---...---|||--------|----|---|...|--------|| + || | | | + ||===============================|==============|=================| + | | + |----------------------------------------------| + + +## Dictionaries + Postings + +Each of fields has its own dictionary, encoded in [Vellum](https://github.com/couchbase/vellum) format. 
Dictionary consists of pairs `(term, offset)`, where `offset` indicates the position of postings (list of documents) for this particular term. + + |================================================================|- Dictionaries + + | | Postings + + | | DocValues + | Freq/Norm (chunked) | + | [~~~~~~|~~~~~~~~~~~~~~~~~~~~~~~~~~~~~] | + | |->[ Freq | Norm (float32 under varint) ] | + | | [~~~~~~|~~~~~~~~~~~~~~~~~~~~~~~~~~~~~] | + | | | + | |------------------------------------------------------------| | + | Location Details (chunked) | | + | [~~~~~~|~~~~~|~~~~~~~|~~~~~|~~~~~~|~~~~~~~~|~~~~~] | | + | |->[ Size | Pos | Start | End | Arr# | ArrPos | ... ] | | + | | [~~~~~~|~~~~~|~~~~~~~|~~~~~|~~~~~~|~~~~~~~~|~~~~~] | | + | | | | + | |----------------------| | | + | Postings List | | | + | |~~~~~~~~|~~~~~|~~|~~~~~~~~|-----------...--| | | + | |->| F/N | LD | Length | ROARING BITMAP | | | + | | |~~~~~|~~|~~~~~~~~|~~~~~~~~|-----------...--| | | + | | |----------------------------------------------| | + | |--------------------------------------| | + | Dictionary | | + | |~~~~~~~~|--------------------------|-...-| | + | |->| Length | VELLUM DATA : (TERM -> OFFSET) | | + | | |~~~~~~~~|----------------------------...-| | + | | | + |======|=========================================================|- DocValues Index + | | | + |======|=========================================================|- Fields + | | | + | |~~~~|~~~|~~~~~~~~|---...---| | + | | Dict | Length | Name | | + | |~~~~~~~~|~~~~~~~~|---...---| | + | | + |================================================================| + +## DocValues + +DocValues Index is `F#` pairs of varints, one pair per field. Each pair of varints indicates start and end point of DocValues slice. 
+ + |================================================================| + | |------...--| | + | |->| DocValues |<-| | + | | |------...--| | | + |==|=================|===========================================|- DocValues Index + ||~|~~~~~~~~~|~~~~~~~|~~| |~~~~~~~~~~~~~~|~~~~~~~~~~~~|| + || DV1 START | DV1 STOP | . . . . . | DV(F#) START | DV(F#) END || + ||~~~~~~~~~~~|~~~~~~~~~~| |~~~~~~~~~~~~~~|~~~~~~~~~~~~|| + |================================================================| + +DocValues is chunked Snappy-compressed values for each document and field. + + [~~~~~~~~~~~~~~~|~~~~~~|~~~~~~~~~|-...-|~~~~~~|~~~~~~~~~|--------------------...-] + [ Doc# in Chunk | Doc1 | Offset1 | ... | DocN | OffsetN | SNAPPY COMPRESSED DATA ] + [~~~~~~~~~~~~~~~|~~~~~~|~~~~~~~~~|-...-|~~~~~~|~~~~~~~~~|--------------------...-] + +Last 16 bytes are description of chunks. + + |~~~~~~~~~~~~...~|----------------|----------------| + | Chunk Sizes | Chunk Size Arr | Chunk# | + |~~~~~~~~~~~~...~|----------------|----------------| \ No newline at end of file From d0ea32a67c16a368209190dc5ac752a5903e2b1b Mon Sep 17 00:00:00 2001 From: Dmitriy Kalugin-Balashov Date: Tue, 13 Nov 2018 16:25:44 -0800 Subject: [PATCH 482/728] index/scorch/segment/zap/README.md - removing legacy. --- index/scorch/segment/zap/README.md | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/index/scorch/segment/zap/README.md b/index/scorch/segment/zap/README.md index 872e86c84..0facb669f 100644 --- a/index/scorch/segment/zap/README.md +++ b/index/scorch/segment/zap/README.md @@ -1,5 +1,7 @@ # zap file format +Advanced ZAP File Format Documentation is [here](zap.md). + The file is written in the reverse order that we typically access data. This helps us write in one pass since later sections of the file require file offsets of things we've already written. 
Current usage: @@ -90,16 +92,6 @@ If you know the doc number you're interested in, this format lets you jump to th If you know the doc number you're interested in, this format lets you jump to the correct chunk (docNum/chunkFactor) directly and then seek within that chunk until you find it. -## bitmaps of hits with location info - -- for each posting list - - preparation phase: - - encode roaring bitmap (inidicating which hits have location details indexed) posting list to bytes (so we know the length) - - file writing phase: - - remember the start position for this bitmap - - write length of encoded roaring bitmap - - write the serialized roaring bitmap data - ## postings list section - for each posting list @@ -109,7 +101,6 @@ If you know the doc number you're interested in, this format lets you jump to th - remember the start position for this posting list - write freq/norm details offset (remembered from previous, as varint uint64) - write location details offset (remembered from previous, as varint uint64) - - write location bitmap offset (remembered from previous, as varint uint64) - write length of encoded roaring bitmap - write the serialized roaring bitmap data From ccf5e17bb77855e4ca757c68e29feb4094d23f4c Mon Sep 17 00:00:00 2001 From: Dmitriy Kalugin-Balashov Date: Wed, 14 Nov 2018 11:33:30 -0800 Subject: [PATCH 483/728] Typos fix. --- index/scorch/segment/zap/zap.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/index/scorch/segment/zap/zap.md b/index/scorch/segment/zap/zap.md index bc54af92d..d74dc548b 100644 --- a/index/scorch/segment/zap/zap.md +++ b/index/scorch/segment/zap/zap.md @@ -20,7 +20,7 @@ | | varint(up to uint64) |~~~~~~~~| -### Arbitary-length fields +### Arbitrary-length fields |--------...---| | | arbitrary-length field (string, vellum, roaring bitmap) @@ -34,7 +34,7 @@ ## Overview -Footer sectrion describes the configuration of particular ZAP file. 
The format of footer is version-dependent, so it is necessary to check `V` field before the parsing. +Footer section describes the configuration of particular ZAP file. The format of footer is version-dependent, so it is necessary to check `V` field before the parsing. |==================================================| | Stored Fields | @@ -174,4 +174,4 @@ Last 16 bytes are description of chunks. |~~~~~~~~~~~~...~|----------------|----------------| | Chunk Sizes | Chunk Size Arr | Chunk# | - |~~~~~~~~~~~~...~|----------------|----------------| \ No newline at end of file + |~~~~~~~~~~~~...~|----------------|----------------| From bd21bf72cd7aaf06e260590a735e4a01a9a1db9d Mon Sep 17 00:00:00 2001 From: Sreekanth Sivasankaran Date: Mon, 19 Nov 2018 15:22:26 +0530 Subject: [PATCH 484/728] fixing go vet errs --- index/scorch/segment/zap/dict.go | 10 +++++----- index/scorch/segment/zap/merge.go | 2 +- index/scorch/segment/zap/merge_test.go | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/index/scorch/segment/zap/dict.go b/index/scorch/segment/zap/dict.go index 219bf1526..2c0e1bf2a 100644 --- a/index/scorch/segment/zap/dict.go +++ b/index/scorch/segment/zap/dict.go @@ -105,7 +105,7 @@ func (d *Dictionary) Iterator() segment.DictionaryIterator { itr, err := d.fst.Iterator(nil, nil) if err == nil { rv.itr = itr - } else if err != nil && err != vellum.ErrIteratorDone { + } else if err != vellum.ErrIteratorDone { rv.err = err } } @@ -127,7 +127,7 @@ func (d *Dictionary) PrefixIterator(prefix string) segment.DictionaryIterator { itr, err := d.fst.Iterator(kBeg, kEnd) if err == nil { rv.itr = itr - } else if err != nil && err != vellum.ErrIteratorDone { + } else if err != vellum.ErrIteratorDone { rv.err = err } } @@ -154,7 +154,7 @@ func (d *Dictionary) RangeIterator(start, end string) segment.DictionaryIterator itr, err := d.fst.Iterator([]byte(start), endBytes) if err == nil { rv.itr = itr - } else if err != nil && err != vellum.ErrIteratorDone { + } 
else if err != vellum.ErrIteratorDone { rv.err = err } } @@ -174,7 +174,7 @@ func (d *Dictionary) AutomatonIterator(a vellum.Automaton, itr, err := d.fst.Search(a, startKeyInclusive, endKeyExclusive) if err == nil { rv.itr = itr - } else if err != nil && err != vellum.ErrIteratorDone { + } else if err != vellum.ErrIteratorDone { rv.err = err } } @@ -218,7 +218,7 @@ func (d *Dictionary) OnlyIterator(onlyTerms [][]byte, itr, err := d.fst.Search(onlyFST, nil, nil) if err == nil { rv.itr = itr - } else if err != nil && err != vellum.ErrIteratorDone { + } else if err != vellum.ErrIteratorDone { rv.err = err } diff --git a/index/scorch/segment/zap/merge.go b/index/scorch/segment/zap/merge.go index 901115898..13e9bf97c 100644 --- a/index/scorch/segment/zap/merge.go +++ b/index/scorch/segment/zap/merge.go @@ -329,7 +329,7 @@ func persistMergedRest(segments []*SegmentBase, dropsIn []*roaring.Bitmap, err = enumerator.Next() } - if err != nil && err != vellum.ErrIteratorDone { + if err != vellum.ErrIteratorDone { return nil, 0, err } diff --git a/index/scorch/segment/zap/merge_test.go b/index/scorch/segment/zap/merge_test.go index 276259c71..db1cfff15 100644 --- a/index/scorch/segment/zap/merge_test.go +++ b/index/scorch/segment/zap/merge_test.go @@ -342,7 +342,7 @@ func compareSegments(a, b *Segment) string { for { apitrn, aerr := apitr.Next() - bpitrn, aerr := bpitr.Next() + bpitrn, berr := bpitr.Next() if aerr != berr { rv = append(rv, fmt.Sprintf("field %s, term: %s, postingsListIterator Next() errors different: %v %v", fieldName, next.Term, aerr, berr)) From ff5953ff61a54a28581a12e4e361e7741a9c4e84 Mon Sep 17 00:00:00 2001 From: Aviad Lichtenstadt Date: Wed, 21 Nov 2018 17:57:05 -0800 Subject: [PATCH 485/728] Add callbacks So we can know when a batch was persisted properly to disk --- index.go | 8 +++++ index/index.go | 13 ++++++++ index/scorch/introducer.go | 5 +++ index/scorch/persister.go | 7 ++++ index/scorch/scorch.go | 6 ++-- index/scorch/scorch_test.go | 59 
++++++++++++++++++++++++++++++++++ index/upsidedown/upsidedown.go | 5 +++ 7 files changed, 101 insertions(+), 2 deletions(-) diff --git a/index.go b/index.go index f9462a41d..a9369f485 100644 --- a/index.go +++ b/index.go @@ -129,6 +129,14 @@ func (b *Batch) Merge(o *Batch) { } } +func (b *Batch) AddCallback(f index.BatchCallbackFunction) { + b.internal.AddCallback(f) +} + +func (b *Batch) Callback() []index.BatchCallbackFunction { + return b.internal.Callback() +} + // An Index implements all the indexing and searching // capabilities of bleve. An Index can be created // using the New() and Open() methods. diff --git a/index/index.go b/index/index.go index a44046134..c4f3d5dfa 100644 --- a/index/index.go +++ b/index/index.go @@ -248,15 +248,19 @@ type DocIDReader interface { Close() error } +type BatchCallbackFunction func(error) + type Batch struct { IndexOps map[string]*document.Document InternalOps map[string][]byte + callBack []BatchCallbackFunction } func NewBatch() *Batch { return &Batch{ IndexOps: make(map[string]*document.Document), InternalOps: make(map[string][]byte), + callBack: nil, } } @@ -276,6 +280,14 @@ func (b *Batch) DeleteInternal(key []byte) { b.InternalOps[string(key)] = nil } +func (b *Batch) AddCallback(f BatchCallbackFunction) { + b.callBack = append(b.callBack, f) +} + +func (b *Batch) Callback() []BatchCallbackFunction { + return b.callBack +} + func (b *Batch) String() string { rv := fmt.Sprintf("Batch (%d ops, %d internal ops)\n", len(b.IndexOps), len(b.InternalOps)) for k, v := range b.IndexOps { @@ -298,6 +310,7 @@ func (b *Batch) String() string { func (b *Batch) Reset() { b.IndexOps = make(map[string]*document.Document) b.InternalOps = make(map[string][]byte) + b.callBack = nil } func (b *Batch) Merge(o *Batch) { diff --git a/index/scorch/introducer.go b/index/scorch/introducer.go index 12f27af66..fd39d4a94 100644 --- a/index/scorch/introducer.go +++ b/index/scorch/introducer.go @@ -19,6 +19,7 @@ import ( "sync/atomic" 
"github.com/RoaringBitmap/roaring" + "github.com/blevesearch/bleve/index" "github.com/blevesearch/bleve/index/scorch/segment" "github.com/blevesearch/bleve/index/scorch/segment/zap" ) @@ -32,6 +33,7 @@ type segmentIntroduction struct { applied chan error persisted chan error + callbacks []index.BatchCallbackFunction } type persistIntroduction struct { @@ -213,6 +215,9 @@ func (s *Scorch) introduceSegment(next *segmentIntroduction) error { if next.persisted != nil { s.rootPersisted = append(s.rootPersisted, next.persisted) } + if next.callbacks != nil { + s.callbacks = append(s.callbacks, next.callbacks...) + } // swap in new index snapshot newSnapshot.epoch = s.nextSnapshotEpoch s.nextSnapshotEpoch++ diff --git a/index/scorch/persister.go b/index/scorch/persister.go index 8d54b3a70..1e1b9b42c 100644 --- a/index/scorch/persister.go +++ b/index/scorch/persister.go @@ -30,6 +30,7 @@ import ( "time" "github.com/RoaringBitmap/roaring" + "github.com/blevesearch/bleve/index" "github.com/blevesearch/bleve/index/scorch/segment" "github.com/blevesearch/bleve/index/scorch/segment/zap" "github.com/boltdb/bolt" @@ -115,6 +116,7 @@ OUTER: var ourSnapshot *IndexSnapshot var ourPersisted []chan error + var ourCallbacks []index.BatchCallbackFunction // check to see if there is a new snapshot to persist s.rootLock.Lock() @@ -123,6 +125,8 @@ OUTER: ourSnapshot.AddRef() ourPersisted = s.rootPersisted s.rootPersisted = nil + ourCallbacks = s.callbacks + s.callbacks = nil atomic.StoreUint64(&s.iStats.persistSnapshotSize, uint64(ourSnapshot.Size())) atomic.StoreUint64(&s.iStats.persistEpoch, ourSnapshot.epoch) } @@ -150,6 +154,9 @@ OUTER: atomic.AddUint64(&s.stats.TotPersistLoopErr, 1) continue OUTER } + for i := range ourCallbacks { + ourCallbacks[i](err) + } atomic.StoreUint64(&s.stats.LastPersistedEpoch, ourSnapshot.epoch) diff --git a/index/scorch/scorch.go b/index/scorch/scorch.go index 5e56c49b0..691b9d971 100644 --- a/index/scorch/scorch.go +++ b/index/scorch/scorch.go @@ -54,6 
+54,7 @@ type Scorch struct { rootLock sync.RWMutex root *IndexSnapshot // holds 1 ref-count on the root rootPersisted []chan error // closed when root is persisted + callbacks []index.BatchCallbackFunction nextSnapshotEpoch uint64 eligibleForRemoval []uint64 // Index snapshot epochs that are safe to GC. ineligibleForRemoval map[string]bool // Filenames that should not be GC'ed yet. @@ -355,7 +356,7 @@ func (s *Scorch) Batch(batch *index.Batch) (err error) { atomic.AddUint64(&s.stats.TotBatchesEmpty, 1) } - err = s.prepareSegment(newSegment, ids, batch.InternalOps) + err = s.prepareSegment(newSegment, ids, batch.InternalOps, batch.Callback()) if err != nil { if newSegment != nil { _ = newSegment.Close() @@ -375,7 +376,7 @@ func (s *Scorch) Batch(batch *index.Batch) (err error) { } func (s *Scorch) prepareSegment(newSegment segment.Segment, ids []string, - internalOps map[string][]byte) error { + internalOps map[string][]byte, callbacks []index.BatchCallbackFunction) error { // new introduction introduction := &segmentIntroduction{ @@ -385,6 +386,7 @@ func (s *Scorch) prepareSegment(newSegment segment.Segment, ids []string, obsoletes: make(map[uint64]*roaring.Bitmap), internal: internalOps, applied: make(chan error), + callbacks: callbacks, } if !s.unsafeBatch { diff --git a/index/scorch/scorch_test.go b/index/scorch/scorch_test.go index adcabd22f..1fed924e1 100644 --- a/index/scorch/scorch_test.go +++ b/index/scorch/scorch_test.go @@ -15,6 +15,7 @@ package scorch import ( + "fmt" "log" "os" "reflect" @@ -883,6 +884,64 @@ func TestIndexBatch(t *testing.T) { } } +func TestIndexBatchWithCallbacks(t *testing.T) { + defer func() { + err := DestroyTest() + if err != nil { + t.Fatal(err) + } + }() + + analysisQueue := index.NewAnalysisQueue(1) + idx, err := NewScorch(Name, testConfig, analysisQueue) + if err != nil { + t.Fatal(err) + } + err = idx.Open() + if err != nil { + t.Fatalf("error opening index: %v", err) + } + defer func() { + err := idx.Close() + if err != nil 
{ + t.Fatal(err) + } + }() + + // Check that callback function works + updated := false + cbErr := fmt.Errorf("") + + batch := index.NewBatch() + doc := document.NewDocument("3") + doc.AddField(document.NewTextField("name", []uint64{}, []byte("test3"))) + batch.Update(doc) + batch.AddCallback(func(e error) { + updated = true + cbErr = e + + }) + + err = idx.Batch(batch) + if err != nil { + t.Error(err) + } + + for i := 0; i < 30; i++ { + if updated { + break + } + time.Sleep(500 * time.Millisecond) + } + if !updated { + t.Fatal("Callback function wasn't called") + } + if cbErr != nil { + t.Fatal("Error wasn't updated properly on callback function") + } + +} + func TestIndexInsertUpdateDeleteWithMultipleTypesStored(t *testing.T) { defer func() { err := DestroyTest() diff --git a/index/upsidedown/upsidedown.go b/index/upsidedown/upsidedown.go index 6d3738539..c7caa26b5 100644 --- a/index/upsidedown/upsidedown.go +++ b/index/upsidedown/upsidedown.go @@ -958,6 +958,11 @@ func (udc *UpsideDownCouch) Batch(batch *index.Batch) (err error) { } else { atomic.AddUint64(&udc.stats.errors, 1) } + + // For sake of completeness + for i := range batch.Callback() { + batch.Callback()[i](err) + } return } From 939c6e2255066a93e6968ff942f8652c5bb2dd37 Mon Sep 17 00:00:00 2001 From: Aviad Lichtenstadt Date: Wed, 21 Nov 2018 18:31:59 -0800 Subject: [PATCH 486/728] trigger tests again --- index/scorch/scorch_test.go | 1 + 1 file changed, 1 insertion(+) diff --git a/index/scorch/scorch_test.go b/index/scorch/scorch_test.go index 1fed924e1..13ec719e8 100644 --- a/index/scorch/scorch_test.go +++ b/index/scorch/scorch_test.go @@ -885,6 +885,7 @@ func TestIndexBatch(t *testing.T) { } func TestIndexBatchWithCallbacks(t *testing.T) { + defer func() { err := DestroyTest() if err != nil { From a7e3fd0f45728d8ca3d26ca8ea5a02a7d199441e Mon Sep 17 00:00:00 2001 From: Aviad Lichtenstadt Date: Wed, 21 Nov 2018 19:40:35 -0800 Subject: [PATCH 487/728] CR fixes Naming conventions & remove unneeded `if` 
--- index.go | 6 ++-- index/index.go | 16 +++++----- index/scorch/introducer.go | 4 ++- index/scorch/persister.go | 2 +- index/scorch/scorch.go | 6 ++-- index/scorch/scorch_test.go | 58 +++++++++++++++++++++++++++++++++++++ 6 files changed, 76 insertions(+), 16 deletions(-) diff --git a/index.go b/index.go index a9369f485..0747dd65f 100644 --- a/index.go +++ b/index.go @@ -129,12 +129,12 @@ func (b *Batch) Merge(o *Batch) { } } -func (b *Batch) AddCallback(f index.BatchCallbackFunction) { +func (b *Batch) AddCallback(f index.BatchCallback) { b.internal.AddCallback(f) } -func (b *Batch) Callback() []index.BatchCallbackFunction { - return b.internal.Callback() +func (b *Batch) Callbacks() []index.BatchCallback { + return b.internal.Callbacks() } // An Index implements all the indexing and searching diff --git a/index/index.go b/index/index.go index c4f3d5dfa..398c9fc87 100644 --- a/index/index.go +++ b/index/index.go @@ -248,19 +248,19 @@ type DocIDReader interface { Close() error } -type BatchCallbackFunction func(error) +type BatchCallback func(error) type Batch struct { IndexOps map[string]*document.Document InternalOps map[string][]byte - callBack []BatchCallbackFunction + callbacks []BatchCallback } func NewBatch() *Batch { return &Batch{ IndexOps: make(map[string]*document.Document), InternalOps: make(map[string][]byte), - callBack: nil, + callbacks: nil, } } @@ -280,12 +280,12 @@ func (b *Batch) DeleteInternal(key []byte) { b.InternalOps[string(key)] = nil } -func (b *Batch) AddCallback(f BatchCallbackFunction) { - b.callBack = append(b.callBack, f) +func (b *Batch) AddCallback(f BatchCallback) { + b.callbacks = append(b.callbacks, f) } -func (b *Batch) Callback() []BatchCallbackFunction { - return b.callBack +func (b *Batch) Callbacks() []BatchCallback { + return b.callbacks } func (b *Batch) String() string { @@ -310,7 +310,7 @@ func (b *Batch) String() string { func (b *Batch) Reset() { b.IndexOps = make(map[string]*document.Document) b.InternalOps = 
make(map[string][]byte) - b.callBack = nil + b.callbacks = nil } func (b *Batch) Merge(o *Batch) { diff --git a/index/scorch/introducer.go b/index/scorch/introducer.go index fd39d4a94..7b012a390 100644 --- a/index/scorch/introducer.go +++ b/index/scorch/introducer.go @@ -33,7 +33,7 @@ type segmentIntroduction struct { applied chan error persisted chan error - callbacks []index.BatchCallbackFunction + callbacks []index.BatchCallback } type persistIntroduction struct { @@ -50,6 +50,7 @@ type snapshotReversion struct { snapshot *IndexSnapshot applied chan error persisted chan error + callbacks []index.BatchCallback } func (s *Scorch) mainLoop() { @@ -218,6 +219,7 @@ func (s *Scorch) introduceSegment(next *segmentIntroduction) error { if next.callbacks != nil { s.callbacks = append(s.callbacks, next.callbacks...) } + s.callbacks = append(s.callbacks, next.callbacks...) // swap in new index snapshot newSnapshot.epoch = s.nextSnapshotEpoch s.nextSnapshotEpoch++ diff --git a/index/scorch/persister.go b/index/scorch/persister.go index 1e1b9b42c..ebbc240e7 100644 --- a/index/scorch/persister.go +++ b/index/scorch/persister.go @@ -116,7 +116,7 @@ OUTER: var ourSnapshot *IndexSnapshot var ourPersisted []chan error - var ourCallbacks []index.BatchCallbackFunction + var ourCallbacks []index.BatchCallback // check to see if there is a new snapshot to persist s.rootLock.Lock() diff --git a/index/scorch/scorch.go b/index/scorch/scorch.go index 691b9d971..89b3ba408 100644 --- a/index/scorch/scorch.go +++ b/index/scorch/scorch.go @@ -54,7 +54,7 @@ type Scorch struct { rootLock sync.RWMutex root *IndexSnapshot // holds 1 ref-count on the root rootPersisted []chan error // closed when root is persisted - callbacks []index.BatchCallbackFunction + callbacks []index.BatchCallback nextSnapshotEpoch uint64 eligibleForRemoval []uint64 // Index snapshot epochs that are safe to GC. ineligibleForRemoval map[string]bool // Filenames that should not be GC'ed yet. 
@@ -356,7 +356,7 @@ func (s *Scorch) Batch(batch *index.Batch) (err error) { atomic.AddUint64(&s.stats.TotBatchesEmpty, 1) } - err = s.prepareSegment(newSegment, ids, batch.InternalOps, batch.Callback()) + err = s.prepareSegment(newSegment, ids, batch.InternalOps, batch.Callbacks()) if err != nil { if newSegment != nil { _ = newSegment.Close() @@ -376,7 +376,7 @@ func (s *Scorch) Batch(batch *index.Batch) (err error) { } func (s *Scorch) prepareSegment(newSegment segment.Segment, ids []string, - internalOps map[string][]byte, callbacks []index.BatchCallbackFunction) error { + internalOps map[string][]byte, callbacks []index.BatchCallback) error { // new introduction introduction := &segmentIntroduction{ diff --git a/index/scorch/scorch_test.go b/index/scorch/scorch_test.go index 13ec719e8..e1cfb68ab 100644 --- a/index/scorch/scorch_test.go +++ b/index/scorch/scorch_test.go @@ -943,6 +943,64 @@ func TestIndexBatchWithCallbacks(t *testing.T) { } +func TestIndexBatchWithCallbacks(t *testing.T) { + defer func() { + err := DestroyTest() + if err != nil { + t.Fatal(err) + } + }() + + analysisQueue := index.NewAnalysisQueue(1) + idx, err := NewScorch(Name, testConfig, analysisQueue) + if err != nil { + t.Fatal(err) + } + err = idx.Open() + if err != nil { + t.Fatalf("error opening index: %v", err) + } + defer func() { + err := idx.Close() + if err != nil { + t.Fatal(err) + } + }() + + // Check that callback function works + updated := false + cbErr := fmt.Errorf("") + + batch := index.NewBatch() + doc := document.NewDocument("3") + doc.AddField(document.NewTextField("name", []uint64{}, []byte("test3"))) + batch.Update(doc) + batch.AddCallback(func(e error) { + updated = true + cbErr = e + + }) + + err = idx.Batch(batch) + if err != nil { + t.Error(err) + } + + for i := 0; i < 30; i++ { + if updated { + break + } + time.Sleep(500 * time.Millisecond) + } + if !updated { + t.Fatal("Callback function wasn't called") + } + if cbErr != nil { + t.Fatal("Error wasn't updated 
properly on callback function") + } + +} + func TestIndexInsertUpdateDeleteWithMultipleTypesStored(t *testing.T) { defer func() { err := DestroyTest() From 08c3a849cdc2691bba72164d293d948a8b32f243 Mon Sep 17 00:00:00 2001 From: Aviad Lichtenstadt Date: Wed, 21 Nov 2018 19:44:16 -0800 Subject: [PATCH 488/728] Make sure code compiles (fix upsidedown flow) --- index/upsidedown/upsidedown.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/index/upsidedown/upsidedown.go b/index/upsidedown/upsidedown.go index c7caa26b5..d0a27c000 100644 --- a/index/upsidedown/upsidedown.go +++ b/index/upsidedown/upsidedown.go @@ -960,8 +960,8 @@ func (udc *UpsideDownCouch) Batch(batch *index.Batch) (err error) { } // For sake of completeness - for i := range batch.Callback() { - batch.Callback()[i](err) + for i := range batch.Callbacks() { + batch.Callbacks()[i](err) } return } From 364fb19f5f547feb79c83ce03c5dee650d644c19 Mon Sep 17 00:00:00 2001 From: Aviad Lichtenstadt Date: Wed, 21 Nov 2018 21:05:40 -0800 Subject: [PATCH 489/728] bad merge fix --- index/scorch/scorch_test.go | 58 ------------------------------------- 1 file changed, 58 deletions(-) diff --git a/index/scorch/scorch_test.go b/index/scorch/scorch_test.go index e1cfb68ab..13ec719e8 100644 --- a/index/scorch/scorch_test.go +++ b/index/scorch/scorch_test.go @@ -943,64 +943,6 @@ func TestIndexBatchWithCallbacks(t *testing.T) { } -func TestIndexBatchWithCallbacks(t *testing.T) { - defer func() { - err := DestroyTest() - if err != nil { - t.Fatal(err) - } - }() - - analysisQueue := index.NewAnalysisQueue(1) - idx, err := NewScorch(Name, testConfig, analysisQueue) - if err != nil { - t.Fatal(err) - } - err = idx.Open() - if err != nil { - t.Fatalf("error opening index: %v", err) - } - defer func() { - err := idx.Close() - if err != nil { - t.Fatal(err) - } - }() - - // Check that callback function works - updated := false - cbErr := fmt.Errorf("") - - batch := index.NewBatch() - doc := 
document.NewDocument("3") - doc.AddField(document.NewTextField("name", []uint64{}, []byte("test3"))) - batch.Update(doc) - batch.AddCallback(func(e error) { - updated = true - cbErr = e - - }) - - err = idx.Batch(batch) - if err != nil { - t.Error(err) - } - - for i := 0; i < 30; i++ { - if updated { - break - } - time.Sleep(500 * time.Millisecond) - } - if !updated { - t.Fatal("Callback function wasn't called") - } - if cbErr != nil { - t.Fatal("Error wasn't updated properly on callback function") - } - -} - func TestIndexInsertUpdateDeleteWithMultipleTypesStored(t *testing.T) { defer func() { err := DestroyTest() From 51097a76ae6c9a40b729c4df312c116310a0a569 Mon Sep 17 00:00:00 2001 From: Aviad Lichtenstadt Date: Sun, 25 Nov 2018 21:54:22 -0500 Subject: [PATCH 490/728] Fixing bad merge --- index/scorch/introducer.go | 3 --- 1 file changed, 3 deletions(-) diff --git a/index/scorch/introducer.go b/index/scorch/introducer.go index 7b012a390..836e716c4 100644 --- a/index/scorch/introducer.go +++ b/index/scorch/introducer.go @@ -216,9 +216,6 @@ func (s *Scorch) introduceSegment(next *segmentIntroduction) error { if next.persisted != nil { s.rootPersisted = append(s.rootPersisted, next.persisted) } - if next.callbacks != nil { - s.callbacks = append(s.callbacks, next.callbacks...) - } s.callbacks = append(s.callbacks, next.callbacks...) 
// swap in new index snapshot newSnapshot.epoch = s.nextSnapshotEpoch From 55d3031c6ab8e91e135ad6a0d5e3eb43bd7315c6 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Mon, 26 Nov 2018 12:35:58 -0800 Subject: [PATCH 491/728] fix doc comment typo --- index/scorch/snapshot_segment.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/index/scorch/snapshot_segment.go b/index/scorch/snapshot_segment.go index 7672e853b..0e0c59e9f 100644 --- a/index/scorch/snapshot_segment.go +++ b/index/scorch/snapshot_segment.go @@ -89,7 +89,7 @@ func (s *SegmentSnapshot) DocNumbers(docIDs []string) (*roaring.Bitmap, error) { return rv, nil } -// DocNumbersLive returns bitsit containing doc numbers for all live docs +// DocNumbersLive returns a bitmap containing doc numbers for all live docs func (s *SegmentSnapshot) DocNumbersLive() *roaring.Bitmap { rv := roaring.NewBitmap() rv.AddRange(0, s.segment.Count()) From 367c7402b36681f493829e049ef77068a4862ce4 Mon Sep 17 00:00:00 2001 From: Dmitriy Kalugin-Balashov Date: Thu, 29 Nov 2018 17:25:55 -0800 Subject: [PATCH 492/728] Travis-CI reports sporadic failures on TestRegexpSearchScorch (#1057) * Bugfix: Travis-CI reports sporadic failures on TestRegexpSearchScorch. * Check return values. * TestRegexpSearchScorch - Changing InternalID to docID ('_id'). * Map initializing. 
--- search/searcher/base_test.go | 14 +++---- search/searcher/search_regexp_test.go | 57 ++++++++++++++------------- 2 files changed, 37 insertions(+), 34 deletions(-) diff --git a/search/searcher/base_test.go b/search/searcher/base_test.go index 425d6703c..feb00a6aa 100644 --- a/search/searcher/base_test.go +++ b/search/searcher/base_test.go @@ -15,7 +15,6 @@ package searcher import ( - "io/ioutil" "math" "regexp" @@ -48,9 +47,8 @@ func initTwoDocUpsideDown() index.Index { return twoDocIndex } -func initTwoDocScorch() index.Index { +func initTwoDocScorch(dir string) index.Index { analysisQueue := index.NewAnalysisQueue(1) - dir, _ := ioutil.TempDir("", "scorchTwoDoc") twoDocIndex, err := scorch.NewScorch( scorch.Name, map[string]interface{}{ @@ -68,11 +66,13 @@ func initTwoDocs(twoDocIndex index.Index) { if err != nil { panic(err) } + batch := index.NewBatch() for _, doc := range twoDocIndexDocs { - err := twoDocIndex.Update(doc) - if err != nil { - panic(err) - } + batch.Update(doc) + } + err = twoDocIndex.Batch(batch) + if err != nil { + panic(err) } } diff --git a/search/searcher/search_regexp_test.go b/search/searcher/search_regexp_test.go index 0c29068bd..feb23579c 100644 --- a/search/searcher/search_regexp_test.go +++ b/search/searcher/search_regexp_test.go @@ -17,6 +17,8 @@ package searcher import ( "encoding/binary" "fmt" + "io/ioutil" + "os" "regexp" "testing" @@ -37,13 +39,23 @@ func TestRegexpStringSearchUpsideDown(t *testing.T) { } func TestRegexpSearchScorch(t *testing.T) { - twoDocIndex := initTwoDocScorch() + dir, _ := ioutil.TempDir("", "scorchTwoDoc") + defer func() { + _ = os.RemoveAll(dir) + }() + + twoDocIndex := initTwoDocScorch(dir) testRegexpSearch(t, twoDocIndex, internalIDMakerScorch, searcherMaker) _ = twoDocIndex.Close() } func TestRegexpStringSearchScorch(t *testing.T) { - twoDocIndex := initTwoDocScorch() + dir, _ := ioutil.TempDir("", "scorchTwoDoc") + defer func() { + _ = os.RemoveAll(dir) + }() + + twoDocIndex := 
initTwoDocScorch(dir) testRegexpSearch(t, twoDocIndex, internalIDMakerScorch, searcherStringMaker) _ = twoDocIndex.Close() } @@ -101,29 +113,20 @@ func testRegexpSearch(t *testing.T, twoDocIndex index.Index, regexpSearcherCo := searcherMaker(t, twoDocIndexReader, "co.*", "desc") tests := []struct { - searcher search.Searcher - expecteds []*search.DocumentMatch + searcher search.Searcher + id2score map[string]float64 }{ { searcher: regexpSearcher, - expecteds: []*search.DocumentMatch{ - { - IndexInternalID: internalIDMaker(1), - Score: 1.916290731874155, - }, + id2score: map[string]float64{ + "1": 1.916290731874155, }, }, { searcher: regexpSearcherCo, - expecteds: []*search.DocumentMatch{ - { - IndexInternalID: internalIDMaker(2), - Score: 0.33875554280828685, - }, - { - IndexInternalID: internalIDMaker(3), - Score: 0.33875554280828685, - }, + id2score: map[string]float64{ + "2": 0.33875554280828685, + "3": 0.33875554280828685, }, }, } @@ -142,14 +145,14 @@ func testRegexpSearch(t *testing.T, twoDocIndex index.Index, next, err := test.searcher.Next(ctx) i := 0 for err == nil && next != nil { - if i < len(test.expecteds) { - if !next.IndexInternalID.Equals(test.expecteds[i].IndexInternalID) { - t.Errorf("test %d, expected result %d to have id %s got %s, next: %#v", - testIndex, i, test.expecteds[i].IndexInternalID, next.IndexInternalID, next) - } - if next.Score != test.expecteds[i].Score { + exID, _ := twoDocIndexReader.ExternalID(next.IndexInternalID) + if _, ok := test.id2score[exID]; !ok { + t.Errorf("test %d, found unexpected docID = %v, next = %v", testIndex, exID, next) + } else { + score := test.id2score[exID] + if next.Score != score { t.Errorf("test %d, expected result %d to have score %v got %v,next: %#v", - testIndex, i, test.expecteds[i].Score, next.Score, next) + testIndex, i, score, next.Score, next) t.Logf("scoring explanation: %s", next.Expl) } } @@ -160,8 +163,8 @@ func testRegexpSearch(t *testing.T, twoDocIndex index.Index, if err != nil { 
t.Fatalf("error iterating searcher: %v for test %d", err, testIndex) } - if len(test.expecteds) != i { - t.Errorf("expected %d results got %d for test %d", len(test.expecteds), i, testIndex) + if len(test.id2score) != i { + t.Errorf("expected %d results got %d for test %d", len(test.id2score), i, testIndex) } } } From 84c8ddee12a428200ba00c645b5c38658742aa22 Mon Sep 17 00:00:00 2001 From: Sreekanth Sivasankaran Date: Mon, 3 Dec 2018 10:59:58 +0530 Subject: [PATCH 493/728] Adding error messages for negative fuzziness Querying for negative fuzziness or edit distance is an invalid option and adding error messages for that. This would help further wrinkles down the line due to wrapping of -ve ED to a higher value during the uint typecasting. --- search/searcher/search_fuzzy.go | 4 ++++ search/searcher/search_fuzzy_test.go | 13 +++++++++++++ 2 files changed, 17 insertions(+) diff --git a/search/searcher/search_fuzzy.go b/search/searcher/search_fuzzy.go index 668d11afc..8176e59b5 100644 --- a/search/searcher/search_fuzzy.go +++ b/search/searcher/search_fuzzy.go @@ -31,6 +31,10 @@ func NewFuzzySearcher(indexReader index.IndexReader, term string, return nil, fmt.Errorf("fuzziness exceeds max (%d)", MaxFuzziness) } + if fuzziness < 0 { + return nil, fmt.Errorf("invalid fuzziness, negative") + } + // Note: we don't byte slice the term for a prefix because of runes. 
prefixTerm := "" for i, r := range term { diff --git a/search/searcher/search_fuzzy_test.go b/search/searcher/search_fuzzy_test.go index dfac2f468..4a87f139a 100644 --- a/search/searcher/search_fuzzy_test.go +++ b/search/searcher/search_fuzzy_test.go @@ -140,3 +140,16 @@ func TestFuzzySearch(t *testing.T) { } } } + +func TestFuzzySearchLimitErrors(t *testing.T) { + explainTrue := search.SearcherOptions{Explain: true} + _, err := NewFuzzySearcher(nil, "water", 3, 3, "desc", 1.0, explainTrue) + if err == nil { + t.Fatal("`fuzziness exceeds max (2)` error expected") + } + + _, err = NewFuzzySearcher(nil, "water", 3, -1, "desc", 1.0, explainTrue) + if err == nil { + t.Fatal("`invalid fuzziness, negative` error expected") + } +} From 9bb1687420e429f94ffcd4a159ad1c7181772452 Mon Sep 17 00:00:00 2001 From: Dmitriy Kalugin-Balashov Date: Tue, 4 Dec 2018 11:36:41 -0800 Subject: [PATCH 494/728] DocNumbers - skip segments, that don't include requested ids. --- index/scorch/segment/zap/segment.go | 31 ++++++++++++++++++++++++----- 1 file changed, 26 insertions(+), 5 deletions(-) diff --git a/index/scorch/segment/zap/segment.go b/index/scorch/segment/zap/segment.go index 8c6de211a..1bab522e2 100644 --- a/index/scorch/segment/zap/segment.go +++ b/index/scorch/segment/zap/segment.go @@ -418,12 +418,33 @@ func (s *SegmentBase) DocNumbers(ids []string) (*roaring.Bitmap, error) { } postingsList := emptyPostingsList - for _, id := range ids { - postingsList, err = idDict.postingsList([]byte(id), nil, postingsList) - if err != nil { - return nil, err + + skipCheck := false + sMax := "" + iMin := "" + + sMaxB, err := idDict.fst.GetMaxKey() + if err != nil { + skipCheck = true + } else { + sMax = string(sMaxB) + iMin = ids[0] + for i := 1; i < len(ids); i++ { + if ids[i] < iMin { + iMin = ids[i] + } + } + } + if skipCheck || (iMin <= sMax) { + for _, id := range ids { + if skipCheck || (id <= sMax) { + postingsList, err = idDict.postingsList([]byte(id), nil, postingsList) + if err != 
nil { + return nil, err + } + postingsList.OrInto(rv) + } } - postingsList.OrInto(rv) } } From 66e3c954fc4dd7ff51a9197480ce87dca10bc055 Mon Sep 17 00:00:00 2001 From: Dmitriy Kalugin-Balashov Date: Tue, 4 Dec 2018 13:56:08 -0800 Subject: [PATCH 495/728] vendor/manifest update - new vellum API. --- vendor/manifest | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vendor/manifest b/vendor/manifest index 72f0b6d7c..9471f14e2 100644 --- a/vendor/manifest +++ b/vendor/manifest @@ -137,7 +137,7 @@ "importpath": "github.com/couchbase/vellum", "repository": "https://github.com/couchbase/vellum", "vcs": "git", - "revision": "01d5c56e609533acd717717c8acc0d2dea6bfb89", + "revision": "f377ee3282b954c46915d89482bf93288ee7dd12", "branch": "master", "notests": true } From 8395e90b544ddfa13cbe9b112b330a8093bdec2c Mon Sep 17 00:00:00 2001 From: Dmitriy Kalugin-Balashov Date: Tue, 4 Dec 2018 14:12:07 -0800 Subject: [PATCH 496/728] Fix: DocNumbers must work with empty ids array. --- index/scorch/segment/zap/segment.go | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/index/scorch/segment/zap/segment.go b/index/scorch/segment/zap/segment.go index 1bab522e2..9342dd273 100644 --- a/index/scorch/segment/zap/segment.go +++ b/index/scorch/segment/zap/segment.go @@ -423,17 +423,21 @@ func (s *SegmentBase) DocNumbers(ids []string) (*roaring.Bitmap, error) { sMax := "" iMin := "" - sMaxB, err := idDict.fst.GetMaxKey() - if err != nil { - skipCheck = true - } else { - sMax = string(sMaxB) - iMin = ids[0] - for i := 1; i < len(ids); i++ { - if ids[i] < iMin { - iMin = ids[i] + if len(ids) > 0 { + sMaxB, err := idDict.fst.GetMaxKey() + if err != nil { + skipCheck = true + } else { + sMax = string(sMaxB) + iMin = ids[0] + for i := 1; i < len(ids); i++ { + if ids[i] < iMin { + iMin = ids[i] + } } } + } else { + skipCheck = true } if skipCheck || (iMin <= sMax) { for _, id := range ids { From 43e8e27f8e2cf5d076973dbad26f436aaa8a12c6 Mon Sep 17 
00:00:00 2001 From: Dmitriy Kalugin-Balashov Date: Tue, 4 Dec 2018 17:10:12 -0800 Subject: [PATCH 497/728] Simplification - Using filteredIds instead of ids. --- index/scorch/segment/zap/segment.go | 39 +++++++++++------------------ 1 file changed, 15 insertions(+), 24 deletions(-) diff --git a/index/scorch/segment/zap/segment.go b/index/scorch/segment/zap/segment.go index 9342dd273..606ea0cfa 100644 --- a/index/scorch/segment/zap/segment.go +++ b/index/scorch/segment/zap/segment.go @@ -418,38 +418,29 @@ func (s *SegmentBase) DocNumbers(ids []string) (*roaring.Bitmap, error) { } postingsList := emptyPostingsList - - skipCheck := false + filteredIds := ids[:0] sMax := "" - iMin := "" - if len(ids) > 0 { - sMaxB, err := idDict.fst.GetMaxKey() - if err != nil { - skipCheck = true - } else { - sMax = string(sMaxB) - iMin = ids[0] - for i := 1; i < len(ids); i++ { - if ids[i] < iMin { - iMin = ids[i] - } + sMaxB, err := idDict.fst.GetMaxKey() + if err == nil { + sMax = string(sMaxB) + for _, id := range ids { + if id <= sMax { + filteredIds = append(filteredIds, id) } } } else { - skipCheck = true + filteredIds = ids } - if skipCheck || (iMin <= sMax) { - for _, id := range ids { - if skipCheck || (id <= sMax) { - postingsList, err = idDict.postingsList([]byte(id), nil, postingsList) - if err != nil { - return nil, err - } - postingsList.OrInto(rv) - } + + for _, id := range filteredIds { + postingsList, err = idDict.postingsList([]byte(id), nil, postingsList) + if err != nil { + return nil, err } + postingsList.OrInto(rv) } + } return rv, nil From e3510d8ce59a6dcae03cd454cbf09926f89c20df Mon Sep 17 00:00:00 2001 From: Sreekanth Sivasankaran Date: Wed, 5 Dec 2018 14:04:58 +0530 Subject: [PATCH 498/728] Fix for Travis CI failuers Adding index close for better cleanup --- search_test.go | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/search_test.go b/search_test.go index 75aeac713..9937eb7b2 100644 --- a/search_test.go +++ b/search_test.go @@ 
-545,6 +545,11 @@ func TestNestedBooleanSearchers(t *testing.T) { if matches != len(searchResults.Hits) { t.Fatalf("Unexpected result set, %v != %v", matches, len(searchResults.Hits)) } + + err = idx.Close() + if err != nil { + t.Fatal(err) + } } func TestNestedBooleanMustNotSearcher(t *testing.T) { @@ -668,6 +673,11 @@ func TestNestedBooleanMustNotSearcher(t *testing.T) { if res.Total != 0 { t.Fatalf("Unexpected result, %v != 0", res.Total) } + + err = idx.Close() + if err != nil { + t.Fatal(err) + } } func TestSearchScorchOverEmptyKeyword(t *testing.T) { @@ -727,6 +737,11 @@ func TestSearchScorchOverEmptyKeyword(t *testing.T) { if res.Total != 10 { t.Fatalf("Unexpected search hits: %v, expected 10", res.Total) } + + err = idx.Close() + if err != nil { + t.Fatal(err) + } } func TestMultipleNestedBooleanMustNotSearchersOnScorch(t *testing.T) { @@ -868,4 +883,9 @@ func TestMultipleNestedBooleanMustNotSearchersOnScorch(t *testing.T) { if res.Total != 1 { t.Fatalf("Unexpected result, %v != 1", res.Total) } + + err = idx.Close() + if err != nil { + t.Fatal(err) + } } From 57aadf7f935b36a3fd7e1669b28d3df8da79fdf0 Mon Sep 17 00:00:00 2001 From: Dmitriy Kalugin-Balashov Date: Wed, 5 Dec 2018 11:18:41 -0800 Subject: [PATCH 499/728] Scorch test refactoring (#1055) * Scorch test refactoring. * Go 1.7 support for scorch tests. * Explicit test folder assigning. 
--- index/scorch/event_test.go | 9 +- index/scorch/field_dict_test.go | 14 +- index/scorch/reader_test.go | 34 +++- index/scorch/scorch_test.go | 236 ++++++++++++++++++------- index/scorch/snapshot_rollback_test.go | 13 +- 5 files changed, 225 insertions(+), 81 deletions(-) diff --git a/index/scorch/event_test.go b/index/scorch/event_test.go index 92b49d20d..58ba7c06c 100644 --- a/index/scorch/event_test.go +++ b/index/scorch/event_test.go @@ -22,8 +22,13 @@ import ( ) func TestEventBatchIntroductionStart(t *testing.T) { + testConfig := CreateConfig("TestEventBatchIntroductionStart") + err := InitTest(testConfig) + if err != nil { + t.Fatal(err) + } defer func() { - err := DestroyTest() + err := DestroyTest(testConfig) if err != nil { t.Fatal(err) } @@ -70,4 +75,4 @@ func TestEventBatchIntroductionStart(t *testing.T) { if count != 1 { t.Fatalf("expected to see 1 batch introduction event event, saw %d", count) } -} +} \ No newline at end of file diff --git a/index/scorch/field_dict_test.go b/index/scorch/field_dict_test.go index a25c5c984..92c49890e 100644 --- a/index/scorch/field_dict_test.go +++ b/index/scorch/field_dict_test.go @@ -23,15 +23,21 @@ import ( ) func TestIndexFieldDict(t *testing.T) { + cfg := CreateConfig("TestIndexFieldDict") + err := InitTest(cfg) + if err != nil { + t.Fatal(err) + } defer func() { - err := DestroyTest() + err := DestroyTest(cfg) if err != nil { - t.Fatal(err) + t.Log(err) } }() + analysisQueue := index.NewAnalysisQueue(1) - idx, err := NewScorch(Name, testConfig, analysisQueue) + idx, err := NewScorch(Name, cfg, analysisQueue) if err != nil { t.Fatal(err) } @@ -181,4 +187,4 @@ func TestIndexFieldDict(t *testing.T) { if !reflect.DeepEqual(expectedTerms, terms) { t.Errorf("expected %#v, got %#v", expectedTerms, terms) } -} +} \ No newline at end of file diff --git a/index/scorch/reader_test.go b/index/scorch/reader_test.go index 8414cbdc1..b89ba6dd9 100644 --- a/index/scorch/reader_test.go +++ b/index/scorch/reader_test.go @@ 
-24,15 +24,21 @@ import ( ) func TestIndexReader(t *testing.T) { + cfg := CreateConfig("TestIndexReader") + err := InitTest(cfg) + if err != nil { + t.Fatal(err) + } defer func() { - err := DestroyTest() + err := DestroyTest(cfg) if err != nil { - t.Fatal(err) + t.Log(err) } }() + analysisQueue := index.NewAnalysisQueue(1) - idx, err := NewScorch(Name, testConfig, analysisQueue) + idx, err := NewScorch(Name, cfg, analysisQueue) if err != nil { t.Fatal(err) } @@ -214,15 +220,20 @@ func TestIndexReader(t *testing.T) { } func TestIndexDocIdReader(t *testing.T) { + cfg := CreateConfig("TestIndexDocIdReader") + err := InitTest(cfg) + if err != nil { + t.Fatal(err) + } defer func() { - err := DestroyTest() + err := DestroyTest(cfg) if err != nil { - t.Fatal(err) + t.Log(err) } }() analysisQueue := index.NewAnalysisQueue(1) - idx, err := NewScorch(Name, testConfig, analysisQueue) + idx, err := NewScorch(Name, cfg, analysisQueue) if err != nil { t.Fatal(err) } @@ -325,15 +336,20 @@ func TestIndexDocIdReader(t *testing.T) { } func TestIndexDocIdOnlyReader(t *testing.T) { + cfg := CreateConfig("TestIndexDocIdOnlyReader") + err := InitTest(cfg) + if err != nil { + t.Fatal(err) + } defer func() { - err := DestroyTest() + err := DestroyTest(cfg) if err != nil { - t.Fatal(err) + t.Log(err) } }() analysisQueue := index.NewAnalysisQueue(1) - idx, err := NewScorch(Name, testConfig, analysisQueue) + idx, err := NewScorch(Name, cfg, analysisQueue) if err != nil { t.Fatal(err) } diff --git a/index/scorch/scorch_test.go b/index/scorch/scorch_test.go index adcabd22f..424afbfc6 100644 --- a/index/scorch/scorch_test.go +++ b/index/scorch/scorch_test.go @@ -38,12 +38,19 @@ func init() { DefaultPersisterNapTimeMSec = 1 } -func DestroyTest() error { - return os.RemoveAll("/tmp/bleve-scorch-test") +func InitTest(cfg map[string]interface{}) error { + return os.RemoveAll(cfg["path"].(string)) } -var testConfig = map[string]interface{}{ - "path": "/tmp/bleve-scorch-test", +func DestroyTest(cfg 
map[string]interface{}) error { + return os.RemoveAll(cfg["path"].(string)) +} + +func CreateConfig(name string) map[string]interface{} { + // TODO: Use t.Name() when Go 1.7 support terminates. + rv := make(map[string]interface{}) + rv["path"] = os.TempDir() + "/bleve-scorch-test-" + name + return rv } var testAnalyzer = &analysis.Analyzer{ @@ -51,15 +58,20 @@ var testAnalyzer = &analysis.Analyzer{ } func TestIndexOpenReopen(t *testing.T) { + cfg := CreateConfig("TestIndexOpenReopen") + err := InitTest(cfg) + if err != nil { + t.Fatal(err) + } defer func() { - err := DestroyTest() + err := DestroyTest(cfg) if err != nil { - t.Fatal(err) + t.Log(err) } }() analysisQueue := index.NewAnalysisQueue(1) - idx, err := NewScorch(Name, testConfig, analysisQueue) + idx, err := NewScorch(Name, cfg, analysisQueue) if err != nil { t.Fatal(err) } @@ -116,7 +128,7 @@ func TestIndexOpenReopen(t *testing.T) { t.Fatal(err) } - idx, err = NewScorch(Name, testConfig, analysisQueue) + idx, err = NewScorch(Name, cfg, analysisQueue) if err != nil { t.Fatal(err) } @@ -150,15 +162,20 @@ func TestIndexOpenReopen(t *testing.T) { } func TestIndexInsert(t *testing.T) { + cfg := CreateConfig("TestIndexInsert") + err := InitTest(cfg) + if err != nil { + t.Fatal(err) + } defer func() { - err := DestroyTest() + err := DestroyTest(cfg) if err != nil { - t.Fatal(err) + t.Log(err) } }() analysisQueue := index.NewAnalysisQueue(1) - idx, err := NewScorch(Name, testConfig, analysisQueue) + idx, err := NewScorch(Name, cfg, analysisQueue) if err != nil { t.Fatal(err) } @@ -216,15 +233,20 @@ func TestIndexInsert(t *testing.T) { } func TestIndexInsertThenDelete(t *testing.T) { + cfg := CreateConfig("TestIndexInsertThenDelete") + err := InitTest(cfg) + if err != nil { + t.Fatal(err) + } defer func() { - err := DestroyTest() + err := DestroyTest(cfg) if err != nil { - t.Fatal(err) + t.Log(err) } }() analysisQueue := index.NewAnalysisQueue(1) - idx, err := NewScorch(Name, testConfig, analysisQueue) + idx, err 
:= NewScorch(Name, cfg, analysisQueue) if err != nil { t.Fatal(err) } @@ -342,7 +364,7 @@ func TestIndexInsertThenDelete(t *testing.T) { t.Fatal(err) } - idx, err = NewScorch(Name, testConfig, analysisQueue) // reopen + idx, err = NewScorch(Name, cfg, analysisQueue) // reopen if err != nil { t.Fatal(err) } @@ -427,15 +449,20 @@ func TestIndexInsertThenDelete(t *testing.T) { } func TestIndexInsertThenUpdate(t *testing.T) { + cfg := CreateConfig("TestIndexInsertThenUpdate") + err := InitTest(cfg) + if err != nil { + t.Fatal(err) + } defer func() { - err := DestroyTest() + err := DestroyTest(cfg) if err != nil { - t.Fatal(err) + t.Log(err) } }() analysisQueue := index.NewAnalysisQueue(1) - idx, err := NewScorch(Name, testConfig, analysisQueue) + idx, err := NewScorch(Name, cfg, analysisQueue) if err != nil { t.Fatal(err) } @@ -494,15 +521,20 @@ func TestIndexInsertThenUpdate(t *testing.T) { } func TestIndexInsertMultiple(t *testing.T) { + cfg := CreateConfig("TestIndexInsertMultiple") + err := InitTest(cfg) + if err != nil { + t.Fatal(err) + } defer func() { - err := DestroyTest() + err := DestroyTest(cfg) if err != nil { - t.Fatal(err) + t.Log(err) } }() analysisQueue := index.NewAnalysisQueue(1) - idx, err := NewScorch(Name, testConfig, analysisQueue) + idx, err := NewScorch(Name, cfg, analysisQueue) if err != nil { t.Fatal(err) } @@ -561,15 +593,20 @@ func TestIndexInsertMultiple(t *testing.T) { } func TestIndexInsertWithStore(t *testing.T) { + cfg := CreateConfig("TestIndexInsertWithStore") + err := InitTest(cfg) + if err != nil { + t.Fatal(err) + } defer func() { - err := DestroyTest() + err := DestroyTest(cfg) if err != nil { - t.Fatal(err) + t.Log(err) } }() analysisQueue := index.NewAnalysisQueue(1) - idx, err := NewScorch(Name, testConfig, analysisQueue) + idx, err := NewScorch(Name, cfg, analysisQueue) if err != nil { t.Fatal(err) } @@ -578,9 +615,9 @@ func TestIndexInsertWithStore(t *testing.T) { t.Fatalf("error opening index: %v", err) } defer func() { - 
cerr := idx.Close() + err := idx.Close() if err != nil { - t.Fatal(cerr) + t.Fatal(err) } }() @@ -660,15 +697,20 @@ func TestIndexInsertWithStore(t *testing.T) { } func TestIndexInternalCRUD(t *testing.T) { + cfg := CreateConfig("TestIndexInternalCRUD") + err := InitTest(cfg) + if err != nil { + t.Fatal(err) + } defer func() { - err := DestroyTest() + err := DestroyTest(cfg) if err != nil { - t.Fatal(err) + t.Log(err) } }() analysisQueue := index.NewAnalysisQueue(1) - idx, err := NewScorch(Name, testConfig, analysisQueue) + idx, err := NewScorch(Name, cfg, analysisQueue) if err != nil { t.Fatal(err) } @@ -766,15 +808,20 @@ func TestIndexInternalCRUD(t *testing.T) { } func TestIndexBatch(t *testing.T) { + cfg := CreateConfig("TestIndexBatch") + err := InitTest(cfg) + if err != nil { + t.Fatal(err) + } defer func() { - err := DestroyTest() + err := DestroyTest(cfg) if err != nil { - t.Fatal(err) + t.Log(err) } }() analysisQueue := index.NewAnalysisQueue(1) - idx, err := NewScorch(Name, testConfig, analysisQueue) + idx, err := NewScorch(Name, cfg, analysisQueue) if err != nil { t.Fatal(err) } @@ -884,15 +931,20 @@ func TestIndexBatch(t *testing.T) { } func TestIndexInsertUpdateDeleteWithMultipleTypesStored(t *testing.T) { + cfg := CreateConfig("TestIndexInsertUpdateDeleteWithMultipleTypesStored") + err := InitTest(cfg) + if err != nil { + t.Fatal(err) + } defer func() { - err := DestroyTest() + err := DestroyTest(cfg) if err != nil { - t.Fatal(err) + t.Log(err) } }() analysisQueue := index.NewAnalysisQueue(1) - idx, err := NewScorch(Name, testConfig, analysisQueue) + idx, err := NewScorch(Name, cfg, analysisQueue) if err != nil { t.Fatal(err) } @@ -1102,15 +1154,20 @@ func TestIndexInsertUpdateDeleteWithMultipleTypesStored(t *testing.T) { } func TestIndexInsertFields(t *testing.T) { + cfg := CreateConfig("TestIndexInsertFields") + err := InitTest(cfg) + if err != nil { + t.Fatal(err) + } defer func() { - err := DestroyTest() + err := DestroyTest(cfg) if err != nil { - 
t.Fatal(err) + t.Log(err) } }() analysisQueue := index.NewAnalysisQueue(1) - idx, err := NewScorch(Name, testConfig, analysisQueue) + idx, err := NewScorch(Name, cfg, analysisQueue) if err != nil { t.Fatal(err) } @@ -1171,15 +1228,20 @@ func TestIndexInsertFields(t *testing.T) { } func TestIndexUpdateComposites(t *testing.T) { + cfg := CreateConfig("TestIndexUpdateComposites") + err := InitTest(cfg) + if err != nil { + t.Fatal(err) + } defer func() { - err := DestroyTest() + err := DestroyTest(cfg) if err != nil { - t.Fatal(err) + t.Log(err) } }() analysisQueue := index.NewAnalysisQueue(1) - idx, err := NewScorch(Name, testConfig, analysisQueue) + idx, err := NewScorch(Name, cfg, analysisQueue) if err != nil { t.Fatal(err) } @@ -1248,15 +1310,20 @@ func TestIndexUpdateComposites(t *testing.T) { } func TestIndexTermReaderCompositeFields(t *testing.T) { + cfg := CreateConfig("TestIndexTermReaderCompositeFields") + err := InitTest(cfg) + if err != nil { + t.Fatal(err) + } defer func() { - err := DestroyTest() + err := DestroyTest(cfg) if err != nil { - t.Fatal(err) + t.Log(err) } }() analysisQueue := index.NewAnalysisQueue(1) - idx, err := NewScorch(Name, testConfig, analysisQueue) + idx, err := NewScorch(Name, cfg, analysisQueue) if err != nil { t.Fatal(err) } @@ -1313,15 +1380,20 @@ func TestIndexTermReaderCompositeFields(t *testing.T) { } func TestIndexDocumentVisitFieldTerms(t *testing.T) { + cfg := CreateConfig("TestIndexDocumentVisitFieldTerms") + err := InitTest(cfg) + if err != nil { + t.Fatal(err) + } defer func() { - err := DestroyTest() + err := DestroyTest(cfg) if err != nil { - t.Fatal(err) + t.Log(err) } }() analysisQueue := index.NewAnalysisQueue(1) - idx, err := NewScorch(Name, testConfig, analysisQueue) + idx, err := NewScorch(Name, cfg, analysisQueue) if err != nil { t.Fatal(err) } @@ -1378,15 +1450,20 @@ func TestIndexDocumentVisitFieldTerms(t *testing.T) { } func TestConcurrentUpdate(t *testing.T) { + cfg := CreateConfig("TestConcurrentUpdate") + 
err := InitTest(cfg) + if err != nil { + t.Fatal(err) + } defer func() { - err := DestroyTest() + err := DestroyTest(cfg) if err != nil { - t.Fatal(err) + t.Log(err) } }() analysisQueue := index.NewAnalysisQueue(1) - idx, err := NewScorch(Name, testConfig, analysisQueue) + idx, err := NewScorch(Name, cfg, analysisQueue) if err != nil { t.Fatal(err) } @@ -1422,6 +1499,12 @@ func TestConcurrentUpdate(t *testing.T) { if err != nil { log.Fatal(err) } + defer func() { + err := r.Close() + if err != nil { + t.Fatal(err) + } + }() doc, err := r.Document("1") if err != nil { @@ -1434,15 +1517,20 @@ func TestConcurrentUpdate(t *testing.T) { } func TestLargeField(t *testing.T) { + cfg := CreateConfig("TestLargeField") + err := InitTest(cfg) + if err != nil { + t.Fatal(err) + } defer func() { - err := DestroyTest() + err := DestroyTest(cfg) if err != nil { - t.Fatal(err) + t.Log(err) } }() analysisQueue := index.NewAnalysisQueue(1) - idx, err := NewScorch(Name, testConfig, analysisQueue) + idx, err := NewScorch(Name, cfg, analysisQueue) if err != nil { t.Fatal(err) } @@ -1496,15 +1584,20 @@ This section needs additional citations for verification. 
Please help improve th There are three characteristics of liquids which are relevant to the discussion of a BLEVE:`) func TestIndexDocumentVisitFieldTermsWithMultipleDocs(t *testing.T) { + cfg := CreateConfig("TestIndexDocumentVisitFieldTermsWithMultipleDocs") + err := InitTest(cfg) + if err != nil { + t.Fatal(err) + } defer func() { - err := DestroyTest() + err := DestroyTest(cfg) if err != nil { - t.Fatal(err) + t.Log(err) } }() analysisQueue := index.NewAnalysisQueue(1) - idx, err := NewScorch(Name, testConfig, analysisQueue) + idx, err := NewScorch(Name, cfg, analysisQueue) if err != nil { t.Fatal(err) } @@ -1648,15 +1741,20 @@ func TestIndexDocumentVisitFieldTermsWithMultipleDocs(t *testing.T) { } func TestIndexDocumentVisitFieldTermsWithMultipleFieldOptions(t *testing.T) { + cfg := CreateConfig("TestIndexDocumentVisitFieldTermsWithMultipleFieldOptions") + err := InitTest(cfg) + if err != nil { + t.Fatal(err) + } defer func() { - err := DestroyTest() + err := DestroyTest(cfg) if err != nil { - t.Fatal(err) + t.Log(err) } }() analysisQueue := index.NewAnalysisQueue(1) - idx, err := NewScorch(Name, testConfig, analysisQueue) + idx, err := NewScorch(Name, cfg, analysisQueue) if err != nil { t.Fatal(err) } @@ -1718,6 +1816,19 @@ func TestIndexDocumentVisitFieldTermsWithMultipleFieldOptions(t *testing.T) { func TestAllFieldWithDifferentTermVectorsEnabled(t *testing.T) { // Based on https://github.com/blevesearch/bleve/issues/895 from xeizmendi + cfg := CreateConfig("TestAllFieldWithDifferentTermVectorsEnabled") + err := InitTest(cfg) + if err != nil { + t.Fatal(err) + } + defer func() { + err := DestroyTest(cfg) + if err != nil { + t.Log(err) + } + }() + + testConfig := cfg mp := mapping.NewIndexMapping() keywordMapping := mapping.NewTextFieldMapping() @@ -1736,7 +1847,6 @@ func TestAllFieldWithDifferentTermVectorsEnabled(t *testing.T) { mp.DefaultMapping = docMapping - _ = os.RemoveAll(testConfig["path"].(string)) analysisQueue := index.NewAnalysisQueue(1) idx, 
err := NewScorch("storeName", testConfig, analysisQueue) if err != nil { @@ -1747,8 +1857,10 @@ func TestAllFieldWithDifferentTermVectorsEnabled(t *testing.T) { t.Errorf("error opening index: %v", err) } defer func() { - _ = idx.Close() - _ = os.RemoveAll(testConfig["path"].(string)) + err := idx.Close() + if err != nil { + t.Fatal(err) + } }() data := map[string]string{ diff --git a/index/scorch/snapshot_rollback_test.go b/index/scorch/snapshot_rollback_test.go index 0065a746d..1fe2a5909 100644 --- a/index/scorch/snapshot_rollback_test.go +++ b/index/scorch/snapshot_rollback_test.go @@ -22,20 +22,25 @@ import ( ) func TestIndexRollback(t *testing.T) { + cfg := CreateConfig("TestIndexRollback") numSnapshotsToKeepOrig := NumSnapshotsToKeep NumSnapshotsToKeep = 1000 + err := InitTest(cfg) + if err != nil { + t.Fatal(err) + } defer func() { NumSnapshotsToKeep = numSnapshotsToKeepOrig - err := DestroyTest() + err := DestroyTest(cfg) if err != nil { - t.Fatal(err) + t.Log(err) } }() analysisQueue := index.NewAnalysisQueue(1) - idx, err := NewScorch(Name, testConfig, analysisQueue) + idx, err := NewScorch(Name, cfg, analysisQueue) if err != nil { t.Fatal(err) } @@ -212,4 +217,4 @@ func TestIndexRollback(t *testing.T) { if err != nil { t.Fatal(err) } -} +} \ No newline at end of file From 934a36ea597bf6c20b8b05bda553bbf3f1d2d073 Mon Sep 17 00:00:00 2001 From: Dmitriy Kalugin-Balashov Date: Wed, 5 Dec 2018 12:00:55 -0800 Subject: [PATCH 500/728] Refactoring. 
--- index/scorch/segment/zap/segment.go | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/index/scorch/segment/zap/segment.go b/index/scorch/segment/zap/segment.go index 606ea0cfa..4ee6c290b 100644 --- a/index/scorch/segment/zap/segment.go +++ b/index/scorch/segment/zap/segment.go @@ -418,19 +418,18 @@ func (s *SegmentBase) DocNumbers(ids []string) (*roaring.Bitmap, error) { } postingsList := emptyPostingsList - filteredIds := ids[:0] + filteredIds := make([]string, 0, len(ids)) sMax := "" sMaxB, err := idDict.fst.GetMaxKey() - if err == nil { - sMax = string(sMaxB) - for _, id := range ids { - if id <= sMax { - filteredIds = append(filteredIds, id) - } + if err != nil { + return nil, err + } + sMax = string(sMaxB) + for _, id := range ids { + if id <= sMax { + filteredIds = append(filteredIds, id) } - } else { - filteredIds = ids } for _, id := range filteredIds { From 1ef74c961c24647952d6626f40b12034d5ffda7d Mon Sep 17 00:00:00 2001 From: abhinavdangeti Date: Wed, 5 Dec 2018 12:18:41 -0800 Subject: [PATCH 501/728] Formatting: go fmt ./... 
--- index/scorch/event_test.go | 2 +- index/scorch/field_dict_test.go | 3 +-- index/scorch/reader_test.go | 1 - index/scorch/snapshot_rollback_test.go | 2 +- 4 files changed, 3 insertions(+), 5 deletions(-) diff --git a/index/scorch/event_test.go b/index/scorch/event_test.go index 58ba7c06c..9d39332e9 100644 --- a/index/scorch/event_test.go +++ b/index/scorch/event_test.go @@ -75,4 +75,4 @@ func TestEventBatchIntroductionStart(t *testing.T) { if count != 1 { t.Fatalf("expected to see 1 batch introduction event event, saw %d", count) } -} \ No newline at end of file +} diff --git a/index/scorch/field_dict_test.go b/index/scorch/field_dict_test.go index 92c49890e..8cffe9e62 100644 --- a/index/scorch/field_dict_test.go +++ b/index/scorch/field_dict_test.go @@ -35,7 +35,6 @@ func TestIndexFieldDict(t *testing.T) { } }() - analysisQueue := index.NewAnalysisQueue(1) idx, err := NewScorch(Name, cfg, analysisQueue) if err != nil { @@ -187,4 +186,4 @@ func TestIndexFieldDict(t *testing.T) { if !reflect.DeepEqual(expectedTerms, terms) { t.Errorf("expected %#v, got %#v", expectedTerms, terms) } -} \ No newline at end of file +} diff --git a/index/scorch/reader_test.go b/index/scorch/reader_test.go index b89ba6dd9..a1d612b5f 100644 --- a/index/scorch/reader_test.go +++ b/index/scorch/reader_test.go @@ -36,7 +36,6 @@ func TestIndexReader(t *testing.T) { } }() - analysisQueue := index.NewAnalysisQueue(1) idx, err := NewScorch(Name, cfg, analysisQueue) if err != nil { diff --git a/index/scorch/snapshot_rollback_test.go b/index/scorch/snapshot_rollback_test.go index 1fe2a5909..73523a0ba 100644 --- a/index/scorch/snapshot_rollback_test.go +++ b/index/scorch/snapshot_rollback_test.go @@ -217,4 +217,4 @@ func TestIndexRollback(t *testing.T) { if err != nil { t.Fatal(err) } -} \ No newline at end of file +} From 2ab80c36124329279fda5621bd1213cc2b0a64dd Mon Sep 17 00:00:00 2001 From: abhinavdangeti Date: Wed, 5 Dec 2018 16:37:11 -0800 Subject: [PATCH 502/728] Defer closing the index in 
tests to avoid cascade failures If a test fails, to avoid a subsequent test from failing because of the earlier test not closing the index, defer the closing part. --- search_test.go | 101 ++++++++++++++++++++++++------------------------- 1 file changed, 50 insertions(+), 51 deletions(-) diff --git a/search_test.go b/search_test.go index 9937eb7b2..59ac13402 100644 --- a/search_test.go +++ b/search_test.go @@ -430,13 +430,6 @@ func TestMemoryNeededForSearchResult(t *testing.T) { // https://github.com/blevesearch/bleve/issues/954 func TestNestedBooleanSearchers(t *testing.T) { - defer func() { - err := os.RemoveAll("testidx") - if err != nil { - t.Fatal(err) - } - }() - // create an index with a custom analyzer idxMapping := NewIndexMapping() if err := idxMapping.AddCustomAnalyzer("3xbla", map[string]interface{}{ @@ -453,6 +446,18 @@ func TestNestedBooleanSearchers(t *testing.T) { t.Fatal(err) } + defer func() { + err = idx.Close() + if err != nil { + t.Fatal(err) + } + + err = os.RemoveAll("testidx") + if err != nil { + t.Fatal(err) + } + }() + // create and insert documents as a batch batch := idx.NewBatch() matches := 0 @@ -545,27 +550,27 @@ func TestNestedBooleanSearchers(t *testing.T) { if matches != len(searchResults.Hits) { t.Fatalf("Unexpected result set, %v != %v", matches, len(searchResults.Hits)) } +} - err = idx.Close() +func TestNestedBooleanMustNotSearcher(t *testing.T) { + // create an index with default settings + idxMapping := NewIndexMapping() + idx, err := New("testidx", idxMapping) if err != nil { t.Fatal(err) } -} -func TestNestedBooleanMustNotSearcher(t *testing.T) { defer func() { - err := os.RemoveAll("testidx") + err = idx.Close() if err != nil { t.Fatal(err) } - }() - // create an index with default settings - idxMapping := NewIndexMapping() - idx, err := New("testidx", idxMapping) - if err != nil { - t.Fatal(err) - } + err = os.RemoveAll("testidx") + if err != nil { + t.Fatal(err) + } + }() // create and insert documents as a batch batch 
:= idx.NewBatch() @@ -673,24 +678,10 @@ func TestNestedBooleanMustNotSearcher(t *testing.T) { if res.Total != 0 { t.Fatalf("Unexpected result, %v != 0", res.Total) } - - err = idx.Close() - if err != nil { - t.Fatal(err) - } } func TestSearchScorchOverEmptyKeyword(t *testing.T) { defaultIndexType := Config.DefaultIndexType - - defer func() { - err := os.RemoveAll("testidx") - if err != nil { - t.Fatal(err) - } - Config.DefaultIndexType = defaultIndexType - }() - Config.DefaultIndexType = scorch.Name dmap := mapping.NewDocumentMapping() @@ -713,6 +704,20 @@ func TestSearchScorchOverEmptyKeyword(t *testing.T) { if err != nil { t.Fatal(err) } + + defer func() { + err = idx.Close() + if err != nil { + t.Fatal(err) + } + + err = os.RemoveAll("testidx") + if err != nil { + t.Fatal(err) + } + Config.DefaultIndexType = defaultIndexType + }() + for i := 0; i < 10; i++ { err = idx.Index(fmt.Sprint(i), map[string]string{"name": fmt.Sprintf("test%d", i), "id": ""}) if err != nil { @@ -737,24 +742,10 @@ func TestSearchScorchOverEmptyKeyword(t *testing.T) { if res.Total != 10 { t.Fatalf("Unexpected search hits: %v, expected 10", res.Total) } - - err = idx.Close() - if err != nil { - t.Fatal(err) - } } func TestMultipleNestedBooleanMustNotSearchersOnScorch(t *testing.T) { defaultIndexType := Config.DefaultIndexType - - defer func() { - err := os.RemoveAll("testidx") - if err != nil { - t.Fatal(err) - } - Config.DefaultIndexType = defaultIndexType - }() - Config.DefaultIndexType = scorch.Name // create an index with default settings @@ -764,6 +755,19 @@ func TestMultipleNestedBooleanMustNotSearchersOnScorch(t *testing.T) { t.Fatal(err) } + defer func() { + err = idx.Close() + if err != nil { + t.Fatal(err) + } + + err = os.RemoveAll("testidx") + if err != nil { + t.Fatal(err) + } + Config.DefaultIndexType = defaultIndexType + }() + // create and insert documents as a batch batch := idx.NewBatch() @@ -883,9 +887,4 @@ func TestMultipleNestedBooleanMustNotSearchersOnScorch(t 
*testing.T) { if res.Total != 1 { t.Fatalf("Unexpected result, %v != 1", res.Total) } - - err = idx.Close() - if err != nil { - t.Fatal(err) - } } From c19cdeeb03bd0f2ef0f4fa4cee097ff012ba85c5 Mon Sep 17 00:00:00 2001 From: abhinavdangeti Date: Wed, 5 Dec 2018 20:16:51 -0800 Subject: [PATCH 503/728] Update .travis.yml to use supported versions of golang https://golang.org/doc/devel/release.html#policy --- .travis.yml | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/.travis.yml b/.travis.yml index 934e86268..fd6e1ba38 100644 --- a/.travis.yml +++ b/.travis.yml @@ -3,10 +3,9 @@ sudo: false language: go go: - - 1.7.x - - 1.8.x - - 1.9.x - - "1.10" + - "1.9.x" + - "1.10.x" + - "1.11.x" script: - go get golang.org/x/tools/cmd/cover From a321e9eb77d9db3fb3fd45943dbcc7599b89f165 Mon Sep 17 00:00:00 2001 From: Sreekanth Sivasankaran Date: Thu, 6 Dec 2018 13:04:21 +0530 Subject: [PATCH 504/728] MB-32183 - new Levenshtein automaton Adopting new levenshtein automaton --- index/scorch/snapshot_index.go | 27 +++++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/index/scorch/snapshot_index.go b/index/scorch/snapshot_index.go index 0d312fcca..4c4d92144 100644 --- a/index/scorch/snapshot_index.go +++ b/index/scorch/snapshot_index.go @@ -27,9 +27,13 @@ import ( "github.com/blevesearch/bleve/document" "github.com/blevesearch/bleve/index" "github.com/blevesearch/bleve/index/scorch/segment" - "github.com/couchbase/vellum/levenshtein" + "github.com/couchbase/vellum" + lev2 "github.com/couchbase/vellum/levenshtein2" ) +// re usable, threadsafe levenshtein builders +var lb1, lb2 *lev2.LevenshteinAutomatonBuilder + type asynchSegmentResult struct { dictItr segment.DictionaryIterator @@ -46,6 +50,15 @@ var reflectStaticSizeIndexSnapshot int func init() { var is interface{} = IndexSnapshot{} reflectStaticSizeIndexSnapshot = int(reflect.TypeOf(is).Size()) + var err error + lb1, err = lev2.NewLevenshteinAutomatonBuilder(1, true) + if 
err != nil { + panic(fmt.Errorf("Levenshtein automaton ed1 builder err: %v", err)) + } + lb2, err = lev2.NewLevenshteinAutomatonBuilder(2, true) + if err != nil { + panic(fmt.Errorf("Levenshtein automaton ed2 builder err: %v", err)) + } } type IndexSnapshot struct { @@ -194,9 +207,19 @@ func (i *IndexSnapshot) FieldDictRegexp(field string, }) } +func (i *IndexSnapshot) getLevAutomaton(term string, + fuzziness uint8) (vellum.Automaton, error) { + if fuzziness == 1 { + return lb1.BuildDfa(term, fuzziness) + } else if fuzziness == 2 { + return lb2.BuildDfa(term, fuzziness) + } + return nil, fmt.Errorf("fuzziness exceeds the max limit") +} + func (i *IndexSnapshot) FieldDictFuzzy(field string, term string, fuzziness int, prefix string) (index.FieldDict, error) { - a, err := levenshtein.New(term, fuzziness) + a, err := i.getLevAutomaton(term, uint8(fuzziness)) if err != nil { return nil, err } From 482566ce61e617fbfdc006408861189a9106b6dc Mon Sep 17 00:00:00 2001 From: Funzinator Date: Thu, 6 Dec 2018 21:10:52 +0100 Subject: [PATCH 505/728] add ASCII Folding Filter fixes #957 --- analysis/char/asciifolding/asciifolding.go | 3570 +++++++++++++++++ .../char/asciifolding/asciifolding_test.go | 56 + 2 files changed, 3626 insertions(+) create mode 100644 analysis/char/asciifolding/asciifolding.go create mode 100644 analysis/char/asciifolding/asciifolding_test.go diff --git a/analysis/char/asciifolding/asciifolding.go b/analysis/char/asciifolding/asciifolding.go new file mode 100644 index 000000000..469102e2c --- /dev/null +++ b/analysis/char/asciifolding/asciifolding.go @@ -0,0 +1,3570 @@ +// Copyright (c) 2018 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// converted to Go from Lucene's AsciiFoldingFilter +// https://lucene.apache.org/core/4_0_0/analyzers-common/org/apache/lucene/analysis/miscellaneous/ASCIIFoldingFilter.html + +package asciifolding + +import ( + "github.com/blevesearch/bleve/analysis" + "github.com/blevesearch/bleve/registry" +) + +const Name = "asciifolding" + +type AsciiFoldingFilter struct{} + +func New() *AsciiFoldingFilter { + return &AsciiFoldingFilter{} +} + +func (s *AsciiFoldingFilter) Filter(input []byte) []byte { + if len(input) == 0 { + return input + } + + in := []rune(string(input)) + length := len(in) + + // Worst-case length required if all runes fold to 4 runes + out := make([]rune, length, length*4) + + out = foldToASCII(in, 0, out, 0, length) + return []byte(string(out)) +} + +func AsciiFoldingFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.CharFilter, error) { + return New(), nil +} + +func init() { + registry.RegisterCharFilter(Name, AsciiFoldingFilterConstructor) +} + +// Converts characters above ASCII to their ASCII equivalents. +// For example, accents are removed from accented characters. 
+func foldToASCII(input []rune, inputPos int, output []rune, outputPos int, length int) []rune { + end := inputPos + length + for pos := inputPos; pos < end; pos++ { + c := input[pos] + + // Quick test: if it's not in range then just keep current character + if c < '\u0080' { + output[outputPos] = c + outputPos++ + } else { + switch c { + case '\u00C0': // À [LATIN CAPITAL LETTER A WITH GRAVE] + fallthrough + case '\u00C1': // Á [LATIN CAPITAL LETTER A WITH ACUTE] + fallthrough + case '\u00C2': //  [LATIN CAPITAL LETTER A WITH CIRCUMFLEX] + fallthrough + case '\u00C3': // à [LATIN CAPITAL LETTER A WITH TILDE] + fallthrough + case '\u00C4': // Ä [LATIN CAPITAL LETTER A WITH DIAERESIS] + fallthrough + case '\u00C5': // Å [LATIN CAPITAL LETTER A WITH RING ABOVE] + fallthrough + case '\u0100': // Ā [LATIN CAPITAL LETTER A WITH MACRON] + fallthrough + case '\u0102': // Ă [LATIN CAPITAL LETTER A WITH BREVE] + fallthrough + case '\u0104': // Ą [LATIN CAPITAL LETTER A WITH OGONEK] + fallthrough + case '\u018F': // Ə http://en.wikipedia.org/wiki/Schwa [LATIN CAPITAL LETTER SCHWA] + fallthrough + case '\u01CD': // Ǎ [LATIN CAPITAL LETTER A WITH CARON] + fallthrough + case '\u01DE': // Ǟ [LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON] + fallthrough + case '\u01E0': // Ǡ [LATIN CAPITAL LETTER A WITH DOT ABOVE AND MACRON] + fallthrough + case '\u01FA': // Ǻ [LATIN CAPITAL LETTER A WITH RING ABOVE AND ACUTE] + fallthrough + case '\u0200': // Ȁ [LATIN CAPITAL LETTER A WITH DOUBLE GRAVE] + fallthrough + case '\u0202': // Ȃ [LATIN CAPITAL LETTER A WITH INVERTED BREVE] + fallthrough + case '\u0226': // Ȧ [LATIN CAPITAL LETTER A WITH DOT ABOVE] + fallthrough + case '\u023A': // Ⱥ [LATIN CAPITAL LETTER A WITH STROKE] + fallthrough + case '\u1D00': // ᴀ [LATIN LETTER SMALL CAPITAL A] + fallthrough + case '\u1E00': // Ḁ [LATIN CAPITAL LETTER A WITH RING BELOW] + fallthrough + case '\u1EA0': // Ạ [LATIN CAPITAL LETTER A WITH DOT BELOW] + fallthrough + case '\u1EA2': // Ả [LATIN 
CAPITAL LETTER A WITH HOOK ABOVE] + fallthrough + case '\u1EA4': // Ấ [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND ACUTE] + fallthrough + case '\u1EA6': // Ầ [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND GRAVE] + fallthrough + case '\u1EA8': // Ẩ [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE] + fallthrough + case '\u1EAA': // Ẫ [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND TILDE] + fallthrough + case '\u1EAC': // Ậ [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND DOT BELOW] + fallthrough + case '\u1EAE': // Ắ [LATIN CAPITAL LETTER A WITH BREVE AND ACUTE] + fallthrough + case '\u1EB0': // Ằ [LATIN CAPITAL LETTER A WITH BREVE AND GRAVE] + fallthrough + case '\u1EB2': // Ẳ [LATIN CAPITAL LETTER A WITH BREVE AND HOOK ABOVE] + fallthrough + case '\u1EB4': // Ẵ [LATIN CAPITAL LETTER A WITH BREVE AND TILDE] + fallthrough + case '\u24B6': // Ⓐ [CIRCLED LATIN CAPITAL LETTER A] + fallthrough + case '\uFF21': // A [FULLWIDTH LATIN CAPITAL LETTER A] + fallthrough + case '\u1EB6': // Ặ [LATIN CAPITAL LETTER A WITH BREVE AND DOT BELOW] + output[outputPos] = 'A' + outputPos++ + + case '\u00E0': // à [LATIN SMALL LETTER A WITH GRAVE] + fallthrough + case '\u00E1': // á [LATIN SMALL LETTER A WITH ACUTE] + fallthrough + case '\u00E2': // â [LATIN SMALL LETTER A WITH CIRCUMFLEX] + fallthrough + case '\u00E3': // ã [LATIN SMALL LETTER A WITH TILDE] + fallthrough + case '\u00E4': // ä [LATIN SMALL LETTER A WITH DIAERESIS] + fallthrough + case '\u00E5': // å [LATIN SMALL LETTER A WITH RING ABOVE] + fallthrough + case '\u0101': // ā [LATIN SMALL LETTER A WITH MACRON] + fallthrough + case '\u0103': // ă [LATIN SMALL LETTER A WITH BREVE] + fallthrough + case '\u0105': // ą [LATIN SMALL LETTER A WITH OGONEK] + fallthrough + case '\u01CE': // ǎ [LATIN SMALL LETTER A WITH CARON] + fallthrough + case '\u01DF': // ǟ [LATIN SMALL LETTER A WITH DIAERESIS AND MACRON] + fallthrough + case '\u01E1': // ǡ [LATIN SMALL LETTER A WITH DOT ABOVE AND MACRON] + fallthrough + case '\u01FB': // ǻ [LATIN SMALL 
LETTER A WITH RING ABOVE AND ACUTE] + fallthrough + case '\u0201': // ȁ [LATIN SMALL LETTER A WITH DOUBLE GRAVE] + fallthrough + case '\u0203': // ȃ [LATIN SMALL LETTER A WITH INVERTED BREVE] + fallthrough + case '\u0227': // ȧ [LATIN SMALL LETTER A WITH DOT ABOVE] + fallthrough + case '\u0250': // ɐ [LATIN SMALL LETTER TURNED A] + fallthrough + case '\u0259': // ə [LATIN SMALL LETTER SCHWA] + fallthrough + case '\u025A': // ɚ [LATIN SMALL LETTER SCHWA WITH HOOK] + fallthrough + case '\u1D8F': // ᶏ [LATIN SMALL LETTER A WITH RETROFLEX HOOK] + fallthrough + case '\u1D95': // ᶕ [LATIN SMALL LETTER SCHWA WITH RETROFLEX HOOK] + fallthrough + case '\u1E01': // ạ [LATIN SMALL LETTER A WITH RING BELOW] + fallthrough + case '\u1E9A': // ả [LATIN SMALL LETTER A WITH RIGHT HALF RING] + fallthrough + case '\u1EA1': // ạ [LATIN SMALL LETTER A WITH DOT BELOW] + fallthrough + case '\u1EA3': // ả [LATIN SMALL LETTER A WITH HOOK ABOVE] + fallthrough + case '\u1EA5': // ấ [LATIN SMALL LETTER A WITH CIRCUMFLEX AND ACUTE] + fallthrough + case '\u1EA7': // ầ [LATIN SMALL LETTER A WITH CIRCUMFLEX AND GRAVE] + fallthrough + case '\u1EA9': // ẩ [LATIN SMALL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE] + fallthrough + case '\u1EAB': // ẫ [LATIN SMALL LETTER A WITH CIRCUMFLEX AND TILDE] + fallthrough + case '\u1EAD': // ậ [LATIN SMALL LETTER A WITH CIRCUMFLEX AND DOT BELOW] + fallthrough + case '\u1EAF': // ắ [LATIN SMALL LETTER A WITH BREVE AND ACUTE] + fallthrough + case '\u1EB1': // ằ [LATIN SMALL LETTER A WITH BREVE AND GRAVE] + fallthrough + case '\u1EB3': // ẳ [LATIN SMALL LETTER A WITH BREVE AND HOOK ABOVE] + fallthrough + case '\u1EB5': // ẵ [LATIN SMALL LETTER A WITH BREVE AND TILDE] + fallthrough + case '\u1EB7': // ặ [LATIN SMALL LETTER A WITH BREVE AND DOT BELOW] + fallthrough + case '\u2090': // ₐ [LATIN SUBSCRIPT SMALL LETTER A] + fallthrough + case '\u2094': // ₔ [LATIN SUBSCRIPT SMALL LETTER SCHWA] + fallthrough + case '\u24D0': // ⓐ [CIRCLED LATIN SMALL LETTER A] + fallthrough 
+ case '\u2C65': // ⱥ [LATIN SMALL LETTER A WITH STROKE] + fallthrough + case '\u2C6F': // Ɐ [LATIN CAPITAL LETTER TURNED A] + fallthrough + case '\uFF41': // a [FULLWIDTH LATIN SMALL LETTER A] + output[outputPos] = 'a' + outputPos++ + + case '\uA732': // Ꜳ [LATIN CAPITAL LETTER AA] + output = output[:(len(output) + 1)] + output[outputPos] = 'A' + outputPos++ + output[outputPos] = 'A' + outputPos++ + + case '\u00C6': // Æ [LATIN CAPITAL LETTER AE] + fallthrough + case '\u01E2': // Ǣ [LATIN CAPITAL LETTER AE WITH MACRON] + fallthrough + case '\u01FC': // Ǽ [LATIN CAPITAL LETTER AE WITH ACUTE] + fallthrough + case '\u1D01': // ᴁ [LATIN LETTER SMALL CAPITAL AE] + output = output[:(len(output) + 1)] + output[outputPos] = 'A' + outputPos++ + output[outputPos] = 'E' + outputPos++ + + case '\uA734': // Ꜵ [LATIN CAPITAL LETTER AO] + output = output[:(len(output) + 1)] + output[outputPos] = 'A' + outputPos++ + output[outputPos] = 'O' + outputPos++ + + case '\uA736': // Ꜷ [LATIN CAPITAL LETTER AU] + output = output[:(len(output) + 1)] + output[outputPos] = 'A' + outputPos++ + output[outputPos] = 'U' + outputPos++ + + case '\uA738': // Ꜹ [LATIN CAPITAL LETTER AV] + fallthrough + case '\uA73A': // Ꜻ [LATIN CAPITAL LETTER AV WITH HORIZONTAL BAR] + output = output[:(len(output) + 1)] + output[outputPos] = 'A' + outputPos++ + output[outputPos] = 'V' + outputPos++ + + case '\uA73C': // Ꜽ [LATIN CAPITAL LETTER AY] + output = output[:(len(output) + 1)] + output[outputPos] = 'A' + outputPos++ + output[outputPos] = 'Y' + outputPos++ + + case '\u249C': // ⒜ [PARENTHESIZED LATIN SMALL LETTER A] + output = output[:(len(output) + 2)] + output[outputPos] = '(' + outputPos++ + output[outputPos] = 'a' + outputPos++ + output[outputPos] = ')' + outputPos++ + + case '\uA733': // ꜳ [LATIN SMALL LETTER AA] + output = output[:(len(output) + 1)] + output[outputPos] = 'a' + outputPos++ + output[outputPos] = 'a' + outputPos++ + + case '\u00E6': // æ [LATIN SMALL LETTER AE] + fallthrough + case 
'\u01E3': // ǣ [LATIN SMALL LETTER AE WITH MACRON] + fallthrough + case '\u01FD': // ǽ [LATIN SMALL LETTER AE WITH ACUTE] + fallthrough + case '\u1D02': // ᴂ [LATIN SMALL LETTER TURNED AE] + output = output[:(len(output) + 1)] + output[outputPos] = 'a' + outputPos++ + output[outputPos] = 'e' + outputPos++ + + case '\uA735': // ꜵ [LATIN SMALL LETTER AO] + output = output[:(len(output) + 1)] + output[outputPos] = 'a' + outputPos++ + output[outputPos] = 'o' + outputPos++ + + case '\uA737': // ꜷ [LATIN SMALL LETTER AU] + output = output[:(len(output) + 1)] + output[outputPos] = 'a' + outputPos++ + output[outputPos] = 'u' + outputPos++ + + case '\uA739': // ꜹ [LATIN SMALL LETTER AV] + fallthrough + case '\uA73B': // ꜻ [LATIN SMALL LETTER AV WITH HORIZONTAL BAR] + output = output[:(len(output) + 1)] + output[outputPos] = 'a' + outputPos++ + output[outputPos] = 'v' + outputPos++ + + case '\uA73D': // ꜽ [LATIN SMALL LETTER AY] + output = output[:(len(output) + 1)] + output[outputPos] = 'a' + outputPos++ + output[outputPos] = 'y' + outputPos++ + + case '\u0181': // Ɓ [LATIN CAPITAL LETTER B WITH HOOK] + fallthrough + case '\u0182': // Ƃ [LATIN CAPITAL LETTER B WITH TOPBAR] + fallthrough + case '\u0243': // Ƀ [LATIN CAPITAL LETTER B WITH STROKE] + fallthrough + case '\u0299': // ʙ [LATIN LETTER SMALL CAPITAL B] + fallthrough + case '\u1D03': // ᴃ [LATIN LETTER SMALL CAPITAL BARRED B] + fallthrough + case '\u1E02': // Ḃ [LATIN CAPITAL LETTER B WITH DOT ABOVE] + fallthrough + case '\u1E04': // Ḅ [LATIN CAPITAL LETTER B WITH DOT BELOW] + fallthrough + case '\u1E06': // Ḇ [LATIN CAPITAL LETTER B WITH LINE BELOW] + fallthrough + case '\u24B7': // Ⓑ [CIRCLED LATIN CAPITAL LETTER B] + fallthrough + case '\uFF22': // B [FULLWIDTH LATIN CAPITAL LETTER B] + output[outputPos] = 'B' + outputPos++ + + case '\u0180': // ƀ [LATIN SMALL LETTER B WITH STROKE] + fallthrough + case '\u0183': // ƃ [LATIN SMALL LETTER B WITH TOPBAR] + fallthrough + case '\u0253': // ɓ [LATIN SMALL LETTER B WITH 
HOOK] + fallthrough + case '\u1D6C': // ᵬ [LATIN SMALL LETTER B WITH MIDDLE TILDE] + fallthrough + case '\u1D80': // ᶀ [LATIN SMALL LETTER B WITH PALATAL HOOK] + fallthrough + case '\u1E03': // ḃ [LATIN SMALL LETTER B WITH DOT ABOVE] + fallthrough + case '\u1E05': // ḅ [LATIN SMALL LETTER B WITH DOT BELOW] + fallthrough + case '\u1E07': // ḇ [LATIN SMALL LETTER B WITH LINE BELOW] + fallthrough + case '\u24D1': // ⓑ [CIRCLED LATIN SMALL LETTER B] + fallthrough + case '\uFF42': // b [FULLWIDTH LATIN SMALL LETTER B] + output[outputPos] = 'b' + outputPos++ + + case '\u249D': // ⒝ [PARENTHESIZED LATIN SMALL LETTER B] + output = output[:(len(output) + 2)] + output[outputPos] = '(' + outputPos++ + output[outputPos] = 'b' + outputPos++ + output[outputPos] = ')' + outputPos++ + + case '\u00C7': // Ç [LATIN CAPITAL LETTER C WITH CEDILLA] + fallthrough + case '\u0106': // Ć [LATIN CAPITAL LETTER C WITH ACUTE] + fallthrough + case '\u0108': // Ĉ [LATIN CAPITAL LETTER C WITH CIRCUMFLEX] + fallthrough + case '\u010A': // Ċ [LATIN CAPITAL LETTER C WITH DOT ABOVE] + fallthrough + case '\u010C': // Č [LATIN CAPITAL LETTER C WITH CARON] + fallthrough + case '\u0187': // Ƈ [LATIN CAPITAL LETTER C WITH HOOK] + fallthrough + case '\u023B': // Ȼ [LATIN CAPITAL LETTER C WITH STROKE] + fallthrough + case '\u0297': // ʗ [LATIN LETTER STRETCHED C] + fallthrough + case '\u1D04': // ᴄ [LATIN LETTER SMALL CAPITAL C] + fallthrough + case '\u1E08': // Ḉ [LATIN CAPITAL LETTER C WITH CEDILLA AND ACUTE] + fallthrough + case '\u24B8': // Ⓒ [CIRCLED LATIN CAPITAL LETTER C] + fallthrough + case '\uFF23': // C [FULLWIDTH LATIN CAPITAL LETTER C] + output[outputPos] = 'C' + outputPos++ + + case '\u00E7': // ç [LATIN SMALL LETTER C WITH CEDILLA] + fallthrough + case '\u0107': // ć [LATIN SMALL LETTER C WITH ACUTE] + fallthrough + case '\u0109': // ĉ [LATIN SMALL LETTER C WITH CIRCUMFLEX] + fallthrough + case '\u010B': // ċ [LATIN SMALL LETTER C WITH DOT ABOVE] + fallthrough + case '\u010D': // č [LATIN 
SMALL LETTER C WITH CARON] + fallthrough + case '\u0188': // ƈ [LATIN SMALL LETTER C WITH HOOK] + fallthrough + case '\u023C': // ȼ [LATIN SMALL LETTER C WITH STROKE] + fallthrough + case '\u0255': // ɕ [LATIN SMALL LETTER C WITH CURL] + fallthrough + case '\u1E09': // ḉ [LATIN SMALL LETTER C WITH CEDILLA AND ACUTE] + fallthrough + case '\u2184': // ↄ [LATIN SMALL LETTER REVERSED C] + fallthrough + case '\u24D2': // ⓒ [CIRCLED LATIN SMALL LETTER C] + fallthrough + case '\uA73E': // Ꜿ [LATIN CAPITAL LETTER REVERSED C WITH DOT] + fallthrough + case '\uA73F': // ꜿ [LATIN SMALL LETTER REVERSED C WITH DOT] + fallthrough + case '\uFF43': // c [FULLWIDTH LATIN SMALL LETTER C] + output[outputPos] = 'c' + outputPos++ + + case '\u249E': // ⒞ [PARENTHESIZED LATIN SMALL LETTER C] + output = output[:(len(output) + 2)] + output[outputPos] = '(' + outputPos++ + output[outputPos] = 'c' + outputPos++ + output[outputPos] = ')' + outputPos++ + + case '\u00D0': // Ð [LATIN CAPITAL LETTER ETH] + fallthrough + case '\u010E': // Ď [LATIN CAPITAL LETTER D WITH CARON] + fallthrough + case '\u0110': // Đ [LATIN CAPITAL LETTER D WITH STROKE] + fallthrough + case '\u0189': // Ɖ [LATIN CAPITAL LETTER AFRICAN D] + fallthrough + case '\u018A': // Ɗ [LATIN CAPITAL LETTER D WITH HOOK] + fallthrough + case '\u018B': // Ƌ [LATIN CAPITAL LETTER D WITH TOPBAR] + fallthrough + case '\u1D05': // ᴅ [LATIN LETTER SMALL CAPITAL D] + fallthrough + case '\u1D06': // ᴆ [LATIN LETTER SMALL CAPITAL ETH] + fallthrough + case '\u1E0A': // Ḋ [LATIN CAPITAL LETTER D WITH DOT ABOVE] + fallthrough + case '\u1E0C': // Ḍ [LATIN CAPITAL LETTER D WITH DOT BELOW] + fallthrough + case '\u1E0E': // Ḏ [LATIN CAPITAL LETTER D WITH LINE BELOW] + fallthrough + case '\u1E10': // Ḑ [LATIN CAPITAL LETTER D WITH CEDILLA] + fallthrough + case '\u1E12': // Ḓ [LATIN CAPITAL LETTER D WITH CIRCUMFLEX BELOW] + fallthrough + case '\u24B9': // Ⓓ [CIRCLED LATIN CAPITAL LETTER D] + fallthrough + case '\uA779': // Ꝺ [LATIN CAPITAL LETTER 
INSULAR D] + fallthrough + case '\uFF24': // D [FULLWIDTH LATIN CAPITAL LETTER D] + output[outputPos] = 'D' + outputPos++ + + case '\u00F0': // ð [LATIN SMALL LETTER ETH] + fallthrough + case '\u010F': // ď [LATIN SMALL LETTER D WITH CARON] + fallthrough + case '\u0111': // đ [LATIN SMALL LETTER D WITH STROKE] + fallthrough + case '\u018C': // ƌ [LATIN SMALL LETTER D WITH TOPBAR] + fallthrough + case '\u0221': // ȡ [LATIN SMALL LETTER D WITH CURL] + fallthrough + case '\u0256': // ɖ [LATIN SMALL LETTER D WITH TAIL] + fallthrough + case '\u0257': // ɗ [LATIN SMALL LETTER D WITH HOOK] + fallthrough + case '\u1D6D': // ᵭ [LATIN SMALL LETTER D WITH MIDDLE TILDE] + fallthrough + case '\u1D81': // ᶁ [LATIN SMALL LETTER D WITH PALATAL HOOK] + fallthrough + case '\u1D91': // ᶑ [LATIN SMALL LETTER D WITH HOOK AND TAIL] + fallthrough + case '\u1E0B': // ḋ [LATIN SMALL LETTER D WITH DOT ABOVE] + fallthrough + case '\u1E0D': // ḍ [LATIN SMALL LETTER D WITH DOT BELOW] + fallthrough + case '\u1E0F': // ḏ [LATIN SMALL LETTER D WITH LINE BELOW] + fallthrough + case '\u1E11': // ḑ [LATIN SMALL LETTER D WITH CEDILLA] + fallthrough + case '\u1E13': // ḓ [LATIN SMALL LETTER D WITH CIRCUMFLEX BELOW] + fallthrough + case '\u24D3': // ⓓ [CIRCLED LATIN SMALL LETTER D] + fallthrough + case '\uA77A': // ꝺ [LATIN SMALL LETTER INSULAR D] + fallthrough + case '\uFF44': // d [FULLWIDTH LATIN SMALL LETTER D] + output[outputPos] = 'd' + outputPos++ + + case '\u01C4': // DŽ [LATIN CAPITAL LETTER DZ WITH CARON] + fallthrough + case '\u01F1': // DZ [LATIN CAPITAL LETTER DZ] + output = output[:(len(output) + 1)] + output[outputPos] = 'D' + outputPos++ + output[outputPos] = 'Z' + outputPos++ + + case '\u01C5': // Dž [LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON] + fallthrough + case '\u01F2': // Dz [LATIN CAPITAL LETTER D WITH SMALL LETTER Z] + output = output[:(len(output) + 1)] + output[outputPos] = 'D' + outputPos++ + output[outputPos] = 'z' + outputPos++ + + case '\u249F': // ⒟ 
[PARENTHESIZED LATIN SMALL LETTER D] + output = output[:(len(output) + 2)] + output[outputPos] = '(' + outputPos++ + output[outputPos] = 'd' + outputPos++ + output[outputPos] = ')' + outputPos++ + + case '\u0238': // ȸ [LATIN SMALL LETTER DB DIGRAPH] + output = output[:(len(output) + 1)] + output[outputPos] = 'd' + outputPos++ + output[outputPos] = 'b' + outputPos++ + + case '\u01C6': // dž [LATIN SMALL LETTER DZ WITH CARON] + fallthrough + case '\u01F3': // dz [LATIN SMALL LETTER DZ] + fallthrough + case '\u02A3': // ʣ [LATIN SMALL LETTER DZ DIGRAPH] + fallthrough + case '\u02A5': // ʥ [LATIN SMALL LETTER DZ DIGRAPH WITH CURL] + output = output[:(len(output) + 1)] + output[outputPos] = 'd' + outputPos++ + output[outputPos] = 'z' + outputPos++ + + case '\u00C8': // È [LATIN CAPITAL LETTER E WITH GRAVE] + fallthrough + case '\u00C9': // É [LATIN CAPITAL LETTER E WITH ACUTE] + fallthrough + case '\u00CA': // Ê [LATIN CAPITAL LETTER E WITH CIRCUMFLEX] + fallthrough + case '\u00CB': // Ë [LATIN CAPITAL LETTER E WITH DIAERESIS] + fallthrough + case '\u0112': // Ē [LATIN CAPITAL LETTER E WITH MACRON] + fallthrough + case '\u0114': // Ĕ [LATIN CAPITAL LETTER E WITH BREVE] + fallthrough + case '\u0116': // Ė [LATIN CAPITAL LETTER E WITH DOT ABOVE] + fallthrough + case '\u0118': // Ę [LATIN CAPITAL LETTER E WITH OGONEK] + fallthrough + case '\u011A': // Ě [LATIN CAPITAL LETTER E WITH CARON] + fallthrough + case '\u018E': // Ǝ [LATIN CAPITAL LETTER REVERSED E] + fallthrough + case '\u0190': // Ɛ [LATIN CAPITAL LETTER OPEN E] + fallthrough + case '\u0204': // Ȅ [LATIN CAPITAL LETTER E WITH DOUBLE GRAVE] + fallthrough + case '\u0206': // Ȇ [LATIN CAPITAL LETTER E WITH INVERTED BREVE] + fallthrough + case '\u0228': // Ȩ [LATIN CAPITAL LETTER E WITH CEDILLA] + fallthrough + case '\u0246': // Ɇ [LATIN CAPITAL LETTER E WITH STROKE] + fallthrough + case '\u1D07': // ᴇ [LATIN LETTER SMALL CAPITAL E] + fallthrough + case '\u1E14': // Ḕ [LATIN CAPITAL LETTER E WITH MACRON AND GRAVE] + 
fallthrough + case '\u1E16': // Ḗ [LATIN CAPITAL LETTER E WITH MACRON AND ACUTE] + fallthrough + case '\u1E18': // Ḙ [LATIN CAPITAL LETTER E WITH CIRCUMFLEX BELOW] + fallthrough + case '\u1E1A': // Ḛ [LATIN CAPITAL LETTER E WITH TILDE BELOW] + fallthrough + case '\u1E1C': // Ḝ [LATIN CAPITAL LETTER E WITH CEDILLA AND BREVE] + fallthrough + case '\u1EB8': // Ẹ [LATIN CAPITAL LETTER E WITH DOT BELOW] + fallthrough + case '\u1EBA': // Ẻ [LATIN CAPITAL LETTER E WITH HOOK ABOVE] + fallthrough + case '\u1EBC': // Ẽ [LATIN CAPITAL LETTER E WITH TILDE] + fallthrough + case '\u1EBE': // Ế [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND ACUTE] + fallthrough + case '\u1EC0': // Ề [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND GRAVE] + fallthrough + case '\u1EC2': // Ể [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE] + fallthrough + case '\u1EC4': // Ễ [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND TILDE] + fallthrough + case '\u1EC6': // Ệ [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND DOT BELOW] + fallthrough + case '\u24BA': // Ⓔ [CIRCLED LATIN CAPITAL LETTER E] + fallthrough + case '\u2C7B': // ⱻ [LATIN LETTER SMALL CAPITAL TURNED E] + fallthrough + case '\uFF25': // E [FULLWIDTH LATIN CAPITAL LETTER E] + output[outputPos] = 'E' + outputPos++ + + case '\u00E8': // è [LATIN SMALL LETTER E WITH GRAVE] + fallthrough + case '\u00E9': // é [LATIN SMALL LETTER E WITH ACUTE] + fallthrough + case '\u00EA': // ê [LATIN SMALL LETTER E WITH CIRCUMFLEX] + fallthrough + case '\u00EB': // ë [LATIN SMALL LETTER E WITH DIAERESIS] + fallthrough + case '\u0113': // ē [LATIN SMALL LETTER E WITH MACRON] + fallthrough + case '\u0115': // ĕ [LATIN SMALL LETTER E WITH BREVE] + fallthrough + case '\u0117': // ė [LATIN SMALL LETTER E WITH DOT ABOVE] + fallthrough + case '\u0119': // ę [LATIN SMALL LETTER E WITH OGONEK] + fallthrough + case '\u011B': // ě [LATIN SMALL LETTER E WITH CARON] + fallthrough + case '\u01DD': // ǝ [LATIN SMALL LETTER TURNED E] + fallthrough + case '\u0205': // ȅ [LATIN SMALL 
LETTER E WITH DOUBLE GRAVE] + fallthrough + case '\u0207': // ȇ [LATIN SMALL LETTER E WITH INVERTED BREVE] + fallthrough + case '\u0229': // ȩ [LATIN SMALL LETTER E WITH CEDILLA] + fallthrough + case '\u0247': // ɇ [LATIN SMALL LETTER E WITH STROKE] + fallthrough + case '\u0258': // ɘ [LATIN SMALL LETTER REVERSED E] + fallthrough + case '\u025B': // ɛ [LATIN SMALL LETTER OPEN E] + fallthrough + case '\u025C': // ɜ [LATIN SMALL LETTER REVERSED OPEN E] + fallthrough + case '\u025D': // ɝ [LATIN SMALL LETTER REVERSED OPEN E WITH HOOK] + fallthrough + case '\u025E': // ɞ [LATIN SMALL LETTER CLOSED REVERSED OPEN E] + fallthrough + case '\u029A': // ʚ [LATIN SMALL LETTER CLOSED OPEN E] + fallthrough + case '\u1D08': // ᴈ [LATIN SMALL LETTER TURNED OPEN E] + fallthrough + case '\u1D92': // ᶒ [LATIN SMALL LETTER E WITH RETROFLEX HOOK] + fallthrough + case '\u1D93': // ᶓ [LATIN SMALL LETTER OPEN E WITH RETROFLEX HOOK] + fallthrough + case '\u1D94': // ᶔ [LATIN SMALL LETTER REVERSED OPEN E WITH RETROFLEX HOOK] + fallthrough + case '\u1E15': // ḕ [LATIN SMALL LETTER E WITH MACRON AND GRAVE] + fallthrough + case '\u1E17': // ḗ [LATIN SMALL LETTER E WITH MACRON AND ACUTE] + fallthrough + case '\u1E19': // ḙ [LATIN SMALL LETTER E WITH CIRCUMFLEX BELOW] + fallthrough + case '\u1E1B': // ḛ [LATIN SMALL LETTER E WITH TILDE BELOW] + fallthrough + case '\u1E1D': // ḝ [LATIN SMALL LETTER E WITH CEDILLA AND BREVE] + fallthrough + case '\u1EB9': // ẹ [LATIN SMALL LETTER E WITH DOT BELOW] + fallthrough + case '\u1EBB': // ẻ [LATIN SMALL LETTER E WITH HOOK ABOVE] + fallthrough + case '\u1EBD': // ẽ [LATIN SMALL LETTER E WITH TILDE] + fallthrough + case '\u1EBF': // ế [LATIN SMALL LETTER E WITH CIRCUMFLEX AND ACUTE] + fallthrough + case '\u1EC1': // ề [LATIN SMALL LETTER E WITH CIRCUMFLEX AND GRAVE] + fallthrough + case '\u1EC3': // ể [LATIN SMALL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE] + fallthrough + case '\u1EC5': // ễ [LATIN SMALL LETTER E WITH CIRCUMFLEX AND TILDE] + fallthrough + 
case '\u1EC7': // ệ [LATIN SMALL LETTER E WITH CIRCUMFLEX AND DOT BELOW] + fallthrough + case '\u2091': // ₑ [LATIN SUBSCRIPT SMALL LETTER E] + fallthrough + case '\u24D4': // ⓔ [CIRCLED LATIN SMALL LETTER E] + fallthrough + case '\u2C78': // ⱸ [LATIN SMALL LETTER E WITH NOTCH] + fallthrough + case '\uFF45': // e [FULLWIDTH LATIN SMALL LETTER E] + output[outputPos] = 'e' + outputPos++ + + case '\u24A0': // ⒠ [PARENTHESIZED LATIN SMALL LETTER E] + output = output[:(len(output) + 2)] + output[outputPos] = '(' + outputPos++ + output[outputPos] = 'e' + outputPos++ + output[outputPos] = ')' + outputPos++ + + case '\u0191': // Ƒ [LATIN CAPITAL LETTER F WITH HOOK] + fallthrough + case '\u1E1E': // Ḟ [LATIN CAPITAL LETTER F WITH DOT ABOVE] + fallthrough + case '\u24BB': // Ⓕ [CIRCLED LATIN CAPITAL LETTER F] + fallthrough + case '\uA730': // ꜰ [LATIN LETTER SMALL CAPITAL F] + fallthrough + case '\uA77B': // Ꝼ [LATIN CAPITAL LETTER INSULAR F] + fallthrough + case '\uA7FB': // ꟻ [LATIN EPIGRAPHIC LETTER REVERSED F] + fallthrough + case '\uFF26': // F [FULLWIDTH LATIN CAPITAL LETTER F] + output[outputPos] = 'F' + outputPos++ + + case '\u0192': // ƒ [LATIN SMALL LETTER F WITH HOOK] + fallthrough + case '\u1D6E': // ᵮ [LATIN SMALL LETTER F WITH MIDDLE TILDE] + fallthrough + case '\u1D82': // ᶂ [LATIN SMALL LETTER F WITH PALATAL HOOK] + fallthrough + case '\u1E1F': // ḟ [LATIN SMALL LETTER F WITH DOT ABOVE] + fallthrough + case '\u1E9B': // ẛ [LATIN SMALL LETTER LONG S WITH DOT ABOVE] + fallthrough + case '\u24D5': // ⓕ [CIRCLED LATIN SMALL LETTER F] + fallthrough + case '\uA77C': // ꝼ [LATIN SMALL LETTER INSULAR F] + fallthrough + case '\uFF46': // f [FULLWIDTH LATIN SMALL LETTER F] + output[outputPos] = 'f' + outputPos++ + + case '\u24A1': // ⒡ [PARENTHESIZED LATIN SMALL LETTER F] + output = output[:(len(output) + 2)] + output[outputPos] = '(' + outputPos++ + output[outputPos] = 'f' + outputPos++ + output[outputPos] = ')' + outputPos++ + + case '\uFB00': // ff [LATIN SMALL 
LIGATURE FF] + output = output[:(len(output) + 1)] + output[outputPos] = 'f' + outputPos++ + output[outputPos] = 'f' + outputPos++ + + case '\uFB03': // ffi [LATIN SMALL LIGATURE FFI] + output = output[:(len(output) + 2)] + output[outputPos] = 'f' + outputPos++ + output[outputPos] = 'f' + outputPos++ + output[outputPos] = 'i' + outputPos++ + + case '\uFB04': // ffl [LATIN SMALL LIGATURE FFL] + output = output[:(len(output) + 2)] + output[outputPos] = 'f' + outputPos++ + output[outputPos] = 'f' + outputPos++ + output[outputPos] = 'l' + outputPos++ + + case '\uFB01': // fi [LATIN SMALL LIGATURE FI] + output = output[:(len(output) + 1)] + output[outputPos] = 'f' + outputPos++ + output[outputPos] = 'i' + outputPos++ + + case '\uFB02': // fl [LATIN SMALL LIGATURE FL] + output = output[:(len(output) + 1)] + output[outputPos] = 'f' + outputPos++ + output[outputPos] = 'l' + outputPos++ + + case '\u011C': // Ĝ [LATIN CAPITAL LETTER G WITH CIRCUMFLEX] + fallthrough + case '\u011E': // Ğ [LATIN CAPITAL LETTER G WITH BREVE] + fallthrough + case '\u0120': // Ġ [LATIN CAPITAL LETTER G WITH DOT ABOVE] + fallthrough + case '\u0122': // Ģ [LATIN CAPITAL LETTER G WITH CEDILLA] + fallthrough + case '\u0193': // Ɠ [LATIN CAPITAL LETTER G WITH HOOK] + fallthrough + case '\u01E4': // Ǥ [LATIN CAPITAL LETTER G WITH STROKE] + fallthrough + case '\u01E5': // ǥ [LATIN SMALL LETTER G WITH STROKE] + fallthrough + case '\u01E6': // Ǧ [LATIN CAPITAL LETTER G WITH CARON] + fallthrough + case '\u01E7': // ǧ [LATIN SMALL LETTER G WITH CARON] + fallthrough + case '\u01F4': // Ǵ [LATIN CAPITAL LETTER G WITH ACUTE] + fallthrough + case '\u0262': // ɢ [LATIN LETTER SMALL CAPITAL G] + fallthrough + case '\u029B': // ʛ [LATIN LETTER SMALL CAPITAL G WITH HOOK] + fallthrough + case '\u1E20': // Ḡ [LATIN CAPITAL LETTER G WITH MACRON] + fallthrough + case '\u24BC': // Ⓖ [CIRCLED LATIN CAPITAL LETTER G] + fallthrough + case '\uA77D': // Ᵹ [LATIN CAPITAL LETTER INSULAR G] + fallthrough + case '\uA77E': // Ꝿ 
[LATIN CAPITAL LETTER TURNED INSULAR G] + fallthrough + case '\uFF27': // G [FULLWIDTH LATIN CAPITAL LETTER G] + output[outputPos] = 'G' + outputPos++ + + case '\u011D': // ĝ [LATIN SMALL LETTER G WITH CIRCUMFLEX] + fallthrough + case '\u011F': // ğ [LATIN SMALL LETTER G WITH BREVE] + fallthrough + case '\u0121': // ġ [LATIN SMALL LETTER G WITH DOT ABOVE] + fallthrough + case '\u0123': // ģ [LATIN SMALL LETTER G WITH CEDILLA] + fallthrough + case '\u01F5': // ǵ [LATIN SMALL LETTER G WITH ACUTE] + fallthrough + case '\u0260': // ɠ [LATIN SMALL LETTER G WITH HOOK] + fallthrough + case '\u0261': // ɡ [LATIN SMALL LETTER SCRIPT G] + fallthrough + case '\u1D77': // ᵷ [LATIN SMALL LETTER TURNED G] + fallthrough + case '\u1D79': // ᵹ [LATIN SMALL LETTER INSULAR G] + fallthrough + case '\u1D83': // ᶃ [LATIN SMALL LETTER G WITH PALATAL HOOK] + fallthrough + case '\u1E21': // ḡ [LATIN SMALL LETTER G WITH MACRON] + fallthrough + case '\u24D6': // ⓖ [CIRCLED LATIN SMALL LETTER G] + fallthrough + case '\uA77F': // ꝿ [LATIN SMALL LETTER TURNED INSULAR G] + fallthrough + case '\uFF47': // g [FULLWIDTH LATIN SMALL LETTER G] + output[outputPos] = 'g' + outputPos++ + + case '\u24A2': // ⒢ [PARENTHESIZED LATIN SMALL LETTER G] + output = output[:(len(output) + 1)] + output[outputPos] = '(' + outputPos++ + output[outputPos] = 'g' + outputPos++ + output[outputPos] = ')' + outputPos++ + + case '\u0124': // Ĥ [LATIN CAPITAL LETTER H WITH CIRCUMFLEX] + fallthrough + case '\u0126': // Ħ [LATIN CAPITAL LETTER H WITH STROKE] + fallthrough + case '\u021E': // Ȟ [LATIN CAPITAL LETTER H WITH CARON] + fallthrough + case '\u029C': // ʜ [LATIN LETTER SMALL CAPITAL H] + fallthrough + case '\u1E22': // Ḣ [LATIN CAPITAL LETTER H WITH DOT ABOVE] + fallthrough + case '\u1E24': // Ḥ [LATIN CAPITAL LETTER H WITH DOT BELOW] + fallthrough + case '\u1E26': // Ḧ [LATIN CAPITAL LETTER H WITH DIAERESIS] + fallthrough + case '\u1E28': // Ḩ [LATIN CAPITAL LETTER H WITH CEDILLA] + fallthrough + case '\u1E2A': // Ḫ 
[LATIN CAPITAL LETTER H WITH BREVE BELOW] + fallthrough + case '\u24BD': // Ⓗ [CIRCLED LATIN CAPITAL LETTER H] + fallthrough + case '\u2C67': // Ⱨ [LATIN CAPITAL LETTER H WITH DESCENDER] + fallthrough + case '\u2C75': // Ⱶ [LATIN CAPITAL LETTER HALF H] + fallthrough + case '\uFF28': // H [FULLWIDTH LATIN CAPITAL LETTER H] + output[outputPos] = 'H' + outputPos++ + + case '\u0125': // ĥ [LATIN SMALL LETTER H WITH CIRCUMFLEX] + fallthrough + case '\u0127': // ħ [LATIN SMALL LETTER H WITH STROKE] + fallthrough + case '\u021F': // ȟ [LATIN SMALL LETTER H WITH CARON] + fallthrough + case '\u0265': // ɥ [LATIN SMALL LETTER TURNED H] + fallthrough + case '\u0266': // ɦ [LATIN SMALL LETTER H WITH HOOK] + fallthrough + case '\u02AE': // ʮ [LATIN SMALL LETTER TURNED H WITH FISHHOOK] + fallthrough + case '\u02AF': // ʯ [LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL] + fallthrough + case '\u1E23': // ḣ [LATIN SMALL LETTER H WITH DOT ABOVE] + fallthrough + case '\u1E25': // ḥ [LATIN SMALL LETTER H WITH DOT BELOW] + fallthrough + case '\u1E27': // ḧ [LATIN SMALL LETTER H WITH DIAERESIS] + fallthrough + case '\u1E29': // ḩ [LATIN SMALL LETTER H WITH CEDILLA] + fallthrough + case '\u1E2B': // ḫ [LATIN SMALL LETTER H WITH BREVE BELOW] + fallthrough + case '\u1E96': // ẖ [LATIN SMALL LETTER H WITH LINE BELOW] + fallthrough + case '\u24D7': // ⓗ [CIRCLED LATIN SMALL LETTER H] + fallthrough + case '\u2C68': // ⱨ [LATIN SMALL LETTER H WITH DESCENDER] + fallthrough + case '\u2C76': // ⱶ [LATIN SMALL LETTER HALF H] + fallthrough + case '\uFF48': // h [FULLWIDTH LATIN SMALL LETTER H] + output[outputPos] = 'h' + outputPos++ + + case '\u01F6': // Ƕ http://en.wikipedia.org/wiki/Hwair [LATIN CAPITAL LETTER HWAIR] + output = output[:(len(output) + 1)] + output[outputPos] = 'H' + outputPos++ + output[outputPos] = 'V' + outputPos++ + + case '\u24A3': // ⒣ [PARENTHESIZED LATIN SMALL LETTER H] + output = output[:(len(output) + 2)] + output[outputPos] = '(' + outputPos++ + output[outputPos] = 
'h' + outputPos++ + output[outputPos] = ')' + outputPos++ + + case '\u0195': // ƕ [LATIN SMALL LETTER HV] + output = output[:(len(output) + 1)] + output[outputPos] = 'h' + outputPos++ + output[outputPos] = 'v' + outputPos++ + + case '\u00CC': // Ì [LATIN CAPITAL LETTER I WITH GRAVE] + fallthrough + case '\u00CD': // Í [LATIN CAPITAL LETTER I WITH ACUTE] + fallthrough + case '\u00CE': // Î [LATIN CAPITAL LETTER I WITH CIRCUMFLEX] + fallthrough + case '\u00CF': // Ï [LATIN CAPITAL LETTER I WITH DIAERESIS] + fallthrough + case '\u0128': // Ĩ [LATIN CAPITAL LETTER I WITH TILDE] + fallthrough + case '\u012A': // Ī [LATIN CAPITAL LETTER I WITH MACRON] + fallthrough + case '\u012C': // Ĭ [LATIN CAPITAL LETTER I WITH BREVE] + fallthrough + case '\u012E': // Į [LATIN CAPITAL LETTER I WITH OGONEK] + fallthrough + case '\u0130': // İ [LATIN CAPITAL LETTER I WITH DOT ABOVE] + fallthrough + case '\u0196': // Ɩ [LATIN CAPITAL LETTER IOTA] + fallthrough + case '\u0197': // Ɨ [LATIN CAPITAL LETTER I WITH STROKE] + fallthrough + case '\u01CF': // Ǐ [LATIN CAPITAL LETTER I WITH CARON] + fallthrough + case '\u0208': // Ȉ [LATIN CAPITAL LETTER I WITH DOUBLE GRAVE] + fallthrough + case '\u020A': // Ȋ [LATIN CAPITAL LETTER I WITH INVERTED BREVE] + fallthrough + case '\u026A': // ɪ [LATIN LETTER SMALL CAPITAL I] + fallthrough + case '\u1D7B': // ᵻ [LATIN SMALL CAPITAL LETTER I WITH STROKE] + fallthrough + case '\u1E2C': // Ḭ [LATIN CAPITAL LETTER I WITH TILDE BELOW] + fallthrough + case '\u1E2E': // Ḯ [LATIN CAPITAL LETTER I WITH DIAERESIS AND ACUTE] + fallthrough + case '\u1EC8': // Ỉ [LATIN CAPITAL LETTER I WITH HOOK ABOVE] + fallthrough + case '\u1ECA': // Ị [LATIN CAPITAL LETTER I WITH DOT BELOW] + fallthrough + case '\u24BE': // Ⓘ [CIRCLED LATIN CAPITAL LETTER I] + fallthrough + case '\uA7FE': // ꟾ [LATIN EPIGRAPHIC LETTER I LONGA] + fallthrough + case '\uFF29': // I [FULLWIDTH LATIN CAPITAL LETTER I] + output[outputPos] = 'I' + outputPos++ + + case '\u00EC': // ì [LATIN SMALL 
LETTER I WITH GRAVE] + fallthrough + case '\u00ED': // í [LATIN SMALL LETTER I WITH ACUTE] + fallthrough + case '\u00EE': // î [LATIN SMALL LETTER I WITH CIRCUMFLEX] + fallthrough + case '\u00EF': // ï [LATIN SMALL LETTER I WITH DIAERESIS] + fallthrough + case '\u0129': // ĩ [LATIN SMALL LETTER I WITH TILDE] + fallthrough + case '\u012B': // ī [LATIN SMALL LETTER I WITH MACRON] + fallthrough + case '\u012D': // ĭ [LATIN SMALL LETTER I WITH BREVE] + fallthrough + case '\u012F': // į [LATIN SMALL LETTER I WITH OGONEK] + fallthrough + case '\u0131': // ı [LATIN SMALL LETTER DOTLESS I] + fallthrough + case '\u01D0': // ǐ [LATIN SMALL LETTER I WITH CARON] + fallthrough + case '\u0209': // ȉ [LATIN SMALL LETTER I WITH DOUBLE GRAVE] + fallthrough + case '\u020B': // ȋ [LATIN SMALL LETTER I WITH INVERTED BREVE] + fallthrough + case '\u0268': // ɨ [LATIN SMALL LETTER I WITH STROKE] + fallthrough + case '\u1D09': // ᴉ [LATIN SMALL LETTER TURNED I] + fallthrough + case '\u1D62': // ᵢ [LATIN SUBSCRIPT SMALL LETTER I] + fallthrough + case '\u1D7C': // ᵼ [LATIN SMALL LETTER IOTA WITH STROKE] + fallthrough + case '\u1D96': // ᶖ [LATIN SMALL LETTER I WITH RETROFLEX HOOK] + fallthrough + case '\u1E2D': // ḭ [LATIN SMALL LETTER I WITH TILDE BELOW] + fallthrough + case '\u1E2F': // ḯ [LATIN SMALL LETTER I WITH DIAERESIS AND ACUTE] + fallthrough + case '\u1EC9': // ỉ [LATIN SMALL LETTER I WITH HOOK ABOVE] + fallthrough + case '\u1ECB': // ị [LATIN SMALL LETTER I WITH DOT BELOW] + fallthrough + case '\u2071': // ⁱ [SUPERSCRIPT LATIN SMALL LETTER I] + fallthrough + case '\u24D8': // ⓘ [CIRCLED LATIN SMALL LETTER I] + fallthrough + case '\uFF49': // i [FULLWIDTH LATIN SMALL LETTER I] + output[outputPos] = 'i' + outputPos++ + + case '\u0132': // IJ [LATIN CAPITAL LIGATURE IJ] + output = output[:(len(output) + 1)] + output[outputPos] = 'I' + outputPos++ + output[outputPos] = 'J' + outputPos++ + + case '\u24A4': // ⒤ [PARENTHESIZED LATIN SMALL LETTER I] + output = output[:(len(output) + 2)] 
+ output[outputPos] = '(' + outputPos++ + output[outputPos] = 'i' + outputPos++ + output[outputPos] = ')' + outputPos++ + + case '\u0133': // ij [LATIN SMALL LIGATURE IJ] + output = output[:(len(output) + 1)] + output[outputPos] = 'i' + outputPos++ + output[outputPos] = 'j' + outputPos++ + + case '\u0134': // Ĵ [LATIN CAPITAL LETTER J WITH CIRCUMFLEX] + fallthrough + case '\u0248': // Ɉ [LATIN CAPITAL LETTER J WITH STROKE] + fallthrough + case '\u1D0A': // ᴊ [LATIN LETTER SMALL CAPITAL J] + fallthrough + case '\u24BF': // Ⓙ [CIRCLED LATIN CAPITAL LETTER J] + fallthrough + case '\uFF2A': // J [FULLWIDTH LATIN CAPITAL LETTER J] + output[outputPos] = 'J' + outputPos++ + + case '\u0135': // ĵ [LATIN SMALL LETTER J WITH CIRCUMFLEX] + fallthrough + case '\u01F0': // ǰ [LATIN SMALL LETTER J WITH CARON] + fallthrough + case '\u0237': // ȷ [LATIN SMALL LETTER DOTLESS J] + fallthrough + case '\u0249': // ɉ [LATIN SMALL LETTER J WITH STROKE] + fallthrough + case '\u025F': // ɟ [LATIN SMALL LETTER DOTLESS J WITH STROKE] + fallthrough + case '\u0284': // ʄ [LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK] + fallthrough + case '\u029D': // ʝ [LATIN SMALL LETTER J WITH CROSSED-TAIL] + fallthrough + case '\u24D9': // ⓙ [CIRCLED LATIN SMALL LETTER J] + fallthrough + case '\u2C7C': // ⱼ [LATIN SUBSCRIPT SMALL LETTER J] + fallthrough + case '\uFF4A': // j [FULLWIDTH LATIN SMALL LETTER J] + output[outputPos] = 'j' + outputPos++ + + case '\u24A5': // ⒥ [PARENTHESIZED LATIN SMALL LETTER J] + output = output[:(len(output) + 2)] + output[outputPos] = '(' + outputPos++ + output[outputPos] = 'j' + outputPos++ + output[outputPos] = ')' + outputPos++ + + case '\u0136': // Ķ [LATIN CAPITAL LETTER K WITH CEDILLA] + fallthrough + case '\u0198': // Ƙ [LATIN CAPITAL LETTER K WITH HOOK] + fallthrough + case '\u01E8': // Ǩ [LATIN CAPITAL LETTER K WITH CARON] + fallthrough + case '\u1D0B': // ᴋ [LATIN LETTER SMALL CAPITAL K] + fallthrough + case '\u1E30': // Ḱ [LATIN CAPITAL LETTER K WITH ACUTE] + 
fallthrough + case '\u1E32': // Ḳ [LATIN CAPITAL LETTER K WITH DOT BELOW] + fallthrough + case '\u1E34': // Ḵ [LATIN CAPITAL LETTER K WITH LINE BELOW] + fallthrough + case '\u24C0': // Ⓚ [CIRCLED LATIN CAPITAL LETTER K] + fallthrough + case '\u2C69': // Ⱪ [LATIN CAPITAL LETTER K WITH DESCENDER] + fallthrough + case '\uA740': // Ꝁ [LATIN CAPITAL LETTER K WITH STROKE] + fallthrough + case '\uA742': // Ꝃ [LATIN CAPITAL LETTER K WITH DIAGONAL STROKE] + fallthrough + case '\uA744': // Ꝅ [LATIN CAPITAL LETTER K WITH STROKE AND DIAGONAL STROKE] + fallthrough + case '\uFF2B': // K [FULLWIDTH LATIN CAPITAL LETTER K] + output[outputPos] = 'K' + outputPos++ + + case '\u0137': // ķ [LATIN SMALL LETTER K WITH CEDILLA] + fallthrough + case '\u0199': // ƙ [LATIN SMALL LETTER K WITH HOOK] + fallthrough + case '\u01E9': // ǩ [LATIN SMALL LETTER K WITH CARON] + fallthrough + case '\u029E': // ʞ [LATIN SMALL LETTER TURNED K] + fallthrough + case '\u1D84': // ᶄ [LATIN SMALL LETTER K WITH PALATAL HOOK] + fallthrough + case '\u1E31': // ḱ [LATIN SMALL LETTER K WITH ACUTE] + fallthrough + case '\u1E33': // ḳ [LATIN SMALL LETTER K WITH DOT BELOW] + fallthrough + case '\u1E35': // ḵ [LATIN SMALL LETTER K WITH LINE BELOW] + fallthrough + case '\u24DA': // ⓚ [CIRCLED LATIN SMALL LETTER K] + fallthrough + case '\u2C6A': // ⱪ [LATIN SMALL LETTER K WITH DESCENDER] + fallthrough + case '\uA741': // ꝁ [LATIN SMALL LETTER K WITH STROKE] + fallthrough + case '\uA743': // ꝃ [LATIN SMALL LETTER K WITH DIAGONAL STROKE] + fallthrough + case '\uA745': // ꝅ [LATIN SMALL LETTER K WITH STROKE AND DIAGONAL STROKE] + fallthrough + case '\uFF4B': // k [FULLWIDTH LATIN SMALL LETTER K] + output[outputPos] = 'k' + outputPos++ + + case '\u24A6': // ⒦ [PARENTHESIZED LATIN SMALL LETTER K] + output = output[:(len(output) + 2)] + output[outputPos] = '(' + outputPos++ + output[outputPos] = 'k' + outputPos++ + output[outputPos] = ')' + outputPos++ + + case '\u0139': // Ĺ [LATIN CAPITAL LETTER L WITH ACUTE] + 
fallthrough + case '\u013B': // Ļ [LATIN CAPITAL LETTER L WITH CEDILLA] + fallthrough + case '\u013D': // Ľ [LATIN CAPITAL LETTER L WITH CARON] + fallthrough + case '\u013F': // Ŀ [LATIN CAPITAL LETTER L WITH MIDDLE DOT] + fallthrough + case '\u0141': // Ł [LATIN CAPITAL LETTER L WITH STROKE] + fallthrough + case '\u023D': // Ƚ [LATIN CAPITAL LETTER L WITH BAR] + fallthrough + case '\u029F': // ʟ [LATIN LETTER SMALL CAPITAL L] + fallthrough + case '\u1D0C': // ᴌ [LATIN LETTER SMALL CAPITAL L WITH STROKE] + fallthrough + case '\u1E36': // Ḷ [LATIN CAPITAL LETTER L WITH DOT BELOW] + fallthrough + case '\u1E38': // Ḹ [LATIN CAPITAL LETTER L WITH DOT BELOW AND MACRON] + fallthrough + case '\u1E3A': // Ḻ [LATIN CAPITAL LETTER L WITH LINE BELOW] + fallthrough + case '\u1E3C': // Ḽ [LATIN CAPITAL LETTER L WITH CIRCUMFLEX BELOW] + fallthrough + case '\u24C1': // Ⓛ [CIRCLED LATIN CAPITAL LETTER L] + fallthrough + case '\u2C60': // Ⱡ [LATIN CAPITAL LETTER L WITH DOUBLE BAR] + fallthrough + case '\u2C62': // Ɫ [LATIN CAPITAL LETTER L WITH MIDDLE TILDE] + fallthrough + case '\uA746': // Ꝇ [LATIN CAPITAL LETTER BROKEN L] + fallthrough + case '\uA748': // Ꝉ [LATIN CAPITAL LETTER L WITH HIGH STROKE] + fallthrough + case '\uA780': // Ꞁ [LATIN CAPITAL LETTER TURNED L] + fallthrough + case '\uFF2C': // L [FULLWIDTH LATIN CAPITAL LETTER L] + output[outputPos] = 'L' + outputPos++ + + case '\u013A': // ĺ [LATIN SMALL LETTER L WITH ACUTE] + fallthrough + case '\u013C': // ļ [LATIN SMALL LETTER L WITH CEDILLA] + fallthrough + case '\u013E': // ľ [LATIN SMALL LETTER L WITH CARON] + fallthrough + case '\u0140': // ŀ [LATIN SMALL LETTER L WITH MIDDLE DOT] + fallthrough + case '\u0142': // ł [LATIN SMALL LETTER L WITH STROKE] + fallthrough + case '\u019A': // ƚ [LATIN SMALL LETTER L WITH BAR] + fallthrough + case '\u0234': // ȴ [LATIN SMALL LETTER L WITH CURL] + fallthrough + case '\u026B': // ɫ [LATIN SMALL LETTER L WITH MIDDLE TILDE] + fallthrough + case '\u026C': // ɬ [LATIN SMALL LETTER 
L WITH BELT] + fallthrough + case '\u026D': // ɭ [LATIN SMALL LETTER L WITH RETROFLEX HOOK] + fallthrough + case '\u1D85': // ᶅ [LATIN SMALL LETTER L WITH PALATAL HOOK] + fallthrough + case '\u1E37': // ḷ [LATIN SMALL LETTER L WITH DOT BELOW] + fallthrough + case '\u1E39': // ḹ [LATIN SMALL LETTER L WITH DOT BELOW AND MACRON] + fallthrough + case '\u1E3B': // ḻ [LATIN SMALL LETTER L WITH LINE BELOW] + fallthrough + case '\u1E3D': // ḽ [LATIN SMALL LETTER L WITH CIRCUMFLEX BELOW] + fallthrough + case '\u24DB': // ⓛ [CIRCLED LATIN SMALL LETTER L] + fallthrough + case '\u2C61': // ⱡ [LATIN SMALL LETTER L WITH DOUBLE BAR] + fallthrough + case '\uA747': // ꝇ [LATIN SMALL LETTER BROKEN L] + fallthrough + case '\uA749': // ꝉ [LATIN SMALL LETTER L WITH HIGH STROKE] + fallthrough + case '\uA781': // ꞁ [LATIN SMALL LETTER TURNED L] + fallthrough + case '\uFF4C': // l [FULLWIDTH LATIN SMALL LETTER L] + output[outputPos] = 'l' + outputPos++ + + case '\u01C7': // LJ [LATIN CAPITAL LETTER LJ] + output = output[:(len(output) + 1)] + output[outputPos] = 'L' + outputPos++ + output[outputPos] = 'J' + outputPos++ + + case '\u1EFA': // Ỻ [LATIN CAPITAL LETTER MIDDLE-WELSH LL] + output = output[:(len(output) + 1)] + output[outputPos] = 'L' + outputPos++ + output[outputPos] = 'L' + outputPos++ + + case '\u01C8': // Lj [LATIN CAPITAL LETTER L WITH SMALL LETTER J] + output = output[:(len(output) + 1)] + output[outputPos] = 'L' + outputPos++ + output[outputPos] = 'j' + outputPos++ + + case '\u24A7': // ⒧ [PARENTHESIZED LATIN SMALL LETTER L] + output = output[:(len(output) + 2)] + output[outputPos] = '(' + outputPos++ + output[outputPos] = 'l' + outputPos++ + output[outputPos] = ')' + outputPos++ + + case '\u01C9': // lj [LATIN SMALL LETTER LJ] + output = output[:(len(output) + 1)] + output[outputPos] = 'l' + outputPos++ + output[outputPos] = 'j' + outputPos++ + + case '\u1EFB': // ỻ [LATIN SMALL LETTER MIDDLE-WELSH LL] + output = output[:(len(output) + 1)] + output[outputPos] = 'l' + 
outputPos++ + output[outputPos] = 'l' + outputPos++ + + case '\u02AA': // ʪ [LATIN SMALL LETTER LS DIGRAPH] + output = output[:(len(output) + 1)] + output[outputPos] = 'l' + outputPos++ + output[outputPos] = 's' + outputPos++ + + case '\u02AB': // ʫ [LATIN SMALL LETTER LZ DIGRAPH] + output = output[:(len(output) + 1)] + output[outputPos] = 'l' + outputPos++ + output[outputPos] = 'z' + outputPos++ + + case '\u019C': // Ɯ [LATIN CAPITAL LETTER TURNED M] + fallthrough + case '\u1D0D': // ᴍ [LATIN LETTER SMALL CAPITAL M] + fallthrough + case '\u1E3E': // Ḿ [LATIN CAPITAL LETTER M WITH ACUTE] + fallthrough + case '\u1E40': // Ṁ [LATIN CAPITAL LETTER M WITH DOT ABOVE] + fallthrough + case '\u1E42': // Ṃ [LATIN CAPITAL LETTER M WITH DOT BELOW] + fallthrough + case '\u24C2': // Ⓜ [CIRCLED LATIN CAPITAL LETTER M] + fallthrough + case '\u2C6E': // Ɱ [LATIN CAPITAL LETTER M WITH HOOK] + fallthrough + case '\uA7FD': // ꟽ [LATIN EPIGRAPHIC LETTER INVERTED M] + fallthrough + case '\uA7FF': // ꟿ [LATIN EPIGRAPHIC LETTER ARCHAIC M] + fallthrough + case '\uFF2D': // M [FULLWIDTH LATIN CAPITAL LETTER M] + output[outputPos] = 'M' + outputPos++ + + case '\u026F': // ɯ [LATIN SMALL LETTER TURNED M] + fallthrough + case '\u0270': // ɰ [LATIN SMALL LETTER TURNED M WITH LONG LEG] + fallthrough + case '\u0271': // ɱ [LATIN SMALL LETTER M WITH HOOK] + fallthrough + case '\u1D6F': // ᵯ [LATIN SMALL LETTER M WITH MIDDLE TILDE] + fallthrough + case '\u1D86': // ᶆ [LATIN SMALL LETTER M WITH PALATAL HOOK] + fallthrough + case '\u1E3F': // ḿ [LATIN SMALL LETTER M WITH ACUTE] + fallthrough + case '\u1E41': // ṁ [LATIN SMALL LETTER M WITH DOT ABOVE] + fallthrough + case '\u1E43': // ṃ [LATIN SMALL LETTER M WITH DOT BELOW] + fallthrough + case '\u24DC': // ⓜ [CIRCLED LATIN SMALL LETTER M] + fallthrough + case '\uFF4D': // m [FULLWIDTH LATIN SMALL LETTER M] + output[outputPos] = 'm' + outputPos++ + + case '\u24A8': // ⒨ [PARENTHESIZED LATIN SMALL LETTER M] + output = output[:(len(output) + 2)] + 
output[outputPos] = '(' + outputPos++ + output[outputPos] = 'm' + outputPos++ + output[outputPos] = ')' + outputPos++ + + case '\u00D1': // Ñ [LATIN CAPITAL LETTER N WITH TILDE] + fallthrough + case '\u0143': // Ń [LATIN CAPITAL LETTER N WITH ACUTE] + fallthrough + case '\u0145': // Ņ [LATIN CAPITAL LETTER N WITH CEDILLA] + fallthrough + case '\u0147': // Ň [LATIN CAPITAL LETTER N WITH CARON] + fallthrough + case '\u014A': // Ŋ http://en.wikipedia.org/wiki/Eng_(letter) [LATIN CAPITAL LETTER ENG] + fallthrough + case '\u019D': // Ɲ [LATIN CAPITAL LETTER N WITH LEFT HOOK] + fallthrough + case '\u01F8': // Ǹ [LATIN CAPITAL LETTER N WITH GRAVE] + fallthrough + case '\u0220': // Ƞ [LATIN CAPITAL LETTER N WITH LONG RIGHT LEG] + fallthrough + case '\u0274': // ɴ [LATIN LETTER SMALL CAPITAL N] + fallthrough + case '\u1D0E': // ᴎ [LATIN LETTER SMALL CAPITAL REVERSED N] + fallthrough + case '\u1E44': // Ṅ [LATIN CAPITAL LETTER N WITH DOT ABOVE] + fallthrough + case '\u1E46': // Ṇ [LATIN CAPITAL LETTER N WITH DOT BELOW] + fallthrough + case '\u1E48': // Ṉ [LATIN CAPITAL LETTER N WITH LINE BELOW] + fallthrough + case '\u1E4A': // Ṋ [LATIN CAPITAL LETTER N WITH CIRCUMFLEX BELOW] + fallthrough + case '\u24C3': // Ⓝ [CIRCLED LATIN CAPITAL LETTER N] + fallthrough + case '\uFF2E': // N [FULLWIDTH LATIN CAPITAL LETTER N] + output[outputPos] = 'N' + outputPos++ + + case '\u00F1': // ñ [LATIN SMALL LETTER N WITH TILDE] + fallthrough + case '\u0144': // ń [LATIN SMALL LETTER N WITH ACUTE] + fallthrough + case '\u0146': // ņ [LATIN SMALL LETTER N WITH CEDILLA] + fallthrough + case '\u0148': // ň [LATIN SMALL LETTER N WITH CARON] + fallthrough + case '\u0149': // ʼn [LATIN SMALL LETTER N PRECEDED BY APOSTROPHE] + fallthrough + case '\u014B': // ŋ http://en.wikipedia.org/wiki/Eng_(letter) [LATIN SMALL LETTER ENG] + fallthrough + case '\u019E': // ƞ [LATIN SMALL LETTER N WITH LONG RIGHT LEG] + fallthrough + case '\u01F9': // ǹ [LATIN SMALL LETTER N WITH GRAVE] + fallthrough + case 
'\u0235': // ȵ [LATIN SMALL LETTER N WITH CURL] + fallthrough + case '\u0272': // ɲ [LATIN SMALL LETTER N WITH LEFT HOOK] + fallthrough + case '\u0273': // ɳ [LATIN SMALL LETTER N WITH RETROFLEX HOOK] + fallthrough + case '\u1D70': // ᵰ [LATIN SMALL LETTER N WITH MIDDLE TILDE] + fallthrough + case '\u1D87': // ᶇ [LATIN SMALL LETTER N WITH PALATAL HOOK] + fallthrough + case '\u1E45': // ṅ [LATIN SMALL LETTER N WITH DOT ABOVE] + fallthrough + case '\u1E47': // ṇ [LATIN SMALL LETTER N WITH DOT BELOW] + fallthrough + case '\u1E49': // ṉ [LATIN SMALL LETTER N WITH LINE BELOW] + fallthrough + case '\u1E4B': // ṋ [LATIN SMALL LETTER N WITH CIRCUMFLEX BELOW] + fallthrough + case '\u207F': // ⁿ [SUPERSCRIPT LATIN SMALL LETTER N] + fallthrough + case '\u24DD': // ⓝ [CIRCLED LATIN SMALL LETTER N] + fallthrough + case '\uFF4E': // n [FULLWIDTH LATIN SMALL LETTER N] + output[outputPos] = 'n' + outputPos++ + + case '\u01CA': // NJ [LATIN CAPITAL LETTER NJ] + output = output[:(len(output) + 1)] + output[outputPos] = 'N' + outputPos++ + output[outputPos] = 'J' + outputPos++ + + case '\u01CB': // Nj [LATIN CAPITAL LETTER N WITH SMALL LETTER J] + output = output[:(len(output) + 1)] + output[outputPos] = 'N' + outputPos++ + output[outputPos] = 'j' + outputPos++ + + case '\u24A9': // ⒩ [PARENTHESIZED LATIN SMALL LETTER N] + output = output[:(len(output) + 2)] + output[outputPos] = '(' + outputPos++ + output[outputPos] = 'n' + outputPos++ + output[outputPos] = ')' + outputPos++ + + case '\u01CC': // nj [LATIN SMALL LETTER NJ] + output = output[:(len(output) + 1)] + output[outputPos] = 'n' + outputPos++ + output[outputPos] = 'j' + outputPos++ + + case '\u00D2': // Ò [LATIN CAPITAL LETTER O WITH GRAVE] + fallthrough + case '\u00D3': // Ó [LATIN CAPITAL LETTER O WITH ACUTE] + fallthrough + case '\u00D4': // Ô [LATIN CAPITAL LETTER O WITH CIRCUMFLEX] + fallthrough + case '\u00D5': // Õ [LATIN CAPITAL LETTER O WITH TILDE] + fallthrough + case '\u00D6': // Ö [LATIN CAPITAL LETTER O WITH 
DIAERESIS] + fallthrough + case '\u00D8': // Ø [LATIN CAPITAL LETTER O WITH STROKE] + fallthrough + case '\u014C': // Ō [LATIN CAPITAL LETTER O WITH MACRON] + fallthrough + case '\u014E': // Ŏ [LATIN CAPITAL LETTER O WITH BREVE] + fallthrough + case '\u0150': // Ő [LATIN CAPITAL LETTER O WITH DOUBLE ACUTE] + fallthrough + case '\u0186': // Ɔ [LATIN CAPITAL LETTER OPEN O] + fallthrough + case '\u019F': // Ɵ [LATIN CAPITAL LETTER O WITH MIDDLE TILDE] + fallthrough + case '\u01A0': // Ơ [LATIN CAPITAL LETTER O WITH HORN] + fallthrough + case '\u01D1': // Ǒ [LATIN CAPITAL LETTER O WITH CARON] + fallthrough + case '\u01EA': // Ǫ [LATIN CAPITAL LETTER O WITH OGONEK] + fallthrough + case '\u01EC': // Ǭ [LATIN CAPITAL LETTER O WITH OGONEK AND MACRON] + fallthrough + case '\u01FE': // Ǿ [LATIN CAPITAL LETTER O WITH STROKE AND ACUTE] + fallthrough + case '\u020C': // Ȍ [LATIN CAPITAL LETTER O WITH DOUBLE GRAVE] + fallthrough + case '\u020E': // Ȏ [LATIN CAPITAL LETTER O WITH INVERTED BREVE] + fallthrough + case '\u022A': // Ȫ [LATIN CAPITAL LETTER O WITH DIAERESIS AND MACRON] + fallthrough + case '\u022C': // Ȭ [LATIN CAPITAL LETTER O WITH TILDE AND MACRON] + fallthrough + case '\u022E': // Ȯ [LATIN CAPITAL LETTER O WITH DOT ABOVE] + fallthrough + case '\u0230': // Ȱ [LATIN CAPITAL LETTER O WITH DOT ABOVE AND MACRON] + fallthrough + case '\u1D0F': // ᴏ [LATIN LETTER SMALL CAPITAL O] + fallthrough + case '\u1D10': // ᴐ [LATIN LETTER SMALL CAPITAL OPEN O] + fallthrough + case '\u1E4C': // Ṍ [LATIN CAPITAL LETTER O WITH TILDE AND ACUTE] + fallthrough + case '\u1E4E': // Ṏ [LATIN CAPITAL LETTER O WITH TILDE AND DIAERESIS] + fallthrough + case '\u1E50': // Ṑ [LATIN CAPITAL LETTER O WITH MACRON AND GRAVE] + fallthrough + case '\u1E52': // Ṓ [LATIN CAPITAL LETTER O WITH MACRON AND ACUTE] + fallthrough + case '\u1ECC': // Ọ [LATIN CAPITAL LETTER O WITH DOT BELOW] + fallthrough + case '\u1ECE': // Ỏ [LATIN CAPITAL LETTER O WITH HOOK ABOVE] + fallthrough + case '\u1ED0': // Ố [LATIN 
CAPITAL LETTER O WITH CIRCUMFLEX AND ACUTE] + fallthrough + case '\u1ED2': // Ồ [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND GRAVE] + fallthrough + case '\u1ED4': // Ổ [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE] + fallthrough + case '\u1ED6': // Ỗ [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND TILDE] + fallthrough + case '\u1ED8': // Ộ [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND DOT BELOW] + fallthrough + case '\u1EDA': // Ớ [LATIN CAPITAL LETTER O WITH HORN AND ACUTE] + fallthrough + case '\u1EDC': // Ờ [LATIN CAPITAL LETTER O WITH HORN AND GRAVE] + fallthrough + case '\u1EDE': // Ở [LATIN CAPITAL LETTER O WITH HORN AND HOOK ABOVE] + fallthrough + case '\u1EE0': // Ỡ [LATIN CAPITAL LETTER O WITH HORN AND TILDE] + fallthrough + case '\u1EE2': // Ợ [LATIN CAPITAL LETTER O WITH HORN AND DOT BELOW] + fallthrough + case '\u24C4': // Ⓞ [CIRCLED LATIN CAPITAL LETTER O] + fallthrough + case '\uA74A': // Ꝋ [LATIN CAPITAL LETTER O WITH LONG STROKE OVERLAY] + fallthrough + case '\uA74C': // Ꝍ [LATIN CAPITAL LETTER O WITH LOOP] + fallthrough + case '\uFF2F': // O [FULLWIDTH LATIN CAPITAL LETTER O] + output[outputPos] = 'O' + outputPos++ + + case '\u00F2': // ò [LATIN SMALL LETTER O WITH GRAVE] + fallthrough + case '\u00F3': // ó [LATIN SMALL LETTER O WITH ACUTE] + fallthrough + case '\u00F4': // ô [LATIN SMALL LETTER O WITH CIRCUMFLEX] + fallthrough + case '\u00F5': // õ [LATIN SMALL LETTER O WITH TILDE] + fallthrough + case '\u00F6': // ö [LATIN SMALL LETTER O WITH DIAERESIS] + fallthrough + case '\u00F8': // ø [LATIN SMALL LETTER O WITH STROKE] + fallthrough + case '\u014D': // ō [LATIN SMALL LETTER O WITH MACRON] + fallthrough + case '\u014F': // ŏ [LATIN SMALL LETTER O WITH BREVE] + fallthrough + case '\u0151': // ő [LATIN SMALL LETTER O WITH DOUBLE ACUTE] + fallthrough + case '\u01A1': // ơ [LATIN SMALL LETTER O WITH HORN] + fallthrough + case '\u01D2': // ǒ [LATIN SMALL LETTER O WITH CARON] + fallthrough + case '\u01EB': // ǫ [LATIN SMALL LETTER O WITH OGONEK] + 
fallthrough + case '\u01ED': // ǭ [LATIN SMALL LETTER O WITH OGONEK AND MACRON] + fallthrough + case '\u01FF': // ǿ [LATIN SMALL LETTER O WITH STROKE AND ACUTE] + fallthrough + case '\u020D': // ȍ [LATIN SMALL LETTER O WITH DOUBLE GRAVE] + fallthrough + case '\u020F': // ȏ [LATIN SMALL LETTER O WITH INVERTED BREVE] + fallthrough + case '\u022B': // ȫ [LATIN SMALL LETTER O WITH DIAERESIS AND MACRON] + fallthrough + case '\u022D': // ȭ [LATIN SMALL LETTER O WITH TILDE AND MACRON] + fallthrough + case '\u022F': // ȯ [LATIN SMALL LETTER O WITH DOT ABOVE] + fallthrough + case '\u0231': // ȱ [LATIN SMALL LETTER O WITH DOT ABOVE AND MACRON] + fallthrough + case '\u0254': // ɔ [LATIN SMALL LETTER OPEN O] + fallthrough + case '\u0275': // ɵ [LATIN SMALL LETTER BARRED O] + fallthrough + case '\u1D16': // ᴖ [LATIN SMALL LETTER TOP HALF O] + fallthrough + case '\u1D17': // ᴗ [LATIN SMALL LETTER BOTTOM HALF O] + fallthrough + case '\u1D97': // ᶗ [LATIN SMALL LETTER OPEN O WITH RETROFLEX HOOK] + fallthrough + case '\u1E4D': // ṍ [LATIN SMALL LETTER O WITH TILDE AND ACUTE] + fallthrough + case '\u1E4F': // ṏ [LATIN SMALL LETTER O WITH TILDE AND DIAERESIS] + fallthrough + case '\u1E51': // ṑ [LATIN SMALL LETTER O WITH MACRON AND GRAVE] + fallthrough + case '\u1E53': // ṓ [LATIN SMALL LETTER O WITH MACRON AND ACUTE] + fallthrough + case '\u1ECD': // ọ [LATIN SMALL LETTER O WITH DOT BELOW] + fallthrough + case '\u1ECF': // ỏ [LATIN SMALL LETTER O WITH HOOK ABOVE] + fallthrough + case '\u1ED1': // ố [LATIN SMALL LETTER O WITH CIRCUMFLEX AND ACUTE] + fallthrough + case '\u1ED3': // ồ [LATIN SMALL LETTER O WITH CIRCUMFLEX AND GRAVE] + fallthrough + case '\u1ED5': // ổ [LATIN SMALL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE] + fallthrough + case '\u1ED7': // ỗ [LATIN SMALL LETTER O WITH CIRCUMFLEX AND TILDE] + fallthrough + case '\u1ED9': // ộ [LATIN SMALL LETTER O WITH CIRCUMFLEX AND DOT BELOW] + fallthrough + case '\u1EDB': // ớ [LATIN SMALL LETTER O WITH HORN AND ACUTE] + fallthrough + 
case '\u1EDD': // ờ [LATIN SMALL LETTER O WITH HORN AND GRAVE] + fallthrough + case '\u1EDF': // ở [LATIN SMALL LETTER O WITH HORN AND HOOK ABOVE] + fallthrough + case '\u1EE1': // ỡ [LATIN SMALL LETTER O WITH HORN AND TILDE] + fallthrough + case '\u1EE3': // ợ [LATIN SMALL LETTER O WITH HORN AND DOT BELOW] + fallthrough + case '\u2092': // ₒ [LATIN SUBSCRIPT SMALL LETTER O] + fallthrough + case '\u24DE': // ⓞ [CIRCLED LATIN SMALL LETTER O] + fallthrough + case '\u2C7A': // ⱺ [LATIN SMALL LETTER O WITH LOW RING INSIDE] + fallthrough + case '\uA74B': // ꝋ [LATIN SMALL LETTER O WITH LONG STROKE OVERLAY] + fallthrough + case '\uA74D': // ꝍ [LATIN SMALL LETTER O WITH LOOP] + fallthrough + case '\uFF4F': // o [FULLWIDTH LATIN SMALL LETTER O] + output[outputPos] = 'o' + outputPos++ + + case '\u0152': // Œ [LATIN CAPITAL LIGATURE OE] + fallthrough + case '\u0276': // ɶ [LATIN LETTER SMALL CAPITAL OE] + output = output[:(len(output) + 1)] + output[outputPos] = 'O' + outputPos++ + output[outputPos] = 'E' + outputPos++ + + case '\uA74E': // Ꝏ [LATIN CAPITAL LETTER OO] + output = output[:(len(output) + 1)] + output[outputPos] = 'O' + outputPos++ + output[outputPos] = 'O' + outputPos++ + + case '\u0222': // Ȣ http://en.wikipedia.org/wiki/OU [LATIN CAPITAL LETTER OU] + fallthrough + case '\u1D15': // ᴕ [LATIN LETTER SMALL CAPITAL OU] + output = output[:(len(output) + 1)] + output[outputPos] = 'O' + outputPos++ + output[outputPos] = 'U' + outputPos++ + + case '\u24AA': // ⒪ [PARENTHESIZED LATIN SMALL LETTER O] + output = output[:(len(output) + 2)] + output[outputPos] = '(' + outputPos++ + output[outputPos] = 'o' + outputPos++ + output[outputPos] = ')' + outputPos++ + + case '\u0153': // œ [LATIN SMALL LIGATURE OE] + fallthrough + case '\u1D14': // ᴔ [LATIN SMALL LETTER TURNED OE] + output = output[:(len(output) + 1)] + output[outputPos] = 'o' + outputPos++ + output[outputPos] = 'e' + outputPos++ + + case '\uA74F': // ꝏ [LATIN SMALL LETTER OO] + output = output[:(len(output) + 
1)] + output[outputPos] = 'o' + outputPos++ + output[outputPos] = 'o' + outputPos++ + + case '\u0223': // ȣ http://en.wikipedia.org/wiki/OU [LATIN SMALL LETTER OU] + output = output[:(len(output) + 1)] + output[outputPos] = 'o' + outputPos++ + output[outputPos] = 'u' + outputPos++ + + case '\u01A4': // Ƥ [LATIN CAPITAL LETTER P WITH HOOK] + fallthrough + case '\u1D18': // ᴘ [LATIN LETTER SMALL CAPITAL P] + fallthrough + case '\u1E54': // Ṕ [LATIN CAPITAL LETTER P WITH ACUTE] + fallthrough + case '\u1E56': // Ṗ [LATIN CAPITAL LETTER P WITH DOT ABOVE] + fallthrough + case '\u24C5': // Ⓟ [CIRCLED LATIN CAPITAL LETTER P] + fallthrough + case '\u2C63': // Ᵽ [LATIN CAPITAL LETTER P WITH STROKE] + fallthrough + case '\uA750': // Ꝑ [LATIN CAPITAL LETTER P WITH STROKE THROUGH DESCENDER] + fallthrough + case '\uA752': // Ꝓ [LATIN CAPITAL LETTER P WITH FLOURISH] + fallthrough + case '\uA754': // Ꝕ [LATIN CAPITAL LETTER P WITH SQUIRREL TAIL] + fallthrough + case '\uFF30': // P [FULLWIDTH LATIN CAPITAL LETTER P] + output[outputPos] = 'P' + outputPos++ + + case '\u01A5': // ƥ [LATIN SMALL LETTER P WITH HOOK] + fallthrough + case '\u1D71': // ᵱ [LATIN SMALL LETTER P WITH MIDDLE TILDE] + fallthrough + case '\u1D7D': // ᵽ [LATIN SMALL LETTER P WITH STROKE] + fallthrough + case '\u1D88': // ᶈ [LATIN SMALL LETTER P WITH PALATAL HOOK] + fallthrough + case '\u1E55': // ṕ [LATIN SMALL LETTER P WITH ACUTE] + fallthrough + case '\u1E57': // ṗ [LATIN SMALL LETTER P WITH DOT ABOVE] + fallthrough + case '\u24DF': // ⓟ [CIRCLED LATIN SMALL LETTER P] + fallthrough + case '\uA751': // ꝑ [LATIN SMALL LETTER P WITH STROKE THROUGH DESCENDER] + fallthrough + case '\uA753': // ꝓ [LATIN SMALL LETTER P WITH FLOURISH] + fallthrough + case '\uA755': // ꝕ [LATIN SMALL LETTER P WITH SQUIRREL TAIL] + fallthrough + case '\uA7FC': // ꟼ [LATIN EPIGRAPHIC LETTER REVERSED P] + fallthrough + case '\uFF50': // p [FULLWIDTH LATIN SMALL LETTER P] + output[outputPos] = 'p' + outputPos++ + + case '\u24AB': // ⒫ 
[PARENTHESIZED LATIN SMALL LETTER P] + output = output[:(len(output) + 2)] + output[outputPos] = '(' + outputPos++ + output[outputPos] = 'p' + outputPos++ + output[outputPos] = ')' + outputPos++ + + case '\u024A': // Ɋ [LATIN CAPITAL LETTER SMALL Q WITH HOOK TAIL] + fallthrough + case '\u24C6': // Ⓠ [CIRCLED LATIN CAPITAL LETTER Q] + fallthrough + case '\uA756': // Ꝗ [LATIN CAPITAL LETTER Q WITH STROKE THROUGH DESCENDER] + fallthrough + case '\uA758': // Ꝙ [LATIN CAPITAL LETTER Q WITH DIAGONAL STROKE] + fallthrough + case '\uFF31': // Q [FULLWIDTH LATIN CAPITAL LETTER Q] + output[outputPos] = 'Q' + outputPos++ + + case '\u0138': // ĸ http://en.wikipedia.org/wiki/Kra_(letter) [LATIN SMALL LETTER KRA] + fallthrough + case '\u024B': // ɋ [LATIN SMALL LETTER Q WITH HOOK TAIL] + fallthrough + case '\u02A0': // ʠ [LATIN SMALL LETTER Q WITH HOOK] + fallthrough + case '\u24E0': // ⓠ [CIRCLED LATIN SMALL LETTER Q] + fallthrough + case '\uA757': // ꝗ [LATIN SMALL LETTER Q WITH STROKE THROUGH DESCENDER] + fallthrough + case '\uA759': // ꝙ [LATIN SMALL LETTER Q WITH DIAGONAL STROKE] + fallthrough + case '\uFF51': // q [FULLWIDTH LATIN SMALL LETTER Q] + output[outputPos] = 'q' + outputPos++ + + case '\u24AC': // ⒬ [PARENTHESIZED LATIN SMALL LETTER Q] + output = output[:(len(output) + 2)] + output[outputPos] = '(' + outputPos++ + output[outputPos] = 'q' + outputPos++ + output[outputPos] = ')' + outputPos++ + + case '\u0239': // ȹ [LATIN SMALL LETTER QP DIGRAPH] + output = output[:(len(output) + 1)] + output[outputPos] = 'q' + outputPos++ + output[outputPos] = 'p' + outputPos++ + + case '\u0154': // Ŕ [LATIN CAPITAL LETTER R WITH ACUTE] + fallthrough + case '\u0156': // Ŗ [LATIN CAPITAL LETTER R WITH CEDILLA] + fallthrough + case '\u0158': // Ř [LATIN CAPITAL LETTER R WITH CARON] + fallthrough + case '\u0210': // Ȓ [LATIN CAPITAL LETTER R WITH DOUBLE GRAVE] + fallthrough + case '\u0212': // Ȓ [LATIN CAPITAL LETTER R WITH INVERTED BREVE] + fallthrough + case '\u024C': // Ɍ [LATIN 
CAPITAL LETTER R WITH STROKE] + fallthrough + case '\u0280': // ʀ [LATIN LETTER SMALL CAPITAL R] + fallthrough + case '\u0281': // ʁ [LATIN LETTER SMALL CAPITAL INVERTED R] + fallthrough + case '\u1D19': // ᴙ [LATIN LETTER SMALL CAPITAL REVERSED R] + fallthrough + case '\u1D1A': // ᴚ [LATIN LETTER SMALL CAPITAL TURNED R] + fallthrough + case '\u1E58': // Ṙ [LATIN CAPITAL LETTER R WITH DOT ABOVE] + fallthrough + case '\u1E5A': // Ṛ [LATIN CAPITAL LETTER R WITH DOT BELOW] + fallthrough + case '\u1E5C': // Ṝ [LATIN CAPITAL LETTER R WITH DOT BELOW AND MACRON] + fallthrough + case '\u1E5E': // Ṟ [LATIN CAPITAL LETTER R WITH LINE BELOW] + fallthrough + case '\u24C7': // Ⓡ [CIRCLED LATIN CAPITAL LETTER R] + fallthrough + case '\u2C64': // Ɽ [LATIN CAPITAL LETTER R WITH TAIL] + fallthrough + case '\uA75A': // Ꝛ [LATIN CAPITAL LETTER R ROTUNDA] + fallthrough + case '\uA782': // Ꞃ [LATIN CAPITAL LETTER INSULAR R] + fallthrough + case '\uFF32': // R [FULLWIDTH LATIN CAPITAL LETTER R] + output[outputPos] = 'R' + outputPos++ + + case '\u0155': // ŕ [LATIN SMALL LETTER R WITH ACUTE] + fallthrough + case '\u0157': // ŗ [LATIN SMALL LETTER R WITH CEDILLA] + fallthrough + case '\u0159': // ř [LATIN SMALL LETTER R WITH CARON] + fallthrough + case '\u0211': // ȑ [LATIN SMALL LETTER R WITH DOUBLE GRAVE] + fallthrough + case '\u0213': // ȓ [LATIN SMALL LETTER R WITH INVERTED BREVE] + fallthrough + case '\u024D': // ɍ [LATIN SMALL LETTER R WITH STROKE] + fallthrough + case '\u027C': // ɼ [LATIN SMALL LETTER R WITH LONG LEG] + fallthrough + case '\u027D': // ɽ [LATIN SMALL LETTER R WITH TAIL] + fallthrough + case '\u027E': // ɾ [LATIN SMALL LETTER R WITH FISHHOOK] + fallthrough + case '\u027F': // ɿ [LATIN SMALL LETTER REVERSED R WITH FISHHOOK] + fallthrough + case '\u1D63': // ᵣ [LATIN SUBSCRIPT SMALL LETTER R] + fallthrough + case '\u1D72': // ᵲ [LATIN SMALL LETTER R WITH MIDDLE TILDE] + fallthrough + case '\u1D73': // ᵳ [LATIN SMALL LETTER R WITH FISHHOOK AND MIDDLE TILDE] + 
fallthrough + case '\u1D89': // ᶉ [LATIN SMALL LETTER R WITH PALATAL HOOK] + fallthrough + case '\u1E59': // ṙ [LATIN SMALL LETTER R WITH DOT ABOVE] + fallthrough + case '\u1E5B': // ṛ [LATIN SMALL LETTER R WITH DOT BELOW] + fallthrough + case '\u1E5D': // ṝ [LATIN SMALL LETTER R WITH DOT BELOW AND MACRON] + fallthrough + case '\u1E5F': // ṟ [LATIN SMALL LETTER R WITH LINE BELOW] + fallthrough + case '\u24E1': // ⓡ [CIRCLED LATIN SMALL LETTER R] + fallthrough + case '\uA75B': // ꝛ [LATIN SMALL LETTER R ROTUNDA] + fallthrough + case '\uA783': // ꞃ [LATIN SMALL LETTER INSULAR R] + fallthrough + case '\uFF52': // r [FULLWIDTH LATIN SMALL LETTER R] + output[outputPos] = 'r' + outputPos++ + + case '\u24AD': // ⒭ [PARENTHESIZED LATIN SMALL LETTER R] + output = output[:(len(output) + 2)] + output[outputPos] = '(' + outputPos++ + output[outputPos] = 'r' + outputPos++ + output[outputPos] = ')' + outputPos++ + + case '\u015A': // Ś [LATIN CAPITAL LETTER S WITH ACUTE] + fallthrough + case '\u015C': // Ŝ [LATIN CAPITAL LETTER S WITH CIRCUMFLEX] + fallthrough + case '\u015E': // Ş [LATIN CAPITAL LETTER S WITH CEDILLA] + fallthrough + case '\u0160': // Š [LATIN CAPITAL LETTER S WITH CARON] + fallthrough + case '\u0218': // Ș [LATIN CAPITAL LETTER S WITH COMMA BELOW] + fallthrough + case '\u1E60': // Ṡ [LATIN CAPITAL LETTER S WITH DOT ABOVE] + fallthrough + case '\u1E62': // Ṣ [LATIN CAPITAL LETTER S WITH DOT BELOW] + fallthrough + case '\u1E64': // Ṥ [LATIN CAPITAL LETTER S WITH ACUTE AND DOT ABOVE] + fallthrough + case '\u1E66': // Ṧ [LATIN CAPITAL LETTER S WITH CARON AND DOT ABOVE] + fallthrough + case '\u1E68': // Ṩ [LATIN CAPITAL LETTER S WITH DOT BELOW AND DOT ABOVE] + fallthrough + case '\u24C8': // Ⓢ [CIRCLED LATIN CAPITAL LETTER S] + fallthrough + case '\uA731': // ꜱ [LATIN LETTER SMALL CAPITAL S] + fallthrough + case '\uA785': // ꞅ [LATIN SMALL LETTER INSULAR S] + fallthrough + case '\uFF33': // S [FULLWIDTH LATIN CAPITAL LETTER S] + output[outputPos] = 'S' + 
outputPos++ + + case '\u015B': // ś [LATIN SMALL LETTER S WITH ACUTE] + fallthrough + case '\u015D': // ŝ [LATIN SMALL LETTER S WITH CIRCUMFLEX] + fallthrough + case '\u015F': // ş [LATIN SMALL LETTER S WITH CEDILLA] + fallthrough + case '\u0161': // š [LATIN SMALL LETTER S WITH CARON] + fallthrough + case '\u017F': // ſ http://en.wikipedia.org/wiki/Long_S [LATIN SMALL LETTER LONG S] + fallthrough + case '\u0219': // ș [LATIN SMALL LETTER S WITH COMMA BELOW] + fallthrough + case '\u023F': // ȿ [LATIN SMALL LETTER S WITH SWASH TAIL] + fallthrough + case '\u0282': // ʂ [LATIN SMALL LETTER S WITH HOOK] + fallthrough + case '\u1D74': // ᵴ [LATIN SMALL LETTER S WITH MIDDLE TILDE] + fallthrough + case '\u1D8A': // ᶊ [LATIN SMALL LETTER S WITH PALATAL HOOK] + fallthrough + case '\u1E61': // ṡ [LATIN SMALL LETTER S WITH DOT ABOVE] + fallthrough + case '\u1E63': // ṣ [LATIN SMALL LETTER S WITH DOT BELOW] + fallthrough + case '\u1E65': // ṥ [LATIN SMALL LETTER S WITH ACUTE AND DOT ABOVE] + fallthrough + case '\u1E67': // ṧ [LATIN SMALL LETTER S WITH CARON AND DOT ABOVE] + fallthrough + case '\u1E69': // ṩ [LATIN SMALL LETTER S WITH DOT BELOW AND DOT ABOVE] + fallthrough + case '\u1E9C': // ẜ [LATIN SMALL LETTER LONG S WITH DIAGONAL STROKE] + fallthrough + case '\u1E9D': // ẝ [LATIN SMALL LETTER LONG S WITH HIGH STROKE] + fallthrough + case '\u24E2': // ⓢ [CIRCLED LATIN SMALL LETTER S] + fallthrough + case '\uA784': // Ꞅ [LATIN CAPITAL LETTER INSULAR S] + fallthrough + case '\uFF53': // s [FULLWIDTH LATIN SMALL LETTER S] + output[outputPos] = 's' + outputPos++ + + case '\u1E9E': // ẞ [LATIN CAPITAL LETTER SHARP S] + output = output[:(len(output) + 1)] + output[outputPos] = 'S' + outputPos++ + output[outputPos] = 'S' + outputPos++ + + case '\u24AE': // ⒮ [PARENTHESIZED LATIN SMALL LETTER S] + output = output[:(len(output) + 2)] + output[outputPos] = '(' + outputPos++ + output[outputPos] = 's' + outputPos++ + output[outputPos] = ')' + outputPos++ + + case '\u00DF': // ß [LATIN 
SMALL LETTER SHARP S] + output = output[:(len(output) + 1)] + output[outputPos] = 's' + outputPos++ + output[outputPos] = 's' + outputPos++ + + case '\uFB06': // st [LATIN SMALL LIGATURE ST] + output = output[:(len(output) + 1)] + output[outputPos] = 's' + outputPos++ + output[outputPos] = 't' + outputPos++ + + case '\u0162': // Ţ [LATIN CAPITAL LETTER T WITH CEDILLA] + fallthrough + case '\u0164': // Ť [LATIN CAPITAL LETTER T WITH CARON] + fallthrough + case '\u0166': // Ŧ [LATIN CAPITAL LETTER T WITH STROKE] + fallthrough + case '\u01AC': // Ƭ [LATIN CAPITAL LETTER T WITH HOOK] + fallthrough + case '\u01AE': // Ʈ [LATIN CAPITAL LETTER T WITH RETROFLEX HOOK] + fallthrough + case '\u021A': // Ț [LATIN CAPITAL LETTER T WITH COMMA BELOW] + fallthrough + case '\u023E': // Ⱦ [LATIN CAPITAL LETTER T WITH DIAGONAL STROKE] + fallthrough + case '\u1D1B': // ᴛ [LATIN LETTER SMALL CAPITAL T] + fallthrough + case '\u1E6A': // Ṫ [LATIN CAPITAL LETTER T WITH DOT ABOVE] + fallthrough + case '\u1E6C': // Ṭ [LATIN CAPITAL LETTER T WITH DOT BELOW] + fallthrough + case '\u1E6E': // Ṯ [LATIN CAPITAL LETTER T WITH LINE BELOW] + fallthrough + case '\u1E70': // Ṱ [LATIN CAPITAL LETTER T WITH CIRCUMFLEX BELOW] + fallthrough + case '\u24C9': // Ⓣ [CIRCLED LATIN CAPITAL LETTER T] + fallthrough + case '\uA786': // Ꞇ [LATIN CAPITAL LETTER INSULAR T] + fallthrough + case '\uFF34': // T [FULLWIDTH LATIN CAPITAL LETTER T] + output[outputPos] = 'T' + outputPos++ + + case '\u0163': // ţ [LATIN SMALL LETTER T WITH CEDILLA] + fallthrough + case '\u0165': // ť [LATIN SMALL LETTER T WITH CARON] + fallthrough + case '\u0167': // ŧ [LATIN SMALL LETTER T WITH STROKE] + fallthrough + case '\u01AB': // ƫ [LATIN SMALL LETTER T WITH PALATAL HOOK] + fallthrough + case '\u01AD': // ƭ [LATIN SMALL LETTER T WITH HOOK] + fallthrough + case '\u021B': // ț [LATIN SMALL LETTER T WITH COMMA BELOW] + fallthrough + case '\u0236': // ȶ [LATIN SMALL LETTER T WITH CURL] + fallthrough + case '\u0287': // ʇ [LATIN SMALL 
LETTER TURNED T] + fallthrough + case '\u0288': // ʈ [LATIN SMALL LETTER T WITH RETROFLEX HOOK] + fallthrough + case '\u1D75': // ᵵ [LATIN SMALL LETTER T WITH MIDDLE TILDE] + fallthrough + case '\u1E6B': // ṫ [LATIN SMALL LETTER T WITH DOT ABOVE] + fallthrough + case '\u1E6D': // ṭ [LATIN SMALL LETTER T WITH DOT BELOW] + fallthrough + case '\u1E6F': // ṯ [LATIN SMALL LETTER T WITH LINE BELOW] + fallthrough + case '\u1E71': // ṱ [LATIN SMALL LETTER T WITH CIRCUMFLEX BELOW] + fallthrough + case '\u1E97': // ẗ [LATIN SMALL LETTER T WITH DIAERESIS] + fallthrough + case '\u24E3': // ⓣ [CIRCLED LATIN SMALL LETTER T] + fallthrough + case '\u2C66': // ⱦ [LATIN SMALL LETTER T WITH DIAGONAL STROKE] + fallthrough + case '\uFF54': // t [FULLWIDTH LATIN SMALL LETTER T] + output[outputPos] = 't' + outputPos++ + + case '\u00DE': // Þ [LATIN CAPITAL LETTER THORN] + fallthrough + case '\uA766': // Ꝧ [LATIN CAPITAL LETTER THORN WITH STROKE THROUGH DESCENDER] + output = output[:(len(output) + 1)] + output[outputPos] = 'T' + outputPos++ + output[outputPos] = 'H' + outputPos++ + + case '\uA728': // Ꜩ [LATIN CAPITAL LETTER TZ] + output = output[:(len(output) + 1)] + output[outputPos] = 'T' + outputPos++ + output[outputPos] = 'Z' + outputPos++ + + case '\u24AF': // ⒯ [PARENTHESIZED LATIN SMALL LETTER T] + output = output[:(len(output) + 2)] + output[outputPos] = '(' + outputPos++ + output[outputPos] = 't' + outputPos++ + output[outputPos] = ')' + outputPos++ + + case '\u02A8': // ʨ [LATIN SMALL LETTER TC DIGRAPH WITH CURL] + output = output[:(len(output) + 1)] + output[outputPos] = 't' + outputPos++ + output[outputPos] = 'c' + outputPos++ + + case '\u00FE': // þ [LATIN SMALL LETTER THORN] + fallthrough + case '\u1D7A': // ᵺ [LATIN SMALL LETTER TH WITH STRIKETHROUGH] + fallthrough + case '\uA767': // ꝧ [LATIN SMALL LETTER THORN WITH STROKE THROUGH DESCENDER] + output = output[:(len(output) + 1)] + output[outputPos] = 't' + outputPos++ + output[outputPos] = 'h' + outputPos++ + + case 
'\u02A6': // ʦ [LATIN SMALL LETTER TS DIGRAPH] + output = output[:(len(output) + 1)] + output[outputPos] = 't' + outputPos++ + output[outputPos] = 's' + outputPos++ + + case '\uA729': // ꜩ [LATIN SMALL LETTER TZ] + output = output[:(len(output) + 1)] + output[outputPos] = 't' + outputPos++ + output[outputPos] = 'z' + outputPos++ + + case '\u00D9': // Ù [LATIN CAPITAL LETTER U WITH GRAVE] + fallthrough + case '\u00DA': // Ú [LATIN CAPITAL LETTER U WITH ACUTE] + fallthrough + case '\u00DB': // Û [LATIN CAPITAL LETTER U WITH CIRCUMFLEX] + fallthrough + case '\u00DC': // Ü [LATIN CAPITAL LETTER U WITH DIAERESIS] + fallthrough + case '\u0168': // Ũ [LATIN CAPITAL LETTER U WITH TILDE] + fallthrough + case '\u016A': // Ū [LATIN CAPITAL LETTER U WITH MACRON] + fallthrough + case '\u016C': // Ŭ [LATIN CAPITAL LETTER U WITH BREVE] + fallthrough + case '\u016E': // Ů [LATIN CAPITAL LETTER U WITH RING ABOVE] + fallthrough + case '\u0170': // Ű [LATIN CAPITAL LETTER U WITH DOUBLE ACUTE] + fallthrough + case '\u0172': // Ų [LATIN CAPITAL LETTER U WITH OGONEK] + fallthrough + case '\u01AF': // Ư [LATIN CAPITAL LETTER U WITH HORN] + fallthrough + case '\u01D3': // Ǔ [LATIN CAPITAL LETTER U WITH CARON] + fallthrough + case '\u01D5': // Ǖ [LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON] + fallthrough + case '\u01D7': // Ǘ [LATIN CAPITAL LETTER U WITH DIAERESIS AND ACUTE] + fallthrough + case '\u01D9': // Ǚ [LATIN CAPITAL LETTER U WITH DIAERESIS AND CARON] + fallthrough + case '\u01DB': // Ǜ [LATIN CAPITAL LETTER U WITH DIAERESIS AND GRAVE] + fallthrough + case '\u0214': // Ȕ [LATIN CAPITAL LETTER U WITH DOUBLE GRAVE] + fallthrough + case '\u0216': // Ȗ [LATIN CAPITAL LETTER U WITH INVERTED BREVE] + fallthrough + case '\u0244': // Ʉ [LATIN CAPITAL LETTER U BAR] + fallthrough + case '\u1D1C': // ᴜ [LATIN LETTER SMALL CAPITAL U] + fallthrough + case '\u1D7E': // ᵾ [LATIN SMALL CAPITAL LETTER U WITH STROKE] + fallthrough + case '\u1E72': // Ṳ [LATIN CAPITAL LETTER U WITH DIAERESIS 
BELOW] + fallthrough + case '\u1E74': // Ṵ [LATIN CAPITAL LETTER U WITH TILDE BELOW] + fallthrough + case '\u1E76': // Ṷ [LATIN CAPITAL LETTER U WITH CIRCUMFLEX BELOW] + fallthrough + case '\u1E78': // Ṹ [LATIN CAPITAL LETTER U WITH TILDE AND ACUTE] + fallthrough + case '\u1E7A': // Ṻ [LATIN CAPITAL LETTER U WITH MACRON AND DIAERESIS] + fallthrough + case '\u1EE4': // Ụ [LATIN CAPITAL LETTER U WITH DOT BELOW] + fallthrough + case '\u1EE6': // Ủ [LATIN CAPITAL LETTER U WITH HOOK ABOVE] + fallthrough + case '\u1EE8': // Ứ [LATIN CAPITAL LETTER U WITH HORN AND ACUTE] + fallthrough + case '\u1EEA': // Ừ [LATIN CAPITAL LETTER U WITH HORN AND GRAVE] + fallthrough + case '\u1EEC': // Ử [LATIN CAPITAL LETTER U WITH HORN AND HOOK ABOVE] + fallthrough + case '\u1EEE': // Ữ [LATIN CAPITAL LETTER U WITH HORN AND TILDE] + fallthrough + case '\u1EF0': // Ự [LATIN CAPITAL LETTER U WITH HORN AND DOT BELOW] + fallthrough + case '\u24CA': // Ⓤ [CIRCLED LATIN CAPITAL LETTER U] + fallthrough + case '\uFF35': // U [FULLWIDTH LATIN CAPITAL LETTER U] + output[outputPos] = 'U' + outputPos++ + + case '\u00F9': // ù [LATIN SMALL LETTER U WITH GRAVE] + fallthrough + case '\u00FA': // ú [LATIN SMALL LETTER U WITH ACUTE] + fallthrough + case '\u00FB': // û [LATIN SMALL LETTER U WITH CIRCUMFLEX] + fallthrough + case '\u00FC': // ü [LATIN SMALL LETTER U WITH DIAERESIS] + fallthrough + case '\u0169': // ũ [LATIN SMALL LETTER U WITH TILDE] + fallthrough + case '\u016B': // ū [LATIN SMALL LETTER U WITH MACRON] + fallthrough + case '\u016D': // ŭ [LATIN SMALL LETTER U WITH BREVE] + fallthrough + case '\u016F': // ů [LATIN SMALL LETTER U WITH RING ABOVE] + fallthrough + case '\u0171': // ű [LATIN SMALL LETTER U WITH DOUBLE ACUTE] + fallthrough + case '\u0173': // ų [LATIN SMALL LETTER U WITH OGONEK] + fallthrough + case '\u01B0': // ư [LATIN SMALL LETTER U WITH HORN] + fallthrough + case '\u01D4': // ǔ [LATIN SMALL LETTER U WITH CARON] + fallthrough + case '\u01D6': // ǖ [LATIN SMALL LETTER U WITH 
DIAERESIS AND MACRON] + fallthrough + case '\u01D8': // ǘ [LATIN SMALL LETTER U WITH DIAERESIS AND ACUTE] + fallthrough + case '\u01DA': // ǚ [LATIN SMALL LETTER U WITH DIAERESIS AND CARON] + fallthrough + case '\u01DC': // ǜ [LATIN SMALL LETTER U WITH DIAERESIS AND GRAVE] + fallthrough + case '\u0215': // ȕ [LATIN SMALL LETTER U WITH DOUBLE GRAVE] + fallthrough + case '\u0217': // ȗ [LATIN SMALL LETTER U WITH INVERTED BREVE] + fallthrough + case '\u0289': // ʉ [LATIN SMALL LETTER U BAR] + fallthrough + case '\u1D64': // ᵤ [LATIN SUBSCRIPT SMALL LETTER U] + fallthrough + case '\u1D99': // ᶙ [LATIN SMALL LETTER U WITH RETROFLEX HOOK] + fallthrough + case '\u1E73': // ṳ [LATIN SMALL LETTER U WITH DIAERESIS BELOW] + fallthrough + case '\u1E75': // ṵ [LATIN SMALL LETTER U WITH TILDE BELOW] + fallthrough + case '\u1E77': // ṷ [LATIN SMALL LETTER U WITH CIRCUMFLEX BELOW] + fallthrough + case '\u1E79': // ṹ [LATIN SMALL LETTER U WITH TILDE AND ACUTE] + fallthrough + case '\u1E7B': // ṻ [LATIN SMALL LETTER U WITH MACRON AND DIAERESIS] + fallthrough + case '\u1EE5': // ụ [LATIN SMALL LETTER U WITH DOT BELOW] + fallthrough + case '\u1EE7': // ủ [LATIN SMALL LETTER U WITH HOOK ABOVE] + fallthrough + case '\u1EE9': // ứ [LATIN SMALL LETTER U WITH HORN AND ACUTE] + fallthrough + case '\u1EEB': // ừ [LATIN SMALL LETTER U WITH HORN AND GRAVE] + fallthrough + case '\u1EED': // ử [LATIN SMALL LETTER U WITH HORN AND HOOK ABOVE] + fallthrough + case '\u1EEF': // ữ [LATIN SMALL LETTER U WITH HORN AND TILDE] + fallthrough + case '\u1EF1': // ự [LATIN SMALL LETTER U WITH HORN AND DOT BELOW] + fallthrough + case '\u24E4': // ⓤ [CIRCLED LATIN SMALL LETTER U] + fallthrough + case '\uFF55': // u [FULLWIDTH LATIN SMALL LETTER U] + output[outputPos] = 'u' + outputPos++ + + case '\u24B0': // ⒰ [PARENTHESIZED LATIN SMALL LETTER U] + output = output[:(len(output) + 2)] + output[outputPos] = '(' + outputPos++ + output[outputPos] = 'u' + outputPos++ + output[outputPos] = ')' + outputPos++ + + case 
'\u1D6B': // ᵫ [LATIN SMALL LETTER UE] + output = output[:(len(output) + 1)] + output[outputPos] = 'u' + outputPos++ + output[outputPos] = 'e' + outputPos++ + + case '\u01B2': // Ʋ [LATIN CAPITAL LETTER V WITH HOOK] + fallthrough + case '\u0245': // Ʌ [LATIN CAPITAL LETTER TURNED V] + fallthrough + case '\u1D20': // ᴠ [LATIN LETTER SMALL CAPITAL V] + fallthrough + case '\u1E7C': // Ṽ [LATIN CAPITAL LETTER V WITH TILDE] + fallthrough + case '\u1E7E': // Ṿ [LATIN CAPITAL LETTER V WITH DOT BELOW] + fallthrough + case '\u1EFC': // Ỽ [LATIN CAPITAL LETTER MIDDLE-WELSH V] + fallthrough + case '\u24CB': // Ⓥ [CIRCLED LATIN CAPITAL LETTER V] + fallthrough + case '\uA75E': // Ꝟ [LATIN CAPITAL LETTER V WITH DIAGONAL STROKE] + fallthrough + case '\uA768': // Ꝩ [LATIN CAPITAL LETTER VEND] + fallthrough + case '\uFF36': // V [FULLWIDTH LATIN CAPITAL LETTER V] + output[outputPos] = 'V' + outputPos++ + + case '\u028B': // ʋ [LATIN SMALL LETTER V WITH HOOK] + fallthrough + case '\u028C': // ʌ [LATIN SMALL LETTER TURNED V] + fallthrough + case '\u1D65': // ᵥ [LATIN SUBSCRIPT SMALL LETTER V] + fallthrough + case '\u1D8C': // ᶌ [LATIN SMALL LETTER V WITH PALATAL HOOK] + fallthrough + case '\u1E7D': // ṽ [LATIN SMALL LETTER V WITH TILDE] + fallthrough + case '\u1E7F': // ṿ [LATIN SMALL LETTER V WITH DOT BELOW] + fallthrough + case '\u24E5': // ⓥ [CIRCLED LATIN SMALL LETTER V] + fallthrough + case '\u2C71': // ⱱ [LATIN SMALL LETTER V WITH RIGHT HOOK] + fallthrough + case '\u2C74': // ⱴ [LATIN SMALL LETTER V WITH CURL] + fallthrough + case '\uA75F': // ꝟ [LATIN SMALL LETTER V WITH DIAGONAL STROKE] + fallthrough + case '\uFF56': // v [FULLWIDTH LATIN SMALL LETTER V] + output[outputPos] = 'v' + outputPos++ + + case '\uA760': // Ꝡ [LATIN CAPITAL LETTER VY] + output = output[:(len(output) + 1)] + output[outputPos] = 'V' + outputPos++ + output[outputPos] = 'Y' + outputPos++ + + case '\u24B1': // ⒱ [PARENTHESIZED LATIN SMALL LETTER V] + output = output[:(len(output) + 2)] + output[outputPos] 
= '(' + outputPos++ + output[outputPos] = 'v' + outputPos++ + output[outputPos] = ')' + outputPos++ + + case '\uA761': // ꝡ [LATIN SMALL LETTER VY] + output = output[:(len(output) + 1)] + output[outputPos] = 'v' + outputPos++ + output[outputPos] = 'y' + outputPos++ + + case '\u0174': // Ŵ [LATIN CAPITAL LETTER W WITH CIRCUMFLEX] + fallthrough + case '\u01F7': // Ƿ http://en.wikipedia.org/wiki/Wynn [LATIN CAPITAL LETTER WYNN] + fallthrough + case '\u1D21': // ᴡ [LATIN LETTER SMALL CAPITAL W] + fallthrough + case '\u1E80': // Ẁ [LATIN CAPITAL LETTER W WITH GRAVE] + fallthrough + case '\u1E82': // Ẃ [LATIN CAPITAL LETTER W WITH ACUTE] + fallthrough + case '\u1E84': // Ẅ [LATIN CAPITAL LETTER W WITH DIAERESIS] + fallthrough + case '\u1E86': // Ẇ [LATIN CAPITAL LETTER W WITH DOT ABOVE] + fallthrough + case '\u1E88': // Ẉ [LATIN CAPITAL LETTER W WITH DOT BELOW] + fallthrough + case '\u24CC': // Ⓦ [CIRCLED LATIN CAPITAL LETTER W] + fallthrough + case '\u2C72': // Ⱳ [LATIN CAPITAL LETTER W WITH HOOK] + fallthrough + case '\uFF37': // W [FULLWIDTH LATIN CAPITAL LETTER W] + output[outputPos] = 'W' + outputPos++ + + case '\u0175': // ŵ [LATIN SMALL LETTER W WITH CIRCUMFLEX] + fallthrough + case '\u01BF': // ƿ http://en.wikipedia.org/wiki/Wynn [LATIN LETTER WYNN] + fallthrough + case '\u028D': // ʍ [LATIN SMALL LETTER TURNED W] + fallthrough + case '\u1E81': // ẁ [LATIN SMALL LETTER W WITH GRAVE] + fallthrough + case '\u1E83': // ẃ [LATIN SMALL LETTER W WITH ACUTE] + fallthrough + case '\u1E85': // ẅ [LATIN SMALL LETTER W WITH DIAERESIS] + fallthrough + case '\u1E87': // ẇ [LATIN SMALL LETTER W WITH DOT ABOVE] + fallthrough + case '\u1E89': // ẉ [LATIN SMALL LETTER W WITH DOT BELOW] + fallthrough + case '\u1E98': // ẘ [LATIN SMALL LETTER W WITH RING ABOVE] + fallthrough + case '\u24E6': // ⓦ [CIRCLED LATIN SMALL LETTER W] + fallthrough + case '\u2C73': // ⱳ [LATIN SMALL LETTER W WITH HOOK] + fallthrough + case '\uFF57': // w [FULLWIDTH LATIN SMALL LETTER W] + output[outputPos] 
= 'w' + outputPos++ + + case '\u24B2': // ⒲ [PARENTHESIZED LATIN SMALL LETTER W] + output = output[:(len(output) + 2)] + output[outputPos] = '(' + outputPos++ + output[outputPos] = 'w' + outputPos++ + output[outputPos] = ')' + outputPos++ + + case '\u1E8A': // Ẋ [LATIN CAPITAL LETTER X WITH DOT ABOVE] + fallthrough + case '\u1E8C': // Ẍ [LATIN CAPITAL LETTER X WITH DIAERESIS] + fallthrough + case '\u24CD': // Ⓧ [CIRCLED LATIN CAPITAL LETTER X] + fallthrough + case '\uFF38': // X [FULLWIDTH LATIN CAPITAL LETTER X] + output[outputPos] = 'X' + outputPos++ + + case '\u1D8D': // ᶍ [LATIN SMALL LETTER X WITH PALATAL HOOK] + fallthrough + case '\u1E8B': // ẋ [LATIN SMALL LETTER X WITH DOT ABOVE] + fallthrough + case '\u1E8D': // ẍ [LATIN SMALL LETTER X WITH DIAERESIS] + fallthrough + case '\u2093': // ₓ [LATIN SUBSCRIPT SMALL LETTER X] + fallthrough + case '\u24E7': // ⓧ [CIRCLED LATIN SMALL LETTER X] + fallthrough + case '\uFF58': // x [FULLWIDTH LATIN SMALL LETTER X] + output[outputPos] = 'x' + outputPos++ + + case '\u24B3': // ⒳ [PARENTHESIZED LATIN SMALL LETTER X] + output = output[:(len(output) + 2)] + output[outputPos] = '(' + outputPos++ + output[outputPos] = 'x' + outputPos++ + output[outputPos] = ')' + outputPos++ + + case '\u00DD': // Ý [LATIN CAPITAL LETTER Y WITH ACUTE] + fallthrough + case '\u0176': // Ŷ [LATIN CAPITAL LETTER Y WITH CIRCUMFLEX] + fallthrough + case '\u0178': // Ÿ [LATIN CAPITAL LETTER Y WITH DIAERESIS] + fallthrough + case '\u01B3': // Ƴ [LATIN CAPITAL LETTER Y WITH HOOK] + fallthrough + case '\u0232': // Ȳ [LATIN CAPITAL LETTER Y WITH MACRON] + fallthrough + case '\u024E': // Ɏ [LATIN CAPITAL LETTER Y WITH STROKE] + fallthrough + case '\u028F': // ʏ [LATIN LETTER SMALL CAPITAL Y] + fallthrough + case '\u1E8E': // Ẏ [LATIN CAPITAL LETTER Y WITH DOT ABOVE] + fallthrough + case '\u1EF2': // Ỳ [LATIN CAPITAL LETTER Y WITH GRAVE] + fallthrough + case '\u1EF4': // Ỵ [LATIN CAPITAL LETTER Y WITH DOT BELOW] + fallthrough + case '\u1EF6': // Ỷ [LATIN 
CAPITAL LETTER Y WITH HOOK ABOVE] + fallthrough + case '\u1EF8': // Ỹ [LATIN CAPITAL LETTER Y WITH TILDE] + fallthrough + case '\u1EFE': // Ỿ [LATIN CAPITAL LETTER Y WITH LOOP] + fallthrough + case '\u24CE': // Ⓨ [CIRCLED LATIN CAPITAL LETTER Y] + fallthrough + case '\uFF39': // Y [FULLWIDTH LATIN CAPITAL LETTER Y] + output[outputPos] = 'Y' + outputPos++ + + case '\u00FD': // ý [LATIN SMALL LETTER Y WITH ACUTE] + fallthrough + case '\u00FF': // ÿ [LATIN SMALL LETTER Y WITH DIAERESIS] + fallthrough + case '\u0177': // ŷ [LATIN SMALL LETTER Y WITH CIRCUMFLEX] + fallthrough + case '\u01B4': // ƴ [LATIN SMALL LETTER Y WITH HOOK] + fallthrough + case '\u0233': // ȳ [LATIN SMALL LETTER Y WITH MACRON] + fallthrough + case '\u024F': // ɏ [LATIN SMALL LETTER Y WITH STROKE] + fallthrough + case '\u028E': // ʎ [LATIN SMALL LETTER TURNED Y] + fallthrough + case '\u1E8F': // ẏ [LATIN SMALL LETTER Y WITH DOT ABOVE] + fallthrough + case '\u1E99': // ẙ [LATIN SMALL LETTER Y WITH RING ABOVE] + fallthrough + case '\u1EF3': // ỳ [LATIN SMALL LETTER Y WITH GRAVE] + fallthrough + case '\u1EF5': // ỵ [LATIN SMALL LETTER Y WITH DOT BELOW] + fallthrough + case '\u1EF7': // ỷ [LATIN SMALL LETTER Y WITH HOOK ABOVE] + fallthrough + case '\u1EF9': // ỹ [LATIN SMALL LETTER Y WITH TILDE] + fallthrough + case '\u1EFF': // ỿ [LATIN SMALL LETTER Y WITH LOOP] + fallthrough + case '\u24E8': // ⓨ [CIRCLED LATIN SMALL LETTER Y] + fallthrough + case '\uFF59': // y [FULLWIDTH LATIN SMALL LETTER Y] + output[outputPos] = 'y' + outputPos++ + + case '\u24B4': // ⒴ [PARENTHESIZED LATIN SMALL LETTER Y] + output = output[:(len(output) + 2)] + output[outputPos] = '(' + outputPos++ + output[outputPos] = 'y' + outputPos++ + output[outputPos] = ')' + outputPos++ + + case '\u0179': // Ź [LATIN CAPITAL LETTER Z WITH ACUTE] + fallthrough + case '\u017B': // Ż [LATIN CAPITAL LETTER Z WITH DOT ABOVE] + fallthrough + case '\u017D': // Ž [LATIN CAPITAL LETTER Z WITH CARON] + fallthrough + case '\u01B5': // Ƶ [LATIN 
CAPITAL LETTER Z WITH STROKE] + fallthrough + case '\u021C': // Ȝ http://en.wikipedia.org/wiki/Yogh [LATIN CAPITAL LETTER YOGH] + fallthrough + case '\u0224': // Ȥ [LATIN CAPITAL LETTER Z WITH HOOK] + fallthrough + case '\u1D22': // ᴢ [LATIN LETTER SMALL CAPITAL Z] + fallthrough + case '\u1E90': // Ẑ [LATIN CAPITAL LETTER Z WITH CIRCUMFLEX] + fallthrough + case '\u1E92': // Ẓ [LATIN CAPITAL LETTER Z WITH DOT BELOW] + fallthrough + case '\u1E94': // Ẕ [LATIN CAPITAL LETTER Z WITH LINE BELOW] + fallthrough + case '\u24CF': // Ⓩ [CIRCLED LATIN CAPITAL LETTER Z] + fallthrough + case '\u2C6B': // Ⱬ [LATIN CAPITAL LETTER Z WITH DESCENDER] + fallthrough + case '\uA762': // Ꝣ [LATIN CAPITAL LETTER VISIGOTHIC Z] + fallthrough + case '\uFF3A': // Z [FULLWIDTH LATIN CAPITAL LETTER Z] + output[outputPos] = 'Z' + outputPos++ + + case '\u017A': // ź [LATIN SMALL LETTER Z WITH ACUTE] + fallthrough + case '\u017C': // ż [LATIN SMALL LETTER Z WITH DOT ABOVE] + fallthrough + case '\u017E': // ž [LATIN SMALL LETTER Z WITH CARON] + fallthrough + case '\u01B6': // ƶ [LATIN SMALL LETTER Z WITH STROKE] + fallthrough + case '\u021D': // ȝ http://en.wikipedia.org/wiki/Yogh [LATIN SMALL LETTER YOGH] + fallthrough + case '\u0225': // ȥ [LATIN SMALL LETTER Z WITH HOOK] + fallthrough + case '\u0240': // ɀ [LATIN SMALL LETTER Z WITH SWASH TAIL] + fallthrough + case '\u0290': // ʐ [LATIN SMALL LETTER Z WITH RETROFLEX HOOK] + fallthrough + case '\u0291': // ʑ [LATIN SMALL LETTER Z WITH CURL] + fallthrough + case '\u1D76': // ᵶ [LATIN SMALL LETTER Z WITH MIDDLE TILDE] + fallthrough + case '\u1D8E': // ᶎ [LATIN SMALL LETTER Z WITH PALATAL HOOK] + fallthrough + case '\u1E91': // ẑ [LATIN SMALL LETTER Z WITH CIRCUMFLEX] + fallthrough + case '\u1E93': // ẓ [LATIN SMALL LETTER Z WITH DOT BELOW] + fallthrough + case '\u1E95': // ẕ [LATIN SMALL LETTER Z WITH LINE BELOW] + fallthrough + case '\u24E9': // ⓩ [CIRCLED LATIN SMALL LETTER Z] + fallthrough + case '\u2C6C': // ⱬ [LATIN SMALL LETTER Z WITH 
DESCENDER] + fallthrough + case '\uA763': // ꝣ [LATIN SMALL LETTER VISIGOTHIC Z] + fallthrough + case '\uFF5A': // z [FULLWIDTH LATIN SMALL LETTER Z] + output[outputPos] = 'z' + outputPos++ + + case '\u24B5': // ⒵ [PARENTHESIZED LATIN SMALL LETTER Z] + output = output[:(len(output) + 2)] + output[outputPos] = '(' + outputPos++ + output[outputPos] = 'z' + outputPos++ + output[outputPos] = ')' + outputPos++ + + case '\u2070': // ⁰ [SUPERSCRIPT ZERO] + fallthrough + case '\u2080': // ₀ [SUBSCRIPT ZERO] + fallthrough + case '\u24EA': // ⓪ [CIRCLED DIGIT ZERO] + fallthrough + case '\u24FF': // ⓿ [NEGATIVE CIRCLED DIGIT ZERO] + fallthrough + case '\uFF10': // 0 [FULLWIDTH DIGIT ZERO] + output[outputPos] = '0' + outputPos++ + + case '\u00B9': // ¹ [SUPERSCRIPT ONE] + fallthrough + case '\u2081': // ₁ [SUBSCRIPT ONE] + fallthrough + case '\u2460': // ① [CIRCLED DIGIT ONE] + fallthrough + case '\u24F5': // ⓵ [DOUBLE CIRCLED DIGIT ONE] + fallthrough + case '\u2776': // ❶ [DINGBAT NEGATIVE CIRCLED DIGIT ONE] + fallthrough + case '\u2780': // ➀ [DINGBAT CIRCLED SANS-SERIF DIGIT ONE] + fallthrough + case '\u278A': // ➊ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT ONE] + fallthrough + case '\uFF11': // 1 [FULLWIDTH DIGIT ONE] + output[outputPos] = '1' + outputPos++ + + case '\u2488': // ⒈ [DIGIT ONE FULL STOP] + output = output[:(len(output) + 1)] + output[outputPos] = '1' + outputPos++ + output[outputPos] = '.' 
+ outputPos++ + + case '\u2474': // ⑴ [PARENTHESIZED DIGIT ONE] + output = output[:(len(output) + 2)] + output[outputPos] = '(' + outputPos++ + output[outputPos] = '1' + outputPos++ + output[outputPos] = ')' + outputPos++ + + case '\u00B2': // ² [SUPERSCRIPT TWO] + fallthrough + case '\u2082': // ₂ [SUBSCRIPT TWO] + fallthrough + case '\u2461': // ② [CIRCLED DIGIT TWO] + fallthrough + case '\u24F6': // ⓶ [DOUBLE CIRCLED DIGIT TWO] + fallthrough + case '\u2777': // ❷ [DINGBAT NEGATIVE CIRCLED DIGIT TWO] + fallthrough + case '\u2781': // ➁ [DINGBAT CIRCLED SANS-SERIF DIGIT TWO] + fallthrough + case '\u278B': // ➋ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT TWO] + fallthrough + case '\uFF12': // 2 [FULLWIDTH DIGIT TWO] + output[outputPos] = '2' + outputPos++ + + case '\u2489': // ⒉ [DIGIT TWO FULL STOP] + output = output[:(len(output) + 1)] + output[outputPos] = '2' + outputPos++ + output[outputPos] = '.' + outputPos++ + + case '\u2475': // ⑵ [PARENTHESIZED DIGIT TWO] + output = output[:(len(output) + 2)] + output[outputPos] = '(' + outputPos++ + output[outputPos] = '2' + outputPos++ + output[outputPos] = ')' + outputPos++ + + case '\u00B3': // ³ [SUPERSCRIPT THREE] + fallthrough + case '\u2083': // ₃ [SUBSCRIPT THREE] + fallthrough + case '\u2462': // ③ [CIRCLED DIGIT THREE] + fallthrough + case '\u24F7': // ⓷ [DOUBLE CIRCLED DIGIT THREE] + fallthrough + case '\u2778': // ❸ [DINGBAT NEGATIVE CIRCLED DIGIT THREE] + fallthrough + case '\u2782': // ➂ [DINGBAT CIRCLED SANS-SERIF DIGIT THREE] + fallthrough + case '\u278C': // ➌ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT THREE] + fallthrough + case '\uFF13': // 3 [FULLWIDTH DIGIT THREE] + output[outputPos] = '3' + outputPos++ + + case '\u248A': // ⒊ [DIGIT THREE FULL STOP] + output = output[:(len(output) + 1)] + output[outputPos] = '3' + outputPos++ + output[outputPos] = '.' 
+ outputPos++ + + case '\u2476': // ⑶ [PARENTHESIZED DIGIT THREE] + output = output[:(len(output) + 2)] + output[outputPos] = '(' + outputPos++ + output[outputPos] = '3' + outputPos++ + output[outputPos] = ')' + outputPos++ + + case '\u2074': // ⁴ [SUPERSCRIPT FOUR] + fallthrough + case '\u2084': // ₄ [SUBSCRIPT FOUR] + fallthrough + case '\u2463': // ④ [CIRCLED DIGIT FOUR] + fallthrough + case '\u24F8': // ⓸ [DOUBLE CIRCLED DIGIT FOUR] + fallthrough + case '\u2779': // ❹ [DINGBAT NEGATIVE CIRCLED DIGIT FOUR] + fallthrough + case '\u2783': // ➃ [DINGBAT CIRCLED SANS-SERIF DIGIT FOUR] + fallthrough + case '\u278D': // ➍ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT FOUR] + fallthrough + case '\uFF14': // 4 [FULLWIDTH DIGIT FOUR] + output[outputPos] = '4' + outputPos++ + + case '\u248B': // ⒋ [DIGIT FOUR FULL STOP] + output = output[:(len(output) + 1)] + output[outputPos] = '4' + outputPos++ + output[outputPos] = '.' + outputPos++ + + case '\u2477': // ⑷ [PARENTHESIZED DIGIT FOUR] + output = output[:(len(output) + 2)] + output[outputPos] = '(' + outputPos++ + output[outputPos] = '4' + outputPos++ + output[outputPos] = ')' + outputPos++ + + case '\u2075': // ⁵ [SUPERSCRIPT FIVE] + fallthrough + case '\u2085': // ₅ [SUBSCRIPT FIVE] + fallthrough + case '\u2464': // ⑤ [CIRCLED DIGIT FIVE] + fallthrough + case '\u24F9': // ⓹ [DOUBLE CIRCLED DIGIT FIVE] + fallthrough + case '\u277A': // ❺ [DINGBAT NEGATIVE CIRCLED DIGIT FIVE] + fallthrough + case '\u2784': // ➄ [DINGBAT CIRCLED SANS-SERIF DIGIT FIVE] + fallthrough + case '\u278E': // ➎ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT FIVE] + fallthrough + case '\uFF15': // 5 [FULLWIDTH DIGIT FIVE] + output[outputPos] = '5' + outputPos++ + + case '\u248C': // ⒌ [DIGIT FIVE FULL STOP] + output = output[:(len(output) + 1)] + output[outputPos] = '5' + outputPos++ + output[outputPos] = '.' 
+ outputPos++ + + case '\u2478': // ⑸ [PARENTHESIZED DIGIT FIVE] + output = output[:(len(output) + 2)] + output[outputPos] = '(' + outputPos++ + output[outputPos] = '5' + outputPos++ + output[outputPos] = ')' + outputPos++ + + case '\u2076': // ⁶ [SUPERSCRIPT SIX] + fallthrough + case '\u2086': // ₆ [SUBSCRIPT SIX] + fallthrough + case '\u2465': // ⑥ [CIRCLED DIGIT SIX] + fallthrough + case '\u24FA': // ⓺ [DOUBLE CIRCLED DIGIT SIX] + fallthrough + case '\u277B': // ❻ [DINGBAT NEGATIVE CIRCLED DIGIT SIX] + fallthrough + case '\u2785': // ➅ [DINGBAT CIRCLED SANS-SERIF DIGIT SIX] + fallthrough + case '\u278F': // ➏ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT SIX] + fallthrough + case '\uFF16': // 6 [FULLWIDTH DIGIT SIX] + output[outputPos] = '6' + outputPos++ + + case '\u248D': // ⒍ [DIGIT SIX FULL STOP] + output = output[:(len(output) + 1)] + output[outputPos] = '6' + outputPos++ + output[outputPos] = '.' + outputPos++ + + case '\u2479': // ⑹ [PARENTHESIZED DIGIT SIX] + output = output[:(len(output) + 2)] + output[outputPos] = '(' + outputPos++ + output[outputPos] = '6' + outputPos++ + output[outputPos] = ')' + outputPos++ + + case '\u2077': // ⁷ [SUPERSCRIPT SEVEN] + fallthrough + case '\u2087': // ₇ [SUBSCRIPT SEVEN] + fallthrough + case '\u2466': // ⑦ [CIRCLED DIGIT SEVEN] + fallthrough + case '\u24FB': // ⓻ [DOUBLE CIRCLED DIGIT SEVEN] + fallthrough + case '\u277C': // ❼ [DINGBAT NEGATIVE CIRCLED DIGIT SEVEN] + fallthrough + case '\u2786': // ➆ [DINGBAT CIRCLED SANS-SERIF DIGIT SEVEN] + fallthrough + case '\u2790': // ➐ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT SEVEN] + fallthrough + case '\uFF17': // 7 [FULLWIDTH DIGIT SEVEN] + output[outputPos] = '7' + outputPos++ + + case '\u248E': // ⒎ [DIGIT SEVEN FULL STOP] + output = output[:(len(output) + 1)] + output[outputPos] = '7' + outputPos++ + output[outputPos] = '.' 
+ outputPos++ + + case '\u247A': // ⑺ [PARENTHESIZED DIGIT SEVEN] + output = output[:(len(output) + 2)] + output[outputPos] = '(' + outputPos++ + output[outputPos] = '7' + outputPos++ + output[outputPos] = ')' + outputPos++ + + case '\u2078': // ⁸ [SUPERSCRIPT EIGHT] + fallthrough + case '\u2088': // ₈ [SUBSCRIPT EIGHT] + fallthrough + case '\u2467': // ⑧ [CIRCLED DIGIT EIGHT] + fallthrough + case '\u24FC': // ⓼ [DOUBLE CIRCLED DIGIT EIGHT] + fallthrough + case '\u277D': // ❽ [DINGBAT NEGATIVE CIRCLED DIGIT EIGHT] + fallthrough + case '\u2787': // ➇ [DINGBAT CIRCLED SANS-SERIF DIGIT EIGHT] + fallthrough + case '\u2791': // ➑ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT EIGHT] + fallthrough + case '\uFF18': // 8 [FULLWIDTH DIGIT EIGHT] + output[outputPos] = '8' + outputPos++ + + case '\u248F': // ⒏ [DIGIT EIGHT FULL STOP] + output = output[:(len(output) + 1)] + output[outputPos] = '8' + outputPos++ + output[outputPos] = '.' + outputPos++ + + case '\u247B': // ⑻ [PARENTHESIZED DIGIT EIGHT] + output = output[:(len(output) + 2)] + output[outputPos] = '(' + outputPos++ + output[outputPos] = '8' + outputPos++ + output[outputPos] = ')' + outputPos++ + + case '\u2079': // ⁹ [SUPERSCRIPT NINE] + fallthrough + case '\u2089': // ₉ [SUBSCRIPT NINE] + fallthrough + case '\u2468': // ⑨ [CIRCLED DIGIT NINE] + fallthrough + case '\u24FD': // ⓽ [DOUBLE CIRCLED DIGIT NINE] + fallthrough + case '\u277E': // ❾ [DINGBAT NEGATIVE CIRCLED DIGIT NINE] + fallthrough + case '\u2788': // ➈ [DINGBAT CIRCLED SANS-SERIF DIGIT NINE] + fallthrough + case '\u2792': // ➒ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT NINE] + fallthrough + case '\uFF19': // 9 [FULLWIDTH DIGIT NINE] + output[outputPos] = '9' + outputPos++ + + case '\u2490': // ⒐ [DIGIT NINE FULL STOP] + output = output[:(len(output) + 1)] + output[outputPos] = '9' + outputPos++ + output[outputPos] = '.' 
+ outputPos++ + + case '\u247C': // ⑼ [PARENTHESIZED DIGIT NINE] + output = output[:(len(output) + 2)] + output[outputPos] = '(' + outputPos++ + output[outputPos] = '9' + outputPos++ + output[outputPos] = ')' + outputPos++ + + case '\u2469': // ⑩ [CIRCLED NUMBER TEN] + fallthrough + case '\u24FE': // ⓾ [DOUBLE CIRCLED NUMBER TEN] + fallthrough + case '\u277F': // ❿ [DINGBAT NEGATIVE CIRCLED NUMBER TEN] + fallthrough + case '\u2789': // ➉ [DINGBAT CIRCLED SANS-SERIF NUMBER TEN] + fallthrough + case '\u2793': // ➓ [DINGBAT NEGATIVE CIRCLED SANS-SERIF NUMBER TEN] + output = output[:(len(output) + 1)] + output[outputPos] = '1' + outputPos++ + output[outputPos] = '0' + outputPos++ + + case '\u2491': // ⒑ [NUMBER TEN FULL STOP] + output = output[:(len(output) + 2)] + output[outputPos] = '1' + outputPos++ + output[outputPos] = '0' + outputPos++ + output[outputPos] = '.' + outputPos++ + + case '\u247D': // ⑽ [PARENTHESIZED NUMBER TEN] + output = output[:(len(output) + 3)] + output[outputPos] = '(' + outputPos++ + output[outputPos] = '1' + outputPos++ + output[outputPos] = '0' + outputPos++ + output[outputPos] = ')' + outputPos++ + + case '\u246A': // ⑪ [CIRCLED NUMBER ELEVEN] + fallthrough + case '\u24EB': // ⓫ [NEGATIVE CIRCLED NUMBER ELEVEN] + output = output[:(len(output) + 1)] + output[outputPos] = '1' + outputPos++ + output[outputPos] = '1' + outputPos++ + + case '\u2492': // ⒒ [NUMBER ELEVEN FULL STOP] + output = output[:(len(output) + 2)] + output[outputPos] = '1' + outputPos++ + output[outputPos] = '1' + outputPos++ + output[outputPos] = '.' 
+ outputPos++ + + case '\u247E': // ⑾ [PARENTHESIZED NUMBER ELEVEN] + output = output[:(len(output) + 3)] + output[outputPos] = '(' + outputPos++ + output[outputPos] = '1' + outputPos++ + output[outputPos] = '1' + outputPos++ + output[outputPos] = ')' + outputPos++ + + case '\u246B': // ⑫ [CIRCLED NUMBER TWELVE] + fallthrough + case '\u24EC': // ⓬ [NEGATIVE CIRCLED NUMBER TWELVE] + output = output[:(len(output) + 1)] + output[outputPos] = '1' + outputPos++ + output[outputPos] = '2' + outputPos++ + + case '\u2493': // ⒓ [NUMBER TWELVE FULL STOP] + output = output[:(len(output) + 2)] + output[outputPos] = '1' + outputPos++ + output[outputPos] = '2' + outputPos++ + output[outputPos] = '.' + outputPos++ + + case '\u247F': // ⑿ [PARENTHESIZED NUMBER TWELVE] + output = output[:(len(output) + 3)] + output[outputPos] = '(' + outputPos++ + output[outputPos] = '1' + outputPos++ + output[outputPos] = '2' + outputPos++ + output[outputPos] = ')' + outputPos++ + + case '\u246C': // ⑬ [CIRCLED NUMBER THIRTEEN] + fallthrough + case '\u24ED': // ⓭ [NEGATIVE CIRCLED NUMBER THIRTEEN] + output = output[:(len(output) + 1)] + output[outputPos] = '1' + outputPos++ + output[outputPos] = '3' + outputPos++ + + case '\u2494': // ⒔ [NUMBER THIRTEEN FULL STOP] + output = output[:(len(output) + 2)] + output[outputPos] = '1' + outputPos++ + output[outputPos] = '3' + outputPos++ + output[outputPos] = '.' 
+ outputPos++ + + case '\u2480': // ⒀ [PARENTHESIZED NUMBER THIRTEEN] + output = output[:(len(output) + 3)] + output[outputPos] = '(' + outputPos++ + output[outputPos] = '1' + outputPos++ + output[outputPos] = '3' + outputPos++ + output[outputPos] = ')' + outputPos++ + + case '\u246D': // ⑭ [CIRCLED NUMBER FOURTEEN] + fallthrough + case '\u24EE': // ⓮ [NEGATIVE CIRCLED NUMBER FOURTEEN] + output = output[:(len(output) + 1)] + output[outputPos] = '1' + outputPos++ + output[outputPos] = '4' + outputPos++ + + case '\u2495': // ⒕ [NUMBER FOURTEEN FULL STOP] + output = output[:(len(output) + 2)] + output[outputPos] = '1' + outputPos++ + output[outputPos] = '4' + outputPos++ + output[outputPos] = '.' + outputPos++ + + case '\u2481': // ⒁ [PARENTHESIZED NUMBER FOURTEEN] + output = output[:(len(output) + 3)] + output[outputPos] = '(' + outputPos++ + output[outputPos] = '1' + outputPos++ + output[outputPos] = '4' + outputPos++ + output[outputPos] = ')' + outputPos++ + + case '\u246E': // ⑮ [CIRCLED NUMBER FIFTEEN] + fallthrough + case '\u24EF': // ⓯ [NEGATIVE CIRCLED NUMBER FIFTEEN] + output = output[:(len(output) + 1)] + output[outputPos] = '1' + outputPos++ + output[outputPos] = '5' + outputPos++ + + case '\u2496': // ⒖ [NUMBER FIFTEEN FULL STOP] + output = output[:(len(output) + 2)] + output[outputPos] = '1' + outputPos++ + output[outputPos] = '5' + outputPos++ + output[outputPos] = '.' 
+ outputPos++ + + case '\u2482': // ⒂ [PARENTHESIZED NUMBER FIFTEEN] + output = output[:(len(output) + 3)] + output[outputPos] = '(' + outputPos++ + output[outputPos] = '1' + outputPos++ + output[outputPos] = '5' + outputPos++ + output[outputPos] = ')' + outputPos++ + + case '\u246F': // ⑯ [CIRCLED NUMBER SIXTEEN] + fallthrough + case '\u24F0': // ⓰ [NEGATIVE CIRCLED NUMBER SIXTEEN] + output = output[:(len(output) + 1)] + output[outputPos] = '1' + outputPos++ + output[outputPos] = '6' + outputPos++ + + case '\u2497': // ⒗ [NUMBER SIXTEEN FULL STOP] + output = output[:(len(output) + 2)] + output[outputPos] = '1' + outputPos++ + output[outputPos] = '6' + outputPos++ + output[outputPos] = '.' + outputPos++ + + case '\u2483': // ⒃ [PARENTHESIZED NUMBER SIXTEEN] + output = output[:(len(output) + 3)] + output[outputPos] = '(' + outputPos++ + output[outputPos] = '1' + outputPos++ + output[outputPos] = '6' + outputPos++ + output[outputPos] = ')' + outputPos++ + + case '\u2470': // ⑰ [CIRCLED NUMBER SEVENTEEN] + fallthrough + case '\u24F1': // ⓱ [NEGATIVE CIRCLED NUMBER SEVENTEEN] + output = output[:(len(output) + 1)] + output[outputPos] = '1' + outputPos++ + output[outputPos] = '7' + outputPos++ + + case '\u2498': // ⒘ [NUMBER SEVENTEEN FULL STOP] + output = output[:(len(output) + 2)] + output[outputPos] = '1' + outputPos++ + output[outputPos] = '7' + outputPos++ + output[outputPos] = '.' 
+ outputPos++ + + case '\u2484': // ⒄ [PARENTHESIZED NUMBER SEVENTEEN] + output = output[:(len(output) + 3)] + output[outputPos] = '(' + outputPos++ + output[outputPos] = '1' + outputPos++ + output[outputPos] = '7' + outputPos++ + output[outputPos] = ')' + outputPos++ + + case '\u2471': // ⑱ [CIRCLED NUMBER EIGHTEEN] + fallthrough + case '\u24F2': // ⓲ [NEGATIVE CIRCLED NUMBER EIGHTEEN] + output = output[:(len(output) + 1)] + output[outputPos] = '1' + outputPos++ + output[outputPos] = '8' + outputPos++ + + case '\u2499': // ⒙ [NUMBER EIGHTEEN FULL STOP] + output = output[:(len(output) + 2)] + output[outputPos] = '1' + outputPos++ + output[outputPos] = '8' + outputPos++ + output[outputPos] = '.' + outputPos++ + + case '\u2485': // ⒅ [PARENTHESIZED NUMBER EIGHTEEN] + output = output[:(len(output) + 3)] + output[outputPos] = '(' + outputPos++ + output[outputPos] = '1' + outputPos++ + output[outputPos] = '8' + outputPos++ + output[outputPos] = ')' + outputPos++ + + case '\u2472': // ⑲ [CIRCLED NUMBER NINETEEN] + fallthrough + case '\u24F3': // ⓳ [NEGATIVE CIRCLED NUMBER NINETEEN] + output = output[:(len(output) + 1)] + output[outputPos] = '1' + outputPos++ + output[outputPos] = '9' + outputPos++ + + case '\u249A': // ⒚ [NUMBER NINETEEN FULL STOP] + output = output[:(len(output) + 2)] + output[outputPos] = '1' + outputPos++ + output[outputPos] = '9' + outputPos++ + output[outputPos] = '.' 
+ outputPos++ + + case '\u2486': // ⒆ [PARENTHESIZED NUMBER NINETEEN] + output = output[:(len(output) + 3)] + output[outputPos] = '(' + outputPos++ + output[outputPos] = '1' + outputPos++ + output[outputPos] = '9' + outputPos++ + output[outputPos] = ')' + outputPos++ + + case '\u2473': // ⑳ [CIRCLED NUMBER TWENTY] + fallthrough + case '\u24F4': // ⓴ [NEGATIVE CIRCLED NUMBER TWENTY] + output = output[:(len(output) + 1)] + output[outputPos] = '2' + outputPos++ + output[outputPos] = '0' + outputPos++ + + case '\u249B': // ⒛ [NUMBER TWENTY FULL STOP] + output = output[:(len(output) + 2)] + output[outputPos] = '2' + outputPos++ + output[outputPos] = '0' + outputPos++ + output[outputPos] = '.' + outputPos++ + + case '\u2487': // ⒇ [PARENTHESIZED NUMBER TWENTY] + output = output[:(len(output) + 3)] + output[outputPos] = '(' + outputPos++ + output[outputPos] = '2' + outputPos++ + output[outputPos] = '0' + outputPos++ + output[outputPos] = ')' + outputPos++ + + case '\u00AB': // « [LEFT-POINTING DOUBLE ANGLE QUOTATION MARK] + fallthrough + case '\u00BB': // » [RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK] + fallthrough + case '\u201C': // “ [LEFT DOUBLE QUOTATION MARK] + fallthrough + case '\u201D': // ” [RIGHT DOUBLE QUOTATION MARK] + fallthrough + case '\u201E': // „ [DOUBLE LOW-9 QUOTATION MARK] + fallthrough + case '\u2033': // ″ [DOUBLE PRIME] + fallthrough + case '\u2036': // ‶ [REVERSED DOUBLE PRIME] + fallthrough + case '\u275D': // ❝ [HEAVY DOUBLE TURNED COMMA QUOTATION MARK ORNAMENT] + fallthrough + case '\u275E': // ❞ [HEAVY DOUBLE COMMA QUOTATION MARK ORNAMENT] + fallthrough + case '\u276E': // ❮ [HEAVY LEFT-POINTING ANGLE QUOTATION MARK ORNAMENT] + fallthrough + case '\u276F': // ❯ [HEAVY RIGHT-POINTING ANGLE QUOTATION MARK ORNAMENT] + fallthrough + case '\uFF02': // " [FULLWIDTH QUOTATION MARK] + output[outputPos] = '"' + outputPos++ + + case '\u2018': // ‘ [LEFT SINGLE QUOTATION MARK] + fallthrough + case '\u2019': // ’ [RIGHT SINGLE QUOTATION MARK] + 
fallthrough + case '\u201A': // ‚ [SINGLE LOW-9 QUOTATION MARK] + fallthrough + case '\u201B': // ‛ [SINGLE HIGH-REVERSED-9 QUOTATION MARK] + fallthrough + case '\u2032': // ′ [PRIME] + fallthrough + case '\u2035': // ‵ [REVERSED PRIME] + fallthrough + case '\u2039': // ‹ [SINGLE LEFT-POINTING ANGLE QUOTATION MARK] + fallthrough + case '\u203A': // › [SINGLE RIGHT-POINTING ANGLE QUOTATION MARK] + fallthrough + case '\u275B': // ❛ [HEAVY SINGLE TURNED COMMA QUOTATION MARK ORNAMENT] + fallthrough + case '\u275C': // ❜ [HEAVY SINGLE COMMA QUOTATION MARK ORNAMENT] + fallthrough + case '\uFF07': // ' [FULLWIDTH APOSTROPHE] + output[outputPos] = '\'' + outputPos++ + + case '\u2010': // ‐ [HYPHEN] + fallthrough + case '\u2011': // ‑ [NON-BREAKING HYPHEN] + fallthrough + case '\u2012': // ‒ [FIGURE DASH] + fallthrough + case '\u2013': // – [EN DASH] + fallthrough + case '\u2014': // — [EM DASH] + fallthrough + case '\u207B': // ⁻ [SUPERSCRIPT MINUS] + fallthrough + case '\u208B': // ₋ [SUBSCRIPT MINUS] + fallthrough + case '\uFF0D': // - [FULLWIDTH HYPHEN-MINUS] + output[outputPos] = '-' + outputPos++ + + case '\u2045': // ⁅ [LEFT SQUARE BRACKET WITH QUILL] + fallthrough + case '\u2772': // ❲ [LIGHT LEFT TORTOISE SHELL BRACKET ORNAMENT] + fallthrough + case '\uFF3B': // [ [FULLWIDTH LEFT SQUARE BRACKET] + output[outputPos] = '[' + outputPos++ + + case '\u2046': // ⁆ [RIGHT SQUARE BRACKET WITH QUILL] + fallthrough + case '\u2773': // ❳ [LIGHT RIGHT TORTOISE SHELL BRACKET ORNAMENT] + fallthrough + case '\uFF3D': // ] [FULLWIDTH RIGHT SQUARE BRACKET] + output[outputPos] = ']' + outputPos++ + + case '\u207D': // ⁽ [SUPERSCRIPT LEFT PARENTHESIS] + fallthrough + case '\u208D': // ₍ [SUBSCRIPT LEFT PARENTHESIS] + fallthrough + case '\u2768': // ❨ [MEDIUM LEFT PARENTHESIS ORNAMENT] + fallthrough + case '\u276A': // ❪ [MEDIUM FLATTENED LEFT PARENTHESIS ORNAMENT] + fallthrough + case '\uFF08': // ( [FULLWIDTH LEFT PARENTHESIS] + output[outputPos] = '(' + outputPos++ + + case 
'\u2E28': // ⸨ [LEFT DOUBLE PARENTHESIS] + output = output[:(len(output) + 1)] + output[outputPos] = '(' + outputPos++ + output[outputPos] = '(' + outputPos++ + + case '\u207E': // ⁾ [SUPERSCRIPT RIGHT PARENTHESIS] + fallthrough + case '\u208E': // ₎ [SUBSCRIPT RIGHT PARENTHESIS] + fallthrough + case '\u2769': // ❩ [MEDIUM RIGHT PARENTHESIS ORNAMENT] + fallthrough + case '\u276B': // ❫ [MEDIUM FLATTENED RIGHT PARENTHESIS ORNAMENT] + fallthrough + case '\uFF09': // ) [FULLWIDTH RIGHT PARENTHESIS] + output[outputPos] = ')' + outputPos++ + + case '\u2E29': // ⸩ [RIGHT DOUBLE PARENTHESIS] + output = output[:(len(output) + 1)] + output[outputPos] = ')' + outputPos++ + output[outputPos] = ')' + outputPos++ + + case '\u276C': // ❬ [MEDIUM LEFT-POINTING ANGLE BRACKET ORNAMENT] + fallthrough + case '\u2770': // ❰ [HEAVY LEFT-POINTING ANGLE BRACKET ORNAMENT] + fallthrough + case '\uFF1C': // < [FULLWIDTH LESS-THAN SIGN] + output[outputPos] = '<' + outputPos++ + + case '\u276D': // ❭ [MEDIUM RIGHT-POINTING ANGLE BRACKET ORNAMENT] + fallthrough + case '\u2771': // ❱ [HEAVY RIGHT-POINTING ANGLE BRACKET ORNAMENT] + fallthrough + case '\uFF1E': // > [FULLWIDTH GREATER-THAN SIGN] + output[outputPos] = '>' + outputPos++ + + case '\u2774': // ❴ [MEDIUM LEFT CURLY BRACKET ORNAMENT] + fallthrough + case '\uFF5B': // { [FULLWIDTH LEFT CURLY BRACKET] + output[outputPos] = '{' + outputPos++ + + case '\u2775': // ❵ [MEDIUM RIGHT CURLY BRACKET ORNAMENT] + fallthrough + case '\uFF5D': // } [FULLWIDTH RIGHT CURLY BRACKET] + output[outputPos] = '}' + outputPos++ + + case '\u207A': // ⁺ [SUPERSCRIPT PLUS SIGN] + fallthrough + case '\u208A': // ₊ [SUBSCRIPT PLUS SIGN] + fallthrough + case '\uFF0B': // + [FULLWIDTH PLUS SIGN] + output[outputPos] = '+' + outputPos++ + + case '\u207C': // ⁼ [SUPERSCRIPT EQUALS SIGN] + fallthrough + case '\u208C': // ₌ [SUBSCRIPT EQUALS SIGN] + fallthrough + case '\uFF1D': // = [FULLWIDTH EQUALS SIGN] + output[outputPos] = '=' + outputPos++ + + case '\uFF01': // ! 
[FULLWIDTH EXCLAMATION MARK] + output[outputPos] = '!' + outputPos++ + + case '\u203C': // ‼ [DOUBLE EXCLAMATION MARK] + output = output[:(len(output) + 1)] + output[outputPos] = '!' + outputPos++ + output[outputPos] = '!' + outputPos++ + + case '\u2049': // ⁉ [EXCLAMATION QUESTION MARK] + output = output[:(len(output) + 1)] + output[outputPos] = '!' + outputPos++ + output[outputPos] = '?' + outputPos++ + + case '\uFF03': // # [FULLWIDTH NUMBER SIGN] + output[outputPos] = '#' + outputPos++ + + case '\uFF04': // $ [FULLWIDTH DOLLAR SIGN] + output[outputPos] = '$' + outputPos++ + + case '\u2052': // ⁒ [COMMERCIAL MINUS SIGN] + fallthrough + case '\uFF05': // % [FULLWIDTH PERCENT SIGN] + output[outputPos] = '%' + outputPos++ + + case '\uFF06': // & [FULLWIDTH AMPERSAND] + output[outputPos] = '&' + outputPos++ + + case '\u204E': // ⁎ [LOW ASTERISK] + fallthrough + case '\uFF0A': // * [FULLWIDTH ASTERISK] + output[outputPos] = '*' + outputPos++ + + case '\uFF0C': // , [FULLWIDTH COMMA] + output[outputPos] = ',' + outputPos++ + + case '\uFF0E': // . [FULLWIDTH FULL STOP] + output[outputPos] = '.' + outputPos++ + + case '\u2044': // ⁄ [FRACTION SLASH] + fallthrough + case '\uFF0F': // / [FULLWIDTH SOLIDUS] + output[outputPos] = '/' + outputPos++ + + case '\uFF1A': // : [FULLWIDTH COLON] + output[outputPos] = ':' + outputPos++ + + case '\u204F': // ⁏ [REVERSED SEMICOLON] + fallthrough + case '\uFF1B': // ; [FULLWIDTH SEMICOLON] + output[outputPos] = ';' + outputPos++ + + case '\uFF1F': // ? [FULLWIDTH QUESTION MARK] + output[outputPos] = '?' + outputPos++ + + case '\u2047': // ⁇ [DOUBLE QUESTION MARK] + output = output[:(len(output) + 1)] + output[outputPos] = '?' + outputPos++ + output[outputPos] = '?' + outputPos++ + + case '\u2048': // ⁈ [QUESTION EXCLAMATION MARK] + output = output[:(len(output) + 1)] + output[outputPos] = '?' + outputPos++ + output[outputPos] = '!' 
+ outputPos++ + + case '\uFF20': // @ [FULLWIDTH COMMERCIAL AT] + output[outputPos] = '@' + outputPos++ + + case '\uFF3C': // \ [FULLWIDTH REVERSE SOLIDUS] + output[outputPos] = '\\' + outputPos++ + + case '\u2038': // ‸ [CARET] + fallthrough + case '\uFF3E': // ^ [FULLWIDTH CIRCUMFLEX ACCENT] + output[outputPos] = '^' + outputPos++ + + case '\uFF3F': // _ [FULLWIDTH LOW LINE] + output[outputPos] = '_' + outputPos++ + + case '\u2053': // ⁓ [SWUNG DASH] + fallthrough + case '\uFF5E': // ~ [FULLWIDTH TILDE] + output[outputPos] = '~' + outputPos++ + break + + default: + output[outputPos] = c + outputPos++ + } + } + } + return output +} diff --git a/analysis/char/asciifolding/asciifolding_test.go b/analysis/char/asciifolding/asciifolding_test.go new file mode 100644 index 000000000..82dc67868 --- /dev/null +++ b/analysis/char/asciifolding/asciifolding_test.go @@ -0,0 +1,56 @@ +// Copyright (c) 2018 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package asciifolding + +import ( + "reflect" + "testing" +) + +func TestAsciiFoldingFilter(t *testing.T) { + + tests := []struct { + input []byte + output []byte + }{ + { + // empty input passes + input: []byte(``), + output: []byte(``), + }, + { + // no modification for plain ASCII + input: []byte(`The quick brown fox jumps over the lazy dog`), + output: []byte(`The quick brown fox jumps over the lazy dog`), + }, + { + // Umlauts are folded to plain ASCII + input: []byte(`The quick bröwn fox jümps over the läzy dog`), + output: []byte(`The quick brown fox jumps over the lazy dog`), + }, { + // composite unicode runes are folded to more than one ASCII rune + input: []byte(`ÆꜴ`), + output: []byte(`AEAO`), + }, + } + + for _, test := range tests { + filter := New() + output := filter.Filter(test.input) + if !reflect.DeepEqual(output, test.output) { + t.Errorf("Expected:\n`%s`\ngot:\n`%s`\nfor:\n`%s`\n", string(test.output), string(output), string(test.input)) + } + } +} From 748984d4d21ea9edbfa89b75262e0857f5c03507 Mon Sep 17 00:00:00 2001 From: abhinavdangeti Date: Wed, 5 Dec 2018 11:48:31 -0800 Subject: [PATCH 506/728] Fix Boolean Searcher's MustNotSearcher from advancing incorrectly + Noted incorrect advancing of the boolean's currMustNot position: [0xc4200b82c0] BOOL Advance to ID: 13965, currID: 13943, currMust: 13943, currMustNot: 13966 [0xc4200b82c0] BOOL Advanced, now currID: 13965, currMust: 13965, currMustNot: 13967 + Fix the incorrect comparison for Boolean searcher's MustNot cached entry - If the current position is already where it should be do not advance - If the position is only less than (and not just inequal) to the requested ID, advance + Unit tests (thanks @mschoch) + Addresses https://github.com/blevesearch/bleve/issues/1059 --- search/searcher/search_boolean.go | 8 +-- search_test.go | 104 ++++++++++++++++++++++++++++++ 2 files changed, 108 insertions(+), 4 deletions(-) diff --git a/search/searcher/search_boolean.go 
b/search/searcher/search_boolean.go index 0a223bb0b..c8509ef09 100644 --- a/search/searcher/search_boolean.go +++ b/search/searcher/search_boolean.go @@ -356,10 +356,10 @@ func (s *BooleanSearcher) Advance(ctx *search.SearchContext, ID index.IndexInter } if s.mustNotSearcher != nil { - // Additional check for mustNotSearcher whose cursor isn't tracked by - // currentID to prevent it from moving when the searcher's already - // where it should be. - if s.currMustNot == nil || !s.currMustNot.IndexInternalID.Equals(ID) { + // Additional check for mustNotSearcher, whose cursor isn't tracked by + // currentID to prevent it from moving when the searcher's tracked + // position is already ahead of or at the requested ID. + if s.currMustNot == nil || s.currMustNot.IndexInternalID.Compare(ID) < 0 { if s.currMustNot != nil { ctx.DocumentMatchPool.Put(s.currMustNot) } diff --git a/search_test.go b/search_test.go index 59ac13402..113b9dfc1 100644 --- a/search_test.go +++ b/search_test.go @@ -33,6 +33,7 @@ import ( "github.com/blevesearch/bleve/analysis/tokenizer/whitespace" "github.com/blevesearch/bleve/document" "github.com/blevesearch/bleve/index/scorch" + "github.com/blevesearch/bleve/index/upsidedown" "github.com/blevesearch/bleve/mapping" "github.com/blevesearch/bleve/search" "github.com/blevesearch/bleve/search/query" @@ -888,3 +889,106 @@ func TestMultipleNestedBooleanMustNotSearchersOnScorch(t *testing.T) { t.Fatalf("Unexpected result, %v != 1", res.Total) } } + +func testBooleanMustNotSearcher(t *testing.T, indexName string) { + defer func() { + err := os.RemoveAll("testidx") + if err != nil { + t.Fatal(err) + } + }() + + im := NewIndexMapping() + idx, err := NewUsing("testidx", im, indexName, Config.DefaultKVStore, nil) + if err != nil { + t.Fatal(err) + } + + docs := []struct { + Name string + HasRole bool + }{ + { + Name: "13900", + }, + { + Name: "13901", + }, + { + Name: "13965", + }, + { + Name: "13966", + HasRole: true, + }, + { + Name: "13967", + HasRole: 
true, + }, + } + + for _, doc := range docs { + err := idx.Index(doc.Name, doc) + if err != nil { + t.Fatal(err) + } + } + + lhs := NewDocIDQuery([]string{"13965", "13966", "13967"}) + hasRole := NewBoolFieldQuery(true) + hasRole.SetField("HasRole") + rhs := NewBooleanQuery() + rhs.AddMustNot(hasRole) + + var compareLeftRightAndConjunction = func(idx Index, left, right query.Query) error { + // left + lr := NewSearchRequestOptions(left, 100, 0, false) + lres, err := idx.Search(lr) + if err != nil { + return fmt.Errorf("error left: %v", err) + } + lresIds := map[string]struct{}{} + for i := range lres.Hits { + lresIds[lres.Hits[i].ID] = struct{}{} + } + // right + rr := NewSearchRequestOptions(right, 100, 0, false) + rres, err := idx.Search(rr) + if err != nil { + return fmt.Errorf("error right: %v", err) + } + rresIds := map[string]struct{}{} + for i := range rres.Hits { + rresIds[rres.Hits[i].ID] = struct{}{} + } + // conjunction + cr := NewSearchRequestOptions(NewConjunctionQuery(left, right), 100, 0, false) + cres, err := idx.Search(cr) + if err != nil { + return fmt.Errorf("error conjunction: %v", err) + } + for i := range cres.Hits { + if _, ok := lresIds[cres.Hits[i].ID]; ok { + if _, ok := rresIds[cres.Hits[i].ID]; !ok { + return fmt.Errorf("error id %s missing from right", cres.Hits[i].ID) + } + } else { + return fmt.Errorf("error id %s missing from left", cres.Hits[i].ID) + } + } + return nil + } + + err = compareLeftRightAndConjunction(idx, lhs, rhs) + if err != nil { + t.Fatal(err) + } +} + +func TestBooleanMustNotSearcherUpsidedown(t *testing.T) { + testBooleanMustNotSearcher(t, upsidedown.Name) +} + +func TestBooleanMustNotSearcherScorch(t *testing.T) { + testBooleanMustNotSearcher(t, scorch.Name) +} From dd49ef0aa0a006a46e90b20dcba578265c043c07 Mon Sep 17 00:00:00 2001 From: abhinavdangeti Date: Wed, 5 Dec 2018 17:13:59 -0800 Subject: [PATCH 507/728] Advancing boolean searcher's currentID w.r.t currMustNot + Advance the boolean searcher's nested 
searchers only if the cursor is trailing the lookup ID, additionally if the mustNotSearcher has been initialized, ensure that the cursor used to track the mustNotSearcher (which isn't tracked by the other cursor) is trailing the lookup ID as well, to avoid any searcher from advancing incorrectly. + Unit test that catches the cornercase: TestNestedBooleanMustNotSearcherUpsidedown --- search/searcher/search_boolean.go | 27 ++++++++++++++------------- search_test.go | 19 ++++++++++++------- 2 files changed, 26 insertions(+), 20 deletions(-) diff --git a/search/searcher/search_boolean.go b/search/searcher/search_boolean.go index c8509ef09..bbbced479 100644 --- a/search/searcher/search_boolean.go +++ b/search/searcher/search_boolean.go @@ -332,8 +332,14 @@ func (s *BooleanSearcher) Advance(ctx *search.SearchContext, ID index.IndexInter } } - // Advance the searcher only if the cursor is trailing the lookup ID - if s.currentID == nil || s.currentID.Compare(ID) < 0 { + // Advance the searchers only if the currentID cursor is trailing the lookup ID, + // additionally if the mustNotSearcher has been initialized, ensure that the + // cursor used to track the mustNotSearcher (currMustNot, which isn't tracked by + // currentID) is trailing the lookup ID as well - for in the case where currentID + // is nil and currMustNot is already at or ahead of the lookup ID, we MUST NOT + // advance the currentID or the currMustNot cursors. + if (s.currentID == nil || s.currentID.Compare(ID) < 0) && + (s.currMustNot == nil || s.currMustNot.IndexInternalID.Compare(ID) < 0) { var err error if s.mustSearcher != nil { if s.currMust != nil { @@ -356,17 +362,12 @@ func (s *BooleanSearcher) Advance(ctx *search.SearchContext, ID index.IndexInter } if s.mustNotSearcher != nil { - // Additional check for mustNotSearcher, whose cursor isn't tracked by - // currentID to prevent it from moving when the searcher's tracked - // position is already ahead of or at the requested ID. 
- if s.currMustNot == nil || s.currMustNot.IndexInternalID.Compare(ID) < 0 { - if s.currMustNot != nil { - ctx.DocumentMatchPool.Put(s.currMustNot) - } - s.currMustNot, err = s.mustNotSearcher.Advance(ctx, ID) - if err != nil { - return nil, err - } + if s.currMustNot != nil { + ctx.DocumentMatchPool.Put(s.currMustNot) + } + s.currMustNot, err = s.mustNotSearcher.Advance(ctx, ID) + if err != nil { + return nil, err } } diff --git a/search_test.go b/search_test.go index 113b9dfc1..d078ecfdc 100644 --- a/search_test.go +++ b/search_test.go @@ -553,7 +553,7 @@ func TestNestedBooleanSearchers(t *testing.T) { } } -func TestNestedBooleanMustNotSearcher(t *testing.T) { +func TestNestedBooleanMustNotSearcherUpsidedown(t *testing.T) { // create an index with default settings idxMapping := NewIndexMapping() idx, err := New("testidx", idxMapping) @@ -891,19 +891,24 @@ func TestMultipleNestedBooleanMustNotSearchersOnScorch(t *testing.T) { } func testBooleanMustNotSearcher(t *testing.T, indexName string) { + im := NewIndexMapping() + idx, err := NewUsing("testidx", im, indexName, Config.DefaultKVStore, nil) + if err != nil { + t.Fatal(err) + } + defer func() { + err = idx.Close() + if err != nil { + t.Fatal(err) + } + err := os.RemoveAll("testidx") if err != nil { t.Fatal(err) } }() - im := NewIndexMapping() - idx, err := NewUsing("testidx", im, indexName, Config.DefaultKVStore, nil) - if err != nil { - t.Fatal(err) - } - docs := []struct { Name string HasRole bool From be3692849832dd9d505186d5b01ddad203439065 Mon Sep 17 00:00:00 2001 From: Sreekanth Sivasankaran Date: Mon, 10 Dec 2018 12:00:13 +0530 Subject: [PATCH 508/728] MB-32287 - zap docvalue cmd is broken Fixing the docvalue cmd to adopt the newer disk format And minor improvements --- cmd/bleve/cmd/zap/docvalue.go | 207 +++++++++++++++++----------------- 1 file changed, 106 insertions(+), 101 deletions(-) diff --git a/cmd/bleve/cmd/zap/docvalue.go b/cmd/bleve/cmd/zap/docvalue.go index 065f078e4..b8563be93 100644 --- 
a/cmd/bleve/cmd/zap/docvalue.go +++ b/cmd/bleve/cmd/zap/docvalue.go @@ -48,10 +48,10 @@ var docvalueCmd = &cobra.Command{ // iterate through fields index var fieldInv []string - var id, read, fieldLoc uint64 - var nread int + var id uint64 for fieldsIndexOffset+(8*id) < fieldsIndexEnd { - addr := binary.BigEndian.Uint64(data[fieldsIndexOffset+(8*id) : fieldsIndexOffset+(8*id)+8]) + addr := binary.BigEndian.Uint64( + data[fieldsIndexOffset+(8*id) : fieldsIndexOffset+(8*id)+8]) var n uint64 _, read := binary.Uvarint(data[addr+n : fieldsIndexEnd]) n += uint64(read) @@ -67,164 +67,160 @@ var docvalueCmd = &cobra.Command{ } dvLoc := segment.DocValueOffset() - fieldDvLoc, total, fdvread := uint64(0), uint64(0), int(0) - + var n int var fieldName string var fieldID uint16 + var fieldDvSize float64 + var read, fieldStartLoc, fieldEndLoc uint64 + var fieldChunkCount, fieldDvStart, fieldDvEnd, totalDvSize uint64 + var fieldChunkLens []uint64 // if no fields are specified then print the docValue offsets for all fields set for id, field := range fieldInv { - fieldLoc, fdvread = binary.Uvarint(data[dvLoc+read : dvLoc+read+binary.MaxVarintLen64]) - if fdvread <= 0 { - return fmt.Errorf("loadDvIterators: failed to read the docvalue offsets for field %d", fieldID) + fieldStartLoc, n = binary.Uvarint( + data[dvLoc+read : dvLoc+read+binary.MaxVarintLen64]) + if n <= 0 { + return fmt.Errorf("loadDvIterators: failed to read the "+ + " docvalue offsets for field %d", fieldID) + } + + read += uint64(n) + fieldEndLoc, n = binary.Uvarint( + data[dvLoc+read : dvLoc+read+binary.MaxVarintLen64]) + if n <= 0 { + return fmt.Errorf("Failed to read the docvalue offset "+ + "end for field %d", fieldID) } - read += uint64(fdvread) - if fieldLoc == math.MaxUint64 { - fmt.Printf("fieldID: %d '%s' docvalue at %d (%x) not persisted \n", id, field, fieldLoc, fieldLoc) + + read += uint64(n) + if fieldStartLoc == math.MaxUint64 && len(args) == 1 { + fmt.Printf("FieldID: %d '%s' docvalue at %d (%x) not "+ 
+ " persisted \n", id, field, fieldStartLoc, fieldStartLoc) continue } - var offset, clen, numChunks uint64 - numChunks, nread = binary.Uvarint(data[fieldLoc : fieldLoc+binary.MaxVarintLen64]) - if nread <= 0 { - return fmt.Errorf("failed to read the field "+ - "doc values for field %s", fieldName) + var chunkOffsetsPosition, offset, numChunks uint64 + if fieldEndLoc-fieldStartLoc > 16 { + numChunks = binary.BigEndian.Uint64(data[fieldEndLoc-8 : fieldEndLoc]) + // read the length of chunk offsets + chunkOffsetsLen := binary.BigEndian.Uint64(data[fieldEndLoc-16 : fieldEndLoc-8]) + // acquire position of chunk offsets + chunkOffsetsPosition = (fieldEndLoc - 16) - chunkOffsetsLen } - offset += uint64(nread) - // read the length of chunks - totalSize := uint64(0) + // read the chunk offsets chunkLens := make([]uint64, numChunks) + dvSize := uint64(0) for i := 0; i < int(numChunks); i++ { - clen, nread = binary.Uvarint(data[fieldLoc+offset : fieldLoc+offset+binary.MaxVarintLen64]) - if nread <= 0 { - return fmt.Errorf("corrupted chunk length for chunk number: %d", i) + length, read := binary.Uvarint( + data[chunkOffsetsPosition+offset : chunkOffsetsPosition+offset+ + binary.MaxVarintLen64]) + if read <= 0 { + return fmt.Errorf("Corrupted chunk offset during segment load") } - chunkLens[i] = clen - totalSize += clen - offset += uint64(nread) + offset += uint64(read) + chunkLens[i] = length + dvSize += length } - total += totalSize + totalDvSize += dvSize + // if no field args are given, then print out the dv locations for all fields if len(args) == 1 { - // if no field args are given, then print out the dv locations for all fields - mbsize := float64(totalSize) / (1024 * 1024) - fmt.Printf("fieldID: %d '%s' docvalue at %d (%x) numChunks %d diskSize %.3f MB\n", id, field, fieldLoc, fieldLoc, numChunks, mbsize) + mbsize := float64(dvSize) / (1024 * 1024) + fmt.Printf("FieldID: %d '%s' docvalue at %d (%x) numChunks "+ + "%d diskSize %.6f MB\n", id, field, fieldStartLoc, + 
fieldStartLoc, numChunks, mbsize) continue } - if field != args[1] { - continue - } else { - fieldDvLoc = fieldLoc + // if the field is the requested one for more details, + // then remember the details + if field == args[1] { + fieldDvStart = fieldStartLoc + fieldDvEnd = fieldEndLoc fieldName = field fieldID = uint16(id) + fieldDvSize = float64(dvSize) / (1024 * 1024) + fieldChunkLens = append(fieldChunkLens, chunkLens...) + fieldChunkCount = numChunks } - } - mbsize := float64(total) / (1024 * 1024) - fmt.Printf("Total Doc Values Size on Disk: %.3f MB\n", mbsize) + mbsize := float64(totalDvSize) / (1024 * 1024) + fmt.Printf("Total Doc Values Size on Disk: %.6f MB\n", mbsize) // done with the fields dv locs printing for the given zap file if len(args) == 1 { return nil } - if fieldName == "" || fieldDvLoc == 0 { - return fmt.Errorf("no field found for given field arg: %s", args[1]) - } - - // read the number of chunks - var offset, clen, numChunks uint64 - numChunks, nread = binary.Uvarint(data[fieldDvLoc : fieldDvLoc+binary.MaxVarintLen64]) - if nread <= 0 { - return fmt.Errorf("failed to read the field "+ - "doc values for field %s", fieldName) - } - offset += uint64(nread) - - if len(args) == 2 { - fmt.Printf("number of chunks: %d\n", numChunks) - } - - // read the length of chunks - chunkLens := make([]uint64, numChunks) - for i := 0; i < int(numChunks); i++ { - clen, nread = binary.Uvarint(data[fieldDvLoc+offset : fieldDvLoc+offset+binary.MaxVarintLen64]) - if nread <= 0 { - return fmt.Errorf("corrupted chunk length for chunk number: %d", i) - } - - chunkLens[i] = clen - offset += uint64(nread) - if len(args) == 2 { - fmt.Printf("chunk: %d size: %d \n", i, clen) - } - /* - TODO => dump all chunk headers?? 
- if len(args) == 3 && args[2] == ">" { - dumpChunkDocNums(data, ) - - }*/ + if fieldName == "" || fieldDvEnd == 0 { + return fmt.Errorf("No docvalue persisted for given field arg: %s", + args[1]) } if len(args) == 2 { + fmt.Printf("FieldID: %d '%s' docvalue at %d (%x) numChunks "+ + "%d diskSize %.6f MB\n", fieldID, fieldName, fieldDvStart, + fieldDvStart, fieldChunkCount, fieldDvSize) + fmt.Printf("Number of docvalue chunks: %d\n", fieldChunkCount) return nil } localDocNum, err := strconv.Atoi(args[2]) if err != nil { - return fmt.Errorf("unable to parse doc number: %v", err) + return fmt.Errorf("Unable to parse doc number: %v", err) } if localDocNum >= int(segment.NumDocs()) { - return fmt.Errorf("invalid doc number %d (valid 0 - %d)", localDocNum, segment.NumDocs()-1) + return fmt.Errorf("Invalid doc number %d (valid 0 - %d)", + localDocNum, segment.NumDocs()-1) } // find the chunkNumber where the docValues are stored docInChunk := uint64(localDocNum) / uint64(segment.ChunkFactor()) - if numChunks < docInChunk { - return fmt.Errorf("no chunk exists for chunk number: %d for localDocNum: %d", docInChunk, localDocNum) - } - - destChunkDataLoc := fieldDvLoc + offset - for i := 0; i < int(docInChunk); i++ { - destChunkDataLoc += chunkLens[i] + if fieldChunkCount < docInChunk { + return fmt.Errorf("No chunk exists for chunk number: %d for "+ + "localDocNum: %d", docInChunk, localDocNum) } - curChunkSize := chunkLens[docInChunk] - if curChunkSize == 0 { - return nil - } + start, end := readChunkBoundary(int(docInChunk), fieldChunkLens) + destChunkDataLoc := fieldDvStart + start + curChunkEnd := fieldDvStart + end // read the number of docs reside in the chunk - numDocs := uint64(0) - numDocs, nread = binary.Uvarint(data[destChunkDataLoc : destChunkDataLoc+binary.MaxVarintLen64]) - if nread <= 0 { - return fmt.Errorf("failed to read the target chunk: %d", docInChunk) + var numDocs uint64 + var nr int + numDocs, nr = binary.Uvarint( + data[destChunkDataLoc : 
destChunkDataLoc+binary.MaxVarintLen64]) + if nr <= 0 { + return fmt.Errorf("Failed to read the chunk") } - chunkMetaLoc := destChunkDataLoc + uint64(nread) - offset = uint64(0) + chunkMetaLoc := destChunkDataLoc + uint64(nr) curChunkHeader := make([]zap.MetaData, int(numDocs)) + offset := uint64(0) for i := 0; i < int(numDocs); i++ { - curChunkHeader[i].DocNum, nread = binary.Uvarint(data[chunkMetaLoc+offset : chunkMetaLoc+offset+binary.MaxVarintLen64]) - offset += uint64(nread) - curChunkHeader[i].DocDvOffset, nread = binary.Uvarint(data[chunkMetaLoc+offset : chunkMetaLoc+offset+binary.MaxVarintLen64]) - offset += uint64(nread) + curChunkHeader[i].DocNum, nr = binary.Uvarint( + data[chunkMetaLoc+offset : chunkMetaLoc+offset+binary.MaxVarintLen64]) + offset += uint64(nr) + curChunkHeader[i].DocDvOffset, nr = binary.Uvarint( + data[chunkMetaLoc+offset : chunkMetaLoc+offset+binary.MaxVarintLen64]) + offset += uint64(nr) } compressedDataLoc := chunkMetaLoc + offset - dataLength := destChunkDataLoc + curChunkSize - compressedDataLoc + dataLength := curChunkEnd - compressedDataLoc curChunkData := data[compressedDataLoc : compressedDataLoc+dataLength] - start, length := getDocValueLocs(uint64(localDocNum), curChunkHeader) - if start == math.MaxUint64 || length == math.MaxUint64 { - fmt.Printf("no field values found for localDocNum: %d\n", localDocNum) - fmt.Printf("Try docNums present in chunk: %s\n", metaDataDocNums(curChunkHeader)) + start, end = getDocValueLocs(uint64(localDocNum), curChunkHeader) + if start == math.MaxUint64 || end == math.MaxUint64 { + fmt.Printf("No field values found for localDocNum: %d\n", + localDocNum) + fmt.Printf("Try docNums present in chunk: %s\n", + metaDataDocNums(curChunkHeader)) return nil } // uncompress the already loaded data @@ -236,8 +232,9 @@ var docvalueCmd = &cobra.Command{ var termSeparator byte = 0xff var termSeparatorSplitSlice = []byte{termSeparator} + // pick the terms for the given docNum - uncompressed = 
uncompressed[start : start+length] + uncompressed = uncompressed[start:end] for { i := bytes.Index(uncompressed, termSeparatorSplitSlice) if i < 0 { @@ -270,6 +267,14 @@ func metaDataDocNums(metaHeader []zap.MetaData) string { return docNums } +func readChunkBoundary(chunk int, offsets []uint64) (uint64, uint64) { + var start uint64 + if chunk > 0 { + start = offsets[chunk-1] + } + return start, offsets[chunk] +} + func init() { RootCmd.AddCommand(docvalueCmd) } From 685ce244479fa0933d68c1f8ccb7a465898ec4d1 Mon Sep 17 00:00:00 2001 From: abhinavdangeti Date: Mon, 10 Dec 2018 16:39:37 -0800 Subject: [PATCH 509/728] Protect conjunction searcher with no searchers Fixes https://github.com/blevesearch/bleve/issues/1074 --- search/searcher/search_conjunction.go | 2 +- search_test.go | 17 +++++++++++++++++ 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/search/searcher/search_conjunction.go b/search/searcher/search_conjunction.go index a48052679..62966c13f 100644 --- a/search/searcher/search_conjunction.go +++ b/search/searcher/search_conjunction.go @@ -158,7 +158,7 @@ func (s *ConjunctionSearcher) Next(ctx *search.SearchContext) (*search.DocumentM var rv *search.DocumentMatch var err error OUTER: - for s.currs[s.maxIDIdx] != nil { + for s.maxIDIdx < len(s.currs) && s.currs[s.maxIDIdx] != nil { maxID := s.currs[s.maxIDIdx].IndexInternalID i := 0 diff --git a/search_test.go b/search_test.go index d078ecfdc..52f406d62 100644 --- a/search_test.go +++ b/search_test.go @@ -997,3 +997,20 @@ func TestBooleanMustNotSearcherUpsidedown(t *testing.T) { func TestBooleanMustNotSearcherScorch(t *testing.T) { testBooleanMustNotSearcher(t, scorch.Name) } + +func TestQueryStringEmptyConjunctionSearcher(t *testing.T) { + mapping := NewIndexMapping() + mapping.DefaultAnalyzer = keyword.Name + index, err := NewMemOnly(mapping) + if err != nil { + t.Fatal(err) + } + defer func() { + _ = index.Close() + }() + + query := NewQueryStringQuery("foo:bar +baz:\"\"") + searchReq := 
NewSearchRequest(query) + + _, _ = index.Search(searchReq) +} From 3120a9e67c6a39458dead3f7084fe4481ec1727c Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Tue, 11 Dec 2018 17:06:35 -0500 Subject: [PATCH 510/728] renamed Callbacks to PersistedCallback changed to single callback per-batch removed callbacks from snapshotReversion (unused) removed unnecessary comment from upside impl --- index.go | 8 ++++---- index/index.go | 17 ++++++++--------- index/scorch/introducer.go | 11 ++++++----- index/scorch/persister.go | 10 +++++----- index/scorch/scorch.go | 20 ++++++++++---------- index/scorch/scorch_test.go | 8 ++++---- index/upsidedown/upsidedown.go | 6 +++--- 7 files changed, 40 insertions(+), 40 deletions(-) diff --git a/index.go b/index.go index 0747dd65f..99357eee0 100644 --- a/index.go +++ b/index.go @@ -129,12 +129,12 @@ func (b *Batch) Merge(o *Batch) { } } -func (b *Batch) AddCallback(f index.BatchCallback) { - b.internal.AddCallback(f) +func (b *Batch) SetPersistedCallback(f index.BatchCallback) { + b.internal.SetPersistedCallback(f) } -func (b *Batch) Callbacks() []index.BatchCallback { - return b.internal.Callbacks() +func (b *Batch) PersistedCallback() index.BatchCallback { + return b.internal.PersistedCallback() } // An Index implements all the indexing and searching diff --git a/index/index.go b/index/index.go index 398c9fc87..4ae79056e 100644 --- a/index/index.go +++ b/index/index.go @@ -251,16 +251,15 @@ type DocIDReader interface { type BatchCallback func(error) type Batch struct { - IndexOps map[string]*document.Document - InternalOps map[string][]byte - callbacks []BatchCallback + IndexOps map[string]*document.Document + InternalOps map[string][]byte + persistedCallback BatchCallback } func NewBatch() *Batch { return &Batch{ IndexOps: make(map[string]*document.Document), InternalOps: make(map[string][]byte), - callbacks: nil, } } @@ -280,12 +279,12 @@ func (b *Batch) DeleteInternal(key []byte) { b.InternalOps[string(key)] = nil } -func (b *Batch) 
AddCallback(f BatchCallback) { - b.callbacks = append(b.callbacks, f) +func (b *Batch) SetPersistedCallback(f BatchCallback) { + b.persistedCallback = f } -func (b *Batch) Callbacks() []BatchCallback { - return b.callbacks +func (b *Batch) PersistedCallback() BatchCallback { + return b.persistedCallback } func (b *Batch) String() string { @@ -310,7 +309,7 @@ func (b *Batch) String() string { func (b *Batch) Reset() { b.IndexOps = make(map[string]*document.Document) b.InternalOps = make(map[string][]byte) - b.callbacks = nil + b.persistedCallback = nil } func (b *Batch) Merge(o *Batch) { diff --git a/index/scorch/introducer.go b/index/scorch/introducer.go index 836e716c4..ac627796f 100644 --- a/index/scorch/introducer.go +++ b/index/scorch/introducer.go @@ -31,9 +31,9 @@ type segmentIntroduction struct { ids []string internal map[string][]byte - applied chan error - persisted chan error - callbacks []index.BatchCallback + applied chan error + persisted chan error + persistedCallback index.BatchCallback } type persistIntroduction struct { @@ -50,7 +50,6 @@ type snapshotReversion struct { snapshot *IndexSnapshot applied chan error persisted chan error - callbacks []index.BatchCallback } func (s *Scorch) mainLoop() { @@ -216,7 +215,9 @@ func (s *Scorch) introduceSegment(next *segmentIntroduction) error { if next.persisted != nil { s.rootPersisted = append(s.rootPersisted, next.persisted) } - s.callbacks = append(s.callbacks, next.callbacks...) 
+ if next.persistedCallback != nil { + s.persistedCallbacks = append(s.persistedCallbacks, next.persistedCallback) + } // swap in new index snapshot newSnapshot.epoch = s.nextSnapshotEpoch s.nextSnapshotEpoch++ diff --git a/index/scorch/persister.go b/index/scorch/persister.go index ebbc240e7..2ba50867d 100644 --- a/index/scorch/persister.go +++ b/index/scorch/persister.go @@ -116,7 +116,7 @@ OUTER: var ourSnapshot *IndexSnapshot var ourPersisted []chan error - var ourCallbacks []index.BatchCallback + var ourPersistedCallbacks []index.BatchCallback // check to see if there is a new snapshot to persist s.rootLock.Lock() @@ -125,8 +125,8 @@ OUTER: ourSnapshot.AddRef() ourPersisted = s.rootPersisted s.rootPersisted = nil - ourCallbacks = s.callbacks - s.callbacks = nil + ourPersistedCallbacks = s.persistedCallbacks + s.persistedCallbacks = nil atomic.StoreUint64(&s.iStats.persistSnapshotSize, uint64(ourSnapshot.Size())) atomic.StoreUint64(&s.iStats.persistEpoch, ourSnapshot.epoch) } @@ -154,8 +154,8 @@ OUTER: atomic.AddUint64(&s.stats.TotPersistLoopErr, 1) continue OUTER } - for i := range ourCallbacks { - ourCallbacks[i](err) + for i := range ourPersistedCallbacks { + ourPersistedCallbacks[i](err) } atomic.StoreUint64(&s.stats.LastPersistedEpoch, ourSnapshot.epoch) diff --git a/index/scorch/scorch.go b/index/scorch/scorch.go index 89b3ba408..3430bd368 100644 --- a/index/scorch/scorch.go +++ b/index/scorch/scorch.go @@ -54,7 +54,7 @@ type Scorch struct { rootLock sync.RWMutex root *IndexSnapshot // holds 1 ref-count on the root rootPersisted []chan error // closed when root is persisted - callbacks []index.BatchCallback + persistedCallbacks []index.BatchCallback nextSnapshotEpoch uint64 eligibleForRemoval []uint64 // Index snapshot epochs that are safe to GC. ineligibleForRemoval map[string]bool // Filenames that should not be GC'ed yet. 
@@ -356,7 +356,7 @@ func (s *Scorch) Batch(batch *index.Batch) (err error) { atomic.AddUint64(&s.stats.TotBatchesEmpty, 1) } - err = s.prepareSegment(newSegment, ids, batch.InternalOps, batch.Callbacks()) + err = s.prepareSegment(newSegment, ids, batch.InternalOps, batch.PersistedCallback()) if err != nil { if newSegment != nil { _ = newSegment.Close() @@ -376,17 +376,17 @@ func (s *Scorch) Batch(batch *index.Batch) (err error) { } func (s *Scorch) prepareSegment(newSegment segment.Segment, ids []string, - internalOps map[string][]byte, callbacks []index.BatchCallback) error { + internalOps map[string][]byte, persistedCallback index.BatchCallback) error { // new introduction introduction := &segmentIntroduction{ - id: atomic.AddUint64(&s.nextSegmentID, 1), - data: newSegment, - ids: ids, - obsoletes: make(map[uint64]*roaring.Bitmap), - internal: internalOps, - applied: make(chan error), - callbacks: callbacks, + id: atomic.AddUint64(&s.nextSegmentID, 1), + data: newSegment, + ids: ids, + obsoletes: make(map[uint64]*roaring.Bitmap), + internal: internalOps, + applied: make(chan error), + persistedCallback: persistedCallback, } if !s.unsafeBatch { diff --git a/index/scorch/scorch_test.go b/index/scorch/scorch_test.go index 0e78a45d4..a1691fd09 100644 --- a/index/scorch/scorch_test.go +++ b/index/scorch/scorch_test.go @@ -932,16 +932,16 @@ func TestIndexBatch(t *testing.T) { } func TestIndexBatchWithCallbacks(t *testing.T) { - + cfg := CreateConfig("TestIndexBatchWithCallbacks") defer func() { - err := DestroyTest() + err := DestroyTest(cfg) if err != nil { t.Fatal(err) } }() analysisQueue := index.NewAnalysisQueue(1) - idx, err := NewScorch(Name, testConfig, analysisQueue) + idx, err := NewScorch(Name, cfg, analysisQueue) if err != nil { t.Fatal(err) } @@ -964,7 +964,7 @@ func TestIndexBatchWithCallbacks(t *testing.T) { doc := document.NewDocument("3") doc.AddField(document.NewTextField("name", []uint64{}, []byte("test3"))) batch.Update(doc) - 
batch.AddCallback(func(e error) { + batch.SetPersistedCallback(func(e error) { updated = true cbErr = e diff --git a/index/upsidedown/upsidedown.go b/index/upsidedown/upsidedown.go index d0a27c000..8edbb5b3d 100644 --- a/index/upsidedown/upsidedown.go +++ b/index/upsidedown/upsidedown.go @@ -959,9 +959,9 @@ func (udc *UpsideDownCouch) Batch(batch *index.Batch) (err error) { atomic.AddUint64(&udc.stats.errors, 1) } - // For sake of completeness - for i := range batch.Callbacks() { - batch.Callbacks()[i](err) + persistedCallback := batch.PersistedCallback() + if persistedCallback != nil { + persistedCallback(err) } return } From 9ec8f83514c46f72b2fc97e2dc7a5eabef435615 Mon Sep 17 00:00:00 2001 From: Kevin Gillieron Date: Wed, 12 Dec 2018 16:51:42 +0100 Subject: [PATCH 511/728] add test to reproduce bug --- search/query/query_string_parser_test.go | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/search/query/query_string_parser_test.go b/search/query/query_string_parser_test.go index 356127fdc..f10170edd 100644 --- a/search/query/query_string_parser_test.go +++ b/search/query/query_string_parser_test.go @@ -49,6 +49,16 @@ func TestQuerySyntaxParserValid(t *testing.T) { }, nil), }, + { + input: "127.0.0.1", + mapping: mapping.NewIndexMapping(), + result: NewBooleanQueryForQueryString( + nil, + []Query{ + NewMatchQuery("127.0.0.1"), + }, + nil), + }, { input: `"test phrase 1"`, mapping: mapping.NewIndexMapping(), From 85453e97b2a394cc7d13e800e48c7c778946574d Mon Sep 17 00:00:00 2001 From: Kevin Gillieron Date: Wed, 12 Dec 2018 16:52:29 +0100 Subject: [PATCH 512/728] fix number parsing issue Numbers separated by a dot, like IP addresses, are parsed as floating point number instead of strings. 
--- search/query/query_string_lex.go | 1 + 1 file changed, 1 insertion(+) diff --git a/search/query/query_string_lex.go b/search/query/query_string_lex.go index 9c59cedde..3a9cf2398 100644 --- a/search/query/query_string_lex.go +++ b/search/query/query_string_lex.go @@ -273,6 +273,7 @@ func inNumOrStrState(l *queryStringLex, next rune, eof bool) (lexState, bool) { // see where to go if !l.seenDot && next == '.' { // stay in this state + l.seenDot = true l.buf += string(next) return inNumOrStrState, true } else if unicode.IsDigit(next) { From 1d14bcc36f1b4b9d42d14ade4866b7c9a20e6786 Mon Sep 17 00:00:00 2001 From: Sreekanth Sivasankaran Date: Mon, 17 Dec 2018 15:42:09 +0530 Subject: [PATCH 513/728] 1079 - Incorrect scorch score The composite field norm/scoring is broken with scorch, as it incorrectly considers the "_id" field during the composite field analysis. --- index/scorch/scorch.go | 2 +- test/versus_score_test.go | 135 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 136 insertions(+), 1 deletion(-) create mode 100644 test/versus_score_test.go diff --git a/index/scorch/scorch.go b/index/scorch/scorch.go index 3430bd368..07568e6e5 100644 --- a/index/scorch/scorch.go +++ b/index/scorch/scorch.go @@ -526,7 +526,7 @@ func (s *Scorch) Analyze(d *document.Document) *index.AnalysisResult { rv.Analyzed[i] = tokenFreqs rv.Length[i] = fieldLength - if len(d.CompositeFields) > 0 { + if len(d.CompositeFields) > 0 && field.Name() != "_id" { // see if any of the composite fields need this for _, compositeField := range d.CompositeFields { compositeField.Compose(field.Name(), fieldLength, tokenFreqs) diff --git a/test/versus_score_test.go b/test/versus_score_test.go new file mode 100644 index 000000000..dcaf8d650 --- /dev/null +++ b/test/versus_score_test.go @@ -0,0 +1,135 @@ +// Copyright (c) 2018 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package test + +import ( + "fmt" + "os" + "strconv" + "testing" + + "github.com/blevesearch/bleve" + "github.com/blevesearch/bleve/document" + "github.com/blevesearch/bleve/index/scorch" + "github.com/blevesearch/bleve/index/upsidedown" + "github.com/blevesearch/bleve/mapping" + "github.com/blevesearch/bleve/search" +) + +func TestDisjunctionSearchScoreIndexWithCompositeFields(t *testing.T) { + upHits := disjunctionQueryiOnIndexWithCompositeFields(upsidedown.Name, t) + scHits := disjunctionQueryiOnIndexWithCompositeFields(scorch.Name, t) + + if upHits[0].ID != scHits[0].ID || upHits[1].ID != scHits[1].ID { + t.Errorf("upsidedown, scorch returned different docs;\n"+ + "upsidedown: (%s, %s), scorch: (%s, %s)\n", + upHits[0].ID, upHits[1].ID, scHits[0].ID, scHits[1].ID) + } + + if scHits[0].Score != upHits[0].Score || scHits[1].Score != upHits[1].Score { + t.Errorf("upsidedown, scorch showing different scores;\n"+ + "upsidedown: (%+v, %+v), scorch: (%+v, %+v)\n", + *upHits[0].Expl, *upHits[1].Expl, *scHits[0].Expl, *scHits[1].Expl) + } + +} + +func disjunctionQueryiOnIndexWithCompositeFields(indexName string, + t *testing.T) []*search.DocumentMatch { + // create an index + idxMapping := mapping.NewIndexMapping() + idx, err := bleve.NewUsing("testidx", idxMapping, indexName, + bleve.Config.DefaultKVStore, nil) + if err != nil { + t.Error(err) + } + + defer func() { + err = idx.Close() + if err != nil { + t.Error(err) + } + err = os.RemoveAll("testidx") + if err != nil { + t.Error(err) + } + }() + + // create and insert documents as a batch + batch 
:= idx.NewBatch() + docs := []struct { + field1 string + field2 int + }{ + { + field1: "one", + field2: 1, + }, + { + field1: "two", + field2: 2, + }, + } + + for i := 0; i < len(docs); i++ { + doc := document.NewDocument(strconv.Itoa(docs[i].field2)) + doc.Fields = []document.Field{ + document.NewTextField("field1", []uint64{}, []byte(docs[i].field1)), + document.NewNumericField("field2", []uint64{}, float64(docs[i].field2)), + } + doc.CompositeFields = []*document.CompositeField{ + document.NewCompositeFieldWithIndexingOptions( + "_all", true, []string{"field1"}, []string{}, + document.IndexField|document.IncludeTermVectors), + } + if err = batch.IndexAdvanced(doc); err != nil { + t.Error(err) + } + } + if err = idx.Batch(batch); err != nil { + t.Error(err) + } + + /* + Query: + DISJ + / \ + CONJ TERM(two) + / + TERM(one) + */ + + tq1 := bleve.NewTermQuery("one") + tq1.SetBoost(2) + tq2 := bleve.NewTermQuery("two") + tq2.SetBoost(3) + + cq := bleve.NewConjunctionQuery(tq1) + cq.SetBoost(4) + + q := bleve.NewDisjunctionQuery(tq1, tq2) + sr := bleve.NewSearchRequestOptions(q, 2, 0, true) + res, err := idx.Search(sr) + if err != nil { + t.Error(err) + } + + if len(res.Hits) != 2 { + t.Errorf(fmt.Sprintf("indexType: %s Expected 2 hits, "+ + "but got: %v", indexName, len(res.Hits))) + } + + return res.Hits +} From a67bab66242517e6802a57d8f7ceaeac0797f188 Mon Sep 17 00:00:00 2001 From: abhinavdangeti Date: Mon, 10 Dec 2018 14:19:32 -0800 Subject: [PATCH 514/728] MB-32295: Minimal optimization for conjunction/disjunction searchers If the number of nested searchers within a conjunction/disjunction searcher is one, just return the nested searcher unwrapped as is, to avoid an unnecessary level of indirection. 
--- search/query/conjunction.go | 5 +++++ search/query/disjunction.go | 5 +++++ 2 files changed, 10 insertions(+) diff --git a/search/query/conjunction.go b/search/query/conjunction.go index 39cc312de..8e2d056ed 100644 --- a/search/query/conjunction.go +++ b/search/query/conjunction.go @@ -70,9 +70,14 @@ func (q *ConjunctionQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, } ss = append(ss, sr) } + if len(ss) < 1 { return searcher.NewMatchNoneSearcher(i) + } else if len(ss) == 1 { + // return single nested searcher as is + return ss[0], nil } + return searcher.NewConjunctionSearcher(i, ss, options) } diff --git a/search/query/disjunction.go b/search/query/disjunction.go index dacc3a75b..6b0e383bc 100644 --- a/search/query/disjunction.go +++ b/search/query/disjunction.go @@ -76,9 +76,14 @@ func (q *DisjunctionQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, } ss = append(ss, sr) } + if len(ss) < 1 { return searcher.NewMatchNoneSearcher(i) + } else if len(ss) == 1 { + // return the single nested searcher as is + return ss[0], nil } + return searcher.NewDisjunctionSearcher(i, ss, q.Min, options) } From d1d06dcc9bfa63cc396c2fcf69e6eaf3c22422f0 Mon Sep 17 00:00:00 2001 From: abhinavdangeti Date: Mon, 17 Dec 2018 10:36:27 -0800 Subject: [PATCH 515/728] MB-32295: "min" propagation for disjunction queries with single clause + Optimization of returning the nested searcher within a disjunction searcher should be done only when min <= 1. 
--- search/query/conjunction.go | 2 +- search/query/disjunction.go | 4 +-- search_test.go | 71 ++++++++++++++++++++++++++++++++++++- 3 files changed, 73 insertions(+), 4 deletions(-) diff --git a/search/query/conjunction.go b/search/query/conjunction.go index 8e2d056ed..f14cbdd88 100644 --- a/search/query/conjunction.go +++ b/search/query/conjunction.go @@ -74,7 +74,7 @@ func (q *ConjunctionQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, if len(ss) < 1 { return searcher.NewMatchNoneSearcher(i) } else if len(ss) == 1 { - // return single nested searcher as is + // return single nested searcher as is return ss[0], nil } diff --git a/search/query/disjunction.go b/search/query/disjunction.go index 6b0e383bc..b884cb659 100644 --- a/search/query/disjunction.go +++ b/search/query/disjunction.go @@ -79,8 +79,8 @@ func (q *DisjunctionQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, if len(ss) < 1 { return searcher.NewMatchNoneSearcher(i) - } else if len(ss) == 1 { - // return the single nested searcher as is + } else if len(ss) == 1 && q.Min <= 1 { + // return the single nested searcher as is; only if min clauses is not greater than 1; return ss[0], nil } diff --git a/search_test.go b/search_test.go index 52f406d62..71fae4236 100644 --- a/search_test.go +++ b/search_test.go @@ -1012,5 +1012,74 @@ func TestQueryStringEmptyConjunctionSearcher(t *testing.T) { query := NewQueryStringQuery("foo:bar +baz:\"\"") searchReq := NewSearchRequest(query) - _, _ = index.Search(searchReq) + _, _ = index.Search(searchReq) +} + +func TestDisjunctionQueryIncorrectMin(t *testing.T) { + // create an index with default settings + idxMapping := NewIndexMapping() + idx, err := New("testidx", idxMapping) + if err != nil { + t.Fatal(err) + } + defer func() { + err = idx.Close() + if err != nil { + t.Fatal(err) + } + err = os.RemoveAll("testidx") + if err != nil { + t.Fatal(err) + } + }() + + // create and insert documents as a batch + batch := idx.NewBatch() + docs := 
[]struct { + field1 string + field2 int + }{ + { + field1: "one", + field2: 1, + }, + { + field1: "two", + field2: 2, + }, + } + + for i := 0; i < len(docs); i++ { + doc := document.NewDocument(strconv.Itoa(docs[i].field2)) + doc.Fields = []document.Field{ + document.NewTextField("field1", []uint64{}, []byte(docs[i].field1)), + document.NewNumericField("field2", []uint64{}, float64(docs[i].field2)), + } + doc.CompositeFields = []*document.CompositeField{ + document.NewCompositeFieldWithIndexingOptions( + "_all", true, []string{"text"}, []string{}, + document.IndexField|document.IncludeTermVectors), + } + if err = batch.IndexAdvanced(doc); err != nil { + t.Fatal(err) + } + } + + if err = idx.Batch(batch); err != nil { + t.Fatal(err) + } + + tq := NewTermQuery("one") + dq := NewDisjunctionQuery(tq) + dq.SetMin(2) + sr := NewSearchRequestOptions(dq, 1, 0, false) + res, err := idx.Search(sr) + if err != nil { + t.Fatal(err) + } + + if res.Total > 0 { + t.Fatalf("Expected 0 matches as disjunction query contains a single clause"+ + " but got: %v", res.Total) + } } From cc0219a5f4ebff25fdd709cf5ed14536b0a0d08d Mon Sep 17 00:00:00 2001 From: abhinavdangeti Date: Fri, 4 Jan 2019 14:05:07 -0800 Subject: [PATCH 516/728] Revert "Refactoring." This reverts commit 934a36ea597bf6c20b8b05bda553bbf3f1d2d073. 
--- index/scorch/segment/zap/segment.go | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/index/scorch/segment/zap/segment.go b/index/scorch/segment/zap/segment.go index 4ee6c290b..606ea0cfa 100644 --- a/index/scorch/segment/zap/segment.go +++ b/index/scorch/segment/zap/segment.go @@ -418,18 +418,19 @@ func (s *SegmentBase) DocNumbers(ids []string) (*roaring.Bitmap, error) { } postingsList := emptyPostingsList - filteredIds := make([]string, 0, len(ids)) + filteredIds := ids[:0] sMax := "" sMaxB, err := idDict.fst.GetMaxKey() - if err != nil { - return nil, err - } - sMax = string(sMaxB) - for _, id := range ids { - if id <= sMax { - filteredIds = append(filteredIds, id) + if err == nil { + sMax = string(sMaxB) + for _, id := range ids { + if id <= sMax { + filteredIds = append(filteredIds, id) + } } + } else { + filteredIds = ids } for _, id := range filteredIds { From 843bbf265ed77cc3b122da4e856eaf865fe3f012 Mon Sep 17 00:00:00 2001 From: abhinavdangeti Date: Fri, 4 Jan 2019 14:05:20 -0800 Subject: [PATCH 517/728] Revert "Simplification - Using filteredIds instead of ids." This reverts commit 43e8e27f8e2cf5d076973dbad26f436aaa8a12c6. 
--- index/scorch/segment/zap/segment.go | 39 ++++++++++++++++++----------- 1 file changed, 24 insertions(+), 15 deletions(-) diff --git a/index/scorch/segment/zap/segment.go b/index/scorch/segment/zap/segment.go index 606ea0cfa..9342dd273 100644 --- a/index/scorch/segment/zap/segment.go +++ b/index/scorch/segment/zap/segment.go @@ -418,29 +418,38 @@ func (s *SegmentBase) DocNumbers(ids []string) (*roaring.Bitmap, error) { } postingsList := emptyPostingsList - filteredIds := ids[:0] + + skipCheck := false sMax := "" + iMin := "" - sMaxB, err := idDict.fst.GetMaxKey() - if err == nil { - sMax = string(sMaxB) - for _, id := range ids { - if id <= sMax { - filteredIds = append(filteredIds, id) + if len(ids) > 0 { + sMaxB, err := idDict.fst.GetMaxKey() + if err != nil { + skipCheck = true + } else { + sMax = string(sMaxB) + iMin = ids[0] + for i := 1; i < len(ids); i++ { + if ids[i] < iMin { + iMin = ids[i] + } } } } else { - filteredIds = ids + skipCheck = true } - - for _, id := range filteredIds { - postingsList, err = idDict.postingsList([]byte(id), nil, postingsList) - if err != nil { - return nil, err + if skipCheck || (iMin <= sMax) { + for _, id := range ids { + if skipCheck || (id <= sMax) { + postingsList, err = idDict.postingsList([]byte(id), nil, postingsList) + if err != nil { + return nil, err + } + postingsList.OrInto(rv) + } } - postingsList.OrInto(rv) } - } return rv, nil From ecc0efcc4ffa22707a1d9bee9cfcab1ebd270783 Mon Sep 17 00:00:00 2001 From: abhinavdangeti Date: Fri, 4 Jan 2019 14:05:36 -0800 Subject: [PATCH 518/728] Revert "Fix: DocNumbers must work with empty ids array." This reverts commit 8395e90b544ddfa13cbe9b112b330a8093bdec2c. 
--- index/scorch/segment/zap/segment.go | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/index/scorch/segment/zap/segment.go b/index/scorch/segment/zap/segment.go index 9342dd273..1bab522e2 100644 --- a/index/scorch/segment/zap/segment.go +++ b/index/scorch/segment/zap/segment.go @@ -423,21 +423,17 @@ func (s *SegmentBase) DocNumbers(ids []string) (*roaring.Bitmap, error) { sMax := "" iMin := "" - if len(ids) > 0 { - sMaxB, err := idDict.fst.GetMaxKey() - if err != nil { - skipCheck = true - } else { - sMax = string(sMaxB) - iMin = ids[0] - for i := 1; i < len(ids); i++ { - if ids[i] < iMin { - iMin = ids[i] - } + sMaxB, err := idDict.fst.GetMaxKey() + if err != nil { + skipCheck = true + } else { + sMax = string(sMaxB) + iMin = ids[0] + for i := 1; i < len(ids); i++ { + if ids[i] < iMin { + iMin = ids[i] } } - } else { - skipCheck = true } if skipCheck || (iMin <= sMax) { for _, id := range ids { From 0d76d2adf9a8e9f5804b62cd4ca83c86144464eb Mon Sep 17 00:00:00 2001 From: abhinavdangeti Date: Fri, 4 Jan 2019 14:06:59 -0800 Subject: [PATCH 519/728] Revert "DocNumbers - skip segments, that don't include requested ids." This reverts commit 9bb1687420e429f94ffcd4a159ad1c7181772452. 
--- index/scorch/segment/zap/segment.go | 31 +++++------------------------ 1 file changed, 5 insertions(+), 26 deletions(-) diff --git a/index/scorch/segment/zap/segment.go b/index/scorch/segment/zap/segment.go index 1bab522e2..8c6de211a 100644 --- a/index/scorch/segment/zap/segment.go +++ b/index/scorch/segment/zap/segment.go @@ -418,33 +418,12 @@ func (s *SegmentBase) DocNumbers(ids []string) (*roaring.Bitmap, error) { } postingsList := emptyPostingsList - - skipCheck := false - sMax := "" - iMin := "" - - sMaxB, err := idDict.fst.GetMaxKey() - if err != nil { - skipCheck = true - } else { - sMax = string(sMaxB) - iMin = ids[0] - for i := 1; i < len(ids); i++ { - if ids[i] < iMin { - iMin = ids[i] - } - } - } - if skipCheck || (iMin <= sMax) { - for _, id := range ids { - if skipCheck || (id <= sMax) { - postingsList, err = idDict.postingsList([]byte(id), nil, postingsList) - if err != nil { - return nil, err - } - postingsList.OrInto(rv) - } + for _, id := range ids { + postingsList, err = idDict.postingsList([]byte(id), nil, postingsList) + if err != nil { + return nil, err } + postingsList.OrInto(rv) } } From 5784878872b9fc934e1720e6e8f5a28ba3195f4f Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Sat, 5 Jan 2019 18:37:52 -0500 Subject: [PATCH 520/728] add unit test for #1096 --- index_test.go | 76 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 76 insertions(+) diff --git a/index_test.go b/index_test.go index 0d81eba40..55b38909d 100644 --- a/index_test.go +++ b/index_test.go @@ -2073,3 +2073,79 @@ func TestBatchMerge(t *testing.T) { } } + +func TestBug1096(t *testing.T) { + defer func() { + err := os.RemoveAll("testidx") + if err != nil { + t.Fatal(err) + } + }() + + // use default mapping + mapping := NewIndexMapping() + + // create a scorch index with default SAFE batches + var idx Index + idx, err = NewUsing("testidx", mapping, "scorch", "scorch", nil) + if err != nil { + log.Fatal(err) + } + defer func() { + err := idx.Close() + if 
err != nil { + t.Fatal(err) + } + }() + + // create a single batch instance that we will reuse + // this should be safe because we have single goroutine + // and we always wait for batch execution to finish + batch := idx.NewBatch() + + // number of batches to execute + for i := 0; i < 10; i++ { + + // number of documents to put into the batch + for j := 0; j < 91; j++ { + + // create a doc id 0-90 (important so that we get id's 9 and 90) + // this could duplicate something already in the index + // this too should be OK and update the item in the index + id := fmt.Sprintf("%d", j) + + err = batch.Index(id, map[string]interface{}{ + "name": id, + "batch": fmt.Sprintf("%d", i), + }) + if err != nil { + log.Fatal(err) + } + } + + // execute the batch + err = idx.Batch(batch) + if err != nil { + log.Fatal(err) + } + + // reset the batch before reusing it + batch.Reset() + } + + // search for docs having name starting with the number 9 + q := NewWildcardQuery("9*") + q.SetField("name") + req := NewSearchRequestOptions(q, 1000, 0, false) + req.Fields = []string{"*"} + var res *SearchResult + res, err = idx.Search(req) + if err != nil { + log.Fatal(err) + } + + // we expect only 2 hits, for docs 9 and 90 + if res.Total > 2 { + t.Fatalf("expected only 2 hits '9' and '90', got %v", res) + } +} From af8b2ab37f5b30915e2cdfdc4e2b8b4c6d2b13b1 Mon Sep 17 00:00:00 2001 From: abhinavdangeti Date: Mon, 7 Jan 2019 16:23:42 -0800 Subject: [PATCH 521/728] Update SHA of vellum in vendor/manifest --- vendor/manifest | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vendor/manifest b/vendor/manifest index 9471f14e2..5bdb6a076 100644 --- a/vendor/manifest +++ b/vendor/manifest @@ -137,7 +137,7 @@ "importpath": "github.com/couchbase/vellum", "repository": "https://github.com/couchbase/vellum", "vcs": "git", - "revision": "f377ee3282b954c46915d89482bf93288ee7dd12", + "revision": "28880ab96d9361ab5a74f0e12000f8fe0cd20712", "branch": "master", "notests": true } From 
fa29ecd3f1875b37f0fd25de9cdd2f7fb2eeaadd Mon Sep 17 00:00:00 2001 From: abhinavdangeti Date: Tue, 8 Jan 2019 16:21:14 -0800 Subject: [PATCH 522/728] MB-31406: Accounting for FileMergeWrittenBytes incrementally --- index/scorch/merge.go | 11 +++++++---- index/scorch/segment/segment.go | 4 ++++ index/scorch/segment/zap/count.go | 10 ++++++++++ index/scorch/segment/zap/merge.go | 10 ++++++---- index/scorch/segment/zap/merge_test.go | 10 +++++----- index/scorch/segment/zap/segment_test.go | 2 +- 6 files changed, 33 insertions(+), 14 deletions(-) diff --git a/index/scorch/merge.go b/index/scorch/merge.go index 61abe6951..bcbf5b710 100644 --- a/index/scorch/merge.go +++ b/index/scorch/merge.go @@ -196,10 +196,9 @@ func (s *Scorch) planMergeAtSnapshot(ourSnapshot *IndexSnapshot, fileMergeZapStartTime := time.Now() atomic.AddUint64(&s.stats.TotFileMergeZapBeg, 1) - newDocNums, nBytes, err := zap.Merge(segmentsToMerge, docsToDrop, path, - DefaultChunkFactor, s.closeCh) + newDocNums, _, err := zap.Merge(segmentsToMerge, docsToDrop, path, + DefaultChunkFactor, s.closeCh, s) atomic.AddUint64(&s.stats.TotFileMergeZapEnd, 1) - atomic.AddUint64(&s.stats.TotFileMergeWrittenBytes, nBytes) fileMergeZapTime := uint64(time.Since(fileMergeZapStartTime)) atomic.AddUint64(&s.stats.TotFileMergeZapTime, fileMergeZapTime) @@ -292,7 +291,7 @@ func (s *Scorch) mergeSegmentBases(snapshot *IndexSnapshot, path := s.path + string(os.PathSeparator) + filename newDocNums, _, err := - zap.MergeSegmentBases(sbs, sbsDrops, path, chunkFactor, s.closeCh) + zap.MergeSegmentBases(sbs, sbsDrops, path, chunkFactor, s.closeCh, s) atomic.AddUint64(&s.stats.TotMemMergeZapEnd, 1) @@ -347,3 +346,7 @@ func (s *Scorch) mergeSegmentBases(snapshot *IndexSnapshot, return newSnapshot, newSegmentID, nil } } + +func (s *Scorch) ReportBytesWritten(bytesWritten uint64) { + atomic.AddUint64(&s.stats.TotFileMergeWrittenBytes, bytesWritten) +} diff --git a/index/scorch/segment/segment.go b/index/scorch/segment/segment.go 
index be9142c40..b94d6f979 100644 --- a/index/scorch/segment/segment.go +++ b/index/scorch/segment/segment.go @@ -129,3 +129,7 @@ type DocumentFieldTermVisitable interface { type DocVisitState interface { } + +type StatsReporter interface { + ReportBytesWritten(bytesWritten uint64) +} diff --git a/index/scorch/segment/zap/count.go b/index/scorch/segment/zap/count.go index d75e83c03..50290f888 100644 --- a/index/scorch/segment/zap/count.go +++ b/index/scorch/segment/zap/count.go @@ -17,6 +17,8 @@ package zap import ( "hash/crc32" "io" + + "github.com/blevesearch/bleve/index/scorch/segment" ) // CountHashWriter is a wrapper around a Writer which counts the number of @@ -25,6 +27,7 @@ type CountHashWriter struct { w io.Writer crc uint32 n int + s segment.StatsReporter } // NewCountHashWriter returns a CountHashWriter which wraps the provided Writer @@ -32,11 +35,18 @@ func NewCountHashWriter(w io.Writer) *CountHashWriter { return &CountHashWriter{w: w} } +func NewCountHashWriterWithStatsReporter(w io.Writer, s segment.StatsReporter) *CountHashWriter { + return &CountHashWriter{w: w, s: s} +} + // Write writes the provided bytes to the wrapped writer and counts the bytes func (c *CountHashWriter) Write(b []byte) (int, error) { n, err := c.w.Write(b) c.crc = crc32.Update(c.crc, crc32.IEEETable, b[:n]) c.n += n + if c.s != nil { + c.s.ReportBytesWritten(uint64(n)) + } return n, err } diff --git a/index/scorch/segment/zap/merge.go b/index/scorch/segment/zap/merge.go index 13e9bf97c..4ef222c1a 100644 --- a/index/scorch/segment/zap/merge.go +++ b/index/scorch/segment/zap/merge.go @@ -38,17 +38,19 @@ const docDropped = math.MaxUint64 // sentinel docNum to represent a deleted doc // remaining data. This new segment is built at the specified path, // with the provided chunkFactor. 
func Merge(segments []*Segment, drops []*roaring.Bitmap, path string, - chunkFactor uint32, closeCh chan struct{}) ([][]uint64, uint64, error) { + chunkFactor uint32, closeCh chan struct{}, s seg.StatsReporter) ( + [][]uint64, uint64, error) { segmentBases := make([]*SegmentBase, len(segments)) for segmenti, segment := range segments { segmentBases[segmenti] = &segment.SegmentBase } - return MergeSegmentBases(segmentBases, drops, path, chunkFactor, closeCh) + return MergeSegmentBases(segmentBases, drops, path, chunkFactor, closeCh, s) } func MergeSegmentBases(segmentBases []*SegmentBase, drops []*roaring.Bitmap, path string, - chunkFactor uint32, closeCh chan struct{}) ([][]uint64, uint64, error) { + chunkFactor uint32, closeCh chan struct{}, s seg.StatsReporter) ( + [][]uint64, uint64, error) { flag := os.O_RDWR | os.O_CREATE f, err := os.OpenFile(path, flag, 0600) @@ -65,7 +67,7 @@ func MergeSegmentBases(segmentBases []*SegmentBase, drops []*roaring.Bitmap, pat br := bufio.NewWriterSize(f, DefaultFileMergerBufferSize) // wrap it for counting (tracking offsets) - cr := NewCountHashWriter(br) + cr := NewCountHashWriterWithStatsReporter(br, s) newDocNums, numDocs, storedIndexOffset, fieldsIndexOffset, docValueOffset, _, _, _, err := MergeToWriter(segmentBases, drops, chunkFactor, cr, closeCh) diff --git a/index/scorch/segment/zap/merge_test.go b/index/scorch/segment/zap/merge_test.go index db1cfff15..450ecba91 100644 --- a/index/scorch/segment/zap/merge_test.go +++ b/index/scorch/segment/zap/merge_test.go @@ -71,7 +71,7 @@ func TestMerge(t *testing.T) { segsToMerge[0] = segment.(*Segment) segsToMerge[1] = segment2.(*Segment) - _, _, err = Merge(segsToMerge, []*roaring.Bitmap{nil, nil}, "/tmp/scorch3.zap", 1024, nil) + _, _, err = Merge(segsToMerge, []*roaring.Bitmap{nil, nil}, "/tmp/scorch3.zap", 1024, nil, nil) if err != nil { t.Fatal(err) } @@ -175,7 +175,7 @@ func testMergeWithEmptySegments(t *testing.T, before bool, numEmptySegments int) drops := 
make([]*roaring.Bitmap, len(segsToMerge)) - _, _, err = Merge(segsToMerge, drops, "/tmp/scorch3.zap", 1024, nil) + _, _, err = Merge(segsToMerge, drops, "/tmp/scorch3.zap", 1024, nil, nil) if err != nil { t.Fatal(err) } @@ -217,7 +217,7 @@ func testMergeWithSelf(t *testing.T, segCur *Segment, expectedCount uint64) { segsToMerge := make([]*Segment, 1) segsToMerge[0] = segCur - _, _, err := Merge(segsToMerge, []*roaring.Bitmap{nil, nil}, "/tmp/"+fname, 1024, nil) + _, _, err := Merge(segsToMerge, []*roaring.Bitmap{nil, nil}, "/tmp/"+fname, 1024, nil, nil) if err != nil { t.Fatal(err) } @@ -589,7 +589,7 @@ func testMergeWithUpdates(t *testing.T, segmentDocIds [][]string, docsToDrop []* func testMergeAndDropSegments(t *testing.T, segsToMerge []*Segment, docsToDrop []*roaring.Bitmap, expectedNumDocs uint64) { _ = os.RemoveAll("/tmp/scorch-merged.zap") - _, _, err := Merge(segsToMerge, docsToDrop, "/tmp/scorch-merged.zap", 1024, nil) + _, _, err := Merge(segsToMerge, docsToDrop, "/tmp/scorch-merged.zap", 1024, nil, nil) if err != nil { t.Fatal(err) } @@ -823,7 +823,7 @@ func TestMergeBytesWritten(t *testing.T) { segsToMerge[0] = segment.(*Segment) segsToMerge[1] = segment2.(*Segment) - _, nBytes, err := Merge(segsToMerge, []*roaring.Bitmap{nil, nil}, "/tmp/scorch3.zap", 1024, nil) + _, nBytes, err := Merge(segsToMerge, []*roaring.Bitmap{nil, nil}, "/tmp/scorch3.zap", 1024, nil, nil) if err != nil { t.Fatal(err) } diff --git a/index/scorch/segment/zap/segment_test.go b/index/scorch/segment/zap/segment_test.go index 623198c63..ffe4c7c80 100644 --- a/index/scorch/segment/zap/segment_test.go +++ b/index/scorch/segment/zap/segment_test.go @@ -691,7 +691,7 @@ func TestMergedSegmentDocsWithNonOverlappingFields(t *testing.T) { segsToMerge[0] = segment1.(*Segment) segsToMerge[1] = segment2.(*Segment) - _, nBytes, err := Merge(segsToMerge, []*roaring.Bitmap{nil, nil}, "/tmp/scorch3.zap", 1024, nil) + _, nBytes, err := Merge(segsToMerge, []*roaring.Bitmap{nil, nil}, 
"/tmp/scorch3.zap", 1024, nil, nil) if err != nil { t.Fatal(err) } From ec80de1199ba6815873df0a05fe0d2e0136f2941 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Fri, 11 Jan 2019 21:51:00 -0800 Subject: [PATCH 523/728] optimize scorch postings list that has no obsoleted docs A postings list that has no obsoleted docs will have a postings iterator with an actual roaring-bitmap that's logically the same as the all roaring-bitmap (e.g., actual = all - obsoletions). This optimization handles this edge case by using the same exact roaring-bitmap iterator for both the actual and all needs. This means that Next() calls will be only needed on a single roaring iterator rather than the general case of Next()'ing through two separate iterators. A helper method, PostingsIterator.currChunkNext(), is refactored out as part of this change so that maintaining the freq/norm/location readers correctly is more straightforward. On a scorch index of 200,000 en-wiki docs, high-freq term searches went from ~278 q/s before this change to ~307 q/s after. Low-freq term searches went from ~84K q/s before to ~92K q/s after. --- index/scorch/segment/zap/posting.go | 106 ++++++++++++++++++++++------ 1 file changed, 83 insertions(+), 23 deletions(-) diff --git a/index/scorch/segment/zap/posting.go b/index/scorch/segment/zap/posting.go index 0ac7938e1..1bada4983 100644 --- a/index/scorch/segment/zap/posting.go +++ b/index/scorch/segment/zap/posting.go @@ -254,7 +254,7 @@ func (p *PostingsList) iterator(includeFreq, includeNorm, includeLocs bool, rv.Actual = rv.ActualBM.Iterator() } else { rv.ActualBM = p.postings - rv.Actual = p.postings.Iterator() + rv.Actual = rv.all // Optimize to use same iterator for all & Actual. 
} return rv @@ -651,6 +651,10 @@ func (i *PostingsIterator) nextDocNumAtOrAfter(atOrAfter uint64) (uint64, bool, return 0, false, nil } + if i.postings.postings == i.ActualBM { + return i.nextDocNumAtOrAfterClean(atOrAfter) + } + n := i.Actual.Next() for uint64(n) < atOrAfter && i.Actual.HasNext() { n = i.Actual.Next() @@ -670,31 +674,10 @@ func (i *PostingsIterator) nextDocNumAtOrAfter(atOrAfter uint64) (uint64, bool, for allN != n { // in the same chunk, so move the freq/norm/loc decoders forward if i.includeFreqNorm && allNChunk == nChunk { - if i.currChunk != nChunk || i.currChunkFreqNorm == nil { - err := i.loadChunk(int(nChunk)) - if err != nil { - return 0, false, fmt.Errorf("error loading chunk: %v", err) - } - } - - // read off freq/offsets even though we don't care about them - _, _, hasLocs, err := i.readFreqNormHasLocs() + err := i.currChunkNext(nChunk) if err != nil { return 0, false, err } - - if i.includeLocs && hasLocs { - numLocsBytes, err := binary.ReadUvarint(i.locReader) - if err != nil { - return 0, false, fmt.Errorf("error reading location numLocsBytes: %v", err) - } - - // skip over all the location bytes - _, err = i.locReader.Seek(int64(numLocsBytes), io.SeekCurrent) - if err != nil { - return 0, false, err - } - } } allN = i.all.Next() @@ -711,6 +694,83 @@ func (i *PostingsIterator) nextDocNumAtOrAfter(atOrAfter uint64) (uint64, bool, return uint64(n), true, nil } +// optimization when the postings list is "clean" (e.g., no updates & +// no deletions) where the all bitmap is the same as the actual bitmap +func (i *PostingsIterator) nextDocNumAtOrAfterClean( + atOrAfter uint64) (uint64, bool, error) { + sameChunkNexts := 0 // # of times we called Next() in the same chunk + + n := i.Actual.Next() + + nChunk := n / i.postings.sb.chunkFactor + + for uint64(n) < atOrAfter && i.Actual.HasNext() { + n = i.Actual.Next() + + nChunkPrev := nChunk + nChunk = n / i.postings.sb.chunkFactor + + if nChunk != nChunkPrev { + sameChunkNexts = 0 + } else { + 
sameChunkNexts += 1 + } + } + + if uint64(n) < atOrAfter { + // couldn't find anything + return 0, false, nil + } + + if i.includeFreqNorm { + for j := 0; j < sameChunkNexts; j++ { + err := i.currChunkNext(nChunk) + if err != nil { + return 0, false, fmt.Errorf("error optimized currChunkNext: %v", err) + } + } + + if i.currChunk != nChunk || i.currChunkFreqNorm == nil { + err := i.loadChunk(int(nChunk)) + if err != nil { + return 0, false, fmt.Errorf("error loading chunk: %v", err) + } + } + } + + return uint64(n), true, nil +} + +func (i *PostingsIterator) currChunkNext(nChunk uint32) error { + if i.currChunk != nChunk || i.currChunkFreqNorm == nil { + err := i.loadChunk(int(nChunk)) + if err != nil { + return fmt.Errorf("error loading chunk: %v", err) + } + } + + // read off freq/offsets even though we don't care about them + _, _, hasLocs, err := i.readFreqNormHasLocs() + if err != nil { + return err + } + + if i.includeLocs && hasLocs { + numLocsBytes, err := binary.ReadUvarint(i.locReader) + if err != nil { + return fmt.Errorf("error reading location numLocsBytes: %v", err) + } + + // skip over all the location bytes + _, err = i.locReader.Seek(int64(numLocsBytes), io.SeekCurrent) + if err != nil { + return err + } + } + + return nil +} + // Posting is a single entry in a postings list type Posting struct { docNum uint64 From b57a98d6dff94226dd200ec6477fe2e98b2eb457 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Tue, 15 Jan 2019 16:17:59 -0800 Subject: [PATCH 524/728] optimize scorch postingsIter chunk maintenance on skip ahead Consider a scenario of a conjunction search with a high-frequency term search and a low-frequency term search. The scorch PostingsIterator for the high-frequency term will have its nextDocNumAtOrAfter(atOrAfter) method invoked with a non-zero atOrAfter parameter -- driven by docNum's from the low frequency term search. 
In this case, before this change, the inner loop that moves the high-frequency "all iterator" forwards performs a division by chunkFactor on every single "all" docNum, in order to compute the all iterator's current chunk number, so that it can be checked against the "acutal iterator"'s current chunk number (in order to maintain the PostingIterator's freqNorm & loc reader positions). This integer division was appearing in pprof, so this change is to replace the division op with a faster integer >= comparison. On a 200K en-wiki docs perf test, a query-string search for "+http +schoch" went from ~7650 q/sec throughput before the change to ~8000 q/sec afterwards. --- index/scorch/segment/zap/posting.go | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/index/scorch/segment/zap/posting.go b/index/scorch/segment/zap/posting.go index 1bada4983..d0bdd6852 100644 --- a/index/scorch/segment/zap/posting.go +++ b/index/scorch/segment/zap/posting.go @@ -666,14 +666,16 @@ func (i *PostingsIterator) nextDocNumAtOrAfter(atOrAfter uint64) (uint64, bool, allN := i.all.Next() nChunk := n / i.postings.sb.chunkFactor - allNChunk := allN / i.postings.sb.chunkFactor + + // when allN becomes >= to here, then allN is in the same chunk as nChunk. 
+ allNReachesNChunk := nChunk * i.postings.sb.chunkFactor // n is the next actual hit (excluding some postings), and // allN is the next hit in the full postings, and // if they don't match, move 'all' forwards until they do for allN != n { - // in the same chunk, so move the freq/norm/loc decoders forward - if i.includeFreqNorm && allNChunk == nChunk { + // we've reached same chunk, so move the freq/norm/loc decoders forward + if i.includeFreqNorm && allN >= allNReachesNChunk { err := i.currChunkNext(nChunk) if err != nil { return 0, false, err @@ -681,7 +683,6 @@ func (i *PostingsIterator) nextDocNumAtOrAfter(atOrAfter uint64) (uint64, bool, } allN = i.all.Next() - allNChunk = allN / i.postings.sb.chunkFactor } if i.includeFreqNorm && (i.currChunk != nChunk || i.currChunkFreqNorm == nil) { From 5554236b024202656ea23aa49a81261b10cef7d6 Mon Sep 17 00:00:00 2001 From: abhinavdangeti Date: Wed, 16 Jan 2019 09:17:12 -0800 Subject: [PATCH 525/728] [Scorch] Skipping segments that don't include requested ids Skip the segment if the current segment's maximum docId is smaller than the smallest requested id, while fetching the DocNumbers. 
--- index/scorch/segment/zap/segment.go | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/index/scorch/segment/zap/segment.go b/index/scorch/segment/zap/segment.go index 8c6de211a..7ba28c236 100644 --- a/index/scorch/segment/zap/segment.go +++ b/index/scorch/segment/zap/segment.go @@ -418,7 +418,20 @@ func (s *SegmentBase) DocNumbers(ids []string) (*roaring.Bitmap, error) { } postingsList := emptyPostingsList + + sMax, err := idDict.fst.GetMaxKey() + if err != nil { + return nil, err + } + sMaxStr := string(sMax) + filteredIds := make([]string, 0, len(ids)) for _, id := range ids { + if id <= sMaxStr { + filteredIds = append(filteredIds, id) + } + } + + for _, id := range filteredIds { postingsList, err = idDict.postingsList([]byte(id), nil, postingsList) if err != nil { return nil, err From c15bb65ae6df459768345ee9a68f3122ef3d4fbd Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Sat, 19 Jan 2019 13:52:43 -0800 Subject: [PATCH 526/728] optimization Finish() may return optimized resources This is a plumbing level API change, where an optimization is now allowed to return alternative resources in its Finish() implementation. For example, this might be useful in future commits, where scorch codepaths might return an alternate index.TermFieldReader instance for optimizing non-scoring disjunctions. --- index/index.go | 10 +++++++++- index/scorch/optimize.go | 6 +++--- search/searcher/search_conjunction.go | 2 +- 3 files changed, 13 insertions(+), 5 deletions(-) diff --git a/index/index.go b/index/index.go index 4ae79056e..6aa444cfd 100644 --- a/index/index.go +++ b/index/index.go @@ -341,11 +341,19 @@ type Optimizable interface { Optimize(kind string, octx OptimizableContext) (OptimizableContext, error) } +// Represents a result of optimization -- see the Finish() method. 
+type Optimized interface{} + type OptimizableContext interface { // Once all the optimzable resources have been provided the same // OptimizableContext instance, the optimization preparations are // finished or completed via the Finish() method. - Finish() error + // + // Depending on the optimization being performed, the Finish() + // method might return a non-nil Optimized instance. For example, + // the Optimized instance might represent an optimized + // TermFieldReader instance. + Finish() (Optimized, error) } type DocValueReader interface { diff --git a/index/scorch/optimize.go b/index/scorch/optimize.go index b45fc8b0d..9ba7247c9 100644 --- a/index/scorch/optimize.go +++ b/index/scorch/optimize.go @@ -53,9 +53,9 @@ type OptimizeTFRConjunction struct { tfrs []*IndexSnapshotTermFieldReader } -func (o *OptimizeTFRConjunction) Finish() error { +func (o *OptimizeTFRConjunction) Finish() (index.Optimized, error) { if len(o.tfrs) <= 1 { - return nil + return nil, nil } for i := range o.snapshot.segment { @@ -89,5 +89,5 @@ func (o *OptimizeTFRConjunction) Finish() error { } } - return nil + return nil, nil } diff --git a/search/searcher/search_conjunction.go b/search/searcher/search_conjunction.go index 62966c13f..e89848b79 100644 --- a/search/searcher/search_conjunction.go +++ b/search/searcher/search_conjunction.go @@ -77,7 +77,7 @@ func NewConjunctionSearcher(indexReader index.IndexReader, qsearchers []search.S } if octx != nil { - err := octx.Finish() + _, err := octx.Finish() if err != nil { return nil, err } From eef432c1349f0df8859e8f28acb63bc90345e7c1 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Sat, 19 Jan 2019 15:12:59 -0800 Subject: [PATCH 527/728] API: added SearchRequest.NoScore flag This is a backwards compatible API addition. 
--- index_impl.go | 1 + search.go | 1 + search/search.go | 1 + search/searcher/search_term.go | 6 ++++-- 4 files changed, 7 insertions(+), 2 deletions(-) diff --git a/index_impl.go b/index_impl.go index c969f3758..ad9b3505e 100644 --- a/index_impl.go +++ b/index_impl.go @@ -458,6 +458,7 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr searcher, err := req.Query.Searcher(indexReader, i.m, search.SearcherOptions{ Explain: req.Explain, IncludeTermVectors: req.IncludeLocations || req.Highlight != nil, + NoScore: req.NoScore, }) if err != nil { return nil, err diff --git a/search.go b/search.go index 86ea4193a..019d6fd04 100644 --- a/search.go +++ b/search.go @@ -273,6 +273,7 @@ type SearchRequest struct { Explain bool `json:"explain"` Sort search.SortOrder `json:"sort"` IncludeLocations bool `json:"includeLocations"` + NoScore bool `json:"noScore"` } func (r *SearchRequest) Validate() error { diff --git a/search/search.go b/search/search.go index 440c09571..b241de668 100644 --- a/search/search.go +++ b/search/search.go @@ -280,6 +280,7 @@ type Searcher interface { type SearcherOptions struct { Explain bool IncludeTermVectors bool + NoScore bool } // SearchContext represents the context around a single search diff --git a/search/searcher/search_term.go b/search/searcher/search_term.go index 97b7dbb90..e805bf121 100644 --- a/search/searcher/search_term.go +++ b/search/searcher/search_term.go @@ -39,7 +39,8 @@ type TermSearcher struct { func NewTermSearcher(indexReader index.IndexReader, term string, field string, boost float64, options search.SearcherOptions) (*TermSearcher, error) { termBytes := []byte(term) - reader, err := indexReader.TermFieldReader(termBytes, field, true, true, options.IncludeTermVectors) + needFreqNorm := !options.NoScore + reader, err := indexReader.TermFieldReader(termBytes, field, needFreqNorm, needFreqNorm, options.IncludeTermVectors) if err != nil { return nil, err } @@ -57,7 +58,8 @@ func 
NewTermSearcher(indexReader index.IndexReader, term string, field string, b } func NewTermSearcherBytes(indexReader index.IndexReader, term []byte, field string, boost float64, options search.SearcherOptions) (*TermSearcher, error) { - reader, err := indexReader.TermFieldReader(term, field, true, true, options.IncludeTermVectors) + needFreqNorm := !options.NoScore + reader, err := indexReader.TermFieldReader(term, field, needFreqNorm, needFreqNorm, options.IncludeTermVectors) if err != nil { return nil, err } From 58e6641d13844e212687bd35823914c5e26b04dd Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Sun, 20 Jan 2019 12:05:56 -0800 Subject: [PATCH 528/728] optimize scorch 'unadorned' disjunctions via roaring OR's Normally, the processing of disjunctions would need postings iterators that provide the matching internal-id's along with associated freq-norm and term-vector information. A so-called "unadorned" disjunction is a more restricted edge case, where the application does not need the extra freq-norm information (a non-scoring disjunction) and does not need term-vector information. The scorch indexer can optimize these unadorned disjunctions by OR'ing together the constituent, actual bitmaps, since we do not need to maintain readers for the freq-norm and term-vector data. The resulting OR'ed bitmap is then wrapped by a single term-field-reader as an alternative to the normal disjunction searcher machinery. Regarding perf microbenchmarks (bleve-query) on a 200K en-wiki docs scorch index... for a high number of high-frequency terms... - wildcard search on "th*" (~31K hits)... before the change, with normal scoring - ~4.7 q/sec w/ NoScore:true - ~5.1 q/sec w/ NoScore:true & unadorned disj. optimization - ~11.6 q/sec for a low number of high-frequency terms... - query-string search on "http www com" (~25K hits)... before the change, with normal scoring - ~190 q/sec w/ NoScore:true - ~260 q/sec w/ NoScore:true & unadorned disj.
optimization - ~415 q/sec for a low number of low-frequency terms... - query-string search on "marty shoch" (207 hits)... before the change, with normal scoring - ~15.0K q/sec w/ NoScore:true - ~21.3K q/sec w/ NoScore:true & unadorned disj. optimization - ~16.1K q/sec Searching with NoScore:true is faster than normal scoring in all cases. However, for low numbers of low-frequency terms, the unadorned disjunction optimization is slower (~16.1K q/sec) than the classic disjunction searcher implementation (~21.3K q/sec). This is likely due to more memory/garbage creation for the computed OR'ed bitmaps. This might be addressed in future commits by using heuristics. --- index/scorch/optimize.go | 129 +++++++++++++++++++++++++- index/scorch/segment/zap/posting.go | 60 +++++++++--- index/scorch/snapshot_index.go | 2 +- index/scorch/snapshot_index_tfr.go | 2 +- search/searcher/search_disjunction.go | 51 ++++++++++ search/searcher/search_term.go | 23 ++--- 6 files changed, 229 insertions(+), 38 deletions(-) diff --git a/index/scorch/optimize.go b/index/scorch/optimize.go index 9ba7247c9..a5846121a 100644 --- a/index/scorch/optimize.go +++ b/index/scorch/optimize.go @@ -20,15 +20,27 @@ import ( "github.com/RoaringBitmap/roaring" "github.com/blevesearch/bleve/index" + "github.com/blevesearch/bleve/index/scorch/segment" "github.com/blevesearch/bleve/index/scorch/segment/zap" ) -func (s *IndexSnapshotTermFieldReader) Optimize(kind string, octx index.OptimizableContext) ( - index.OptimizableContext, error) { - if kind != "conjunction" { - return octx, nil +func (s *IndexSnapshotTermFieldReader) Optimize(kind string, + octx index.OptimizableContext) (index.OptimizableContext, error) { + if kind == "conjunction" { + return s.optimizeConjunction(octx) + } + + if kind == "disjunction:unadorned" { + return s.optimizeDisjunctionUnadorned(octx) } + return octx, nil +} + +// ---------------------------------------------------------------- + +func (s *IndexSnapshotTermFieldReader) 
optimizeConjunction( + octx index.OptimizableContext) (index.OptimizableContext, error) { if octx == nil { octx = &OptimizeTFRConjunction{snapshot: s.snapshot} } @@ -39,7 +51,7 @@ func (s *IndexSnapshotTermFieldReader) Optimize(kind string, octx index.Optimiza } if o.snapshot != s.snapshot { - return nil, fmt.Errorf("tried to optimize across different snapshots") + return nil, fmt.Errorf("tried to optimize conjunction across different snapshots") } o.tfrs = append(o.tfrs, s) @@ -80,6 +92,8 @@ func (o *OptimizeTFRConjunction) Finish() (index.Optimized, error) { bm.And(itr.ActualBM) } + // in this conjunction optimization, the postings iterators + // will all share the same AND'ed together actual bitmap for _, tfr := range o.tfrs { itr, ok := tfr.iterators[i].(*zap.PostingsIterator) if ok && itr.ActualBM != nil { @@ -91,3 +105,108 @@ func (o *OptimizeTFRConjunction) Finish() (index.Optimized, error) { return nil, nil } + +// ---------------------------------------------------------------- + +// An "unadorned" disjunction optimization is appropriate when +// additional or subsidiary information like freq-norm's and +// term-vectors are not required, and instead only the internal-id's +// are needed. 
+func (s *IndexSnapshotTermFieldReader) optimizeDisjunctionUnadorned( + octx index.OptimizableContext) (index.OptimizableContext, error) { + if octx == nil { + octx = &OptimizeTFRDisjunctionUnadorned{snapshot: s.snapshot} + } + + o, ok := octx.(*OptimizeTFRDisjunctionUnadorned) + if !ok { + return nil, nil + } + + if o.snapshot != s.snapshot { + return nil, fmt.Errorf("tried to optimize unadorned disjunction across different snapshots") + } + + o.tfrs = append(o.tfrs, s) + + return o, nil +} + +type OptimizeTFRDisjunctionUnadorned struct { + snapshot *IndexSnapshot + + tfrs []*IndexSnapshotTermFieldReader +} + +var OptimizeTFRDisjunctionUnadornedTerm = []byte("") +var OptimizeTFRDisjunctionUnadornedField = "*" + +// Finish of an unadorned disjunction optimization will compute a +// termFieldReader with an "actual" bitmap that represents the +// constituent bitmaps OR'ed together. This termFieldReader cannot +// provide any freq-norm or termVector associated information. +func (o *OptimizeTFRDisjunctionUnadorned) Finish() (rv index.Optimized, err error) { + if len(o.tfrs) <= 1 { + return nil, nil + } + + // We use an artificial term and field because the optimized + // termFieldReader can represent multiple terms and fields. + oTFR := &IndexSnapshotTermFieldReader{ + term: OptimizeTFRDisjunctionUnadornedTerm, + field: OptimizeTFRDisjunctionUnadornedField, + snapshot: o.snapshot, + iterators: make([]segment.PostingsIterator, len(o.snapshot.segment)), + segmentOffset: 0, + includeFreq: false, + includeNorm: false, + includeTermVectors: false, + } + + var docNums []uint32 // Collected docNum's from 1-hit posting lists. + var actualBMs []*roaring.Bitmap // Collected from regular posting lists. 
+ + for i := range o.snapshot.segment { + docNums = docNums[:0] + actualBMs = actualBMs[:0] + + for _, tfr := range o.tfrs { + itr, ok := tfr.iterators[i].(*zap.PostingsIterator) + if !ok { + return nil, nil + } + + docNum, ok := itr.DocNum1Hit() + if ok { + docNums = append(docNums, uint32(docNum)) + continue + } + + if itr.ActualBM != nil { + actualBMs = append(actualBMs, itr.ActualBM) + } + } + + var bm *roaring.Bitmap + if len(actualBMs) > 2 { + bm = roaring.HeapOr(actualBMs...) + } else if len(actualBMs) == 2 { + bm = roaring.Or(actualBMs[0], actualBMs[1]) + } else if len(actualBMs) == 1 { + bm = actualBMs[0].Clone() + } + + if bm == nil { + bm = roaring.New() + } + + bm.AddMany(docNums) + + oTFR.iterators[i], err = zap.PostingsIteratorFromBitmap(bm, false, false) + if err != nil { + return nil, nil + } + } + + return oTFR, nil +} diff --git a/index/scorch/segment/zap/posting.go b/index/scorch/segment/zap/posting.go index d0bdd6852..0155e8222 100644 --- a/index/scorch/segment/zap/posting.go +++ b/index/scorch/segment/zap/posting.go @@ -651,7 +651,7 @@ func (i *PostingsIterator) nextDocNumAtOrAfter(atOrAfter uint64) (uint64, bool, return 0, false, nil } - if i.postings.postings == i.ActualBM { + if i.postings == nil || i.postings.postings == i.ActualBM { return i.nextDocNumAtOrAfterClean(atOrAfter) } @@ -699,10 +699,23 @@ func (i *PostingsIterator) nextDocNumAtOrAfter(atOrAfter uint64) (uint64, bool, // no deletions) where the all bitmap is the same as the actual bitmap func (i *PostingsIterator) nextDocNumAtOrAfterClean( atOrAfter uint64) (uint64, bool, error) { - sameChunkNexts := 0 // # of times we called Next() in the same chunk - n := i.Actual.Next() + if !i.includeFreqNorm { + for uint64(n) < atOrAfter && i.Actual.HasNext() { + n = i.Actual.Next() + } + + if uint64(n) < atOrAfter { + return 0, false, nil // couldn't find anything + } + + return uint64(n), true, nil + } + + // freq-norm's needed, so maintain freq-norm chunk reader + sameChunkNexts := 0 // 
# of times we called Next() in the same chunk + nChunk := n / i.postings.sb.chunkFactor for uint64(n) < atOrAfter && i.Actual.HasNext() { @@ -723,19 +736,17 @@ func (i *PostingsIterator) nextDocNumAtOrAfterClean( return 0, false, nil } - if i.includeFreqNorm { - for j := 0; j < sameChunkNexts; j++ { - err := i.currChunkNext(nChunk) - if err != nil { - return 0, false, fmt.Errorf("error optimized currChunkNext: %v", err) - } + for j := 0; j < sameChunkNexts; j++ { + err := i.currChunkNext(nChunk) + if err != nil { + return 0, false, fmt.Errorf("error optimized currChunkNext: %v", err) } + } - if i.currChunk != nChunk || i.currChunkFreqNorm == nil { - err := i.loadChunk(int(nChunk)) - if err != nil { - return 0, false, fmt.Errorf("error loading chunk: %v", err) - } + if i.currChunk != nChunk || i.currChunkFreqNorm == nil { + err := i.loadChunk(int(nChunk)) + if err != nil { + return 0, false, fmt.Errorf("error loading chunk: %v", err) } } @@ -772,6 +783,27 @@ func (i *PostingsIterator) currChunkNext(nChunk uint32) error { return nil } +// DocNum1Hit returns the docNum and true if this is "1-hit" optimized +// and the docNum is available. +func (p *PostingsIterator) DocNum1Hit() (uint64, bool) { + if p.normBits1Hit != 0 && p.docNum1Hit != docNum1HitFinished { + return p.docNum1Hit, true + } + return 0, false +} + +// PostingsIteratorFromBitmap constructs a PostingsIterator given an +// "actual" bitmap. 
+func PostingsIteratorFromBitmap(bm *roaring.Bitmap, + includeFreqNorm, includeLocs bool) (*PostingsIterator, error) { + return &PostingsIterator{ + ActualBM: bm, + Actual: bm.Iterator(), + includeFreqNorm: includeFreqNorm, + includeLocs: includeLocs, + }, nil +} + // Posting is a single entry in a postings list type Posting struct { docNum uint64 diff --git a/index/scorch/snapshot_index.go b/index/scorch/snapshot_index.go index 4c4d92144..8babb31fa 100644 --- a/index/scorch/snapshot_index.go +++ b/index/scorch/snapshot_index.go @@ -435,7 +435,7 @@ func (i *IndexSnapshot) InternalID(id string) (rv index.IndexInternalID, err err } func (i *IndexSnapshot) TermFieldReader(term []byte, field string, includeFreq, - includeNorm, includeTermVectors bool) (tfr index.TermFieldReader, err error) { + includeNorm, includeTermVectors bool) (index.TermFieldReader, error) { rv := i.allocTermFieldReaderDicts(field) rv.term = term diff --git a/index/scorch/snapshot_index_tfr.go b/index/scorch/snapshot_index_tfr.go index 89af3be4c..5d56f1944 100644 --- a/index/scorch/snapshot_index_tfr.go +++ b/index/scorch/snapshot_index_tfr.go @@ -74,7 +74,7 @@ func (i *IndexSnapshotTermFieldReader) Next(preAlloced *index.TermFieldDoc) (*in rv = &index.TermFieldDoc{} } // find the next hit - for i.segmentOffset < len(i.postings) { + for i.segmentOffset < len(i.iterators) { next, err := i.iterators[i.segmentOffset].Next() if err != nil { return nil, err diff --git a/search/searcher/search_disjunction.go b/search/searcher/search_disjunction.go index 882b02ccb..d4db7d9bf 100644 --- a/search/searcher/search_disjunction.go +++ b/search/searcher/search_disjunction.go @@ -40,6 +40,11 @@ func NewDisjunctionSearcher(indexReader index.IndexReader, func newDisjunctionSearcher(indexReader index.IndexReader, qsearchers []search.Searcher, min float64, options search.SearcherOptions, limit bool) (search.Searcher, error) { + rv, err := optimizeDisjunctionSearcher(indexReader, qsearchers, min, options) + if err != 
nil || rv != nil { + return rv, err + } + if len(qsearchers) > DisjunctionHeapTakeover { return newDisjunctionHeapSearcher(indexReader, qsearchers, min, options, limit) @@ -48,6 +53,52 @@ func newDisjunctionSearcher(indexReader index.IndexReader, limit) } +// optimizeDisjunctionSearcher might return an optimized searcher that +// represents the disjunction, especially in the edge case of a +// non-scoring, no-term-vectors disjunction +func optimizeDisjunctionSearcher(indexReader index.IndexReader, + qsearchers []search.Searcher, min float64, options search.SearcherOptions) ( + search.Searcher, error) { + // we cannot use the "unadorned" disjunction optimization if the + // caller wants extra information like freq-norm's for scoring or + // term vectors + if len(qsearchers) <= 1 || !options.NoScore || options.IncludeTermVectors { + return nil, nil + } + + var octx index.OptimizableContext + + for _, searcher := range qsearchers { + o, ok := searcher.(index.Optimizable) + if !ok { + return nil, nil + } + + var err error + octx, err = o.Optimize("disjunction:unadorned", octx) + if err != nil { + return nil, err + } + + if octx == nil { + return nil, nil + } + } + + optimized, err := octx.Finish() + if err != nil || optimized == nil { + return nil, err + } + + tfr, ok := optimized.(index.TermFieldReader) + if !ok { + return nil, nil + } + + return newTermSearcherFromReader(indexReader, tfr, + []byte(""), "*", 1.0, options) +} + func tooManyClauses(count int) bool { if DisjunctionMaxClauseCount != 0 && count > DisjunctionMaxClauseCount { return true diff --git a/search/searcher/search_term.go b/search/searcher/search_term.go index e805bf121..c90008eef 100644 --- a/search/searcher/search_term.go +++ b/search/searcher/search_term.go @@ -38,23 +38,7 @@ type TermSearcher struct { } func NewTermSearcher(indexReader index.IndexReader, term string, field string, boost float64, options search.SearcherOptions) (*TermSearcher, error) { - termBytes := []byte(term) - needFreqNorm 
:= !options.NoScore - reader, err := indexReader.TermFieldReader(termBytes, field, needFreqNorm, needFreqNorm, options.IncludeTermVectors) - if err != nil { - return nil, err - } - count, err := indexReader.DocCount() - if err != nil { - _ = reader.Close() - return nil, err - } - scorer := scorer.NewTermQueryScorer(termBytes, field, boost, count, reader.Count(), options) - return &TermSearcher{ - indexReader: indexReader, - reader: reader, - scorer: scorer, - }, nil + return NewTermSearcherBytes(indexReader, []byte(term), field, boost, options) } func NewTermSearcherBytes(indexReader index.IndexReader, term []byte, field string, boost float64, options search.SearcherOptions) (*TermSearcher, error) { @@ -63,6 +47,11 @@ func NewTermSearcherBytes(indexReader index.IndexReader, term []byte, field stri if err != nil { return nil, err } + return newTermSearcherFromReader(indexReader, reader, term, field, boost, options) +} + +func newTermSearcherFromReader(indexReader index.IndexReader, reader index.TermFieldReader, + term []byte, field string, boost float64, options search.SearcherOptions) (*TermSearcher, error) { count, err := indexReader.DocCount() if err != nil { _ = reader.Close() From 68561b5c8e9888983989afb66711f0fc96ac5e48 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Sun, 20 Jan 2019 21:58:44 -0800 Subject: [PATCH 529/728] low-frequency terms heuristic for unadorned disj. optimization The heuristic check in this commit skips the unadorned disjunction optimization in the case of a disjunction of low-frequency terms. 
--- index/scorch/optimize.go | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/index/scorch/optimize.go b/index/scorch/optimize.go index a5846121a..37b61af20 100644 --- a/index/scorch/optimize.go +++ b/index/scorch/optimize.go @@ -37,6 +37,8 @@ func (s *IndexSnapshotTermFieldReader) Optimize(kind string, return octx, nil } +var OptimizeDisjunctionUnadornedMinChildCardinality = uint64(256) + // ---------------------------------------------------------------- func (s *IndexSnapshotTermFieldReader) optimizeConjunction( @@ -150,6 +152,31 @@ func (o *OptimizeTFRDisjunctionUnadorned) Finish() (rv index.Optimized, err erro return nil, nil } + for i := range o.snapshot.segment { + var cMax uint64 + + for _, tfr := range o.tfrs { + itr, ok := tfr.iterators[i].(*zap.PostingsIterator) + if !ok { + return nil, nil + } + + if itr.ActualBM != nil { + c := itr.ActualBM.GetCardinality() + if cMax < c { + cMax = c + } + } + } + + // Heuristic to skip the optimization if all the constituent + // bitmaps are too small, where the processing & resource + // overhead to create the OR'ed bitmap outweighs the benefit. + if cMax < OptimizeDisjunctionUnadornedMinChildCardinality { + return nil, nil + } + } + // We use an artificial term and field because the optimized // termFieldReader can represent multiple terms and fields. 
oTFR := &IndexSnapshotTermFieldReader{ From b1b8cecef9045b23f21d02025fecc78b347ece1d Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Mon, 21 Jan 2019 07:56:54 -0800 Subject: [PATCH 530/728] unadorned disjunction optimization checks min param --- search/searcher/search_disjunction.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/search/searcher/search_disjunction.go b/search/searcher/search_disjunction.go index d4db7d9bf..520be0fb1 100644 --- a/search/searcher/search_disjunction.go +++ b/search/searcher/search_disjunction.go @@ -61,8 +61,8 @@ func optimizeDisjunctionSearcher(indexReader index.IndexReader, search.Searcher, error) { // we cannot use the "unadorned" disjunction optimization if the // caller wants extra information like freq-norm's for scoring or - // term vectors - if len(qsearchers) <= 1 || !options.NoScore || options.IncludeTermVectors { + // term vectors, or leverages the min feature + if len(qsearchers) <= 1 || min > 1 || !options.NoScore || options.IncludeTermVectors { return nil, nil } From 4ac43411b730fe85a9ad2533e73627871766188f Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Mon, 21 Jan 2019 08:06:42 -0800 Subject: [PATCH 531/728] optimize unadorned conjunctions (API change) NOTE: API change -- NewConjunctionSearcher() now returns a search.Searcher instead of the previous *ConjunctionSearcher This commit provides an optimization for "unadorned" conjunctions similar to the previous optimization for unadorned disjunctions. As part of this, the optimizeCompositeSearcher() func was renamed/refactored for reuse for both disjunctions and conjunctions. Regarding perf microbenchmarks (bleve-query) on a 200K document en-wiki scorch index... On a query-string search for "+www +com +http" (231 hits)... before the change, with normal scoring - ~689 q/sec w/ NoScore:true - ~2184 q/sec w/ NoScore:true & unadorned conj. 
optimization - ~10148 q/sec --- index/scorch/optimize.go | 179 +++++++++++++++++++++++++- index/scorch/segment/empty.go | 2 + index/scorch/segment/zap/posting.go | 26 +++- search/searcher/search_conjunction.go | 40 +++--- search/searcher/search_disjunction.go | 33 +++-- search/searcher/search_phrase.go | 2 +- 6 files changed, 235 insertions(+), 47 deletions(-) diff --git a/index/scorch/optimize.go b/index/scorch/optimize.go index 37b61af20..3e71e3696 100644 --- a/index/scorch/optimize.go +++ b/index/scorch/optimize.go @@ -30,6 +30,10 @@ func (s *IndexSnapshotTermFieldReader) Optimize(kind string, return s.optimizeConjunction(octx) } + if kind == "conjunction:unadorned" { + return s.optimizeConjunctionUnadorned(octx) + } + if kind == "disjunction:unadorned" { return s.optimizeDisjunctionUnadorned(octx) } @@ -95,7 +99,9 @@ func (o *OptimizeTFRConjunction) Finish() (index.Optimized, error) { } // in this conjunction optimization, the postings iterators - // will all share the same AND'ed together actual bitmap + // will all share the same AND'ed together actual bitmap. The + // regular conjunction searcher machinery will still be used, + // but the underlying bitmap will be smaller. for _, tfr := range o.tfrs { itr, ok := tfr.iterators[i].(*zap.PostingsIterator) if ok && itr.ActualBM != nil { @@ -110,6 +116,177 @@ func (o *OptimizeTFRConjunction) Finish() (index.Optimized, error) { // ---------------------------------------------------------------- +// An "unadorned" conjunction optimization is appropriate when +// additional or subsidiary information like freq-norm's and +// term-vectors are not required, and instead only the internal-id's +// are needed. 
+func (s *IndexSnapshotTermFieldReader) optimizeConjunctionUnadorned( + octx index.OptimizableContext) (index.OptimizableContext, error) { + if octx == nil { + octx = &OptimizeTFRConjunctionUnadorned{snapshot: s.snapshot} + } + + o, ok := octx.(*OptimizeTFRConjunctionUnadorned) + if !ok { + return nil, nil + } + + if o.snapshot != s.snapshot { + return nil, fmt.Errorf("tried to optimize unadorned conjunction across different snapshots") + } + + o.tfrs = append(o.tfrs, s) + + return o, nil +} + +type OptimizeTFRConjunctionUnadorned struct { + snapshot *IndexSnapshot + + tfrs []*IndexSnapshotTermFieldReader +} + +var OptimizeTFRConjunctionUnadornedTerm = []byte("") +var OptimizeTFRConjunctionUnadornedField = "*" + +// Finish of an unadorned conjunction optimization will compute a +// termFieldReader with an "actual" bitmap that represents the +// constituent bitmaps AND'ed together. This termFieldReader cannot +// provide any freq-norm or termVector associated information. +func (o *OptimizeTFRConjunctionUnadorned) Finish() (rv index.Optimized, err error) { + if len(o.tfrs) <= 1 { + return nil, nil + } + + // We use an artificial term and field because the optimized + // termFieldReader can represent multiple terms and fields. + oTFR := &IndexSnapshotTermFieldReader{ + term: OptimizeTFRConjunctionUnadornedTerm, + field: OptimizeTFRConjunctionUnadornedField, + snapshot: o.snapshot, + iterators: make([]segment.PostingsIterator, len(o.snapshot.segment)), + segmentOffset: 0, + includeFreq: false, + includeNorm: false, + includeTermVectors: false, + } + + var actualBMs []*roaring.Bitmap // Collected from regular posting lists. + +OUTER: + for i := range o.snapshot.segment { + actualBMs = actualBMs[:0] + + var docNum1HitLast uint64 + var docNum1HitLastOk bool + + for _, tfr := range o.tfrs { + if _, ok := tfr.iterators[i].(*segment.EmptyPostingsIterator); ok { + // An empty postings iterator means the entire AND is empty. 
+ oTFR.iterators[i] = segment.AnEmptyPostingsIterator + continue OUTER + } + + itr, ok := tfr.iterators[i].(*zap.PostingsIterator) + if !ok { + // We optimize zap postings iterators only. + return nil, nil + } + + // If the postings iterator is "1-hit" optimized, then we + // can perform several optimizations up-front here. + docNum1Hit, ok := itr.DocNum1Hit() + if ok { + if docNum1Hit == zap.DocNum1HitFinished { + // An empty docNum here means the entire AND is empty. + oTFR.iterators[i] = segment.AnEmptyPostingsIterator + continue OUTER + } + + if docNum1HitLastOk && docNum1HitLast != docNum1Hit { + // The docNum1Hit doesn't match the previous + // docNum1HitLast, so the entire AND is empty. + oTFR.iterators[i] = segment.AnEmptyPostingsIterator + continue OUTER + } + + docNum1HitLast = docNum1Hit + docNum1HitLastOk = true + + continue + } + + if itr.ActualBM == nil { + // An empty actual bitmap means the entire AND is empty. + oTFR.iterators[i] = segment.AnEmptyPostingsIterator + continue OUTER + } + + // Collect the actual bitmap for more processing later. + actualBMs = append(actualBMs, itr.ActualBM) + } + + if docNum1HitLastOk { + // We reach here if all the 1-hit optimized posting + // iterators had the same 1-hit docNum, so we can check if + // our collected actual bitmaps also have that docNum. + for _, bm := range actualBMs { + if !bm.Contains(uint32(docNum1HitLast)) { + // The docNum1Hit isn't in one of our actual + // bitmaps, so the entire AND is empty. + oTFR.iterators[i] = segment.AnEmptyPostingsIterator + continue OUTER + } + } + + // The actual bitmaps and docNum1Hits all contain or have + // the same 1-hit docNum, so that's our AND'ed result. + oTFR.iterators[i], err = zap.PostingsIteratorFrom1Hit( + docNum1HitLast, zap.NormBits1Hit, false, false) + if err != nil { + return nil, nil + } + + continue OUTER + } + + if len(actualBMs) == 0 { + // If we've collected no actual bitmaps at this point, + // then the entire AND is empty. 
+ oTFR.iterators[i] = segment.AnEmptyPostingsIterator + continue OUTER + } + + if len(actualBMs) == 1 { + // If we've only 1 actual bitmap, then that's our result. + oTFR.iterators[i], err = zap.PostingsIteratorFromBitmap( + actualBMs[0], false, false) + if err != nil { + return nil, nil + } + + continue OUTER + } + + // Else, AND together our collected bitmaps as our result. + bm := roaring.And(actualBMs[0], actualBMs[1]) + + for _, actualBM := range actualBMs[2:] { + bm.And(actualBM) + } + + oTFR.iterators[i], err = zap.PostingsIteratorFromBitmap( + bm, false, false) + if err != nil { + return nil, nil + } + } + + return oTFR, nil +} + +// ---------------------------------------------------------------- + // An "unadorned" disjunction optimization is appropriate when // additional or subsidiary information like freq-norm's and // term-vectors are not required, and instead only the internal-id's diff --git a/index/scorch/segment/empty.go b/index/scorch/segment/empty.go index af50d0aaf..165a01bc1 100644 --- a/index/scorch/segment/empty.go +++ b/index/scorch/segment/empty.go @@ -125,3 +125,5 @@ func (e *EmptyPostingsIterator) Next() (Posting, error) { func (e *EmptyPostingsIterator) Size() int { return 0 } + +var AnEmptyPostingsIterator = &EmptyPostingsIterator{} diff --git a/index/scorch/segment/zap/posting.go b/index/scorch/segment/zap/posting.go index 0155e8222..26378c27e 100644 --- a/index/scorch/segment/zap/posting.go +++ b/index/scorch/segment/zap/posting.go @@ -92,7 +92,9 @@ func under32Bits(x uint64) bool { return x <= mask31Bits } -const docNum1HitFinished = math.MaxUint64 +const DocNum1HitFinished = math.MaxUint64 + +var NormBits1Hit = uint64(math.Float32bits(float32(1))) // PostingsList is an in-memory representation of a postings list type PostingsList struct { @@ -199,7 +201,7 @@ func (p *PostingsList) iterator(includeFreq, includeNorm, includeLocs bool, rv.normBits1Hit = p.normBits1Hit if p.except != nil && p.except.Contains(uint32(rv.docNum1Hit)) { - 
rv.docNum1Hit = docNum1HitFinished + rv.docNum1Hit = DocNum1HitFinished } return rv @@ -634,16 +636,16 @@ func (i *PostingsIterator) nextBytes() ( // sets up the currChunk / loc related fields of the iterator. func (i *PostingsIterator) nextDocNumAtOrAfter(atOrAfter uint64) (uint64, bool, error) { if i.normBits1Hit != 0 { - if i.docNum1Hit == docNum1HitFinished { + if i.docNum1Hit == DocNum1HitFinished { return 0, false, nil } if i.docNum1Hit < atOrAfter { // advanced past our 1-hit - i.docNum1Hit = docNum1HitFinished // consume our 1-hit docNum + i.docNum1Hit = DocNum1HitFinished // consume our 1-hit docNum return 0, false, nil } docNum := i.docNum1Hit - i.docNum1Hit = docNum1HitFinished // consume our 1-hit docNum + i.docNum1Hit = DocNum1HitFinished // consume our 1-hit docNum return docNum, true, nil } @@ -786,7 +788,7 @@ func (i *PostingsIterator) currChunkNext(nChunk uint32) error { // DocNum1Hit returns the docNum and true if this is "1-hit" optimized // and the docNum is available. func (p *PostingsIterator) DocNum1Hit() (uint64, bool) { - if p.normBits1Hit != 0 && p.docNum1Hit != docNum1HitFinished { + if p.normBits1Hit != 0 && p.docNum1Hit != DocNum1HitFinished { return p.docNum1Hit, true } return 0, false @@ -804,6 +806,18 @@ func PostingsIteratorFromBitmap(bm *roaring.Bitmap, }, nil } +// PostingsIteratorFrom1Hit constructs a PostingsIterator given a +// 1-hit docNum. 
+func PostingsIteratorFrom1Hit(docNum1Hit, normBits1Hit uint64, + includeFreqNorm, includeLocs bool) (*PostingsIterator, error) { + return &PostingsIterator{ + docNum1Hit: docNum1Hit, + normBits1Hit: normBits1Hit, + includeFreqNorm: includeFreqNorm, + includeLocs: includeLocs, + }, nil +} + // Posting is a single entry in a postings list type Posting struct { docNum uint64 diff --git a/search/searcher/search_conjunction.go b/search/searcher/search_conjunction.go index e89848b79..0b21ec53a 100644 --- a/search/searcher/search_conjunction.go +++ b/search/searcher/search_conjunction.go @@ -43,14 +43,26 @@ type ConjunctionSearcher struct { options search.SearcherOptions } -func NewConjunctionSearcher(indexReader index.IndexReader, qsearchers []search.Searcher, options search.SearcherOptions) (*ConjunctionSearcher, error) { - // build the downstream searchers +func NewConjunctionSearcher(indexReader index.IndexReader, + qsearchers []search.Searcher, options search.SearcherOptions) ( + search.Searcher, error) { + // build the sorted downstream searchers searchers := make(OrderedSearcherList, len(qsearchers)) for i, searcher := range qsearchers { searchers[i] = searcher } - // sort the searchers sort.Sort(searchers) + + // attempt the "unadorned" conjunction optimization only when we + // do not need extra information like freq-norm's or term vectors + if len(searchers) > 1 && options.NoScore && !options.IncludeTermVectors { + rv, err := optimizeCompositeSearcher("conjunction:unadorned", + indexReader, searchers, options) + if err != nil || rv != nil { + return rv, err + } + } + // build our searcher rv := ConjunctionSearcher{ indexReader: indexReader, @@ -63,24 +75,10 @@ func NewConjunctionSearcher(indexReader index.IndexReader, qsearchers []search.S // attempt push-down conjunction optimization when there's >1 searchers if len(searchers) > 1 { - var octx index.OptimizableContext - - for _, searcher := range searchers { - o, ok := searcher.(index.Optimizable) - if ok { - 
var err error - octx, err = o.Optimize("conjunction", octx) - if err != nil { - return nil, err - } - } - } - - if octx != nil { - _, err := octx.Finish() - if err != nil { - return nil, err - } + rv, err := optimizeCompositeSearcher("conjunction", + indexReader, searchers, options) + if err != nil || rv != nil { + return rv, err } } diff --git a/search/searcher/search_disjunction.go b/search/searcher/search_disjunction.go index 520be0fb1..8b9a0dade 100644 --- a/search/searcher/search_disjunction.go +++ b/search/searcher/search_disjunction.go @@ -40,9 +40,16 @@ func NewDisjunctionSearcher(indexReader index.IndexReader, func newDisjunctionSearcher(indexReader index.IndexReader, qsearchers []search.Searcher, min float64, options search.SearcherOptions, limit bool) (search.Searcher, error) { - rv, err := optimizeDisjunctionSearcher(indexReader, qsearchers, min, options) - if err != nil || rv != nil { - return rv, err + // attempt the "unadorned" disjunction optimization only when we + // do not need extra information like freq-norm's or term vectors + // and the requested min is simple + if len(qsearchers) > 1 && min <= 1 && + options.NoScore && !options.IncludeTermVectors { + rv, err := optimizeCompositeSearcher("disjunction:unadorned", + indexReader, qsearchers, options) + if err != nil || rv != nil { + return rv, err + } } if len(qsearchers) > DisjunctionHeapTakeover { @@ -53,19 +60,9 @@ func newDisjunctionSearcher(indexReader index.IndexReader, limit) } -// optimizeDisjunctionSearcher might return an optimized searcher that -// represents the disjunction, especially in the edge case of a -// non-scoring, no-term-vectors disjunction -func optimizeDisjunctionSearcher(indexReader index.IndexReader, - qsearchers []search.Searcher, min float64, options search.SearcherOptions) ( - search.Searcher, error) { - // we cannot use the "unadorned" disjunction optimization if the - // caller wants extra information like freq-norm's for scoring or - // term vectors, or leverages 
the min feature - if len(qsearchers) <= 1 || min > 1 || !options.NoScore || options.IncludeTermVectors { - return nil, nil - } - +func optimizeCompositeSearcher(optimizationKind string, + indexReader index.IndexReader, qsearchers []search.Searcher, + options search.SearcherOptions) (search.Searcher, error) { var octx index.OptimizableContext for _, searcher := range qsearchers { @@ -75,7 +72,7 @@ func optimizeDisjunctionSearcher(indexReader index.IndexReader, } var err error - octx, err = o.Optimize("disjunction:unadorned", octx) + octx, err = o.Optimize(optimizationKind, octx) if err != nil { return nil, err } @@ -96,7 +93,7 @@ func optimizeDisjunctionSearcher(indexReader index.IndexReader, } return newTermSearcherFromReader(indexReader, tfr, - []byte(""), "*", 1.0, options) + []byte(optimizationKind), "*", 1.0, options) } func tooManyClauses(count int) bool { diff --git a/search/searcher/search_phrase.go b/search/searcher/search_phrase.go index 08eb13338..51b7e5bd8 100644 --- a/search/searcher/search_phrase.go +++ b/search/searcher/search_phrase.go @@ -32,7 +32,7 @@ func init() { } type PhraseSearcher struct { - mustSearcher *ConjunctionSearcher + mustSearcher search.Searcher queryNorm float64 currMust *search.DocumentMatch terms [][]string From a1c886716cd508e12dce4d7b2dc1fcf7f170e888 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Mon, 21 Jan 2019 20:00:23 -0800 Subject: [PATCH 532/728] API: renamed SearchRequest.NoScore (bool) to Score (string) Values for SearchRequest.Score include... "" - basic TF/IDF scoring, and the default behavior. "none" - non-scoring search request. 
--- index_impl.go | 2 +- search.go | 5 ++++- search/search.go | 2 +- search/searcher/search_conjunction.go | 3 ++- search/searcher/search_disjunction.go | 2 +- search/searcher/search_term.go | 2 +- 6 files changed, 10 insertions(+), 6 deletions(-) diff --git a/index_impl.go b/index_impl.go index ad9b3505e..fe61b8064 100644 --- a/index_impl.go +++ b/index_impl.go @@ -458,7 +458,7 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr searcher, err := req.Query.Searcher(indexReader, i.m, search.SearcherOptions{ Explain: req.Explain, IncludeTermVectors: req.IncludeLocations || req.Highlight != nil, - NoScore: req.NoScore, + Score: req.Score, }) if err != nil { return nil, err diff --git a/search.go b/search.go index 019d6fd04..ebd69971e 100644 --- a/search.go +++ b/search.go @@ -261,6 +261,7 @@ func (h *HighlightRequest) AddField(field string) { // Explain triggers inclusion of additional search // result score explanations. // Sort describes the desired order for the results to be returned. +// Score controls the kind of scoring performed // // A special field named "*" can be used to return all fields. 
type SearchRequest struct { @@ -273,7 +274,7 @@ type SearchRequest struct { Explain bool `json:"explain"` Sort search.SortOrder `json:"sort"` IncludeLocations bool `json:"includeLocations"` - NoScore bool `json:"noScore"` + Score string `json:"score,omitempty"` } func (r *SearchRequest) Validate() error { @@ -323,6 +324,7 @@ func (r *SearchRequest) UnmarshalJSON(input []byte) error { Explain bool `json:"explain"` Sort []json.RawMessage `json:"sort"` IncludeLocations bool `json:"includeLocations"` + Score string `json:"score"` } err := json.Unmarshal(input, &temp) @@ -349,6 +351,7 @@ func (r *SearchRequest) UnmarshalJSON(input []byte) error { r.Fields = temp.Fields r.Facets = temp.Facets r.IncludeLocations = temp.IncludeLocations + r.Score = temp.Score r.Query, err = query.ParseQuery(temp.Q) if err != nil { return err diff --git a/search/search.go b/search/search.go index b241de668..7f6777df6 100644 --- a/search/search.go +++ b/search/search.go @@ -280,7 +280,7 @@ type Searcher interface { type SearcherOptions struct { Explain bool IncludeTermVectors bool - NoScore bool + Score string } // SearchContext represents the context around a single search diff --git a/search/searcher/search_conjunction.go b/search/searcher/search_conjunction.go index 0b21ec53a..ac737bccd 100644 --- a/search/searcher/search_conjunction.go +++ b/search/searcher/search_conjunction.go @@ -55,7 +55,8 @@ func NewConjunctionSearcher(indexReader index.IndexReader, // attempt the "unadorned" conjunction optimization only when we // do not need extra information like freq-norm's or term vectors - if len(searchers) > 1 && options.NoScore && !options.IncludeTermVectors { + if len(searchers) > 1 && + options.Score == "none" && !options.IncludeTermVectors { rv, err := optimizeCompositeSearcher("conjunction:unadorned", indexReader, searchers, options) if err != nil || rv != nil { diff --git a/search/searcher/search_disjunction.go b/search/searcher/search_disjunction.go index 8b9a0dade..6a296b68f 100644 
--- a/search/searcher/search_disjunction.go +++ b/search/searcher/search_disjunction.go @@ -44,7 +44,7 @@ func newDisjunctionSearcher(indexReader index.IndexReader, // do not need extra information like freq-norm's or term vectors // and the requested min is simple if len(qsearchers) > 1 && min <= 1 && - options.NoScore && !options.IncludeTermVectors { + options.Score == "none" && !options.IncludeTermVectors { rv, err := optimizeCompositeSearcher("disjunction:unadorned", indexReader, qsearchers, options) if err != nil || rv != nil { diff --git a/search/searcher/search_term.go b/search/searcher/search_term.go index c90008eef..c1af74c76 100644 --- a/search/searcher/search_term.go +++ b/search/searcher/search_term.go @@ -42,7 +42,7 @@ func NewTermSearcher(indexReader index.IndexReader, term string, field string, b } func NewTermSearcherBytes(indexReader index.IndexReader, term []byte, field string, boost float64, options search.SearcherOptions) (*TermSearcher, error) { - needFreqNorm := !options.NoScore + needFreqNorm := options.Score != "none" reader, err := indexReader.TermFieldReader(term, field, needFreqNorm, needFreqNorm, options.IncludeTermVectors) if err != nil { return nil, err From e9779534840ac72a78b383fd904edbabe95f712a Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Tue, 22 Jan 2019 21:26:44 -0800 Subject: [PATCH 533/728] TestScorchCompositeSearchOptimizations --- index/scorch/optimize.go | 10 +- search/searcher/base_test.go | 2 +- search/searcher/search_conjunction_test.go | 191 ++++++++++++++++++++- 3 files changed, 198 insertions(+), 5 deletions(-) diff --git a/index/scorch/optimize.go b/index/scorch/optimize.go index 3e71e3696..b33e3be3d 100644 --- a/index/scorch/optimize.go +++ b/index/scorch/optimize.go @@ -24,17 +24,21 @@ import ( "github.com/blevesearch/bleve/index/scorch/segment/zap" ) +var OptimizeConjunction = true +var OptimizeConjunctionUnadorned = true +var OptimizeDisjunctionUnadorned = true + func (s *IndexSnapshotTermFieldReader) 
Optimize(kind string, octx index.OptimizableContext) (index.OptimizableContext, error) { - if kind == "conjunction" { + if OptimizeConjunction && kind == "conjunction" { return s.optimizeConjunction(octx) } - if kind == "conjunction:unadorned" { + if OptimizeConjunctionUnadorned && kind == "conjunction:unadorned" { return s.optimizeConjunctionUnadorned(octx) } - if kind == "disjunction:unadorned" { + if OptimizeDisjunctionUnadorned && kind == "disjunction:unadorned" { return s.optimizeDisjunctionUnadorned(octx) } diff --git a/search/searcher/base_test.go b/search/searcher/base_test.go index feb00a6aa..a4644b6b3 100644 --- a/search/searcher/base_test.go +++ b/search/searcher/base_test.go @@ -27,7 +27,7 @@ import ( "github.com/blevesearch/bleve/index/upsidedown" ) -var twoDocIndex index.Index //= upside_down.NewUpsideDownCouch(inmem.MustOpen()) +var twoDocIndex index.Index func init() { twoDocIndex = initTwoDocUpsideDown() diff --git a/search/searcher/search_conjunction_test.go b/search/searcher/search_conjunction_test.go index 2107743bd..4a71381be 100644 --- a/search/searcher/search_conjunction_test.go +++ b/search/searcher/search_conjunction_test.go @@ -15,14 +15,17 @@ package searcher import ( + "io/ioutil" + "os" + "strings" "testing" "github.com/blevesearch/bleve/index" + "github.com/blevesearch/bleve/index/scorch" "github.com/blevesearch/bleve/search" ) func TestConjunctionSearch(t *testing.T) { - twoDocIndexReader, err := twoDocIndex.Reader() if err != nil { t.Error(err) @@ -221,3 +224,189 @@ func TestConjunctionSearch(t *testing.T) { } } } + +type compositeSearchOptimizationTest struct { + fieldTerms []string + expectEmpty string +} + +func TestScorchCompositeSearchOptimizations(t *testing.T) { + dir, _ := ioutil.TempDir("", "scorchTwoDoc") + defer func() { + _ = os.RemoveAll(dir) + }() + + twoDocIndex := initTwoDocScorch(dir) + + twoDocIndexReader, err := twoDocIndex.Reader() + if err != nil { + t.Error(err) + } + defer func() { + err := 
twoDocIndexReader.Close() + if err != nil { + t.Fatal(err) + } + }() + + tests := []compositeSearchOptimizationTest{ + {fieldTerms: []string{}, + expectEmpty: "conjunction,disjunction"}, + {fieldTerms: []string{"name:marty"}, + expectEmpty: ""}, + {fieldTerms: []string{"name:marty", "desc:beer"}, + expectEmpty: ""}, + {fieldTerms: []string{"name:marty", "name:marty"}, + expectEmpty: ""}, + {fieldTerms: []string{"name:marty", "desc:beer", "title:mister", "street:couchbase"}, + expectEmpty: "conjunction"}, + {fieldTerms: []string{"name:steve", "desc:beer", "title:mister", "street:couchbase"}, + expectEmpty: ""}, + + {fieldTerms: []string{"name:NotARealName"}, + expectEmpty: "conjunction,disjunction"}, + {fieldTerms: []string{"name:NotARealName", "name:marty"}, + expectEmpty: "conjunction"}, + {fieldTerms: []string{"name:NotARealName", "name:marty", "desc:beer"}, + expectEmpty: "conjunction"}, + {fieldTerms: []string{"name:NotARealName", "name:marty", "name:marty"}, + expectEmpty: "conjunction"}, + {fieldTerms: []string{"name:NotARealName", "name:marty", "desc:beer", "title:mister", "street:couchbase"}, + expectEmpty: "conjunction"}, + } + + // The theme of this unit test is that given one of the above + // search test cases -- no matter what searcher options we + // provide, across either conjunctions or disjunctions, whether we + // have optimizations that are enabled or disabled, the set of doc + // ID's from the search results from any of those combinations + // should be the same. 
+ searcherOptionsToCompare := []search.SearcherOptions{ + search.SearcherOptions{}, + search.SearcherOptions{Explain: true}, + search.SearcherOptions{IncludeTermVectors: true}, + search.SearcherOptions{IncludeTermVectors: true, Explain: true}, + search.SearcherOptions{Score: "none"}, + search.SearcherOptions{Score: "none", IncludeTermVectors: true}, + search.SearcherOptions{Score: "none", IncludeTermVectors: true, Explain: true}, + search.SearcherOptions{Score: "none", Explain: true}, + } + + testScorchCompositeSearchOptimizations(t, twoDocIndexReader, tests, + searcherOptionsToCompare, "conjunction") + + testScorchCompositeSearchOptimizations(t, twoDocIndexReader, tests, + searcherOptionsToCompare, "disjunction") +} + +func testScorchCompositeSearchOptimizations(t *testing.T, indexReader index.IndexReader, + tests []compositeSearchOptimizationTest, + searcherOptionsToCompare []search.SearcherOptions, + compositeKind string) { + for testi := range tests { + resultsToCompare := map[string]bool{} + + testScorchCompositeSearchOptimizationsHelper(t, indexReader, tests, testi, + searcherOptionsToCompare, compositeKind, false, resultsToCompare) + + testScorchCompositeSearchOptimizationsHelper(t, indexReader, tests, testi, + searcherOptionsToCompare, compositeKind, true, resultsToCompare) + } +} + +func testScorchCompositeSearchOptimizationsHelper( + t *testing.T, indexReader index.IndexReader, + tests []compositeSearchOptimizationTest, testi int, + searcherOptionsToCompare []search.SearcherOptions, + compositeKind string, allowOptimizations bool, resultsToCompare map[string]bool) { + // Save the global allowed optimization settings to restore later. 
+ optimizeConjunction := scorch.OptimizeConjunction + optimizeConjunctionUnadorned := scorch.OptimizeConjunctionUnadorned + optimizeDisjunctionUnadorned := scorch.OptimizeDisjunctionUnadorned + optimizeDisjunctionUnadornedMinChildCardinality := + scorch.OptimizeDisjunctionUnadornedMinChildCardinality + + scorch.OptimizeConjunction = allowOptimizations + scorch.OptimizeConjunctionUnadorned = allowOptimizations + scorch.OptimizeDisjunctionUnadorned = allowOptimizations + + if allowOptimizations { + scorch.OptimizeDisjunctionUnadornedMinChildCardinality = uint64(0) + } + + defer func() { + scorch.OptimizeConjunction = optimizeConjunction + scorch.OptimizeConjunctionUnadorned = optimizeConjunctionUnadorned + scorch.OptimizeDisjunctionUnadorned = optimizeDisjunctionUnadorned + scorch.OptimizeDisjunctionUnadornedMinChildCardinality = + optimizeDisjunctionUnadornedMinChildCardinality + }() + + test := tests[testi] + + for searcherOptionsI, searcherOptions := range searcherOptionsToCompare { + // Construct the leaf term searchers. + var searchers []search.Searcher + + for _, fieldTerm := range test.fieldTerms { + ft := strings.Split(fieldTerm, ":") + field := ft[0] + term := ft[1] + + searcher, err := NewTermSearcher(indexReader, term, field, 1.0, searcherOptions) + if err != nil { + t.Fatal(err) + } + + searchers = append(searchers, searcher) + } + + // Construct the composite searcher. 
+ var cs search.Searcher + var err error + if compositeKind == "conjunction" { + cs, err = NewConjunctionSearcher(indexReader, searchers, searcherOptions) + } else { + cs, err = NewDisjunctionSearcher(indexReader, searchers, 0, searcherOptions) + } + if err != nil { + t.Fatal(err) + } + + ctx := &search.SearchContext{ + DocumentMatchPool: search.NewDocumentMatchPool(10, 0), + } + + next, err := cs.Next(ctx) + i := 0 + for err == nil && next != nil { + docID, err := indexReader.ExternalID(next.IndexInternalID) + if err != nil { + t.Fatal(err) + } + + if searcherOptionsI == 0 && allowOptimizations == false { + resultsToCompare[string(docID)] = true + } else { + if !resultsToCompare[string(docID)] { + t.Errorf("missing %s", string(docID)) + } + } + + next, err = cs.Next(ctx) + i++ + } + + if i != len(resultsToCompare) { + t.Errorf("mismatched count, %d vs %d", i, len(resultsToCompare)) + } + + if i == 0 && !strings.Contains(test.expectEmpty, compositeKind) { + t.Errorf("testi: %d, compositeKind: %s, allowOptimizations: %t,"+ + " searcherOptionsI: %d, searcherOptions: %#v,"+ + " expected some results but got no results on test: %#v", + testi, compositeKind, allowOptimizations, + searcherOptionsI, searcherOptions, test) + } + } +} From c8e737a945472f5f266c0799005216080443fbb8 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Tue, 29 Jan 2019 09:36:53 -0800 Subject: [PATCH 534/728] MB-32846 - more aggressively removeOldData() in scorch persister The scorch persister loop has an optimization to not block/wait if it sees that the latest root epoch has changed during the current persistence round, so that the persister can continue immediately to the top of the persister loop. But, this "continue OUTER" optimization would skip a removeOldData() invocation. This meant that removeOldData() wouldn't be invoked for potentially a long time in this kind of indexing-heavy scenario. 
While indexing 200K wiki docs (indexing only using bleve-blast), before this change, # of files could grow up to >100 zap segment files. After this change, the # of files stayed nearer the neighborhood of 10~20 files. See also: https://issues.couchbase.com/browse/MB-32846 Also, cleaned up some whitespace lines seen while trying to diagnose this issue. --- index/scorch/introducer.go | 4 +++- index/scorch/persister.go | 5 ++++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/index/scorch/introducer.go b/index/scorch/introducer.go index ac627796f..2d04bd38e 100644 --- a/index/scorch/introducer.go +++ b/index/scorch/introducer.go @@ -376,7 +376,6 @@ func (s *Scorch) introduceMerge(nextMerge *segmentMerge) { fileSegments++ } } - } // before the newMerge introduction, need to clean the newly @@ -393,6 +392,7 @@ func (s *Scorch) introduceMerge(nextMerge *segmentMerge) { } } } + // In case where all the docs in the newly merged segment getting // deleted by the time we reach here, can skip the introduction. 
if nextMerge.new != nil && @@ -424,6 +424,7 @@ func (s *Scorch) introduceMerge(nextMerge *segmentMerge) { newSnapshot.AddRef() // 1 ref for the nextMerge.notify response newSnapshot.updateSize() + s.rootLock.Lock() // swap in new index snapshot newSnapshot.epoch = s.nextSnapshotEpoch @@ -501,6 +502,7 @@ func (s *Scorch) revertToSnapshot(revertTo *snapshotReversion) error { } newSnapshot.updateSize() + // swap in new snapshot rootPrev := s.root s.root = newSnapshot diff --git a/index/scorch/persister.go b/index/scorch/persister.go index 2ba50867d..f75945d4d 100644 --- a/index/scorch/persister.go +++ b/index/scorch/persister.go @@ -111,6 +111,7 @@ OUTER: if ew != nil && ew.epoch > lastMergedEpoch { lastMergedEpoch = ew.epoch } + lastMergedEpoch, persistWatchers = s.pausePersisterForMergerCatchUp(lastPersistedEpoch, lastMergedEpoch, persistWatchers, po) @@ -178,6 +179,7 @@ OUTER: s.fireEvent(EventKindPersisterProgress, time.Since(startTime)) if changed { + s.removeOldData() atomic.AddUint64(&s.stats.TotPersistLoopProgress, 1) continue OUTER } @@ -659,13 +661,13 @@ func (s *Scorch) LoadSnapshot(epoch uint64) (rv *IndexSnapshot, err error) { } func (s *Scorch) loadSnapshot(snapshot *bolt.Bucket) (*IndexSnapshot, error) { - rv := &IndexSnapshot{ parent: s, internal: make(map[string][]byte), refs: 1, creator: "loadSnapshot", } + var running uint64 c := snapshot.Cursor() for k, _ := c.First(); k != nil; k, _ = c.Next() { @@ -701,6 +703,7 @@ func (s *Scorch) loadSnapshot(snapshot *bolt.Bucket) (*IndexSnapshot, error) { running += segmentSnapshot.segment.Count() } } + return rv, nil } From 0e572047cb5f7839f2d903e71378c7e8d094f551 Mon Sep 17 00:00:00 2001 From: Sreekanth Sivasankaran Date: Wed, 9 Jan 2019 14:39:42 +0530 Subject: [PATCH 535/728] MB-31974 - Streaming search results Adding optional builder/maker callbacks for the document match handlers from applications. 
These builder method gives back a threadsafe document match handler callback for bleve which it will invoke for every document hit during a search. The application should make its own copy of the hit/documentMatch in the callback if it wish to hold on to data. --- search/collector.go | 20 ++++++++ search/collector/topn.go | 98 +++++++++++++++++++++++++++++----------- search/search.go | 1 + 3 files changed, 92 insertions(+), 27 deletions(-) diff --git a/search/collector.go b/search/collector.go index 0d163a9d9..f7a490d65 100644 --- a/search/collector.go +++ b/search/collector.go @@ -30,3 +30,23 @@ type Collector interface { SetFacetsBuilder(facetsBuilder *FacetsBuilder) FacetResults() FacetResults } + +// DocumentMatchHandler is the type of document match callback +// bleve will invoke during the search. +// Eventually, bleve will indicate the completion of an ongoing search, +// by passing a nil value for the document match callback. +// The application should take a copy of the hit/documentMatch +// if it wish to own it or need prolonged access to it. +type DocumentMatchHandler func(hit *DocumentMatch) error + +type MakeDocumentMatchHandlerKeyType string + +var MakeDocumentMatchHandlerKey = MakeDocumentMatchHandlerKeyType( + "MakeDocumentMatchHandlerKey") + +// MakeDocumentMatchHandler is an optional DocumentMatchHandler +// builder function which the applications can pass to bleve. +// These builder methods gives a DocumentMatchHandler function +// to bleve, which it will invoke on every document matches. 
+type MakeDocumentMatchHandler func(ctx *SearchContext) ( + callback DocumentMatchHandler, loadID bool, err error) diff --git a/search/collector/topn.go b/search/collector/topn.go index 4b2682da0..3682952fd 100644 --- a/search/collector/topn.go +++ b/search/collector/topn.go @@ -140,6 +140,7 @@ func (hc *TopNCollector) Collect(ctx context.Context, searcher search.Searcher, } searchContext := &search.SearchContext{ DocumentMatchPool: search.NewDocumentMatchPool(backingSize+searcher.DocumentMatchPoolSize(), len(hc.sort)), + Collector: hc, } hc.dvReader, err = reader.DocValueReader(hc.neededFields) @@ -154,6 +155,19 @@ func (hc *TopNCollector) Collect(ctx context.Context, searcher search.Searcher, hc.sort.UpdateVisitor(field, term) } + dmHandlerMaker := MakeTopNDocumentMatchHandler + if cv := ctx.Value(search.MakeDocumentMatchHandlerKey); cv != nil { + dmHandlerMaker = cv.(search.MakeDocumentMatchHandler) + } + // use the application given builder for making the custom document match + // handler and perform callbacks/invocations on the newly made handler. + dmHandler, loadID, err := dmHandlerMaker(searchContext) + if err != nil { + return err + } + + hc.needDocIds = hc.needDocIds||loadID + select { case <-ctx.Done(): return ctx.Err() @@ -169,13 +183,26 @@ func (hc *TopNCollector) Collect(ctx context.Context, searcher search.Searcher, } } - err = hc.collectSingle(searchContext, reader, next) + err = hc.prepareDocumentMatch(searchContext, reader, next) + if err != nil { + break + } + + err = dmHandler(next) if err != nil { break } next, err = searcher.Next(searchContext) } + + // help finalize/flush the results in case + // of custom document match handlers. 
+ err = dmHandler(nil) + if err != nil { + return err + } + // compute search duration hc.took = time.Since(startTime) if err != nil { @@ -191,8 +218,8 @@ func (hc *TopNCollector) Collect(ctx context.Context, searcher search.Searcher, var sortByScoreOpt = []string{"_score"} -func (hc *TopNCollector) collectSingle(ctx *search.SearchContext, reader index.IndexReader, d *search.DocumentMatch) error { - var err error +func (hc *TopNCollector) prepareDocumentMatch(ctx *search.SearchContext, + reader index.IndexReader, d *search.DocumentMatch ) (err error) { // visit field terms for features that require it (sort, facets) if len(hc.neededFields) > 0 { @@ -226,35 +253,52 @@ func (hc *TopNCollector) collectSingle(ctx *search.SearchContext, reader index.I hc.sort.Value(d) } - // optimization, we track lowest sorting hit already removed from heap - // with this one comparison, we can avoid all heap operations if - // this hit would have been added and then immediately removed - if hc.lowestMatchOutsideResults != nil { - cmp := hc.sort.Compare(hc.cachedScoring, hc.cachedDesc, d, hc.lowestMatchOutsideResults) - if cmp >= 0 { - // this hit can't possibly be in the result set, so avoid heap ops - ctx.DocumentMatchPool.Put(d) - return nil - } - } + return nil +} - removed := hc.store.AddNotExceedingSize(d, hc.size+hc.skip) - if removed != nil { - if hc.lowestMatchOutsideResults == nil { - hc.lowestMatchOutsideResults = removed - } else { - cmp := hc.sort.Compare(hc.cachedScoring, hc.cachedDesc, removed, hc.lowestMatchOutsideResults) - if cmp < 0 { - tmp := hc.lowestMatchOutsideResults - hc.lowestMatchOutsideResults = removed - ctx.DocumentMatchPool.Put(tmp) +func MakeTopNDocumentMatchHandler( + ctx *search.SearchContext) (search.DocumentMatchHandler, bool, error) { + var hc *TopNCollector + var ok bool + if hc, ok = ctx.Collector.(*TopNCollector); ok { + return func(d *search.DocumentMatch) error { + if d == nil { + return nil + } + // optimization, we track lowest sorting hit 
already removed from heap + // with this one comparison, we can avoid all heap operations if + // this hit would have been added and then immediately removed + if hc.lowestMatchOutsideResults != nil { + cmp := hc.sort.Compare(hc.cachedScoring, hc.cachedDesc, d, + hc.lowestMatchOutsideResults) + if cmp >= 0 { + // this hit can't possibly be in the result set, so avoid heap ops + ctx.DocumentMatchPool.Put(d) + return nil + } } - } - } - return nil + removed := hc.store.AddNotExceedingSize(d, hc.size+hc.skip) + if removed != nil { + if hc.lowestMatchOutsideResults == nil { + hc.lowestMatchOutsideResults = removed + } else { + cmp := hc.sort.Compare(hc.cachedScoring, hc.cachedDesc, + removed, hc.lowestMatchOutsideResults) + if cmp < 0 { + tmp := hc.lowestMatchOutsideResults + hc.lowestMatchOutsideResults = removed + ctx.DocumentMatchPool.Put(tmp) + } + } + } + return nil + }, false, nil + } + return nil, false, nil } + // visitFieldTerms is responsible for visiting the field terms of the // search hit, and passing visited terms to the sort and facet builder func (hc *TopNCollector) visitFieldTerms(reader index.IndexReader, d *search.DocumentMatch) error { diff --git a/search/search.go b/search/search.go index 440c09571..1d26d0de0 100644 --- a/search/search.go +++ b/search/search.go @@ -285,6 +285,7 @@ type SearcherOptions struct { // SearchContext represents the context around a single search type SearchContext struct { DocumentMatchPool *DocumentMatchPool + Collector Collector } func (sc *SearchContext) Size() int { From f2126efa73aa93e6907aa1b8629c1d85bb8c1566 Mon Sep 17 00:00:00 2001 From: abhinavdangeti Date: Wed, 30 Jan 2019 13:23:44 -0800 Subject: [PATCH 536/728] Formatting: go fmt ./.. 
--- index/store/moss/lower_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/index/store/moss/lower_test.go b/index/store/moss/lower_test.go index 72b8a7194..afc0a0959 100644 --- a/index/store/moss/lower_test.go +++ b/index/store/moss/lower_test.go @@ -27,7 +27,7 @@ func openWithLower(t *testing.T, mo store.MergeOperator) (string, store.KVStore) tmpDir, _ := ioutil.TempDir("", "mossStore") config := map[string]interface{}{ - "path": tmpDir, + "path": tmpDir, "mossLowerLevelStoreName": "mossStore", } From 8b976597d8346285afeba177a968750696aebe9f Mon Sep 17 00:00:00 2001 From: abhinavdangeti Date: Wed, 30 Jan 2019 13:19:24 -0800 Subject: [PATCH 537/728] MB-32855: Propagate min setting when optimizing a 1-claused disjunction --- search/query/disjunction.go | 8 ++- search/search.go | 5 ++ search/searcher/search_disjunction_heap.go | 4 ++ search/searcher/search_disjunction_slice.go | 4 ++ search/searcher/search_filter.go | 6 ++ search_test.go | 63 +++++++++++++++++++++ 6 files changed, 89 insertions(+), 1 deletion(-) diff --git a/search/query/disjunction.go b/search/query/disjunction.go index b884cb659..1effc29ac 100644 --- a/search/query/disjunction.go +++ b/search/query/disjunction.go @@ -58,7 +58,8 @@ func (q *DisjunctionQuery) SetMin(m float64) { q.Min = m } -func (q *DisjunctionQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) { +func (q *DisjunctionQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, + options search.SearcherOptions) (search.Searcher, error) { ss := make([]search.Searcher, 0, len(q.Disjuncts)) for _, disjunct := range q.Disjuncts { sr, err := disjunct.Searcher(i, m, options) @@ -80,6 +81,11 @@ func (q *DisjunctionQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, if len(ss) < 1 { return searcher.NewMatchNoneSearcher(i) } else if len(ss) == 1 && q.Min <= 1 { + // update min setting of child searcher if supported + if searcher, ok := 
ss[0].(search.MinApplicableSearcher); ok { + searcher.SetMin(int(q.Min)) + } + // return the single nested searcher as is; only if min clauses is not greater than 1; return ss[0], nil } diff --git a/search/search.go b/search/search.go index 440c09571..ef7683332 100644 --- a/search/search.go +++ b/search/search.go @@ -277,6 +277,11 @@ type Searcher interface { DocumentMatchPoolSize() int } +type MinApplicableSearcher interface { + Searcher + SetMin(to int) +} + type SearcherOptions struct { Explain bool IncludeTermVectors bool diff --git a/search/searcher/search_disjunction_heap.go b/search/searcher/search_disjunction_heap.go index ec133f1f8..92b28dcc2 100644 --- a/search/searcher/search_disjunction_heap.go +++ b/search/searcher/search_disjunction_heap.go @@ -290,6 +290,10 @@ func (s *DisjunctionHeapSearcher) Min() int { return s.min } +func (s *DisjunctionHeapSearcher) SetMin(to int) { + s.min = to +} + func (s *DisjunctionHeapSearcher) DocumentMatchPoolSize() int { rv := len(s.searchers) for _, s := range s.searchers { diff --git a/search/searcher/search_disjunction_slice.go b/search/searcher/search_disjunction_slice.go index e47f39ad0..00a631355 100644 --- a/search/searcher/search_disjunction_slice.go +++ b/search/searcher/search_disjunction_slice.go @@ -274,6 +274,10 @@ func (s *DisjunctionSliceSearcher) Min() int { return s.min } +func (s *DisjunctionSliceSearcher) SetMin(to int) { + s.min = to +} + func (s *DisjunctionSliceSearcher) DocumentMatchPoolSize() int { rv := len(s.currs) for _, s := range s.searchers { diff --git a/search/searcher/search_filter.go b/search/searcher/search_filter.go index 7c95fb41c..ff777623a 100644 --- a/search/searcher/search_filter.go +++ b/search/searcher/search_filter.go @@ -98,6 +98,12 @@ func (f *FilteringSearcher) Min() int { return f.child.Min() } +func (f *FilteringSearcher) SetMin(to int) { + if searcher, ok := f.child.(search.MinApplicableSearcher); ok { + searcher.SetMin(to) + } +} + func (f *FilteringSearcher) 
DocumentMatchPoolSize() int { return f.child.DocumentMatchPoolSize() } diff --git a/search_test.go b/search_test.go index 71fae4236..ef1cf1814 100644 --- a/search_test.go +++ b/search_test.go @@ -1083,3 +1083,66 @@ func TestDisjunctionQueryIncorrectMin(t *testing.T) { " but got: %v", res.Total) } } + +func TestBooleanShouldMinPropagation(t *testing.T) { + idx, err := New("testidx", NewIndexMapping()) + if err != nil { + t.Fatal(err) + } + + defer func() { + err = idx.Close() + if err != nil { + t.Fatal(err) + } + + err := os.RemoveAll("testidx") + if err != nil { + t.Fatal(err) + } + }() + + doc1 := map[string]interface{}{ + "dept": "finance", + "name": "alvita", + "email": "alvita@domain.com", + } + + doc2 := map[string]interface{}{ + "dept": "dev-ops", + "name": "keelia", + "email": "keelia@domain.com", + } + + batch := idx.NewBatch() + + if err = batch.Index("doc1", doc1); err != nil { + t.Fatal(err) + } + + if err = batch.Index("doc2", doc2); err != nil { + t.Fatal(err) + } + + if err = idx.Batch(batch); err != nil { + t.Fatal(err) + } + + mq1 := NewMatchQuery("dev-ops") + mq1.SetField("dept") + mq2 := NewMatchQuery("keelia@domain.com") + mq2.SetField("email") + bq := NewBooleanQuery() + bq.AddShould(mq1) + bq.AddMust(mq2) + sr := NewSearchRequest(bq) + + res, err := idx.Search(sr) + if err != nil { + t.Fatal(err) + } + + if res.Total != 2 { + t.Errorf("Expected 2 results, but got: %v", res.Total) + } +} From 82b1018c17577775a0c2cdb30b5b93bbf1fc67f7 Mon Sep 17 00:00:00 2001 From: abhinavdangeti Date: Wed, 30 Jan 2019 15:39:55 -0800 Subject: [PATCH 538/728] Update example to match multiple terms --- search/query/disjunction.go | 4 ++-- search_test.go | 27 +++++++++++++++++---------- 2 files changed, 19 insertions(+), 12 deletions(-) diff --git a/search/query/disjunction.go b/search/query/disjunction.go index 1effc29ac..87ae9db7a 100644 --- a/search/query/disjunction.go +++ b/search/query/disjunction.go @@ -81,12 +81,12 @@ func (q *DisjunctionQuery) Searcher(i 
index.IndexReader, m mapping.IndexMapping, if len(ss) < 1 { return searcher.NewMatchNoneSearcher(i) } else if len(ss) == 1 && q.Min <= 1 { - // update min setting of child searcher if supported + // update min setting of child searcher if supported; and return the + // single child searcher as is when min is not greater than 1. if searcher, ok := ss[0].(search.MinApplicableSearcher); ok { searcher.SetMin(int(q.Min)) } - // return the single nested searcher as is; only if min clauses is not greater than 1; return ss[0], nil } diff --git a/search_test.go b/search_test.go index ef1cf1814..f2f9fdc56 100644 --- a/search_test.go +++ b/search_test.go @@ -1103,15 +1103,13 @@ func TestBooleanShouldMinPropagation(t *testing.T) { }() doc1 := map[string]interface{}{ - "dept": "finance", - "name": "alvita", - "email": "alvita@domain.com", + "dept": "queen", + "name": "cersei lannister", } doc2 := map[string]interface{}{ - "dept": "dev-ops", - "name": "keelia", - "email": "keelia@domain.com", + "dept": "kings guard", + "name": "jaime lannister", } batch := idx.NewBatch() @@ -1128,15 +1126,24 @@ func TestBooleanShouldMinPropagation(t *testing.T) { t.Fatal(err) } - mq1 := NewMatchQuery("dev-ops") + // term dictionaries in the index for field.. 
+ // dept: queen kings guard + // name: cersei jaime lannister + + // the following match query would match doc2 + mq1 := NewMatchQuery("kings guard") mq1.SetField("dept") - mq2 := NewMatchQuery("keelia@domain.com") - mq2.SetField("email") + + // the following match query would match both doc1 and doc2, + // as both docs share common lastname + mq2 := NewMatchQuery("jaime lannister") + mq2.SetField("name") + bq := NewBooleanQuery() bq.AddShould(mq1) bq.AddMust(mq2) - sr := NewSearchRequest(bq) + sr := NewSearchRequest(bq) res, err := idx.Search(sr) if err != nil { t.Fatal(err) From 20e27ce55e32cfc79b6cc776bc40648b24361291 Mon Sep 17 00:00:00 2001 From: Sreekanth Sivasankaran Date: Fri, 1 Feb 2019 13:50:14 +0530 Subject: [PATCH 539/728] 1118 - Fix for cachedFieldDocs concurrent access --- index/scorch/scorch_test.go | 85 ++++++++++++++++++++++++++++++++ index/scorch/snapshot_segment.go | 21 ++++++-- 2 files changed, 102 insertions(+), 4 deletions(-) diff --git a/index/scorch/scorch_test.go b/index/scorch/scorch_test.go index a1691fd09..d8a7c3c54 100644 --- a/index/scorch/scorch_test.go +++ b/index/scorch/scorch_test.go @@ -24,6 +24,7 @@ import ( "sync" "testing" "time" + "math/rand" "github.com/blevesearch/bleve/analysis" "github.com/blevesearch/bleve/analysis/analyzer/keyword" @@ -1509,6 +1510,90 @@ func TestIndexDocumentVisitFieldTerms(t *testing.T) { } } +func TestFieldTermsConcurrent(t *testing.T) { + cfg := CreateConfig("TestFieldTermsConcurrent") + + // setting path to empty string disables persistence/merging + // which ensures we have in-memory segments + // which is important for this test, to trigger the right code + // path, where fields exist, but have NOT been uninverted by + // the Segment impl (in memory segments are still SegmentBase) + cfg["path"] = "" + + defer func() { + err := DestroyTest(cfg) + if err != nil { + t.Fatal(err) + } + }() + + mp := mapping.NewIndexMapping() + analysisQueue := index.NewAnalysisQueue(1) + idx, err := NewScorch(Name, 
cfg, analysisQueue) + if err != nil { + t.Fatal(err) + } + err = idx.Open() + if err != nil { + t.Fatalf("error opening index: %v", err) + } + defer func() { + cerr := idx.Close() + if cerr != nil { + t.Fatal(cerr) + } + }() + + // create a single bath (leading to 1 in-memory segment) + // have one field named "name" and 100 others named f0-f99 + batch := index.NewBatch() + for i := 0; i < 1000; i++ { + data := map[string]string{ + "name": fmt.Sprintf("doc-%d", i), + } + for j := 0; j < 100; j++ { + data[fmt.Sprintf("f%d", j)] = fmt.Sprintf("v%d", i) + } + doc := document.NewDocument(fmt.Sprintf("%d", i)) + err = mp.MapDocument(doc, data) + if err != nil { + t.Errorf("error mapping doc: %v", err) + } + batch.Update(doc) + } + + err = idx.Batch(batch) + if err != nil { + t.Fatal(err) + } + + // now have 10 goroutines try to visit field values for doc 1 + // in a random field + var wg sync.WaitGroup + for j := 0; j < 10; j++ { + wg.Add(1) + go func() { + r, err := idx.Reader() + if err != nil { + t.Fatal(err) + } + docNumber, err := r.InternalID("1") + if err != nil { + t.Fatal(err) + } + err = r.DocumentVisitFieldTerms(docNumber, + []string{fmt.Sprintf("f%d", rand.Intn(100))}, + func(field string, term []byte) {}) + if err != nil { + t.Fatal(err) + } + wg.Done() + }() + } + + wg.Wait() +} + func TestConcurrentUpdate(t *testing.T) { cfg := CreateConfig("TestConcurrentUpdate") err := InitTest(cfg) diff --git a/index/scorch/snapshot_segment.go b/index/scorch/snapshot_segment.go index 0e0c59e9f..7d3de0811 100644 --- a/index/scorch/snapshot_segment.go +++ b/index/scorch/snapshot_segment.go @@ -113,14 +113,29 @@ func (s *SegmentSnapshot) Size() (rv int) { } type cachedFieldDocs struct { + m sync.Mutex readyCh chan struct{} // closed when the cachedFieldDocs.docs is ready to be used. err error // Non-nil if there was an error when preparing this cachedFieldDocs. docs map[uint64][]byte // Keyed by localDocNum, value is a list of terms delimited by 0xFF. 
size uint64 } +func (cfd *cachedFieldDocs) Size() int { + var rv int + cfd.m.Lock() + for _, entry := range cfd.docs { + rv += 8 /* size of uint64 */ + len(entry) + } + cfd.m.Unlock() + return rv +} + func (cfd *cachedFieldDocs) prepareField(field string, ss *SegmentSnapshot) { - defer close(cfd.readyCh) + cfd.m.Lock() + defer func() { + close(cfd.readyCh) + cfd.m.Unlock() + }() cfd.size += uint64(size.SizeOfUint64) /* size field */ dict, err := ss.segment.Dictionary(field) @@ -231,9 +246,7 @@ func (c *cachedDocs) updateSizeLOCKED() { for k, v := range c.cache { // cachedFieldDocs sizeInBytes += len(k) if v != nil { - for _, entry := range v.docs { // docs - sizeInBytes += 8 /* size of uint64 */ + len(entry) - } + sizeInBytes += v.Size() } } atomic.StoreUint64(&c.size, uint64(sizeInBytes)) From be1fb367f2e36d4b8bf0bc26cee6e3c82525e2c0 Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Fri, 1 Feb 2019 07:56:06 -0500 Subject: [PATCH 540/728] fix race in test for scorch batch callbacks previous version access/modifies values from multiple goroutines. this version uses a waitgroup to know that the callback fired, if the callback does not fire the test will timeout. 
--- index/scorch/scorch_test.go | 30 ++++++++---------------------- 1 file changed, 8 insertions(+), 22 deletions(-) diff --git a/index/scorch/scorch_test.go b/index/scorch/scorch_test.go index a1691fd09..2c4dc602a 100644 --- a/index/scorch/scorch_test.go +++ b/index/scorch/scorch_test.go @@ -15,7 +15,6 @@ package scorch import ( - "fmt" "log" "os" "reflect" @@ -950,24 +949,22 @@ func TestIndexBatchWithCallbacks(t *testing.T) { t.Fatalf("error opening index: %v", err) } defer func() { - err := idx.Close() - if err != nil { - t.Fatal(err) + cerr := idx.Close() + if cerr != nil { + t.Fatal(cerr) } }() // Check that callback function works - updated := false - cbErr := fmt.Errorf("") + var wg sync.WaitGroup + wg.Add(1) batch := index.NewBatch() doc := document.NewDocument("3") doc.AddField(document.NewTextField("name", []uint64{}, []byte("test3"))) batch.Update(doc) batch.SetPersistedCallback(func(e error) { - updated = true - cbErr = e - + wg.Done() }) err = idx.Batch(batch) @@ -975,19 +972,8 @@ func TestIndexBatchWithCallbacks(t *testing.T) { t.Error(err) } - for i := 0; i < 30; i++ { - if updated { - break - } - time.Sleep(500 * time.Millisecond) - } - if !updated { - t.Fatal("Callback function wasn't called") - } - if cbErr != nil { - t.Fatal("Error wasn't updated properly on callback function") - } - + wg.Wait() + // test has no assertion but will timeout if callback doesn't fire } func TestIndexInsertUpdateDeleteWithMultipleTypesStored(t *testing.T) { From 3c0b0ae477f8d880fc1ebde4680bda19495de9cb Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Fri, 1 Feb 2019 10:25:16 -0500 Subject: [PATCH 541/728] fix data race when indexing empty batches and using reset When the batches passed to bleve are empty, this was a special case that inadvertently avoided proper synchronization, and led to the race detector to correctly point out that resetting the batch was unsafe. By simply avoiding an unnecessary step when the batch is empty, we can avoid the problem. 
--- index/scorch/scorch.go | 18 +++++++++-------- index/upsidedown/upsidedown.go | 18 +++++++++-------- index_test.go | 35 ++++++++++++++++++++++++++++++++++ 3 files changed, 55 insertions(+), 16 deletions(-) diff --git a/index/scorch/scorch.go b/index/scorch/scorch.go index 07568e6e5..23608fc3b 100644 --- a/index/scorch/scorch.go +++ b/index/scorch/scorch.go @@ -312,15 +312,17 @@ func (s *Scorch) Batch(batch *index.Batch) (err error) { // FIXME could sort ids list concurrent with analysis? - go func() { - for _, doc := range batch.IndexOps { - if doc != nil { - aw := index.NewAnalysisWork(s, doc, resultChan) - // put the work on the queue - s.analysisQueue.Queue(aw) + if len(batch.IndexOps) > 0 { + go func() { + for _, doc := range batch.IndexOps { + if doc != nil { + aw := index.NewAnalysisWork(s, doc, resultChan) + // put the work on the queue + s.analysisQueue.Queue(aw) + } } - } - }() + }() + } // wait for analysis result analysisResults := make([]*index.AnalysisResult, int(numUpdates)) diff --git a/index/upsidedown/upsidedown.go b/index/upsidedown/upsidedown.go index 8edbb5b3d..e4bc3d8f0 100644 --- a/index/upsidedown/upsidedown.go +++ b/index/upsidedown/upsidedown.go @@ -810,15 +810,17 @@ func (udc *UpsideDownCouch) Batch(batch *index.Batch) (err error) { } } - go func() { - for _, doc := range batch.IndexOps { - if doc != nil { - aw := index.NewAnalysisWork(udc, doc, resultChan) - // put the work on the queue - udc.analysisQueue.Queue(aw) + if len(batch.IndexOps) > 0 { + go func() { + for _, doc := range batch.IndexOps { + if doc != nil { + aw := index.NewAnalysisWork(udc, doc, resultChan) + // put the work on the queue + udc.analysisQueue.Queue(aw) + } } - } - }() + }() + } // retrieve back index rows concurrent with analysis docBackIndexRowErr := error(nil) diff --git a/index_test.go b/index_test.go index 55b38909d..d1351dc29 100644 --- a/index_test.go +++ b/index_test.go @@ -32,6 +32,7 @@ import ( "github.com/blevesearch/bleve/analysis/analyzer/keyword" 
"github.com/blevesearch/bleve/document" "github.com/blevesearch/bleve/index" + "github.com/blevesearch/bleve/index/store/boltdb" "github.com/blevesearch/bleve/index/store/null" "github.com/blevesearch/bleve/mapping" "github.com/blevesearch/bleve/search" @@ -2149,3 +2150,37 @@ func TestBug1096(t *testing.T) { t.Fatalf("expected only 2 hits '9' and '90', got %v", res) } } + +func TestDataRaceBug1092(t *testing.T) { + defer func() { + rerr := os.RemoveAll("testidx") + if rerr != nil { + t.Fatal(rerr) + } + }() + + // use default mapping + mapping := NewIndexMapping() + + var idx Index + idx, err = NewUsing("testidx", mapping, upsidedown.Name, boltdb.Name, nil) + if err != nil { + log.Fatal(err) + } + defer func() { + cerr := idx.Close() + if cerr != nil { + t.Fatal(cerr) + } + }() + + batch := idx.NewBatch() + for i := 0; i < 10; i++ { + err = idx.Batch(batch) + if err != nil { + t.Error(err) + } + + batch.Reset() + } +} From 2aa43c5988384f2e8ebfaf44d595d04e94f7ff8e Mon Sep 17 00:00:00 2001 From: abhinavdangeti Date: Fri, 1 Feb 2019 10:16:10 -0800 Subject: [PATCH 542/728] Un-export MinApplicableSearcher --> minApplicableSearcher Also, since the FilteringSearcher encapsulates a child Searcher, a "min" update need not be propagated for it. --- search/query/disjunction.go | 2 +- search/query/query.go | 7 +++++++ search/search.go | 5 ----- search/searcher/search_filter.go | 6 ------ 4 files changed, 8 insertions(+), 12 deletions(-) diff --git a/search/query/disjunction.go b/search/query/disjunction.go index 87ae9db7a..792259320 100644 --- a/search/query/disjunction.go +++ b/search/query/disjunction.go @@ -83,7 +83,7 @@ func (q *DisjunctionQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, } else if len(ss) == 1 && q.Min <= 1 { // update min setting of child searcher if supported; and return the // single child searcher as is when min is not greater than 1. 
- if searcher, ok := ss[0].(search.MinApplicableSearcher); ok { + if searcher, ok := ss[0].(minApplicableSearcher); ok { searcher.SetMin(int(q.Min)) } diff --git a/search/query/query.go b/search/query/query.go index c7c1eefb8..71b515bf4 100644 --- a/search/query/query.go +++ b/search/query/query.go @@ -63,6 +63,13 @@ type ValidatableQuery interface { Validate() error } +// A Searcher to which the "min" setting is applicable, and supports +// updates to the setting. +type minApplicableSearcher interface { + search.Searcher + SetMin(int) +} + // ParseQuery deserializes a JSON representation of // a Query object. func ParseQuery(input []byte) (Query, error) { diff --git a/search/search.go b/search/search.go index ef7683332..440c09571 100644 --- a/search/search.go +++ b/search/search.go @@ -277,11 +277,6 @@ type Searcher interface { DocumentMatchPoolSize() int } -type MinApplicableSearcher interface { - Searcher - SetMin(to int) -} - type SearcherOptions struct { Explain bool IncludeTermVectors bool diff --git a/search/searcher/search_filter.go b/search/searcher/search_filter.go index ff777623a..7c95fb41c 100644 --- a/search/searcher/search_filter.go +++ b/search/searcher/search_filter.go @@ -98,12 +98,6 @@ func (f *FilteringSearcher) Min() int { return f.child.Min() } -func (f *FilteringSearcher) SetMin(to int) { - if searcher, ok := f.child.(search.MinApplicableSearcher); ok { - searcher.SetMin(to) - } -} - func (f *FilteringSearcher) DocumentMatchPoolSize() int { return f.child.DocumentMatchPoolSize() } From 84070ba7a39fe96b5ca0216a5e9b89b2e329bd17 Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Fri, 1 Feb 2019 15:22:07 -0500 Subject: [PATCH 543/728] fix build somehow latest merges broke --- index/scorch/scorch_test.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/index/scorch/scorch_test.go b/index/scorch/scorch_test.go index 9ac65183b..550a584ae 100644 --- a/index/scorch/scorch_test.go +++ b/index/scorch/scorch_test.go @@ -15,7 +15,9 @@ 
package scorch import ( + "fmt" "log" + "math/rand" "os" "reflect" "regexp" @@ -23,7 +25,6 @@ import ( "sync" "testing" "time" - "math/rand" "github.com/blevesearch/bleve/analysis" "github.com/blevesearch/bleve/analysis/analyzer/keyword" From d59ba7f56723df4f1f9ddf8b4180e9bbb317c078 Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Fri, 1 Feb 2019 15:47:19 -0500 Subject: [PATCH 544/728] enable race detector on travis --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index fd6e1ba38..35f7b60f2 100644 --- a/.travis.yml +++ b/.travis.yml @@ -13,7 +13,7 @@ script: - go get github.com/kisielk/errcheck - go get -u github.com/FiloSottile/gvt - gvt restore - - go test -v $(go list ./... | grep -v vendor/) + - go test -race -v $(go list ./... | grep -v vendor/) - go vet $(go list ./... | grep -v vendor/) - errcheck -ignorepkg fmt $(go list ./... | grep -v vendor/) - docs/project-code-coverage.sh From 8e1c5b2bf4d3ea439fdefd0b0ba6e2d0f851ff03 Mon Sep 17 00:00:00 2001 From: Sreekanth Sivasankaran Date: Mon, 4 Feb 2019 15:35:04 +0530 Subject: [PATCH 545/728] Unit tests for -Collector Chaining -DocumentMatchHandler -MakeDocumentMatchHandler --- search/collector/topn_test.go | 257 ++++++++++++++++++++++++++++++++++ 1 file changed, 257 insertions(+) diff --git a/search/collector/topn_test.go b/search/collector/topn_test.go index d50e38a0c..2e668bd9e 100644 --- a/search/collector/topn_test.go +++ b/search/collector/topn_test.go @@ -15,6 +15,7 @@ package collector import ( + "bytes" "context" "testing" @@ -467,6 +468,262 @@ func TestPaginationSameScores(t *testing.T) { } } +// TestStreamResults verifies the search.DocumentMatchHandler +func TestStreamResults(t *testing.T) { + matches := []*search.DocumentMatch{ + { + IndexInternalID: index.IndexInternalID("a"), + Score: 11, + }, + { + IndexInternalID: index.IndexInternalID("b"), + Score: 1, + }, + { + IndexInternalID: index.IndexInternalID("c"), + Score: 11, + }, + { + 
IndexInternalID: index.IndexInternalID("d"), + Score: 999, + }, + { + IndexInternalID: index.IndexInternalID("e"), + Score: 11, + }, + { + IndexInternalID: index.IndexInternalID("f"), + Score: 9, + }, + { + IndexInternalID: index.IndexInternalID("g"), + Score: 11, + }, + { + IndexInternalID: index.IndexInternalID("h"), + Score: 89, + }, + { + IndexInternalID: index.IndexInternalID("i"), + Score: 101, + }, + { + IndexInternalID: index.IndexInternalID("j"), + Score: 112, + }, + { + IndexInternalID: index.IndexInternalID("k"), + Score: 10, + }, + { + IndexInternalID: index.IndexInternalID("l"), + Score: 99, + }, + { + IndexInternalID: index.IndexInternalID("m"), + Score: 11, + }, + { + IndexInternalID: index.IndexInternalID("n"), + Score: 111, + }, + } + + searcher := &stubSearcher{ + matches: matches, + } + ind := 0 + docMatchHandler := func(hit *search.DocumentMatch) error { + if hit == nil { + return nil // search completed + } + if !bytes.Equal(hit.IndexInternalID, matches[ind].IndexInternalID) { + t.Errorf("%d hit IndexInternalID actual: %s, expected: %s", + ind, hit.IndexInternalID, matches[ind].IndexInternalID) + } + if hit.Score != matches[ind].Score { + t.Errorf("%d hit Score actual: %s, expected: %s", + ind, hit.IndexInternalID, matches[ind].IndexInternalID) + } + ind++ + return nil + } + + var handlerMaker search.MakeDocumentMatchHandler + handlerMaker = func(ctx *search.SearchContext) (search.DocumentMatchHandler, + bool, error) { + return docMatchHandler, false, nil + } + + ctx := context.WithValue(context.Background(), search.MakeDocumentMatchHandlerKey, + handlerMaker) + + collector := NewTopNCollector(10, 0, search.SortOrder{&search.SortScore{Desc: true}}) + err := collector.Collect(ctx, searcher, &stubReader{}) + if err != nil { + t.Fatal(err) + } + + maxScore := collector.MaxScore() + if maxScore != 999.0 { + t.Errorf("expected max score 99.0, got %f", maxScore) + } + + total := collector.Total() + if int(total) != ind { + t.Errorf("expected 14 total 
results, got %d", total) + } + + results := collector.Results() + + if len(results) != 0 { + t.Fatalf("expected 0 results, got %d", len(results)) + } +} + +// TestCollectorChaining verifies the chaining of collectors. +// The custom DocumentMatchHandler can process every hit for +// the search query and then pass the hit to the topn collector +// to eventually have the sorted top `N` results. +func TestCollectorChaining(t *testing.T) { + matches := []*search.DocumentMatch{ + { + IndexInternalID: index.IndexInternalID("a"), + Score: 11, + }, + { + IndexInternalID: index.IndexInternalID("b"), + Score: 1, + }, + { + IndexInternalID: index.IndexInternalID("c"), + Score: 11, + }, + { + IndexInternalID: index.IndexInternalID("d"), + Score: 999, + }, + { + IndexInternalID: index.IndexInternalID("e"), + Score: 11, + }, + { + IndexInternalID: index.IndexInternalID("f"), + Score: 9, + }, + { + IndexInternalID: index.IndexInternalID("g"), + Score: 11, + }, + { + IndexInternalID: index.IndexInternalID("h"), + Score: 89, + }, + { + IndexInternalID: index.IndexInternalID("i"), + Score: 101, + }, + { + IndexInternalID: index.IndexInternalID("j"), + Score: 112, + }, + { + IndexInternalID: index.IndexInternalID("k"), + Score: 10, + }, + { + IndexInternalID: index.IndexInternalID("l"), + Score: 99, + }, + { + IndexInternalID: index.IndexInternalID("m"), + Score: 11, + }, + { + IndexInternalID: index.IndexInternalID("n"), + Score: 111, + }, + } + + searcher := &stubSearcher{ + matches: matches, + } + + var topNHandler search.DocumentMatchHandler + ind := 0 + docMatchHandler := func(hit *search.DocumentMatch) error { + if hit == nil { + return nil // search completed + } + if !bytes.Equal(hit.IndexInternalID, matches[ind].IndexInternalID) { + t.Errorf("%d hit IndexInternalID actual: %s, expected: %s", + ind, hit.IndexInternalID, matches[ind].IndexInternalID) + } + if hit.Score != matches[ind].Score { + t.Errorf("%d hit Score actual: %s, expected: %s", + ind, hit.IndexInternalID, 
matches[ind].IndexInternalID) + } + ind++ + // give the hit back to the topN collector + err := topNHandler(hit) + if err != nil { + t.Errorf("unexpected err: %v", err) + } + return nil + } + + var handlerMaker search.MakeDocumentMatchHandler + handlerMaker = func(ctx *search.SearchContext) (search.DocumentMatchHandler, + bool, error) { + topNHandler, _, _ = MakeTopNDocumentMatchHandler(ctx) + return docMatchHandler, false, nil + } + + ctx := context.WithValue(context.Background(), search.MakeDocumentMatchHandlerKey, + handlerMaker) + + collector := NewTopNCollector(10, 0, search.SortOrder{&search.SortScore{Desc: true}}) + err := collector.Collect(ctx, searcher, &stubReader{}) + if err != nil { + t.Fatal(err) + } + + maxScore := collector.MaxScore() + if maxScore != 999.0 { + t.Errorf("expected max score 99.0, got %f", maxScore) + } + + total := collector.Total() + if int(total) != ind { + t.Errorf("expected 14 total results, got %d", total) + } + + results := collector.Results() + + if len(results) != 10 { // as it is paged + t.Fatalf("expected 0 results, got %d", len(results)) + } + + if results[0].ID != "d" { + t.Errorf("expected first result to have ID 'l', got %s", results[0].ID) + } + + if results[0].Score != 999.0 { + t.Errorf("expected highest score to be 999.0, got %f", results[0].Score) + } + + minScore := 1000.0 + for _, result := range results { + if result.Score < minScore { + minScore = result.Score + } + } + + if minScore < 10 { + t.Errorf("expected minimum score to be higher than 10, got %f", minScore) + } +} + func BenchmarkTop10of0Scores(b *testing.B) { benchHelper(0, func() search.Collector { return NewTopNCollector(10, 0, search.SortOrder{&search.SortScore{Desc: true}}) From c73ceb8bd4355f02aa46e1d9102727dbd9f2fbfe Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Mon, 4 Feb 2019 15:45:37 -0800 Subject: [PATCH 546/728] allocate walkContext only when docMapping is enabled --- mapping/index.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff 
--git a/mapping/index.go b/mapping/index.go index fc5d12a73..602764cbb 100644 --- a/mapping/index.go +++ b/mapping/index.go @@ -320,8 +320,8 @@ func (im *IndexMappingImpl) determineType(data interface{}) string { func (im *IndexMappingImpl) MapDocument(doc *document.Document, data interface{}) error { docType := im.determineType(data) docMapping := im.mappingForType(docType) - walkContext := im.newWalkContext(doc, docMapping) if docMapping.Enabled { + walkContext := im.newWalkContext(doc, docMapping) docMapping.walkDocument(data, []string{}, []uint64{}, walkContext) // see if the _all field was disabled From 46f577ca226f9b392b07110c84d9613e38074474 Mon Sep 17 00:00:00 2001 From: abhinavdangeti Date: Tue, 5 Feb 2019 17:35:34 -0800 Subject: [PATCH 547/728] MB-32855: Applying the disjunction 1-clause optimization + This optimization can be applied only when .. - The parent disjunction searcher has only 1 child searcher. - The 1 child searcher's min is the same as the parent's min. + This means that the "min" setting doesn't need to be propagated. --- search/query/disjunction.go | 10 ++- search/query/query.go | 7 --- search/searcher/search_disjunction_heap.go | 4 -- search/searcher/search_disjunction_slice.go | 4 -- search_test.go | 70 +++++++++++++++++++++ 5 files changed, 74 insertions(+), 21 deletions(-) diff --git a/search/query/disjunction.go b/search/query/disjunction.go index 792259320..2bc1d7044 100644 --- a/search/query/disjunction.go +++ b/search/query/disjunction.go @@ -80,12 +80,10 @@ func (q *DisjunctionQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, if len(ss) < 1 { return searcher.NewMatchNoneSearcher(i) - } else if len(ss) == 1 && q.Min <= 1 { - // update min setting of child searcher if supported; and return the - // single child searcher as is when min is not greater than 1. 
- if searcher, ok := ss[0].(minApplicableSearcher); ok { - searcher.SetMin(int(q.Min)) - } + } else if len(ss) == 1 && int(q.Min) == ss[0].Min() { + // apply optimization only if both conditions below are satisfied: + // - disjunction searcher has only 1 child searcher + // - parent searcher's min setting is equal to child searcher's min return ss[0], nil } diff --git a/search/query/query.go b/search/query/query.go index 71b515bf4..c7c1eefb8 100644 --- a/search/query/query.go +++ b/search/query/query.go @@ -63,13 +63,6 @@ type ValidatableQuery interface { Validate() error } -// A Searcher to which the "min" setting is applicable, and supports -// updates to the setting. -type minApplicableSearcher interface { - search.Searcher - SetMin(int) -} - // ParseQuery deserializes a JSON representation of // a Query object. func ParseQuery(input []byte) (Query, error) { diff --git a/search/searcher/search_disjunction_heap.go b/search/searcher/search_disjunction_heap.go index 92b28dcc2..ec133f1f8 100644 --- a/search/searcher/search_disjunction_heap.go +++ b/search/searcher/search_disjunction_heap.go @@ -290,10 +290,6 @@ func (s *DisjunctionHeapSearcher) Min() int { return s.min } -func (s *DisjunctionHeapSearcher) SetMin(to int) { - s.min = to -} - func (s *DisjunctionHeapSearcher) DocumentMatchPoolSize() int { rv := len(s.searchers) for _, s := range s.searchers { diff --git a/search/searcher/search_disjunction_slice.go b/search/searcher/search_disjunction_slice.go index 00a631355..e47f39ad0 100644 --- a/search/searcher/search_disjunction_slice.go +++ b/search/searcher/search_disjunction_slice.go @@ -274,10 +274,6 @@ func (s *DisjunctionSliceSearcher) Min() int { return s.min } -func (s *DisjunctionSliceSearcher) SetMin(to int) { - s.min = to -} - func (s *DisjunctionSliceSearcher) DocumentMatchPoolSize() int { rv := len(s.currs) for _, s := range s.searchers { diff --git a/search_test.go b/search_test.go index f2f9fdc56..a37dea703 100644 --- a/search_test.go +++ 
b/search_test.go @@ -1153,3 +1153,73 @@ func TestBooleanShouldMinPropagation(t *testing.T) { t.Errorf("Expected 2 results, but got: %v", res.Total) } } + +func TestDisjunctionMinPropagation(t *testing.T) { + idx, err := New("testidx", NewIndexMapping()) + if err != nil { + t.Fatal(err) + } + + defer func() { + err = idx.Close() + if err != nil { + t.Fatal(err) + } + + err := os.RemoveAll("testidx") + if err != nil { + t.Fatal(err) + } + }() + + doc1 := map[string]interface{}{ + "dept": "finance", + "name": "xyz", + } + + doc2 := map[string]interface{}{ + "dept": "marketing", + "name": "xyz", + } + + doc3 := map[string]interface{}{ + "dept": "engineering", + "name": "abc", + } + + batch := idx.NewBatch() + + if err = batch.Index("doc1", doc1); err != nil { + t.Fatal(err) + } + + if err = batch.Index("doc2", doc2); err != nil { + t.Fatal(err) + } + + if err = batch.Index("doc3", doc3); err != nil { + t.Fatal(err) + } + + if err = idx.Batch(batch); err != nil { + t.Fatal(err) + } + + mq1 := NewMatchQuery("finance") + mq2 := NewMatchQuery("marketing") + dq := NewDisjunctionQuery(mq1, mq2) + dq.SetMin(3) + + dq2 := NewDisjunctionQuery(dq) + dq2.SetMin(1) + + sr := NewSearchRequest(dq2) + res, err := idx.Search(sr) + if err != nil { + t.Fatal(err) + } + + if res.Total != 0 { + t.Fatalf("Expect 0 results, but got: %v", res.Total) + } +} From 1415017996a1771ba6df5cf1dc7f805d2a86b348 Mon Sep 17 00:00:00 2001 From: abhinavdangeti Date: Tue, 5 Feb 2019 17:39:06 -0800 Subject: [PATCH 548/728] Fix formatting: go fmt ./... 
--- index/scorch/snapshot_segment.go | 2 +- search/collector.go | 2 +- search/collector/topn.go | 11 +++++------ search/search.go | 2 +- 4 files changed, 8 insertions(+), 9 deletions(-) diff --git a/index/scorch/snapshot_segment.go b/index/scorch/snapshot_segment.go index 7d3de0811..f3a2c56a9 100644 --- a/index/scorch/snapshot_segment.go +++ b/index/scorch/snapshot_segment.go @@ -113,7 +113,7 @@ func (s *SegmentSnapshot) Size() (rv int) { } type cachedFieldDocs struct { - m sync.Mutex + m sync.Mutex readyCh chan struct{} // closed when the cachedFieldDocs.docs is ready to be used. err error // Non-nil if there was an error when preparing this cachedFieldDocs. docs map[uint64][]byte // Keyed by localDocNum, value is a list of terms delimited by 0xFF. diff --git a/search/collector.go b/search/collector.go index f7a490d65..df3ff9c5a 100644 --- a/search/collector.go +++ b/search/collector.go @@ -48,5 +48,5 @@ var MakeDocumentMatchHandlerKey = MakeDocumentMatchHandlerKeyType( // builder function which the applications can pass to bleve. // These builder methods gives a DocumentMatchHandler function // to bleve, which it will invoke on every document matches. 
-type MakeDocumentMatchHandler func(ctx *SearchContext) ( +type MakeDocumentMatchHandler func(ctx *SearchContext) ( callback DocumentMatchHandler, loadID bool, err error) diff --git a/search/collector/topn.go b/search/collector/topn.go index 3682952fd..378a7b114 100644 --- a/search/collector/topn.go +++ b/search/collector/topn.go @@ -140,7 +140,7 @@ func (hc *TopNCollector) Collect(ctx context.Context, searcher search.Searcher, } searchContext := &search.SearchContext{ DocumentMatchPool: search.NewDocumentMatchPool(backingSize+searcher.DocumentMatchPoolSize(), len(hc.sort)), - Collector: hc, + Collector: hc, } hc.dvReader, err = reader.DocValueReader(hc.neededFields) @@ -166,7 +166,7 @@ func (hc *TopNCollector) Collect(ctx context.Context, searcher search.Searcher, return err } - hc.needDocIds = hc.needDocIds||loadID + hc.needDocIds = hc.needDocIds || loadID select { case <-ctx.Done(): @@ -188,7 +188,7 @@ func (hc *TopNCollector) Collect(ctx context.Context, searcher search.Searcher, break } - err = dmHandler(next) + err = dmHandler(next) if err != nil { break } @@ -196,7 +196,7 @@ func (hc *TopNCollector) Collect(ctx context.Context, searcher search.Searcher, next, err = searcher.Next(searchContext) } - // help finalize/flush the results in case + // help finalize/flush the results in case // of custom document match handlers. 
err = dmHandler(nil) if err != nil { @@ -219,7 +219,7 @@ func (hc *TopNCollector) Collect(ctx context.Context, searcher search.Searcher, var sortByScoreOpt = []string{"_score"} func (hc *TopNCollector) prepareDocumentMatch(ctx *search.SearchContext, - reader index.IndexReader, d *search.DocumentMatch ) (err error) { + reader index.IndexReader, d *search.DocumentMatch) (err error) { // visit field terms for features that require it (sort, facets) if len(hc.neededFields) > 0 { @@ -298,7 +298,6 @@ func MakeTopNDocumentMatchHandler( return nil, false, nil } - // visitFieldTerms is responsible for visiting the field terms of the // search hit, and passing visited terms to the sort and facet builder func (hc *TopNCollector) visitFieldTerms(reader index.IndexReader, d *search.DocumentMatch) error { diff --git a/search/search.go b/search/search.go index 1d26d0de0..44451fa1e 100644 --- a/search/search.go +++ b/search/search.go @@ -285,7 +285,7 @@ type SearcherOptions struct { // SearchContext represents the context around a single search type SearchContext struct { DocumentMatchPool *DocumentMatchPool - Collector Collector + Collector Collector } func (sc *SearchContext) Size() int { From 098b7f138b0fef882f978cdebc4236dcb316272e Mon Sep 17 00:00:00 2001 From: Sreekanth Sivasankaran Date: Mon, 11 Feb 2019 14:01:34 +0530 Subject: [PATCH 549/728] cleaning up DocumentMatch - removing the un-used Document field --- search/search.go | 8 -------- 1 file changed, 8 deletions(-) diff --git a/search/search.go b/search/search.go index 440c09571..f0e24e8a2 100644 --- a/search/search.go +++ b/search/search.go @@ -18,7 +18,6 @@ import ( "fmt" "reflect" - "github.com/blevesearch/bleve/document" "github.com/blevesearch/bleve/index" "github.com/blevesearch/bleve/size" ) @@ -100,9 +99,6 @@ type DocumentMatch struct { // fields as float64s and date fields as time.RFC3339 formatted strings. 
Fields map[string]interface{} `json:"fields,omitempty"` - // if we load the document for this hit, remember it so we dont load again - Document *document.Document `json:"-"` - // used to maintain natural index order HitNumber uint64 `json:"-"` @@ -195,10 +191,6 @@ func (dm *DocumentMatch) Size() int { size.SizeOfPtr } - if dm.Document != nil { - sizeInBytes += dm.Document.Size() - } - return sizeInBytes } From 16c228ad9b0a41adf04cc2da39e56366efa34dc5 Mon Sep 17 00:00:00 2001 From: abhinavdangeti Date: Mon, 11 Feb 2019 17:12:34 -0800 Subject: [PATCH 550/728] Propagate Score setting to child search request during MultiSearch(..) --- index_alias_impl.go | 1 + 1 file changed, 1 insertion(+) diff --git a/index_alias_impl.go b/index_alias_impl.go index f678a059b..335fcade2 100644 --- a/index_alias_impl.go +++ b/index_alias_impl.go @@ -433,6 +433,7 @@ func createChildSearchRequest(req *SearchRequest) *SearchRequest { Explain: req.Explain, Sort: req.Sort.Copy(), IncludeLocations: req.IncludeLocations, + Score: req.Score, } return &rv } From da6534acf84faf5eca44a20e015e745f9f6c45b4 Mon Sep 17 00:00:00 2001 From: abhinavdangeti Date: Wed, 13 Feb 2019 17:07:56 -0800 Subject: [PATCH 551/728] MB-32855: Revert conjunction-single clause optimization Reverting the single-clause conjunction optimization for now, as we're noticing unexpected results while executing a boolean query in certain scenarios. 
--- search/query/conjunction.go | 3 --- 1 file changed, 3 deletions(-) diff --git a/search/query/conjunction.go b/search/query/conjunction.go index f14cbdd88..1a7ed1bc0 100644 --- a/search/query/conjunction.go +++ b/search/query/conjunction.go @@ -73,9 +73,6 @@ func (q *ConjunctionQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, if len(ss) < 1 { return searcher.NewMatchNoneSearcher(i) - } else if len(ss) == 1 { - // return single nested searcher as is - return ss[0], nil } return searcher.NewConjunctionSearcher(i, ss, options) From 326f90d860033a97e50da3919dc38fc3da610d6b Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Thu, 14 Feb 2019 17:04:49 -0500 Subject: [PATCH 552/728] switch to bbolt (#1134) switch to use github.com/etcd-io/bbolt in upsidedown test, close reader before closing index --- index/scorch/persister.go | 2 +- index/scorch/scorch.go | 2 +- index/scorch/snapshot_rollback.go | 2 +- index/store/boltdb/iterator.go | 2 +- index/store/boltdb/reader.go | 2 +- index/store/boltdb/store.go | 2 +- index/store/boltdb/store_test.go | 2 +- index/upsidedown/upsidedown_test.go | 5 +++++ vendor/manifest | 6 +++--- 9 files changed, 15 insertions(+), 10 deletions(-) diff --git a/index/scorch/persister.go b/index/scorch/persister.go index f75945d4d..349ccdc0e 100644 --- a/index/scorch/persister.go +++ b/index/scorch/persister.go @@ -33,7 +33,7 @@ import ( "github.com/blevesearch/bleve/index" "github.com/blevesearch/bleve/index/scorch/segment" "github.com/blevesearch/bleve/index/scorch/segment/zap" - "github.com/boltdb/bolt" + bolt "github.com/etcd-io/bbolt" ) var DefaultChunkFactor uint32 = 1024 diff --git a/index/scorch/scorch.go b/index/scorch/scorch.go index 23608fc3b..3f3d8bffc 100644 --- a/index/scorch/scorch.go +++ b/index/scorch/scorch.go @@ -31,7 +31,7 @@ import ( "github.com/blevesearch/bleve/index/scorch/segment/zap" "github.com/blevesearch/bleve/index/store" "github.com/blevesearch/bleve/registry" - "github.com/boltdb/bolt" + bolt 
"github.com/etcd-io/bbolt" ) const Name = "scorch" diff --git a/index/scorch/snapshot_rollback.go b/index/scorch/snapshot_rollback.go index 247003311..470868d0e 100644 --- a/index/scorch/snapshot_rollback.go +++ b/index/scorch/snapshot_rollback.go @@ -19,7 +19,7 @@ import ( "log" "github.com/blevesearch/bleve/index/scorch/segment" - "github.com/boltdb/bolt" + bolt "github.com/etcd-io/bbolt" ) type RollbackPoint struct { diff --git a/index/store/boltdb/iterator.go b/index/store/boltdb/iterator.go index 82ab946fd..4b5019f1f 100644 --- a/index/store/boltdb/iterator.go +++ b/index/store/boltdb/iterator.go @@ -17,7 +17,7 @@ package boltdb import ( "bytes" - "github.com/boltdb/bolt" + bolt "github.com/etcd-io/bbolt" ) type Iterator struct { diff --git a/index/store/boltdb/reader.go b/index/store/boltdb/reader.go index 1d701c982..4cd94183c 100644 --- a/index/store/boltdb/reader.go +++ b/index/store/boltdb/reader.go @@ -16,7 +16,7 @@ package boltdb import ( "github.com/blevesearch/bleve/index/store" - "github.com/boltdb/bolt" + bolt "github.com/etcd-io/bbolt" ) type Reader struct { diff --git a/index/store/boltdb/store.go b/index/store/boltdb/store.go index d8de0768f..7021cd91c 100644 --- a/index/store/boltdb/store.go +++ b/index/store/boltdb/store.go @@ -30,7 +30,7 @@ import ( "github.com/blevesearch/bleve/index/store" "github.com/blevesearch/bleve/registry" - "github.com/boltdb/bolt" + bolt "github.com/etcd-io/bbolt" ) const ( diff --git a/index/store/boltdb/store_test.go b/index/store/boltdb/store_test.go index 693c7b9f3..6411c239c 100644 --- a/index/store/boltdb/store_test.go +++ b/index/store/boltdb/store_test.go @@ -20,7 +20,7 @@ import ( "github.com/blevesearch/bleve/index/store" "github.com/blevesearch/bleve/index/store/test" - "github.com/boltdb/bolt" + bolt "github.com/etcd-io/bbolt" ) func open(t *testing.T, mo store.MergeOperator) store.KVStore { diff --git a/index/upsidedown/upsidedown_test.go b/index/upsidedown/upsidedown_test.go index 288f49003..2135a22c9 
100644 --- a/index/upsidedown/upsidedown_test.go +++ b/index/upsidedown/upsidedown_test.go @@ -1401,6 +1401,11 @@ func TestConcurrentUpdate(t *testing.T) { if len(doc.Fields) > 1 { t.Errorf("expected single field, found %d", len(doc.Fields)) } + + err = r.Close() + if err != nil { + t.Fatal(err) + } } func TestLargeField(t *testing.T) { diff --git a/vendor/manifest b/vendor/manifest index 5bdb6a076..729d8a64f 100644 --- a/vendor/manifest +++ b/vendor/manifest @@ -26,10 +26,10 @@ "notests": true }, { - "importpath": "github.com/boltdb/bolt", - "repository": "https://github.com/boltdb/bolt", + "importpath": "github.com/etcd-io/bbolt", + "repository": "https://github.com/etcd-io/bbolt", "vcs": "", - "revision": "9da31745363232bc1e27dbab3569e77383a51585", + "revision": "7ee3ded59d4835e10f3e7d0f7603c42aa5e83820", "branch": "master", "notests": true }, From 05d86ea8f6e30456949f612cf68cf4a27ce8c9c5 Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Thu, 14 Feb 2019 17:05:07 -0500 Subject: [PATCH 553/728] add support for configuring the boltdb initialMmapSize option (#1135) Without this option, sometimes committing a write Tx will block until all ongoing read Txs finish. If your application supports running long searches, this may result in unacceptable delays to indexing the data. By setting this value to a very large value (say 1TB), you can ensure that bolt will never need to remap to complete the write Tx. On 64-bit systems this may be an acceptable trade-off. 
See also: https://github.com/boltdb/bolt/issues/240 https://github.com/boltdb/bolt/issues/489 --- index/store/boltdb/store.go | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/index/store/boltdb/store.go b/index/store/boltdb/store.go index 7021cd91c..56613d531 100644 --- a/index/store/boltdb/store.go +++ b/index/store/boltdb/store.go @@ -74,6 +74,12 @@ func New(mo store.MergeOperator, config map[string]interface{}) (store.KVStore, bo.ReadOnly = ro } + if initialMmapSize, ok := config["initialMmapSize"].(int); ok { + bo.InitialMmapSize = initialMmapSize + } else if initialMmapSize, ok := config["initialMmapSize"].(float64); ok { + bo.InitialMmapSize = int(initialMmapSize) + } + db, err := bolt.Open(path, 0600, bo) if err != nil { return nil, err From 8bbe51a161c462ca9baa952cb5bf27604016eddd Mon Sep 17 00:00:00 2001 From: Sreekanth Sivasankaran Date: Tue, 19 Feb 2019 17:12:08 +0530 Subject: [PATCH 554/728] dict cmd additions -exposing 1hit count and total term info -exposing postings size and cardinality --- cmd/bleve/cmd/zap/dict.go | 41 ++++++++++++++++++++++++++++++++++++--- 1 file changed, 38 insertions(+), 3 deletions(-) diff --git a/cmd/bleve/cmd/zap/dict.go b/cmd/bleve/cmd/zap/dict.go index 2c60d31da..3cd256fa5 100644 --- a/cmd/bleve/cmd/zap/dict.go +++ b/cmd/bleve/cmd/zap/dict.go @@ -19,6 +19,7 @@ import ( "fmt" "math" + "github.com/RoaringBitmap/roaring" "github.com/blevesearch/bleve/index/scorch/segment/zap" "github.com/couchbase/vellum" "github.com/spf13/cobra" @@ -47,6 +48,7 @@ var dictCmd = &cobra.Command{ fstBytes := data[addr+uint64(read) : addr+uint64(read)+vellumLen] fmt.Printf("raw vellum data:\n % x\n", fstBytes) fmt.Printf("dictionary:\n") + var termsCount, hit1Count int64 if fstBytes != nil { fst, err := vellum.Load(fstBytes) if err != nil { @@ -61,15 +63,21 @@ var dictCmd = &cobra.Command{ docNum, normBits := zap.FSTValDecode1Hit(currVal) norm := math.Float32frombits(uint32(normBits)) extra = fmt.Sprintf("-- docNum: %d, norm: %f", docNum, 
norm) + fmt.Printf(" %s - %d (%x) %s\n", currTerm, currVal, currVal, extra) + hit1Count++ + } else { + // fetch the postings size, cardinality in case of non 1 hits + l, c := readPostingCardinality(currVal, data) + fmt.Printf(" %s - %d (%x) posting byteSize: %d cardinality: %d\n", + currTerm, currVal, currVal, l, c) } - - fmt.Printf(" %s - %d (%x) %s\n", currTerm, currVal, currVal, extra) + termsCount++ err = itr.Next() } if err != nil && err != vellum.ErrIteratorDone { return fmt.Errorf("error iterating dictionary: %v", err) } - + fmt.Printf("Total terms in dictionary : %d 1hit count: %d\n", termsCount, hit1Count) } return nil @@ -79,3 +87,30 @@ var dictCmd = &cobra.Command{ func init() { RootCmd.AddCommand(dictCmd) } + +func readPostingCardinality(postingsOffset uint64, data []byte) (int, uint64) { + // read the location of the freq/norm details + var n uint64 + var read int + + _, read = binary.Uvarint(data[postingsOffset+n : postingsOffset+binary.MaxVarintLen64]) + n += uint64(read) + + _, read = binary.Uvarint(data[postingsOffset+n : postingsOffset+n+binary.MaxVarintLen64]) + n += uint64(read) + + var postingsLen uint64 + postingsLen, read = binary.Uvarint(data[postingsOffset+n : postingsOffset+n+binary.MaxVarintLen64]) + n += uint64(read) + + roaringBytes := data[postingsOffset+n : postingsOffset+n+postingsLen] + + r := roaring.NewBitmap() + + _, err := r.FromBuffer(roaringBytes) + if err != nil { + fmt.Printf("error loading roaring bitmap: %v", err) + } + + return len(roaringBytes), r.GetCardinality() +} From eb62de6a81c63347e4c2a2312e145ee518991fc3 Mon Sep 17 00:00:00 2001 From: Sreekanth Sivasankaran Date: Fri, 22 Feb 2019 09:27:26 +0530 Subject: [PATCH 555/728] LoadAndHighLightFields API Refactoring the code a bit to let applications to perform field loading and any highlighting. Mainly helpful with the streamed result hits. 
--- index_impl.go | 135 +++++++++++++++++++++++++---------------------- search/search.go | 1 + 2 files changed, 74 insertions(+), 62 deletions(-) diff --git a/index_impl.go b/index_impl.go index fe61b8064..63fe39ccb 100644 --- a/index_impl.go +++ b/index_impl.go @@ -542,71 +542,13 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr } for _, hit := range hits { - if len(req.Fields) > 0 || highlighter != nil { - doc, err := indexReader.Document(hit.ID) - if err == nil && doc != nil { - if len(req.Fields) > 0 { - fieldsToLoad := deDuplicate(req.Fields) - for _, f := range fieldsToLoad { - for _, docF := range doc.Fields { - if f == "*" || docF.Name() == f { - var value interface{} - switch docF := docF.(type) { - case *document.TextField: - value = string(docF.Value()) - case *document.NumericField: - num, err := docF.Number() - if err == nil { - value = num - } - case *document.DateTimeField: - datetime, err := docF.DateTime() - if err == nil { - value = datetime.Format(time.RFC3339) - } - case *document.BooleanField: - boolean, err := docF.Boolean() - if err == nil { - value = boolean - } - case *document.GeoPointField: - lon, err := docF.Lon() - if err == nil { - lat, err := docF.Lat() - if err == nil { - value = []float64{lon, lat} - } - } - } - if value != nil { - hit.AddFieldValue(docF.Name(), value) - } - } - } - } - } - if highlighter != nil { - highlightFields := req.Highlight.Fields - if highlightFields == nil { - // add all fields with matches - highlightFields = make([]string, 0, len(hit.Locations)) - for k := range hit.Locations { - highlightFields = append(highlightFields, k) - } - } - for _, hf := range highlightFields { - highlighter.BestFragmentsInField(hit, doc, hf, 1) - } - } - } else if doc == nil { - // unexpected case, a doc ID that was found as a search hit - // was unable to be found during document lookup - return nil, ErrorIndexReadInconsistency - } - } if i.name != "" { hit.Index = i.name } + err = 
LoadAndHighlightFields(hit, req, i.name, indexReader, highlighter) + if err != nil { + return nil, err + } } atomic.AddUint64(&i.stats.searches, 1) @@ -632,6 +574,75 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr }, nil } +func LoadAndHighlightFields(hit *search.DocumentMatch, req *SearchRequest, + indexName string, r index.IndexReader, + highlighter highlight.Highlighter) error { + if len(req.Fields) > 0 || highlighter != nil { + doc, err := r.Document(hit.ID) + if err == nil && doc != nil { + if len(req.Fields) > 0 { + fieldsToLoad := deDuplicate(req.Fields) + for _, f := range fieldsToLoad { + for _, docF := range doc.Fields { + if f == "*" || docF.Name() == f { + var value interface{} + switch docF := docF.(type) { + case *document.TextField: + value = string(docF.Value()) + case *document.NumericField: + num, err := docF.Number() + if err == nil { + value = num + } + case *document.DateTimeField: + datetime, err := docF.DateTime() + if err == nil { + value = datetime.Format(time.RFC3339) + } + case *document.BooleanField: + boolean, err := docF.Boolean() + if err == nil { + value = boolean + } + case *document.GeoPointField: + lon, err := docF.Lon() + if err == nil { + lat, err := docF.Lat() + if err == nil { + value = []float64{lon, lat} + } + } + } + if value != nil { + hit.AddFieldValue(docF.Name(), value) + } + } + } + } + } + if highlighter != nil { + highlightFields := req.Highlight.Fields + if highlightFields == nil { + // add all fields with matches + highlightFields = make([]string, 0, len(hit.Locations)) + for k := range hit.Locations { + highlightFields = append(highlightFields, k) + } + } + for _, hf := range highlightFields { + highlighter.BestFragmentsInField(hit, doc, hf, 1) + } + } + } else if doc == nil { + // unexpected case, a doc ID that was found as a search hit + // was unable to be found during document lookup + return ErrorIndexReadInconsistency + } + } + + return nil +} + // Fields returns the name 
of all the fields this // Index has operated on. func (i *indexImpl) Fields() (fields []string, err error) { diff --git a/search/search.go b/search/search.go index f8a282d16..72bb0ea29 100644 --- a/search/search.go +++ b/search/search.go @@ -279,6 +279,7 @@ type SearcherOptions struct { type SearchContext struct { DocumentMatchPool *DocumentMatchPool Collector Collector + IndexReader index.IndexReader } func (sc *SearchContext) Size() int { From 4c8905c96c2995005ac87120ecb4a35d6aeb3923 Mon Sep 17 00:00:00 2001 From: abhinavdangeti Date: Tue, 26 Feb 2019 10:43:21 -0800 Subject: [PATCH 556/728] Add API for index/store/moss to retrieve associated collection --- index/store/moss/store.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/index/store/moss/store.go b/index/store/moss/store.go index 89ea553cf..47aab076f 100644 --- a/index/store/moss/store.go +++ b/index/store/moss/store.go @@ -218,6 +218,10 @@ func (s *Store) LowerLevelStore() store.KVStore { return s.llstore } +func (s *Store) Collection() moss.Collection { + return s.ms +} + func init() { registry.RegisterKVStore(Name, New) } From a91b427b59b893f112021841ba7370d285f8426f Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Sun, 3 Mar 2019 10:18:44 -0500 Subject: [PATCH 557/728] fix data race with batch reset (#1150) this is another variation of the race found/fixed in #1092 in that case the batch was empty, which meant we would skip the code that properly synchronized access. our fix only handled this exact case (no data operations), however there is another variation, if the batch contains only deletes (which are data ops) we still spawned the goroutine, although since there were no real updates, again the synchronization code would be skipped, and thus the data race could happen. 
the fix is to check the number of updates (computed earlier on the caller's goroutine, so it's safe) instead of the length of the IndexOps (which includes updates and deletes) the key is that we should only spawn the goroutine that will range over the batch, in cases where we will synchronize on waiting for the analysis to complete (at least one update). fixes #1149 --- index/scorch/scorch.go | 2 +- index/upsidedown/upsidedown.go | 2 +- index_test.go | 32 ++++++++++++++++++++++++++++++++ 3 files changed, 34 insertions(+), 2 deletions(-) diff --git a/index/scorch/scorch.go b/index/scorch/scorch.go index 3f3d8bffc..2641b3b19 100644 --- a/index/scorch/scorch.go +++ b/index/scorch/scorch.go @@ -312,7 +312,7 @@ func (s *Scorch) Batch(batch *index.Batch) (err error) { // FIXME could sort ids list concurrent with analysis? - if len(batch.IndexOps) > 0 { + if numUpdates > 0 { go func() { for _, doc := range batch.IndexOps { if doc != nil { diff --git a/index/upsidedown/upsidedown.go b/index/upsidedown/upsidedown.go index e4bc3d8f0..7992ac1d9 100644 --- a/index/upsidedown/upsidedown.go +++ b/index/upsidedown/upsidedown.go @@ -810,7 +810,7 @@ func (udc *UpsideDownCouch) Batch(batch *index.Batch) (err error) { } } - if len(batch.IndexOps) > 0 { + if numUpdates > 0 { go func() { for _, doc := range batch.IndexOps { if doc != nil { diff --git a/index_test.go b/index_test.go index d1351dc29..65ddedaa8 100644 --- a/index_test.go +++ b/index_test.go @@ -2184,3 +2184,35 @@ func TestDataRaceBug1092(t *testing.T) { batch.Reset() } } + +func TestBatchRaceBug1149(t *testing.T) { + defer func() { + err := os.RemoveAll("testidx") + if err != nil { + t.Fatal(err) + } + }() + i, err := New("testidx", NewIndexMapping()) + //i, err := NewUsing("testidx", NewIndexMapping(), "scorch", "scorch", nil) + if err != nil { + t.Fatal(err) + } + defer func() { + err := i.Close() + if err != nil { + t.Fatal(err) + } + }() + b := i.NewBatch() + b.Delete("1") + err = i.Batch(b) + if err != nil { + 
t.Fatal(err) + } + b.Reset() + err = i.Batch(b) + if err != nil { + t.Fatal(err) + } + b.Reset() +} From 119f9f47953812f1569f6b630188428346332dc0 Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Sun, 3 Mar 2019 19:36:36 -0500 Subject: [PATCH 558/728] fix postings list count implementation (#1143) previously the calculation subtracted the total number of deleted documents, without regard for how the deleted documents set might overlap with the postings list, this was incorrect. fixes #1140 --- index/scorch/segment/zap/posting.go | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/index/scorch/segment/zap/posting.go b/index/scorch/segment/zap/posting.go index 26378c27e..93b51ae73 100644 --- a/index/scorch/segment/zap/posting.go +++ b/index/scorch/segment/zap/posting.go @@ -264,18 +264,17 @@ func (p *PostingsList) iterator(includeFreq, includeNorm, includeLocs bool, // Count returns the number of items on this postings list func (p *PostingsList) Count() uint64 { - var n uint64 + var n, e uint64 if p.normBits1Hit != 0 { n = 1 + if p.except != nil && p.except.Contains(uint32(p.docNum1Hit)) { + e = 1 + } } else if p.postings != nil { n = p.postings.GetCardinality() - } - var e uint64 - if p.except != nil { - e = p.except.GetCardinality() - } - if n <= e { - return 0 + if p.except != nil { + e = p.postings.AndCardinality(p.except) + } } return n - e } From 4b5cfbc8d5656cf05bf74d1ae270d2ca505560fc Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Thu, 7 Mar 2019 08:51:01 -0500 Subject: [PATCH 559/728] fix issue mapping documents containing *time.Time (#1153) * fix issue mapping documents containing *time.Time If documents contained a field of type *time.Time AND they also had an explicit mapping for the field of a type OTHER THAN text, the field would not be mapped. The issue was introduced when we added special support for handling encoding.TextMarshaler because *time.Time satisfies this interface. 
This change fixes this issue, but there are some more complicated cases exposed by this, specifically related to mapping the same field multiple times, and handling cases where you try to mix multiple types. Instead, our solution is to further limit the way we support using ecoding.TextMarshaler. The new limitation is that we ONLY use the output of MarshalText() if you have an explicit mapping AND ALL the index fields are of type text. All other cases will ignore TextMarshaler and follow regular indexing rules. * address code review comments --- mapping/document.go | 26 +++++++++++++++++--------- mapping/mapping_test.go | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 49 insertions(+), 9 deletions(-) diff --git a/mapping/document.go b/mapping/document.go index f950b59be..15cb6b5fa 100644 --- a/mapping/document.go +++ b/mapping/document.go @@ -525,19 +525,27 @@ func (dm *DocumentMapping) processProperty(property interface{}, path []string, if !propertyValue.IsNil() { switch property := property.(type) { case encoding.TextMarshaler: - - txt, err := property.MarshalText() - if err == nil && subDocMapping != nil { - // index by explicit mapping + // ONLY process TextMarshaler if there is an explicit mapping + // AND all of the fiels are of type text + // OTHERWISE process field without TextMarshaler + if subDocMapping != nil { + allFieldsText := true for _, fieldMapping := range subDocMapping.Fields { - if fieldMapping.Type == "text" { - fieldMapping.processString(string(txt), pathString, path, indexes, context) + if fieldMapping.Type != "text" { + allFieldsText = false + break } } - } else { - dm.walkDocument(property, path, indexes, context) + txt, err := property.MarshalText() + if err == nil && allFieldsText { + txtStr := string(txt) + for _, fieldMapping := range subDocMapping.Fields { + fieldMapping.processString(txtStr, pathString, path, indexes, context) + } + return + } } - + dm.walkDocument(property, path, indexes, context) default: 
dm.walkDocument(property, path, indexes, context) } diff --git a/mapping/mapping_test.go b/mapping/mapping_test.go index 498fdb4ec..2349da707 100644 --- a/mapping/mapping_test.go +++ b/mapping/mapping_test.go @@ -1109,3 +1109,35 @@ func TestClosestDocDynamicMapping(t *testing.T) { t.Fatalf("expected 1 field, got: %d", len(doc.Fields)) } } + +func TestMappingPointerToTimeBug1152(t *testing.T) { + + when, err := time.Parse(time.RFC3339, "2019-03-06T15:04:05Z") + if err != nil { + t.Fatal(err) + } + + thing := struct { + When *time.Time + }{ + When: &when, + } + + // this case tests when there WAS an explicit mapping, but it was NOT type text + // as this was the specific case that was problematic + m := NewIndexMapping() + dtf := NewDateTimeFieldMapping() + m.DefaultMapping.AddFieldMappingsAt("When", dtf) + doc := document.NewDocument("x") + err = m.MapDocument(doc, thing) + if err != nil { + t.Fatal(err) + } + + if len(doc.Fields) != 1 { + t.Fatalf("expected 1 field, got: %d", len(doc.Fields)) + } + if _, ok := doc.Fields[0].(*document.DateTimeField); !ok { + t.Fatalf("expected field to be type *document.DateTimeField, got %T", doc.Fields[0]) + } +} \ No newline at end of file From baab665d1b509f1247dd97c2e9eaa59d2a8ac6e3 Mon Sep 17 00:00:00 2001 From: Sreekanth Sivasankaran Date: Tue, 12 Mar 2019 16:01:16 +0530 Subject: [PATCH 560/728] IndexReader initialisation in SearchContext Initialising the IndexReader field of the SearchContext so that the DocumentMatchHandler implementation will have enough details for invoking the LoadAndHighlightFields API. 
--- search/collector/topn.go | 1 + 1 file changed, 1 insertion(+) diff --git a/search/collector/topn.go b/search/collector/topn.go index 378a7b114..ff08d0a9c 100644 --- a/search/collector/topn.go +++ b/search/collector/topn.go @@ -141,6 +141,7 @@ func (hc *TopNCollector) Collect(ctx context.Context, searcher search.Searcher, searchContext := &search.SearchContext{ DocumentMatchPool: search.NewDocumentMatchPool(backingSize+searcher.DocumentMatchPoolSize(), len(hc.sort)), Collector: hc, + IndexReader: reader, } hc.dvReader, err = reader.DocValueReader(hc.neededFields) From d12e143a661a8d1b2f067b72ea5e8f4620f62093 Mon Sep 17 00:00:00 2001 From: Mervin Date: Thu, 14 Mar 2019 07:31:44 +0800 Subject: [PATCH 561/728] Fix panic on range query with unlimited upper in scorch (issue#1156). (#1157) * fix panic on range query with unlimited upper in scorch. * add unit test for the bug also remove unnecessary `= nil` since that is the zero-value --- index/scorch/segment/zap/dict.go | 13 +++++--- index/scorch/segment/zap/dict_test.go | 44 +++++++++++++++++++++++++++ 2 files changed, 52 insertions(+), 5 deletions(-) diff --git a/index/scorch/segment/zap/dict.go b/index/scorch/segment/zap/dict.go index 2c0e1bf2a..190265d6e 100644 --- a/index/scorch/segment/zap/dict.go +++ b/index/scorch/segment/zap/dict.go @@ -143,11 +143,14 @@ func (d *Dictionary) RangeIterator(start, end string) segment.DictionaryIterator } // need to increment the end position to be inclusive - endBytes := []byte(end) - if endBytes[len(endBytes)-1] < 0xff { - endBytes[len(endBytes)-1]++ - } else { - endBytes = append(endBytes, 0xff) + var endBytes []byte + if len(end) > 0 { + endBytes = []byte(end) + if endBytes[len(endBytes)-1] < 0xff { + endBytes[len(endBytes)-1]++ + } else { + endBytes = append(endBytes, 0xff) + } } if d.fst != nil { diff --git a/index/scorch/segment/zap/dict_test.go b/index/scorch/segment/zap/dict_test.go index b654bf45f..378abd854 100644 --- a/index/scorch/segment/zap/dict_test.go +++ 
b/index/scorch/segment/zap/dict_test.go @@ -279,3 +279,47 @@ func TestDictionaryError(t *testing.T) { t.Fatalf("expected nil next and nil err, got: %v, %v", nxt, err) } } + +func TestDictionaryBug1156(t *testing.T) { + + _ = os.RemoveAll("/tmp/scorch.zap") + + testSeg, _, _ := buildTestSegmentForDict() + err := PersistSegmentBase(testSeg, "/tmp/scorch.zap") + if err != nil { + t.Fatalf("error persisting segment: %v", err) + } + + segment, err := Open("/tmp/scorch.zap") + if err != nil { + t.Fatalf("error opening segment: %v", err) + } + defer func() { + cerr := segment.Close() + if cerr != nil { + t.Fatalf("error closing segment: %v", err) + } + }() + + dict, err := segment.Dictionary("desc") + if err != nil { + t.Fatal(err) + } + + // test range iterator + expected := []string{"cat", "dog", "egg", "fish"} + var got []string + itr := dict.RangeIterator("cat", "") + next, err := itr.Next() + for next != nil && err == nil { + got = append(got, next.Term) + next, err = itr.Next() + } + if err != nil { + t.Fatalf("dict itr error: %v", err) + } + + if !reflect.DeepEqual(expected, got) { + t.Errorf("expected: %v, got: %v", expected, got) + } +} From 57a60f1bbc4ee907082cd6eb3e365795156b6862 Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Wed, 13 Mar 2019 19:38:17 -0400 Subject: [PATCH 562/728] run integration tests with scorch (#1159) some minor changes have been made to a few of the queries to ensure that the search results have a deterministic order one test was removed as it was specifically checking the order when no sort was specified, which is defined in a way that does not lend itself to easy testing we did not change all tests without sort order, as many of them are correctly ordered by score already --- .travis.yml | 1 + test/tests/basic/searches.json | 5 +++++ test/tests/fosdem/searches.json | 1 + test/tests/sort/searches.json | 33 --------------------------------- 4 files changed, 7 insertions(+), 33 deletions(-) diff --git a/.travis.yml b/.travis.yml index 
35f7b60f2..fcc516db5 100644 --- a/.travis.yml +++ b/.travis.yml @@ -15,6 +15,7 @@ script: - gvt restore - go test -race -v $(go list ./... | grep -v vendor/) - go vet $(go list ./... | grep -v vendor/) + - go test ./test -v -indexType scorch - errcheck -ignorepkg fmt $(go list ./... | grep -v vendor/) - docs/project-code-coverage.sh - docs/build_children.sh diff --git a/test/tests/basic/searches.json b/test/tests/basic/searches.json index 42bcee299..0f9606ede 100644 --- a/test/tests/basic/searches.json +++ b/test/tests/basic/searches.json @@ -189,6 +189,7 @@ "search": { "from": 0, "size": 10, + "sort": ["_id"], "query": { "field": "birthday", "start": "2010-01-01" @@ -408,6 +409,7 @@ "search": { "from": 0, "size": 10, + "sort": ["_id"], "query": { "query": "-title:mista" } @@ -516,6 +518,7 @@ "search": { "from": 0, "size": 10, + "sort": ["_id"], "query": { "match_all": {} } @@ -543,6 +546,7 @@ "search": { "from": 0, "size": 10, + "sort": ["_id"], "query": { "ids": ["b", "c"] } @@ -564,6 +568,7 @@ "search": { "from": 0, "size": 10, + "sort": ["_id"], "query": { "query": "+age:>20 missess" } diff --git a/test/tests/fosdem/searches.json b/test/tests/fosdem/searches.json index 99f95aaad..444f4dc30 100644 --- a/test/tests/fosdem/searches.json +++ b/test/tests/fosdem/searches.json @@ -3,6 +3,7 @@ "search": { "from": 0, "size": 10, + "sort": ["_id"], "query": { "field": "category", "match_phrase": "Perl" diff --git a/test/tests/sort/searches.json b/test/tests/sort/searches.json index a679f0b8d..bb51720a8 100644 --- a/test/tests/sort/searches.json +++ b/test/tests/sort/searches.json @@ -1,37 +1,4 @@ [ - { - "comment": "default order, all have same score, then by natural index order", - "search": { - "from": 0, - "size": 10, - "query": { - "match_all":{} - } - }, - "result": { - "total_hits": 6, - "hits": [ - { - "id": "a" - }, - { - "id": "b" - }, - { - "id": "c" - }, - { - "id": "d" - }, - { - "id": "e" - }, - { - "id": "f" - } - ] - } - }, { "comment": "sort by name, 
ascending", "search": { From 308ce95fe6f6e2c976462c5db0743e9a010b3d19 Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Fri, 15 Mar 2019 09:22:15 -0400 Subject: [PATCH 563/728] change sort strategy for integration tests (#1161) any test which is not explicitly setting a sort order should instead specify: "sort": ["-_score", "_id"] this starts with the natural order of score descending, but also imposes an additional tiebreaking rule to sort by the documents identifier. this comes with some additional cost, but makes results more stable for testing purposes. --- test/tests/basic/searches.json | 60 ++++++++++++++++++++++++++++--- test/tests/employee/searches.json | 2 ++ test/tests/fosdem/searches.json | 6 +++- test/tests/phrase/searches.json | 22 ++++++++++++ 4 files changed, 84 insertions(+), 6 deletions(-) diff --git a/test/tests/basic/searches.json b/test/tests/basic/searches.json index 0f9606ede..8b6206c32 100644 --- a/test/tests/basic/searches.json +++ b/test/tests/basic/searches.json @@ -1,8 +1,10 @@ [ { + "comment": "test term search, exact match", "search": { "from": 0, "size": 10, + "sort": ["-_score", "_id"], "query": { "field": "name", "term": "marti" @@ -18,9 +20,11 @@ } }, { + "comment": "test term search, no match", "search": { "from": 0, "size": 10, + "sort": ["-_score", "_id"], "query": { "field": "name", "term": "noone" @@ -32,9 +36,11 @@ } }, { + "comment": "test match phrase search", "search": { "from": 0, "size": 10, + "sort": ["-_score", "_id"], "query": { "match_phrase": "long name" } @@ -49,9 +55,11 @@ } }, { + "comment": "test term search, no match", "search": { "from": 0, "size": 10, + "sort": ["-_score", "_id"], "query": { "field": "name", "term": "walking" @@ -63,9 +71,11 @@ } }, { + "comment": "test match search, matching due to analysis", "search": { "from": 0, "size": 10, + "sort": ["-_score", "_id"], "query": { "fuzziness": 0, "prefix_length": 0, @@ -83,9 +93,11 @@ } }, { + "comment": "test term prefix search", "search": { "from": 0, 
"size": 10, + "sort": ["-_score", "_id"], "query": { "field": "name", "prefix": "bobble" @@ -101,9 +113,11 @@ } }, { + "comment": "test simple query string", "search": { "from": 0, "size": 10, + "sort": ["-_score", "_id"], "query": { "query": "+name:phone" } @@ -118,9 +132,11 @@ } }, { + "comment": "test numeric range, no lower bound", "search": { "from": 0, "size": 10, + "sort": ["-_score", "_id"], "query": { "field": "age", "max": 30 @@ -139,9 +155,11 @@ } }, { + "comment": "test numeric range, upper and lower bounds", "search": { "from": 0, "size": 10, + "sort": ["-_score", "_id"], "query": { "field": "age", "max": 30, @@ -158,9 +176,11 @@ } }, { + "comment": "test conjunction of numeric range, upper and lower bounds", "search": { "from": 0, "size": 10, + "sort": ["-_score", "_id"], "query": { "conjuncts": [ { @@ -186,10 +206,11 @@ } }, { + "comment": "test date range, no upper bound", "search": { "from": 0, "size": 10, - "sort": ["_id"], + "sort": ["-_score", "_id"], "query": { "field": "birthday", "start": "2010-01-01" @@ -208,9 +229,11 @@ } }, { + "comment": "test numeric range, no lower bound", "search": { "from": 0, "size": 10, + "sort": ["-_score", "_id"], "query": { "field": "birthday", "end": "2010-01-01" @@ -226,9 +249,11 @@ } }, { + "comment": "test term search, matching inside an array", "search": { "from": 0, "size": 10, + "sort": ["-_score", "_id"], "query": { "field": "tags", "term": "gopher" @@ -244,9 +269,11 @@ } }, { + "comment": "test term search, matching another element inside array", "search": { "from": 0, "size": 10, + "sort": ["-_score", "_id"], "query": { "field": "tags", "term": "belieber" @@ -262,9 +289,11 @@ } }, { + "comment": "test term search, not present in array", "search": { "from": 0, "size": 10, + "sort": ["-_score", "_id"], "query": { "field": "tags", "term": "notintagsarray" @@ -280,6 +309,7 @@ "search": { "from": 0, "size": 0, + "sort": ["-_score", "_id"], "query": { "field": "name", "term": "marti" @@ -295,6 +325,7 @@ 
"search": { "from": 0, "size": 10, + "sort": ["-_score", "_id"], "fields": ["tags"], "query": { "field": "name", @@ -314,9 +345,11 @@ } }, { + "comment": "test fuzzy search, fuzziness 1 with match", "search": { "from": 0, "size": 10, + "sort": ["-_score", "_id"], "query": { "field": "name", "term": "msrti", @@ -337,6 +370,7 @@ "search": { "from": 0, "size": 10, + "sort": ["-_score", "_id"], "query": { "field": "name", "match": "long" @@ -362,6 +396,7 @@ "search": { "from": 0, "size": 10, + "sort": ["-_score", "_id"], "query": { "field": "name", "match": "long" @@ -385,6 +420,7 @@ "search": { "from": 0, "size": 10, + "sort": ["-_score", "_id"], "fields": ["age","birthday"], "query": { "field": "name", @@ -409,7 +445,7 @@ "search": { "from": 0, "size": 10, - "sort": ["_id"], + "sort": ["-_score", "_id"], "query": { "query": "-title:mista" } @@ -434,6 +470,7 @@ "search": { "from": 0, "size": 10, + "sort": ["-_score", "_id"], "query": { "field": "name", "match": "long" @@ -460,6 +497,7 @@ "search": { "from": 0, "size": 10, + "sort": ["-_score", "_id"], "query": { "field": "tags", "match": "gopher" @@ -485,6 +523,7 @@ "search": { "from": 0, "size": 10, + "sort": ["-_score", "_id"], "query": { "field": "title", "prefix": "miss" @@ -504,6 +543,7 @@ "search": { "from": 0, "size": 10, + "sort": ["-_score", "_id"], "query": { "match_none": {} } @@ -518,7 +558,7 @@ "search": { "from": 0, "size": 10, - "sort": ["_id"], + "sort": ["-_score", "_id"], "query": { "match_all": {} } @@ -546,7 +586,7 @@ "search": { "from": 0, "size": 10, - "sort": ["_id"], + "sort": ["-_score", "_id"], "query": { "ids": ["b", "c"] } @@ -568,7 +608,7 @@ "search": { "from": 0, "size": 10, - "sort": ["_id"], + "sort": ["-_score", "_id"], "query": { "query": "+age:>20 missess" } @@ -593,6 +633,7 @@ "search": { "from": 0, "size": 10, + "sort": ["-_score", "_id"], "query": { "field": "name", "regexp": "mar.*" @@ -612,6 +653,7 @@ "search": { "from": 0, "size": 10, + "sort": ["-_score", "_id"], "query": { 
"field": "name", "regexp": "mar." @@ -627,6 +669,7 @@ "search": { "from": 0, "size": 10, + "sort": ["-_score", "_id"], "query": { "field": "name", "wildcard": "mar*" @@ -646,6 +689,7 @@ "search": { "from": 0, "size": 10, + "sort": ["-_score", "_id"], "query": { "disjuncts": [ { @@ -678,6 +722,7 @@ "search": { "from": 0, "size": 10, + "sort": ["-_score", "_id"], "query": { "disjuncts": [ { @@ -711,6 +756,7 @@ "search": { "from": 0, "size": 10, + "sort": ["-_score", "_id"], "query": { "disjuncts": [ { @@ -744,6 +790,7 @@ "search": { "from": 0, "size": 10, + "sort": ["-_score", "_id"], "query": { "disjuncts": [ { @@ -776,6 +823,7 @@ "search": { "from": 0, "size": 10, + "sort": ["-_score", "_id"], "query": { "query": "name:mar*" } @@ -794,6 +842,7 @@ "search": { "from": 0, "size": 10, + "sort": ["-_score", "_id"], "query": { "query": "name:/mar.*/" } @@ -812,6 +861,7 @@ "search": { "from": 0, "size": 10, + "sort": ["-_score", "_id"], "query": { "field": "title", "max": "miz", diff --git a/test/tests/employee/searches.json b/test/tests/employee/searches.json index abba2e9fa..d4db280b5 100644 --- a/test/tests/employee/searches.json +++ b/test/tests/employee/searches.json @@ -1,8 +1,10 @@ [ { + "comment": "test array position output", "search": { "from": 0, "size": 10, + "sort": ["-_score", "_id"], "query": { "field": "manages.reports", "term": "julián" diff --git a/test/tests/fosdem/searches.json b/test/tests/fosdem/searches.json index 444f4dc30..4909fea27 100644 --- a/test/tests/fosdem/searches.json +++ b/test/tests/fosdem/searches.json @@ -3,7 +3,7 @@ "search": { "from": 0, "size": 10, - "sort": ["_id"], + "sort": ["-_score", "_id"], "query": { "field": "category", "match_phrase": "Perl" @@ -31,6 +31,7 @@ "search": { "from": 0, "size": 10, + "sort": ["-_score", "_id"], "query": { "match": "lisp" } @@ -51,6 +52,7 @@ "search": { "from": 0, "size": 10, + "sort": ["-_score", "_id"], "query": {"boost":1,"query":"+lisp +category:Perl"} }, "result": { @@ -66,6 +68,7 @@ 
"search": { "from": 0, "size": 10, + "sort": ["-_score", "_id"], "query": {"boost":1,"query":"+lisp +category:\"Perl\""} }, "result": { @@ -81,6 +84,7 @@ "search": { "from": 0, "size": 10, + "sort": ["-_score", "_id"], "query": { "must": { "conjuncts":[ diff --git a/test/tests/phrase/searches.json b/test/tests/phrase/searches.json index 02d23a244..70f9c8346 100644 --- a/test/tests/phrase/searches.json +++ b/test/tests/phrase/searches.json @@ -3,6 +3,7 @@ "search": { "from": 0, "size": 10, + "sort": ["-_score", "_id"], "query": { "field": "body", "match_phrase": "Twenty" @@ -21,6 +22,7 @@ "search": { "from": 0, "size": 10, + "sort": ["-_score", "_id"], "query": { "field": "body", "match_phrase": "Twenty Thousand" @@ -39,6 +41,7 @@ "search": { "from": 0, "size": 10, + "sort": ["-_score", "_id"], "query": { "field": "body", "match_phrase": "Twenty Thousand Leagues" @@ -57,6 +60,7 @@ "search": { "from": 0, "size": 10, + "sort": ["-_score", "_id"], "query": { "field": "body", "match_phrase": "Twenty Thousand Leagues Under" @@ -75,6 +79,7 @@ "search": { "from": 0, "size": 10, + "sort": ["-_score", "_id"], "query": { "field": "body", "match_phrase": "Twenty Thousand Leagues Under the" @@ -93,6 +98,7 @@ "search": { "from": 0, "size": 10, + "sort": ["-_score", "_id"], "query": { "field": "body", "match_phrase": "Twenty Thousand Leagues Under the Sea" @@ -111,6 +117,7 @@ "search": { "from": 0, "size": 10, + "sort": ["-_score", "_id"], "query": { "field": "body", "match_phrase": "Thousand" @@ -129,6 +136,7 @@ "search": { "from": 0, "size": 10, + "sort": ["-_score", "_id"], "query": { "field": "body", "match_phrase": "Thousand Leagues" @@ -147,6 +155,7 @@ "search": { "from": 0, "size": 10, + "sort": ["-_score", "_id"], "query": { "field": "body", "match_phrase": "Thousand Leagues Under" @@ -165,6 +174,7 @@ "search": { "from": 0, "size": 10, + "sort": ["-_score", "_id"], "query": { "field": "body", "match_phrase": "Thousand Leagues Under the" @@ -183,6 +193,7 @@ "search": { 
"from": 0, "size": 10, + "sort": ["-_score", "_id"], "query": { "field": "body", "match_phrase": "Thousand Leagues Under the Sea" @@ -201,6 +212,7 @@ "search": { "from": 0, "size": 10, + "sort": ["-_score", "_id"], "query": { "field": "body", "match_phrase": "Leagues" @@ -219,6 +231,7 @@ "search": { "from": 0, "size": 10, + "sort": ["-_score", "_id"], "query": { "field": "body", "match_phrase": "Leagues Under" @@ -237,6 +250,7 @@ "search": { "from": 0, "size": 10, + "sort": ["-_score", "_id"], "query": { "field": "body", "match_phrase": "Leagues Under the" @@ -255,6 +269,7 @@ "search": { "from": 0, "size": 10, + "sort": ["-_score", "_id"], "query": { "field": "body", "match_phrase": "Leagues Under the Sea" @@ -273,6 +288,7 @@ "search": { "from": 0, "size": 10, + "sort": ["-_score", "_id"], "query": { "field": "body", "match_phrase": "Under the Sea" @@ -291,6 +307,7 @@ "search": { "from": 0, "size": 10, + "sort": ["-_score", "_id"], "query": { "field": "body", "match_phrase": "the Sea" @@ -309,6 +326,7 @@ "search": { "from": 0, "size": 10, + "sort": ["-_score", "_id"], "query": { "field": "body", "match_phrase": "Sea" @@ -327,6 +345,7 @@ "search": { "from": 0, "size": 10, + "sort": ["-_score", "_id"], "query": { "field": "body", "match_phrase": "bad call" @@ -345,6 +364,7 @@ "search": { "from": 0, "size": 10, + "sort": ["-_score", "_id"], "query": { "field": "body", "match_phrase": "defenseless receiver" @@ -363,6 +383,7 @@ "search": { "from": 0, "size": 10, + "sort": ["-_score", "_id"], "query": { "field": "body", "match_phrase": "bad receiver" @@ -378,6 +399,7 @@ "search": { "from": 0, "size": 10, + "sort": ["-_score", "_id"], "query": { "field": "body", "terms": [["twenti","thirti"],["thousand"]] From c6b5eca7b2acff74224a35a03822a806af47b277 Mon Sep 17 00:00:00 2001 From: abhinavdangeti Date: Mon, 18 Mar 2019 18:44:44 -0700 Subject: [PATCH 564/728] Import analysis/char/asciifolding in config to register the filter --- config/config.go | 1 + 1 file changed, 1 
insertion(+) diff --git a/config/config.go b/config/config.go index ad0bdcb9a..cf2827a5c 100644 --- a/config/config.go +++ b/config/config.go @@ -31,6 +31,7 @@ import ( _ "github.com/blevesearch/bleve/search/highlight/highlighter/simple" // char filters + _ "github.com/blevesearch/bleve/analysis/char/asciifolding" _ "github.com/blevesearch/bleve/analysis/char/html" _ "github.com/blevesearch/bleve/analysis/char/regexp" _ "github.com/blevesearch/bleve/analysis/char/zerowidthnonjoiner" From 950ca8b43520816995ed92c9d95a86dedd0c4964 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Mon, 18 Mar 2019 17:35:17 -0700 Subject: [PATCH 565/728] go fmt --- index/store/moss/lower_test.go | 2 +- mapping/mapping_test.go | 2 +- search/collector/topn.go | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/index/store/moss/lower_test.go b/index/store/moss/lower_test.go index afc0a0959..72b8a7194 100644 --- a/index/store/moss/lower_test.go +++ b/index/store/moss/lower_test.go @@ -27,7 +27,7 @@ func openWithLower(t *testing.T, mo store.MergeOperator) (string, store.KVStore) tmpDir, _ := ioutil.TempDir("", "mossStore") config := map[string]interface{}{ - "path": tmpDir, + "path": tmpDir, "mossLowerLevelStoreName": "mossStore", } diff --git a/mapping/mapping_test.go b/mapping/mapping_test.go index 2349da707..b57283f3a 100644 --- a/mapping/mapping_test.go +++ b/mapping/mapping_test.go @@ -1140,4 +1140,4 @@ func TestMappingPointerToTimeBug1152(t *testing.T) { if _, ok := doc.Fields[0].(*document.DateTimeField); !ok { t.Fatalf("expected field to be type *document.DateTimeField, got %T", doc.Fields[0]) } -} \ No newline at end of file +} diff --git a/search/collector/topn.go b/search/collector/topn.go index ff08d0a9c..b30bd6ecf 100644 --- a/search/collector/topn.go +++ b/search/collector/topn.go @@ -141,7 +141,7 @@ func (hc *TopNCollector) Collect(ctx context.Context, searcher search.Searcher, searchContext := &search.SearchContext{ DocumentMatchPool: 
search.NewDocumentMatchPool(backingSize+searcher.DocumentMatchPoolSize(), len(hc.sort)), Collector: hc, - IndexReader: reader, + IndexReader: reader, } hc.dvReader, err = reader.DocValueReader(hc.neededFields) From af45e364cfbda2039fc5047fc1b392e6abe478e2 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Mon, 18 Mar 2019 14:41:50 -0700 Subject: [PATCH 566/728] upsidedown.UpdateWithAnalysis() advanced helper method This commit refactors the UpsideDownCouch.Update(doc) method with a helper method, UpdateWithAnalysis(), in order to allow advanced apps to skip the analysis queue and back-index lookup steps of upsidedown's single-doc Update(). The use-case motivation details... couchbase server is integrating the N1QL query engine with its bleve-powered full-text search engine, and may need to check that a document matches a search during N1QL's predicate filtering phase. For example... SELECT * FROM `beer-sample` as b WHERE SEARCH(b, "desc:good"); SELECT * FROM [ {"desc": "the beer is good"} ] AS b WHERE SEARCH(b, "desc:good"); The above SEARCH()'es default to using a default, dynamic mapping. If an appropriate FTS index exists, it can be used to speed up the query. In the case of a non-existent or non-covering FTS index, there might be many JSON docs that need to be predicate-filtered by the N1QL engine, where predicate-filtering is processed 1 doc at a time. This is supported by creating a reused, upsidedown index using moss configured in memory-only mode. Each doc to be examined is a) indexed (so the index's doc-count will only ever reach a max of 1) and b) the upsidedown index is searched to see if the doc is a hit. In upsidedown's Update(doc), the usual handoff to the analysis goroutines was dominating the pprof graphs. So, this commit enables a performance optimization where couchbase can now avoid goroutine switching via UpdateWithAnalysis(). Some microbenchmark results from github.com/couchbase/n1fty... 
BenchmarkMossWithoutOptimizations-8 20000 61246 ns/op 26021 B/op 386 allocs/op BenchmarkMossWithOptimizeReset-8 30000 55692 ns/op 20863 B/op 323 allocs/op BenchmarkMossWithOptimizeResetAndUpdate-8 50000 32241 ns/op 16924 B/op 230 allocs/op The "Reset" is another, recent moss-specific optimization (ea630c), for a fuller before-vs-after picture. --- index/upsidedown/upsidedown.go | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/index/upsidedown/upsidedown.go b/index/upsidedown/upsidedown.go index 7992ac1d9..0699dbf97 100644 --- a/index/upsidedown/upsidedown.go +++ b/index/upsidedown/upsidedown.go @@ -415,7 +415,6 @@ func (udc *UpsideDownCouch) Close() error { func (udc *UpsideDownCouch) Update(doc *document.Document) (err error) { // do analysis before acquiring write lock analysisStart := time.Now() - numPlainTextBytes := doc.NumPlainTextBytes() resultChan := make(chan *index.AnalysisResult) aw := index.NewAnalysisWork(udc, doc, resultChan) @@ -452,6 +451,11 @@ func (udc *UpsideDownCouch) Update(doc *document.Document) (err error) { return } + return udc.UpdateWithAnalysis(doc, result, backIndexRow) +} + +func (udc *UpsideDownCouch) UpdateWithAnalysis(doc *document.Document, + result *index.AnalysisResult, backIndexRow *BackIndexRow) (err error) { // start a writer for this update indexStart := time.Now() var kvwriter store.KVWriter @@ -490,7 +494,7 @@ func (udc *UpsideDownCouch) Update(doc *document.Document) (err error) { atomic.AddUint64(&udc.stats.indexTime, uint64(time.Since(indexStart))) if err == nil { atomic.AddUint64(&udc.stats.updates, 1) - atomic.AddUint64(&udc.stats.numPlainTextBytesIndexed, numPlainTextBytes) + atomic.AddUint64(&udc.stats.numPlainTextBytesIndexed, doc.NumPlainTextBytes()) } else { atomic.AddUint64(&udc.stats.errors, 1) } From 59adf0b315be3cb05b3ae26e916a7bf44d6ca2f2 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Fri, 22 Mar 2019 12:15:59 -0700 Subject: [PATCH 567/728] fixes #1168 - filter duplicate locations in 
search results This fix filters out duplicate locations using a destructive in-place sort on the locations slice, and then a pass to remove duplicates. This hopefully does not have undue impact on normal case performance, as the deduplication effort happens only in the "last step" of finalizeResults() and its DocumentMatch.Complete() invocations. e.g., the deduplication will be only invoked on the final result hits (e.g., "size": 10) instead of on all the (potentially many) candidate document-matches which were examined beforehand. Also, we only perform the deduplication effort only if we detect that the locations that are being examined look like they're not in increasing order. --- search/search.go | 82 +++++++++++++++++++++++++++++++++++++++- search/search_test.go | 76 +++++++++++++++++++++++++++++++++++++ search_test.go | 41 ++++++++++++++++++++ test/integration_test.go | 2 +- 4 files changed, 199 insertions(+), 2 deletions(-) create mode 100644 search/search_test.go diff --git a/search/search.go b/search/search.go index 72bb0ea29..8ed23de45 100644 --- a/search/search.go +++ b/search/search.go @@ -17,6 +17,7 @@ package search import ( "fmt" "reflect" + "sort" "github.com/blevesearch/bleve/index" "github.com/blevesearch/bleve/size" @@ -49,6 +50,24 @@ func (ap ArrayPositions) Equals(other ArrayPositions) bool { return true } +func (ap ArrayPositions) Compare(other ArrayPositions) int { + for i, p := range ap { + if i >= len(other) { + return 1 + } + if p < other[i] { + return -1 + } + if p > other[i] { + return 1 + } + } + if len(ap) < len(other) { + return -1 + } + return 0 +} + type Location struct { // Pos is the position of the term within the field, starting at 1 Pos uint64 `json:"pos"` @@ -68,6 +87,46 @@ func (l *Location) Size() int { type Locations []*Location +func (p Locations) Len() int { return len(p) } +func (p Locations) Swap(i, j int) { p[i], p[j] = p[j], p[i] } + +func (p Locations) Less(i, j int) bool { + c := 
p[i].ArrayPositions.Compare(p[j].ArrayPositions) + if c < 0 { + return true + } + if c > 0 { + return false + } + return p[i].Pos < p[j].Pos +} + +func (p Locations) Dedupe() Locations { // destructive! + if len(p) <= 1 { + return p + } + + sort.Sort(p) + + slow := 0 + + for _, pfast := range p { + pslow := p[slow] + if pslow.Pos == pfast.Pos && + pslow.Start == pfast.Start && + pslow.End == pfast.End && + pslow.ArrayPositions.Equals(pfast.ArrayPositions) { + continue // duplicate, so only move fast ahead + } + + slow++ + + p[slow] = pfast + } + + return p[:slow+1] +} + type TermLocationMap map[string]Locations func (t TermLocationMap) AddLocation(term string, location *Location) { @@ -208,6 +267,7 @@ func (dm *DocumentMatch) Complete(prealloc []Location) []Location { var lastField string var tlm TermLocationMap + var needsDedupe bool for i, ftl := range dm.FieldTermLocations { if lastField != ftl.Field { @@ -231,7 +291,19 @@ func (dm *DocumentMatch) Complete(prealloc []Location) []Location { loc.ArrayPositions = append(ArrayPositions(nil), loc.ArrayPositions...) 
} - tlm[ftl.Term] = append(tlm[ftl.Term], loc) + locs := tlm[ftl.Term] + + // if the loc is before or at the last location, then there + // might be duplicates that need to be deduplicated + if !needsDedupe && len(locs) > 0 { + last := locs[len(locs)-1] + cmp := loc.ArrayPositions.Compare(last.ArrayPositions) + if cmp < 0 || (cmp == 0 && loc.Pos <= last.Pos) { + needsDedupe = true + } + } + + tlm[ftl.Term] = append(locs, loc) dm.FieldTermLocations[i] = FieldTermLocation{ // recycle Location: Location{ @@ -239,6 +311,14 @@ func (dm *DocumentMatch) Complete(prealloc []Location) []Location { }, } } + + if needsDedupe { + for _, tlm := range dm.Locations { + for term, locs := range tlm { + tlm[term] = locs.Dedupe() + } + } + } } dm.FieldTermLocations = dm.FieldTermLocations[:0] // recycle diff --git a/search/search_test.go b/search/search_test.go new file mode 100644 index 000000000..f15dc061c --- /dev/null +++ b/search/search_test.go @@ -0,0 +1,76 @@ +// Copyright (c) 2019 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package search + +import ( + "reflect" + "testing" +) + +func TestArrayPositionsCompare(t *testing.T) { + tests := []struct { + a []uint64 + b []uint64 + expect int + }{ + {nil, nil, 0}, + {[]uint64{}, []uint64{}, 0}, + {[]uint64{1}, []uint64{}, 1}, + {[]uint64{1}, []uint64{1}, 0}, + {[]uint64{}, []uint64{1}, -1}, + {[]uint64{0}, []uint64{1}, -1}, + {[]uint64{1}, []uint64{0}, 1}, + {[]uint64{1}, []uint64{1, 2}, -1}, + {[]uint64{1, 2}, []uint64{1}, 1}, + {[]uint64{1, 2}, []uint64{1, 2}, 0}, + {[]uint64{1, 2}, []uint64{1, 200}, -1}, + {[]uint64{1, 2}, []uint64{100, 2}, -1}, + {[]uint64{1, 2}, []uint64{1, 2, 3}, -1}, + } + + for _, test := range tests { + res := ArrayPositions(test.a).Compare(test.b) + if res != test.expect { + t.Errorf("test: %+v, res: %v", test, res) + } + } +} + +func TestLocationsDedupe(t *testing.T) { + a := &Location{} + b := &Location{Pos: 1} + c := &Location{Pos: 2} + + tests := []struct { + input Locations + expect Locations + }{ + {Locations{}, Locations{}}, + {Locations{a}, Locations{a}}, + {Locations{a, b, c}, Locations{a, b, c}}, + {Locations{a, a}, Locations{a}}, + {Locations{a, a, a}, Locations{a}}, + {Locations{a, b}, Locations{a, b}}, + {Locations{b, a}, Locations{a, b}}, + {Locations{c, b, a, c, b, a, c, b, a}, Locations{a, b, c}}, + } + + for testi, test := range tests { + res := test.input.Dedupe() + if !reflect.DeepEqual(res, test.expect) { + t.Errorf("testi: %d, test: %+v, res: %+v", testi, test, res) + } + } +} diff --git a/search_test.go b/search_test.go index a37dea703..917b24ad2 100644 --- a/search_test.go +++ b/search_test.go @@ -36,6 +36,7 @@ import ( "github.com/blevesearch/bleve/index/upsidedown" "github.com/blevesearch/bleve/mapping" "github.com/blevesearch/bleve/search" + "github.com/blevesearch/bleve/search/highlight/highlighter/html" "github.com/blevesearch/bleve/search/query" ) @@ -1223,3 +1224,43 @@ func TestDisjunctionMinPropagation(t *testing.T) { t.Fatalf("Expect 0 results, but got: %v", res.Total) } } + +func 
TestDuplicateLocationsIssue1168(t *testing.T) { + fm1 := NewTextFieldMapping() + fm1.Analyzer = keyword.Name + fm1.Name = "name1" + + dm := NewDocumentStaticMapping() + dm.AddFieldMappingsAt("name", fm1) + + m := NewIndexMapping() + m.DefaultMapping = dm + + idx, err := NewMemOnly(m) + if err != nil { + t.Fatalf("bleve new err: %v", err) + } + + err = idx.Index("x", map[string]interface{}{ + "name": "marty", + }) + if err != nil { + t.Fatalf("bleve index err: %v", err) + } + + q1 := NewTermQuery("marty") + q2 := NewTermQuery("marty") + dq := NewDisjunctionQuery(q1, q2) + + sreq := NewSearchRequest(dq) + sreq.Fields = []string{"*"} + sreq.Highlight = NewHighlightWithStyle(html.Name) + + sres, err := idx.Search(sreq) + if err != nil { + t.Fatalf("bleve search err: %v", err) + } + if len(sres.Hits[0].Locations["name1"]["marty"]) != 1 { + t.Fatalf("duplicate marty") + } +} diff --git a/test/integration_test.go b/test/integration_test.go index 627969708..8cbac6f30 100644 --- a/test/integration_test.go +++ b/test/integration_test.go @@ -187,7 +187,7 @@ func runTestDir(t *testing.T, dir, datasetName string) { if hit.Locations != nil { if !reflect.DeepEqual(hit.Locations, res.Hits[hi].Locations) { t.Errorf("test error - %s", search.Comment) - t.Errorf("test %d - expected hit %d to have locations %v got %v", testNum, hi, hit.Locations, res.Hits[hi].Locations) + t.Errorf("test %d - expected hit %d to have locations %#v got %#v", testNum, hi, hit.Locations, res.Hits[hi].Locations) } } // assert that none of the scores were NaN,+Inf,-Inf From 6ba04b0f813c73cae8a4a3d5f527a3291ee05b88 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Fri, 22 Mar 2019 12:46:48 -0700 Subject: [PATCH 568/728] asciifolding test apples from MB-33486 --- analysis/char/asciifolding/asciifolding_test.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/analysis/char/asciifolding/asciifolding_test.go b/analysis/char/asciifolding/asciifolding_test.go index 82dc67868..216583d1e 100644 --- 
a/analysis/char/asciifolding/asciifolding_test.go +++ b/analysis/char/asciifolding/asciifolding_test.go @@ -43,6 +43,10 @@ func TestAsciiFoldingFilter(t *testing.T) { // composite unicode runes are folded to more than one ASCII rune input: []byte(`ÆꜴ`), output: []byte(`AEAO`), + }, { + // apples from https://issues.couchbase.com/browse/MB-33486 + input: []byte(`Ápple Àpple Äpple Âpple Ãpple Åpple`), + output: []byte(`Apple Apple Apple Apple Apple Apple`), }, } From 40c19afe2872462a5102972bf8ad13297a16a2a6 Mon Sep 17 00:00:00 2001 From: abhinavdangeti Date: Fri, 22 Mar 2019 13:53:11 -0700 Subject: [PATCH 569/728] MB-33462: Validating the range for a date range query + All datetimes after 2262-04-11T11:59:59Z will cause UnixNano() which generates an Int64 to overflow. + Error out early for these values rather than letting the query fail quietly. --- search/query/date_range.go | 14 +++++++++-- search/query/date_range_test.go | 41 +++++++++++++++++++++++++++++++++ 2 files changed, 53 insertions(+), 2 deletions(-) diff --git a/search/query/date_range.go b/search/query/date_range.go index ff67a7bb7..ae26f8dec 100644 --- a/search/query/date_range.go +++ b/search/query/date_range.go @@ -143,10 +143,20 @@ func (q *DateRangeQuery) parseEndpoints() (*float64, *float64, error) { min := math.Inf(-1) max := math.Inf(1) if !q.Start.IsZero() { - min = numeric.Int64ToFloat64(q.Start.UnixNano()) + startInt64 := q.Start.UnixNano() + if startInt64 < 0 { + // overflow + return nil, nil, fmt.Errorf("invalid/unsupported date range, start: %v", q.Start) + } + min = numeric.Int64ToFloat64(startInt64) } if !q.End.IsZero() { - max = numeric.Int64ToFloat64(q.End.UnixNano()) + endInt64 := q.End.UnixNano() + if endInt64 < 0 { + // overflow + return nil, nil, fmt.Errorf("invalid/unsupported date range, end: %v", q.End) + } + max = numeric.Int64ToFloat64(endInt64) } return &min, &max, nil diff --git a/search/query/date_range_test.go b/search/query/date_range_test.go index 9dd691319..dd14d3238 
100644 --- a/search/query/date_range_test.go +++ b/search/query/date_range_test.go @@ -49,3 +49,44 @@ func TestBleveQueryTime(t *testing.T) { } } } + +func TestValidateDatetimeRanges(t *testing.T) { + tests := []struct { + start string + end string + expect bool + }{ + { + start: "2019-03-22T13:25:00Z", + end: "2019-03-22T18:25:00Z", + expect: true, + }, + { + start: "2019-03-22T13:25:00Z", + end: "9999-03-22T13:25:00Z", + expect: false, + }, + { + start: "2019-03-22T13:25:00Z", + end: "2262-04-11T11:59:59Z", + expect: true, + }, + { + start: "2019-03-22T13:25:00Z", + end: "2262-04-12T00:00:00Z", + expect: false, + }, + } + + for _, test := range tests { + startTime, _ := time.Parse(time.RFC3339, test.start) + endTime, _ := time.Parse(time.RFC3339, test.end) + + dateRangeQuery := NewDateRangeQuery(startTime, endTime) + if (dateRangeQuery.Validate() == nil) != test.expect { + t.Errorf("unexpected results while validating date range query with"+ + " {start: %v, end: %v}, expected: %v", + test.start, test.end, test.expect) + } + } +} From 010c117a6c9b985178789987f470e4579b530bba Mon Sep 17 00:00:00 2001 From: Sreekanth Sivasankaran Date: Mon, 25 Mar 2019 15:33:56 +0530 Subject: [PATCH 570/728] Revert "MB-32846 - more aggressively removeOldData() in scorch persister" This reverts commit c8e737a945472f5f266c0799005216080443fbb8. This recent introduction of removeOldData in the persister main loop has slowed the persister work flow to result in higher number of memory based segments. This high number of in-memory segments had resulted in perf regression across a few query types like fuzzy, wildcard etc. 
--- index/scorch/introducer.go | 4 +--- index/scorch/persister.go | 5 +---- 2 files changed, 2 insertions(+), 7 deletions(-) diff --git a/index/scorch/introducer.go b/index/scorch/introducer.go index 2d04bd38e..ac627796f 100644 --- a/index/scorch/introducer.go +++ b/index/scorch/introducer.go @@ -376,6 +376,7 @@ func (s *Scorch) introduceMerge(nextMerge *segmentMerge) { fileSegments++ } } + } // before the newMerge introduction, need to clean the newly @@ -392,7 +393,6 @@ func (s *Scorch) introduceMerge(nextMerge *segmentMerge) { } } } - // In case where all the docs in the newly merged segment getting // deleted by the time we reach here, can skip the introduction. if nextMerge.new != nil && @@ -424,7 +424,6 @@ func (s *Scorch) introduceMerge(nextMerge *segmentMerge) { newSnapshot.AddRef() // 1 ref for the nextMerge.notify response newSnapshot.updateSize() - s.rootLock.Lock() // swap in new index snapshot newSnapshot.epoch = s.nextSnapshotEpoch @@ -502,7 +501,6 @@ func (s *Scorch) revertToSnapshot(revertTo *snapshotReversion) error { } newSnapshot.updateSize() - // swap in new snapshot rootPrev := s.root s.root = newSnapshot diff --git a/index/scorch/persister.go b/index/scorch/persister.go index 349ccdc0e..e15fa2ab6 100644 --- a/index/scorch/persister.go +++ b/index/scorch/persister.go @@ -111,7 +111,6 @@ OUTER: if ew != nil && ew.epoch > lastMergedEpoch { lastMergedEpoch = ew.epoch } - lastMergedEpoch, persistWatchers = s.pausePersisterForMergerCatchUp(lastPersistedEpoch, lastMergedEpoch, persistWatchers, po) @@ -179,7 +178,6 @@ OUTER: s.fireEvent(EventKindPersisterProgress, time.Since(startTime)) if changed { - s.removeOldData() atomic.AddUint64(&s.stats.TotPersistLoopProgress, 1) continue OUTER } @@ -661,13 +659,13 @@ func (s *Scorch) LoadSnapshot(epoch uint64) (rv *IndexSnapshot, err error) { } func (s *Scorch) loadSnapshot(snapshot *bolt.Bucket) (*IndexSnapshot, error) { + rv := &IndexSnapshot{ parent: s, internal: make(map[string][]byte), refs: 1, creator: 
"loadSnapshot", } - var running uint64 c := snapshot.Cursor() for k, _ := c.First(); k != nil; k, _ = c.Next() { @@ -703,7 +701,6 @@ func (s *Scorch) loadSnapshot(snapshot *bolt.Bucket) (*IndexSnapshot, error) { running += segmentSnapshot.segment.Count() } } - return rv, nil } From 7740f389de43cc26265b6175d63f9882b0cd54d4 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Fri, 22 Mar 2019 21:38:55 -0700 Subject: [PATCH 571/728] MB-33455: improve ComputeGeoRange() performance This change improves ComputeGeoRange() performance by avoiding trips to the allocator by using pre-allocated []byte slices. It also avoids creation of interim [][]byte slices by append()'ing directly to returned output slices. Parts of this change... - before this change, ComputeGeoRange() would invoke relateAndRecurse(), which would recursively invoke ComputeGeoRange(). Five of the recursive call parameters... sminLon, sminLat, smaxLon, smaxLat, and checkBoundaries... would be passed down on each invocation. - in this change, ComputeGeoRange() was refactored to have two, internal helper closures: relateAndRecurse() and computeGeoRange(), so that those five parameters would be part of the closure and would no longer need to be passed on each invocation. - NewPrefixCodedInt64Prealloc() API was added to the numeric package to allow the caller to optionally provide a pre-allocated []byte slice. - relatedAndRecurse() now uses this NewPrefixCodedInt64Prealloc() API, via an internal helper closure function, makePrefixCoded(). - makePrefixCoded() manages a preallocated []byte slice, where it allocates another []byte slice, as needed, which will be 2x the size of its last preallocated slice. - the previous relateAndRecurse() would return [][]byte slices of the terms that were on-the-boundary and not-on-the-boundary. The caller, ComputeGeoRange(), would then append() those slices to its own on-boundary/not-on-boundary slices, and then return those onwards to its caller. 
All these interim slices, then, became garbage. - In this commit, since relateAndRecurse() and computeGeoRange() are now closures, they append() directly onto the final output slices of the top-level ComputeGeoRange(), reducing garbage and trips to the allocator. Before the change... BenchmarkComputeGeoRangePt01-8 100000 18516 ns/op 2005 B/op 63 allocs/op BenchmarkComputeGeoRangePt1-8 10000 108882 ns/op 96267 B/op 736 allocs/op BenchmarkComputeGeoRange10-8 50 35883331 ns/op 35812513 B/op 184543 allocs/op BenchmarkComputeGeoRange100-8 10 192568538 ns/op 187524856 B/op 926510 allocs/op After the change... BenchmarkComputeGeoRangePt01-8 100000 12447 ns/op 280 B/op 7 allocs/op BenchmarkComputeGeoRangePt1-8 30000 47053 ns/op 16416 B/op 28 allocs/op BenchmarkComputeGeoRange10-8 100 11836988 ns/op 8503406 B/op 76 allocs/op BenchmarkComputeGeoRange100-8 20 72555603 ns/op 42778777 B/op 89 allocs/op See also: https://issues.couchbase.com/browse/MB-33455 --- numeric/prefix_coded.go | 21 +++- search/searcher/search_geoboundingbox.go | 114 ++++++++++-------- search/searcher/search_geoboundingbox_test.go | 43 +++++++ 3 files changed, 126 insertions(+), 52 deletions(-) diff --git a/numeric/prefix_coded.go b/numeric/prefix_coded.go index 76ea001ba..29bd0fc5c 100644 --- a/numeric/prefix_coded.go +++ b/numeric/prefix_coded.go @@ -23,12 +23,26 @@ const ShiftStartInt64 byte = 0x20 type PrefixCoded []byte func NewPrefixCodedInt64(in int64, shift uint) (PrefixCoded, error) { + rv, _, err := NewPrefixCodedInt64Prealloc(in, shift, nil) + return rv, err +} + +func NewPrefixCodedInt64Prealloc(in int64, shift uint, prealloc []byte) ( + rv PrefixCoded, preallocRest []byte, err error) { if shift > 63 { - return nil, fmt.Errorf("cannot shift %d, must be between 0 and 63", shift) + return nil, prealloc, fmt.Errorf("cannot shift %d, must be between 0 and 63", shift) } nChars := ((63 - shift) / 7) + 1 - rv := make(PrefixCoded, nChars+1) + + size := int(nChars + 1) + if len(prealloc) >= size { + rv 
= PrefixCoded(prealloc[0:size]) + preallocRest = prealloc[size:] + } else { + rv = make(PrefixCoded, size) + } + rv[0] = ShiftStartInt64 + byte(shift) sortableBits := int64(uint64(in) ^ 0x8000000000000000) @@ -40,7 +54,8 @@ func NewPrefixCodedInt64(in int64, shift uint) (PrefixCoded, error) { nChars-- sortableBits = int64(uint64(sortableBits) >> 7) } - return rv, nil + + return rv, preallocRest, nil } func MustNewPrefixCodedInt64(in int64, shift uint) PrefixCoded { diff --git a/search/searcher/search_geoboundingbox.go b/search/searcher/search_geoboundingbox.go index 289e41678..29809397c 100644 --- a/search/searcher/search_geoboundingbox.go +++ b/search/searcher/search_geoboundingbox.go @@ -22,6 +22,9 @@ import ( "github.com/blevesearch/bleve/search" ) +var GeoBitsShift1 = (geo.GeoBits << 1) +var GeoBitsShift1Minus1 = GeoBitsShift1 - 1 + func NewGeoBoundingBoxSearcher(indexReader index.IndexReader, minLon, minLat, maxLon, maxLat float64, field string, boost float64, options search.SearcherOptions, checkBoundaries bool) ( @@ -36,7 +39,7 @@ func NewGeoBoundingBoxSearcher(indexReader index.IndexReader, minLon, minLat, } // do math to produce list of terms needed for this search - onBoundaryTerms, notOnBoundaryTerms := ComputeGeoRange(0, (geo.GeoBits<<1)-1, + onBoundaryTerms, notOnBoundaryTerms := ComputeGeoRange(0, GeoBitsShift1Minus1, minLon, minLat, maxLon, maxLat, checkBoundaries) var onBoundarySearcher search.Searcher @@ -94,59 +97,72 @@ var geoMaxShift = document.GeoPrecisionStep * 4 var geoDetailLevel = ((geo.GeoBits << 1) - geoMaxShift) / 2 func ComputeGeoRange(term uint64, shift uint, - sminLon, sminLat, smaxLon, smaxLat float64, - checkBoundaries bool) ( + sminLon, sminLat, smaxLon, smaxLat float64, checkBoundaries bool) ( onBoundary [][]byte, notOnBoundary [][]byte) { - split := term | uint64(0x1)<> 1 - - within := res%document.GeoPrecisionStep == 0 && - geo.RectWithin(minLon, minLat, maxLon, maxLat, - sminLon, sminLat, smaxLon, smaxLat) - if within || (level 
== geoDetailLevel && - geo.RectIntersects(minLon, minLat, maxLon, maxLat, - sminLon, sminLat, smaxLon, smaxLat)) { - if !within && checkBoundaries { - return [][]byte{ - numeric.MustNewPrefixCodedInt64(int64(start), res), - }, nil + makePrefixCoded := func(in int64, shift uint) (rv numeric.PrefixCoded) { + if len(preallocBytes) <= 0 { + preallocBytesLen = preallocBytesLen * 2 + preallocBytes = make([]byte, preallocBytesLen) + } + + var err error + rv, preallocBytes, err = + numeric.NewPrefixCodedInt64Prealloc(in, shift, preallocBytes) + if err != nil { + panic(err) } - return nil, - [][]byte{ - numeric.MustNewPrefixCodedInt64(int64(start), res), + return rv + } + + var computeGeoRange func(term uint64, shift uint) // declare for recursion + + relateAndRecurse := func(start, end uint64, res, level uint) { + minLon := geo.MortonUnhashLon(start) + minLat := geo.MortonUnhashLat(start) + maxLon := geo.MortonUnhashLon(end) + maxLat := geo.MortonUnhashLat(end) + + within := res%document.GeoPrecisionStep == 0 && + geo.RectWithin(minLon, minLat, maxLon, maxLat, + sminLon, sminLat, smaxLon, smaxLat) + if within || (level == geoDetailLevel && + geo.RectIntersects(minLon, minLat, maxLon, maxLat, + sminLon, sminLat, smaxLon, smaxLat)) { + if !within && checkBoundaries { + onBoundary = append(onBoundary, makePrefixCoded(int64(start), res)) + } else { + notOnBoundary = append(notOnBoundary, makePrefixCoded(int64(start), res)) } - } else if level < geoDetailLevel && - geo.RectIntersects(minLon, minLat, maxLon, maxLat, - sminLon, sminLat, smaxLon, smaxLat) { - return ComputeGeoRange(start, res-1, sminLon, sminLat, smaxLon, smaxLat, - checkBoundaries) + } else if level < geoDetailLevel && + geo.RectIntersects(minLon, minLat, maxLon, maxLat, + sminLon, sminLat, smaxLon, smaxLat) { + computeGeoRange(start, res-1) + } } - return nil, nil + + computeGeoRange = func(term uint64, shift uint) { + split := term | uint64(0x1)<> 1 + + relateAndRecurse(term, lowerMax, shift, level) + 
relateAndRecurse(split, upperMax, shift, level) + } + + computeGeoRange(term, shift) + + return onBoundary, notOnBoundary } func buildRectFilter(dvReader index.DocValueReader, field string, diff --git a/search/searcher/search_geoboundingbox_test.go b/search/searcher/search_geoboundingbox_test.go index 1de938db5..0113604fc 100644 --- a/search/searcher/search_geoboundingbox_test.go +++ b/search/searcher/search_geoboundingbox_test.go @@ -198,3 +198,46 @@ func setupGeo(t *testing.T) index.Index { return i } + +// -------------------------------------------------------------------- + +func BenchmarkComputeGeoRangePt01(b *testing.B) { + onBoundary := 4 + offBoundary := 0 + benchmarkComputeGeoRange(b, -0.01, -0.01, 0.01, 0.01, onBoundary, offBoundary) +} + +func BenchmarkComputeGeoRangePt1(b *testing.B) { + onBoundary := 56 + offBoundary := 144 + benchmarkComputeGeoRange(b, -0.1, -0.1, 0.1, 0.1, onBoundary, offBoundary) +} + +func BenchmarkComputeGeoRange10(b *testing.B) { + onBoundary := 5464 + offBoundary := 53704 + benchmarkComputeGeoRange(b, -10.0, -10.0, 10.0, 10.0, onBoundary, offBoundary) +} + +func BenchmarkComputeGeoRange100(b *testing.B) { + onBoundary := 32768 + offBoundary := 258560 + benchmarkComputeGeoRange(b, -100.0, -100.0, 100.0, 100.0, onBoundary, offBoundary) +} + +// -------------------------------------------------------------------- + +func benchmarkComputeGeoRange(b *testing.B, + minLon, minLat, maxLon, maxLat float64, onBoundary, offBoundary int) { + checkBoundaries := true + + b.ResetTimer() + + for i := 0; i < b.N; i++ { + onBoundaryRes, offBoundaryRes := + ComputeGeoRange(0, GeoBitsShift1Minus1, minLon, minLat, maxLon, maxLat, checkBoundaries) + if len(onBoundaryRes) != onBoundary || len(offBoundaryRes) != offBoundary { + b.Fatalf("boundaries not matching") + } + } +} From 3d774b9eb3804d64436a5fdaf3fa47efcfdbfec6 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Mon, 25 Mar 2019 09:53:03 -0700 Subject: [PATCH 572/728] MB-33455 - ComputeGeoRange() 
checks DisjunctionMaxClauseCount This optimization passes DisjunctionMaxClauseCount to ComputeGeoRange() so that it can error early if it sees that it's over the disjunction clause limit. --- search/searcher/search_geoboundingbox.go | 33 ++++++++++++----- search/searcher/search_geoboundingbox_test.go | 36 +++++++++++++++++-- 2 files changed, 58 insertions(+), 11 deletions(-) diff --git a/search/searcher/search_geoboundingbox.go b/search/searcher/search_geoboundingbox.go index 29809397c..a80855ac5 100644 --- a/search/searcher/search_geoboundingbox.go +++ b/search/searcher/search_geoboundingbox.go @@ -39,8 +39,11 @@ func NewGeoBoundingBoxSearcher(indexReader index.IndexReader, minLon, minLat, } // do math to produce list of terms needed for this search - onBoundaryTerms, notOnBoundaryTerms := ComputeGeoRange(0, GeoBitsShift1Minus1, - minLon, minLat, maxLon, maxLat, checkBoundaries) + onBoundaryTerms, notOnBoundaryTerms, err := ComputeGeoRange(0, GeoBitsShift1Minus1, + minLon, minLat, maxLon, maxLat, checkBoundaries, DisjunctionMaxClauseCount) + if err != nil { + return nil, err + } var onBoundarySearcher search.Searcher dvReader, err := indexReader.DocValueReader([]string{field}) @@ -97,8 +100,8 @@ var geoMaxShift = document.GeoPrecisionStep * 4 var geoDetailLevel = ((geo.GeoBits << 1) - geoMaxShift) / 2 func ComputeGeoRange(term uint64, shift uint, - sminLon, sminLat, smaxLon, smaxLat float64, checkBoundaries bool) ( - onBoundary [][]byte, notOnBoundary [][]byte) { + sminLon, sminLat, smaxLon, smaxLat float64, checkBoundaries bool, maxTerms int) ( + onBoundary [][]byte, notOnBoundary [][]byte, err error) { preallocBytesLen := 32 preallocBytes := make([]byte, preallocBytesLen) @@ -108,12 +111,9 @@ func ComputeGeoRange(term uint64, shift uint, preallocBytes = make([]byte, preallocBytesLen) } - var err error rv, preallocBytes, err = numeric.NewPrefixCodedInt64Prealloc(in, shift, preallocBytes) - if err != nil { - panic(err) - } + return rv } @@ -144,6 +144,17 @@ func 
ComputeGeoRange(term uint64, shift uint, } computeGeoRange = func(term uint64, shift uint) { + if maxTerms > 0 { + if len(onBoundary) > maxTerms { + err = tooManyClausesErr(len(onBoundary)) + } else if len(notOnBoundary) > maxTerms { + err = tooManyClausesErr(len(notOnBoundary)) + } + } + if err != nil { + return + } + split := term | uint64(0x1)< Date: Tue, 26 Mar 2019 15:03:13 -0700 Subject: [PATCH 573/728] Deploy errcheck only for go1.11 --- .travis.yml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index fcc516db5..c79b3fa3d 100644 --- a/.travis.yml +++ b/.travis.yml @@ -16,7 +16,11 @@ script: - go test -race -v $(go list ./... | grep -v vendor/) - go vet $(go list ./... | grep -v vendor/) - go test ./test -v -indexType scorch - - errcheck -ignorepkg fmt $(go list ./... | grep -v vendor/) + - if [[ ${TRAVIS_GO_VERSION} =~ ^1\.11 ]]; then + errcheck -ignorepkg fmt $(go list ./... | grep -v vendor/); + else + echo "errcheck skipped for go version" $TRAVIS_GO_VERSION; + fi - docs/project-code-coverage.sh - docs/build_children.sh From 79c5a6840900fc453605270f189fa721c8b19759 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Tue, 26 Mar 2019 14:20:59 -0700 Subject: [PATCH 574/728] fixes #1166 - perf optimizations for ReadUvarint() This commit introduces a specialized MemUvarintReader which is focused on the "hot inner loop" of scorch/zap's reading of uvarint's -- such as the postingsList reading of freqNormHasLocs and location data. The various changes include... - the separate bytes.Reader vs binary.ReadUvarint() codepaths are unified, and ReadByte() is now inlined, along with more local vars. - SkipUvarint() allows entries to be skipped more efficiently (by postingsList.currChunkNext()). - prevRune maintenance is removed. - postingsList.readLocation() is always provided a non-nil output location. In microbenchmarks... $ go test -benchmem -bench=. 
./index/scorch/segment/ BenchmarkUvarint-8 100000000 16.4 ns/op 0 B/op 0 allocs/op BenchmarkMemUvarintReader-8 200000000 6.15 ns/op 0 B/op 0 allocs/op In bleve-query microbenchmarks on a scorch index with 200K en-wiki docs, high-frequency term ("http") searches that included locations had throughput of ~21.5 q/sec before this change, and after this change went to ~28.8 q/sec. --- index/scorch/segment/int.go | 86 ++++++++++++++++++++++++++- index/scorch/segment/int_test.go | 71 ++++++++++++++++++++++ index/scorch/segment/zap/posting.go | 91 +++++++++++++++-------------- 3 files changed, 203 insertions(+), 45 deletions(-) diff --git a/index/scorch/segment/int.go b/index/scorch/segment/int.go index a4836ebf8..663418d5b 100644 --- a/index/scorch/segment/int.go +++ b/index/scorch/segment/int.go @@ -19,7 +19,10 @@ package segment -import "fmt" +import ( + "errors" + "fmt" +) const ( MaxVarintSize = 9 @@ -92,3 +95,84 @@ func DecodeUvarintAscending(b []byte) ([]byte, uint64, error) { } return b[length:], v, nil } + +// ------------------------------------------------------------ + +type MemUvarintReader struct { + C int64 // index of next byte to read from S + S []byte +} + +func NewMemUvarintReader(s []byte) *MemUvarintReader { + return &MemUvarintReader{S: s} +} + +// Len returns the number of unread bytes. +func (r *MemUvarintReader) Len() int { + n := int(int64(len(r.S)) - r.C) + if n < 0 { + return 0 + } + return n +} + +var ErrMemUvarintReaderOverflow = errors.New("MemUvarintReader overflow") + +// ReadUvarint reads an encoded uint64. The original code this was +// based on is at encoding/binary/ReadUvarint(). +func (r *MemUvarintReader) ReadUvarint() (uint64, error) { + var x uint64 + var s uint + var C = r.C + var S = r.S + + for true { + b := S[C] + C++ + + if b < 0x80 { + r.C = C + + // why 63? 
The original code had an 'i += 1' loop var and + // checked for i > 9 || i == 9 ...; but, we no longer + // check for the i var, but instead check here for s, + // which is incremented by 7. So, 7*9 == 63. + // + // why the "extra" >= check? The normal case is that s < + // 63, so we check this single >= guard first so that we + // hit the normal, nil-error return pathway sooner. + if s >= 63 && (s > 63 || s == 63 && b > 1) { + return 0, ErrMemUvarintReaderOverflow + } + + return x | uint64(b)<= n { + reader.Reset(buf) + seen = 0 + } + + _, _ = binary.ReadUvarint(reader) + seen = seen + 1 + } +} + +func BenchmarkMemUvarintReader(b *testing.B) { + n, buf := generateCommonUvarints(64, 512) + + reader := &MemUvarintReader{S: buf} + seen := 0 + + b.ResetTimer() + + for i := 0; i < b.N; i = i + 1 { + if seen >= n { + reader.Reset(buf) + seen = 0 + } + + _, _ = reader.ReadUvarint() + seen = seen + 1 + } +} + +// generate some common, encoded uvarint's that we might see as +// freq-norm's or locations. +func generateCommonUvarints(maxFreq, maxFieldLen int) (n int, rv []byte) { + buf := make([]byte, binary.MaxVarintLen64) + + var out bytes.Buffer + + encode := func(val uint64) { + bufLen := binary.PutUvarint(buf, val) + out.Write(buf[:bufLen]) + n = n + 1 + } + + for i := 1; i < maxFreq; i = i * 2 { // Common freqHasLoc's. + freqHasLocs := uint64(i << 1) + encode(freqHasLocs) + encode(freqHasLocs | 0x01) // 0'th LSB encodes whether there are locations. + } + + encodeNorm := func(fieldLen int) { + norm := float32(1.0 / math.Sqrt(float64(fieldLen))) + normUint64 := uint64(math.Float32bits(float32(norm))) + encode(normUint64) + } + + for i := 1; i < maxFieldLen; i = i * 2 { // Common norm's. 
+ encodeNorm(i) + } + + return n, out.Bytes() +} diff --git a/index/scorch/segment/zap/posting.go b/index/scorch/segment/zap/posting.go index 93b51ae73..fe9f4d957 100644 --- a/index/scorch/segment/zap/posting.go +++ b/index/scorch/segment/zap/posting.go @@ -15,10 +15,8 @@ package zap import ( - "bytes" "encoding/binary" "fmt" - "io" "math" "reflect" @@ -334,8 +332,8 @@ type PostingsIterator struct { currChunkFreqNorm []byte currChunkLoc []byte - freqNormReader *bytes.Reader - locReader *bytes.Reader + freqNormReader *segment.MemUvarintReader + locReader *segment.MemUvarintReader freqChunkOffsets []uint64 freqChunkStart uint64 @@ -386,7 +384,7 @@ func (i *PostingsIterator) loadChunk(chunk int) error { end += e i.currChunkFreqNorm = i.postings.sb.mem[start:end] if i.freqNormReader == nil { - i.freqNormReader = bytes.NewReader(i.currChunkFreqNorm) + i.freqNormReader = segment.NewMemUvarintReader(i.currChunkFreqNorm) } else { i.freqNormReader.Reset(i.currChunkFreqNorm) } @@ -404,7 +402,7 @@ func (i *PostingsIterator) loadChunk(chunk int) error { end += e i.currChunkLoc = i.postings.sb.mem[start:end] if i.locReader == nil { - i.locReader = bytes.NewReader(i.currChunkLoc) + i.locReader = segment.NewMemUvarintReader(i.currChunkLoc) } else { i.locReader.Reset(i.currChunkLoc) } @@ -419,18 +417,34 @@ func (i *PostingsIterator) readFreqNormHasLocs() (uint64, uint64, bool, error) { return 1, i.normBits1Hit, false, nil } - freqHasLocs, err := binary.ReadUvarint(i.freqNormReader) + freqHasLocs, err := i.freqNormReader.ReadUvarint() if err != nil { return 0, 0, false, fmt.Errorf("error reading frequency: %v", err) } + freq, hasLocs := decodeFreqHasLocs(freqHasLocs) - normBits, err := binary.ReadUvarint(i.freqNormReader) + normBits, err := i.freqNormReader.ReadUvarint() if err != nil { return 0, 0, false, fmt.Errorf("error reading norm: %v", err) } - return freq, normBits, hasLocs, err + return freq, normBits, hasLocs, nil +} + +func (i *PostingsIterator) skipFreqNormReadHasLocs() 
(bool, error) { + if i.normBits1Hit != 0 { + return false, nil + } + + freqHasLocs, err := i.freqNormReader.ReadUvarint() + if err != nil { + return false, fmt.Errorf("error reading freqHasLocs: %v", err) + } + + i.freqNormReader.SkipUvarint() // Skip normBits. + + return freqHasLocs&0x01 != 0, nil // See decodeFreqHasLocs() / hasLocs. } func encodeFreqHasLocs(freq uint64, hasLocs bool) uint64 { @@ -448,58 +462,53 @@ func decodeFreqHasLocs(freqHasLocs uint64) (uint64, bool) { } // readLocation processes all the integers on the stream representing a single -// location. if you care about it, pass in a non-nil location struct, and we -// will fill it. if you don't care about it, pass in nil and we safely consume -// the contents. +// location. func (i *PostingsIterator) readLocation(l *Location) error { // read off field - fieldID, err := binary.ReadUvarint(i.locReader) + fieldID, err := i.locReader.ReadUvarint() if err != nil { return fmt.Errorf("error reading location field: %v", err) } // read off pos - pos, err := binary.ReadUvarint(i.locReader) + pos, err := i.locReader.ReadUvarint() if err != nil { return fmt.Errorf("error reading location pos: %v", err) } // read off start - start, err := binary.ReadUvarint(i.locReader) + start, err := i.locReader.ReadUvarint() if err != nil { return fmt.Errorf("error reading location start: %v", err) } // read off end - end, err := binary.ReadUvarint(i.locReader) + end, err := i.locReader.ReadUvarint() if err != nil { return fmt.Errorf("error reading location end: %v", err) } // read off num array pos - numArrayPos, err := binary.ReadUvarint(i.locReader) + numArrayPos, err := i.locReader.ReadUvarint() if err != nil { return fmt.Errorf("error reading location num array pos: %v", err) } - // group these together for less branching - if l != nil { - l.field = i.postings.sb.fieldsInv[fieldID] - l.pos = pos - l.start = start - l.end = end - if cap(l.ap) < int(numArrayPos) { - l.ap = make([]uint64, int(numArrayPos)) - } else { - 
l.ap = l.ap[:int(numArrayPos)] - } + l.field = i.postings.sb.fieldsInv[fieldID] + l.pos = pos + l.start = start + l.end = end + + if cap(l.ap) < int(numArrayPos) { + l.ap = make([]uint64, int(numArrayPos)) + } else { + l.ap = l.ap[:int(numArrayPos)] } // read off array positions for k := 0; k < int(numArrayPos); k++ { - ap, err := binary.ReadUvarint(i.locReader) + ap, err := i.locReader.ReadUvarint() if err != nil { return fmt.Errorf("error reading array position: %v", err) } - if l != nil { - l.ap[k] = ap - } + + l.ap[k] = ap } return nil @@ -556,7 +565,7 @@ func (i *PostingsIterator) nextAtOrAfter(atOrAfter uint64) (segment.Posting, err } rv.locs = i.nextSegmentLocs[:0] - numLocsBytes, err := binary.ReadUvarint(i.locReader) + numLocsBytes, err := i.locReader.ReadUvarint() if err != nil { return nil, fmt.Errorf("error reading location numLocsBytes: %v", err) } @@ -612,17 +621,14 @@ func (i *PostingsIterator) nextBytes() ( if hasLocs { startLoc := len(i.currChunkLoc) - i.locReader.Len() - numLocsBytes, err := binary.ReadUvarint(i.locReader) + numLocsBytes, err := i.locReader.ReadUvarint() if err != nil { return 0, 0, 0, nil, nil, fmt.Errorf("error reading location nextBytes numLocs: %v", err) } // skip over all the location bytes - _, err = i.locReader.Seek(int64(numLocsBytes), io.SeekCurrent) - if err != nil { - return 0, 0, 0, nil, nil, err - } + i.locReader.SkipBytes(int64(numLocsBytes)) endLoc := len(i.currChunkLoc) - i.locReader.Len() bytesLoc = i.currChunkLoc[startLoc:endLoc] @@ -763,22 +769,19 @@ func (i *PostingsIterator) currChunkNext(nChunk uint32) error { } // read off freq/offsets even though we don't care about them - _, _, hasLocs, err := i.readFreqNormHasLocs() + hasLocs, err := i.skipFreqNormReadHasLocs() if err != nil { return err } if i.includeLocs && hasLocs { - numLocsBytes, err := binary.ReadUvarint(i.locReader) + numLocsBytes, err := i.locReader.ReadUvarint() if err != nil { return fmt.Errorf("error reading location numLocsBytes: %v", err) } 
// skip over all the location bytes - _, err = i.locReader.Seek(int64(numLocsBytes), io.SeekCurrent) - if err != nil { - return err - } + i.locReader.SkipBytes(int64(numLocsBytes)) } return nil From fb220ce32cb28e4b864156e632f2d3d4fdad3967 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Tue, 26 Mar 2019 21:28:30 -0700 Subject: [PATCH 575/728] fixes #1166 - MemUvarintReader avoids extra int64 casting --- index/scorch/segment/int.go | 6 +++--- index/scorch/segment/zap/posting.go | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/index/scorch/segment/int.go b/index/scorch/segment/int.go index 663418d5b..e89d2c1ee 100644 --- a/index/scorch/segment/int.go +++ b/index/scorch/segment/int.go @@ -99,7 +99,7 @@ func DecodeUvarintAscending(b []byte) ([]byte, uint64, error) { // ------------------------------------------------------------ type MemUvarintReader struct { - C int64 // index of next byte to read from S + C int // index of next byte to read from S S []byte } @@ -109,7 +109,7 @@ func NewMemUvarintReader(s []byte) *MemUvarintReader { // Len returns the number of unread bytes. func (r *MemUvarintReader) Len() int { - n := int(int64(len(r.S)) - r.C) + n := len(r.S) - r.C if n < 0 { return 0 } @@ -168,7 +168,7 @@ func (r *MemUvarintReader) SkipUvarint() { } // SkipBytes skips a count number of bytes. 
-func (r *MemUvarintReader) SkipBytes(count int64) { +func (r *MemUvarintReader) SkipBytes(count int) { r.C = r.C + count } diff --git a/index/scorch/segment/zap/posting.go b/index/scorch/segment/zap/posting.go index fe9f4d957..0656c8e99 100644 --- a/index/scorch/segment/zap/posting.go +++ b/index/scorch/segment/zap/posting.go @@ -628,7 +628,7 @@ func (i *PostingsIterator) nextBytes() ( } // skip over all the location bytes - i.locReader.SkipBytes(int64(numLocsBytes)) + i.locReader.SkipBytes(int(numLocsBytes)) endLoc := len(i.currChunkLoc) - i.locReader.Len() bytesLoc = i.currChunkLoc[startLoc:endLoc] @@ -781,7 +781,7 @@ func (i *PostingsIterator) currChunkNext(nChunk uint32) error { } // skip over all the location bytes - i.locReader.SkipBytes(int64(numLocsBytes)) + i.locReader.SkipBytes(int(numLocsBytes)) } return nil From 4a220e62ffa757eff5a3b7b2e2dbe9985ebde6bd Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Wed, 27 Mar 2019 06:53:34 -0700 Subject: [PATCH 576/728] simplify for true loop --- index/scorch/segment/int.go | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/index/scorch/segment/int.go b/index/scorch/segment/int.go index e89d2c1ee..55299d8f7 100644 --- a/index/scorch/segment/int.go +++ b/index/scorch/segment/int.go @@ -126,7 +126,7 @@ func (r *MemUvarintReader) ReadUvarint() (uint64, error) { var C = r.C var S = r.S - for true { + for { b := S[C] C++ @@ -151,13 +151,11 @@ func (r *MemUvarintReader) ReadUvarint() (uint64, error) { x |= uint64(b&0x7f) << s s += 7 } - - return 0, nil // never reached, but compiler wants this } // SkipUvarint skips ahead one encoded uint64. 
func (r *MemUvarintReader) SkipUvarint() { - for true { + for { b := r.S[r.C] r.C++ From 60b3810c76416608327c20aae06d57832f13eaea Mon Sep 17 00:00:00 2001 From: abhinavdangeti Date: Fri, 29 Mar 2019 15:13:48 -0700 Subject: [PATCH 577/728] MB-32855: Revert disjunction-single clause optimization + Reverting the single-clause disjunction optimization as well, as we're noticing unexpected results while executing a boolean query in certain scenarios. + Note that I've already reverted the single-clause conjunction optimization with: https://github.com/blevesearch/bleve/pull/1133 + Included unit tests that capture failing test cases with the above mentioned optimizations. + With this change, I've completely reverted: - https://github.com/blevesearch/bleve/pull/1073 - https://github.com/blevesearch/bleve/pull/1116 - https://github.com/blevesearch/bleve/pull/1128 + Apologies for the grand mess :) Also, some related discussion on why these optimizations don't work: https://github.com/blevesearch/bleve/pull/1136 --- index/store/moss/lower_test.go | 2 +- search/query/disjunction.go | 6 -- search_test.go | 152 +++++++++++++++++++++++++++++++++ 3 files changed, 153 insertions(+), 7 deletions(-) diff --git a/index/store/moss/lower_test.go b/index/store/moss/lower_test.go index 72b8a7194..afc0a0959 100644 --- a/index/store/moss/lower_test.go +++ b/index/store/moss/lower_test.go @@ -27,7 +27,7 @@ func openWithLower(t *testing.T, mo store.MergeOperator) (string, store.KVStore) tmpDir, _ := ioutil.TempDir("", "mossStore") config := map[string]interface{}{ - "path": tmpDir, + "path": tmpDir, "mossLowerLevelStoreName": "mossStore", } diff --git a/search/query/disjunction.go b/search/query/disjunction.go index 2bc1d7044..a1fc1439a 100644 --- a/search/query/disjunction.go +++ b/search/query/disjunction.go @@ -80,12 +80,6 @@ func (q *DisjunctionQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, if len(ss) < 1 { return searcher.NewMatchNoneSearcher(i) - } else if len(ss) == 1 && 
int(q.Min) == ss[0].Min() { - // apply optimization only if both conditions below are satisfied: - // - disjunction searcher has only 1 child searcher - // - parent searcher's min setting is equal to child searcher's min - - return ss[0], nil } return searcher.NewDisjunctionSearcher(i, ss, q.Min, options) diff --git a/search_test.go b/search_test.go index 917b24ad2..682e8ed0a 100644 --- a/search_test.go +++ b/search_test.go @@ -28,7 +28,9 @@ import ( "github.com/blevesearch/bleve/analysis/analyzer/custom" "github.com/blevesearch/bleve/analysis/analyzer/keyword" "github.com/blevesearch/bleve/analysis/analyzer/standard" + "github.com/blevesearch/bleve/analysis/token/length" "github.com/blevesearch/bleve/analysis/token/lowercase" + "github.com/blevesearch/bleve/analysis/token/shingle" "github.com/blevesearch/bleve/analysis/tokenizer/single" "github.com/blevesearch/bleve/analysis/tokenizer/whitespace" "github.com/blevesearch/bleve/document" @@ -1264,3 +1266,153 @@ func TestDuplicateLocationsIssue1168(t *testing.T) { t.Fatalf("duplicate marty") } } + +func TestBooleanMustSingleMatchNone(t *testing.T) { + idxMapping := NewIndexMapping() + if err := idxMapping.AddCustomTokenFilter(length.Name, map[string]interface{}{ + "min": 3.0, + "max": 5.0, + "type": length.Name, + }); err != nil { + t.Fatal(err) + } + if err := idxMapping.AddCustomAnalyzer("custom1", map[string]interface{}{ + "type": "custom", + "tokenizer": "single", + "token_filters": []interface{}{length.Name}, + }); err != nil { + t.Fatal(err) + } + + idxMapping.DefaultAnalyzer = "custom1" + idx, err := New("testidx", idxMapping) + if err != nil { + t.Fatal(err) + } + + defer func() { + err = idx.Close() + if err != nil { + t.Fatal(err) + } + + err = os.RemoveAll("testidx") + if err != nil { + t.Fatal(err) + } + }() + + doc := map[string]interface{}{ + "languages_known": "Dutch", + "dept": "Sales", + } + + batch := idx.NewBatch() + if err = batch.Index("doc", doc); err != nil { + t.Fatal(err) + } + + if err = 
idx.Batch(batch); err != nil { + t.Fatal(err) + } + + // this is a successful match + matchSales := NewMatchQuery("Sales") + matchSales.SetField("dept") + + // this would spin off a MatchNoneSearcher as the + // token filter rules out the word "French" + matchFrench := NewMatchQuery("French") + matchFrench.SetField("languages_known") + + bq := NewBooleanQuery() + bq.AddShould(matchSales) + bq.AddMust(matchFrench) + + sr := NewSearchRequest(bq) + res, err := idx.Search(sr) + if err != nil { + t.Fatal(err) + } + + if res.Total != 0 { + t.Fatalf("Expected 0 results but got: %v", res.Total) + } +} + +func TestBooleanMustNotSingleMatchNone(t *testing.T) { + idxMapping := NewIndexMapping() + if err := idxMapping.AddCustomTokenFilter(shingle.Name, map[string]interface{}{ + "min": 3.0, + "max": 5.0, + "type": shingle.Name, + }); err != nil { + t.Fatal(err) + } + if err := idxMapping.AddCustomAnalyzer("custom1", map[string]interface{}{ + "type": "custom", + "tokenizer": "unicode", + "token_filters": []interface{}{shingle.Name}, + }); err != nil { + t.Fatal(err) + } + + idxMapping.DefaultAnalyzer = "custom1" + idx, err := New("testidx", idxMapping) + if err != nil { + t.Fatal(err) + } + + defer func() { + err = idx.Close() + if err != nil { + t.Fatal(err) + } + + err = os.RemoveAll("testidx") + if err != nil { + t.Fatal(err) + } + }() + + doc := map[string]interface{}{ + "languages_known": "Dutch", + "dept": "Sales", + } + + batch := idx.NewBatch() + if err = batch.Index("doc", doc); err != nil { + t.Fatal(err) + } + + if err = idx.Batch(batch); err != nil { + t.Fatal(err) + } + + // this is a successful match + matchSales := NewMatchQuery("Sales") + matchSales.SetField("dept") + + // this would spin off a MatchNoneSearcher as the + // token filter rules out the word "Dutch" + matchDutch := NewMatchQuery("Dutch") + matchDutch.SetField("languages_known") + + matchEngineering := NewMatchQuery("Engineering") + matchEngineering.SetField("dept") + + bq := NewBooleanQuery() + 
bq.AddShould(matchSales) + bq.AddMustNot(matchDutch, matchEngineering) + + sr := NewSearchRequest(bq) + res, err := idx.Search(sr) + if err != nil { + t.Fatal(err) + } + + if res.Total != 0 { + t.Fatalf("Expected 0 results but got: %v", res.Total) + } + +} From 0dc98d8d768f86fa4ccf5ea9289de3d33de2058c Mon Sep 17 00:00:00 2001 From: abhinavdangeti Date: Mon, 1 Apr 2019 13:45:42 -0700 Subject: [PATCH 578/728] Adding a boolean search testcase Reference: https://github.com/blevesearch/bleve/issues/1185 --- search_test.go | 132 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 132 insertions(+) diff --git a/search_test.go b/search_test.go index 682e8ed0a..c534dd320 100644 --- a/search_test.go +++ b/search_test.go @@ -1414,5 +1414,137 @@ func TestBooleanMustNotSingleMatchNone(t *testing.T) { if res.Total != 0 { t.Fatalf("Expected 0 results but got: %v", res.Total) } +} + +func TestBooleanSearchBug1185(t *testing.T) { + defer func() { + err := os.RemoveAll("testidx") + if err != nil { + t.Fatal(err) + } + }() + + of := NewTextFieldMapping() + of.Analyzer = keyword.Name + of.Name = "owner" + + dm := NewDocumentMapping() + dm.AddFieldMappingsAt("owner", of) + + m := NewIndexMapping() + m.DefaultMapping = dm + + idx, err := NewUsing("testidx", m, "scorch", "scorch", nil) + if err != nil { + t.Fatal(err) + } + defer func() { + err := idx.Close() + if err != nil { + t.Fatal(err) + } + }() + + err = idx.Index("17112", map[string]interface{}{ + "owner": "marty", + "type": "A Demo Type", + }) + if err != nil { + t.Fatal(err) + } + + err = idx.Index("17139", map[string]interface{}{ + "type": "A Demo Type", + }) + if err != nil { + t.Fatal(err) + } + + err = idx.Index("177777", map[string]interface{}{ + "type": "x", + }) + if err != nil { + t.Fatal(err) + } + err = idx.Index("177778", map[string]interface{}{ + "type": "A Demo Type", + }) + if err != nil { + t.Fatal(err) + } + err = idx.Index("17140", map[string]interface{}{ + "type": "A Demo Type", + }) + if err != 
nil { + t.Fatal(err) + } + + err = idx.Index("17000", map[string]interface{}{ + "owner": "marty", + "type": "x", + }) + if err != nil { + t.Fatal(err) + } + + err = idx.Index("17141", map[string]interface{}{ + "type": "A Demo Type", + }) + if err != nil { + t.Fatal(err) + } + + err = idx.Index("17428", map[string]interface{}{ + "owner": "marty", + "type": "A Demo Type", + }) + if err != nil { + t.Fatal(err) + } + + err = idx.Index("17113", map[string]interface{}{ + "owner": "marty", + "type": "x", + }) + if err != nil { + t.Fatal(err) + } + + matchTypeQ := NewMatchPhraseQuery("A Demo Type") + matchTypeQ.SetField("type") + + matchAnyOwnerRegQ := NewRegexpQuery(".+") + matchAnyOwnerRegQ.SetField("owner") + + matchNoOwner := NewBooleanQuery() + matchNoOwner.AddMustNot(matchAnyOwnerRegQ) + + notNoOwner := NewBooleanQuery() + notNoOwner.AddMustNot(matchNoOwner) + + matchTypeAndNoOwner := NewConjunctionQuery() + matchTypeAndNoOwner.AddQuery(matchTypeQ) + matchTypeAndNoOwner.AddQuery(notNoOwner) + + req := NewSearchRequest(matchTypeAndNoOwner) + res, err := idx.Search(req) + if err != nil { + t.Fatal(err) + } + + // query 2 + matchTypeAndNoOwnerBoolean := NewBooleanQuery() + matchTypeAndNoOwnerBoolean.AddMust(matchTypeQ) + matchTypeAndNoOwnerBoolean.AddMustNot(matchNoOwner) + + req2 := NewSearchRequest(matchTypeAndNoOwnerBoolean) + res2, err := idx.Search(req2) + if err != nil { + t.Fatal(err) + } + + if len(res.Hits) != len(res2.Hits) { + t.Fatalf("expected same number of hits, got: %d and %d", len(res.Hits), len(res2.Hits)) + } } From 70e74bf466d04282cfceba8a7c7864c9da4a2c08 Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Tue, 2 Apr 2019 08:48:58 -0400 Subject: [PATCH 579/728] fix incorrect BooleanSearcher Advance behavior * Revert "Advancing boolean searcher's currentID w.r.t currMustNot" This reverts commit dd49ef0aa0a006a46e90b20dcba578265c043c07. 
* track boolean searcher done status when boolean searchers are nested, there can occur a situation in which a boolean searcher is reset after reaching the end. this leads to incorrect behavior, so instead we track reaching the end, and prevent advance from resetting that behavior. * add Unit test that catches a cornercase: TestNestedBooleanMustNotSearcherUpsidedown --- search/searcher/search_boolean.go | 40 ++++++++++++++++++++----------- 1 file changed, 26 insertions(+), 14 deletions(-) diff --git a/search/searcher/search_boolean.go b/search/searcher/search_boolean.go index bbbced479..7f0bfa424 100644 --- a/search/searcher/search_boolean.go +++ b/search/searcher/search_boolean.go @@ -45,6 +45,7 @@ type BooleanSearcher struct { scorer *scorer.ConjunctionQueryScorer matches []*search.DocumentMatch initialized bool + done bool } func NewBooleanSearcher(indexReader index.IndexReader, mustSearcher search.Searcher, shouldSearcher search.Searcher, mustNotSearcher search.Searcher, options search.SearcherOptions) (*BooleanSearcher, error) { @@ -207,6 +208,10 @@ func (s *BooleanSearcher) SetQueryNorm(qnorm float64) { func (s *BooleanSearcher) Next(ctx *search.SearchContext) (*search.DocumentMatch, error) { + if s.done { + return nil, nil + } + if !s.initialized { err := s.initSearchers(ctx) if err != nil { @@ -320,11 +325,19 @@ func (s *BooleanSearcher) Next(ctx *search.SearchContext) (*search.DocumentMatch } } + if rv == nil { + s.done = true + } + return rv, nil } func (s *BooleanSearcher) Advance(ctx *search.SearchContext, ID index.IndexInternalID) (*search.DocumentMatch, error) { + if s.done { + return nil, nil + } + if !s.initialized { err := s.initSearchers(ctx) if err != nil { @@ -332,14 +345,8 @@ func (s *BooleanSearcher) Advance(ctx *search.SearchContext, ID index.IndexInter } } - // Advance the searchers only if the currentID cursor is trailing the lookup ID, - // additionally if the mustNotSearcher has been initialized, ensure that the - // cursor used to track 
the mustNotSearcher (currMustNot, which isn't tracked by - // currentID) is trailing the lookup ID as well - for in the case where currentID - // is nil and currMustNot is already at or ahead of the lookup ID, we MUST NOT - // advance the currentID or the currMustNot cursors. - if (s.currentID == nil || s.currentID.Compare(ID) < 0) && - (s.currMustNot == nil || s.currMustNot.IndexInternalID.Compare(ID) < 0) { + // Advance the searcher only if the cursor is trailing the lookup ID + if s.currentID == nil || s.currentID.Compare(ID) < 0 { var err error if s.mustSearcher != nil { if s.currMust != nil { @@ -362,12 +369,17 @@ func (s *BooleanSearcher) Advance(ctx *search.SearchContext, ID index.IndexInter } if s.mustNotSearcher != nil { - if s.currMustNot != nil { - ctx.DocumentMatchPool.Put(s.currMustNot) - } - s.currMustNot, err = s.mustNotSearcher.Advance(ctx, ID) - if err != nil { - return nil, err + // Additional check for mustNotSearcher, whose cursor isn't tracked by + // currentID to prevent it from moving when the searcher's tracked + // position is already ahead of or at the requested ID. 
+ if s.currMustNot == nil || s.currMustNot.IndexInternalID.Compare(ID) < 0 { + if s.currMustNot != nil { + ctx.DocumentMatchPool.Put(s.currMustNot) + } + s.currMustNot, err = s.mustNotSearcher.Advance(ctx, ID) + if err != nil { + return nil, err + } } } From 3bd9e536b6bc4f99a82eda4a62a50240b0598974 Mon Sep 17 00:00:00 2001 From: abhinavdangeti Date: Mon, 1 Apr 2019 20:30:03 -0700 Subject: [PATCH 580/728] MB-33600: Allow date ranges before epoch time --- search/query/date_range.go | 11 +++++++++-- search/query/date_range_test.go | 5 +++++ 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/search/query/date_range.go b/search/query/date_range.go index ae26f8dec..5f6a8db3c 100644 --- a/search/query/date_range.go +++ b/search/query/date_range.go @@ -41,6 +41,13 @@ type BleveQueryTime struct { time.Time } +var epochTime time.Time + +func init() { + unixEpochTime := "1970-01-01T00:00:00Z" + epochTime, _ = time.Parse(time.RFC3339, unixEpochTime) +} + func queryTimeFromString(t string) (time.Time, error) { dateTimeParser, err := cache.DateTimeParserNamed(QueryDateTimeParser) if err != nil { @@ -144,7 +151,7 @@ func (q *DateRangeQuery) parseEndpoints() (*float64, *float64, error) { max := math.Inf(1) if !q.Start.IsZero() { startInt64 := q.Start.UnixNano() - if startInt64 < 0 { + if startInt64 < 0 && q.Start.After(epochTime) { // overflow return nil, nil, fmt.Errorf("invalid/unsupported date range, start: %v", q.Start) } @@ -152,7 +159,7 @@ func (q *DateRangeQuery) parseEndpoints() (*float64, *float64, error) { } if !q.End.IsZero() { endInt64 := q.End.UnixNano() - if endInt64 < 0 { + if endInt64 < 0 && q.End.After(epochTime) { // overflow return nil, nil, fmt.Errorf("invalid/unsupported date range, end: %v", q.End) } diff --git a/search/query/date_range_test.go b/search/query/date_range_test.go index dd14d3238..cfec05e35 100644 --- a/search/query/date_range_test.go +++ b/search/query/date_range_test.go @@ -76,6 +76,11 @@ func TestValidateDatetimeRanges(t 
*testing.T) { end: "2262-04-12T00:00:00Z", expect: false, }, + { + start: "1950-03-22T12:23:23Z", + end: "1960-02-21T15:23:34Z", + expect: true, + }, } for _, test := range tests { From 96b0280a5c6458245d2ff398a983c87268965162 Mon Sep 17 00:00:00 2001 From: abhinavdangeti Date: Tue, 2 Apr 2019 12:24:39 -0700 Subject: [PATCH 581/728] MB-33600: Further validation for start/end date ranges + MinAllowedTime: 1677-12-01T00:00:00Z + MaxAllowedTime: 2262-04-11T11:59:59Z Any NonZero time.Time values that fall outside this range will cause UnixNano() to overflow (int64 cannot accommodate). --- search/query/date_range.go | 15 +++++++------- search/query/date_range_test.go | 35 +++++++++++++++++++++++++++++++++ 2 files changed, 43 insertions(+), 7 deletions(-) diff --git a/search/query/date_range.go b/search/query/date_range.go index 5f6a8db3c..5d61bd37e 100644 --- a/search/query/date_range.go +++ b/search/query/date_range.go @@ -41,11 +41,12 @@ type BleveQueryTime struct { time.Time } -var epochTime time.Time +var minValidTime time.Time +var maxValidTime time.Time func init() { - unixEpochTime := "1970-01-01T00:00:00Z" - epochTime, _ = time.Parse(time.RFC3339, unixEpochTime) + minValidTime, _ = time.Parse(time.RFC3339, "1677-12-01T00:00:00Z") + maxValidTime, _ = time.Parse(time.RFC3339, "2262-04-11T11:59:59Z") } func queryTimeFromString(t string) (time.Time, error) { @@ -150,19 +151,19 @@ func (q *DateRangeQuery) parseEndpoints() (*float64, *float64, error) { min := math.Inf(-1) max := math.Inf(1) if !q.Start.IsZero() { - startInt64 := q.Start.UnixNano() - if startInt64 < 0 && q.Start.After(epochTime) { + if q.Start.Before(minValidTime) || q.Start.After(maxValidTime) { // overflow return nil, nil, fmt.Errorf("invalid/unsupported date range, start: %v", q.Start) } + startInt64 := q.Start.UnixNano() min = numeric.Int64ToFloat64(startInt64) } if !q.End.IsZero() { - endInt64 := q.End.UnixNano() - if endInt64 < 0 && q.End.After(epochTime) { + if q.End.Before(minValidTime) || 
q.End.After(maxValidTime) { // overflow return nil, nil, fmt.Errorf("invalid/unsupported date range, end: %v", q.End) } + endInt64 := q.End.UnixNano() max = numeric.Int64ToFloat64(endInt64) } diff --git a/search/query/date_range_test.go b/search/query/date_range_test.go index cfec05e35..2990642be 100644 --- a/search/query/date_range_test.go +++ b/search/query/date_range_test.go @@ -81,6 +81,41 @@ func TestValidateDatetimeRanges(t *testing.T) { end: "1960-02-21T15:23:34Z", expect: true, }, + { + start: "0001-01-01T00:00:00Z", + end: "0001-01-01T00:00:00Z", + expect: false, + }, + { + start: "0001-01-01T00:00:00Z", + end: "2000-01-01T00:00:00Z", + expect: true, + }, + { + start: "1677-11-30T11:59:59Z", + end: "2262-04-11T11:59:59Z", + expect: false, + }, + { + start: "2262-04-12T00:00:00Z", + end: "2262-04-11T11:59:59Z", + expect: false, + }, + { + start: "1677-12-01T00:00:00Z", + end: "2262-04-12T00:00:00Z", + expect: false, + }, + { + start: "1677-12-01T00:00:00Z", + end: "1677-11-30T11:59:59Z", + expect: false, + }, + { + start: "1677-12-01T00:00:00Z", + end: "2262-04-11T11:59:59Z", + expect: true, + }, } for _, test := range tests { From 85d7cd0ba3567ba9224618ff2cf1db437ab81b40 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Wed, 3 Apr 2019 09:52:44 -0700 Subject: [PATCH 582/728] remove maxTerms optimization for ComputeGeoRange() Unlike normal disjunctions, a geoboundingbox searcher does not limit its multi-term-searcher against the DisjunctionMaxClauseCount, so my recent maxTerms optimization for ComputeGeoRange() in commit 3d774b9 was just wrong. This commit is a manual revert instead of an automated "git revert", in that the previous commit 3d774b9 also changed a few things that we'd still like to keep (e.g., error return value instead of panic'ing) and there are now more unit tests in this commit from diagnosing this issue, which compare the outputs of the optimized ComputeGeoRange() with the previous, non-optimized version. 
See also: https://issues.couchbase.com/browse/MB-33614 --- search/searcher/search_geoboundingbox.go | 11 +-- search/searcher/search_geoboundingbox_test.go | 87 +++++++++++++++++-- .../searcher/search_geopointdistance_test.go | 73 ++++++++++++++++ 3 files changed, 154 insertions(+), 17 deletions(-) diff --git a/search/searcher/search_geoboundingbox.go b/search/searcher/search_geoboundingbox.go index a80855ac5..ed5424f1f 100644 --- a/search/searcher/search_geoboundingbox.go +++ b/search/searcher/search_geoboundingbox.go @@ -40,7 +40,7 @@ func NewGeoBoundingBoxSearcher(indexReader index.IndexReader, minLon, minLat, // do math to produce list of terms needed for this search onBoundaryTerms, notOnBoundaryTerms, err := ComputeGeoRange(0, GeoBitsShift1Minus1, - minLon, minLat, maxLon, maxLat, checkBoundaries, DisjunctionMaxClauseCount) + minLon, minLat, maxLon, maxLat, checkBoundaries) if err != nil { return nil, err } @@ -100,7 +100,7 @@ var geoMaxShift = document.GeoPrecisionStep * 4 var geoDetailLevel = ((geo.GeoBits << 1) - geoMaxShift) / 2 func ComputeGeoRange(term uint64, shift uint, - sminLon, sminLat, smaxLon, smaxLat float64, checkBoundaries bool, maxTerms int) ( + sminLon, sminLat, smaxLon, smaxLat float64, checkBoundaries bool) ( onBoundary [][]byte, notOnBoundary [][]byte, err error) { preallocBytesLen := 32 preallocBytes := make([]byte, preallocBytesLen) @@ -144,13 +144,6 @@ func ComputeGeoRange(term uint64, shift uint, } computeGeoRange = func(term uint64, shift uint) { - if maxTerms > 0 { - if len(onBoundary) > maxTerms { - err = tooManyClausesErr(len(onBoundary)) - } else if len(notOnBoundary) > maxTerms { - err = tooManyClausesErr(len(notOnBoundary)) - } - } if err != nil { return } diff --git a/search/searcher/search_geoboundingbox_test.go b/search/searcher/search_geoboundingbox_test.go index be3d09168..cae803412 100644 --- a/search/searcher/search_geoboundingbox_test.go +++ b/search/searcher/search_geoboundingbox_test.go @@ -19,9 +19,11 @@ import ( 
"testing" "github.com/blevesearch/bleve/document" + "github.com/blevesearch/bleve/geo" "github.com/blevesearch/bleve/index" "github.com/blevesearch/bleve/index/store/gtreap" "github.com/blevesearch/bleve/index/upsidedown" + "github.com/blevesearch/bleve/numeric" "github.com/blevesearch/bleve/search" ) @@ -202,20 +204,18 @@ func setupGeo(t *testing.T) index.Index { func TestComputeGeoRange(t *testing.T) { tests := []struct { degs float64 - maxTerms int onBoundary int offBoundary int err string }{ - {0.01, 0, 4, 0, ""}, - {0.1, 0, 56, 144, ""}, - {100.0, 0, 32768, 258560, ""}, - {100.0, 1024, 0, 0, "geo range produces too many terms, so should have error"}, + {0.01, 4, 0, ""}, + {0.1, 56, 144, ""}, + {100.0, 32768, 258560, ""}, } - for _, test := range tests { + for testi, test := range tests { onBoundaryRes, offBoundaryRes, err := ComputeGeoRange(0, GeoBitsShift1Minus1, - -1.0*test.degs, -1.0*test.degs, test.degs, test.degs, true, test.maxTerms) + -1.0*test.degs, -1.0*test.degs, test.degs, test.degs, true) if (err != nil) != (test.err != "") { t.Errorf("test: %+v, err: %v", test, err) } @@ -225,6 +225,17 @@ func TestComputeGeoRange(t *testing.T) { if len(offBoundaryRes) != test.offBoundary { t.Errorf("test: %+v, offBoundaryRes: %v", test, len(offBoundaryRes)) } + + onBROrig, offBROrig := origComputeGeoRange(0, GeoBitsShift1Minus1, + -1.0*test.degs, -1.0*test.degs, test.degs, test.degs, true) + if !reflect.DeepEqual(onBoundaryRes, onBROrig) { + t.Errorf("testi: %d, test: %+v, onBoundaryRes != onBROrig,\n onBoundaryRes:%v,\n onBROrig: %v", + testi, test, onBoundaryRes, onBROrig) + } + if !reflect.DeepEqual(offBoundaryRes, offBROrig) { + t.Errorf("testi: %d, test: %+v, offBoundaryRes, offBROrig,\n offBoundaryRes: %v,\n offBROrig: %v", + testi, test, offBoundaryRes, offBROrig) + } } } @@ -264,7 +275,7 @@ func benchmarkComputeGeoRange(b *testing.B, for i := 0; i < b.N; i++ { onBoundaryRes, offBoundaryRes, err := - ComputeGeoRange(0, GeoBitsShift1Minus1, minLon, minLat, 
maxLon, maxLat, checkBoundaries, 0) + ComputeGeoRange(0, GeoBitsShift1Minus1, minLon, minLat, maxLon, maxLat, checkBoundaries) if err != nil { b.Fatalf("expected no err") } @@ -273,3 +284,63 @@ func benchmarkComputeGeoRange(b *testing.B, } } } + +// -------------------------------------------------------------------- + +// original, non-optimized implementation of ComputeGeoRange +func origComputeGeoRange(term uint64, shift uint, + sminLon, sminLat, smaxLon, smaxLat float64, + checkBoundaries bool) ( + onBoundary [][]byte, notOnBoundary [][]byte) { + split := term | uint64(0x1)<> 1 + + within := res%document.GeoPrecisionStep == 0 && + geo.RectWithin(minLon, minLat, maxLon, maxLat, + sminLon, sminLat, smaxLon, smaxLat) + if within || (level == geoDetailLevel && + geo.RectIntersects(minLon, minLat, maxLon, maxLat, + sminLon, sminLat, smaxLon, smaxLat)) { + if !within && checkBoundaries { + return [][]byte{ + numeric.MustNewPrefixCodedInt64(int64(start), res), + }, nil + } + return nil, + [][]byte{ + numeric.MustNewPrefixCodedInt64(int64(start), res), + } + } else if level < geoDetailLevel && + geo.RectIntersects(minLon, minLat, maxLon, maxLat, + sminLon, sminLat, smaxLon, smaxLat) { + return origComputeGeoRange(start, res-1, sminLon, sminLat, smaxLon, smaxLat, + checkBoundaries) + } + return nil, nil +} diff --git a/search/searcher/search_geopointdistance_test.go b/search/searcher/search_geopointdistance_test.go index 002e4e3e8..eaf8216a3 100644 --- a/search/searcher/search_geopointdistance_test.go +++ b/search/searcher/search_geopointdistance_test.go @@ -18,6 +18,7 @@ import ( "reflect" "testing" + "github.com/blevesearch/bleve/geo" "github.com/blevesearch/bleve/index" "github.com/blevesearch/bleve/search" ) @@ -82,3 +83,75 @@ func testGeoPointDistanceSearch(i index.IndexReader, centerLon, centerLat, dist } return rv, nil } + +func TestGeoPointDistanceCompare(t *testing.T) { + tests := []struct { + docLat, docLon float64 + centerLat, centerLon float64 + distance 
string + }{ + // Data points originally from MB-33454. + { + docLat: 33.718, + docLon: -116.8293, + centerLat: 39.59000587, + centerLon: -119.22998428, + distance: "10000mi", + }, + { + docLat: 41.1305, + docLon: -121.6587, + centerLat: 61.28, + centerLon: -149.34, + distance: "10000mi", + }, + } + + for testi, test := range tests { + // compares the results from ComputeGeoRange with original, non-optimized version + compare := func(desc string, + minLon, minLat, maxLon, maxLat float64, checkBoundaries bool) { + // do math to produce list of terms needed for this search + onBoundaryRes, offBoundaryRes, err := ComputeGeoRange(0, GeoBitsShift1Minus1, + minLon, minLat, maxLon, maxLat, checkBoundaries) + if err != nil { + t.Fatal(err) + } + + onBROrig, offBROrig := origComputeGeoRange(0, GeoBitsShift1Minus1, + minLon, minLat, maxLon, maxLat, checkBoundaries) + if !reflect.DeepEqual(onBoundaryRes, onBROrig) { + t.Fatalf("testi: %d, test: %+v, desc: %s, onBoundaryRes != onBROrig,\n onBoundaryRes:%v,\n onBROrig: %v", + testi, test, desc, onBoundaryRes, onBROrig) + } + if !reflect.DeepEqual(offBoundaryRes, offBROrig) { + t.Fatalf("testi: %d, test: %+v, desc: %s, offBoundaryRes, offBROrig,\n offBoundaryRes: %v,\n offBROrig: %v", + testi, test, desc, offBoundaryRes, offBROrig) + } + } + + // follow the general approach of the GeoPointDistanceSearcher... 
+ dist, err := geo.ParseDistance(test.distance) + if err != nil { + t.Fatal(err) + } + + topLeftLon, topLeftLat, bottomRightLon, bottomRightLat, err := + geo.RectFromPointDistance(test.centerLon, test.centerLat, dist) + if err != nil { + t.Fatal(err) + } + + if bottomRightLon < topLeftLon { + // crosses date line, rewrite as two parts + compare("-180/f", -180, bottomRightLat, bottomRightLon, topLeftLat, false) + compare("-180/t", -180, bottomRightLat, bottomRightLon, topLeftLat, true) + + compare("180/f", topLeftLon, bottomRightLat, 180, topLeftLat, false) + compare("180/t", topLeftLon, bottomRightLat, 180, topLeftLat, true) + } else { + compare("reg/f", topLeftLon, bottomRightLat, bottomRightLon, topLeftLat, false) + compare("reg/t", topLeftLon, bottomRightLat, bottomRightLon, topLeftLat, true) + } + } +} From 73e96b496795d6470e44e67dc5459c224953952a Mon Sep 17 00:00:00 2001 From: abhinavdangeti Date: Wed, 3 Apr 2019 11:01:18 -0700 Subject: [PATCH 583/728] MB-33600: Checking if datetimes are RFC3339 compatible Check if datetimes provided within the datetime range query are RFC3339 compatible only of the QueryDateTimeFormat is RFC3339. 
--- search/query/date_range.go | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/search/query/date_range.go b/search/query/date_range.go index 5d61bd37e..3ac0322f5 100644 --- a/search/query/date_range.go +++ b/search/query/date_range.go @@ -41,12 +41,12 @@ type BleveQueryTime struct { time.Time } -var minValidTime time.Time -var maxValidTime time.Time +var MinRFC3339CompatibleTime time.Time +var MaxRFC3339CompatibleTime time.Time func init() { - minValidTime, _ = time.Parse(time.RFC3339, "1677-12-01T00:00:00Z") - maxValidTime, _ = time.Parse(time.RFC3339, "2262-04-11T11:59:59Z") + MinRFC3339CompatibleTime, _ = time.Parse(time.RFC3339, "1677-12-01T00:00:00Z") + MaxRFC3339CompatibleTime, _ = time.Parse(time.RFC3339, "2262-04-11T11:59:59Z") } func queryTimeFromString(t string) (time.Time, error) { @@ -151,7 +151,7 @@ func (q *DateRangeQuery) parseEndpoints() (*float64, *float64, error) { min := math.Inf(-1) max := math.Inf(1) if !q.Start.IsZero() { - if q.Start.Before(minValidTime) || q.Start.After(maxValidTime) { + if !isDatetimeCompatible(q.Start) { // overflow return nil, nil, fmt.Errorf("invalid/unsupported date range, start: %v", q.Start) } @@ -159,7 +159,7 @@ func (q *DateRangeQuery) parseEndpoints() (*float64, *float64, error) { min = numeric.Int64ToFloat64(startInt64) } if !q.End.IsZero() { - if q.End.Before(minValidTime) || q.End.After(maxValidTime) { + if !isDatetimeCompatible(q.End) { // overflow return nil, nil, fmt.Errorf("invalid/unsupported date range, end: %v", q.End) } @@ -180,3 +180,12 @@ func (q *DateRangeQuery) Validate() error { } return nil } + +func isDatetimeCompatible(t BleveQueryTime) bool { + if QueryDateTimeFormat == time.RFC3339 && + (t.Before(MinRFC3339CompatibleTime) || t.After(MaxRFC3339CompatibleTime)) { + return false + } + + return true +} From 9dcbc5c71af0d5f5a5ffc392461a6579c15bf686 Mon Sep 17 00:00:00 2001 From: abhinavdangeti Date: Thu, 4 Apr 2019 13:38:36 -0700 Subject: [PATCH 584/728] 
Include freq/norm information when IncludeTermVectors:true Fixes: https://github.com/blevesearch/bleve/issues/1194 --- search/scorer/scorer_term.go | 4 ++- search/searcher/search_term.go | 2 +- search_test.go | 46 ++++++++++++++++++++++++++++++++++ 3 files changed, 50 insertions(+), 2 deletions(-) diff --git a/search/scorer/scorer_term.go b/search/scorer/scorer_term.go index 5544f2d01..f45fd5dbc 100644 --- a/search/scorer/scorer_term.go +++ b/search/scorer/scorer_term.go @@ -159,7 +159,9 @@ func (s *TermQueryScorer) Score(ctx *search.SearchContext, termMatch *index.Term rv := ctx.DocumentMatchPool.Get() rv.IndexInternalID = append(rv.IndexInternalID, termMatch.ID...) - rv.Score = score + if s.options.Score != "none" { + rv.Score = score + } if s.options.Explain { rv.Expl = scoreExplanation } diff --git a/search/searcher/search_term.go b/search/searcher/search_term.go index c1af74c76..8ff74d60c 100644 --- a/search/searcher/search_term.go +++ b/search/searcher/search_term.go @@ -42,7 +42,7 @@ func NewTermSearcher(indexReader index.IndexReader, term string, field string, b } func NewTermSearcherBytes(indexReader index.IndexReader, term []byte, field string, boost float64, options search.SearcherOptions) (*TermSearcher, error) { - needFreqNorm := options.Score != "none" + needFreqNorm := options.IncludeTermVectors || options.Score != "none" reader, err := indexReader.TermFieldReader(term, field, needFreqNorm, needFreqNorm, options.IncludeTermVectors) if err != nil { return nil, err diff --git a/search_test.go b/search_test.go index c534dd320..8c78d641f 100644 --- a/search_test.go +++ b/search_test.go @@ -1548,3 +1548,49 @@ func TestBooleanSearchBug1185(t *testing.T) { t.Fatalf("expected same number of hits, got: %d and %d", len(res.Hits), len(res2.Hits)) } } + +func TestSearchScoreNone(t *testing.T) { + idx, err := NewUsing("testidx", NewIndexMapping(), scorch.Name, Config.DefaultKVStore, nil) + if err != nil { + t.Fatal(err) + } + + defer func() { + err := 
os.RemoveAll("testidx") + if err != nil { + t.Fatal(err) + } + }() + + doc := map[string]interface{}{ + "field1": "asd fgh jkl", + "field2": "more content blah blah", + "id": "doc", + } + + if err = idx.Index("doc", doc); err != nil { + t.Fatal(err) + } + + q := NewQueryStringQuery("content") + sr := NewSearchRequest(q) + sr.IncludeLocations = true + sr.Score = "none" + + res, err := idx.Search(sr) + if err != nil { + t.Fatal(err) + } + + if len(res.Hits) != 1 { + t.Fatal("unexpected number of hits") + } + + if len(res.Hits[0].Locations) != 1 { + t.Fatal("unexpected locations for the hit") + } + + if res.Hits[0].Score != 0 { + t.Fatal("unexpected score for the hit") + } +} From 45782f295224663b84e7654939bb127012611102 Mon Sep 17 00:00:00 2001 From: abhinavdangeti Date: Thu, 4 Apr 2019 17:58:07 -0700 Subject: [PATCH 585/728] New flag within TermQueryScorer on whether to add score --- search/scorer/scorer_term.go | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/search/scorer/scorer_term.go b/search/scorer/scorer_term.go index f45fd5dbc..a0e02e438 100644 --- a/search/scorer/scorer_term.go +++ b/search/scorer/scorer_term.go @@ -40,6 +40,7 @@ type TermQueryScorer struct { idf float64 options search.SearcherOptions idfExplanation *search.Explanation + includeScore bool queryNorm float64 queryWeight float64 queryWeightExplanation *search.Explanation @@ -62,14 +63,15 @@ func (s *TermQueryScorer) Size() int { func NewTermQueryScorer(queryTerm []byte, queryField string, queryBoost float64, docTotal, docTerm uint64, options search.SearcherOptions) *TermQueryScorer { rv := TermQueryScorer{ - queryTerm: string(queryTerm), - queryField: queryField, - queryBoost: queryBoost, - docTerm: docTerm, - docTotal: docTotal, - idf: 1.0 + math.Log(float64(docTotal)/float64(docTerm+1.0)), - options: options, - queryWeight: 1.0, + queryTerm: string(queryTerm), + queryField: queryField, + queryBoost: queryBoost, + docTerm: docTerm, + docTotal: docTotal, 
+ idf: 1.0 + math.Log(float64(docTotal)/float64(docTerm+1.0)), + options: options, + queryWeight: 1.0, + includeScore: options.Score != "none", } if options.Explain { @@ -159,7 +161,7 @@ func (s *TermQueryScorer) Score(ctx *search.SearchContext, termMatch *index.Term rv := ctx.DocumentMatchPool.Get() rv.IndexInternalID = append(rv.IndexInternalID, termMatch.ID...) - if s.options.Score != "none" { + if s.includeScore { rv.Score = score } if s.options.Explain { From eaa53f3be3d7e368753502a38fb04f29ceb3f253 Mon Sep 17 00:00:00 2001 From: Sreekanth Sivasankaran Date: Mon, 8 Apr 2019 07:33:59 +0530 Subject: [PATCH 586/728] =?UTF-8?q?This=20change=20aims=20to=20cache=20the?= =?UTF-8?q?=20vellum=20fst=20instances=20per=20field=20at=20the=20SegmentB?= =?UTF-8?q?ase=20level=20to=20help=20it=E2=80=99s=20reuse=20across=20queri?= =?UTF-8?q?es.=20Its=20been=20noted=20from=20the=20cpu=20profiles=20that?= =?UTF-8?q?=20the=20vellum.Load/new=20could=20be=20optimised.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Improvements observed for a 1M data set with 10K w/sec, Wildcard queries -  147 – 184 [25%] Fuzzy1 – 705 – 790 [12%] Fuzzy2 - 71 - 79 [11%] Prefix queries - 253 -> 270 [7%] --- index/scorch/segment/zap/build.go | 2 ++ index/scorch/segment/zap/segment.go | 28 ++++++++++++++++++++-------- 2 files changed, 22 insertions(+), 8 deletions(-) diff --git a/index/scorch/segment/zap/build.go b/index/scorch/segment/zap/build.go index 91bfd4e24..c02333cee 100644 --- a/index/scorch/segment/zap/build.go +++ b/index/scorch/segment/zap/build.go @@ -16,6 +16,7 @@ package zap import ( "bufio" + "github.com/couchbase/vellum" "math" "os" ) @@ -137,6 +138,7 @@ func InitSegmentBase(mem []byte, memCRC uint32, chunkFactor uint32, docValueOffset: docValueOffset, dictLocs: dictLocs, fieldDvReaders: make(map[uint16]*docValueReader), + fieldFSTs: make(map[uint16]*vellum.FST), } sb.updateSize() diff --git a/index/scorch/segment/zap/segment.go 
b/index/scorch/segment/zap/segment.go index 7ba28c236..bbae932e1 100644 --- a/index/scorch/segment/zap/segment.go +++ b/index/scorch/segment/zap/segment.go @@ -56,6 +56,7 @@ func Open(path string) (segment.Segment, error) { mem: mm[0 : len(mm)-FooterSize], fieldsMap: make(map[string]uint16), fieldDvReaders: make(map[uint16]*docValueReader), + fieldFSTs: make(map[uint16]*vellum.FST), }, f: f, mm: mm, @@ -101,6 +102,9 @@ type SegmentBase struct { fieldDvReaders map[uint16]*docValueReader // naive chunk cache per field fieldDvNames []string // field names cached in fieldDvReaders size uint64 + + m sync.Mutex + fieldFSTs map[uint16]*vellum.FST } func (sb *SegmentBase) Size() int { @@ -258,19 +262,27 @@ func (sb *SegmentBase) dictionary(field string) (rv *Dictionary, err error) { dictStart := sb.dictLocs[rv.fieldID] if dictStart > 0 { - // read the length of the vellum data - vellumLen, read := binary.Uvarint(sb.mem[dictStart : dictStart+binary.MaxVarintLen64]) - fstBytes := sb.mem[dictStart+uint64(read) : dictStart+uint64(read)+vellumLen] - if fstBytes != nil { + var ok bool + sb.m.Lock() + if rv.fst, ok = sb.fieldFSTs[rv.fieldID]; !ok { + // read the length of the vellum data + vellumLen, read := binary.Uvarint(sb.mem[dictStart : dictStart+binary.MaxVarintLen64]) + fstBytes := sb.mem[dictStart+uint64(read) : dictStart+uint64(read)+vellumLen] rv.fst, err = vellum.Load(fstBytes) if err != nil { + sb.m.Unlock() return nil, fmt.Errorf("dictionary field %s vellum err: %v", field, err) } - rv.fstReader, err = rv.fst.Reader() - if err != nil { - return nil, fmt.Errorf("dictionary field %s vellum reader err: %v", field, err) - } + + sb.fieldFSTs[rv.fieldID] = rv.fst } + + sb.m.Unlock() + rv.fstReader, err = rv.fst.Reader() + if err != nil { + return nil, fmt.Errorf("dictionary field %s vellum reader err: %v", field, err) + } + } } From 86614196cb9f5e971975abbd52a34d7e6c93cef3 Mon Sep 17 00:00:00 2001 From: Sreekanth Sivasankaran Date: Mon, 8 Apr 2019 10:57:53 +0530 Subject: 
[PATCH 587/728] moving to unsafe.Sizeof() api to fix the vet errors. Its been analogous to reflect.Type.Size() as per API documentation. --- index/scorch/segment/zap/segment.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/index/scorch/segment/zap/segment.go b/index/scorch/segment/zap/segment.go index bbae932e1..03cb070d3 100644 --- a/index/scorch/segment/zap/segment.go +++ b/index/scorch/segment/zap/segment.go @@ -20,8 +20,8 @@ import ( "fmt" "io" "os" - "reflect" "sync" + "unsafe" "github.com/RoaringBitmap/roaring" "github.com/blevesearch/bleve/index/scorch/segment" @@ -35,7 +35,7 @@ var reflectStaticSizeSegmentBase int func init() { var sb SegmentBase - reflectStaticSizeSegmentBase = int(reflect.TypeOf(sb).Size()) + reflectStaticSizeSegmentBase = int(unsafe.Sizeof(sb)) } // Open returns a zap impl of a segment From 409d4e4fc3b43c29bc787618044c17cff2e75dad Mon Sep 17 00:00:00 2001 From: abhinavdangeti Date: Mon, 8 Apr 2019 15:06:46 -0700 Subject: [PATCH 588/728] Include freq/norm if location information is asked for --- index/scorch/segment/zap/posting.go | 2 +- search/searcher/search_term.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/index/scorch/segment/zap/posting.go b/index/scorch/segment/zap/posting.go index 0656c8e99..417e89b4d 100644 --- a/index/scorch/segment/zap/posting.go +++ b/index/scorch/segment/zap/posting.go @@ -190,7 +190,7 @@ func (p *PostingsList) iterator(includeFreq, includeNorm, includeLocs bool, } rv.postings = p - rv.includeFreqNorm = includeFreq || includeNorm + rv.includeFreqNorm = includeFreq || includeNorm || includeLocs rv.includeLocs = includeLocs if p.normBits1Hit != 0 { diff --git a/search/searcher/search_term.go b/search/searcher/search_term.go index 8ff74d60c..c1af74c76 100644 --- a/search/searcher/search_term.go +++ b/search/searcher/search_term.go @@ -42,7 +42,7 @@ func NewTermSearcher(indexReader index.IndexReader, term string, field string, b } func 
NewTermSearcherBytes(indexReader index.IndexReader, term []byte, field string, boost float64, options search.SearcherOptions) (*TermSearcher, error) { - needFreqNorm := options.IncludeTermVectors || options.Score != "none" + needFreqNorm := options.Score != "none" reader, err := indexReader.TermFieldReader(term, field, needFreqNorm, needFreqNorm, options.IncludeTermVectors) if err != nil { return nil, err From ad32d2f762cf4d50e4332f494ea65b3f2535b273 Mon Sep 17 00:00:00 2001 From: abhinavdangeti Date: Wed, 24 Apr 2019 13:23:39 -0700 Subject: [PATCH 589/728] Update .travis.yml to test on the latest 3 versions of golang + 1.12.x, 1.11.x, 1.10.x + https://golang.org/doc/devel/release.html#policy --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index c79b3fa3d..22612c966 100644 --- a/.travis.yml +++ b/.travis.yml @@ -3,9 +3,9 @@ sudo: false language: go go: - - "1.9.x" - "1.10.x" - "1.11.x" + - "1.12.x" script: - go get golang.org/x/tools/cmd/cover From dca226594cf3d2111f5d0740f392e5199c1bc71f Mon Sep 17 00:00:00 2001 From: abhinavdangeti Date: Wed, 24 Apr 2019 18:31:35 -0700 Subject: [PATCH 590/728] Reset stats: lastDocSize, totalSize in batch's Reset() --- index.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/index.go b/index.go index 99357eee0..ef6ede934 100644 --- a/index.go +++ b/index.go @@ -117,6 +117,8 @@ func (b *Batch) String() string { // be re-used in the future. 
func (b *Batch) Reset() { b.internal.Reset() + b.lastDocSize = 0 + b.totalSize = 0 } func (b *Batch) Merge(o *Batch) { From d1bc80056ac11ee36ef247eac3ef682a2690e769 Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Thu, 25 Apr 2019 12:13:53 -0400 Subject: [PATCH 591/728] fix some minor issues with zap doc values (#1202) - first, we now check the error returned by loadFieldDocValueReader - second, in order for that to no break things we changed the behavior of loadFieldDocValueReader to not return an error in the case where the field isn't uninverted. this isn't really an error, we now just return nil, nil - third, we return a new error when the condition fieldDvLocEnd-fieldDvLocStart > 16 isn't satisfied. this is a good indication that we're pointing at the wrong part of the file, or it has been corrupted. - fourth, the unit test helper method testMergeWithEmptySegments was updated to check for errors returned by AnalysisResultsToSegmentBase - finally, the loadDvReaders metdod was updated to return early when the number of docs is 0. this was necessary because there is a way to build a segment which has an invalid docValueOffset. this case was exposed by the empty segment building test. a proper fix is to would change the file format, so adding this check was preferred for the moment. 
--- index/scorch/segment/zap/docvalues.go | 6 ++++-- index/scorch/segment/zap/merge_test.go | 5 ++++- index/scorch/segment/zap/segment.go | 7 +++++-- 3 files changed, 13 insertions(+), 5 deletions(-) diff --git a/index/scorch/segment/zap/docvalues.go b/index/scorch/segment/zap/docvalues.go index bcc0f9472..0ceb91fb5 100644 --- a/index/scorch/segment/zap/docvalues.go +++ b/index/scorch/segment/zap/docvalues.go @@ -88,8 +88,8 @@ func (s *SegmentBase) loadFieldDocValueReader(field string, fieldDvLocStart, fieldDvLocEnd uint64) (*docValueReader, error) { // get the docValue offset for the given fields if fieldDvLocStart == fieldNotUninverted { - return nil, fmt.Errorf("loadFieldDocValueReader: "+ - "no docValues found for field: %s", field) + // no docValues found, nothing to do + return nil, nil } // read the number of chunks, and chunk offsets position @@ -101,6 +101,8 @@ func (s *SegmentBase) loadFieldDocValueReader(field string, chunkOffsetsLen := binary.BigEndian.Uint64(s.mem[fieldDvLocEnd-16 : fieldDvLocEnd-8]) // acquire position of chunk offsets chunkOffsetsPosition = (fieldDvLocEnd - 16) - chunkOffsetsLen + } else { + return nil, fmt.Errorf("loadFieldDocValueReader: fieldDvLoc too small: %d-%d", fieldDvLocEnd, fieldDvLocStart) } fdvIter := &docValueReader{ diff --git a/index/scorch/segment/zap/merge_test.go b/index/scorch/segment/zap/merge_test.go index 450ecba91..175671226 100644 --- a/index/scorch/segment/zap/merge_test.go +++ b/index/scorch/segment/zap/merge_test.go @@ -147,7 +147,10 @@ func testMergeWithEmptySegments(t *testing.T, before bool, numEmptySegments int) _ = os.RemoveAll("/tmp/" + fname) - emptySegment, _, _ := AnalysisResultsToSegmentBase([]*index.AnalysisResult{}, 1024) + emptySegment, _, err := AnalysisResultsToSegmentBase([]*index.AnalysisResult{}, 1024) + if err != nil { + t.Fatal(err) + } err = PersistSegmentBase(emptySegment, "/tmp/"+fname) if err != nil { t.Fatal(err) diff --git a/index/scorch/segment/zap/segment.go 
b/index/scorch/segment/zap/segment.go index 03cb070d3..5aa33a26c 100644 --- a/index/scorch/segment/zap/segment.go +++ b/index/scorch/segment/zap/segment.go @@ -539,7 +539,7 @@ func (s *Segment) DictAddr(field string) (uint64, error) { } func (s *SegmentBase) loadDvReaders() error { - if s.docValueOffset == fieldNotUninverted { + if s.docValueOffset == fieldNotUninverted || s.numDocs == 0 { return nil } @@ -558,7 +558,10 @@ func (s *SegmentBase) loadDvReaders() error { } read += uint64(n) - fieldDvReader, _ := s.loadFieldDocValueReader(field, fieldLocStart, fieldLocEnd) + fieldDvReader, err := s.loadFieldDocValueReader(field, fieldLocStart, fieldLocEnd) + if err != nil { + return err + } if fieldDvReader != nil { s.fieldDvReaders[uint16(fieldID)] = fieldDvReader s.fieldDvNames = append(s.fieldDvNames, field) From 7f3a218ae72960bb4841254833a52a5f088a9928 Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Wed, 1 May 2019 13:41:33 -0400 Subject: [PATCH 592/728] add optional validation hooks to scorch/zap (#1206) * add optional validation hooks to scorch/zap ValidateDocFields can be used to check each field seen by zap ValidateMerge can be used to check the results of a merge operation These hooks are intended to allow for application specific checks inside the main bleve branch, so as to allow maximum flexibility with minimal disruption. These hooks are not intended to be permanent, but that decision can be made at a later time. 
* fix bug in logging * invoke validate merge for in-memory merges as well * change error message for in-memory merge validation fail --- index/scorch/merge.go | 9 ++++++++- index/scorch/segment/zap/merge.go | 8 ++++++++ index/scorch/segment/zap/new.go | 13 +++++++++++++ 3 files changed, 29 insertions(+), 1 deletion(-) diff --git a/index/scorch/merge.go b/index/scorch/merge.go index bcbf5b710..83f98aab0 100644 --- a/index/scorch/merge.go +++ b/index/scorch/merge.go @@ -151,7 +151,6 @@ func (s *Scorch) planMergeAtSnapshot(ourSnapshot *IndexSnapshot, atomic.AddUint64(&s.stats.TotFileMergePlanNone, 1) return nil } - atomic.AddUint64(&s.stats.TotFileMergePlanOk, 1) atomic.AddUint64(&s.stats.TotFileMergePlanTasks, uint64(len(resultMergePlan.Tasks))) @@ -221,6 +220,10 @@ func (s *Scorch) planMergeAtSnapshot(ourSnapshot *IndexSnapshot, atomic.AddUint64(&s.stats.TotFileMergePlanTasksErr, 1) return err } + err = zap.ValidateMerge(segmentsToMerge, nil, docsToDrop, seg.(*zap.Segment)) + if err != nil { + return fmt.Errorf("merge validation failed: %v", err) + } oldNewDocNums = make(map[uint64][]uint64) for i, segNewDocNums := range newDocNums { oldNewDocNums[task.Segments[i].Id()] = segNewDocNums @@ -311,6 +314,10 @@ func (s *Scorch) mergeSegmentBases(snapshot *IndexSnapshot, atomic.AddUint64(&s.stats.TotMemMergeErr, 1) return nil, 0, err } + err = zap.ValidateMerge(nil, sbs, sbsDrops, seg.(*zap.Segment)) + if err != nil { + return nil, 0, fmt.Errorf("in-memory merge validation failed: %v", err) + } // update persisted stats atomic.AddUint64(&s.stats.TotPersistedItems, seg.Count()) diff --git a/index/scorch/segment/zap/merge.go b/index/scorch/segment/zap/merge.go index 4ef222c1a..50bd7207a 100644 --- a/index/scorch/segment/zap/merge.go +++ b/index/scorch/segment/zap/merge.go @@ -31,6 +31,14 @@ import ( var DefaultFileMergerBufferSize = 1024 * 1024 +// ValidateMerge can be set by applications to perform additional checks +// on a new segment produced by a merge, by default this 
does nothing. +// Caller should provide EITHER segments or memSegments, but not both. +// This API is experimental and may be removed at any time. +var ValidateMerge = func(segments []*Segment, memSegments []*SegmentBase, drops []*roaring.Bitmap, newSegment *Segment) error { + return nil +} + const docDropped = math.MaxUint64 // sentinel docNum to represent a deleted doc // Merge takes a slice of zap segments and bit masks describing which diff --git a/index/scorch/segment/zap/new.go b/index/scorch/segment/zap/new.go index 22b69913e..c108ec16d 100644 --- a/index/scorch/segment/zap/new.go +++ b/index/scorch/segment/zap/new.go @@ -33,6 +33,14 @@ var NewSegmentBufferNumResultsBump int = 100 var NewSegmentBufferNumResultsFactor float64 = 1.0 var NewSegmentBufferAvgBytesPerDocFactor float64 = 1.0 +// ValidateDocFields can be set by applications to perform additional checks +// on fields in a document being added to a new segment, by default it does +// nothing. +// This API is experimental and may be removed at any time. 
+var ValidateDocFields = func(field document.Field) error { + return nil +} + // AnalysisResultsToSegmentBase produces an in-memory zap-encoded // SegmentBase from analysis results func AnalysisResultsToSegmentBase(results []*index.AnalysisResult, @@ -521,6 +529,11 @@ func (s *interim) writeStoredFields() ( if opts.IncludeDocValues() { s.IncludeDocValues[fieldID] = true } + + err := ValidateDocFields(field) + if err != nil { + return 0, err + } } var curr int From 97932ae5b280705390b8b3b6803186f5fc98d822 Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Mon, 6 May 2019 17:24:25 -0400 Subject: [PATCH 593/728] move the impl of DocumentFieldTermVisitable (#1208) previously these methods were defined on Segment but they only use fields in SegmentBase, and having them defined on SegmentBase means you can use them on in-memory segments --- index/scorch/segment/zap/docvalues.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/index/scorch/segment/zap/docvalues.go b/index/scorch/segment/zap/docvalues.go index 0ceb91fb5..a819ca239 100644 --- a/index/scorch/segment/zap/docvalues.go +++ b/index/scorch/segment/zap/docvalues.go @@ -39,7 +39,7 @@ type docNumTermsVisitor func(docNum uint64, terms []byte) error type docVisitState struct { dvrs map[uint16]*docValueReader - segment *Segment + segment *SegmentBase } type docValueReader struct { @@ -252,7 +252,7 @@ func (di *docValueReader) getDocValueLocs(docNum uint64) (uint64, uint64) { // VisitDocumentFieldTerms is an implementation of the // DocumentFieldTermVisitable interface -func (s *Segment) VisitDocumentFieldTerms(localDocNum uint64, fields []string, +func (s *SegmentBase) VisitDocumentFieldTerms(localDocNum uint64, fields []string, visitor index.DocumentFieldTermVisitor, dvsIn segment.DocVisitState) ( segment.DocVisitState, error) { dvs, ok := dvsIn.(*docVisitState) @@ -291,7 +291,7 @@ func (s *Segment) VisitDocumentFieldTerms(localDocNum uint64, fields []string, if dvr, ok = dvs.dvrs[fieldID]; ok 
&& dvr != nil { // check if the chunk is already loaded if docInChunk != dvr.curChunkNumber() { - err := dvr.loadDvChunk(docInChunk, &s.SegmentBase) + err := dvr.loadDvChunk(docInChunk, s) if err != nil { return dvs, err } @@ -306,6 +306,6 @@ func (s *Segment) VisitDocumentFieldTerms(localDocNum uint64, fields []string, // VisitableDocValueFields returns the list of fields with // persisted doc value terms ready to be visitable using the // VisitDocumentFieldTerms method. -func (s *Segment) VisitableDocValueFields() ([]string, error) { +func (s *SegmentBase) VisitableDocValueFields() ([]string, error) { return s.fieldDvNames, nil } From 5b9d7756e18c7c18b507d148ed4936896bd50370 Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Tue, 7 May 2019 20:33:25 -0400 Subject: [PATCH 594/728] fix batch persisted callback for upsidedown (#1209) previous implementation would only execute the callback if the control flow reached the end of the function unfortunately there are other explicit returns earlier in the function users setting a callback, would reasonably expect it to always be called (passing either nil or an err) and due to this bug, certain error conditions resulted in the callback not firing this change checks for the callback, and if present defers execution of the callback, ensuring it will always be executed --- index/upsidedown/upsidedown.go | 8 ++--- index/upsidedown/upsidedown_test.go | 46 +++++++++++++++++++++++++++++ 2 files changed, 50 insertions(+), 4 deletions(-) diff --git a/index/upsidedown/upsidedown.go b/index/upsidedown/upsidedown.go index 0699dbf97..24f5aae94 100644 --- a/index/upsidedown/upsidedown.go +++ b/index/upsidedown/upsidedown.go @@ -801,6 +801,10 @@ func (udc *UpsideDownCouch) termFieldVectorsFromTermVectors(in []*TermVector) [] } func (udc *UpsideDownCouch) Batch(batch *index.Batch) (err error) { + persistedCallback := batch.PersistedCallback() + if persistedCallback != nil { + defer persistedCallback(err) + } analysisStart := time.Now() 
resultChan := make(chan *index.AnalysisResult, len(batch.IndexOps)) @@ -965,10 +969,6 @@ func (udc *UpsideDownCouch) Batch(batch *index.Batch) (err error) { atomic.AddUint64(&udc.stats.errors, 1) } - persistedCallback := batch.PersistedCallback() - if persistedCallback != nil { - persistedCallback(err) - } return } diff --git a/index/upsidedown/upsidedown_test.go b/index/upsidedown/upsidedown_test.go index 2135a22c9..cc7cccfcb 100644 --- a/index/upsidedown/upsidedown_test.go +++ b/index/upsidedown/upsidedown_test.go @@ -19,6 +19,7 @@ import ( "reflect" "regexp" "strconv" + "strings" "sync" "testing" "time" @@ -1447,3 +1448,48 @@ func TestLargeField(t *testing.T) { t.Fatal(err) } } + +func TestIndexBatchPersistedCallbackWithErrorUpsideDown(t *testing.T) { + defer func() { + err := DestroyTest() + if err != nil { + t.Fatal(err) + } + }() + + analysisQueue := index.NewAnalysisQueue(1) + idx, err := NewUpsideDownCouch(boltdb.Name, boltTestConfig, analysisQueue) + if err != nil { + t.Fatal(err) + } + err = idx.Open() + if err != nil { + t.Errorf("error opening index: %v", err) + } + defer func() { + err := idx.Close() + if err != nil { + t.Fatal(err) + } + }() + + var callbackExecuted bool + batch := index.NewBatch() + batch.SetPersistedCallback(func(e error) { + callbackExecuted = true + }) + // By using a really large ID, we ensure that the batch will fail, + // because the key generated by upside down will be too large for BoltDB + reallyBigId := strings.Repeat("x", 32768+1) + doc := document.NewDocument(reallyBigId) + doc.AddField(document.NewTextField("name", []uint64{}, []byte("test3"))) + batch.Update(doc) + + _ = idx.Batch(batch) + // don't fail on this error, that isn't what we're testing + + if !callbackExecuted { + t.Fatal("expected callback to fire, it did not") + } + +} From 2b973805198b421f707520d449236f19f0236b75 Mon Sep 17 00:00:00 2001 From: Sreekanth Sivasankaran Date: Wed, 8 May 2019 13:24:56 +0530 Subject: [PATCH 595/728] adding UTs for GeoHash Its 
been noted a precision/accuracy issue with respect to the existing geoHash decoder since it performs rounding of lat/lon values. This matters when the given hash is having a smaller precision like 5, eg: "d3hn3". This test is just a confirmation for the results with some sample values. --- geo/geohash_test.go | 44 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) create mode 100644 geo/geohash_test.go diff --git a/geo/geohash_test.go b/geo/geohash_test.go new file mode 100644 index 000000000..9600612f6 --- /dev/null +++ b/geo/geohash_test.go @@ -0,0 +1,44 @@ +// Copyright (c) 2019 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package geo + +import ( + "testing" +) + +func TestGeoHash(t *testing.T) { + tests := []struct { + hash string + lon float64 + lat float64 + }{ + {"d3hn3", -73.080000, 6.730000}, // -73.05908203, 6.74560547 as per http://geohash.co/ + {"u4pru", 10.380000, 57.620000}, // 10.39306641, 57.63427734 + {"u4pruy", 10.410000, 57.646000}, // 10.40954590, 57.64801025 + {"u4pruyd", 10.407000, 57.649000}, // 10.40748596, 57.64869690 + {"u4pruydqqvj", 10.40744, 57.64911}, // 10.40743969, 57.64911063 + } + + for _, test := range tests { + lat, lon := GeoHashDecode(test.hash) + + if compareGeo(test.lon, lon) != 0 { + t.Errorf("expected lon %f, got %f, hash %s", test.lon, lon, test.hash) + } + if compareGeo(test.lat, lat) != 0 { + t.Errorf("expected lat %f, got %f, hash %s", test.lat, lat, test.hash) + } + } +} From ee12e763a20db2b3dc6e7c090b9ebf029b9d6713 Mon Sep 17 00:00:00 2001 From: Sreekanth Sivasankaran Date: Wed, 8 May 2019 14:40:36 +0530 Subject: [PATCH 596/728] MB-34021 - geo query not returning few docs The geo location precision lost during geohash decoding was resulting in this issue. Introducing a new geohash decoding method with better precision and performance. go test -benchmem -bench=. 
goos: darwin goarch: amd64 pkg: github.com/blevesearch/bleve/geo BenchmarkGeoHashLen5Decode-8 10000000 119 ns/op 0 B/op 0 allocs/op BenchmarkGeoHashLen5NewDecode-8 20000000 60.7 ns/op 0 B/op 0 allocs/op BenchmarkGeoHashLen6Decode-8 10000000 120 ns/op 0 B/op 0 allocs/op BenchmarkGeoHashLen6NewDecode-8 20000000 70.4 ns/op 0 B/op 0 allocs/op BenchmarkGeoHashLen7Decode-8 10000000 119 ns/op 0 B/op 0 allocs/op BenchmarkGeoHashLen7NewDecode-8 20000000 83.2 ns/op 0 B/op 0 allocs/op --- geo/benchmark_geohash_test.go | 67 ++++++++++++++++++++++++++ geo/geohash.go | 71 ++++++++++++++++++++++++++++ geo/geohash_test.go | 89 +++++++++++++++++++++++++++++++++++ geo/parse.go | 2 +- 4 files changed, 228 insertions(+), 1 deletion(-) create mode 100644 geo/benchmark_geohash_test.go create mode 100644 geo/geohash_test.go diff --git a/geo/benchmark_geohash_test.go b/geo/benchmark_geohash_test.go new file mode 100644 index 000000000..5ed5d6b76 --- /dev/null +++ b/geo/benchmark_geohash_test.go @@ -0,0 +1,67 @@ +// Copyright (c) 2019 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package geo + +import ( + "testing" +) + +func BenchmarkGeoHashLen5Decode(b *testing.B) { + b.ResetTimer() + hash := "d3hn3" + for i := 0; i < b.N; i++ { + _, _ = GeoHashDecode(hash) + } +} + +func BenchmarkGeoHashLen5NewDecode(b *testing.B) { + b.ResetTimer() + hash := "d3hn3" + for i := 0; i < b.N; i++ { + _, _ = DecodeGeoHash(hash) + } +} + +func BenchmarkGeoHashLen6Decode(b *testing.B) { + b.ResetTimer() + hash := "u4pruy" + for i := 0; i < b.N; i++ { + _, _ = GeoHashDecode(hash) + } +} + +func BenchmarkGeoHashLen6NewDecode(b *testing.B) { + b.ResetTimer() + hash := "u4pruy" + for i := 0; i < b.N; i++ { + _, _ = DecodeGeoHash(hash) + } +} + +func BenchmarkGeoHashLen7Decode(b *testing.B) { + b.ResetTimer() + hash := "u4pruyd" + for i := 0; i < b.N; i++ { + _, _ = GeoHashDecode(hash) + } +} + +func BenchmarkGeoHashLen7NewDecode(b *testing.B) { + b.ResetTimer() + hash := "u4pruyd" + for i := 0; i < b.N; i++ { + _, _ = DecodeGeoHash(hash) + } +} diff --git a/geo/geohash.go b/geo/geohash.go index 35db720c0..4fac3a877 100644 --- a/geo/geohash.go +++ b/geo/geohash.go @@ -172,3 +172,74 @@ func GeoHashDecode(hash string) (lat, lng float64) { box := geoBoundingBox(hash) return box.round() } + +var masks = []uint64{16, 8, 4, 2, 1} + +// DecodeGeoHash decodes the string geohash faster with +// higher precision. This api is in experimental phase. 
+func DecodeGeoHash(geoHash string) (float64, float64) { + even := true + lat := []float64{-90.0, 90.0} + lon := []float64{-180.0, 180.0} + + for i := 0; i < len(geoHash); i++ { + cd := uint64(base32encoding.dec[geoHash[i]]) + for j := 0; j < 5; j++ { + if even { + if cd&masks[j] > 0 { + lon[0] = (lon[0] + lon[1]) / 2 + } else { + lon[1] = (lon[0] + lon[1]) / 2 + } + } else { + if cd&masks[j] > 0 { + lat[0] = (lat[0] + lat[1]) / 2 + } else { + lat[1] = (lat[0] + lat[1]) / 2 + } + } + even = !even + } + } + + return (lat[0] + lat[1]) / 2, (lon[0] + lon[1]) / 2 +} + +func EncodeGeoHash(lat, lon float64) string { + even := true + lats := []float64{-90.0, 90.0} + lons := []float64{-180.0, 180.0} + precision := 12 + var ch, bit uint64 + var geoHash string + + for len(geoHash) < precision { + if even { + mid := (lons[0] + lons[1]) / 2 + if lon > mid { + ch |= masks[bit] + lons[0] = mid + } else { + lons[1] = mid + } + } else { + mid := (lats[0] + lats[1]) / 2 + if lat > mid { + ch |= masks[bit] + lats[0] = mid + } else { + lats[1] = mid + } + } + even = !even + if bit < 4 { + bit++ + } else { + geoHash += string(base32encoding.enc[ch]) + ch = 0 + bit = 0 + } + } + + return geoHash +} diff --git a/geo/geohash_test.go b/geo/geohash_test.go new file mode 100644 index 000000000..d0bec329d --- /dev/null +++ b/geo/geohash_test.go @@ -0,0 +1,89 @@ +// Copyright (c) 2019 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package geo + +import ( + "strings" + "testing" +) + +func TestGeoHashDecode(t *testing.T) { + tests := []struct { + hash string + lon float64 + lat float64 + }{ + {"d3hn3", -73.080000, 6.730000}, // -73.05908203, 6.74560547 as per http://geohash.co/ + {"u4pru", 10.380000, 57.620000}, // 10.39306641, 57.63427734 + {"u4pruy", 10.410000, 57.646000}, // 10.40954590, 57.64801025 + {"u4pruyd", 10.407000, 57.649000}, // 10.40748596, 57.64869690 + {"u4pruydqqvj", 10.40744, 57.64911}, // 10.40743969, 57.64911063 + } + + for _, test := range tests { + lat, lon := GeoHashDecode(test.hash) + + if compareGeo(test.lon, lon) != 0 { + t.Errorf("expected lon %f, got %f, hash %s", test.lon, lon, test.hash) + } + if compareGeo(test.lat, lat) != 0 { + t.Errorf("expected lat %f, got %f, hash %s", test.lat, lat, test.hash) + } + } +} + +func TestDecodeGeoHash(t *testing.T) { + tests := []struct { + hash string + lon float64 + lat float64 + }{ + {"d3hn3", -73.059082, 6.745605}, // -73.05908203, 6.74560547 as per http://geohash.co/ + {"u4pru", 10.393066, 57.634277}, // 10.39306641, 57.63427734 + {"u4pruy", 10.409546, 57.648010}, // 10.40954590, 57.64801025 + {"u4pruyd", 10.407486, 57.648697}, // 10.40748596, 57.64869690 + {"u4pruydqqvj", 10.40744, 57.64911}, // 10.40743969, 57.64911063 + } + + for _, test := range tests { + lat, lon := DecodeGeoHash(test.hash) + + if compareGeo(test.lon, lon) != 0 { + t.Errorf("expected lon %f, got %f, hash %s", test.lon, lon, test.hash) + } + if compareGeo(test.lat, lat) != 0 { + t.Errorf("expected lat %f, got %f, hash %s", test.lat, lat, test.hash) + } + } +} + +func TestEncodeGeoHash(t *testing.T) { + tests := []struct { + lon float64 + lat float64 + hash string + }{ + {2.29449034, 48.85841131, "u09tunquc"}, + {76.491540, 10.060349, "t9y3hx7my0fp"}, + } + + for _, test := range tests { + hash := EncodeGeoHash(test.lat, test.lon) + + if !strings.HasPrefix(hash, test.hash) { + t.Errorf("expected hash %s, got %s", test.hash, hash) + } + } +} diff 
--git a/geo/parse.go b/geo/parse.go index 0511fea7b..5d833d911 100644 --- a/geo/parse.go +++ b/geo/parse.go @@ -85,7 +85,7 @@ func ExtractGeoPoint(thing interface{}) (lon, lat float64, success bool) { } } else { // geohash - lat, lon = GeoHashDecode(geoStr) + lat, lon = DecodeGeoHash(geoStr) foundLat = true foundLon = true } From 0c3d62bdb09c6caaec2f5ea1f1e80153f37b90d8 Mon Sep 17 00:00:00 2001 From: Sreekanth Sivasankaran Date: Thu, 9 May 2019 12:35:47 +0530 Subject: [PATCH 597/728] adding versus test for geohash This test basically confirms the dimensions of the bounded box computed between the DecodeGeoHash method and the existing original implementation from https://github.com/mmcloughlin/geohash. --- geo/versus_test.go | 4160 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 4160 insertions(+) create mode 100644 geo/versus_test.go diff --git a/geo/versus_test.go b/geo/versus_test.go new file mode 100644 index 000000000..0a067dae7 --- /dev/null +++ b/geo/versus_test.go @@ -0,0 +1,4160 @@ +// Copyright (c) 2019 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package geo + +import ( + "testing" +) + +// This test basically confirms the dimensions of the +// bounded box computed between the DecodeGeoHash method +// and the existing original implementation from +// https://github.com/mmcloughlin/geohash. +// DecodeGeoHash method returns the centre of the rectangle +// than returning the box dimensions. 
+// This test verifies that the returned rectangle centre matches +// the centre for the box dimensions defined in the original +// implementation tests here: +// https://github.com/mmcloughlin/geohash/blob/master/decodecases_test.go +func TestDecodeGeoHashVersus(t *testing.T) { + tests := []struct { + hash string + box []float64 + }{ + {"91rc", []float64{7.20703125, 7.3828125, -124.1015625, -123.75}}, + {"c", []float64{45.0, 90.0, -135.0, -90.0}}, + {"0fuz", []float64{-73.30078125, -73.125, -139.5703125, -139.21875}}, + {"dwfcndf", []float64{38.1596374512, 38.1610107422, -63.3444213867, -63.3430480957}}, + {"2z7", []float64{-4.21875, -2.8125, -142.03125, -140.625}}, + {"7spw2w", []float64{-21.3684082031, -21.3629150391, -11.9311523438, -11.9201660156}}, + {"eq", []float64{33.75, 39.375, -33.75, -22.5}}, + {"mgff0", []float64{-23.5546875, -23.5107421875, 82.6171875, 82.6611328125}}, + {"dp7k386jtk0", []float64{41.5306591988, 41.5306605399, -85.3607976437, -85.3607963026}}, + {"pjb", []float64{-57.65625, -56.25, 135.0, 136.40625}}, + {"jkc7uh9", []float64{-62.5973510742, -62.5959777832, 58.184967041, 58.186340332}}, + {"1gdp9", []float64{-68.994140625, -68.9501953125, -98.3935546875, -98.349609375}}, + {"z9yj14mmnxte", []float64{55.7359149121, 55.7359150797, 165.988941416, 165.988941751}}, + {"2brk", []float64{-42.890625, -42.71484375, -136.0546875, -135.703125}}, + {"dhv5t2qh59", []float64{27.3360496759, 27.3360550404, -82.7296471596, -82.7296364307}}, + {"v", []float64{45.0, 90.0, 45.0, 90.0}}, + {"3fgd9k15b5k", []float64{-29.0691630542, -29.0691617131, -96.2718147039, -96.2718133628}}, + {"j", []float64{-90.0, -45.0, 45.0, 90.0}}, + {"b4", []float64{56.25, 61.875, -180.0, -168.75}}, + {"sb38", []float64{1.40625, 1.58203125, 35.859375, 36.2109375}}, + {"puqeug", []float64{-65.4180908203, -65.4125976562, 178.099365234, 178.110351562}}, + {"45", []float64{-73.125, -67.5, -90.0, -78.75}}, + {"34b", []float64{-29.53125, -28.125, -135.0, -133.59375}}, + 
{"tqb8jzn9dfqn", []float64{38.0074727163, 38.0074728839, 57.2148630023, 57.2148633376}}, + {"9x", []float64{39.375, 45.0, -112.5, -101.25}}, + {"tybf7", []float64{38.3642578125, 38.408203125, 79.9365234375, 79.98046875}}, + {"9nc", []float64{37.96875, 39.375, -133.59375, -132.1875}}, + {"pp21", []float64{-49.04296875, -48.8671875, 135.0, 135.3515625}}, + {"s6wjfu76v", []float64{15.0970602036, 15.0971031189, 19.8130273819, 19.8130702972}}, + {"wxh8ped7", []float64{39.3947410583, 39.3949127197, 119.160804749, 119.161148071}}, + {"8gr", []float64{18.28125, 19.6875, -136.40625, -135.0}}, + {"ug6hf", []float64{64.1162109375, 64.16015625, 36.650390625, 36.6943359375}}, + {"pb", []float64{-90.0, -84.375, 168.75, 180.0}}, + {"nmhvpv", []float64{-60.9686279297, -60.9631347656, 108.270263672, 108.28125}}, + {"rxgthm", []float64{-0.499877929688, -0.494384765625, 162.608642578, 162.619628906}}, + {"mj8t", []float64{-13.18359375, -13.0078125, 45.703125, 46.0546875}}, + {"rkvw", []float64{-17.2265625, -17.05078125, 153.984375, 154.3359375}}, + {"j", []float64{-90.0, -45.0, 45.0, 90.0}}, + {"u4ryw22k", []float64{58.8008880615, 58.8010597229, 11.1734390259, 11.1737823486}}, + {"96bf6jfr", []float64{15.8970451355, 15.8972167969, -122.60433197, -122.603988647}}, + {"ubhnbn2n1jvj", []float64{46.2219173647, 46.2219175324, 39.3750496209, 39.3750499561}}, + {"3gmczz", []float64{-26.3726806641, -26.3671875, -92.8234863281, -92.8125}}, + {"yb4jh3u7px0", []float64{45.8890718222, 45.8890731633, 126.75542593, 126.755427271}}, + {"9ex7rkbw1duf", []float64{20.2859266475, 20.2859268151, -101.985326596, -101.98532626}}, + {"xrkyg3mj", []float64{41.9754981995, 41.9756698608, 153.079376221, 153.079719543}}, + {"z4hn2yfzryjq", []float64{57.3869894072, 57.3869895749, 140.662075169, 140.662075505}}, + {"x357", []float64{6.15234375, 6.328125, 150.8203125, 151.171875}}, + {"v3pew", []float64{51.240234375, 51.2841796875, 67.060546875, 67.1044921875}}, + {"j1", []float64{-84.375, -78.75, 45.0, 56.25}}, + 
{"ec1bkph03", []float64{5.70744037628, 5.70748329163, -8.60774517059, -8.60770225525}}, + {"q4t85", []float64{-30.9375, -30.8935546875, 97.8662109375, 97.91015625}}, + {"k26z8hf", []float64{-42.2492980957, -42.2479248047, 15.119934082, 15.121307373}}, + {"p6fyq2u63", []float64{-73.4281110764, -73.428068161, 150.397725105, 150.397768021}}, + {"uqu5w2yu", []float64{83.5887908936, 83.5889625549, 17.1589279175, 17.1592712402}}, + {"terbkv", []float64{18.3526611328, 18.3581542969, 78.6071777344, 78.6181640625}}, + {"8v2nwkp", []float64{30.6958007812, 30.6971740723, -145.96572876, -145.964355469}}, + {"rbktxk2enhr", []float64{-42.6030693948, -42.6030680537, 175.397682041, 175.397683382}}, + {"r1pnfct8", []float64{-38.1802368164, -38.180065155, 144.97215271, 144.972496033}}, + {"rcmxg", []float64{-36.6064453125, -36.5625, 176.616210938, 176.66015625}}, + {"jqw7xjdt8", []float64{-52.7911090851, -52.7910661697, 65.350112915, 65.3501558304}}, + {"7mt737xd", []float64{-13.4716415405, -13.4714698792, -26.3019561768, -26.301612854}}, + {"2dprx5rg57es", []float64{-32.4132534117, -32.413253244, -146.986283138, -146.986282803}}, + {"fpr5d98ws0", []float64{86.40583992, 86.4058452845, -80.0455284119, -80.045517683}}, + {"z4cmm3", []float64{61.3970947266, 61.4025878906, 136.988525391, 136.999511719}}, + {"gz3b8j", []float64{85.8966064453, 85.9020996094, -8.7890625, -8.77807617188}}, + {"svfztv42f2k", []float64{33.6897052824, 33.6897066236, 37.8730648756, 37.8730662167}}, + {"z6f3yb6rum", []float64{60.7790976763, 60.7791030407, 149.713965654, 149.713976383}}, + {"wvqv397", []float64{30.4609680176, 30.4623413086, 133.312225342, 133.313598633}}, + {"r0ce65wshm", []float64{-40.1900213957, -40.1900160313, 137.206374407, 137.206385136}}, + {"crqvbtfbjb09", []float64{86.8235780485, 86.8235782161, -114.23181586, -114.231815524}}, + {"ptz0vc5z87", []float64{-57.5176173449, -57.5176119804, 167.601596117, 167.601606846}}, + {"x806byp", []float64{0.516357421875, 0.517730712891, 157.894134521, 
157.895507812}}, + {"s", []float64{0.0, 45.0, 0.0, 45.0}}, + {"kb23vjn10", []float64{-43.2584953308, -43.2584524155, 34.3295288086, 34.3295717239}}, + {"n82kv", []float64{-87.7587890625, -87.71484375, 113.071289062, 113.115234375}}, + {"dkhzmmqfg", []float64{23.8037252426, 23.803768158, -71.830201149, -71.8301582336}}, + {"y1v", []float64{54.84375, 56.25, 97.03125, 98.4375}}, + {"q977h9", []float64{-37.4359130859, -37.4304199219, 117.268066406, 117.279052734}}, + {"sdffzu5qzu", []float64{15.9753012657, 15.9753066301, 26.7125594616, 26.7125701904}}, + {"n3q", []float64{-82.96875, -81.5625, 109.6875, 111.09375}}, + {"fr99zgs7e", []float64{87.5149440765, 87.5149869919, -76.2940835953, -76.2940406799}}, + {"4d9cv4cbh", []float64{-75.6147766113, -75.614733696, -64.8167610168, -64.8167181015}}, + {"np896wq5", []float64{-47.557926178, -47.5577545166, 90.8212280273, 90.8215713501}}, + {"45", []float64{-73.125, -67.5, -90.0, -78.75}}, + {"c5srsy", []float64{66.0388183594, 66.0443115234, -128.814697266, -128.803710938}}, + {"vcshr", []float64{54.1845703125, 54.228515625, 84.6826171875, 84.7265625}}, + {"3zbbs1wnn5", []float64{-1.30907356739, -1.30906820297, -100.011034012, -100.011023283}}, + {"jtdvxn", []float64{-58.0627441406, -58.0572509766, 71.6748046875, 71.6857910156}}, + {"yz27wu0e", []float64{86.4189720154, 86.4191436768, 124.398880005, 124.399223328}}, + {"utb0ck65h9", []float64{77.4994522333, 77.4994575977, 22.5578713417, 22.5578820705}}, + {"1t1dvq5jj", []float64{-61.3577842712, -61.3577413559, -110.15557766, -110.155534744}}, + {"yzwgy5hvwz", []float64{87.8641408682, 87.8641462326, 133.512672186, 133.512682915}}, + {"8dx38y6vby", []float64{14.3615233898, 14.3615287542, -147.267919779, -147.26790905}}, + {"9bh", []float64{0.0, 1.40625, -95.625, -94.21875}}, + {"2", []float64{-45.0, 0.0, -180.0, -135.0}}, + {"x14ve", []float64{6.591796875, 6.6357421875, 138.999023438, 139.04296875}}, + {"3603v", []float64{-33.4423828125, -33.3984375, -123.178710938, 
-123.134765625}}, + {"bsuje4cn6", []float64{72.7017259598, 72.7017688751, -151.741704941, -151.741662025}}, + {"9nnut36epqhn", []float64{34.5484302565, 34.5484304242, -125.273349881, -125.273349546}}, + {"7q27jg", []float64{-9.29992675781, -9.29443359375, -33.1457519531, -33.134765625}}, + {"933zzd6", []float64{8.40591430664, 8.40728759766, -120.956726074, -120.955352783}}, + {"5743y", []float64{-72.8173828125, -72.7734375, -30.322265625, -30.2783203125}}, + {"7m94uc9", []float64{-13.5708618164, -13.5694885254, -32.1336364746, -32.1322631836}}, + {"780", []float64{-45.0, -43.59375, -22.5, -21.09375}}, + {"sqpqx2w0hh", []float64{34.8953461647, 34.8953515291, 21.7723274231, 21.7723381519}}, + {"1ep", []float64{-73.125, -71.71875, -102.65625, -101.25}}, + {"k0j", []float64{-45.0, -43.59375, 7.03125, 8.4375}}, + {"z1zgr0g440", []float64{55.4195022583, 55.4195076227, 146.210260391, 146.21027112}}, + {"681bf", []float64{-44.8681640625, -44.82421875, -64.951171875, -64.9072265625}}, + {"dn6j", []float64{36.03515625, 36.2109375, -87.1875, -86.8359375}}, + {"m", []float64{-45.0, 0.0, 45.0, 90.0}}, + {"t1m", []float64{7.03125, 8.4375, 52.03125, 53.4375}}, + {"9qkshh67xe3", []float64{35.8833391964, 35.8833405375, -117.242680639, -117.242679298}}, + {"4507gyj", []float64{-72.4328613281, -72.4314880371, -89.476776123, -89.475402832}}, + {"bb844h", []float64{48.1860351562, 48.1915283203, -146.162109375, -146.151123047}}, + {"trn6yz", []float64{39.8968505859, 39.90234375, 65.3356933594, 65.3466796875}}, + {"vb99", []float64{47.98828125, 48.1640625, 80.859375, 81.2109375}}, + {"dxpbksmed", []float64{39.4428920746, 39.4429349899, -56.3961696625, -56.3961267471}}, + {"zc", []float64{50.625, 56.25, 168.75, 180.0}}, + {"3webp3s6hdeb", []float64{-8.42890352011, -8.42890335247, -106.901924349, -106.901924014}}, + {"8qg", []float64{37.96875, 39.375, -164.53125, -163.125}}, + {"fgwzk", []float64{65.9619140625, 66.005859375, -46.58203125, -46.5380859375}}, + {"1q2mxvd", 
[]float64{-53.8467407227, -53.8453674316, -123.055114746, -123.053741455}}, + {"sd", []float64{11.25, 16.875, 22.5, 33.75}}, + {"8hhwv", []float64{23.6865234375, 23.73046875, -173.452148438, -173.408203125}}, + {"1bw", []float64{-87.1875, -85.78125, -92.8125, -91.40625}}, + {"66zkh0ex724", []float64{-28.824133873, -28.8241325319, -68.3739575744, -68.3739562333}}, + {"w6qpy8", []float64{14.0185546875, 14.0240478516, 109.973144531, 109.984130859}}, + {"nux13fvfr", []float64{-64.4522809982, -64.4522380829, 133.678851128, 133.678894043}}, + {"cj2", []float64{74.53125, 75.9375, -135.0, -133.59375}}, + {"1qrumeqp", []float64{-54.0776252747, -54.0774536133, -112.601623535, -112.601280212}}, + {"hhm0", []float64{-66.09375, -65.91796875, 7.03125, 7.3828125}}, + {"50cp37u6w86", []float64{-84.4858060777, -84.4858047366, -43.5327002406, -43.5326988995}}, + {"0vjjfe5w", []float64{-60.8467483521, -60.8465766907, -139.1040802, -139.103736877}}, + {"xm67", []float64{30.05859375, 30.234375, 149.4140625, 149.765625}}, + {"1jmnqz1", []float64{-59.3316650391, -59.330291748, -127.67074585, -127.669372559}}, + {"jjfh69u78", []float64{-56.8989658356, -56.8989229202, 47.9281997681, 47.9282426834}}, + {"9xvnjtj3ytr", []float64{44.6762318909, 44.676233232, -105.219552666, -105.219551325}}, + {"ebwk44", []float64{3.52661132812, 3.53210449219, -2.373046875, -2.36206054688}}, + {"xej0p", []float64{16.875, 16.9189453125, 164.838867188, 164.8828125}}, + {"hm4m", []float64{-60.99609375, -60.8203125, 14.4140625, 14.765625}}, + {"71d1uhzq", []float64{-36.2277603149, -36.2275886536, -42.0017623901, -42.0014190674}}, + {"u7d8cgc9h4w", []float64{64.8401203752, 64.8401217163, 14.8447689414, 14.8447702825}}, + {"zwud", []float64{83.3203125, 83.49609375, 163.828125, 164.1796875}}, + {"p4nbh7wnwsb", []float64{-78.7296326458, -78.7296313047, 144.687473774, 144.687475115}}, + {"ev", []float64{28.125, 33.75, -11.25, 0.0}}, + {"2", []float64{-45.0, 0.0, -180.0, -135.0}}, + {"ytj8jhg3vmke", 
[]float64{73.1514216028, 73.1514217705, 120.458796099, 120.458796434}}, + {"xn", []float64{33.75, 39.375, 135.0, 146.25}}, + {"1t7f", []float64{-60.1171875, -59.94140625, -107.2265625, -106.875}}, + {"wjt", []float64{30.9375, 32.34375, 97.03125, 98.4375}}, + {"f7j3", []float64{62.05078125, 62.2265625, -71.3671875, -71.015625}}, + {"62bd4mvcg", []float64{-40.3978013992, -40.3977584839, -77.9399728775, -77.9399299622}}, + {"5", []float64{-90.0, -45.0, -45.0, 0.0}}, + {"577epg1nh0be", []float64{-71.1738922633, -71.1738920957, -28.4860032052, -28.4860028699}}, + {"pvyz2qtjw6kc", []float64{-56.3451739959, -56.3451738283, 178.260314874, 178.26031521}}, + {"2hrf6fbj", []float64{-20.6822776794, -20.6821060181, -168.980712891, -168.980369568}}, + {"z1yz7p6dc3h8", []float64{56.1584669352, 56.1584671028, 144.627516344, 144.627516679}}, + {"24v5r460td50", []float64{-28.9475047588, -28.9475045912, -172.658146173, -172.658145837}}, + {"rdtnyvudc4mu", []float64{-29.7189060599, -29.7189058922, 164.834111296, 164.834111631}}, + {"s", []float64{0.0, 45.0, 0.0, 45.0}}, + {"e7he", []float64{17.40234375, 17.578125, -27.421875, -27.0703125}}, + {"9yrkg", []float64{35.9912109375, 36.03515625, -90.9228515625, -90.87890625}}, + {"m", []float64{-45.0, 0.0, 45.0, 90.0}}, + {"yppvw0", []float64{85.341796875, 85.3472900391, 101.162109375, 101.173095703}}, + {"n3876wh", []float64{-80.9582519531, -80.9568786621, 101.716918945, 101.718292236}}, + {"m1vjqf4", []float64{-34.2224121094, -34.2210388184, 52.3306274414, 52.3320007324}}, + {"kev", []float64{-23.90625, -22.5, 29.53125, 30.9375}}, + {"4qc", []float64{-52.03125, -50.625, -77.34375, -75.9375}}, + {"5f4", []float64{-78.75, -77.34375, -8.4375, -7.03125}}, + {"ug", []float64{61.875, 67.5, 33.75, 45.0}}, + {"g5", []float64{61.875, 67.5, -45.0, -33.75}}, + {"7wvx", []float64{-5.80078125, -5.625, -14.765625, -14.4140625}}, + {"rx", []float64{-5.625, 0.0, 157.5, 168.75}}, + {"jdg0t21qzr", []float64{-74.4421631098, -74.4421577454, 71.9514906406, 
71.9515013695}}, + {"606yg", []float64{-42.4072265625, -42.36328125, -86.0009765625, -85.95703125}}, + {"nxt6zwhgqd", []float64{-47.2955739498, -47.2955685854, 120.219204426, 120.219215155}}, + {"e71g7w8dr", []float64{17.482380867, 17.4824237823, -31.1342668533, -31.134223938}}, + {"n6u", []float64{-74.53125, -73.125, 106.875, 108.28125}}, + {"5", []float64{-90.0, -45.0, -45.0, 0.0}}, + {"zfcehd0d3", []float64{61.0074663162, 61.0075092316, 171.057858467, 171.057901382}}, + {"hgx5efc24r5e", []float64{-69.68212137, -69.6821212023, 43.760362789, 43.7603631243}}, + {"x", []float64{0.0, 45.0, 135.0, 180.0}}, + {"q", []float64{-45.0, 0.0, 90.0, 135.0}}, + {"zbzg87qu07g", []float64{49.8525439203, 49.8525452614, 179.668708295, 179.668709636}}, + {"02urcn", []float64{-84.3859863281, -84.3804931641, -162.729492188, -162.718505859}}, + {"p4", []float64{-78.75, -73.125, 135.0, 146.25}}, + {"j6xjzcpsk78", []float64{-74.9205163121, -74.920514971, 66.4448082447, 66.4448095858}}, + {"svchwhvd08d", []float64{33.1612041593, 33.1612055004, 35.4274991155, 35.4275004566}}, + {"yughcgqek2", []float64{72.5721216202, 72.5721269846, 128.054763079, 128.054773808}}, + {"18tc1b", []float64{-87.01171875, -87.0062255859, -104.337158203, -104.326171875}}, + {"es43c", []float64{22.8076171875, 22.8515625, -19.2919921875, -19.248046875}}, + {"zdcephk", []float64{61.0194396973, 61.0208129883, 159.922485352, 159.923858643}}, + {"2zqh", []float64{-3.515625, -3.33984375, -137.8125, -137.4609375}}, + {"sh9bfh31", []float64{25.4678535461, 25.4680252075, 2.55020141602, 2.55054473877}}, + {"62vfgv", []float64{-40.2703857422, -40.2648925781, -70.4992675781, -70.48828125}}, + {"u2yn4", []float64{50.2734375, 50.3173828125, 19.775390625, 19.8193359375}}, + {"qx5wuf", []float64{-4.42749023438, -4.42199707031, 117.630615234, 117.641601562}}, + {"9y163", []float64{34.1455078125, 34.189453125, -99.4482421875, -99.404296875}}, + {"7ygu094y", []float64{-6.32160186768, -6.3214302063, -5.95081329346, -5.9504699707}}, 
+ {"nkfj9vxh9e6", []float64{-62.2834508121, -62.283449471, 104.149084389, 104.14908573}}, + {"n220y9m", []float64{-88.4550476074, -88.4536743164, 101.542510986, 101.543884277}}, + {"472p7k4d", []float64{-70.4220199585, -70.4218482971, -78.6037445068, -78.6034011841}}, + {"7p", []float64{-5.625, 0.0, -45.0, -33.75}}, + {"b1r1n2zp", []float64{52.2123527527, 52.2125244141, -169.87197876, -169.871635437}}, + {"neu8fxsk8k4", []float64{-68.7324213982, -68.7324200571, 118.943838179, 118.94383952}}, + {"m33fn", []float64{-37.6171875, -37.5732421875, 58.974609375, 59.0185546875}}, + {"u6z5je", []float64{61.0125732422, 61.0180664062, 21.3354492188, 21.3464355469}}, + {"5e6csnf", []float64{-71.4179992676, -71.4166259766, -18.454284668, -18.452911377}}, + {"7f7k4", []float64{-31.640625, -31.5966796875, -6.591796875, -6.5478515625}}, + {"ykd07ytm4", []float64{70.3930091858, 70.3930521011, 104.23459053, 104.234633446}}, + {"ubyx97", []float64{50.5535888672, 50.5590820312, 42.9455566406, 42.9565429688}}, + {"r", []float64{-45.0, 0.0, 135.0, 180.0}}, + {"ungw16", []float64{84.0344238281, 84.0399169922, 4.97680664062, 4.98779296875}}, + {"k", []float64{-45.0, 0.0, 0.0, 45.0}}, + {"bhfkepk0n", []float64{72.5495910645, 72.5496339798, -176.698350906, -176.698307991}}, + {"d3", []float64{5.625, 11.25, -78.75, -67.5}}, + {"wsenn", []float64{26.3671875, 26.4111328125, 116.982421875, 117.026367188}}, + {"b4b6qpqk", []float64{60.9047698975, 60.9049415588, -179.376182556, -179.375839233}}, + {"zwjx2wv3", []float64{80.0616645813, 80.0618362427, 165.263557434, 165.263900757}}, + {"0", []float64{-90.0, -45.0, -180.0, -135.0}}, + {"3sgq6", []float64{-17.1826171875, -17.138671875, -107.841796875, -107.797851562}}, + {"9chbq", []float64{5.6689453125, 5.712890625, -94.306640625, -94.2626953125}}, + {"37nehnbt", []float64{-27.5597190857, -27.5595474243, -114.432907104, -114.432563782}}, + {"uhr5w71b", []float64{69.5379638672, 69.5381355286, 10.1208114624, 10.1211547852}}, + {"8", []float64{0.0, 
45.0, -180.0, -135.0}}, + {"81", []float64{5.625, 11.25, -180.0, -168.75}}, + {"vyxn", []float64{82.6171875, 82.79296875, 88.59375, 88.9453125}}, + {"73jvv", []float64{-38.3642578125, -38.3203125, -25.4443359375, -25.400390625}}, + {"nmw1ysg2f23", []float64{-58.7286601961, -58.728658855, 109.977705628, 109.977706969}}, + {"5sv6js2nphq", []float64{-62.9052887857, -62.9052874446, -14.8751798272, -14.8751784861}}, + {"289", []float64{-42.1875, -40.78125, -156.09375, -154.6875}}, + {"cq9", []float64{81.5625, 82.96875, -122.34375, -120.9375}}, + {"mm", []float64{-16.875, -11.25, 56.25, 67.5}}, + {"49378fm9v", []float64{-82.3408555984, -82.3408126831, -65.7014608383, -65.701417923}}, + {"ee079be", []float64{17.492980957, 17.494354248, -22.0674133301, -22.0660400391}}, + {"4m5cs3h", []float64{-61.6058349609, -61.6044616699, -73.2843017578, -73.2829284668}}, + {"hpdre", []float64{-46.494140625, -46.4501953125, 3.2958984375, 3.33984375}}, + {"06", []float64{-78.75, -73.125, -168.75, -157.5}}, + {"3x3r4evkpp1q", []float64{-2.9669566825, -2.96695651487, -110.624812357, -110.624812022}}, + {"95074fyh23t", []float64{17.4181875587, 17.4181888998, -134.51933071, -134.519329369}}, + {"5tdj7sf", []float64{-58.1135559082, -58.1121826172, -19.5309448242, -19.5295715332}}, + {"8r9z", []float64{43.41796875, 43.59375, -166.2890625, -165.9375}}, + {"nc", []float64{-84.375, -78.75, 123.75, 135.0}}, + {"t2mb", []float64{1.40625, 1.58203125, 64.3359375, 64.6875}}, + {"hcs1g2vbkq0g", []float64{-81.2506873347, -81.250687167, 39.525902085, 39.5259024203}}, + {"pduzpxt", []float64{-73.2595825195, -73.2582092285, 164.516143799, 164.51751709}}, + {"mw9u700", []float64{-7.6904296875, -7.68905639648, 70.0927734375, 70.0941467285}}, + {"9y0ttsfr", []float64{34.7440910339, 34.7442626953, -100.302085876, -100.301742554}}, + {"ztmu17de4", []float64{75.2541160583, 75.2541589737, 165.644388199, 165.644431114}}, + {"1dqmehx", []float64{-76.3522338867, -76.3508605957, -103.569488525, -103.568115234}}, + 
{"5", []float64{-90.0, -45.0, -45.0, 0.0}}, + {"6ydvkt6cp", []float64{-7.48563766479, -7.48559474945, -52.180981636, -52.1809387207}}, + {"4", []float64{-90.0, -45.0, -90.0, -45.0}}, + {"706tn", []float64{-42.71484375, -42.6708984375, -41.220703125, -41.1767578125}}, + {"6z89j2x0gun", []float64{-2.63382196426, -2.63382062316, -55.3063800931, -55.306378752}}, + {"mt1t", []float64{-15.99609375, -15.8203125, 69.609375, 69.9609375}}, + {"95ypdkpcd", []float64{22.4343395233, 22.4343824387, -126.452894211, -126.452851295}}, + {"nr1wc", []float64{-49.4384765625, -49.39453125, 103.403320312, 103.447265625}}, + {"rb2uxf4", []float64{-42.7917480469, -42.7903747559, 170.148010254, 170.149383545}}, + {"gzz5w20e1wk", []float64{89.2095328867, 89.2095342278, -1.13083541393, -1.13083407283}}, + {"mtuddkh7my", []float64{-12.1942341328, -12.1942287683, 73.9330852032, 73.933095932}}, + {"r15def4", []float64{-38.9245605469, -38.9231872559, 140.089416504, 140.090789795}}, + {"9fwcy0d", []float64{14.3728637695, 14.3742370605, -91.491394043, -91.490020752}}, + {"6vx9z80hkd", []float64{-13.7541425228, -13.7541371584, -45.3733420372, -45.3733313084}}, + {"qqxt55re", []float64{-7.54022598267, -7.54005432129, 111.93901062, 111.939353943}}, + {"4hreqy", []float64{-65.4895019531, -65.4840087891, -79.1564941406, -79.1455078125}}, + {"9nv6", []float64{38.3203125, 38.49609375, -127.6171875, -127.265625}}, + {"k980q", []float64{-36.5185546875, -36.474609375, 22.763671875, 22.8076171875}}, + {"zd6qmdhsrtce", []float64{58.7666300498, 58.7666302174, 160.912265405, 160.91226574}}, + {"ucz57k", []float64{55.4370117188, 55.4425048828, 43.7365722656, 43.7475585938}}, + {"u5pq", []float64{62.9296875, 63.10546875, 10.1953125, 10.546875}}, + {"fey0v9", []float64{66.2310791016, 66.2365722656, -58.8208007812, -58.8098144531}}, + {"mdtggek9fmh5", []float64{-30.2601397969, -30.2601396292, 75.7460278273, 75.7460281625}}, + {"y6nfy", []float64{56.7333984375, 56.77734375, 111.005859375, 111.049804688}}, + {"f", 
[]float64{45.0, 90.0, -90.0, -45.0}}, + {"33hz5xbsw", []float64{-38.1011867523, -38.101143837, -116.915559769, -116.915516853}}, + {"2wnb71890np2", []float64{-11.1976110935, -11.1976109259, -147.875280194, -147.875279859}}, + {"7d", []float64{-33.75, -28.125, -22.5, -11.25}}, + {"71vsdbh", []float64{-34.365234375, -34.363861084, -37.1392822266, -37.1379089355}}, + {"nk", []float64{-67.5, -61.875, 101.25, 112.5}}, + {"pn6uks", []float64{-54.0747070312, -54.0692138672, 139.064941406, 139.075927734}}, + {"t0", []float64{0.0, 5.625, 45.0, 56.25}}, + {"ze7c4z3e", []float64{63.4973716736, 63.497543335, 162.896347046, 162.896690369}}, + {"ujq6txghfjdv", []float64{75.0141208805, 75.0141210482, 9.03497111052, 9.0349714458}}, + {"40muh9sfm7", []float64{-87.8819829226, -87.8819775581, -81.7095601559, -81.709549427}}, + {"0y4kk1", []float64{-55.4974365234, -55.4919433594, -142.91015625, -142.899169922}}, + {"q8btfscp1g", []float64{-39.7431975603, -39.7431921959, 113.314436674, 113.314447403}}, + {"yg2mxt", []float64{64.2755126953, 64.2810058594, 124.431152344, 124.442138672}}, + {"7r5kh", []float64{-4.921875, -4.8779296875, -29.00390625, -28.9599609375}}, + {"cvg6t", []float64{77.783203125, 77.8271484375, -96.4599609375, -96.416015625}}, + {"msj4q7e3db", []float64{-22.0850086212, -22.0850032568, 74.8104894161, 74.810500145}}, + {"1452n4", []float64{-78.7390136719, -78.7335205078, -130.166015625, -130.155029297}}, + {"xj3kvwr2d", []float64{30.4006290436, 30.4006719589, 137.009553909, 137.009596825}}, + {"2wtvb", []float64{-7.4267578125, -7.3828125, -149.4140625, -149.370117188}}, + {"c5x5dhk", []float64{65.3260803223, 65.3274536133, -125.062866211, -125.06149292}}, + {"eph56u6", []float64{39.9696350098, 39.9710083008, -39.2514038086, -39.2500305176}}, + {"5dhyg0hckf", []float64{-77.5632512569, -77.5632458925, -15.6817495823, -15.6817388535}}, + {"9vrb", []float64{29.53125, 29.70703125, -90.3515625, -90.0}}, + {"b7nsys", []float64{62.7319335938, 62.7374267578, -159.323730469, 
-159.312744141}}, + {"kmenbsk0", []float64{-12.8526306152, -12.8524589539, 15.4962158203, 15.4965591431}}, + {"j35", []float64{-84.375, -82.96875, 60.46875, 61.875}}, + {"e7sx", []float64{20.91796875, 21.09375, -27.421875, -27.0703125}}, + {"h87mpg", []float64{-87.6983642578, -87.6928710938, 27.4108886719, 27.421875}}, + {"qjbtp", []float64{-11.77734375, -11.7333984375, 91.0107421875, 91.0546875}}, + {"4zqs2pndx", []float64{-48.4327983856, -48.4327554703, -47.100148201, -47.1001052856}}, + {"1fsc5v3", []float64{-75.7328796387, -75.7315063477, -94.4041442871, -94.4027709961}}, + {"kp6hptx", []float64{-3.48541259766, -3.48403930664, 3.15170288086, 3.15307617188}}, + {"3n77", []float64{-9.31640625, -9.140625, -130.4296875, -130.078125}}, + {"q347uc", []float64{-38.7103271484, -38.7048339844, 104.622802734, 104.633789062}}, + {"n8gckvg3", []float64{-85.5297660828, -85.5295944214, 117.98664093, 117.986984253}}, + {"p7szbr6ceq", []float64{-68.9100801945, -68.9100748301, 152.944589853, 152.944600582}}, + {"8w7n", []float64{36.2109375, 36.38671875, -153.28125, -152.9296875}}, + {"k4s3ndj8", []float64{-30.7507324219, -30.7505607605, 6.26976013184, 6.27010345459}}, + {"fh38ev", []float64{69.0216064453, 69.0270996094, -87.7258300781, -87.71484375}}, + {"rzebrsw", []float64{-2.74383544922, -2.7424621582, 174.36126709, 174.362640381}}, + {"un", []float64{78.75, 84.375, 0.0, 11.25}}, + {"27u3d4ybbkt", []float64{-23.6273190379, -23.6273176968, -162.676259726, -162.676258385}}, + {"5hk2", []float64{-66.09375, -65.91796875, -39.0234375, -38.671875}}, + {"62f6wqsbfc6n", []float64{-40.3059548512, -40.3059546836, -75.3046354651, -75.3046351299}}, + {"r6jvqxczv", []float64{-32.7832460403, -32.783203125, 154.624199867, 154.624242783}}, + {"wyg1es5yx", []float64{38.2555103302, 38.2555532455, 128.128008842, 128.128051758}}, + {"smp9qbcpnt", []float64{28.3500748873, 28.3500802517, 22.0951581001, 22.095168829}}, + {"4q46h", []float64{-55.8984375, -55.8544921875, -75.41015625, 
-75.3662109375}}, + {"9u6v9g2ebcm5", []float64{24.8915505968, 24.8915507644, -97.3051826656, -97.3051823303}}, + {"25mwbs3", []float64{-25.5088806152, -25.5075073242, -172.242279053, -172.240905762}}, + {"kwf", []float64{-7.03125, -5.625, 25.3125, 26.71875}}, + {"ekqgkknev", []float64{24.5001554489, 24.5001983643, -24.0619039536, -24.0618610382}}, + {"y9974617cb12", []float64{53.9764738083, 53.9764739759, 114.358482845, 114.35848318}}, + {"htuu1sxcpd", []float64{-56.9282233715, -56.9282180071, 29.2565703392, 29.256581068}}, + {"150sv8q", []float64{-72.2886657715, -72.2872924805, -134.046936035, -134.045562744}}, + {"j36p6wnmqm", []float64{-81.6604489088, -81.6604435444, 59.181214571, 59.1812252998}}, + {"py", []float64{-56.25, -50.625, 168.75, 180.0}}, + {"8", []float64{0.0, 45.0, -180.0, -135.0}}, + {"y87", []float64{46.40625, 47.8125, 116.71875, 118.125}}, + {"6v90bwn999", []float64{-13.8974422216, -13.8974368572, -54.8127865791, -54.8127758503}}, + {"8kyq", []float64{27.7734375, 27.94921875, -159.9609375, -159.609375}}, + {"8cht765b92zg", []float64{6.55892824754, 6.55892841518, -139.773838855, -139.77383852}}, + {"nu5jwk23v7f", []float64{-66.5095366538, -66.5095353127, 128.243979514, 128.243980855}}, + {"0m10969", []float64{-61.7733764648, -61.7720031738, -167.287445068, -167.286071777}}, + {"s29h49frdp", []float64{3.52656304836, 3.52656841278, 12.7692890167, 12.7692997456}}, + {"6", []float64{-45.0, 0.0, -90.0, -45.0}}, + {"brmmeg2z8r", []float64{86.7672246695, 86.7672300339, -161.201351881, -161.201341152}}, + {"r3bngg619d", []float64{-33.9516055584, -33.951600194, 146.417605877, 146.417616606}}, + {"rmz76wced8c", []float64{-12.0472772419, -12.0472759008, 156.557344347, 156.557345688}}, + {"z", []float64{45.0, 90.0, 135.0, 180.0}}, + {"2h", []float64{-22.5, -16.875, -180.0, -168.75}}, + {"ty2764x", []float64{35.7412719727, 35.7426452637, 79.1990661621, 79.2004394531}}, + {"5yh3330r", []float64{-56.0235786438, -56.0234069824, -5.21816253662, -5.21781921387}}, + 
{"9szz", []float64{27.94921875, 28.125, -101.6015625, -101.25}}, + {"x7d41b", []float64{20.0390625, 20.0445556641, 149.139404297, 149.150390625}}, + {"dw", []float64{33.75, 39.375, -67.5, -56.25}}, + {"gnd4cw", []float64{82.0788574219, 82.0843505859, -42.1215820312, -42.1105957031}}, + {"k9bxc2n8", []float64{-33.7939453125, -33.7937736511, 23.2669830322, 23.267326355}}, + {"hump4nk", []float64{-64.8289489746, -64.8275756836, 40.8746337891, 40.8760070801}}, + {"gkz", []float64{71.71875, 73.125, -23.90625, -22.5}}, + {"g9e08yt", []float64{53.5610961914, 53.5624694824, -18.2414245605, -18.2400512695}}, + {"3eyuyzpm", []float64{-23.0319786072, -23.0318069458, -102.701225281, -102.700881958}}, + {"utpuc59p4m", []float64{73.9804154634, 73.9804208279, 33.443852663, 33.4438633919}}, + {"cnqt", []float64{81.03515625, 81.2109375, -125.859375, -125.5078125}}, + {"z05xfy72gk0", []float64{46.3967871666, 46.3967885077, 140.04732728, 140.047328621}}, + {"skr4zd", []float64{24.4006347656, 24.4061279297, 21.4233398438, 21.4343261719}}, + {"h2fe8mdnq", []float64{-85.1347303391, -85.1346874237, 14.7796154022, 14.7796583176}}, + {"z18b", []float64{53.4375, 53.61328125, 136.0546875, 136.40625}}, + {"6", []float64{-45.0, 0.0, -90.0, -45.0}}, + {"vh", []float64{67.5, 73.125, 45.0, 56.25}}, + {"v64zfspngxw", []float64{57.6354762912, 57.6354776323, 60.2368220687, 60.2368234098}}, + {"w", []float64{0.0, 45.0, 90.0, 135.0}}, + {"d800", []float64{0.0, 0.17578125, -67.5, -67.1484375}}, + {"s", []float64{0.0, 45.0, 0.0, 45.0}}, + {"bnugeje1", []float64{83.6143684387, 83.6145401001, -173.184356689, -173.184013367}}, + {"46", []float64{-78.75, -73.125, -78.75, -67.5}}, + {"8rsncsbz2mx4", []float64{43.4013903514, 43.401390519, -163.058031946, -163.058031611}}, + {"t4w9wh7f98je", []float64{14.3499474786, 14.3499476463, 54.4095184654, 54.4095188007}}, + {"nsr1tp2", []float64{-65.7902526855, -65.7888793945, 122.563476562, 122.564849854}}, + {"9me", []float64{30.9375, 32.34375, -119.53125, -118.125}}, 
+ {"t8250bh2y", []float64{1.93372249603, 1.93376541138, 67.5390529633, 67.5390958786}}, + {"1", []float64{-90.0, -45.0, -135.0, -90.0}}, + {"uqty0nmvvj0c", []float64{82.652533818, 82.6525339857, 19.3440495059, 19.3440498412}}, + {"7nkxt", []float64{-8.525390625, -8.4814453125, -38.4521484375, -38.408203125}}, + {"jzev", []float64{-46.93359375, -46.7578125, 84.0234375, 84.375}}, + {"dmtj1fktxe", []float64{31.8297261, 31.8297314644, -71.6353440285, -71.6353332996}}, + {"0r", []float64{-50.625, -45.0, -168.75, -157.5}}, + {"5hqv273y", []float64{-65.152015686, -65.1518440247, -35.4944229126, -35.4940795898}}, + {"78k", []float64{-43.59375, -42.1875, -16.875, -15.46875}}, + {"f2krt5", []float64{47.7410888672, 47.7465820312, -72.5537109375, -72.5427246094}}, + {"ffw63hhzm2u", []float64{59.481229037, 59.4812303782, -47.4102383852, -47.4102370441}}, + {"mv3z5k", []float64{-14.2163085938, -14.2108154297, 81.3537597656, 81.3647460938}}, + {"f15", []float64{50.625, 52.03125, -85.78125, -84.375}}, + {"j710re25dhzs", []float64{-73.0625749379, -73.0625747703, 57.9859357327, 57.985936068}}, + {"rtt328k93", []float64{-13.8411855698, -13.8411426544, 164.911007881, 164.911050797}}, + {"d2x003t048", []float64{2.82073974609, 2.82074511051, -68.8882899284, -68.8882791996}}, + {"22uy0", []float64{-39.7265625, -39.6826171875, -162.0703125, -162.026367188}}, + {"tuzxx", []float64{28.037109375, 28.0810546875, 89.6044921875, 89.6484375}}, + {"su44fdqr2pv1", []float64{22.9970443435, 22.9970445111, 36.6809530556, 36.6809533909}}, + {"yttq", []float64{76.9921875, 77.16796875, 119.8828125, 120.234375}}, + {"9fu5vyr", []float64{16.1622619629, 16.1636352539, -95.362701416, -95.361328125}}, + {"zwmzk37wsh97", []float64{81.4386709593, 81.438671127, 165.777684934, 165.77768527}}, + {"hd777ygzewj", []float64{-76.7340624332, -76.7340610921, 27.2404141724, 27.2404155135}}, + {"0b1", []float64{-90.0, -88.59375, -144.84375, -143.4375}}, + {"ejmgd", []float64{30.146484375, 30.1904296875, -36.826171875, 
-36.7822265625}}, + {"rzxh6", []float64{-2.0654296875, -2.021484375, 178.681640625, 178.725585938}}, + {"0rf4f4u", []float64{-45.9077453613, -45.9063720703, -165.844116211, -165.84274292}}, + {"1m23ggbv8", []float64{-60.1395893097, -60.1395463943, -123.23261261, -123.232569695}}, + {"gdvt2y6wfv", []float64{61.4271193743, 61.4271247387, -14.7291147709, -14.7291040421}}, + {"wxb3eqeug0y0", []float64{43.8939468563, 43.8939470239, 112.9996714, 112.999671735}}, + {"516kngbm6", []float64{-82.2441244125, -82.2440814972, -41.5388774872, -41.5388345718}}, + {"xtuwe2zu", []float64{33.4911346436, 33.4913063049, 163.981590271, 163.981933594}}, + {"dhb1c2jrz6c1", []float64{27.027712483, 27.0277126506, -89.9375461042, -89.9375457689}}, + {"23c397n4v8g", []float64{-34.8756225407, -34.8756211996, -166.928776056, -166.928774714}}, + {"1w81bnmc3", []float64{-53.0953359604, -53.095293045, -112.492060661, -112.492017746}}, + {"03hu77r", []float64{-83.6100769043, -83.6087036133, -161.917877197, -161.916503906}}, + {"z2vrm", []float64{50.4931640625, 50.537109375, 153.852539062, 153.896484375}}, + {"q630e", []float64{-32.255859375, -32.2119140625, 102.788085938, 102.83203125}}, + {"h9uzt", []float64{-78.837890625, -78.7939453125, 29.3994140625, 29.443359375}}, + {"x09c9jz", []float64{3.10775756836, 3.10913085938, 137.51449585, 137.515869141}}, + {"8", []float64{0.0, 45.0, -180.0, -135.0}}, + {"xv9ee3gq2j", []float64{31.5634471178, 31.5634524822, 171.006660461, 171.00667119}}, + {"6e8xjp4", []float64{-24.0435791016, -24.0422058105, -66.5744018555, -66.5730285645}}, + {"0m8sj439bys", []float64{-58.3466801047, -58.3466787636, -167.82505095, -167.825049609}}, + {"khf7yq8js6", []float64{-17.5854098797, -17.5854045153, 3.43890309334, 3.43891382217}}, + {"hh", []float64{-67.5, -61.875, 0.0, 11.25}}, + {"kcyx9b0rd65e", []float64{-33.8365919329, -33.8365917653, 42.967973873, 42.9679742083}}, + {"qcy4pees", []float64{-34.7847747803, -34.7846031189, 132.521896362, 132.522239685}}, + {"tc6", 
[]float64{7.03125, 8.4375, 81.5625, 82.96875}}, + {"mxhnk2fkh", []float64{-4.52156066895, -4.5215177536, 73.3150291443, 73.3150720596}}, + {"1mggmg5x0k", []float64{-57.067258358, -57.0672529936, -118.219059706, -118.219048977}}, + {"f1udt102z", []float64{55.2888250351, 55.2888679504, -83.4515047073, -83.451461792}}, + {"jjz", []float64{-57.65625, -56.25, 54.84375, 56.25}}, + {"1q1dg8peze2", []float64{-55.765940398, -55.7659390569, -121.476194859, -121.476193517}}, + {"604unch", []float64{-44.2913818359, -44.2900085449, -85.8306884766, -85.8293151855}}, + {"kt", []float64{-16.875, -11.25, 22.5, 33.75}}, + {"wpbgc", []float64{44.2529296875, 44.296875, 91.0986328125, 91.142578125}}, + {"c", []float64{45.0, 90.0, -135.0, -90.0}}, + {"5", []float64{-90.0, -45.0, -45.0, 0.0}}, + {"tz2pky3c", []float64{42.0901679993, 42.0903396606, 78.9611434937, 78.9614868164}}, + {"j96v3y", []float64{-82.0129394531, -82.0074462891, 71.4440917969, 71.455078125}}, + {"vs974dv", []float64{70.8549499512, 70.8563232422, 69.3745422363, 69.3759155273}}, + {"dunwchdt7d6", []float64{23.712155968, 23.7121573091, -47.061843574, -47.0618422329}}, + {"h2u2nkksw", []float64{-85.7571315765, -85.7570886612, 17.5076580048, 17.5077009201}}, + {"z8qtu1", []float64{47.4224853516, 47.4279785156, 166.81640625, 166.827392578}}, + {"2677dsdngh", []float64{-31.7026162148, -31.7026108503, -164.066948891, -164.066938162}}, + {"4yy", []float64{-52.03125, -50.625, -47.8125, -46.40625}}, + {"uym", []float64{80.15625, 81.5625, 40.78125, 42.1875}}, + {"m2mssd", []float64{-42.7917480469, -42.7862548828, 64.1821289062, 64.1931152344}}, + {"xed3b", []float64{19.9951171875, 20.0390625, 160.6640625, 160.708007812}}, + {"rfp4ky", []float64{-33.3215332031, -33.3160400391, 178.802490234, 178.813476562}}, + {"83fmm9e", []float64{10.7748413086, 10.7762145996, -165.340118408, -165.338745117}}, + {"tr7z2dqh", []float64{42.0687103271, 42.0688819885, 61.5536499023, 61.5539932251}}, + {"crsh", []float64{87.890625, 88.06640625, 
-118.125, -117.7734375}}, + {"f", []float64{45.0, 90.0, -90.0, -45.0}}, + {"761m6h", []float64{-32.8051757812, -32.7996826172, -31.904296875, -31.8933105469}}, + {"7p", []float64{-5.625, 0.0, -45.0, -33.75}}, + {"8qu9r5", []float64{38.2049560547, 38.2104492188, -162.114257812, -162.103271484}}, + {"5z", []float64{-50.625, -45.0, -11.25, 0.0}}, + {"kbz", []float64{-40.78125, -39.375, 43.59375, 45.0}}, + {"zdftsw1rbpsf", []float64{61.4698768035, 61.4698769711, 161.21510189, 161.215102226}}, + {"n1m105r", []float64{-82.7751159668, -82.7737426758, 97.0408630371, 97.0422363281}}, + {"xf4kktnxsz", []float64{12.0258611441, 12.0258665085, 172.120946646, 172.120957375}}, + {"kzqyq4m0qtyu", []float64{-3.10768313706, -3.10768296942, 43.5130138323, 43.5130141675}}, + {"1j1vy57w7", []float64{-60.8453321457, -60.8452892303, -132.27045536, -132.270412445}}, + {"nq4u3", []float64{-55.5029296875, -55.458984375, 105.161132812, 105.205078125}}, + {"bhcy", []float64{72.7734375, 72.94921875, -177.5390625, -177.1875}}, + {"vjr", []float64{74.53125, 75.9375, 54.84375, 56.25}}, + {"uc99nrdsntv", []float64{53.6551974714, 53.6551988125, 36.1377520859, 36.137753427}}, + {"3e8zmnz", []float64{-24.0010070801, -23.9996337891, -111.2159729, -111.214599609}}, + {"j0yzdvs9", []float64{-84.4325065613, -84.4323348999, 54.6192169189, 54.6195602417}}, + {"8chvm4", []float64{6.55883789062, 6.56433105469, -139.350585938, -139.339599609}}, + {"ywf6099wx", []float64{83.329668045, 83.3297109604, 115.6883955, 115.688438416}}, + {"b", []float64{45.0, 90.0, -180.0, -135.0}}, + {"zrc", []float64{88.59375, 90.0, 147.65625, 149.0625}}, + {"zq", []float64{78.75, 84.375, 146.25, 157.5}}, + {"7xznu50ru", []float64{-0.201916694641, -0.201873779297, -12.4799537659, -12.4799108505}}, + {"u", []float64{45.0, 90.0, 0.0, 45.0}}, + {"7r", []float64{-5.625, 0.0, -33.75, -22.5}}, + {"f27k", []float64{47.109375, 47.28515625, -74.1796875, -73.828125}}, + {"0", []float64{-90.0, -45.0, -180.0, -135.0}}, + {"b4s13188yv", 
[]float64{59.2906218767, 59.2906272411, -174.330078363, -174.330067635}}, + {"q8", []float64{-45.0, -39.375, 112.5, 123.75}}, + {"skn7w", []float64{23.115234375, 23.1591796875, 20.302734375, 20.3466796875}}, + {"1tzqwzyh5vp", []float64{-56.4703863859, -56.4703850448, -101.999646574, -101.999645233}}, + {"r52cmn", []float64{-26.4660644531, -26.4605712891, 136.274414062, 136.285400391}}, + {"7bwvbbbru", []float64{-41.1713075638, -41.1712646484, -1.72433853149, -1.72429561615}}, + {"ruv0b1n", []float64{-18.1439208984, -18.1425476074, 175.789489746, 175.790863037}}, + {"vwf01ujm", []float64{82.9915809631, 82.9917526245, 70.3966140747, 70.3969573975}}, + {"0metgxjtrm91", []float64{-58.0123747699, -58.0123746023, -163.666450828, -163.666450493}}, + {"2w", []float64{-11.25, -5.625, -157.5, -146.25}}, + {"8kmch3", []float64{24.0875244141, 24.0930175781, -160.477294922, -160.466308594}}, + {"g6m", []float64{57.65625, 59.0625, -26.71875, -25.3125}}, + {"t6v4k2", []float64{15.8642578125, 15.8697509766, 63.4680175781, 63.4790039062}}, + {"zr02vfju8hd", []float64{84.5186188817, 84.5186202228, 146.862147152, 146.862148494}}, + {"kb8jn751", []float64{-41.2919425964, -41.2917709351, 34.0287780762, 34.0291213989}}, + {"mj76", []float64{-15.1171875, -14.94140625, 49.5703125, 49.921875}}, + {"f0hwcbkwjnr", []float64{46.1889602244, 46.1889615655, -83.5885669291, -83.588565588}}, + {"5", []float64{-90.0, -45.0, -45.0, 0.0}}, + {"6rt9btpxj2p1", []float64{-2.47621519491, -2.47621502727, -70.9831179678, -70.9831176326}}, + {"wh0s6yb6j", []float64{23.2844924927, 23.284535408, 90.8245325089, 90.8245754242}}, + {"65b", []float64{-23.90625, -22.5, -90.0, -88.59375}}, + {"r", []float64{-45.0, 0.0, 135.0, 180.0}}, + {"z0tpc5d", []float64{49.1940307617, 49.1954040527, 142.077941895, 142.079315186}}, + {"fgs2w0", []float64{64.775390625, 64.7808837891, -50.009765625, -49.9987792969}}, + {"vfus35qgp", []float64{61.2341880798, 61.2342309952, 85.1316404343, 85.1316833496}}, + {"hywnkqdye", 
[]float64{-52.3020458221, -52.3020029068, 42.3781728745, 42.3782157898}}, + {"qpwxwun0b", []float64{-1.47203922272, -1.47199630737, 99.4454956055, 99.4455385208}}, + {"v88u9rqr24", []float64{48.6445963383, 48.6446017027, 68.6182022095, 68.6182129383}}, + {"17x06x4fyw", []float64{-70.2295982838, -70.2295929193, -113.792331219, -113.79232049}}, + {"3ykkhjyhrt", []float64{-9.1082829237, -9.10827755928, -95.0890946388, -95.08908391}}, + {"mbzkuqw5", []float64{-39.910068512, -39.9098968506, 89.1403198242, 89.140663147}}, + {"yqjf0p3mn", []float64{79.1422462463, 79.1422891617, 109.337911606, 109.337954521}}, + {"2f", []float64{-33.75, -28.125, -146.25, -135.0}}, + {"hqsm3", []float64{-52.5146484375, -52.470703125, 17.2705078125, 17.314453125}}, + {"gp1y4wtft1tp", []float64{85.4658314399, 85.4658316076, -42.4210815132, -42.4210811779}}, + {"k", []float64{-45.0, 0.0, 0.0, 45.0}}, + {"kxxruvh", []float64{-1.42272949219, -1.42135620117, 32.9095458984, 32.9109191895}}, + {"08h0ft4vk97r", []float64{-89.839789141, -89.8397889733, -151.761162691, -151.761162356}}, + {"y9u8", []float64{54.84375, 55.01953125, 118.828125, 119.1796875}}, + {"3zhsk", []float64{-4.8779296875, -4.833984375, -94.74609375, -94.7021484375}}, + {"x2", []float64{0.0, 5.625, 146.25, 157.5}}, + {"mr7nnvr", []float64{-3.13522338867, -3.13385009766, 60.7749938965, 60.7763671875}}, + {"d3g0pn54hr", []float64{9.87708985806, 9.87709522247, -74.2193305492, -74.2193198204}}, + {"jv173u", []float64{-61.2817382812, -61.2762451172, 80.5847167969, 80.595703125}}, + {"vte", []float64{75.9375, 77.34375, 71.71875, 73.125}}, + {"303un61j2s", []float64{-42.878715992, -42.8787106276, -132.263009548, -132.262998819}}, + {"m528h", []float64{-26.71875, -26.6748046875, 45.87890625, 45.9228515625}}, + {"8", []float64{0.0, 45.0, -180.0, -135.0}}, + {"p8h", []float64{-90.0, -88.59375, 163.125, 164.53125}}, + {"tzusgm6v33y", []float64{44.4584606588, 44.4584619999, 85.2247855067, 85.2247868478}}, + {"26sre", []float64{-29.619140625, 
-29.5751953125, -162.641601562, -162.59765625}}, + {"q1tcnkmh55b", []float64{-36.3626660407, -36.3626646996, 98.3675909042, 98.3675922453}}, + {"xs4t1g3", []float64{23.3967590332, 23.3981323242, 161.093902588, 161.095275879}}, + {"td", []float64{11.25, 16.875, 67.5, 78.75}}, + {"xdvwxsmsjd", []float64{16.6353714466, 16.635376811, 165.571753979, 165.571764708}}, + {"dnw", []float64{36.5625, 37.96875, -81.5625, -80.15625}}, + {"u7nu2ff43vx", []float64{62.6375922561, 62.6375935972, 20.777977556, 20.7779788971}}, + {"0jy2", []float64{-57.65625, -57.48046875, -171.2109375, -170.859375}}, + {"u", []float64{45.0, 90.0, 0.0, 45.0}}, + {"2yx2k", []float64{-8.3935546875, -8.349609375, -135.87890625, -135.834960938}}, + {"g3", []float64{50.625, 56.25, -33.75, -22.5}}, + {"6", []float64{-45.0, 0.0, -90.0, -45.0}}, + {"vrrpw34jfjs", []float64{87.1061190963, 87.1061204374, 66.3712459803, 66.3712473214}}, + {"g2gkzeutg", []float64{50.0752973557, 50.075340271, -28.8437891006, -28.8437461853}}, + {"z8ett4zh", []float64{48.7950897217, 48.7952613831, 162.6512146, 162.651557922}}, + {"cyspv2k0", []float64{82.9261779785, 82.9263496399, -95.3887939453, -95.3884506226}}, + {"fqu7qbshmr", []float64{83.5435527563, 83.5435581207, -72.471088171, -72.4710774422}}, + {"578u4ww7", []float64{-69.5731544495, -69.5729827881, -32.5768661499, -32.5765228271}}, + {"4gzkwxrzb", []float64{-68.0740785599, -68.0740356445, -45.7583999634, -45.758357048}}, + {"g0z038npm1ym", []float64{49.2639500834, 49.263950251, -35.0818693265, -35.0818689913}}, + {"vrse4ey62b1y", []float64{87.7358303592, 87.7358305268, 62.6966058835, 62.6966062188}}, + {"7x2fkbbj6", []float64{-3.81822109222, -3.81817817688, -21.2364864349, -21.2364435196}}, + {"tuvfdxy7b", []float64{27.2014188766, 27.201461792, 86.9543838501, 86.9544267654}}, + {"9qzggb", []float64{38.6279296875, 38.6334228516, -112.686767578, -112.67578125}}, + {"pupf6", []float64{-67.1044921875, -67.060546875, 179.736328125, 179.780273438}}, + {"fknyrbe41k0w", 
[]float64{68.6017451808, 68.6017453484, -68.9130621403, -68.9130618051}}, + {"591vk5jz7x4v", []float64{-83.4343860112, -83.4343858436, -19.8552309349, -19.8552305996}}, + {"n4psdv", []float64{-77.9315185547, -77.9260253906, 100.667724609, 100.678710938}}, + {"zyw2", []float64{81.5625, 81.73828125, 177.5390625, 177.890625}}, + {"r0e29", []float64{-42.099609375, -42.0556640625, 139.614257812, 139.658203125}}, + {"j9cq7f", []float64{-79.0466308594, -79.0411376953, 69.4226074219, 69.43359375}}, + {"n", []float64{-90.0, -45.0, 90.0, 135.0}}, + {"m42x20h", []float64{-31.0693359375, -31.0679626465, 45.7086181641, 45.7099914551}}, + {"405zg3kz2j", []float64{-88.6295574903, -88.6295521259, -84.5772171021, -84.5772063732}}, + {"7vu7httsdm", []float64{-12.0978945494, -12.097889185, -5.06803393364, -5.0680232048}}, + {"prznz91", []float64{-45.2142333984, -45.2128601074, 156.424713135, 156.426086426}}, + {"46dupvqwbsuj", []float64{-75.2043508552, -75.2043506876, -74.5332831144, -74.5332827792}}, + {"7s0", []float64{-22.5, -21.09375, -22.5, -21.09375}}, + {"5nhz5er2nnm5", []float64{-55.0016444363, -55.0016442686, -38.1562833488, -38.1562830135}}, + {"5qqp", []float64{-53.61328125, -53.4375, -25.3125, -24.9609375}}, + {"t4dqn2", []float64{15.1171875, 15.1226806641, 48.4387207031, 48.4497070312}}, + {"gmxt", []float64{76.81640625, 76.9921875, -23.203125, -22.8515625}}, + {"syuh0ke1syh", []float64{38.6968839169, 38.696885258, 39.3903154135, 39.3903167546}}, + {"nvr0", []float64{-60.46875, -60.29296875, 133.59375, 133.9453125}}, + {"g4", []float64{56.25, 61.875, -45.0, -33.75}}, + {"gnd2kb6w0s32", []float64{81.6088713706, 81.6088715382, -41.623740904, -41.6237405688}}, + {"x7hptvsgdvu", []float64{18.2242034376, 18.2242047787, 152.134332061, 152.134333402}}, + {"2fc8", []float64{-29.53125, -29.35546875, -144.140625, -143.7890625}}, + {"e3fsu48yte7", []float64{10.693577081, 10.6935784221, -30.057323724, -30.0573223829}}, + {"pbqhjvqh", []float64{-87.8610992432, -87.8609275818, 
177.448425293, 177.448768616}}, + {"sqx1nwgn", []float64{36.7763900757, 36.7765617371, 21.3835144043, 21.3838577271}}, + {"e6g", []float64{15.46875, 16.875, -29.53125, -28.125}}, + {"dt", []float64{28.125, 33.75, -67.5, -56.25}}, + {"02s64yzk", []float64{-86.7981719971, -86.7980003357, -162.642631531, -162.642288208}}, + {"z9", []float64{50.625, 56.25, 157.5, 168.75}}, + {"fjv", []float64{77.34375, 78.75, -82.96875, -81.5625}}, + {"5yc0", []float64{-52.03125, -51.85546875, -9.84375, -9.4921875}}, + {"y", []float64{45.0, 90.0, 90.0, 135.0}}, + {"k3x3j1pmdnfp", []float64{-36.3802440651, -36.3802438974, 21.6750839353, 21.6750842705}}, + {"g2pjds", []float64{45.9887695312, 45.9942626953, -23.7963867188, -23.7854003906}}, + {"ppw0p3q2gzbk", []float64{-47.8054625541, -47.8054623865, 143.764847852, 143.764848188}}, + {"9xwp0kprs0x6", []float64{43.4412318841, 43.4412320517, -104.041375928, -104.041375592}}, + {"0gzww7fv7gj", []float64{-67.7421551943, -67.7421538532, -135.424522609, -135.424521267}}, + {"sgpt2", []float64{17.7978515625, 17.841796875, 44.296875, 44.3408203125}}, + {"3", []float64{-45.0, 0.0, -135.0, -90.0}}, + {"rr", []float64{-5.625, 0.0, 146.25, 157.5}}, + {"wkqrtvj5", []float64{25.2525901794, 25.2527618408, 110.298614502, 110.298957825}}, + {"ngtfyx77u", []float64{-69.7886323929, -69.7885894775, 132.126216888, 132.126259804}}, + {"m", []float64{-45.0, 0.0, 45.0, 90.0}}, + {"58y", []float64{-85.78125, -84.375, -14.0625, -12.65625}}, + {"mvjrxvx", []float64{-15.5264282227, -15.5250549316, 86.483001709, 86.484375}}, + {"jgve4ttu", []float64{-68.3480072021, -68.3478355408, 86.6021347046, 86.6024780273}}, + {"h9", []float64{-84.375, -78.75, 22.5, 33.75}}, + {"ytf9sb6mj27", []float64{77.609654814, 77.6096561551, 116.227684468, 116.227685809}}, + {"rk9kv3q", []float64{-18.8456726074, -18.8442993164, 148.246765137, 148.248138428}}, + {"kbq8t", []float64{-43.505859375, -43.4619140625, 43.1103515625, 43.154296875}}, + {"j8prqp8kx", []float64{-88.6836147308, 
-88.6835718155, 77.9596281052, 77.9596710205}}, + {"8", []float64{0.0, 45.0, -180.0, -135.0}}, + {"mq53k", []float64{-11.0302734375, -10.986328125, 60.99609375, 61.0400390625}}, + {"d95fw", []float64{6.064453125, 6.1083984375, -61.962890625, -61.9189453125}}, + {"x15j3", []float64{6.5478515625, 6.591796875, 139.262695312, 139.306640625}}, + {"x0yg7k2b", []float64{4.81338500977, 4.81355667114, 144.636039734, 144.636383057}}, + {"dx8x9yxwprk5", []float64{43.5426343046, 43.5426344723, -66.7093545198, -66.7093541846}}, + {"v", []float64{45.0, 90.0, 45.0, 90.0}}, + {"429ptwcscc3", []float64{-85.8312396705, -85.8312383294, -77.0999144018, -77.0999130607}}, + {"ncknjt", []float64{-81.8865966797, -81.8811035156, 129.616699219, 129.627685547}}, + {"7ce8p70yc7h", []float64{-36.5448457003, -36.5448443592, -6.00843250751, -6.00843116641}}, + {"un1", []float64{78.75, 80.15625, 1.40625, 2.8125}}, + {"b6rv0shhd5", []float64{58.5579174757, 58.5579228401, -157.824010849, -157.82400012}}, + {"qyg8d", []float64{-6.943359375, -6.8994140625, 128.759765625, 128.803710938}}, + {"3p7f2mvx6", []float64{-3.79041194916, -3.79036903381, -129.707937241, -129.707894325}}, + {"u0vj", []float64{50.09765625, 50.2734375, 7.03125, 7.3828125}}, + {"49m", []float64{-82.96875, -81.5625, -60.46875, -59.0625}}, + {"7vhu8hp00j2", []float64{-16.0619835556, -16.0619822145, -4.56069946289, -4.56069812179}}, + {"3sh", []float64{-22.5, -21.09375, -106.875, -105.46875}}, + {"sexg40hc", []float64{20.2150154114, 20.2151870728, 33.4928512573, 33.4931945801}}, + {"4uwxw8u", []float64{-63.365020752, -63.3636474609, -46.8182373047, -46.8168640137}}, + {"mr", []float64{-5.625, 0.0, 56.25, 67.5}}, + {"1kymkgft79d3", []float64{-62.3368896358, -62.3368894681, -114.748610817, -114.748610482}}, + {"3tj", []float64{-16.875, -15.46875, -105.46875, -104.0625}}, + {"vxfzth5", []float64{89.9340820312, 89.9354553223, 71.5910339355, 71.5924072266}}, + {"h", []float64{-90.0, -45.0, 0.0, 45.0}}, + {"e5r9", []float64{18.45703125, 
18.6328125, -34.453125, -34.1015625}}, + {"hq9bc8pmfk", []float64{-53.3046555519, -53.3046501875, 13.7869083881, 13.786919117}}, + {"05t", []float64{-70.3125, -68.90625, -172.96875, -171.5625}}, + {"ye6yg", []float64{64.4677734375, 64.51171875, 116.499023438, 116.54296875}}, + {"gg4k", []float64{62.578125, 62.75390625, -8.0859375, -7.734375}}, + {"50q2fcp", []float64{-88.4564208984, -88.4550476074, -36.0804748535, -36.0791015625}}, + {"2hu8d3131", []float64{-18.1876945496, -18.1876516342, -173.571238518, -173.571195602}}, + {"08gcdmy84ewg", []float64{-85.4859731533, -85.4859729856, -152.118642814, -152.118642479}}, + {"4", []float64{-90.0, -45.0, -90.0, -45.0}}, + {"t05v4wzmqmet", []float64{0.91691667214, 0.916916839778, 50.3935300559, 50.3935303912}}, + {"3nc", []float64{-7.03125, -5.625, -133.59375, -132.1875}}, + {"v", []float64{45.0, 90.0, 45.0, 90.0}}, + {"3gvk6zm1869", []float64{-23.1190833449, -23.1190820038, -93.7394593656, -93.7394580245}}, + {"39rruq", []float64{-36.5734863281, -36.5679931641, -102.117919922, -102.106933594}}, + {"jhkxvqt7q5z", []float64{-64.6951617301, -64.6951603889, 51.5663145483, 51.5663158894}}, + {"bb", []float64{45.0, 50.625, -146.25, -135.0}}, + {"es26styfpb", []float64{24.3776321411, 24.3776375055, -21.9410812855, -21.9410705566}}, + {"500q98r9", []float64{-88.8558769226, -88.8557052612, -44.5722198486, -44.5718765259}}, + {"kv", []float64{-16.875, -11.25, 33.75, 45.0}}, + {"njf", []float64{-57.65625, -56.25, 92.8125, 94.21875}}, + {"whk5vu", []float64{24.5874023438, 24.5928955078, 95.8776855469, 95.888671875}}, + {"w9pq4yk4p4qf", []float64{6.71437550336, 6.714375671, 122.821964733, 122.821965069}}, + {"yt", []float64{73.125, 78.75, 112.5, 123.75}}, + {"ccztdek", []float64{55.8283996582, 55.8297729492, -90.5877685547, -90.5863952637}}, + {"ej33gfth2", []float64{29.8533296585, 29.8533725739, -43.070526123, -43.0704832077}}, + {"y", []float64{45.0, 90.0, 90.0, 135.0}}, + {"hqyfjm", []float64{-51.6522216797, -51.6467285156, 
20.9729003906, 20.9838867188}}, + {"njcr", []float64{-56.42578125, -56.25, 91.7578125, 92.109375}}, + {"48ejbxwrk6", []float64{-86.1343038082, -86.1342984438, -63.2505118847, -63.2505011559}}, + {"78f1r09e5v8", []float64{-40.558232367, -40.5582310259, -19.3776619434, -19.3776606023}}, + {"ged8fvuhk31", []float64{64.8516565561, 64.8516578972, -18.8578484952, -18.8578471541}}, + {"8ss3fn", []float64{25.6530761719, 25.6585693359, -151.435546875, -151.424560547}}, + {"v90sp4e6", []float64{51.3422012329, 51.3423728943, 68.5152053833, 68.5155487061}}, + {"bx00h848", []float64{84.375, 84.3751716614, -157.298812866, -157.298469543}}, + {"9y3", []float64{35.15625, 36.5625, -99.84375, -98.4375}}, + {"ehpkg7", []float64{23.3514404297, 23.3569335938, -34.6618652344, -34.6508789062}}, + {"r38623wxhs", []float64{-36.1575293541, -36.1575239897, 146.621668339, 146.621679068}}, + {"x6yex2zx", []float64{16.0893058777, 16.0894775391, 155.719528198, 155.719871521}}, + {"r", []float64{-45.0, 0.0, 135.0, 180.0}}, + {"9", []float64{0.0, 45.0, -135.0, -90.0}}, + {"6w", []float64{-11.25, -5.625, -67.5, -56.25}}, + {"mx6g2d", []float64{-3.63647460938, -3.63098144531, 71.3891601562, 71.4001464844}}, + {"vmsh9b6f", []float64{76.7302322388, 76.7304039001, 61.9556808472, 61.9560241699}}, + {"7uqbsm728bjw", []float64{-20.9769334272, -20.9769332595, -1.56654216349, -1.56654182822}}, + {"nvtqqh0jc3", []float64{-57.9409021139, -57.9408967495, 131.396538019, 131.396548748}}, + {"x8", []float64{0.0, 5.625, 157.5, 168.75}}, + {"nqx5n", []float64{-52.91015625, -52.8662109375, 111.357421875, 111.401367188}}, + {"c4wmv60xtud", []float64{60.0855401158, 60.0855414569, -125.979288518, -125.979287177}}, + {"t9", []float64{5.625, 11.25, 67.5, 78.75}}, + {"e3nqrsk9fqdu", []float64{6.74731470644, 6.74731487408, -24.6250675991, -24.6250672638}}, + {"k", []float64{-45.0, 0.0, 0.0, 45.0}}, + {"7x9", []float64{-2.8125, -1.40625, -21.09375, -19.6875}}, + {"760qb6yxf", []float64{-32.5470399857, -32.5469970703, 
-33.3784389496, -33.3783960342}}, + {"x7skftff", []float64{20.5543899536, 20.554561615, 152.340202332, 152.340545654}}, + {"jv774u", []float64{-59.9194335938, -59.9139404297, 83.4411621094, 83.4521484375}}, + {"phh7", []float64{-66.97265625, -66.796875, 140.9765625, 141.328125}}, + {"hv4tt0pymy", []float64{-60.9070980549, -60.9070926905, 37.4962413311, 37.4962520599}}, + {"3zemdehyubj", []float64{-1.82806491852, -1.82806357741, -96.563090533, -96.5630891919}}, + {"v", []float64{45.0, 90.0, 45.0, 90.0}}, + {"dep80ett", []float64{16.8950843811, 16.8952560425, -56.9235992432, -56.9232559204}}, + {"bbj", []float64{45.0, 46.40625, -139.21875, -137.8125}}, + {"k", []float64{-45.0, 0.0, 0.0, 45.0}}, + {"v", []float64{45.0, 90.0, 45.0, 90.0}}, + {"9k1t0ecqh3v3", []float64{23.4005451389, 23.4005453065, -121.616746299, -121.616745964}}, + {"mm5", []float64{-16.875, -15.46875, 60.46875, 61.875}}, + {"jz4zmmv2", []float64{-49.3190002441, -49.3188285828, 82.8551101685, 82.8554534912}}, + {"bpshcry", []float64{88.065032959, 88.06640625, -174.311828613, -174.310455322}}, + {"z", []float64{45.0, 90.0, 135.0, 180.0}}, + {"e87b", []float64{1.40625, 1.58203125, -17.2265625, -16.875}}, + {"pcphekd1ph0", []float64{-83.5590720177, -83.5590706766, 178.739619255, 178.739620596}}, + {"nrwq0", []float64{-46.7578125, -46.7138671875, 110.0390625, 110.083007812}}, + {"6k3e7rk823cj", []float64{-20.4825823568, -20.4825821891, -76.4916108549, -76.4916105196}}, + {"kkv059k5v", []float64{-18.2737398148, -18.2736968994, 18.4407663345, 18.4408092499}}, + {"ep7x2t4t6x", []float64{42.084068656, 42.0840740204, -40.0526118279, -40.052601099}}, + {"43hh0", []float64{-83.671875, -83.6279296875, -73.125, -73.0810546875}}, + {"rdhfdg0qee", []float64{-33.2929354906, -33.2929301262, 164.301030636, 164.301041365}}, + {"znku45j7p", []float64{80.8763694763, 80.8764123917, 141.77508831, 141.775131226}}, + {"ju", []float64{-67.5, -61.875, 78.75, 90.0}}, + {"b6zckuz", []float64{60.7145690918, 60.7159423828, 
-157.633209229, -157.631835938}}, + {"fm7m", []float64{75.41015625, 75.5859375, -74.1796875, -73.828125}}, + {"8xg", []float64{43.59375, 45.0, -153.28125, -151.875}}, + {"wfk7q", []float64{13.2275390625, 13.271484375, 129.990234375, 130.034179688}}, + {"6r1p9yz6z9", []float64{-4.26908433437, -4.26907896996, -77.2565674782, -77.2565567493}}, + {"t", []float64{0.0, 45.0, 45.0, 90.0}}, + {"fuzw5", []float64{72.7734375, 72.8173828125, -45.5712890625, -45.52734375}}, + {"m33ehjv", []float64{-37.4098205566, -37.4084472656, 58.5420227051, 58.5433959961}}, + {"s6stp", []float64{14.94140625, 14.9853515625, 17.8857421875, 17.9296875}}, + {"tjxuvxh", []float64{31.8109130859, 31.812286377, 56.1456298828, 56.1470031738}}, + {"0vgxezccsf", []float64{-56.2950503826, -56.2950450182, -141.160722971, -141.160712242}}, + {"0h", []float64{-67.5, -61.875, -180.0, -168.75}}, + {"ge6bb", []float64{63.4130859375, 63.45703125, -18.6328125, -18.5888671875}}, + {"h2gyy9", []float64{-84.5892333984, -84.5837402344, 16.8090820312, 16.8200683594}}, + {"g0f7xg", []float64{49.8504638672, 49.8559570312, -41.4953613281, -41.484375}}, + {"ujfk5", []float64{78.046875, 78.0908203125, 3.2958984375, 3.33984375}}, + {"q0b6rwm0", []float64{-40.3514099121, -40.3512382507, 90.6880187988, 90.6883621216}}, + {"d", []float64{0.0, 45.0, -90.0, -45.0}}, + {"0nyhwsx2g5", []float64{-51.2153702974, -51.215364933, -171.266770363, -171.266759634}}, + {"mjpe", []float64{-16.34765625, -16.171875, 55.546875, 55.8984375}}, + {"mt", []float64{-16.875, -11.25, 67.5, 78.75}}, + {"z49vj0d4zz", []float64{59.9446624517, 59.9446678162, 137.683743238, 137.683753967}}, + {"zry97vd3gs", []float64{88.8440108299, 88.8440161943, 155.55866003, 155.558670759}}, + {"c", []float64{45.0, 90.0, -135.0, -90.0}}, + {"v3syrvc9", []float64{54.5678901672, 54.5680618286, 63.2723236084, 63.2726669312}}, + {"nrer", []float64{-46.58203125, -46.40625, 105.8203125, 106.171875}}, + {"2hqt8nf65v5e", []float64{-20.0895036198, -20.0895034522, 
-170.856119469, -170.856119134}}, + {"wekgdxc", []float64{18.9390563965, 18.9404296875, 119.290924072, 119.292297363}}, + {"6bh43q", []float64{-44.5715332031, -44.5660400391, -50.5700683594, -50.5590820312}}, + {"d564", []float64{18.6328125, 18.80859375, -87.1875, -86.8359375}}, + {"m85", []float64{-45.0, -43.59375, 71.71875, 73.125}}, + {"x4tj5rb77r", []float64{14.9845737219, 14.9845790863, 142.174555063, 142.174565792}}, + {"0bwycz8vr", []float64{-85.9588766098, -85.9588336945, -136.679577827, -136.679534912}}, + {"ehnu2e", []float64{23.2635498047, 23.2690429688, -35.4858398438, -35.4748535156}}, + {"jbe8mk", []float64{-87.1215820312, -87.1160888672, 83.9025878906, 83.9135742188}}, + {"ss", []float64{22.5, 28.125, 22.5, 33.75}}, + {"8edywpv3ygj3", []float64{20.8729668148, 20.8729669824, -153.361634128, -153.361633793}}, + {"xpxu1erq390", []float64{42.9095560312, 42.9095573723, 145.974376202, 145.974377543}}, + {"4043qb91kj", []float64{-89.7772854567, -89.7772800922, -86.5377616882, -86.5377509594}}, + {"2n7q0j0r", []float64{-8.76039505005, -8.76022338867, -175.429344177, -175.429000854}}, + {"cm0n96q3vn8t", []float64{74.2802738585, 74.2802740261, -123.686270043, -123.686269708}}, + {"50hgpxg", []float64{-89.4300842285, -89.4287109375, -37.9866027832, -37.9852294922}}, + {"2m", []float64{-16.875, -11.25, -168.75, -157.5}}, + {"w4s1zj8n9", []float64{14.4014453888, 14.4014883041, 95.9326601028, 95.9327030182}}, + {"hxzh", []float64{-45.703125, -45.52734375, 32.34375, 32.6953125}}, + {"cn", []float64{78.75, 84.375, -135.0, -123.75}}, + {"dpt5k8", []float64{42.7587890625, 42.7642822266, -82.7709960938, -82.7600097656}}, + {"gz", []float64{84.375, 90.0, -11.25, 0.0}}, + {"d09knt", []float64{3.54309082031, 3.54858398438, -87.9565429688, -87.9455566406}}, + {"mrucw", []float64{-1.142578125, -1.0986328125, 63.193359375, 63.2373046875}}, + {"055egqf4y", []float64{-72.4282693863, -72.4282264709, -174.93229866, -174.932255745}}, + {"qphu", []float64{-4.921875, -4.74609375, 
96.6796875, 97.03125}}, + {"dfeh", []float64{14.765625, 14.94140625, -52.03125, -51.6796875}}, + {"00qfn4ctp57", []float64{-88.2262055576, -88.2262042165, -170.241776258, -170.241774917}}, + {"q9xx27p2trn", []float64{-35.2714830637, -35.2714817226, 123.06805104, 123.068052381}}, + {"10bhfgfsj8m", []float64{-84.9250017107, -84.9250003695, -134.875474423, -134.875473082}}, + {"6k", []float64{-22.5, -16.875, -78.75, -67.5}}, + {"zyvjr4m37", []float64{83.9041757584, 83.9042186737, 176.096205711, 176.096248627}}, + {"0c8k7gb", []float64{-80.7948303223, -80.7934570312, -145.733642578, -145.732269287}}, + {"n7k", []float64{-71.71875, -70.3125, 106.875, 108.28125}}, + {"fpj4eecz", []float64{84.8362541199, 84.8364257812, -82.812538147, -82.8121948242}}, + {"ev", []float64{28.125, 33.75, -11.25, 0.0}}, + {"8y", []float64{33.75, 39.375, -146.25, -135.0}}, + {"x9xd1q", []float64{8.82202148438, 8.82751464844, 168.101806641, 168.112792969}}, + {"x", []float64{0.0, 45.0, 135.0, 180.0}}, + {"d1pmefm0t", []float64{6.60424232483, 6.60428524017, -79.6328115463, -79.632768631}}, + {"tmy", []float64{32.34375, 33.75, 64.6875, 66.09375}}, + {"phu6jzv0z", []float64{-62.8869867325, -62.8869438171, 141.236414909, 141.236457825}}, + {"zv7pjxpuwjbq", []float64{75.8009752259, 75.8009753935, 173.221350051, 173.221350387}}, + {"9gegu1", []float64{20.3521728516, 20.3576660156, -95.80078125, -95.7897949219}}, + {"tq33", []float64{35.33203125, 35.5078125, 58.0078125, 58.359375}}, + {"e", []float64{0.0, 45.0, -45.0, 0.0}}, + {"y6z1sy", []float64{60.7653808594, 60.7708740234, 111.302490234, 111.313476562}}, + {"3", []float64{-45.0, 0.0, -135.0, -90.0}}, + {"q", []float64{-45.0, 0.0, 90.0, 135.0}}, + {"mtyyw7", []float64{-11.4971923828, -11.4916992188, 77.2668457031, 77.2778320312}}, + {"2sqmb", []float64{-20.0830078125, -20.0390625, -148.7109375, -148.666992188}}, + {"yp487k", []float64{84.4409179688, 84.4464111328, 93.6584472656, 93.6694335938}}, + {"nyn5k1fc", []float64{-55.668926239, 
-55.6687545776, 132.3670578, 132.367401123}}, + {"gnqk77v", []float64{80.9239196777, 80.9252929688, -36.0612487793, -36.0598754883}}, + {"yw12", []float64{78.75, 78.92578125, 114.2578125, 114.609375}}, + {"vcgqqmd", []float64{55.9725952148, 55.9739685059, 83.5977172852, 83.5990905762}}, + {"27zp3q", []float64{-22.5988769531, -22.5933837891, -158.851318359, -158.840332031}}, + {"cg4tb6", []float64{62.8967285156, 62.9022216797, -97.7233886719, -97.7124023438}}, + {"w5njb9rnp5e", []float64{17.8936573863, 17.8936587274, 98.4693901241, 98.4693914652}}, + {"9y6rp4pptv", []float64{36.3990193605, 36.399024725, -97.7684605122, -97.7684497833}}, + {"pjup17j08hup", []float64{-56.4091892727, -56.409189105, 140.68680346, 140.686803795}}, + {"vz52f33z9mu", []float64{84.5150206983, 84.5150220394, 83.421651721, 83.4216530621}}, + {"zd", []float64{56.25, 61.875, 157.5, 168.75}}, + {"q", []float64{-45.0, 0.0, 90.0, 135.0}}, + {"dm46s", []float64{28.564453125, 28.6083984375, -75.41015625, -75.3662109375}}, + {"d7dnvbu", []float64{20.8781433105, 20.8795166016, -75.6793212891, -75.677947998}}, + {"02ercsvx0h", []float64{-85.7978796959, -85.7978743315, -164.106216431, -164.106205702}}, + {"hnfx5f", []float64{-50.7897949219, -50.7843017578, 3.68041992188, 3.69140625}}, + {"evb677f6t", []float64{32.7602863312, 32.7603292465, -10.7523107529, -10.7522678375}}, + {"sg43r7p151", []float64{17.1113830805, 17.1113884449, 37.2424077988, 37.2424185276}}, + {"441mdz5", []float64{-77.7447509766, -77.7433776855, -88.1172180176, -88.1158447266}}, + {"k2qrsc7hr81", []float64{-42.2677946091, -42.267793268, 20.2522458136, 20.2522471547}}, + {"d0ydzxhjwr", []float64{4.74158227444, 4.74158763885, -80.5240237713, -80.5240130424}}, + {"4cn", []float64{-84.375, -82.96875, -47.8125, -46.40625}}, + {"ds2", []float64{23.90625, 25.3125, -67.5, -66.09375}}, + {"rxvywy", []float64{-0.230712890625, -0.225219726562, 165.882568359, 165.893554688}}, + {"zs2k", []float64{69.609375, 69.78515625, 157.8515625, 
158.203125}}, + {"kg63", []float64{-26.54296875, -26.3671875, 36.9140625, 37.265625}}, + {"hmxh64j", []float64{-58.3044433594, -58.3030700684, 21.1885070801, 21.1898803711}}, + {"d5v3", []float64{21.26953125, 21.4453125, -82.6171875, -82.265625}}, + {"ddg1", []float64{15.64453125, 15.8203125, -63.28125, -62.9296875}}, + {"stf4tug", []float64{32.8092956543, 32.8106689453, 25.5693054199, 25.5706787109}}, + {"vgc", []float64{66.09375, 67.5, 80.15625, 81.5625}}, + {"jby3xf", []float64{-85.5065917969, -85.5010986328, 87.8796386719, 87.890625}}, + {"9b1f419tdjf", []float64{0.360777229071, 0.360778570175, -98.6990234256, -98.6990220845}}, + {"0zqp4", []float64{-47.98828125, -47.9443359375, -137.724609375, -137.680664062}}, + {"gg292c4wd", []float64{63.5075855255, 63.5076284409, -10.5103969574, -10.5103540421}}, + {"zy96qbt", []float64{81.9607543945, 81.9621276855, 170.811309814, 170.812683105}}, + {"tz9x7f5c70", []float64{43.4731149673, 43.4731203318, 81.0294485092, 81.0294592381}}, + {"rq", []float64{-11.25, -5.625, 146.25, 157.5}}, + {"94tt", []float64{14.94140625, 15.1171875, -127.265625, -126.9140625}}, + {"h7vnwf8", []float64{-67.7499389648, -67.7485656738, 18.5778808594, 18.5792541504}}, + {"4f", []float64{-78.75, -73.125, -56.25, -45.0}}, + {"kj3t", []float64{-14.58984375, -14.4140625, 2.109375, 2.4609375}}, + {"qspj", []float64{-21.62109375, -21.4453125, 122.34375, 122.6953125}}, + {"4y9", []float64{-53.4375, -52.03125, -54.84375, -53.4375}}, + {"b05kqcvsm8", []float64{45.7574129105, 45.7574182749, -175.125267506, -175.125256777}}, + {"p5zq4f", []float64{-67.8405761719, -67.8350830078, 145.316162109, 145.327148438}}, + {"1cgx", []float64{-78.92578125, -78.75, -96.328125, -95.9765625}}, + {"m2", []float64{-45.0, -39.375, 56.25, 67.5}}, + {"j150xkd492", []float64{-84.2619609833, -84.2619556189, 49.5401537418, 49.5401644707}}, + {"05rs", []float64{-71.015625, -70.83984375, -169.453125, -169.1015625}}, + {"ve8", []float64{64.6875, 66.09375, 67.5, 68.90625}}, + 
{"r9tv", []float64{-35.68359375, -35.5078125, 165.5859375, 165.9375}}, + {"d71r07", []float64{18.1219482422, 18.1274414062, -76.9812011719, -76.9702148438}}, + {"b6hepfqk6", []float64{56.79043293, 56.7904758453, -162.072629929, -162.072587013}}, + {"md8y86mqjd", []float64{-29.7815215588, -29.7815161943, 68.5731196404, 68.5731303692}}, + {"bcgcyq", []float64{55.1843261719, 55.1898193359, -140.701904297, -140.690917969}}, + {"e3hpu9352", []float64{6.99472904205, 6.9947719574, -27.9258728027, -27.9258298874}}, + {"hsbsq2rkmg", []float64{-62.5320607424, -62.532055378, 23.4879863262, 23.4879970551}}, + {"ub2r", []float64{47.63671875, 47.8125, 34.1015625, 34.453125}}, + {"d8", []float64{0.0, 5.625, -67.5, -56.25}}, + {"gexm9j6y088", []float64{65.6841686368, 65.6841699779, -12.2569441795, -12.2569428384}}, + {"15cdq", []float64{-68.5107421875, -68.466796875, -132.626953125, -132.583007812}}, + {"9zud17gy", []float64{43.9669418335, 43.9671134949, -94.8617935181, -94.8614501953}}, + {"4q3y", []float64{-53.7890625, -53.61328125, -76.2890625, -75.9375}}, + {"gph138", []float64{84.5947265625, 84.6002197266, -39.3090820312, -39.2980957031}}, + {"m09d8ju2b", []float64{-41.7163324356, -41.7162895203, 47.1152114868, 47.1152544022}}, + {"8mszup4gk", []float64{32.3388147354, 32.3388576508, -161.890583038, -161.890540123}}, + {"dyrfvtvqh", []float64{35.6722640991, 35.6723070145, -45.102481842, -45.1024389267}}, + {"3h9tgkp", []float64{-18.6547851562, -18.6534118652, -132.738189697, -132.736816406}}, + {"66gdty14u", []float64{-29.0583658218, -29.0583229065, -73.5738945007, -73.5738515854}}, + {"83zp1d", []float64{11.0852050781, 11.0906982422, -158.840332031, -158.829345703}}, + {"e7gp0", []float64{22.32421875, 22.3681640625, -29.53125, -29.4873046875}}, + {"s0ykfgceytk", []float64{5.07498219609, 5.0749835372, 8.91225636005, 8.91225770116}}, + {"zfe7", []float64{59.58984375, 59.765625, 173.3203125, 173.671875}}, + {"cr9", []float64{87.1875, 88.59375, -122.34375, -120.9375}}, + 
{"9ugr3kq", []float64{28.0165100098, 28.0178833008, -96.6165161133, -96.6151428223}}, + {"2grcq0", []float64{-26.4990234375, -26.4935302734, -135.087890625, -135.076904297}}, + {"50bb31vkds3p", []float64{-85.726895202, -85.7268950343, -43.8940487802, -43.8940484449}}, + {"qhxdv1hcqe3", []float64{-19.1983763874, -19.1983750463, 100.773404986, 100.773406327}}, + {"k2cv0gp2vk", []float64{-39.8857140541, -39.8857086897, 13.7540781498, 13.7540888786}}, + {"y5jd", []float64{62.2265625, 62.40234375, 97.734375, 98.0859375}}, + {"gnvg3p", []float64{83.5784912109, 83.583984375, -36.8701171875, -36.8591308594}}, + {"9g70w", []float64{18.369140625, 18.4130859375, -96.767578125, -96.7236328125}}, + {"9g7qsb", []float64{19.423828125, 19.4293212891, -96.4709472656, -96.4599609375}}, + {"1zq2u", []float64{-49.0869140625, -49.04296875, -92.28515625, -92.2412109375}}, + {"tr", []float64{39.375, 45.0, 56.25, 67.5}}, + {"4wmddz9mgk38", []float64{-54.3620882928, -54.3620881252, -59.6429172903, -59.6429169551}}, + {"wkcdsn8nbz", []float64{27.1951049566, 27.195110321, 103.535188437, 103.535199165}}, + {"1r198wxj", []float64{-50.3247642517, -50.3245925903, -121.609039307, -121.608695984}}, + {"eu", []float64{22.5, 28.125, -11.25, 0.0}}, + {"k2y7mk6sd0wz", []float64{-40.1858386584, -40.1858384907, 20.2733035013, 20.2733038366}}, + {"gms9ytw4v5", []float64{76.2758177519, 76.2758231163, -27.1277761459, -27.1277654171}}, + {"2vdkc", []float64{-13.2275390625, -13.18359375, -143.041992188, -142.998046875}}, + {"bke7fx3", []float64{71.011505127, 71.012878418, -164.068450928, -164.067077637}}, + {"tvnxu7jt8", []float64{29.5047283173, 29.5047712326, 88.0849456787, 88.0849885941}}, + {"f864yp3c", []float64{46.9296455383, 46.9298171997, -64.4214248657, -64.421081543}}, + {"g8hxr7x150d7", []float64{46.2938149832, 46.2938151509, -15.8435266837, -15.8435263485}}, + {"zmk4kmh", []float64{74.9542236328, 74.9555969238, 152.067260742, 152.068634033}}, + {"gtqsvep4", []float64{75.3830337524, 75.3832054138, 
-13.1080627441, -13.1077194214}}, + {"trsvy0", []float64{43.1982421875, 43.2037353516, 63.193359375, 63.2043457031}}, + {"bevjfs", []float64{67.1264648438, 67.1319580078, -150.358886719, -150.347900391}}, + {"ktrb", []float64{-15.46875, -15.29296875, 33.3984375, 33.75}}, + {"dn20q1pv", []float64{35.2065467834, 35.2067184448, -89.7256851196, -89.7253417969}}, + {"8n3wy5g2", []float64{36.3633728027, 36.3635444641, -177.622489929, -177.622146606}}, + {"vyzft", []float64{83.408203125, 83.4521484375, 89.8681640625, 89.912109375}}, + {"gwuedjbs8222", []float64{83.6163438857, 83.6163440533, -16.0832866654, -16.0832863301}}, + {"fpb89dkktj83", []float64{88.6948023923, 88.6948025599, -89.2249056324, -89.2249052972}}, + {"wjjk3", []float64{28.8720703125, 28.916015625, 97.4267578125, 97.470703125}}, + {"wx6", []float64{40.78125, 42.1875, 115.3125, 116.71875}}, + {"yzpuuv3w0pw", []float64{85.2398702502, 85.2398715913, 134.859245718, 134.859247059}}, + {"k2518ucy", []float64{-44.7092056274, -44.7090339661, 15.5041122437, 15.5044555664}}, + {"hkjk1075", []float64{-66.7949867249, -66.7948150635, 18.6808776855, 18.6812210083}}, + {"btmd", []float64{74.8828125, 75.05859375, -149.765625, -149.4140625}}, + {"ucbvmyuw9z", []float64{55.8048337698, 55.8048391342, 35.0636279583, 35.0636386871}}, + {"wf86ytd34", []float64{14.5762825012, 14.5763254166, 124.390382767, 124.390425682}}, + {"9zjbws3u", []float64{39.4869232178, 39.4870948792, -92.8760147095, -92.8756713867}}, + {"rqc", []float64{-7.03125, -5.625, 147.65625, 149.0625}}, + {"pwqw8gh8x74", []float64{-53.6845904589, -53.6845891178, 166.680077612, 166.680078954}}, + {"5ekn", []float64{-70.6640625, -70.48828125, -16.875, -16.5234375}}, + {"mxx0b3mzwxp", []float64{-2.67247259617, -2.67247125506, 77.3629210889, 77.36292243}}, + {"tn", []float64{33.75, 39.375, 45.0, 56.25}}, + {"ju59u9b8grr", []float64{-67.1826021373, -67.1826007962, 83.8704644144, 83.8704657555}}, + {"hmte6v6jzq84", []float64{-58.4613495693, -58.4613494016, 
19.1082823277, 19.1082826629}}, + {"t3", []float64{5.625, 11.25, 56.25, 67.5}}, + {"es9g6", []float64{25.8837890625, 25.927734375, -19.951171875, -19.9072265625}}, + {"2bwcdk6y", []float64{-41.8994522095, -41.8992805481, -136.655158997, -136.654815674}}, + {"4ew3jn8umh", []float64{-70.1002621651, -70.1002568007, -58.4899663925, -58.4899556637}}, + {"0meekufm4x3", []float64{-58.4642212093, -58.4642198682, -163.616186231, -163.61618489}}, + {"02", []float64{-90.0, -84.375, -168.75, -157.5}}, + {"9yuv0t43sv0", []float64{38.8754063845, 38.8754077256, -94.5450460911, -94.54504475}}, + {"u0g7y8444", []float64{49.8782730103, 49.8783159256, 4.85878944397, 4.85883235931}}, + {"r4", []float64{-33.75, -28.125, 135.0, 146.25}}, + {"1ps631dwde", []float64{-47.4076205492, -47.4076151848, -128.975951672, -128.975940943}}, + {"vbmfdm", []float64{46.8731689453, 46.8786621094, 86.9348144531, 86.9458007812}}, + {"s7gmxm8vhn5v", []float64{22.0916506089, 22.0916507766, 16.1401226744, 16.1401230097}}, + {"mcwrh68t", []float64{-35.317440033, -35.3172683716, 87.7265167236, 87.7268600464}}, + {"9yt5645jq9r9", []float64{37.145683486, 37.1456836537, -94.1264504939, -94.1264501587}}, + {"61t1x3z", []float64{-36.2892150879, -36.2878417969, -82.6405334473, -82.6391601562}}, + {"wmzkmbm", []float64{33.0921936035, 33.0935668945, 111.704864502, 111.706237793}}, + {"jy", []float64{-56.25, -50.625, 78.75, 90.0}}, + {"9ctckntmvgkr", []float64{8.69393778965, 8.69393795729, -92.9808190092, -92.980818674}}, + {"8rnugbss4gd", []float64{40.2134129405, 40.2134142816, -159.086717069, -159.086715728}}, + {"ek8", []float64{25.3125, 26.71875, -33.75, -32.34375}}, + {"nvqegbu", []float64{-59.8054504395, -59.8040771484, 133.060913086, 133.062286377}}, + {"h0u7v6pujp5", []float64{-85.1103597879, -85.1103584468, 6.21813699603, 6.21813833714}}, + {"2rj", []float64{-5.625, -4.21875, -161.71875, -160.3125}}, + {"jk38", []float64{-66.09375, -65.91796875, 58.359375, 58.7109375}}, + {"c67qe30m", []float64{58.8051795959, 
58.8053512573, -119.036521912, -119.036178589}}, + {"xhtwk", []float64{26.4111328125, 26.455078125, 142.91015625, 142.954101562}}, + {"pw6ue97rjn11", []float64{-54.0446339361, -54.0446337685, 161.525675207, 161.525675543}}, + {"xpmw424pk6", []float64{41.8371927738, 41.8371981382, 142.836180925, 142.836191654}}, + {"rk", []float64{-22.5, -16.875, 146.25, 157.5}}, + {"hmfmn5uwdh2", []float64{-56.755605787, -56.7556044459, 14.6840000153, 14.6840013564}}, + {"defvfj", []float64{22.1319580078, 22.1374511719, -63.544921875, -63.5339355469}}, + {"sg7jd89w", []float64{19.2518234253, 19.2519950867, 38.0806732178, 38.0810165405}}, + {"yn0", []float64{78.75, 80.15625, 90.0, 91.40625}}, + {"3n7re6gv2e92", []float64{-8.50936442614, -8.5093642585, -130.281692259, -130.281691924}}, + {"vyj", []float64{78.75, 80.15625, 85.78125, 87.1875}}, + {"9cntsz", []float64{6.63024902344, 6.6357421875, -91.9006347656, -91.8896484375}}, + {"w4uwq3d", []float64{16.5756225586, 16.5769958496, 96.6055297852, 96.6069030762}}, + {"zffrf", []float64{61.8310546875, 61.875, 172.001953125, 172.045898438}}, + {"hq6142s", []float64{-54.665222168, -54.663848877, 14.1668701172, 14.1682434082}}, + {"m7srp977", []float64{-24.0746498108, -24.0744781494, 62.5606155396, 62.5609588623}}, + {"8v6hxy3sz", []float64{30.3574132919, 30.3574562073, -143.094563484, -143.094520569}}, + {"rf4snrh70h", []float64{-33.0078864098, -33.0078810453, 172.54611969, 172.546130419}}, + {"6", []float64{-45.0, 0.0, -90.0, -45.0}}, + {"ssr6y", []float64{24.3896484375, 24.43359375, 32.958984375, 33.0029296875}}, + {"z", []float64{45.0, 90.0, 135.0, 180.0}}, + {"yz", []float64{84.375, 90.0, 123.75, 135.0}}, + {"n2", []float64{-90.0, -84.375, 101.25, 112.5}}, + {"vfe", []float64{59.0625, 60.46875, 82.96875, 84.375}}, + {"h", []float64{-90.0, -45.0, 0.0, 45.0}}, + {"z3", []float64{50.625, 56.25, 146.25, 157.5}}, + {"z2web5usz", []float64{48.4930944443, 48.4931373596, 155.397105217, 155.397148132}}, + {"8gkc", []float64{18.45703125, 
18.6328125, -139.5703125, -139.21875}}, + {"17de4", []float64{-69.78515625, -69.7412109375, -120.146484375, -120.102539062}}, + {"ky71b3fwd2vv", []float64{-9.52539911494, -9.5253989473, 37.9832738265, 37.9832741618}}, + {"e4gfcm", []float64{15.9796142578, 15.9851074219, -39.6716308594, -39.6606445312}}, + {"kdgwxnnb", []float64{-28.3557128906, -28.3555412292, 27.7387619019, 27.7391052246}}, + {"4nyn2chy", []float64{-50.9260940552, -50.9259223938, -81.5230178833, -81.5226745605}}, + {"u6je3kk57", []float64{56.8451929092, 56.8452358246, 19.0449285507, 19.0449714661}}, + {"nrs9j1hj5tj", []float64{-47.630340457, -47.6303391159, 107.803501636, 107.803502977}}, + {"j", []float64{-90.0, -45.0, 45.0, 90.0}}, + {"krqrcc2hm9uq", []float64{-2.84883890301, -2.84883873537, 20.116208531, 20.1162088662}}, + {"fmceu", []float64{78.0029296875, 78.046875, -76.46484375, -76.4208984375}}, + {"q3w576", []float64{-35.9802246094, -35.9747314453, 109.830322266, 109.841308594}}, + {"19ehn", []float64{-80.859375, -80.8154296875, -108.017578125, -107.973632812}}, + {"zpkvjk", []float64{86.6821289062, 86.6876220703, 141.910400391, 141.921386719}}, + {"7cgwy9jm", []float64{-33.9633750916, -33.9632034302, -6.03527069092, -6.03492736816}}, + {"jju1cefk5v1", []float64{-57.3273199797, -57.3273186386, 50.6941701472, 50.6941714883}}, + {"5tfpq6", []float64{-56.3708496094, -56.3653564453, -19.4128417969, -19.4018554688}}, + {"j", []float64{-90.0, -45.0, 45.0, 90.0}}, + {"dm0vh8", []float64{29.00390625, 29.0093994141, -77.4975585938, -77.4865722656}}, + {"jt5c8hhd58b", []float64{-61.5890081227, -61.5890067816, 72.7797675133, 72.7797688544}}, + {"4ttr", []float64{-57.83203125, -57.65625, -60.1171875, -59.765625}}, + {"d", []float64{0.0, 45.0, -90.0, -45.0}}, + {"sp2d3v6wz", []float64{41.2067556381, 41.2067985535, 0.783762931824, 0.783805847168}}, + {"up1yhbspqy", []float64{85.4337108135, 85.4337161779, 2.67546057701, 2.67547130585}}, + {"1z4bx8dp1ex1", []float64{-50.5331422202, -50.5331420526, 
-97.0504023135, -97.0504019782}}, + {"76qbnz5n1937", []float64{-32.3042606749, -32.3042605072, -23.9569957182, -23.9569953829}}, + {"0w1bvcjb40xd", []float64{-56.112667881, -56.1126677133, -154.778384641, -154.778384306}}, + {"x", []float64{0.0, 45.0, 135.0, 180.0}}, + {"yqbjwyvsmc", []float64{83.9733606577, 83.9733660221, 101.554430723, 101.554441452}}, + {"0hdp2tdy5", []float64{-63.3818435669, -63.3818006516, -177.161622047, -177.161579132}}, + {"s8yv34v", []float64{5.15670776367, 5.15808105469, 32.0429992676, 32.0443725586}}, + {"uc60k7w6", []float64{52.0947647095, 52.0949363708, 36.757850647, 36.7581939697}}, + {"en6h7me8", []float64{35.9335327148, 35.9337043762, -42.0398712158, -42.0395278931}}, + {"1bks34", []float64{-87.8356933594, -87.8302001953, -94.8779296875, -94.8669433594}}, + {"65", []float64{-28.125, -22.5, -90.0, -78.75}}, + {"qwrquvr", []float64{-8.62838745117, -8.62701416016, 122.913665771, 122.915039062}}, + {"3dmchcusw83", []float64{-32.1575818956, -32.1575805545, -104.198862165, -104.198860824}}, + {"urfeh", []float64{89.12109375, 89.1650390625, 14.94140625, 14.9853515625}}, + {"d3g6rs", []float64{10.2612304688, 10.2667236328, -73.8500976562, -73.8391113281}}, + {"wx0v", []float64{40.25390625, 40.4296875, 113.5546875, 113.90625}}, + {"7", []float64{-45.0, 0.0, -45.0, 0.0}}, + {"et", []float64{28.125, 33.75, -22.5, -11.25}}, + {"dqtd0", []float64{36.9140625, 36.9580078125, -71.015625, -70.9716796875}}, + {"vtzuwhxhgjv8", []float64{78.1603311002, 78.1603312679, 78.6718585342, 78.6718588695}}, + {"bn9", []float64{81.5625, 82.96875, -178.59375, -177.1875}}, + {"685n", []float64{-43.9453125, -43.76953125, -63.28125, -62.9296875}}, + {"fqy4nhy", []float64{83.3464050293, 83.3477783203, -70.0405883789, -70.0392150879}}, + {"5q1dw", []float64{-55.810546875, -55.7666015625, -31.376953125, -31.3330078125}}, + {"0sv8m11dgkf", []float64{-63.2313139737, -63.2313126326, -149.543696344, -149.543695003}}, + {"vp435nn", []float64{84.5837402344, 84.5851135254, 
48.3041381836, 48.3055114746}}, + {"wydj", []float64{37.44140625, 37.6171875, 126.5625, 126.9140625}}, + {"ebg8", []float64{4.21875, 4.39453125, -6.328125, -5.9765625}}, + {"ksmb6pfxe4", []float64{-21.0059344769, -21.0059291124, 30.6773900986, 30.6774008274}}, + {"8bv63qv96dp", []float64{4.65156197548, 4.65156331658, -138.804586083, -138.804584742}}, + {"0s0d53hd", []float64{-67.1426010132, -67.1424293518, -156.647872925, -156.647529602}}, + {"1yjf34ubpm", []float64{-55.8393591642, -55.8393537998, -93.1132829189, -93.1132721901}}, + {"823y79hmfe", []float64{2.51137912273, 2.51138448715, -166.129310131, -166.129299402}}, + {"xynnrv", []float64{34.8760986328, 34.8815917969, 177.528076172, 177.5390625}}, + {"9b9ejqcqqdu", []float64{3.37801024318, 3.37801158428, -98.9079111814, -98.9079098403}}, + {"cuuhfkd", []float64{72.5784301758, 72.5798034668, -95.5233764648, -95.5220031738}}, + {"khwceh", []float64{-19.4018554688, -19.3963623047, 9.6240234375, 9.63500976562}}, + {"z8vub32sy6", []float64{50.061403513, 50.0614088774, 165.597878695, 165.597889423}}, + {"4s", []float64{-67.5, -61.875, -67.5, -56.25}}, + {"bsrb4qeqt7eq", []float64{68.9430911466, 68.9430913143, -146.497992687, -146.497992352}}, + {"b1x0sc", []float64{53.5308837891, 53.5363769531, -169.947509766, -169.936523438}}, + {"1ngn2bn2vub", []float64{-50.9324629605, -50.9324616194, -130.739461184, -130.739459842}}, + {"bsm", []float64{68.90625, 70.3125, -150.46875, -149.0625}}, + {"xyzd7", []float64{38.3642578125, 38.408203125, 179.428710938, 179.47265625}}, + {"cvvf1tqs", []float64{77.7248382568, 77.7250099182, -93.0892181396, -93.0888748169}}, + {"fz2tpb1xj", []float64{86.6613578796, 86.661400795, -55.2040243149, -55.2039813995}}, + {"r4zqr7", []float64{-28.4161376953, -28.4106445312, 145.513916016, 145.524902344}}, + {"wth", []float64{28.125, 29.53125, 118.125, 119.53125}}, + {"n", []float64{-90.0, -45.0, 90.0, 135.0}}, + {"4j0ff5xs58h", []float64{-61.3716888428, -61.3716875017, -88.8469666243, 
-88.8469652832}}, + {"c15", []float64{50.625, 52.03125, -130.78125, -129.375}}, + {"tnkgyg", []float64{35.8319091797, 35.8374023438, 51.9763183594, 51.9873046875}}, + {"e99", []float64{8.4375, 9.84375, -21.09375, -19.6875}}, + {"bcrz69", []float64{53.3111572266, 53.3166503906, -135.241699219, -135.230712891}}, + {"e5w29f6", []float64{19.7877502441, 19.7891235352, -36.1312866211, -36.1299133301}}, + {"gykjjcq656b5", []float64{81.0423812829, 81.0423814505, -5.36359190941, -5.36359157413}}, + {"7j62nzypd3be", []float64{-15.4248806275, -15.4248804599, -41.5309696645, -41.5309693292}}, + {"rrsgbbvwt9r7", []float64{-2.14807743207, -2.14807726443, 152.970445342, 152.970445678}}, + {"93kub", []float64{7.8662109375, 7.91015625, -117.0703125, -117.026367188}}, + {"8", []float64{0.0, 45.0, -180.0, -135.0}}, + {"02bcjtw5f84", []float64{-85.5746126175, -85.5746112764, -167.445263565, -167.445262223}}, + {"262dqsqs1", []float64{-31.9242095947, -31.9241666794, -167.752261162, -167.752218246}}, + {"185ss806c", []float64{-89.2085123062, -89.2084693909, -107.379984856, -107.37994194}}, + {"9s6", []float64{23.90625, 25.3125, -109.6875, -108.28125}}, + {"ych1d25e", []float64{50.8891868591, 50.8893585205, 129.478683472, 129.479026794}}, + {"7f2qcykht0d", []float64{-31.1221191287, -31.1221177876, -10.8158227801, -10.815821439}}, + {"5gk7qw", []float64{-71.1145019531, -71.1090087891, -4.98779296875, -4.97680664062}}, + {"7kjutjpu98", []float64{-21.6807460785, -21.6807407141, -25.4336285591, -25.4336178303}}, + {"h64", []float64{-78.75, -77.34375, 14.0625, 15.46875}}, + {"uy57", []float64{79.27734375, 79.453125, 38.3203125, 38.671875}}, + {"r5dtqkydk796", []float64{-24.3631505594, -24.3631503917, 138.799393661, 138.799393997}}, + {"5j", []float64{-61.875, -56.25, -45.0, -33.75}}, + {"xzbszzeu", []float64{44.4705963135, 44.4707679749, 169.798851013, 169.799194336}}, + {"wqjz0fj5e", []float64{34.9920558929, 34.9920988083, 109.375891685, 109.375934601}}, + {"dekz", []float64{19.51171875, 
19.6875, -60.8203125, -60.46875}}, + {"bbyux", []float64{50.009765625, 50.0537109375, -136.450195312, -136.40625}}, + {"rctysz36", []float64{-35.3797531128, -35.3795814514, 177.046394348, 177.046737671}}, + {"xmhvqm1wcw7", []float64{29.0765096247, 29.0765109658, 153.206474036, 153.206475377}}, + {"nhw6c", []float64{-64.2041015625, -64.16015625, 98.8330078125, 98.876953125}}, + {"u9d3gdu", []float64{53.7602233887, 53.7615966797, 25.8233642578, 25.8247375488}}, + {"xenu1tyd0q", []float64{17.6100862026, 17.610091567, 167.067042589, 167.067053318}}, + {"qm70t", []float64{-15.380859375, -15.3369140625, 105.688476562, 105.732421875}}, + {"3g0", []float64{-28.125, -26.71875, -101.25, -99.84375}}, + {"fg", []float64{61.875, 67.5, -56.25, -45.0}}, + {"jq1tn6nn0hfs", []float64{-55.3590513021, -55.3590511344, 58.642276302, 58.6422766373}}, + {"b9kmw", []float64{52.998046875, 53.0419921875, -151.259765625, -151.215820312}}, + {"z7f9pj6", []float64{66.2983703613, 66.2997436523, 150.07598877, 150.077362061}}, + {"k", []float64{-45.0, 0.0, 0.0, 45.0}}, + {"hqbvrz4x", []float64{-51.0687446594, -51.068572998, 12.6486968994, 12.6490402222}}, + {"2", []float64{-45.0, 0.0, -180.0, -135.0}}, + {"81es", []float64{9.140625, 9.31640625, -175.078125, -174.7265625}}, + {"nyn6mf2", []float64{-55.8421325684, -55.8407592773, 132.791748047, 132.793121338}}, + {"6", []float64{-45.0, 0.0, -90.0, -45.0}}, + {"46hfqnuv", []float64{-78.3165550232, -78.3163833618, -71.8001174927, -71.7997741699}}, + {"7tne", []float64{-16.34765625, -16.171875, -13.359375, -13.0078125}}, + {"z7pkduh4g2c", []float64{62.6884643734, 62.6884657145, 156.571796089, 156.571797431}}, + {"xgmm", []float64{19.16015625, 19.3359375, 176.1328125, 176.484375}}, + {"he054x", []float64{-72.5592041016, -72.5537109375, 22.6098632812, 22.6208496094}}, + {"rqnu21r8g", []float64{-10.4959344864, -10.495891571, 155.752615929, 155.752658844}}, + {"k8xg8n36", []float64{-41.5375900269, -41.5374183655, 33.4001541138, 33.4004974365}}, + 
{"d60md", []float64{12.216796875, 12.2607421875, -78.310546875, -78.2666015625}}, + {"50tqctv", []float64{-85.9693908691, -85.9680175781, -37.5444030762, -37.5430297852}}, + {"yxknv54eqnz", []float64{86.984847039, 86.9848483801, 118.34842667, 118.348428011}}, + {"5zpczbcjgj", []float64{-50.3122490644, -50.3122437, -0.00948429107666, -0.0094735622406}}, + {"yp7nz0rr7d8", []float64{86.9704046845, 86.9704060256, 94.5364737511, 94.5364750922}}, + {"kfjb9s772f", []float64{-33.6381947994, -33.638189435, 41.9063508511, 41.9063615799}}, + {"q", []float64{-45.0, 0.0, 90.0, 135.0}}, + {"cx1f", []float64{84.7265625, 84.90234375, -110.0390625, -109.6875}}, + {"y", []float64{45.0, 90.0, 90.0, 135.0}}, + {"hdtcdg62524", []float64{-75.6559753418, -75.6559740007, 30.7100191712, 30.7100205123}}, + {"fpy32", []float64{88.8134765625, 88.857421875, -81.2109375, -81.1669921875}}, + {"256qs", []float64{-25.576171875, -25.5322265625, -176.66015625, -176.616210938}}, + {"6rgzd89v3r48", []float64{-0.0842052698135, -0.0842051021755, -73.3642389625, -73.3642386273}}, + {"c8430hd23", []float64{45.2005434036, 45.200586319, -109.33280468, -109.332761765}}, + {"xtn", []float64{28.125, 29.53125, 165.9375, 167.34375}}, + {"1u2s9", []float64{-65.302734375, -65.2587890625, -100.502929688, -100.458984375}}, + {"5c80k35m35p", []float64{-81.512144208, -81.5121428668, -11.058716923, -11.0587155819}}, + {"5", []float64{-90.0, -45.0, -45.0, 0.0}}, + {"u7c0rfsdjn", []float64{66.1518037319, 66.1518090963, 13.0032205582, 13.003231287}}, + {"f4ce8gy", []float64{61.1045837402, 61.1059570312, -87.8494262695, -87.8480529785}}, + {"pbd4y02", []float64{-86.7027282715, -86.7013549805, 171.826171875, 171.827545166}}, + {"9n", []float64{33.75, 39.375, -135.0, -123.75}}, + {"ztybk5c8mw", []float64{77.4083697796, 77.408375144, 167.170264721, 167.17027545}}, + {"ks8pv0cv5u6j", []float64{-18.3201934956, -18.320193328, 22.7222934365, 22.7222937718}}, + {"nuh", []float64{-67.5, -66.09375, 129.375, 130.78125}}, + 
{"6khcf5xumxz", []float64{-22.1723856032, -22.1723842621, -71.9715334475, -71.9715321064}}, + {"nj2", []float64{-60.46875, -59.0625, 90.0, 91.40625}}, + {"mdd459nebt64", []float64{-30.5797721073, -30.5797719397, 70.4752591252, 70.4752594605}}, + {"e7ujxh", []float64{22.0825195312, 22.0880126953, -27.8173828125, -27.8063964844}}, + {"eb1zvy9n", []float64{1.39904022217, 1.39921188354, -8.53500366211, -8.53466033936}}, + {"1huxmt", []float64{-61.9793701172, -61.9738769531, -128.430175781, -128.419189453}}, + {"v4cyrgyg", []float64{61.5884971619, 61.5886688232, 47.8107833862, 47.811126709}}, + {"j6h0dvut", []float64{-78.6296653748, -78.6294937134, 62.0020294189, 62.0023727417}}, + {"r8j", []float64{-45.0, -43.59375, 164.53125, 165.9375}}, + {"feyj018b7", []float64{66.9809389114, 66.9809818268, -59.0613412857, -59.0612983704}}, + {"tw33887htf7j", []float64{35.4220805503, 35.422080718, 69.2841558158, 69.2841561511}}, + {"3rbgkj8z4cy8", []float64{-0.803537517786, -0.803537350148, -122.518374547, -122.518374212}}, + {"gq30gnn9dg", []float64{80.3213185072, 80.3213238716, -32.2028696537, -32.2028589249}}, + {"rek4qy2", []float64{-26.2889099121, -26.2875366211, 163.421630859, 163.42300415}}, + {"j8", []float64{-90.0, -84.375, 67.5, 78.75}}, + {"f2rev1d7", []float64{47.0741844177, 47.0743560791, -67.9803085327, -67.97996521}}, + {"rw1sg2f78x", []float64{-10.4102808237, -10.4102754593, 159.755308628, 159.755319357}}, + {"r", []float64{-45.0, 0.0, 135.0, 180.0}}, + {"8xfw", []float64{44.6484375, 44.82421875, -153.984375, -153.6328125}}, + {"fj20", []float64{74.53125, 74.70703125, -90.0, -89.6484375}}, + {"m76w867pt9yj", []float64{-25.5625145696, -25.562514402, 59.7809752822, 59.7809756175}}, + {"n", []float64{-90.0, -45.0, 90.0, 135.0}}, + {"wdb4v9vwt4ez", []float64{15.9628918581, 15.9628920257, 112.749471925, 112.74947226}}, + {"m53rvw", []float64{-25.3234863281, -25.3179931641, 46.9995117188, 47.0104980469}}, + {"f33", []float64{52.03125, 53.4375, -77.34375, -75.9375}}, + 
{"0t36bun98", []float64{-59.9631214142, -59.9630784988, -155.700302124, -155.700259209}}, + {"ezs1", []float64{42.36328125, 42.5390625, -5.625, -5.2734375}}, + {"jb", []float64{-90.0, -84.375, 78.75, 90.0}}, + {"vd0k", []float64{56.953125, 57.12890625, 67.8515625, 68.203125}}, + {"39cqr9bd", []float64{-34.0476608276, -34.0474891663, -110.411911011, -110.411567688}}, + {"n8r", []float64{-88.59375, -87.1875, 122.34375, 123.75}}, + {"1b6", []float64{-88.59375, -87.1875, -98.4375, -97.03125}}, + {"358wn7v3tch", []float64{-24.2369502783, -24.2369489372, -134.014754891, -134.01475355}}, + {"d506n2v77qf", []float64{17.2312764823, 17.2312778234, -89.366427362, -89.3664260209}}, + {"qb", []float64{-45.0, -39.375, 123.75, 135.0}}, + {"sn", []float64{33.75, 39.375, 0.0, 11.25}}, + {"5rj", []float64{-50.625, -49.21875, -26.71875, -25.3125}}, + {"0y51c", []float64{-55.9423828125, -55.8984375, -141.987304688, -141.943359375}}, + {"r85z8", []float64{-43.681640625, -43.6376953125, 162.7734375, 162.817382812}}, + {"rk5z", []float64{-21.26953125, -21.09375, 151.5234375, 151.875}}, + {"vzn2", []float64{84.375, 84.55078125, 87.5390625, 87.890625}}, + {"bjvk", []float64{78.046875, 78.22265625, -172.6171875, -172.265625}}, + {"b9f", []float64{54.84375, 56.25, -154.6875, -153.28125}}, + {"q0u1y3mu4k", []float64{-40.4660582542, -40.4660528898, 95.907651186, 95.9076619148}}, + {"r7", []float64{-28.125, -22.5, 146.25, 157.5}}, + {"cv90dz31", []float64{76.0653877258, 76.0655593872, -99.7215270996, -99.7211837769}}, + {"gxfeekgv9sng", []float64{89.2360430025, 89.2360431701, -18.8363294676, -18.8363291323}}, + {"92t", []float64{2.8125, 4.21875, -116.71875, -115.3125}}, + {"m06", []float64{-43.59375, -42.1875, 47.8125, 49.21875}}, + {"n27p3f3c", []float64{-87.306804657, -87.3066329956, 105.548057556, 105.548400879}}, + {"jjptjwvt2", []float64{-60.9581136703, -60.958070755, 55.7961273193, 55.7961702347}}, + {"u258hxwr", []float64{45.0424003601, 45.0425720215, 16.3782119751, 16.3785552979}}, + 
{"t47", []float64{12.65625, 14.0625, 49.21875, 50.625}}, + {"e", []float64{0.0, 45.0, -45.0, 0.0}}, + {"0s", []float64{-67.5, -61.875, -157.5, -146.25}}, + {"4drwu7v94g3", []float64{-76.1364381015, -76.1364367604, -56.758684963, -56.7586836219}}, + {"wk14pc", []float64{22.8570556641, 22.8625488281, 102.996826172, 103.0078125}}, + {"w5xg", []float64{20.21484375, 20.390625, 100.8984375, 101.25}}, + {"f2z2dvf7", []float64{49.3387413025, 49.3389129639, -68.4307479858, -68.4304046631}}, + {"duk0212sgtp", []float64{23.9579039812, 23.9579053223, -50.6241537631, -50.624152422}}, + {"h7z", []float64{-68.90625, -67.5, 21.09375, 22.5}}, + {"31k02yzy90", []float64{-37.8866100311, -37.8866046667, -129.331355095, -129.331344366}}, + {"vus0k", []float64{70.3564453125, 70.400390625, 84.55078125, 84.5947265625}}, + {"5m0sjm4rrvf", []float64{-61.1431337893, -61.1431324482, -32.8127369285, -32.8127355874}}, + {"4syd3n9", []float64{-62.8500366211, -62.8486633301, -58.3140563965, -58.3126831055}}, + {"n", []float64{-90.0, -45.0, 90.0, 135.0}}, + {"g", []float64{45.0, 90.0, -45.0, 0.0}}, + {"ntn69zcu", []float64{-61.392288208, -61.3921165466, 121.368370056, 121.368713379}}, + {"vnc4m", []float64{83.3642578125, 83.408203125, 46.6259765625, 46.669921875}}, + {"sy848jtk7wdj", []float64{37.0329307951, 37.0329309627, 33.7573626637, 33.757362999}}, + {"ry7y", []float64{-8.7890625, -8.61328125, 174.0234375, 174.375}}, + {"5k7gesef08j", []float64{-65.453453064, -65.4534517229, -28.3175759017, -28.3175745606}}, + {"n5gnev", []float64{-67.7362060547, -67.7307128906, 94.3835449219, 94.39453125}}, + {"kz3eeg1trf7", []float64{-3.58612284064, -3.58612149954, 36.0265664756, 36.0265678167}}, + {"t55rxfr7", []float64{18.2062339783, 18.2064056396, 49.9208450317, 49.9211883545}}, + {"tm9", []float64{30.9375, 32.34375, 57.65625, 59.0625}}, + {"pjw", []float64{-59.0625, -57.65625, 143.4375, 144.84375}}, + {"f", []float64{45.0, 90.0, -90.0, -45.0}}, + {"2my", []float64{-12.65625, -11.25, -160.3125, 
-158.90625}}, + {"3w6bwyt", []float64{-9.72015380859, -9.71878051758, -108.329315186, -108.327941895}}, + {"v4s8r3q", []float64{59.1133117676, 59.1146850586, 51.6549682617, 51.6563415527}}, + {"ggx8be488kyn", []float64{64.8359277472, 64.8359279148, -0.677700340748, -0.677700005472}}, + {"95w", []float64{19.6875, 21.09375, -126.5625, -125.15625}}, + {"9jck7spubf", []float64{33.1136190891, 33.1136244535, -133.077703714, -133.077692986}}, + {"f1zfe", []float64{55.283203125, 55.3271484375, -78.9697265625, -78.92578125}}, + {"p5ycnk9j", []float64{-68.7048912048, -68.7047195435, 144.768218994, 144.768562317}}, + {"v", []float64{45.0, 90.0, 45.0, 90.0}}, + {"zk", []float64{67.5, 73.125, 146.25, 157.5}}, + {"rkq7z", []float64{-20.4345703125, -20.390625, 155.346679688, 155.390625}}, + {"j5wsc3t4", []float64{-69.4689559937, -69.4687843323, 54.2024230957, 54.2027664185}}, + {"56szw", []float64{-74.619140625, -74.5751953125, -26.806640625, -26.7626953125}}, + {"6xp21s7", []float64{-5.60165405273, -5.60028076172, -57.2346496582, -57.2332763672}}, + {"gbmymdgc5x", []float64{47.520198226, 47.5202035904, -2.91706323624, -2.9170525074}}, + {"bmm935md5m", []float64{74.7691994905, 74.769204855, -160.963987112, -160.963976383}}, + {"z2v55d8", []float64{49.7598266602, 49.7611999512, 153.435058594, 153.436431885}}, + {"q232vgb0", []float64{-43.4413146973, -43.4411430359, 103.260498047, 103.26084137}}, + {"b7kf7b5uz4", []float64{63.6775839329, 63.6775892973, -161.900067329, -161.900056601}}, + {"y6", []float64{56.25, 61.875, 101.25, 112.5}}, + {"3xdf2ts4d", []float64{-2.38635063171, -2.38630771637, -108.605260849, -108.605217934}}, + {"0szy1551", []float64{-62.2099113464, -62.2097396851, -146.553497314, -146.553153992}}, + {"z", []float64{45.0, 90.0, 135.0, 180.0}}, + {"kk86s", []float64{-19.248046875, -19.2041015625, 11.77734375, 11.8212890625}}, + {"tqq", []float64{35.15625, 36.5625, 64.6875, 66.09375}}, + {"3gcy97b", []float64{-22.7430725098, -22.7416992188, -98.7341308594, 
-98.7327575684}}, + {"5dnz5z1d", []float64{-77.4807357788, -77.4805641174, -12.8409576416, -12.8406143188}}, + {"eumv1pe", []float64{24.8263549805, 24.8277282715, -3.11599731445, -3.11462402344}}, + {"2kxmbgqtfc3", []float64{-18.6579112709, -18.6579099298, -158.512682766, -158.512681425}}, + {"hc5uf211rpb", []float64{-83.5397829115, -83.5397815704, 39.1239881516, 39.1239894927}}, + {"p4khbd21", []float64{-76.496257782, -76.4960861206, 140.646972656, 140.647315979}}, + {"xuq", []float64{23.90625, 25.3125, 177.1875, 178.59375}}, + {"gn63cf91nhf", []float64{80.47779724, 80.4777985811, -41.7573997378, -41.7573983967}}, + {"5hd1nh3", []float64{-64.4883728027, -64.4869995117, -41.922454834, -41.921081543}}, + {"ustjn5", []float64{71.2078857422, 71.2133789062, 29.794921875, 29.8059082031}}, + {"btv2", []float64{77.34375, 77.51953125, -150.1171875, -149.765625}}, + {"7ds5rkh3u1", []float64{-30.3439325094, -30.343927145, -16.5503883362, -16.5503776073}}, + {"54", []float64{-78.75, -73.125, -45.0, -33.75}}, + {"7sr", []float64{-21.09375, -19.6875, -12.65625, -11.25}}, + {"kr552fbb", []float64{-5.03860473633, -5.03843307495, 15.5027389526, 15.5030822754}}, + {"8uc453nv4qq4", []float64{27.0766978338, 27.0766980015, -144.691553414, -144.691553079}}, + {"6n3m", []float64{-8.96484375, -8.7890625, -88.2421875, -87.890625}}, + {"rptzf8", []float64{-1.4501953125, -1.44470214844, 143.195800781, 143.206787109}}, + {"x63b", []float64{12.65625, 12.83203125, 148.7109375, 149.0625}}, + {"qwj7353", []float64{-10.6608581543, -10.6594848633, 119.928131104, 119.929504395}}, + {"j", []float64{-90.0, -45.0, 45.0, 90.0}}, + {"v6", []float64{56.25, 61.875, 56.25, 67.5}}, + {"5mpwtp", []float64{-60.6939697266, -60.6884765625, -22.9833984375, -22.9724121094}}, + {"ukgs", []float64{72.421875, 72.59765625, 16.171875, 16.5234375}}, + {"qxyb4g6qf", []float64{-1.3872385025, -1.38719558716, 122.116212845, 122.11625576}}, + {"qhww7zdb5v", []float64{-18.5476416349, -18.5476362705, 99.3093574047, 
99.3093681335}}, + {"wfwffcw5vd", []float64{14.5547926426, 14.554798007, 133.37151289, 133.371523619}}, + {"rynp", []float64{-10.01953125, -9.84375, 177.1875, 177.5390625}}, + {"fb57ykxryn82", []float64{45.6852641702, 45.6852643378, -51.3948151097, -51.3948147744}}, + {"30sq", []float64{-41.1328125, -40.95703125, -129.0234375, -128.671875}}, + {"9", []float64{0.0, 45.0, -135.0, -90.0}}, + {"e5gjz7", []float64{22.1209716797, 22.1264648438, -40.4626464844, -40.4516601562}}, + {"t6vkbx", []float64{16.3421630859, 16.34765625, 63.6547851562, 63.6657714844}}, + {"3e", []float64{-28.125, -22.5, -112.5, -101.25}}, + {"th", []float64{22.5, 28.125, 45.0, 56.25}}, + {"7j", []float64{-16.875, -11.25, -45.0, -33.75}}, + {"2", []float64{-45.0, 0.0, -180.0, -135.0}}, + {"1r15z2z5", []float64{-49.9611854553, -49.9610137939, -122.015533447, -122.015190125}}, + {"k9e57kg2f3", []float64{-35.9649842978, -35.9649789333, 26.866132021, 26.8661427498}}, + {"4w", []float64{-56.25, -50.625, -67.5, -56.25}}, + {"82c582qhsx7", []float64{4.83616903424, 4.83617037535, -167.324326783, -167.324325442}}, + {"qzdzkwwdc", []float64{-1.50190830231, -1.50186538696, 127.823910713, 127.823953629}}, + {"6xn26p6fnr0", []float64{-5.54084837437, -5.54084703326, -58.6190021038, -58.6190007627}}, + {"wm4y0xu8ee", []float64{29.2223614454, 29.2223668098, 105.14549017, 105.145500898}}, + {"rke5rewv", []float64{-19.0961265564, -19.095954895, 150.807609558, 150.807952881}}, + {"0bn3vwcjj", []float64{-89.6544456482, -89.6544027328, -137.217650414, -137.217607498}}, + {"5", []float64{-90.0, -45.0, -45.0, 0.0}}, + {"red", []float64{-25.3125, -23.90625, 160.3125, 161.71875}}, + {"r2", []float64{-45.0, -39.375, 146.25, 157.5}}, + {"v7qptt2u5k", []float64{64.6291565895, 64.6291619539, 64.9303686619, 64.9303793907}}, + {"pey", []float64{-68.90625, -67.5, 165.9375, 167.34375}}, + {"r7cg5cz8", []float64{-23.3692932129, -23.3691215515, 148.886032104, 148.886375427}}, + {"9", []float64{0.0, 45.0, -135.0, -90.0}}, + 
{"qqr5mxz", []float64{-9.22988891602, -9.228515625, 111.345062256, 111.346435547}}, + {"wuuvdz7x2", []float64{27.7266168594, 27.7266597748, 130.555343628, 130.555386543}}, + {"t", []float64{0.0, 45.0, 45.0, 90.0}}, + {"4mgwkd9yp3r5", []float64{-56.5428471006, -56.542846933, -73.6276473105, -73.6276469752}}, + {"dpk6jbemhyvh", []float64{41.1364542693, 41.1364544369, -83.7660782039, -83.7660778686}}, + {"768py6th4818", []float64{-29.5607757568, -29.5607755892, -33.4683660418, -33.4683657065}}, + {"q8", []float64{-45.0, -39.375, 112.5, 123.75}}, + {"xmgvmq9cf", []float64{33.3026075363, 33.3026504517, 151.756639481, 151.756682396}}, + {"6rhhdy3", []float64{-4.79965209961, -4.79827880859, -73.0027770996, -73.0014038086}}, + {"05dd6myj4xqj", []float64{-69.884508457, -69.8845082894, -176.377142966, -176.377142631}}, + {"yw6", []float64{80.15625, 81.5625, 115.3125, 116.71875}}, + {"6mt6h4jrw", []float64{-13.6986637115, -13.6986207962, -71.1839389801, -71.1838960648}}, + {"86ymqv2s", []float64{16.4211273193, 16.4212989807, -159.663619995, -159.663276672}}, + {"vygq9vr6umc", []float64{84.1406701505, 84.1406714916, 83.4073568881, 83.4073582292}}, + {"89g6sp8p81", []float64{10.3256946802, 10.3257000446, -152.75390625, -152.753895521}}, + {"w", []float64{0.0, 45.0, 90.0, 135.0}}, + {"6st", []float64{-19.6875, -18.28125, -60.46875, -59.0625}}, + {"v", []float64{45.0, 90.0, 45.0, 90.0}}, + {"sk", []float64{22.5, 28.125, 11.25, 22.5}}, + {"t", []float64{0.0, 45.0, 45.0, 90.0}}, + {"2", []float64{-45.0, 0.0, -180.0, -135.0}}, + {"f3pf69r61v4", []float64{51.0277444124, 51.0277457535, -67.7316650748, -67.7316637337}}, + {"r9qde0cj", []float64{-37.5243186951, -37.5241470337, 166.773834229, 166.774177551}}, + {"nbm5pqh", []float64{-88.0334472656, -88.0320739746, 131.10534668, 131.106719971}}, + {"u7f9fh2dzpw9", []float64{66.4252256043, 66.425225772, 14.8545113951, 14.8545117304}}, + {"pnzr19j29", []float64{-50.7952022552, -50.7951593399, 145.268483162, 145.268526077}}, + 
{"3e7rf8ww4fe5", []float64{-25.3526548482, -25.3526546806, -107.810775787, -107.810775451}}, + {"1x", []float64{-50.625, -45.0, -112.5, -101.25}}, + {"4", []float64{-90.0, -45.0, -90.0, -45.0}}, + {"4h4swejxpzv", []float64{-66.6912616789, -66.6912603378, -86.1908380687, -86.1908367276}}, + {"hy6gnf0u", []float64{-54.3047332764, -54.304561615, 37.9148483276, 37.9151916504}}, + {"xnycujudf", []float64{38.3084249496, 38.308467865, 144.67423439, 144.674277306}}, + {"t6bypmux9z54", []float64{16.5563485399, 16.5563487075, 57.6295499504, 57.6295502856}}, + {"ufw1c6xp2t", []float64{59.3851214647, 59.3851268291, 42.2520661354, 42.2520768642}}, + {"42jpxwe9byn", []float64{-88.6456024647, -88.6456011236, -71.3843134046, -71.3843120635}}, + {"5", []float64{-90.0, -45.0, -45.0, 0.0}}, + {"tj0de", []float64{28.564453125, 28.6083984375, 45.8349609375, 45.87890625}}, + {"e0kqd", []float64{2.548828125, 2.5927734375, -38.935546875, -38.8916015625}}, + {"bzysgj0rr", []float64{89.4574213028, 89.4574642181, -136.976895332, -136.976852417}}, + {"bxdp8ycgtcqt", []float64{88.543546591, 88.5435467586, -154.651882276, -154.651881941}}, + {"k0kx2785", []float64{-42.2995948792, -42.2994232178, 6.33911132812, 6.33945465088}}, + {"75ugg", []float64{-23.2470703125, -23.203125, -38.1884765625, -38.14453125}}, + {"sbbsbv2r", []float64{5.08375167847, 5.08392333984, 34.4864273071, 34.4867706299}}, + {"u7vunvq7rn", []float64{66.8263041973, 66.8263095617, 19.6414518356, 19.6414625645}}, + {"w4m7uexcx", []float64{13.3349132538, 13.3349561691, 97.591509819, 97.5915527344}}, + {"350g", []float64{-27.59765625, -27.421875, -133.9453125, -133.59375}}, + {"p", []float64{-90.0, -45.0, 135.0, 180.0}}, + {"t8w2g1m1", []float64{2.95137405396, 2.95154571533, 76.4277648926, 76.4281082153}}, + {"96k738f", []float64{13.2316589355, 13.2330322266, -117.704772949, -117.703399658}}, + {"c26nv174", []float64{47.5999832153, 47.6001548767, -120.713653564, -120.713310242}}, + {"s67g9ehvds", []float64{13.2889294624, 
13.2889348269, 16.5959858894, 16.5959966183}}, + {"4ybt4sw", []float64{-51.1276245117, -51.1262512207, -55.4287719727, -55.4273986816}}, + {"5jqnz9zqyk12", []float64{-59.2714333534, -59.2714331858, -36.2226838991, -36.2226835638}}, + {"31d5nguy", []float64{-36.0135269165, -36.0133552551, -131.884346008, -131.884002686}}, + {"m4bbcg", []float64{-29.3829345703, -29.3774414062, 46.1315917969, 46.142578125}}, + {"u", []float64{45.0, 90.0, 0.0, 45.0}}, + {"mkx0hzzq", []float64{-19.6438980103, -19.6437263489, 66.3124465942, 66.312789917}}, + {"9bv7x0", []float64{4.833984375, 4.83947753906, -93.5595703125, -93.5485839844}}, + {"k", []float64{-45.0, 0.0, 0.0, 45.0}}, + {"0x7pv0083h", []float64{-47.8563809395, -47.8563755751, -153.060793877, -153.060783148}}, + {"ruqzw0ztgw", []float64{-19.7702515125, -19.7702461481, 178.516309261, 178.51631999}}, + {"xpy5c3c", []float64{44.2625427246, 44.2639160156, 143.493804932, 143.495178223}}, + {"bnn6fxqm", []float64{79.2740821838, 79.2742538452, -171.09249115, -171.092147827}}, + {"fnm9u1t", []float64{80.4721069336, 80.4734802246, -82.0829772949, -82.0816040039}}, + {"jfvn54", []float64{-73.4655761719, -73.4600830078, 85.9130859375, 85.9240722656}}, + {"dj", []float64{28.125, 33.75, -90.0, -78.75}}, + {"mxfstk2s", []float64{-0.591201782227, -0.59103012085, 71.2470245361, 71.2473678589}}, + {"v", []float64{45.0, 90.0, 45.0, 90.0}}, + {"4xy4p", []float64{-46.0546875, -46.0107421875, -58.7548828125, -58.7109375}}, + {"cbdg47d", []float64{48.3590698242, 48.3604431152, -97.2811889648, -97.2798156738}}, + {"1ddv9", []float64{-74.970703125, -74.9267578125, -108.588867188, -108.544921875}}, + {"4cn07cd", []float64{-84.3228149414, -84.3214416504, -47.6449584961, -47.6435852051}}, + {"dq8fjtv64n", []float64{36.9460237026, 36.946029067, -77.4463176727, -77.4463069439}}, + {"gx2qc4", []float64{86.9787597656, 86.9842529297, -22.1044921875, -22.0935058594}}, + {"yx", []float64{84.375, 90.0, 112.5, 123.75}}, + {"44", []float64{-78.75, -73.125, 
-90.0, -78.75}}, + {"zz679sbs", []float64{86.4232635498, 86.4234352112, 171.980667114, 171.981010437}}, + {"2fdh2u769u7y", []float64{-30.1666307822, -30.1666306145, -143.399997689, -143.399997354}}, + {"k", []float64{-45.0, 0.0, 0.0, 45.0}}, + {"11cxx", []float64{-78.837890625, -78.7939453125, -132.583007812, -132.5390625}}, + {"ygf2", []float64{66.09375, 66.26953125, 126.9140625, 127.265625}}, + {"m0uscc", []float64{-39.9407958984, -39.9353027344, 51.4050292969, 51.416015625}}, + {"y", []float64{45.0, 90.0, 90.0, 135.0}}, + {"kn2m", []float64{-8.96484375, -8.7890625, 0.3515625, 0.703125}}, + {"k56d043m45", []float64{-26.3539534807, -26.3539481163, 3.51742744446, 3.51743817329}}, + {"m7", []float64{-28.125, -22.5, 56.25, 67.5}}, + {"pwe", []float64{-53.4375, -52.03125, 161.71875, 163.125}}, + {"1j7q0hs8u", []float64{-59.3892145157, -59.3891716003, -130.423336029, -130.423293114}}, + {"f264077wzn", []float64{46.776856184, 46.7768615484, -75.9214067459, -75.9213960171}}, + {"2jvn", []float64{-11.6015625, -11.42578125, -172.96875, -172.6171875}}, + {"4d4ghmzzsjb9", []float64{-78.1897520833, -78.1897519156, -63.4352295846, -63.4352292493}}, + {"1h2n6vef0we", []float64{-64.9645265937, -64.9645252526, -134.873975068, -134.873973727}}, + {"vnfc1v2yh", []float64{83.1744003296, 83.1744432449, 48.9452934265, 48.9453363419}}, + {"2b5brk", []float64{-44.9340820312, -44.9285888672, -140.657958984, -140.646972656}}, + {"yntb", []float64{81.5625, 81.73828125, 98.0859375, 98.4375}}, + {"5fj85yy", []float64{-78.7129211426, -78.7115478516, -3.34259033203, -3.34121704102}}, + {"cm36wevqn", []float64{74.9923324585, 74.9923753738, -121.699075699, -121.699032784}}, + {"7hf52n", []float64{-17.6770019531, -17.6715087891, -42.1875, -42.1765136719}}, + {"dh1sz", []float64{23.3349609375, 23.37890625, -87.5830078125, -87.5390625}}, + {"h", []float64{-90.0, -45.0, 0.0, 45.0}}, + {"7ny5k0k28", []float64{-6.4585018158, -6.45845890045, -36.3808822632, -36.3808393478}}, + {"w7", []float64{16.875, 
22.5, 101.25, 112.5}}, + {"f9j0296wjz", []float64{50.6768792868, 50.6768846512, -60.443097353, -60.4430866241}}, + {"v316n9gu7y5", []float64{50.9869372845, 50.9869386256, 58.2987718284, 58.2987731695}}, + {"9", []float64{0.0, 45.0, -135.0, -90.0}}, + {"0jt97bxt", []float64{-58.8391685486, -58.8389968872, -172.090530396, -172.090187073}}, + {"cfdbvrc", []float64{59.236907959, 59.23828125, -97.1507263184, -97.1493530273}}, + {"95d", []float64{19.6875, 21.09375, -132.1875, -130.78125}}, + {"my", []float64{-11.25, -5.625, 78.75, 90.0}}, + {"5t", []float64{-61.875, -56.25, -22.5, -11.25}}, + {"n", []float64{-90.0, -45.0, 90.0, 135.0}}, + {"4", []float64{-90.0, -45.0, -90.0, -45.0}}, + {"k2j", []float64{-45.0, -43.59375, 18.28125, 19.6875}}, + {"rjygh", []float64{-12.12890625, -12.0849609375, 144.66796875, 144.711914062}}, + {"xs79f", []float64{24.2138671875, 24.2578125, 162.509765625, 162.553710938}}, + {"nmz3x305vedq", []float64{-57.3864214495, -57.3864212818, 111.764155068, 111.764155403}}, + {"u1hq", []float64{51.6796875, 51.85546875, 5.9765625, 6.328125}}, + {"2v8jpgs92zxm", []float64{-13.1641120277, -13.1641118601, -145.903202109, -145.903201774}}, + {"f54n04uq", []float64{62.9458236694, 62.9459953308, -87.1816635132, -87.1813201904}}, + {"8zkv13p8", []float64{41.6656494141, 41.6658210754, -139.505081177, -139.504737854}}, + {"z03j6ktm2", []float64{47.354722023, 47.3547649384, 136.512336731, 136.512379646}}, + {"qd266u", []float64{-31.9262695312, -31.9207763672, 112.972412109, 112.983398438}}, + {"783q5", []float64{-42.5390625, -42.4951171875, -20.6103515625, -20.56640625}}, + {"ynqe7fy5s9m6", []float64{80.7432531193, 80.7432532869, 99.3138598278, 99.3138601631}}, + {"pp", []float64{-50.625, -45.0, 135.0, 146.25}}, + {"bd", []float64{56.25, 61.875, -157.5, -146.25}}, + {"8573xzmt2qy", []float64{18.5856847465, 18.5856860876, -175.081539452, -175.081538111}}, + {"gwhbzrftu0", []float64{78.9253950119, 78.9254003763, -15.4981040955, -15.4980933666}}, + {"h42w9e805", 
[]float64{-76.1819458008, -76.1819028854, 0.769171714783, 0.769214630127}}, + {"uzbhd66135d1", []float64{89.397358764, 89.3973589316, 33.8516691327, 33.851669468}}, + {"zf", []float64{56.25, 61.875, 168.75, 180.0}}, + {"6q2r", []float64{-8.61328125, -8.4375, -78.3984375, -78.046875}}, + {"qtxy7v4w9", []float64{-12.9352855682, -12.9352426529, 123.566708565, 123.56675148}}, + {"58", []float64{-90.0, -84.375, -22.5, -11.25}}, + {"r", []float64{-45.0, 0.0, 135.0, 180.0}}, + {"8u9qjb5qw", []float64{26.368303299, 26.3683462143, -144.234781265, -144.23473835}}, + {"48sx", []float64{-85.95703125, -85.78125, -61.171875, -60.8203125}}, + {"690tdt6", []float64{-38.3793640137, -38.3779907227, -66.6842651367, -66.6828918457}}, + {"qm8", []float64{-14.0625, -12.65625, 101.25, 102.65625}}, + {"2mj", []float64{-16.875, -15.46875, -161.71875, -160.3125}}, + {"3e5", []float64{-28.125, -26.71875, -108.28125, -106.875}}, + {"t", []float64{0.0, 45.0, 45.0, 90.0}}, + {"f6dg1tndqxy", []float64{59.6177373827, 59.6177387238, -74.8076811433, -74.8076798022}}, + {"c", []float64{45.0, 90.0, -135.0, -90.0}}, + {"9f3eqkhmf", []float64{13.2504987717, 13.250541687, -98.8600444794, -98.860001564}}, + {"yuqghk1x5z", []float64{69.4568055868, 69.4568109512, 133.431175947, 133.431186676}}, + {"w30rynrjmy", []float64{7.02257037163, 7.02257573605, 101.875094175, 101.875104904}}, + {"m25ergh6", []float64{-44.4118881226, -44.4117164612, 61.5182876587, 61.5186309814}}, + {"jqznhq", []float64{-50.9436035156, -50.9381103516, 66.2805175781, 66.2915039062}}, + {"3u4", []float64{-22.5, -21.09375, -98.4375, -97.03125}}, + {"fzr", []float64{85.78125, 87.1875, -46.40625, -45.0}}, + {"je", []float64{-73.125, -67.5, 67.5, 78.75}}, + {"pmztf5q7e7d4", []float64{-56.6270351037, -56.6270349361, 156.893490851, 156.893491186}}, + {"xdknsz", []float64{13.8372802734, 13.8427734375, 163.333740234, 163.344726562}}, + {"736h1c7d5h", []float64{-37.2583937645, -37.2583884001, -30.8556604385, -30.8556497097}}, + {"c57pmmv9u", 
[]float64{64.5875501633, 64.5875930786, -130.542812347, -130.542769432}}, + {"80qnjsh", []float64{2.48291015625, 2.48428344727, -171.315307617, -171.313934326}}, + {"kp", []float64{-5.625, 0.0, 0.0, 11.25}}, + {"u6yufbthbdw", []float64{61.3072863221, 61.3072876632, 20.8699330688, 20.8699344099}}, + {"c4yj93f16ys", []float64{61.4454093575, 61.4454106987, -126.504698396, -126.504697055}}, + {"ygkp", []float64{64.51171875, 64.6875, 129.375, 129.7265625}}, + {"h1ypf", []float64{-78.7939453125, -78.75, 8.525390625, 8.5693359375}}, + {"hz76my5h4qe", []float64{-48.7895616889, -48.7895603478, 38.5772185028, 38.5772198439}}, + {"zh1cw", []float64{67.763671875, 67.8076171875, 137.724609375, 137.768554688}}, + {"00", []float64{-90.0, -84.375, -180.0, -168.75}}, + {"h9be5u0k", []float64{-79.6062469482, -79.6060752869, 23.3682632446, 23.3686065674}}, + {"4btv", []float64{-86.30859375, -86.1328125, -48.1640625, -47.8125}}, + {"42hz7pm83dt", []float64{-88.6857041717, -88.6857028306, -71.9308523834, -71.9308510423}}, + {"qv49", []float64{-16.69921875, -16.5234375, 127.265625, 127.6171875}}, + {"0nwzwd", []float64{-52.1081542969, -52.1026611328, -170.222167969, -170.211181641}}, + {"jhkmc", []float64{-65.0830078125, -65.0390625, 51.0205078125, 51.064453125}}, + {"sysens0py", []float64{37.1131467819, 37.1131896973, 40.3640270233, 40.3640699387}}, + {"5q792kbf", []float64{-54.5975875854, -54.5974159241, -28.8161087036, -28.8157653809}}, + {"624gbe", []float64{-44.3243408203, -44.3188476562, -74.8608398438, -74.8498535156}}, + {"gtkjfqg", []float64{75.5790710449, 75.5804443359, -16.7720031738, -16.7706298828}}, + {"nv4", []float64{-61.875, -60.46875, 126.5625, 127.96875}}, + {"dcwv6uu0", []float64{9.3864440918, 9.38661575317, -46.6314697266, -46.6311264038}}, + {"vvgtyfv9", []float64{78.36977005, 78.3699417114, 83.97605896, 83.9764022827}}, + {"53rjpqr6zt9", []float64{-82.0550099015, -82.0550085604, -23.5773669183, -23.5773655772}}, + {"vmyp4dnemp6", []float64{78.5858018696, 
78.5858032107, 64.8065069318, 64.8065082729}}, + {"t9xqjxjpv", []float64{9.53197002411, 9.53201293945, 77.9440927505, 77.9441356659}}, + {"sby32e", []float64{4.45495605469, 4.46044921875, 42.5610351562, 42.5720214844}}, + {"sjfgy", []float64{33.0029296875, 33.046875, 4.130859375, 4.1748046875}}, + {"k7q0z2b2du", []float64{-26.5826869011, -26.5826815367, 20.0065648556, 20.0065755844}}, + {"nt", []float64{-61.875, -56.25, 112.5, 123.75}}, + {"1", []float64{-90.0, -45.0, -135.0, -90.0}}, + {"mpfpfd32e", []float64{-0.0314998626709, -0.0314569473267, 47.9242086411, 47.9242515564}}, + {"hqjqn", []float64{-55.1953125, -55.1513671875, 18.896484375, 18.9404296875}}, + {"9q7chj6u2uw", []float64{35.3616240621, 35.3616254032, -118.296964467, -118.296963125}}, + {"0wsf47v9c", []float64{-53.0650377274, -53.064994812, -150.713839531, -150.713796616}}, + {"kdv72env8xke", []float64{-28.9424979128, -28.9424977452, 29.9140823632, 29.9140826985}}, + {"trfx", []float64{44.82421875, 45.0, 59.765625, 60.1171875}}, + {"02uttm", []float64{-84.7869873047, -84.7814941406, -162.191162109, -162.180175781}}, + {"hhjgb5s3vv39", []float64{-66.8212655, -66.8212653324, 8.0920227617, 8.09202309698}}, + {"r16", []float64{-37.96875, -36.5625, 137.8125, 139.21875}}, + {"4xy44eer4t3", []float64{-46.0342316329, -46.0342302918, -58.9480648935, -58.9480635524}}, + {"8b", []float64{0.0, 5.625, -146.25, -135.0}}, + {"zd", []float64{56.25, 61.875, 157.5, 168.75}}, + {"z0x", []float64{47.8125, 49.21875, 144.84375, 146.25}}, + {"4967", []float64{-82.44140625, -82.265625, -64.3359375, -63.984375}}, + {"2vf4", []float64{-12.3046875, -12.12890625, -143.4375, -143.0859375}}, + {"tzp3t0rtg", []float64{39.6410322189, 39.6410751343, 89.1754674911, 89.1755104065}}, + {"75yry", []float64{-22.5439453125, -22.5, -35.947265625, -35.9033203125}}, + {"bdgtu", []float64{61.4794921875, 61.5234375, -152.40234375, -152.358398438}}, + {"u1", []float64{50.625, 56.25, 0.0, 11.25}}, + {"rz2bgp7hds", []float64{-4.04629468918, 
-4.04628932476, 169.940750599, 169.940761328}}, + {"g", []float64{45.0, 90.0, -45.0, 0.0}}, + {"psptppx32e", []float64{-66.5796643496, -66.5796589851, 168.364470005, 168.364480734}}, + {"gshfctpwf1", []float64{68.0120283365, 68.0120337009, -15.7440090179, -15.7439982891}}, + {"3yq", []float64{-9.84375, -8.4375, -92.8125, -91.40625}}, + {"685zv36e0fd2", []float64{-43.6303004622, -43.6303002946, -61.9923811778, -61.9923808426}}, + {"7gf5fd", []float64{-23.2360839844, -23.2305908203, -8.32763671875, -8.31665039062}}, + {"bmmzfq", []float64{75.9265136719, 75.9320068359, -160.565185547, -160.554199219}}, + {"m40", []float64{-33.75, -32.34375, 45.0, 46.40625}}, + {"tx45501g7v", []float64{39.9029284716, 39.902933836, 70.4469001293, 70.4469108582}}, + {"u", []float64{45.0, 90.0, 0.0, 45.0}}, + {"ej054jn", []float64{28.6798095703, 28.6811828613, -44.9038696289, -44.9024963379}}, + {"n1g7d", []float64{-79.541015625, -79.4970703125, 94.658203125, 94.7021484375}}, + {"nn6ejehuf", []float64{-54.2991113663, -54.2990684509, 93.7639331818, 93.7639760971}}, + {"qs8e93xwrrc", []float64{-19.0629114211, -19.06291008, 113.268668801, 113.268670142}}, + {"f2", []float64{45.0, 50.625, -78.75, -67.5}}, + {"gm", []float64{73.125, 78.75, -33.75, -22.5}}, + {"npp4rnm", []float64{-50.1951599121, -50.1937866211, 100.158233643, 100.159606934}}, + {"6t", []float64{-16.875, -11.25, -67.5, -56.25}}, + {"2f4fe3d5", []float64{-33.3017921448, -33.3016204834, -142.237243652, -142.23690033}}, + {"s7r00k0196", []float64{18.3034908772, 18.3034962416, 21.1047899723, 21.1048007011}}, + {"st084", []float64{28.125, 28.1689453125, 23.291015625, 23.3349609375}}, + {"p6f3bv9f1", []float64{-74.1930770874, -74.1930341721, 149.449467659, 149.449510574}}, + {"fgk5j", []float64{63.80859375, 63.8525390625, -50.4052734375, -50.361328125}}, + {"yeu22jjtp", []float64{66.1660194397, 66.166062355, 118.484416008, 118.484458923}}, + {"3bcn7gkusn25", []float64{-39.6639578976, -39.6639577299, -99.6722602844, -99.6722599491}}, 
+ {"1", []float64{-90.0, -45.0, -135.0, -90.0}}, + {"6q4dh71sqn", []float64{-10.8811962605, -10.881190896, -75.0452899933, -75.0452792645}}, + {"p", []float64{-90.0, -45.0, 135.0, 180.0}}, + {"6uy2kbef9yg", []float64{-18.2340927422, -18.2340914011, -47.2469682992, -47.246966958}}, + {"58j", []float64{-90.0, -88.59375, -15.46875, -14.0625}}, + {"j", []float64{-90.0, -45.0, 45.0, 90.0}}, + {"mh6x", []float64{-19.86328125, -19.6875, 48.515625, 48.8671875}}, + {"cgq4xrjz", []float64{63.7603569031, 63.7605285645, -92.486000061, -92.4856567383}}, + {"n", []float64{-90.0, -45.0, 90.0, 135.0}}, + {"ppqg570sk3n", []float64{-48.6741918325, -48.6741904914, 144.635886848, 144.635888189}}, + {"1suyu1k", []float64{-62.0878601074, -62.0864868164, -105.639038086, -105.637664795}}, + {"xm4xkzsnvux", []float64{29.4417956471, 29.4417969882, 149.980114549, 149.980115891}}, + {"8p3xj", []float64{42.01171875, 42.0556640625, -177.670898438, -177.626953125}}, + {"ef92nkk", []float64{14.0858459473, 14.0872192383, -9.21203613281, -9.2106628418}}, + {"qnrf101", []float64{-9.4921875, -9.49081420898, 100.943756104, 100.945129395}}, + {"2qt", []float64{-8.4375, -7.03125, -161.71875, -160.3125}}, + {"c2q7e", []float64{47.021484375, 47.0654296875, -114.829101562, -114.78515625}}, + {"w", []float64{0.0, 45.0, 90.0, 135.0}}, + {"j6t0bwpjpusg", []float64{-75.7718221284, -75.7718219608, 63.3131746575, 63.3131749928}}, + {"wrq", []float64{40.78125, 42.1875, 109.6875, 111.09375}}, + {"xvf2jk", []float64{32.3657226562, 32.3712158203, 172.144775391, 172.155761719}}, + {"xy0p5y", []float64{35.0134277344, 35.0189208984, 168.914794922, 168.92578125}}, + {"bsh9xbd", []float64{67.766418457, 67.767791748, -150.828552246, -150.827178955}}, + {"g675yc", []float64{58.3209228516, 58.3264160156, -29.2346191406, -29.2236328125}}, + {"dkrnq7", []float64{25.0213623047, 25.0268554688, -68.6315917969, -68.6206054688}}, + {"6q4uk3dyk5", []float64{-10.4936009645, -10.4935956001, -74.6920967102, -74.6920859814}}, + 
{"t58tp1kxb54p", []float64{20.5746203475, 20.5746205151, 46.0169246793, 46.0169250146}}, + {"bbw73yzeuy", []float64{48.4215438366, 48.421549201, -137.373529673, -137.373518944}}, + {"gnq", []float64{80.15625, 81.5625, -36.5625, -35.15625}}, + {"3j", []float64{-16.875, -11.25, -135.0, -123.75}}, + {"7dx2c", []float64{-30.8056640625, -30.76171875, -12.2607421875, -12.216796875}}, + {"vn9", []float64{81.5625, 82.96875, 46.40625, 47.8125}}, + {"4kj", []float64{-67.5, -66.09375, -71.71875, -70.3125}}, + {"cuvj4trg5nb8", []float64{72.6270465553, 72.6270467229, -94.0981142968, -94.0981139615}}, + {"uetmbuswe", []float64{65.7240772247, 65.7241201401, 29.92208004, 29.9221229553}}, + {"z", []float64{45.0, 90.0, 135.0, 180.0}}, + {"f", []float64{45.0, 90.0, -90.0, -45.0}}, + {"jg", []float64{-73.125, -67.5, 78.75, 90.0}}, + {"ycz", []float64{54.84375, 56.25, 133.59375, 135.0}}, + {"pevtd", []float64{-67.939453125, -67.8955078125, 165.322265625, 165.366210938}}, + {"gf7fm3hmb8", []float64{58.0582380295, 58.0582433939, -5.73999166489, -5.73998093605}}, + {"w7zwjnh24qd", []float64{22.1814313531, 22.1814326942, 112.022537291, 112.022538632}}, + {"nfesgy", []float64{-75.0695800781, -75.0640869141, 128.836669922, 128.84765625}}, + {"s", []float64{0.0, 45.0, 0.0, 45.0}}, + {"efq0q0dg0g0n", []float64{12.7034739777, 12.7034741454, -2.5450193882, -2.54501905292}}, + {"kkucr8pk", []float64{-18.060836792, -18.0606651306, 18.2692337036, 18.2695770264}}, + {"5zdg4zvz", []float64{-47.2413825989, -47.2412109375, -7.25406646729, -7.25372314453}}, + {"fuw", []float64{70.3125, 71.71875, -47.8125, -46.40625}}, + {"x51mnftp8", []float64{17.7689266205, 17.7689695358, 137.061309814, 137.06135273}}, + {"y0", []float64{45.0, 50.625, 90.0, 101.25}}, + {"ndufku4sr", []float64{-74.1130399704, -74.1129970551, 119.392161369, 119.392204285}}, + {"ydwndhywhg8", []float64{60.232219398, 60.2322207391, 121.034520864, 121.034522206}}, + {"gj6ehkq0", []float64{75.0819396973, 75.0821113586, -41.2893676758, 
-41.289024353}}, + {"m3hfct0", []float64{-38.8641357422, -38.8627624512, 62.9956054688, 62.9969787598}}, + {"6745yupp70qu", []float64{-27.4426010996, -27.442600932, -75.631118305, -75.6311179698}}, + {"d7b0m9dzj213", []float64{21.1471368559, 21.1471370235, -78.504297249, -78.5042969137}}, + {"py", []float64{-56.25, -50.625, 168.75, 180.0}}, + {"4vhrpypw", []float64{-60.6105422974, -60.610370636, -49.9225616455, -49.9222183228}}, + {"xwyyj1xz5kzw", []float64{39.0329053625, 39.0329055302, 167.222706601, 167.222706936}}, + {"18ht0j2w8ste", []float64{-89.0911141969, -89.0911140293, -106.171159521, -106.171159185}}, + {"vynwqurve", []float64{79.8729228973, 79.8729658127, 88.1980276108, 88.1980705261}}, + {"s77hhn", []float64{19.0173339844, 19.0228271484, 15.64453125, 15.6555175781}}, + {"hj66tgs86", []float64{-60.0100278854, -60.0099849701, 3.42301368713, 3.42305660248}}, + {"e5nh4k", []float64{17.6000976562, 17.6055908203, -36.4636230469, -36.4526367188}}, + {"jk", []float64{-67.5, -61.875, 56.25, 67.5}}, + {"7", []float64{-45.0, 0.0, -45.0, 0.0}}, + {"f0p3v5", []float64{45.3240966797, 45.3295898438, -79.5849609375, -79.5739746094}}, + {"numc175r1", []float64{-65.9002876282, -65.9002447128, 131.895375252, 131.895418167}}, + {"7pc", []float64{-1.40625, 0.0, -43.59375, -42.1875}}, + {"b7qw82mfqc4", []float64{64.4255930185, 64.4255943596, -159.590199888, -159.590198547}}, + {"qfe", []float64{-30.9375, -29.53125, 127.96875, 129.375}}, + {"mw9kj6nrue61", []float64{-7.72204069421, -7.72204052657, 69.4973042607, 69.497304596}}, + {"6en5d6psj", []float64{-27.4980926514, -27.498049736, -58.9531087875, -58.9530658722}}, + {"mk80", []float64{-19.6875, -19.51171875, 56.25, 56.6015625}}, + {"d2fbpmpyjv", []float64{4.24727261066, 4.24727797508, -74.5533192158, -74.5533084869}}, + {"pf84wbwguh9", []float64{-75.4946324229, -75.4946310818, 169.056073576, 169.056074917}}, + {"ncj8287", []float64{-84.3296813965, -84.3283081055, 131.510467529, 131.51184082}}, + {"smd4t", 
[]float64{31.376953125, 31.4208984375, 14.2822265625, 14.326171875}}, + {"4ryj3jjxrd", []float64{-45.4546773434, -45.454671979, -70.2606797218, -70.260668993}}, + {"udffsxnn8", []float64{60.9477710724, 60.9478139877, 26.5731811523, 26.5732240677}}, + {"cub7vr", []float64{72.4163818359, 72.421875, -100.667724609, -100.656738281}}, + {"y7c6s4", []float64{66.5441894531, 66.5496826172, 103.18359375, 103.194580078}}, + {"t253", []float64{0.17578125, 0.3515625, 60.8203125, 61.171875}}, + {"1e2bhmk9ybw", []float64{-71.6896077991, -71.6896064579, -111.252067387, -111.252066046}}, + {"k", []float64{-45.0, 0.0, 0.0, 45.0}}, + {"99r75", []float64{7.55859375, 7.6025390625, -102.172851562, -102.12890625}}, + {"knbr2kzz1791", []float64{-5.72952283546, -5.72952266783, 0.373246818781, 0.373247154057}}, + {"v8h", []float64{45.0, 46.40625, 73.125, 74.53125}}, + {"sm6xvf3bc", []float64{30.9060430527, 30.906085968, 15.0207567215, 15.0207996368}}, + {"vu", []float64{67.5, 73.125, 78.75, 90.0}}, + {"w56htc6", []float64{19.0791320801, 19.0805053711, 93.0679321289, 93.0693054199}}, + {"t", []float64{0.0, 45.0, 45.0, 90.0}}, + {"k", []float64{-45.0, 0.0, 0.0, 45.0}}, + {"5hm57j8", []float64{-65.4922485352, -65.4908752441, -37.8369140625, -37.8355407715}}, + {"k8qwr0e", []float64{-42.4923706055, -42.4909973145, 31.9523620605, 31.9537353516}}, + {"716e0", []float64{-37.44140625, -37.3974609375, -41.484375, -41.4404296875}}, + {"wz71b", []float64{41.0888671875, 41.1328125, 127.96875, 128.012695312}}, + {"w", []float64{0.0, 45.0, 90.0, 135.0}}, + {"v", []float64{45.0, 90.0, 45.0, 90.0}}, + {"89x2", []float64{8.4375, 8.61328125, -147.3046875, -146.953125}}, + {"rcr37p", []float64{-37.7105712891, -37.705078125, 179.077148438, 179.088134766}}, + {"4xzjc7vmstr", []float64{-45.3739361465, -45.3739348054, -57.5939060748, -57.5939047337}}, + {"tv07ndf8", []float64{28.6674499512, 28.6676216125, 79.3906402588, 79.3909835815}}, + {"qb2z", []float64{-42.36328125, -42.1875, 124.8046875, 125.15625}}, + 
{"xjq0fjferr", []float64{29.6952670813, 29.6952724457, 143.529134989, 143.529145718}}, + {"zwn6", []float64{79.1015625, 79.27734375, 166.2890625, 166.640625}}, + {"7xc", []float64{-1.40625, 0.0, -21.09375, -19.6875}}, + {"m", []float64{-45.0, 0.0, 45.0, 90.0}}, + {"vw4sy8z", []float64{79.5890808105, 79.5904541016, 71.3108825684, 71.3122558594}}, + {"djg8s3pre", []float64{32.4384212494, 32.4384641647, -84.881272316, -84.8812294006}}, + {"vpn8t", []float64{84.462890625, 84.5068359375, 54.3603515625, 54.404296875}}, + {"1sse8x6", []float64{-64.0324401855, -64.0310668945, -106.147155762, -106.145782471}}, + {"snm4", []float64{35.5078125, 35.68359375, 7.03125, 7.3828125}}, + {"t", []float64{0.0, 45.0, 45.0, 90.0}}, + {"5rb", []float64{-46.40625, -45.0, -33.75, -32.34375}}, + {"q7", []float64{-28.125, -22.5, 101.25, 112.5}}, + {"8", []float64{0.0, 45.0, -180.0, -135.0}}, + {"hn5sw9rbvjk3", []float64{-55.4519608431, -55.4519606754, 5.21838281304, 5.21838314831}}, + {"y0f7w5tep1f", []float64{49.8537348211, 49.8537361622, 93.4355905652, 93.4355919063}}, + {"xts0uk", []float64{31.0913085938, 31.0968017578, 163.311767578, 163.322753906}}, + {"ftybqfey", []float64{77.4024581909, 77.4026298523, -57.7060317993, -57.7056884766}}, + {"eqvhf", []float64{38.8037109375, 38.84765625, -26.630859375, -26.5869140625}}, + {"ctpbp73", []float64{73.1428527832, 73.1442260742, -101.281585693, -101.280212402}}, + {"15czhy35", []float64{-67.6409339905, -67.6407623291, -132.328948975, -132.328605652}}, + {"1", []float64{-90.0, -45.0, -135.0, -90.0}}, + {"vk6mwwrysbf", []float64{69.9084989727, 69.9085003138, 59.7105565667, 59.7105579078}}, + {"4yfcbqvevqy", []float64{-51.6858740151, -51.685872674, -52.3640397191, -52.364038378}}, + {"d6qm1q41g", []float64{13.5684156418, 13.5684585571, -69.9031305313, -69.903087616}}, + {"kjqew1cty", []float64{-14.842915535, -14.8428726196, 9.40661430359, 9.40665721893}}, + {"hf9zn39ewerv", []float64{-74.6981724165, -74.6981722489, 36.4879449829, 36.4879453182}}, 
+ {"1j", []float64{-61.875, -56.25, -135.0, -123.75}}, + {"u41", []float64{56.25, 57.65625, 1.40625, 2.8125}}, + {"pd8sbu5sk", []float64{-75.0798368454, -75.0797939301, 158.241062164, 158.24110508}}, + {"k7", []float64{-28.125, -22.5, 11.25, 22.5}}, + {"fx6xcm", []float64{87.1710205078, 87.1765136719, -63.9294433594, -63.9184570312}}, + {"k1nwc4mun", []float64{-38.1754302979, -38.1753873825, 9.19272422791, 9.19276714325}}, + {"nechx1mg", []float64{-68.1078529358, -68.1076812744, 114.221763611, 114.222106934}}, + {"8et6dbj4g", []float64{20.1274251938, 20.1274681091, -149.98934269, -149.989299774}}, + {"7e", []float64{-28.125, -22.5, -22.5, -11.25}}, + {"vqcthybtw0", []float64{83.885679245, 83.8856846094, 58.5690593719, 58.5690701008}}, + {"r6qdv32n", []float64{-31.8524551392, -31.8522834778, 155.621337891, 155.621681213}}, + {"tbhh", []float64{0.703125, 0.87890625, 84.375, 84.7265625}}, + {"0c5fpu", []float64{-84.0014648438, -83.9959716797, -140.635986328, -140.625}}, + {"7b", []float64{-45.0, -39.375, -11.25, 0.0}}, + {"9vzmkfvug", []float64{33.2825231552, 33.2825660706, -90.8379220963, -90.8378791809}}, + {"68t", []float64{-42.1875, -40.78125, -60.46875, -59.0625}}, + {"ef1szshm45h", []float64{12.1078079939, 12.107809335, -8.80510747433, -8.80510613322}}, + {"21dgj4", []float64{-36.0241699219, -36.0186767578, -175.913085938, -175.902099609}}, + {"109q9yt", []float64{-86.0092163086, -86.0078430176, -133.158416748, -133.157043457}}, + {"nhj3b9vc", []float64{-67.182598114, -67.1824264526, 97.4126815796, 97.4130249023}}, + {"nye5uzgb", []float64{-52.735748291, -52.7355766296, 128.182640076, 128.182983398}}, + {"dhz5f", []float64{27.3779296875, 27.421875, -80.068359375, -80.0244140625}}, + {"g1verehd", []float64{55.4318618774, 55.4320335388, -36.9298553467, -36.9295120239}}, + {"jtr", []float64{-60.46875, -59.0625, 77.34375, 78.75}}, + {"m5nbruj", []float64{-28.0590820312, -28.0577087402, 54.839630127, 54.841003418}}, + {"p", []float64{-90.0, -45.0, 135.0, 180.0}}, + 
{"h", []float64{-90.0, -45.0, 0.0, 45.0}}, + {"bm3gr", []float64{75.1025390625, 75.146484375, -165.981445312, -165.9375}}, + {"e7my1m0qp", []float64{19.3644332886, 19.3644762039, -25.6084871292, -25.6084442139}}, + {"fzue", []float64{89.12109375, 89.296875, -49.921875, -49.5703125}}, + {"q70", []float64{-28.125, -26.71875, 101.25, 102.65625}}, + {"sjeed", []float64{31.552734375, 31.5966796875, 5.009765625, 5.0537109375}}, + {"cvsuyyw", []float64{76.8081665039, 76.8095397949, -94.2654418945, -94.2640686035}}, + {"7dnp", []float64{-32.51953125, -32.34375, -14.0625, -13.7109375}}, + {"tf9kr1u", []float64{14.8191833496, 14.8205566406, 80.8209228516, 80.8222961426}}, + {"j38nduwqew", []float64{-80.3940546513, -80.3940492868, 56.3795828819, 56.3795936108}}, + {"444y82r", []float64{-77.606048584, -77.604675293, -86.1122131348, -86.1108398438}}, + {"1rwzsww", []float64{-46.4584350586, -46.4570617676, -114.051818848, -114.050445557}}, + {"98vu", []float64{4.921875, 5.09765625, -104.4140625, -104.0625}}, + {"f0hu79k2y84", []float64{45.7540655136, 45.7540668547, -83.1603857875, -83.1603844464}}, + {"35399zm2gnn", []float64{-26.415091753, -26.4150904119, -132.806374133, -132.806372792}}, + {"qzxy", []float64{-1.7578125, -1.58203125, 134.6484375, 135.0}}, + {"7gpr25", []float64{-26.8341064453, -26.8286132812, -1.0546875, -1.04370117188}}, + {"xucdp", []float64{27.0703125, 27.1142578125, 171.166992188, 171.2109375}}, + {"db3mpnz89zq", []float64{2.32235983014, 2.32236117125, -54.1741874814, -54.1741861403}}, + {"p", []float64{-90.0, -45.0, 135.0, 180.0}}, + {"94m52", []float64{13.2275390625, 13.271484375, -127.96875, -127.924804688}}, + {"u7ucp", []float64{66.26953125, 66.3134765625, 18.2373046875, 18.28125}}, + {"81qq43p", []float64{8.09143066406, 8.09280395508, -171.10244751, -171.101074219}}, + {"f80w8", []float64{46.142578125, 46.1865234375, -66.796875, -66.7529296875}}, + {"8j5z", []float64{29.35546875, 29.53125, -174.7265625, -174.375}}, + {"56q", []float64{-77.34375, 
-75.9375, -25.3125, -23.90625}}, + {"b72vvhj", []float64{64.3139648438, 64.3153381348, -167.468719482, -167.467346191}}, + {"5j", []float64{-61.875, -56.25, -45.0, -33.75}}, + {"42hm9tj0rn", []float64{-89.0056622028, -89.0056568384, -72.7003526688, -72.7003419399}}, + {"cbxx9q2c89", []float64{49.1654545069, 49.1654598713, -90.6471419334, -90.6471312046}}, + {"43", []float64{-84.375, -78.75, -78.75, -67.5}}, + {"rvmw", []float64{-14.4140625, -14.23828125, 176.484375, 176.8359375}}, + {"jwmeyr4hj7", []float64{-54.1454154253, -54.1454100609, 75.5120050907, 75.5120158195}}, + {"b3y", []float64{54.84375, 56.25, -160.3125, -158.90625}}, + {"4y3n0e7u8j", []float64{-53.7704104185, -53.7704050541, -54.8166275024, -54.8166167736}}, + {"m0k0x", []float64{-43.505859375, -43.4619140625, 50.9326171875, 50.9765625}}, + {"2zc1v219ev8z", []float64{-1.09834464267, -1.09834447503, -144.610815234, -144.610814899}}, + {"3rvj3ezyffez", []float64{-0.46162577346, -0.461625605822, -116.64206598, -116.642065644}}, + {"35bdpq", []float64{-23.5217285156, -23.5162353516, -133.978271484, -133.967285156}}, + {"qdqrzp2e7b", []float64{-30.9410619736, -30.9410566092, 121.597527266, 121.597537994}}, + {"vmrsrejf", []float64{75.2951431274, 75.2953147888, 67.1343612671, 67.1347045898}}, + {"up", []float64{84.375, 90.0, 0.0, 11.25}}, + {"bzy", []float64{88.59375, 90.0, -137.8125, -136.40625}}, + {"3rnm42gs62", []float64{-4.7412443161, -4.74123895168, -114.857157469, -114.85714674}}, + {"yhekty3621c", []float64{71.1382435262, 71.1382448673, 94.8247160017, 94.8247173429}}, + {"ektx", []float64{26.54296875, 26.71875, -26.015625, -25.6640625}}, + {"9nxkb4u9f6", []float64{37.4128782749, 37.4128836393, -124.798411131, -124.798400402}}, + {"fg", []float64{61.875, 67.5, -56.25, -45.0}}, + {"66e4x10k68", []float64{-30.4918241501, -30.4918187857, -74.2231822014, -74.2231714725}}, + {"me", []float64{-28.125, -22.5, 67.5, 78.75}}, + {"r385f5q9", []float64{-35.8852958679, -35.8851242065, 146.346817017, 
146.347160339}}, + {"xbdc8wgn0", []float64{3.11428070068, 3.11432361603, 172.643280029, 172.643322945}}, + {"74s", []float64{-30.9375, -29.53125, -39.375, -37.96875}}, + {"dg8t7", []float64{20.6103515625, 20.654296875, -55.4150390625, -55.37109375}}, + {"nf7", []float64{-77.34375, -75.9375, 127.96875, 129.375}}, + {"6nzfqpxy", []float64{-6.59351348877, -6.59334182739, -78.8272476196, -78.8269042969}}, + {"0ux06p9jktht", []float64{-64.6014270745, -64.6014269069, -136.31678693, -136.316786595}}, + {"nb8pxznpjh", []float64{-85.8294653893, -85.8294600248, 124.099030495, 124.099041224}}, + {"6qks2x97hzm", []float64{-9.05492708087, -9.05492573977, -72.3979751766, -72.3979738355}}, + {"us6qrd43em", []float64{70.0161534548, 70.0161588192, 25.9968817234, 25.9968924522}}, + {"tp2eh", []float64{41.30859375, 41.3525390625, 45.87890625, 45.9228515625}}, + {"vcgf16q2", []float64{55.2076721191, 55.2078437805, 84.0869522095, 84.0872955322}}, + {"qt15nkc82kz", []float64{-16.3214953244, -16.3214939833, 114.182988256, 114.182989597}}, + {"t6t", []float64{14.0625, 15.46875, 63.28125, 64.6875}}, + {"yx53b3kj32", []float64{84.6903848648, 84.6903902292, 117.086845636, 117.086856365}}, + {"twqdxev4cx", []float64{35.6168121099, 35.6168174744, 76.9771456718, 76.9771564007}}, + {"p", []float64{-90.0, -45.0, 135.0, 180.0}}, + {"4gty084hgddy", []float64{-69.2569826916, -69.2569825239, -48.13918937, -48.1391890347}}, + {"c7", []float64{61.875, 67.5, -123.75, -112.5}}, + {"ywffc641", []float64{83.463306427, 83.4634780884, 116.424865723, 116.425209045}}, + {"k0km", []float64{-42.71484375, -42.5390625, 5.9765625, 6.328125}}, + {"17k4fg", []float64{-71.2188720703, -71.2133789062, -118.004150391, -117.993164062}}, + {"9fr", []float64{12.65625, 14.0625, -91.40625, -90.0}}, + {"w", []float64{0.0, 45.0, 90.0, 135.0}}, + {"h08scsx", []float64{-86.3278198242, -86.3264465332, 0.778656005859, 0.780029296875}}, + {"8f8nq48", []float64{15.1748657227, 15.1762390137, -145.986328125, -145.984954834}}, + 
{"hecr6qx49k0", []float64{-67.59567976, -67.5956784189, 24.3663561344, 24.3663574755}}, + {"jn", []float64{-56.25, -50.625, 45.0, 56.25}}, + {"qwx7j", []float64{-7.91015625, -7.8662109375, 122.915039062, 122.958984375}}, + {"z", []float64{45.0, 90.0, 135.0, 180.0}}, + {"wxcj4", []float64{44.47265625, 44.5166015625, 113.994140625, 114.038085938}}, + {"gw63h", []float64{80.33203125, 80.3759765625, -19.16015625, -19.1162109375}}, + {"hp7b6f27tq6p", []float64{-49.1618095525, -49.1618093848, 5.3948584199, 5.39485875517}}, + {"kd", []float64{-33.75, -28.125, 22.5, 33.75}}, + {"fbweu", []float64{48.4716796875, 48.515625, -46.93359375, -46.8896484375}}, + {"m1fcuue7nm1g", []float64{-34.8233712651, -34.8233710974, 49.080661498, 49.0806618333}}, + {"h9j", []float64{-84.375, -82.96875, 29.53125, 30.9375}}, + {"n9d3g5uv", []float64{-81.2334251404, -81.233253479, 115.80242157, 115.802764893}}, + {"nhpp1spf", []float64{-66.247215271, -66.2470436096, 99.9203109741, 99.9206542969}}, + {"7jg2w13b23h", []float64{-12.5614446402, -12.5614432991, -40.1635962725, -40.1635949314}}, + {"6q4z0ebf3", []float64{-9.99854564667, -9.99850273132, -74.8597669601, -74.8597240448}}, + {"sv9vqgeecr", []float64{31.8802589178, 31.8802642822, 36.5124285221, 36.5124392509}}, + {"wqd4", []float64{36.9140625, 37.08984375, 104.0625, 104.4140625}}, + {"bwqgj", []float64{80.68359375, 80.7275390625, -147.788085938, -147.744140625}}, + {"hk73qx", []float64{-65.8355712891, -65.830078125, 16.1059570312, 16.1169433594}}, + {"gdx1d6hr76v", []float64{59.3384175003, 59.3384188414, -12.5513903797, -12.5513890386}}, + {"47czd", []float64{-67.587890625, -67.5439453125, -76.201171875, -76.1572265625}}, + {"1kpebdk50", []float64{-66.8279457092, -66.8279027939, -113.17565918, -113.175616264}}, + {"g3z7", []float64{55.37109375, 55.546875, -23.5546875, -23.203125}}, + {"8x", []float64{39.375, 45.0, -157.5, -146.25}}, + {"nczvrs", []float64{-79.2114257812, -79.2059326172, 134.978027344, 134.989013672}}, + {"fbyjmwc6", 
[]float64{50.1790237427, 50.1791954041, -47.5690841675, -47.5687408447}}, + {"yhz41tus5wm", []float64{72.1026183665, 72.1026197076, 99.9160046875, 99.9160060287}}, + {"uktff1pp0", []float64{70.8025932312, 70.8026361465, 19.4334411621, 19.4334840775}}, + {"hrt8", []float64{-47.8125, -47.63671875, 18.984375, 19.3359375}}, + {"b5vkzheshz3", []float64{66.9541557133, 66.9541570544, -172.304558605, -172.304557264}}, + {"v", []float64{45.0, 90.0, 45.0, 90.0}}, + {"h6mvgwv1kmp", []float64{-76.2956875563, -76.2956862152, 19.4968043268, 19.4968056679}}, + {"etzq7udz", []float64{33.4683036804, 33.4684753418, -12.1361160278, -12.1357727051}}, + {"rf2x5pd6", []float64{-31.0717391968, -31.0715675354, 169.588050842, 169.588394165}}, + {"k8kquxqbt2", []float64{-42.3673152924, -42.3673099279, 28.6838114262, 28.683822155}}, + {"bz91jncb2c7", []float64{87.4004097283, 87.4004110694, -144.621583968, -144.621582627}}, + {"3uk4y5r6sjwr", []float64{-20.5920389481, -20.5920387805, -95.3511917219, -95.3511913866}}, + {"d", []float64{0.0, 45.0, -90.0, -45.0}}, + {"y95n1hd", []float64{51.7044067383, 51.7057800293, 116.765441895, 116.766815186}}, + {"629k5b3kbkc", []float64{-41.4821608365, -41.4821594954, -76.8256638944, -76.8256625533}}, + {"pp42st7vp5", []float64{-50.5073958635, -50.5073904991, 138.367266655, 138.367277384}}, + {"u17e", []float64{52.55859375, 52.734375, 4.921875, 5.2734375}}, + {"s", []float64{0.0, 45.0, 0.0, 45.0}}, + {"ex5w6znubms", []float64{40.5129298568, 40.5129311979, -17.447989583, -17.4479882419}}, + {"8jsn", []float64{31.9921875, 32.16796875, -174.375, -174.0234375}}, + {"6", []float64{-45.0, 0.0, -90.0, -45.0}}, + {"mht9efkj", []float64{-19.410610199, -19.4104385376, 52.9046630859, 52.9050064087}}, + {"kkbcqqfcsz", []float64{-18.0241495371, -18.0241441727, 12.5833261013, 12.5833368301}}, + {"866rppwznm", []float64{13.9291459322, 13.9291512966, -165.268782377, -165.268771648}}, + {"96wj53wczp0c", []float64{14.9499841221, 14.9499842897, -115.160106607, 
-115.160106272}}, + {"9ctzrj", []float64{9.73937988281, 9.74487304688, -92.8564453125, -92.8454589844}}, + {"d", []float64{0.0, 45.0, -90.0, -45.0}}, + {"wfq5hd", []float64{13.1945800781, 13.2000732422, 132.385253906, 132.396240234}}, + {"9y6vu2v8wm5", []float64{36.1712247133, 36.1712260544, -97.1882195771, -97.188218236}}, + {"6xcpg", []float64{-0.0439453125, 0.0, -65.9619140625, -65.91796875}}, + {"rxqgqmc", []float64{-3.61587524414, -3.61450195312, 167.268218994, 167.269592285}}, + {"yye", []float64{81.5625, 82.96875, 127.96875, 129.375}}, + {"r3", []float64{-39.375, -33.75, 146.25, 157.5}}, + {"x7t", []float64{19.6875, 21.09375, 153.28125, 154.6875}}, + {"3", []float64{-45.0, 0.0, -135.0, -90.0}}, + {"g9mmh9sku8h", []float64{52.9192113876, 52.9192127287, -14.9133986235, -14.9133972824}}, + {"v2q6qu3", []float64{46.8251037598, 46.8264770508, 65.3370666504, 65.3384399414}}, + {"7j9ckmu00j", []float64{-13.8111609221, -13.8111555576, -42.3468017578, -42.346791029}}, + {"4q3td", []float64{-53.876953125, -53.8330078125, -76.552734375, -76.5087890625}}, + {"9ve2c92z33ke", []float64{31.077454146, 31.0774543136, -96.6126798838, -96.6126795486}}, + {"9sscvm0mw1kw", []float64{25.6485348567, 25.6485350244, -105.58899276, -105.588992424}}, + {"9u", []float64{22.5, 28.125, -101.25, -90.0}}, + {"nv4j7yb8mjjy", []float64{-60.9149988368, -60.9149986692, 126.728203855, 126.728204191}}, + {"80w8", []float64{2.8125, 2.98828125, -170.859375, -170.5078125}}, + {"q06ch78z1", []float64{-43.3975410461, -43.3974981308, 94.0550279617, 94.0550708771}}, + {"u", []float64{45.0, 90.0, 0.0, 45.0}}, + {"gzw", []float64{87.1875, 88.59375, -2.8125, -1.40625}}, + {"1", []float64{-90.0, -45.0, -135.0, -90.0}}, + {"u", []float64{45.0, 90.0, 0.0, 45.0}}, + {"furb", []float64{68.90625, 69.08203125, -45.3515625, -45.0}}, + {"xen8pyc36", []float64{16.9122934341, 16.9123363495, 166.983003616, 166.983046532}}, + {"n1gc29sd", []float64{-79.9279403687, -79.9277687073, 95.3015899658, 95.3019332886}}, + 
{"cjvu", []float64{78.046875, 78.22265625, -126.9140625, -126.5625}}, + {"w7x53f7fb0", []float64{20.2716207504, 20.2716261148, 111.175804138, 111.175814867}}, + {"hz", []float64{-50.625, -45.0, 33.75, 45.0}}, + {"8tz14", []float64{32.51953125, 32.5634765625, -147.568359375, -147.524414062}}, + {"z", []float64{45.0, 90.0, 135.0, 180.0}}, + {"pyfunm", []float64{-51.3006591797, -51.2951660156, 172.891845703, 172.902832031}}, + {"b", []float64{45.0, 90.0, -180.0, -135.0}}, + {"7xre21ceuxv", []float64{-3.63716259599, -3.63716125488, -11.9508652389, -11.9508638978}}, + {"17", []float64{-73.125, -67.5, -123.75, -112.5}}, + {"ru", []float64{-22.5, -16.875, 168.75, 180.0}}, + {"bdjs6y77m", []float64{57.0319604874, 57.0320034027, -149.640097618, -149.640054703}}, + {"u1vsgx2", []float64{55.718536377, 55.719909668, 7.88818359375, 7.88955688477}}, + {"pj5e80uz", []float64{-61.2544441223, -61.2542724609, 139.928398132, 139.928741455}}, + {"ju01xcg9", []float64{-67.2265434265, -67.2263717651, 79.0953826904, 79.0957260132}}, + {"4", []float64{-90.0, -45.0, -90.0, -45.0}}, + {"k05kz1", []float64{-44.1595458984, -44.1540527344, 4.8779296875, 4.88891601562}}, + {"ru2h1xc", []float64{-20.3480529785, -20.3466796875, 168.81729126, 168.818664551}}, + {"ud2s6zr", []float64{58.443145752, 58.444519043, 23.3335876465, 23.3349609375}}, + {"mrq5fm1zrqp", []float64{-3.5308277607, -3.53082641959, 64.7891007364, 64.7891020775}}, + {"0y2u0dg2rg", []float64{-54.1254597902, -54.1254544258, -145.168544054, -145.168533325}}, + {"9nkt0rnyx3", []float64{36.0747295618, 36.0747349262, -128.651307821, -128.651297092}}, + {"77q", []float64{-26.71875, -25.3125, -25.3125, -23.90625}}, + {"ng76t7", []float64{-71.2628173828, -71.2573242188, 128.551025391, 128.562011719}}, + {"4ewypr0y27up", []float64{-69.2182661779, -69.2182660103, -57.6881629229, -57.6881625876}}, + {"ge7vkm2x73", []float64{64.2341905832, 64.2341959476, -17.0389688015, -17.0389580727}}, + {"3qm4d", []float64{-9.404296875, -9.3603515625, 
-116.630859375, -116.586914062}}, + {"6gqw9", []float64{-25.576171875, -25.5322265625, -47.0654296875, -47.021484375}}, + {"32", []float64{-45.0, -39.375, -123.75, -112.5}}, + {"ns85", []float64{-64.16015625, -63.984375, 112.5, 112.8515625}}, + {"hzy00b4j5tcj", []float64{-46.4053600095, -46.4053598419, 42.2233571112, 42.2233574465}}, + {"7qrk5nt", []float64{-9.10491943359, -9.10354614258, -23.4159851074, -23.4146118164}}, + {"vd4219t7th", []float64{56.2588620186, 56.258867383, 70.7374048233, 70.7374155521}}, + {"g", []float64{45.0, 90.0, -45.0, 0.0}}, + {"pq1p16t", []float64{-55.0057983398, -55.0044250488, 147.718048096, 147.719421387}}, + {"gryfsc3wgn", []float64{89.0412604809, 89.0412658453, -24.0468835831, -24.0468728542}}, + {"np0u", []float64{-49.921875, -49.74609375, 91.0546875, 91.40625}}, + {"u87", []float64{46.40625, 47.8125, 26.71875, 28.125}}, + {"9qz2", []float64{37.96875, 38.14453125, -113.5546875, -113.203125}}, + {"xunf", []float64{22.8515625, 23.02734375, 178.2421875, 178.59375}}, + {"ve", []float64{61.875, 67.5, 67.5, 78.75}}, + {"s2c1tf2bvp4s", []float64{4.49494846165, 4.49494862929, 12.9101834446, 12.9101837799}}, + {"5znd0f", []float64{-50.2624511719, -50.2569580078, -2.07641601562, -2.0654296875}}, + {"dn90ug", []float64{36.7108154297, 36.7163085938, -88.3850097656, -88.3740234375}}, + {"24bg3", []float64{-28.9599609375, -28.916015625, -178.901367188, -178.857421875}}, + {"x46xpb2", []float64{13.888092041, 13.889465332, 138.856201172, 138.857574463}}, + {"83q", []float64{7.03125, 8.4375, -160.3125, -158.90625}}, + {"2pup20sqj", []float64{-0.128059387207, -0.128016471863, -174.368948936, -174.368906021}}, + {"07", []float64{-73.125, -67.5, -168.75, -157.5}}, + {"jj7nh21g4zk", []float64{-59.4135086238, -59.4135072827, 49.408044219, 49.4080455601}}, + {"19up4rw9", []float64{-78.8844108582, -78.8842391968, -106.767196655, -106.766853333}}, + {"2j", []float64{-16.875, -11.25, -180.0, -168.75}}, + {"14uexty", []float64{-73.8844299316, -73.8830566406, 
-128.33404541, -128.332672119}}, + {"t3wtb", []float64{9.4482421875, 9.4921875, 65.390625, 65.4345703125}}, + {"wv0z", []float64{29.35546875, 29.53125, 124.8046875, 125.15625}}, + {"jj6gcte19u", []float64{-59.7790789604, -59.779073596, 48.9373004436, 48.9373111725}}, + {"xz0wc0te1bbe", []float64{40.5647895299, 40.5647896975, 169.504699185, 169.504699521}}, + {"d", []float64{0.0, 45.0, -90.0, -45.0}}, + {"zyejp1um86c", []float64{82.4519781768, 82.4519795179, 173.282215744, 173.282217085}}, + {"ft915xruxj8n", []float64{76.1539096758, 76.1539098434, -65.9289979935, -65.9289976582}}, + {"vchvx0yp5c", []float64{51.5971237421, 51.5971291065, 85.7457053661, 85.745716095}}, + {"x5x", []float64{19.6875, 21.09375, 144.84375, 146.25}}, + {"0ykju58", []float64{-53.8137817383, -53.8124084473, -140.44921875, -140.447845459}}, + {"d2yk35fd", []float64{4.98676300049, 4.98693466187, -69.91355896, -69.9132156372}}, + {"6ymr7k8", []float64{-8.54461669922, -8.5432434082, -48.7243652344, -48.7229919434}}, + {"pjsxb9f", []float64{-57.6905822754, -57.6892089844, 141.352844238, 141.354217529}}, + {"trydkh27gn2t", []float64{44.0132818557, 44.0132820234, 65.5668789893, 65.5668793246}}, + {"k72c0uqqw2", []float64{-26.5185070038, -26.5185016394, 12.3464977741, 12.346508503}}, + {"s8trjfz", []float64{4.05807495117, 4.05944824219, 30.145111084, 30.146484375}}, + {"57ffkwfnrh1", []float64{-68.4725689888, -68.4725676477, -29.6820102632, -29.6820089221}}, + {"x", []float64{0.0, 45.0, 135.0, 180.0}}, + {"u2k", []float64{46.40625, 47.8125, 16.875, 18.28125}}, + {"nndqt", []float64{-52.294921875, -52.2509765625, 93.3837890625, 93.427734375}}, + {"w7", []float64{16.875, 22.5, 101.25, 112.5}}, + {"r6c7x00p6", []float64{-28.91477108, -28.9147281647, 148.315515518, 148.315558434}}, + {"mdrgz", []float64{-31.6845703125, -31.640625, 78.7060546875, 78.75}}, + {"f1dzhud0j", []float64{54.6926879883, 54.6927309036, -85.9211111069, -85.9210681915}}, + {"yqh", []float64{78.75, 80.15625, 106.875, 108.28125}}, + 
{"9jp43kj", []float64{28.5424804688, 28.5438537598, -125.094451904, -125.093078613}}, + {"14pb4v5q", []float64{-78.7215042114, -78.72133255, -123.976249695, -123.975906372}}, + {"bjzkjzr9hsvc", []float64{78.0868977495, 78.0868979171, -169.54150144, -169.541501105}}, + {"svjbhs9e9g8", []float64{28.1503388286, 28.1503401697, 42.0358264446, 42.0358277857}}, + {"guuxc8c5v", []float64{73.0858182907, 73.0858612061, -4.85436916351, -4.85432624817}}, + {"utu2603h0ru5", []float64{77.3897973262, 77.3897974938, 28.5658425093, 28.5658428445}}, + {"bq", []float64{78.75, 84.375, -168.75, -157.5}}, + {"kk", []float64{-22.5, -16.875, 11.25, 22.5}}, + {"6vxq65mhx", []float64{-12.9452419281, -12.9451990128, -45.9596300125, -45.9595870972}}, + {"f4sb", []float64{59.0625, 59.23828125, -83.3203125, -82.96875}}, + {"y5p4", []float64{62.2265625, 62.40234375, 99.84375, 100.1953125}}, + {"bs6cju", []float64{69.1040039062, 69.1094970703, -153.380126953, -153.369140625}}, + {"5j", []float64{-61.875, -56.25, -45.0, -33.75}}, + {"4e8z0qpsppx", []float64{-69.048345387, -69.0483440459, -66.4237166941, -66.423715353}}, + {"nbyg", []float64{-85.25390625, -85.078125, 133.2421875, 133.59375}}, + {"8jn2dnxvbd", []float64{28.2495939732, 28.2495993376, -171.112382412, -171.112371683}}, + {"0h6ej9g", []float64{-65.5567932129, -65.5554199219, -176.238555908, -176.237182617}}, + {"9j18njf9mc", []float64{28.1568056345, 28.1568109989, -132.623273134, -132.623262405}}, + {"pf93jtqd2xm9", []float64{-75.7324543409, -75.7324541733, 170.758466944, 170.758467279}}, + {"hc2", []float64{-82.96875, -81.5625, 33.75, 35.15625}}, + {"g", []float64{45.0, 90.0, -45.0, 0.0}}, + {"cewhub", []float64{65.5224609375, 65.5279541016, -103.853759766, -103.842773438}}, + {"2vcrfbgsv2sx", []float64{-11.2890061922, -11.2890060246, -144.366300032, -144.366299696}}, + {"mxdsnv", []float64{-2.08190917969, -2.07641601562, 71.3122558594, 71.3232421875}}, + {"03", []float64{-84.375, -78.75, -168.75, -157.5}}, + {"u73kybuxbq", 
[]float64{64.1216933727, 64.1216987371, 13.3106338978, 13.3106446266}}, + {"uz2", []float64{85.78125, 87.1875, 33.75, 35.15625}}, + {"3w", []float64{-11.25, -5.625, -112.5, -101.25}}, + {"j", []float64{-90.0, -45.0, 45.0, 90.0}}, + {"q8kttcg9t", []float64{-42.6170825958, -42.6170396805, 119.085831642, 119.085874557}}, + {"j8w8vhmh9d", []float64{-87.0315349102, -87.0315295458, 76.8672823906, 76.8672931194}}, + {"qxn54fn91g", []float64{-5.08648216724, -5.08647680283, 121.067351103, 121.067361832}}, + {"9", []float64{0.0, 45.0, -135.0, -90.0}}, + {"3h2p4vmu", []float64{-19.8337554932, -19.8335838318, -134.871253967, -134.870910645}}, + {"j4re6xcw", []float64{-76.7288589478, -76.7286872864, 55.6587982178, 55.6591415405}}, + {"ugkmrc1vj", []float64{64.2104530334, 64.2104959488, 40.0697565079, 40.0697994232}}, + {"n1mj98dq", []float64{-81.9981765747, -81.9980049133, 97.1002578735, 97.1006011963}}, + {"r6c", []float64{-29.53125, -28.125, 147.65625, 149.0625}}, + {"9uksmr", []float64{24.6917724609, 24.697265625, -94.6911621094, -94.6801757812}}, + {"dmve", []float64{32.87109375, 33.046875, -71.015625, -70.6640625}}, + {"jgrdkvuq", []float64{-71.2906265259, -71.2904548645, 89.5114517212, 89.5117950439}}, + {"94g", []float64{15.46875, 16.875, -130.78125, -129.375}}, + {"2vj4gt", []float64{-16.3641357422, -16.3586425781, -139.064941406, -139.053955078}}, + {"q39q2pm5kxt", []float64{-35.4234436154, -35.4234422743, 103.01487878, 103.014880121}}, + {"qcuy493hpwdf", []float64{-34.0939741954, -34.0939740278, 130.541249625, 130.541249961}}, + {"nhfpjqpyqnv3", []float64{-62.0167130046, -62.0167128369, 93.0541204289, 93.0541207641}}, + {"838kk", []float64{9.1845703125, 9.228515625, -168.22265625, -168.178710938}}, + {"zx2fdg", []float64{86.2371826172, 86.2426757812, 158.675537109, 158.686523438}}, + {"j7ktd1g4", []float64{-70.7419967651, -70.7418251038, 62.670135498, 62.6704788208}}, + {"yzp", []float64{84.375, 85.78125, 133.59375, 135.0}}, + {"76kf7tcsnb94", 
[]float64{-31.9159668311, -31.9159666635, -26.91415295, -26.9141526148}}, + {"9jb", []float64{32.34375, 33.75, -135.0, -133.59375}}, + {"6w", []float64{-11.25, -5.625, -67.5, -56.25}}, + {"f2zs58", []float64{49.921875, 49.9273681641, -68.0493164062, -68.0383300781}}, + {"f0", []float64{45.0, 50.625, -90.0, -78.75}}, + {"mnqum74bk", []float64{-9.08015727997, -9.08011436462, 54.7268486023, 54.7268915176}}, + {"t6rhggbn5", []float64{13.512840271, 13.5128831863, 66.2586736679, 66.2587165833}}, + {"g9q", []float64{52.03125, 53.4375, -14.0625, -12.65625}}, + {"7vr", []float64{-15.46875, -14.0625, -1.40625, 0.0}}, + {"t6sr47h", []float64{15.3094482422, 15.3108215332, 62.3309326172, 62.3323059082}}, + {"076vzrw", []float64{-70.666809082, -70.665435791, -164.555969238, -164.554595947}}, + {"s2", []float64{0.0, 5.625, 11.25, 22.5}}, + {"gd7350xxrrs", []float64{57.8360626101, 57.8360639513, -17.7872353792, -17.7872340381}}, + {"0p8sgbg", []float64{-46.9734191895, -46.9720458984, -179.127960205, -179.126586914}}, + {"5zsn39", []float64{-46.7083740234, -46.7028808594, -5.55908203125, -5.54809570312}}, + {"80f", []float64{4.21875, 5.625, -177.1875, -175.78125}}, + {"cymr4xm05c0", []float64{81.4265495539, 81.426550895, -93.7502968311, -93.75029549}}, + {"qmjmz", []float64{-15.8642578125, -15.8203125, 108.940429688, 108.984375}}, + {"39c0", []float64{-35.15625, -34.98046875, -111.09375, -110.7421875}}, + {"pgxnue0jpfn", []float64{-69.1086280346, -69.1086266935, 178.791844547, 178.791845888}}, + {"nytxjp", []float64{-52.1685791016, -52.1630859375, 131.704101562, 131.715087891}}, + {"q1mvpgze", []float64{-37.0687294006, -37.0685577393, 98.4368133545, 98.4371566772}}, + {"tqjgn4h", []float64{34.2883300781, 34.2897033691, 64.6051025391, 64.6064758301}}, + {"tjn18qrtdk", []float64{28.4239697456, 28.4239751101, 53.4588825703, 53.4588932991}}, + {"qq3w12r", []float64{-8.78768920898, -8.78631591797, 103.423919678, 103.425292969}}, + {"zzdu8g6", []float64{87.9963684082, 87.9977416992, 
172.652893066, 172.654266357}}, + {"mz72xeccms3t", []float64{-4.11002179608, -4.11002162844, 83.6525436491, 83.6525439844}}, + {"s7fnw5nzhbs4", []float64{22.2540122643, 22.2540124319, 14.3356508017, 14.3356511369}}, + {"76rtxb6nkdm", []float64{-31.3744948804, -31.3744935393, -22.8596024215, -22.8596010804}}, + {"znbejer93dr", []float64{83.5141731799, 83.514174521, 135.955197662, 135.955199003}}, + {"smk7u7bz27", []float64{30.212289691, 30.2122950554, 17.4143707752, 17.4143815041}}, + {"yvmurs", []float64{75.3002929688, 75.3057861328, 132.165527344, 132.176513672}}, + {"tnjn6dgd8", []float64{34.8641681671, 34.8642110825, 52.1459197998, 52.1459627151}}, + {"gyee1hnu", []float64{82.1125030518, 82.1126747131, -6.27490997314, -6.27456665039}}, + {"gjwh9n02wfmc", []float64{76.7615726776, 76.7615728453, -36.5179139748, -36.5179136395}}, + {"n6jh51hrnfws", []float64{-78.0401661247, -78.0401659571, 108.41922082, 108.419221155}}, + {"shgszu46pudj", []float64{27.5760518946, 27.5760520622, 5.26587635279, 5.26587668806}}, + {"3c", []float64{-39.375, -33.75, -101.25, -90.0}}, + {"fy", []float64{78.75, 84.375, -56.25, -45.0}}, + {"s75d5tn3m", []float64{17.254242897, 17.2542858124, 16.3344812393, 16.3345241547}}, + {"2c1c9kkw8dk5", []float64{-39.0868538059, -39.0868536383, -143.727924228, -143.727923892}}, + {"5yugurey8e2", []float64{-51.3297383487, -51.3297370076, -4.37837362289, -4.37837228179}}, + {"rd", []float64{-33.75, -28.125, 157.5, 168.75}}, + {"um", []float64{73.125, 78.75, 11.25, 22.5}}, + {"bkgc", []float64{71.89453125, 72.0703125, -163.4765625, -163.125}}, + {"8cfxnrphhu0", []float64{11.1133790016, 11.1133803427, -142.449899912, -142.449898571}}, + {"tdm", []float64{12.65625, 14.0625, 74.53125, 75.9375}}, + {"y9usehucn02q", []float64{55.6610321626, 55.6610323302, 118.966741897, 118.966742232}}, + {"vk7kbfk0vf33", []float64{69.7537115403, 69.7537117079, 60.859013088, 60.8590134233}}, + {"ewyx82", []float64{39.287109375, 39.2926025391, -13.3483886719, -13.3374023438}}, 
+ {"ev8c3", []float64{31.1572265625, 31.201171875, -10.1513671875, -10.107421875}}, + {"37p4fhuc", []float64{-27.6153373718, -27.6151657104, -113.811836243, -113.81149292}}, + {"0yvh2p9wbu", []float64{-51.2418007851, -51.2417954206, -139.216657877, -139.216647148}}, + {"81k", []float64{7.03125, 8.4375, -174.375, -172.96875}}, + {"7", []float64{-45.0, 0.0, -45.0, 0.0}}, + {"5c67rs", []float64{-82.3754882812, -82.3699951172, -7.75634765625, -7.74536132812}}, + {"udjvtg", []float64{57.2332763672, 57.2387695312, 30.8386230469, 30.849609375}}, + {"8b56vfqrppu", []float64{0.497001260519, 0.497002601624, -141.418113112, -141.418111771}}, + {"xv50", []float64{28.125, 28.30078125, 172.96875, 173.3203125}}, + {"7ep", []float64{-28.125, -26.71875, -12.65625, -11.25}}, + {"bxzp4746", []float64{89.8410415649, 89.8412132263, -147.554283142, -147.553939819}}, + {"5d54r", []float64{-78.3544921875, -78.310546875, -17.9736328125, -17.9296875}}, + {"hhknsytff", []float64{-64.9149942398, -64.9149513245, 5.8417224884, 5.84176540375}}, + {"gvjjq75", []float64{74.0643310547, 74.0657043457, -3.93997192383, -3.93859863281}}, + {"6ryrt5", []float64{-0.0714111328125, -0.06591796875, -69.7412109375, -69.7302246094}}, + {"tykj1vq1gj", []float64{36.0643225908, 36.0643279552, 84.460272789, 84.4602835178}}, + {"20tdw84b1w", []float64{-41.7480146885, -41.7480093241, -171.976139545, -171.976128817}}, + {"r", []float64{-45.0, 0.0, 135.0, 180.0}}, + {"sbwdq1c4355", []float64{3.21802318096, 3.21802452207, 43.1557171047, 43.1557184458}}, + {"rwhkzg", []float64{-10.3985595703, -10.3930664062, 163.817138672, 163.828125}}, + {"wrzphwjw57", []float64{44.8582237959, 44.8582291603, 111.299196482, 111.299207211}}, + {"674", []float64{-28.125, -26.71875, -75.9375, -74.53125}}, + {"z8kb", []float64{46.40625, 46.58203125, 164.1796875, 164.53125}}, + {"pmudq9vs33", []float64{-57.2503942251, -57.2503888607, 152.871376276, 152.871387005}}, + {"j1br4n9", []float64{-78.8900756836, -78.8887023926, 45.440826416, 
45.442199707}}, + {"ccc", []float64{54.84375, 56.25, -99.84375, -98.4375}}, + {"src", []float64{43.59375, 45.0, 12.65625, 14.0625}}, + {"cc51keq", []float64{50.8625793457, 50.8639526367, -96.8252563477, -96.8238830566}}, + {"pr", []float64{-50.625, -45.0, 146.25, 157.5}}, + {"tvd9", []float64{31.11328125, 31.2890625, 82.265625, 82.6171875}}, + {"489bdms44x", []float64{-87.069016099, -87.0690107346, -64.9345850945, -64.9345743656}}, + {"cmn23svsyv", []float64{73.1958800554, 73.1958854198, -114.887176752, -114.887166023}}, + {"dm9vug1194", []float64{31.9649899006, 31.964995265, -76.0789060593, -76.0788953304}}, + {"45f7", []float64{-68.37890625, -68.203125, -86.8359375, -86.484375}}, + {"mxuxkdtgy50", []float64{-0.117443203926, -0.117441862822, 74.0340328217, 74.0340341628}}, + {"tdwd7", []float64{14.4580078125, 14.501953125, 76.7724609375, 76.81640625}}, + {"s", []float64{0.0, 45.0, 0.0, 45.0}}, + {"jss4y", []float64{-64.2041015625, -64.16015625, 73.388671875, 73.4326171875}}, + {"tmcs", []float64{33.046875, 33.22265625, 58.359375, 58.7109375}}, + {"x", []float64{0.0, 45.0, 135.0, 180.0}}, + {"gc61ncnxuec", []float64{52.2138749063, 52.2138762474, -8.13174828887, -8.13174694777}}, + {"jwpc", []float64{-56.07421875, -55.8984375, 78.3984375, 78.75}}, + {"yq5gn", []float64{79.27734375, 79.3212890625, 106.787109375, 106.831054688}}, + {"uhtmex", []float64{71.3177490234, 71.3232421875, 7.53662109375, 7.54760742188}}, + {"n0hbvyc", []float64{-89.8310852051, -89.8297119141, 96.9337463379, 96.9351196289}}, + {"kp8q0gk0h", []float64{-1.7399597168, -1.73991680145, 0.390186309814, 0.390229225159}}, + {"yjzduj46p", []float64{77.8549575806, 77.8550004959, 100.726046562, 100.726089478}}, + {"8hhkjyy4v5s", []float64{23.2406947017, 23.2406960428, -173.762292266, -173.762290925}}, + {"7", []float64{-45.0, 0.0, -45.0, 0.0}}, + {"0", []float64{-90.0, -45.0, -180.0, -135.0}}, + {"wwu63", []float64{38.3642578125, 38.408203125, 118.520507812, 118.564453125}}, + {"z9rnym", 
[]float64{53.2452392578, 53.2507324219, 167.618408203, 167.629394531}}, + {"78fnfc", []float64{-39.5892333984, -39.5837402344, -19.5666503906, -19.5556640625}}, + {"8dc1mqyer", []float64{15.7261133194, 15.7261562347, -155.85381031, -155.853767395}}, + {"b5", []float64{61.875, 67.5, -180.0, -168.75}}, + {"q3zq3gz6w7", []float64{-34.0365725756, -34.0365672112, 111.532441378, 111.532452106}}, + {"xx6", []float64{40.78125, 42.1875, 160.3125, 161.71875}}, + {"r3", []float64{-39.375, -33.75, 146.25, 157.5}}, + {"dytz0swq74e", []float64{37.8187742829, 37.818775624, -48.1333740056, -48.1333726645}}, + {"gpwu", []float64{87.890625, 88.06640625, -35.5078125, -35.15625}}, + {"9ywdf6cr2w7", []float64{37.0622827113, 37.0622840524, -92.0087559521, -92.008754611}}, + {"3", []float64{-45.0, 0.0, -135.0, -90.0}}, + {"ues", []float64{64.6875, 66.09375, 28.125, 29.53125}}, + {"qggvs5q7pr", []float64{-22.9210478067, -22.9210424423, 129.208112955, 129.208123684}}, + {"fy9d8y2mv2", []float64{82.0372724533, 82.0372778177, -54.1070973873, -54.1070866585}}, + {"bxx92bb60s", []float64{87.411711216, 87.4117165804, -146.919801235, -146.919790506}}, + {"uugkmp4jkm0", []float64{72.5052005053, 72.5052018464, 38.5429680347, 38.5429693758}}, + {"7mmd13sd30", []float64{-15.1085615158, -15.1085561514, -25.9544706345, -25.9544599056}}, + {"5nn2", []float64{-56.25, -56.07421875, -36.2109375, -35.859375}}, + {"jf9sz2r", []float64{-75.1011657715, -75.0997924805, 81.1875915527, 81.1889648438}}, + {"3r", []float64{-5.625, 0.0, -123.75, -112.5}}, + {"yw", []float64{78.75, 84.375, 112.5, 123.75}}, + {"yt31y5hwc3c5", []float64{74.8565152846, 74.8565154523, 114.17615667, 114.176157005}}, + {"7vgv9xhmn", []float64{-11.6501426697, -11.6500997543, -5.90455055237, -5.90450763702}}, + {"f10tgm4q6", []float64{51.6642808914, 51.6643238068, -89.1508769989, -89.1508340836}}, + {"hepj84tfcj", []float64{-72.143971324, -72.1439659595, 32.3516893387, 32.3517000675}}, + {"zg", []float64{61.875, 67.5, 168.75, 180.0}}, + 
{"by12", []float64{78.75, 78.92578125, -144.4921875, -144.140625}}, + {"51", []float64{-84.375, -78.75, -45.0, -33.75}}, + {"w44s78ur", []float64{12.0023918152, 12.0025634766, 93.6752700806, 93.6756134033}}, + {"2tcr0", []float64{-11.42578125, -11.3818359375, -155.7421875, -155.698242188}}, + {"p0n", []float64{-90.0, -88.59375, 143.4375, 144.84375}}, + {"u1", []float64{50.625, 56.25, 0.0, 11.25}}, + {"nygu1mshqxku", []float64{-51.2971434742, -51.2971433066, 129.084147625, 129.08414796}}, + {"3khrs6gty5", []float64{-21.1655312777, -21.1655259132, -117.581605911, -117.581595182}}, + {"4n", []float64{-56.25, -50.625, -90.0, -78.75}}, + {"tj8pjxb5", []float64{32.2110557556, 32.211227417, 45.2416992188, 45.2420425415}}, + {"7nhpg3stdjgu", []float64{-9.87847991288, -9.87847974524, -39.225907065, -39.2259067297}}, + {"rhtcg24t2s50", []float64{-19.3789601326, -19.378959965, 143.232218474, 143.232218809}}, + {"5vx7c75x", []float64{-58.3856391907, -58.3854675293, -0.99494934082, -0.994606018066}}, + {"cs4kgc65z", []float64{68.3424711227, 68.3425140381, -109.168095589, -109.168052673}}, + {"k3sn2mb", []float64{-35.4322814941, -35.4309082031, 16.8859863281, 16.8873596191}}, + {"ud7s8v", []float64{58.4747314453, 58.4802246094, 27.4548339844, 27.4658203125}}, + {"8qqg2ue1mr6b", []float64{35.7525117695, 35.7525119372, -159.220504649, -159.220504314}}, + {"k", []float64{-45.0, 0.0, 0.0, 45.0}}, + {"kwtg55d3me", []float64{-7.89069950581, -7.89069414139, 30.7210993767, 30.7211101055}}, + {"mqsedtkq3", []float64{-7.79235363007, -7.79231071472, 62.6938676834, 62.6939105988}}, + {"94j6tg", []float64{11.7059326172, 11.7114257812, -127.364501953, -127.353515625}}, + {"wd45s", []float64{11.865234375, 11.9091796875, 115.48828125, 115.532226562}}, + {"fwgxjq0n2", []float64{84.233250618, 84.2332935333, -62.3474121094, -62.347369194}}, + {"1k8fh1", []float64{-64.3304443359, -64.3249511719, -122.51953125, -122.508544922}}, + {"h", []float64{-90.0, -45.0, 0.0, 45.0}}, + {"f", []float64{45.0, 
90.0, -90.0, -45.0}}, + {"6", []float64{-45.0, 0.0, -90.0, -45.0}}, + {"qh", []float64{-22.5, -16.875, 90.0, 101.25}}, + {"24nszw5", []float64{-32.8820800781, -32.8807067871, -170.525665283, -170.524291992}}, + {"p8rfy", []float64{-88.1103515625, -88.06640625, 168.662109375, 168.706054688}}, + {"st23m", []float64{29.7509765625, 29.794921875, 23.0712890625, 23.115234375}}, + {"zgg3", []float64{66.26953125, 66.4453125, 173.3203125, 173.671875}}, + {"zgsgk7bcz8k", []float64{65.2796901762, 65.2796915174, 175.617812276, 175.617813617}}, + {"js0", []float64{-67.5, -66.09375, 67.5, 68.90625}}, + {"9zs3bh", []float64{42.5170898438, 42.5225830078, -95.2734375, -95.2624511719}}, + {"k8qd0d6mqfz", []float64{-43.2289119065, -43.2289105654, 31.6659866273, 31.6659879684}}, + {"xrj", []float64{39.375, 40.78125, 153.28125, 154.6875}}, + {"0mbxv2r4", []float64{-56.2922286987, -56.2920570374, -167.806549072, -167.80620575}}, + {"bdf8wz8g1", []float64{60.5983543396, 60.5983972549, -153.686671257, -153.686628342}}, + {"emgeh3mkyu", []float64{32.8787970543, 32.8788024187, -28.6338579655, -28.6338472366}}, + {"5", []float64{-90.0, -45.0, -45.0, 0.0}}, + {"156p3nxfwq", []float64{-70.4081690311, -70.4081636667, -132.132643461, -132.132632732}}, + {"stduvpcnp4", []float64{31.8160736561, 31.8160790205, 26.5885877609, 26.5885984898}}, + {"shk8evrmmbgr", []float64{24.0238861553, 24.023886323, 6.50312740356, 6.50312773883}}, + {"7ynw", []float64{-10.1953125, -10.01953125, -2.109375, -1.7578125}}, + {"r4zgvkp", []float64{-28.8500976562, -28.8487243652, 146.138763428, 146.140136719}}, + {"5b9p", []float64{-85.95703125, -85.78125, -9.84375, -9.4921875}}, + {"gsp", []float64{67.5, 68.90625, -12.65625, -11.25}}, + {"ek4mmvr", []float64{23.4516906738, 23.4530639648, -30.323638916, -30.322265625}}, + {"hd2nu", []float64{-76.1572265625, -76.11328125, 22.67578125, 22.7197265625}}, + {"xzp0t", []float64{39.462890625, 39.5068359375, 178.813476562, 178.857421875}}, + {"fycjx9", []float64{83.9410400391, 
83.9465332031, -54.5141601562, -54.5031738281}}, + {"x4ygy62x5", []float64{16.1414909363, 16.1415338516, 144.767661095, 144.76770401}}, + {"37", []float64{-28.125, -22.5, -123.75, -112.5}}, + {"m40", []float64{-33.75, -32.34375, 45.0, 46.40625}}, + {"m", []float64{-45.0, 0.0, 45.0, 90.0}}, + {"b5", []float64{61.875, 67.5, -180.0, -168.75}}, + {"zwd", []float64{81.5625, 82.96875, 160.3125, 161.71875}}, + {"qgnb2g", []float64{-28.0645751953, -28.0590820312, 133.275146484, 133.286132812}}, + {"7w", []float64{-11.25, -5.625, -22.5, -11.25}}, + {"v6x", []float64{59.0625, 60.46875, 66.09375, 67.5}}, + {"018v8j6y", []float64{-80.5658340454, -80.565662384, -178.94153595, -178.941192627}}, + {"mm4p02h18xc", []float64{-15.6442321837, -15.6442308426, 59.079002291, 59.0790036321}}, + {"6cty1v7", []float64{-35.4789733887, -35.4776000977, -48.0830383301, -48.0816650391}}, + {"g6rzs", []float64{58.974609375, 59.0185546875, -22.67578125, -22.6318359375}}, + {"58qb", []float64{-88.59375, -88.41796875, -13.0078125, -12.65625}}, + {"n8v", []float64{-85.78125, -84.375, 119.53125, 120.9375}}, + {"h1nj4b1uv", []float64{-83.4952783585, -83.4952354431, 8.56096744537, 8.56101036072}}, + {"qt4ukphd", []float64{-16.0891342163, -16.0889625549, 116.54914856, 116.549491882}}, + {"n2", []float64{-90.0, -84.375, 101.25, 112.5}}, + {"50sux01hejpj", []float64{-86.3956842385, -86.3956840709, -38.0111838877, -38.0111835524}}, + {"4exy", []float64{-69.2578125, -69.08203125, -56.6015625, -56.25}}, + {"x9t8r", []float64{8.4814453125, 8.525390625, 165.541992188, 165.5859375}}, + {"785m2wrxgw2", []float64{-44.0414522588, -44.0414509177, -17.8972649574, -17.8972636163}}, + {"0exegdddqf", []float64{-69.6391904354, -69.639185071, -146.7955935, -146.795582771}}, + {"ynrw", []float64{81.2109375, 81.38671875, 100.546875, 100.8984375}}, + {"uqdmuzrz6", []float64{82.6143121719, 82.6143550873, 14.6335315704, 14.6335744858}}, + {"gchp3yu15c", []float64{51.9366699457, 51.9366753101, -5.54244160652, -5.54243087769}}, 
+ {"415qzwpj2r", []float64{-83.154578805, -83.1545734406, -85.0904738903, -85.0904631615}}, + {"v", []float64{45.0, 90.0, 45.0, 90.0}}, + {"vzq07z4", []float64{85.8636474609, 85.865020752, 87.3550415039, 87.3564147949}}, + {"w43s", []float64{13.359375, 13.53515625, 92.109375, 92.4609375}}, + {"b7td", []float64{65.0390625, 65.21484375, -161.015625, -160.6640625}}, + {"2fpe", []float64{-33.22265625, -33.046875, -135.703125, -135.3515625}}, + {"nyt", []float64{-53.4375, -52.03125, 130.78125, 132.1875}}, + {"g", []float64{45.0, 90.0, -45.0, 0.0}}, + {"s", []float64{0.0, 45.0, 0.0, 45.0}}, + {"dkzk", []float64{27.421875, 27.59765625, -68.5546875, -68.203125}}, + {"r3925m207w", []float64{-36.5335857868, -36.5335804224, 148.150784969, 148.150795698}}, + {"0zy5gndwbzt3", []float64{-45.710165631, -45.7101654634, -137.677191608, -137.677191272}}, + {"46ex1dmu9", []float64{-74.6938991547, -74.6938562393, -73.7542676926, -73.7542247772}}, + {"jf74h9rrvnz8", []float64{-76.9839544594, -76.9839542918, 83.1766849011, 83.1766852364}}, + {"6vj1jtgv", []float64{-16.6667747498, -16.6666030884, -48.9719009399, -48.9715576172}}, + {"ntsx", []float64{-57.83203125, -57.65625, 118.828125, 119.1796875}}, + {"ehr3ejb", []float64{24.2015075684, 24.2028808594, -34.6728515625, -34.6714782715}}, + {"p7", []float64{-73.125, -67.5, 146.25, 157.5}}, + {"re7", []float64{-26.71875, -25.3125, 161.71875, 163.125}}, + {"66x6j7sc0t", []float64{-30.5665129423, -30.5665075779, -68.3174300194, -68.3174192905}}, + {"mywcjb2q", []float64{-8.25931549072, -8.25914382935, 88.4952163696, 88.4955596924}}, + {"f88jrw", []float64{48.7683105469, 48.7738037109, -67.1704101562, -67.1594238281}}, + {"bjty3ef9n3y6", []float64{77.0569135621, 77.0569137298, -171.844434701, -171.844434366}}, + {"jhz66rh4fj7s", []float64{-62.8467891365, -62.8467889689, 55.2997731417, 55.299773477}}, + {"u16r3nm", []float64{53.3399963379, 53.3413696289, 3.21487426758, 3.21624755859}}, + {"b", []float64{45.0, 90.0, -180.0, -135.0}}, + 
{"q5rwnj4", []float64{-25.6365966797, -25.6352233887, 100.813293457, 100.814666748}}, + {"dsqd4vc85c", []float64{24.2894035578, 24.2894089222, -58.2363045216, -58.2362937927}}, + {"bzpu7z2qxf9", []float64{85.1630249619, 85.1630263031, -135.18609032, -135.186088979}}, + {"e805cvqt", []float64{0.688877105713, 0.68904876709, -22.4141693115, -22.4138259888}}, + {"y9su01vg", []float64{54.1507530212, 54.1509246826, 119.187583923, 119.187927246}}, + {"41", []float64{-84.375, -78.75, -90.0, -78.75}}, + {"vu3f", []float64{69.2578125, 69.43359375, 81.2109375, 81.5625}}, + {"86", []float64{11.25, 16.875, -168.75, -157.5}}, + {"wpuksnn65", []float64{44.4180679321, 44.4181108475, 96.1610555649, 96.1610984802}}, + {"w", []float64{0.0, 45.0, 90.0, 135.0}}, + {"nrqk483fq0kp", []float64{-48.5138629563, -48.5138627887, 110.151591897, 110.151592232}}, + {"cg0mc", []float64{62.8857421875, 62.9296875, -100.854492188, -100.810546875}}, + {"myt75g", []float64{-7.89367675781, -7.88818359375, 86.2976074219, 86.30859375}}, + {"ugxe27b", []float64{65.2793884277, 65.2807617188, 44.3078613281, 44.3092346191}}, + {"wd845dtbhs8", []float64{14.42781955, 14.4278208911, 112.661898136, 112.661899477}}, + {"c8ef9h6mr", []float64{48.2762002945, 48.2762432098, -107.179226875, -107.17918396}}, + {"bx", []float64{84.375, 90.0, -157.5, -146.25}}, + {"qduv3", []float64{-28.6083984375, -28.564453125, 119.223632812, 119.267578125}}, + {"j86depxm", []float64{-88.1122398376, -88.1120681763, 71.1574172974, 71.1577606201}}, + {"4semjs5hr9p", []float64{-63.7858861685, -63.7858848274, -62.6835371554, -62.6835358143}}, + {"ee", []float64{16.875, 22.5, -22.5, -11.25}}, + {"jxv25m96n", []float64{-46.3756942749, -46.3756513596, 75.0276088715, 75.0276517868}}, + {"vx7gj0006xwe", []float64{86.3086774014, 86.308677569, 72.993280068, 72.9932804033}}, + {"6c", []float64{-39.375, -33.75, -56.25, -45.0}}, + {"ukstfgt78f", []float64{71.3430798054, 71.3430851698, 17.7062165737, 17.7062273026}}, + {"g1h0ye", 
[]float64{50.7733154297, 50.7788085938, -39.0893554688, -39.0783691406}}, + {"5s3j5er", []float64{-65.1969909668, -65.1956176758, -20.9303283691, -20.9289550781}}, + {"6yrnnwq1", []float64{-8.75455856323, -8.75438690186, -46.1123657227, -46.1120223999}}, + {"20fjy", []float64{-39.7705078125, -39.7265625, -176.923828125, -176.879882812}}, + {"1tt417q", []float64{-58.6930847168, -58.6917114258, -105.405578613, -105.404205322}}, + {"hh68", []float64{-66.09375, -65.91796875, 3.515625, 3.8671875}}, + {"85s823r7j", []float64{19.7388267517, 19.7388696671, -173.650717735, -173.65067482}}, + {"3u6n", []float64{-20.0390625, -19.86328125, -98.4375, -98.0859375}}, + {"7w5y8", []float64{-10.107421875, -10.0634765625, -17.2265625, -17.1826171875}}, + {"5", []float64{-90.0, -45.0, -45.0, 0.0}}, + {"zk8j", []float64{71.19140625, 71.3671875, 146.25, 146.6015625}}, + {"mgm1evw0", []float64{-26.4248657227, -26.4246940613, 85.954284668, 85.9546279907}}, + {"m7nzv", []float64{-26.7626953125, -26.71875, 65.9619140625, 66.005859375}}, + {"ev89re48", []float64{31.1737060547, 31.1738777161, -10.2138519287, -10.213508606}}, + {"dnc7mrxvb3", []float64{38.5822302103, 38.5822355747, -88.0008208752, -88.0008101463}}, + {"2", []float64{-45.0, 0.0, -180.0, -135.0}}, + {"wbksmz281", []float64{2.19314575195, 2.1931886673, 130.331540108, 130.331583023}}, + {"0x3zqt", []float64{-47.9168701172, -47.9113769531, -154.753417969, -154.742431641}}, + {"h97q", []float64{-81.9140625, -81.73828125, 27.0703125, 27.421875}}, + {"ypjt38wtc", []float64{85.3015851974, 85.3016281128, 97.8092622757, 97.809305191}}, + {"d3ey054b7p", []float64{9.50874745846, 9.50875282288, -73.4726572037, -73.4726464748}}, + {"zpbkps3qd9r", []float64{89.3213434517, 89.3213447928, 135.682985634, 135.682986975}}, + {"sqxhhhs68mxy", []float64{37.2908039019, 37.2908040695, 21.2753888592, 21.2753891945}}, + {"293cyj4g6q", []float64{-37.6330769062, -37.6330715418, -154.771517515, -154.771506786}}, + {"w3", []float64{5.625, 11.25, 101.25, 
112.5}}, + {"r", []float64{-45.0, 0.0, 135.0, 180.0}}, + {"q69s", []float64{-30.234375, -30.05859375, 103.359375, 103.7109375}}, + {"fy1qerq2ven", []float64{79.9325484037, 79.9325497448, -54.3405380845, -54.3405367434}}, + {"62", []float64{-45.0, -39.375, -78.75, -67.5}}, + {"yke1jyek0fg", []float64{70.5246882141, 70.5246895552, 105.725934952, 105.725936294}}, + {"2rcbq1z5c", []float64{-1.35204792023, -1.35200500488, -166.015734673, -166.015691757}}, + {"gue", []float64{70.3125, 71.71875, -7.03125, -5.625}}, + {"t8kqv", []float64{2.5927734375, 2.63671875, 73.6962890625, 73.740234375}}, + {"bgtecc1b", []float64{65.3521728516, 65.3523445129, -138.436317444, -138.435974121}}, + {"8s550", []float64{23.02734375, 23.0712890625, -153.28125, -153.237304688}}, + {"j655kpm1w0b", []float64{-78.1386239827, -78.1386226416, 60.6516551971, 60.6516565382}}, + {"980h", []float64{0.703125, 0.87890625, -112.5, -112.1484375}}, + {"ssywj", []float64{27.7734375, 27.8173828125, 31.8603515625, 31.904296875}}, + {"hrvu", []float64{-45.703125, -45.52734375, 19.3359375, 19.6875}}, + {"3ftuv", []float64{-30.1025390625, -30.05859375, -92.9443359375, -92.900390625}}, + {"zcphg93jux", []float64{51.4678519964, 51.4678573608, 178.749125004, 178.749135733}}, + {"6", []float64{-45.0, 0.0, -90.0, -45.0}}, + {"vjxk", []float64{76.640625, 76.81640625, 55.1953125, 55.546875}}, + {"t6cgynpnh4", []float64{16.161929369, 16.1619347334, 58.9843940735, 58.9844048023}}, + {"jspc", []float64{-67.32421875, -67.1484375, 78.3984375, 78.75}}, + {"6w7k2v", []float64{-9.06921386719, -9.06372070312, -62.8967285156, -62.8857421875}}, + {"n6z4", []float64{-74.1796875, -74.00390625, 111.09375, 111.4453125}}, + {"y507hx0", []float64{62.4407958984, 62.4421691895, 90.5493164062, 90.5506896973}}, + {"z17p35rnsc6v", []float64{53.3246401884, 53.324640356, 139.272515886, 139.272516221}}, + {"p8uj5760", []float64{-84.8844909668, -84.8843193054, 163.270568848, 163.27091217}}, + {"8pcszdrxwp", []float64{44.4423955679, 
44.4424009323, -177.550477982, -177.550467253}}, + {"mckjc7q4r", []float64{-36.9397687912, -36.9397258759, 84.4384717941, 84.4385147095}}, + {"p6sngm70", []float64{-74.7221374512, -74.7219657898, 152.021942139, 152.022285461}}, + {"59gcptbk", []float64{-79.9481964111, -79.9480247498, -16.8966293335, -16.8962860107}}, + {"n", []float64{-90.0, -45.0, 90.0, 135.0}}, + {"fmnv3ug5m", []float64{74.0745019913, 74.0745449066, -69.1765737534, -69.176530838}}, + {"qc8kkbc0xev", []float64{-35.8112038672, -35.8112025261, 124.312004596, 124.312005937}}, + {"b4", []float64{56.25, 61.875, -180.0, -168.75}}, + {"vkxeh", []float64{70.83984375, 70.8837890625, 66.97265625, 67.0166015625}}, + {"399qcn2zv", []float64{-35.3403139114, -35.3402709961, -110.696997643, -110.696954727}}, + {"ybz3", []float64{49.39453125, 49.5703125, 133.9453125, 134.296875}}, + {"d5e25", []float64{19.6875, 19.7314453125, -85.2978515625, -85.25390625}}, + {"5n", []float64{-56.25, -50.625, -45.0, -33.75}}, + {"wdw", []float64{14.0625, 15.46875, 120.9375, 122.34375}}, + {"29q7", []float64{-37.44140625, -37.265625, -148.7109375, -148.359375}}, + {"tqe1y4trw", []float64{36.885137558, 36.8851804733, 60.7398891449, 60.7399320602}}, + {"zfwy172d", []float64{60.135383606, 60.1355552673, 178.297805786, 178.298149109}}, + {"tfd57f", []float64{14.6447753906, 14.6502685547, 81.7272949219, 81.73828125}}, + {"27f6s6h", []float64{-23.4558105469, -23.4544372559, -165.393676758, -165.392303467}}, + {"zk2q2ph7db", []float64{70.0439357758, 70.0439411402, 146.607517004, 146.607527733}}, + {"ptp", []float64{-61.875, -60.46875, 167.34375, 168.75}}, + {"7pcgp042wz", []float64{-0.878782868385, -0.878777503967, -42.2280657291, -42.2280550003}}, + {"9t", []float64{28.125, 33.75, -112.5, -101.25}}, + {"d", []float64{0.0, 45.0, -90.0, -45.0}}, + {"qd0pwby4y", []float64{-32.4270486832, -32.4270057678, 112.805128098, 112.805171013}}, + {"b", []float64{45.0, 90.0, -180.0, -135.0}}, + {"yet", []float64{64.6875, 66.09375, 119.53125, 
120.9375}}, + {"v4pbsh7cp", []float64{56.3614082336, 56.361451149, 56.0796689987, 56.0797119141}}, + {"kvr7u7pm7jeu", []float64{-14.7921594232, -14.7921592556, 44.1421702132, 44.1421705484}}, + {"687jj", []float64{-42.71484375, -42.6708984375, -63.0615234375, -63.017578125}}, + {"4w29", []float64{-54.66796875, -54.4921875, -66.796875, -66.4453125}}, + {"6bz45dve", []float64{-40.4140663147, -40.4138946533, -46.2448883057, -46.2445449829}}, + {"gfysykk0nw29", []float64{61.32709058, 61.3270907477, -1.82894401252, -1.82894367725}}, + {"pdw4scw", []float64{-75.4898071289, -75.4884338379, 166.15447998, 166.155853271}}, + {"65f4", []float64{-23.5546875, -23.37890625, -87.1875, -86.8359375}}, + {"jc9g6tpd08j", []float64{-80.9634017944, -80.9634004533, 81.3311286271, 81.3311299682}}, + {"ckw1tvf18r09", []float64{70.608052779, 70.6080529466, -115.057056472, -115.057056136}}, + {"2cbtp", []float64{-34.27734375, -34.2333984375, -145.239257812, -145.1953125}}, + {"54myf", []float64{-76.1572265625, -76.11328125, -36.826171875, -36.7822265625}}, + {"kw", []float64{-11.25, -5.625, 22.5, 33.75}}, + {"mw", []float64{-11.25, -5.625, 67.5, 78.75}}, + {"2bjvee8sgek", []float64{-44.0131442249, -44.0131428838, -138.009411693, -138.009410352}}, + {"yj6r4eyxuv1", []float64{75.783675313, 75.7836766541, 93.2830573618, 93.2830587029}}, + {"n", []float64{-90.0, -45.0, 90.0, 135.0}}, + {"pkec4ng", []float64{-64.4746398926, -64.4732666016, 151.615447998, 151.616821289}}, + {"uvpynzd0v", []float64{74.2210149765, 74.2210578918, 44.9480295181, 44.9480724335}}, + {"grxrc9by8g4", []float64{88.5605496168, 88.5605509579, -23.4877046943, -23.4877033532}}, + {"6z0", []float64{-5.625, -4.21875, -56.25, -54.84375}}, + {"s", []float64{0.0, 45.0, 0.0, 45.0}}, + {"6npe6", []float64{-10.6787109375, -10.634765625, -79.365234375, -79.3212890625}}, + {"04wzmgz23", []float64{-74.6424436569, -74.6424007416, -170.245127678, -170.245084763}}, + {"dmfzk9", []float64{33.6236572266, 33.6291503906, -74.6850585938, 
-74.6740722656}}, + {"eeu", []float64{21.09375, 22.5, -16.875, -15.46875}}, + {"84bd9k3t", []float64{15.9324073792, 15.9325790405, -179.239883423, -179.2395401}}, + {"7q6ywwxq2kc", []float64{-8.664367944, -8.6643666029, -29.5871995389, -29.5871981978}}, + {"bve6jyuwk", []float64{76.327214241, 76.3272571564, -141.420650482, -141.420607567}}, + {"h558", []float64{-73.125, -72.94921875, 4.921875, 5.2734375}}, + {"sk4f3d2h1vq", []float64{22.9085822403, 22.9085835814, 15.1831886172, 15.1831899583}}, + {"tffnvn", []float64{16.6882324219, 16.6937255859, 81.7822265625, 81.7932128906}}, + {"eppvmm886m", []float64{40.3281337023, 40.3281390667, -33.8700664043, -33.8700556755}}, + {"d2w", []float64{2.8125, 4.21875, -70.3125, -68.90625}}, + {"sd3", []float64{12.65625, 14.0625, 23.90625, 25.3125}}, + {"q73kk", []float64{-25.9716796875, -25.927734375, 103.18359375, 103.227539062}}, + {"wtz5yx", []float64{33.0413818359, 33.046875, 122.629394531, 122.640380859}}, + {"xw8z", []float64{37.79296875, 37.96875, 158.5546875, 158.90625}}, + {"dyhuchkt7qr", []float64{34.6092416346, 34.6092429757, -49.5200385153, -49.5200371742}}, + {"pyb1es47f", []float64{-51.7449617386, -51.7449188232, 168.906984329, 168.907027245}}, + {"v6jcgfj1nus", []float64{56.5687993169, 56.568800658, 64.5078939199, 64.507895261}}, + {"xxe", []float64{42.1875, 43.59375, 161.71875, 163.125}}, + {"7xdvcext7rq", []float64{-1.78159162402, -1.78159028292, -18.5564473271, -18.556445986}}, + {"1f35b7w", []float64{-76.6653442383, -76.6639709473, -99.8245239258, -99.8231506348}}, + {"he675b8pjek", []float64{-71.187440604, -71.1874392629, 25.8290988207, 25.8291001618}}, + {"pzj3", []float64{-50.44921875, -50.2734375, 176.1328125, 176.484375}}, + {"1s1m8j10zwq3", []float64{-66.5055748634, -66.5055746958, -110.740483962, -110.740483627}}, + {"sk4xsg4ufxm", []float64{23.8356931508, 23.8356944919, 14.9782557786, 14.9782571197}}, + {"r3m2ker83", []float64{-37.906908989, -37.9068660736, 153.840909004, 153.84095192}}, + {"p7bj", 
[]float64{-68.02734375, -67.8515625, 146.25, 146.6015625}}, + {"4wws597u", []float64{-52.7268218994, -52.726650238, -58.2004165649, -58.2000732422}}, + {"u9rk", []float64{52.734375, 52.91015625, 32.6953125, 33.046875}}, + {"2mv856bp9uj4", []float64{-12.6398345456, -12.6398343779, -160.872720927, -160.872720592}}, + {"57th4543xnbs", []float64{-69.5926011354, -69.5926009677, -26.6274683923, -26.627468057}}, + {"8pct", []float64{44.47265625, 44.6484375, -177.890625, -177.5390625}}, + {"me0skjqbk8", []float64{-27.3490476608, -27.3490422964, 68.3883690834, 68.3883798122}}, + {"30s74", []float64{-41.66015625, -41.6162109375, -128.935546875, -128.891601562}}, + {"9r0h9fedn", []float64{40.1800918579, 40.1801347733, -123.668031693, -123.667988777}}, + {"9v6gvte7r", []float64{30.2211999893, 30.2212429047, -97.136349678, -97.1363067627}}, + {"j5d", []float64{-70.3125, -68.90625, 47.8125, 49.21875}}, + {"hf8ge", []float64{-75.322265625, -75.2783203125, 34.9365234375, 34.98046875}}, + {"hf2hmz", []float64{-76.5582275391, -76.552734375, 34.0026855469, 34.013671875}}, + {"5wc405", []float64{-51.6632080078, -51.6577148438, -21.09375, -21.0827636719}}, + {"x2dk49y7p8", []float64{3.52575302124, 3.52575838566, 149.532830715, 149.532841444}}, + {"350jjfux7dbk", []float64{-27.2297275811, -27.2297274135, -134.740984105, -134.740983769}}, + {"04wd", []float64{-75.5859375, -75.41015625, -170.859375, -170.5078125}}, + {"1zxmhstk", []float64{-46.9081878662, -46.9080162048, -90.8497238159, -90.8493804932}}, + {"b2sj26ddce3", []float64{48.7495739758, 48.7495753169, -163.11051473, -163.110513389}}, + {"vznd8mz363", []float64{84.8462587595, 84.8462641239, 87.9116642475, 87.9116749763}}, + {"f", []float64{45.0, 90.0, -90.0, -45.0}}, + {"0", []float64{-90.0, -45.0, -180.0, -135.0}}, + {"kw", []float64{-11.25, -5.625, 22.5, 33.75}}, + {"1s0nq579", []float64{-66.3833427429, -66.3831710815, -112.231521606, -112.231178284}}, + {"mzky2scd", []float64{-3.09368133545, -3.09350967407, 85.4537200928, 
85.4540634155}}, + {"kctpzwx2rxg", []float64{-35.1644052565, -35.1644039154, 41.121122092, 41.1211234331}}, + {"19", []float64{-84.375, -78.75, -112.5, -101.25}}, + {"pmg4wc8pn", []float64{-57.2073554993, -57.2073125839, 150.765638351, 150.765681267}}, + {"sxcn0yd0jck", []float64{44.6841497719, 44.684151113, 23.9422076941, 23.9422090352}}, + {"000dsj4g", []float64{-89.5325660706, -89.5323944092, -179.1173172, -179.116973877}}, + {"pgv0", []float64{-68.90625, -68.73046875, 175.78125, 176.1328125}}, + {"dhw0935d8", []float64{25.4063129425, 25.4063558578, -81.5027618408, -81.5027189255}}, + {"4gvz08kbk9", []float64{-67.6743596792, -67.6743543148, -48.1353735924, -48.1353628635}}, + {"djz4g1m", []float64{32.8340148926, 32.8353881836, -80.0175476074, -80.0161743164}}, + {"x4yhn1h5m1z", []float64{16.1779354513, 16.1779367924, 143.706889004, 143.706890345}}, + {"9t", []float64{28.125, 33.75, -112.5, -101.25}}, + {"d", []float64{0.0, 45.0, -90.0, -45.0}}, + {"4fgytpvp2v", []float64{-73.3448284864, -73.344823122, -50.7499372959, -50.7499265671}}, + {"jggxx", []float64{-67.587890625, -67.5439453125, 83.9794921875, 84.0234375}}, + {"mye8t1", []float64{-8.34411621094, -8.33862304688, 83.8916015625, 83.9025878906}}, + {"bssun7dstj", []float64{71.0356503725, 71.0356557369, -150.542006493, -150.541995764}}, + {"ehhv", []float64{23.37890625, 23.5546875, -38.3203125, -37.96875}}, + {"484tc", []float64{-88.9892578125, -88.9453125, -63.9404296875, -63.896484375}}, + {"d4tjtv", []float64{15.0567626953, 15.0622558594, -82.7160644531, -82.705078125}}, + {"2z", []float64{-5.625, 0.0, -146.25, -135.0}}, + {"t5ey32", []float64{20.7861328125, 20.7916259766, 50.3283691406, 50.3393554688}}, + {"um", []float64{73.125, 78.75, 11.25, 22.5}}, + {"pe", []float64{-73.125, -67.5, 157.5, 168.75}}, + {"2x05zw6z", []float64{-4.93028640747, -4.93011474609, -157.166633606, -157.166290283}}, + {"67", []float64{-28.125, -22.5, -78.75, -67.5}}, + {"dxfr3yndexbg", []float64{44.9015942775, 44.9015944451, 
-64.249955602, -64.2499552667}}, + {"y87", []float64{46.40625, 47.8125, 116.71875, 118.125}}, + {"2h29w", []float64{-20.830078125, -20.7861328125, -179.033203125, -178.989257812}}, + {"cv", []float64{73.125, 78.75, -101.25, -90.0}}, + {"jcx2m1mjy9e", []float64{-81.5106931329, -81.5106917918, 89.1721884906, 89.1721898317}}, + {"ryj9w", []float64{-10.986328125, -10.9423828125, 176.748046875, 176.791992188}}, + {"g5sftxrhf40", []float64{65.1676046848, 65.1676060259, -38.0689144135, -38.0689130723}}, + {"nhs95z98z", []float64{-64.4703912735, -64.4703483582, 96.4952802658, 96.4953231812}}, + {"s7n00se8u3n", []float64{16.8998533487, 16.8998546898, 19.7144696116, 19.7144709527}}, + {"4310r6m", []float64{-84.3186950684, -84.3173217773, -77.0182800293, -77.0169067383}}, + {"7kkmp", []float64{-20.21484375, -20.1708984375, -27.4658203125, -27.421875}}, + {"3dvnpf13665", []float64{-28.4653508663, -28.4653495252, -105.126356632, -105.12635529}}, + {"1cv2hegqd1s7", []float64{-80.1345262863, -80.1345261186, -93.6648788676, -93.6648785323}}, + {"uy0yttxwk65", []float64{79.9238741398, 79.9238754809, 35.0568728149, 35.056874156}}, + {"166cs2etxffy", []float64{-77.0763716474, -77.0763714798, -119.690902121, -119.690901786}}, + {"bm7n7", []float64{75.6298828125, 75.673828125, -164.399414062, -164.35546875}}, + {"5r7q", []float64{-48.1640625, -47.98828125, -29.1796875, -28.828125}}, + {"ymjqjv6", []float64{74.2085266113, 74.2098999023, 108.888244629, 108.88961792}}, + {"jx95kpvmv9", []float64{-47.1976464987, -47.1976411343, 69.0894770622, 69.0894877911}}, + {"f1m4m9", []float64{52.4322509766, 52.4377441406, -82.7270507812, -82.7160644531}}, + {"0cdr1wfep", []float64{-80.2944374084, -80.2943944931, -143.016285896, -143.016242981}}, + {"fe4q2v7", []float64{63.0024719238, 63.0038452148, -64.2988586426, -64.2974853516}}, + {"9pxfyb9p", []float64{42.6748466492, 42.6750183105, -123.80355835, -123.803215027}}, + {"8w088r", []float64{33.8763427734, 33.8818359375, -156.785888672, 
-156.774902344}}, + {"u569fj", []float64{63.6163330078, 63.6218261719, 3.603515625, 3.61450195312}}, + {"smvm0syj0kst", []float64{33.2496320643, 33.2496322319, 18.6630416662, 18.6630420014}}, + {"vx", []float64{84.375, 90.0, 67.5, 78.75}}, + {"zj", []float64{73.125, 78.75, 135.0, 146.25}}, + {"5", []float64{-90.0, -45.0, -45.0, 0.0}}, + {"3", []float64{-45.0, 0.0, -135.0, -90.0}}, + {"69n85w0", []float64{-39.3420410156, -39.3406677246, -58.2055664062, -58.2041931152}}, + {"ywmj", []float64{81.03515625, 81.2109375, 119.53125, 119.8828125}}, + {"2717c52t", []float64{-27.4471092224, -27.446937561, -166.947555542, -166.947212219}}, + {"t1h", []float64{5.625, 7.03125, 50.625, 52.03125}}, + {"c", []float64{45.0, 90.0, -135.0, -90.0}}, + {"5xphd2", []float64{-49.833984375, -49.8284912109, -12.5573730469, -12.5463867188}}, + {"xy8", []float64{36.5625, 37.96875, 168.75, 170.15625}}, + {"t", []float64{0.0, 45.0, 45.0, 90.0}}, + {"3pet007v7qb", []float64{-1.93128302693, -1.93128168583, -130.072835684, -130.072834343}}, + {"dyuz", []float64{39.19921875, 39.375, -49.5703125, -49.21875}}, + {"6r", []float64{-5.625, 0.0, -78.75, -67.5}}, + {"y8v06s5", []float64{49.2846679688, 49.2860412598, 119.645233154, 119.646606445}}, + {"6", []float64{-45.0, 0.0, -90.0, -45.0}}, + {"mfuy2m", []float64{-28.4051513672, -28.3996582031, 85.4406738281, 85.4516601562}}, + {"dnp2u", []float64{33.8818359375, 33.92578125, -79.62890625, -79.5849609375}}, + {"g4u3ehx6", []float64{60.757484436, 60.7576560974, -38.8816452026, -38.8813018799}}, + {"q14h9tg88tx", []float64{-38.5522833467, -38.5522820055, 92.8832553327, 92.8832566738}}, + {"d4m6rn", []float64{13.0847167969, 13.0902099609, -82.3095703125, -82.2985839844}}, + {"64", []float64{-33.75, -28.125, -90.0, -78.75}}, + {"y2nzb", []float64{46.3623046875, 46.40625, 110.7421875, 110.786132812}}, + {"yhs5m8ytcgxb", []float64{70.8889147639, 70.8889149316, 95.8757111058, 95.875711441}}, + {"ytp9j8f0dv8", []float64{73.305016458, 73.3050177991, 
123.291438818, 123.291440159}}, + {"pxcpm2h", []float64{-45.1318359375, -45.1304626465, 159.142456055, 159.143829346}}, + {"5zyf9", []float64{-45.966796875, -45.9228515625, -1.7138671875, -1.669921875}}, + {"wmz7v2", []float64{33.0029296875, 33.0084228516, 111.676025391, 111.687011719}}, + {"3fb7hkc8wus", []float64{-28.9777037501, -28.977702409, -100.709314942, -100.709313601}}, + {"ssstmsrv", []float64{26.2595558167, 26.259727478, 29.0804672241, 29.0808105469}}, + {"21", []float64{-39.375, -33.75, -180.0, -168.75}}, + {"w0du7r8257", []float64{3.60078513622, 3.60079050064, 94.0104925632, 94.0105032921}}, + {"fhx", []float64{70.3125, 71.71875, -80.15625, -78.75}}, + {"1xd", []float64{-47.8125, -46.40625, -109.6875, -108.28125}}, + {"s040p9v3shb2", []float64{0.00989601016045, 0.00989617779851, 3.14947161824, 3.14947195351}}, + {"gq", []float64{78.75, 84.375, -33.75, -22.5}}, + {"0", []float64{-90.0, -45.0, -180.0, -135.0}}, + {"17b4wsgdq0q", []float64{-68.4403167665, -68.4403154254, -123.459283412, -123.45928207}}, + {"x3qs", []float64{7.734375, 7.91015625, 155.390625, 155.7421875}}, + {"rc", []float64{-39.375, -33.75, 168.75, 180.0}}, + {"sfxjv06b9", []float64{15.0747013092, 15.0747442245, 43.8172960281, 43.8173389435}}, + {"6", []float64{-45.0, 0.0, -90.0, -45.0}}, + {"ngveyg", []float64{-68.2305908203, -68.2250976562, 131.781005859, 131.791992188}}, + {"pmxd5bd39qp", []float64{-58.7079012394, -58.7078998983, 156.964822859, 156.9648242}}, + {"xhw6", []float64{25.6640625, 25.83984375, 143.7890625, 144.140625}}, + {"bc6vx6", []float64{53.0090332031, 53.0145263672, -142.064208984, -142.053222656}}, + {"uuxy5sbu", []float64{71.3939666748, 71.3941383362, 44.803276062, 44.8036193848}}, + {"yu", []float64{67.5, 73.125, 123.75, 135.0}}, + {"610pf9c3ebh", []float64{-38.0028247833, -38.0028234422, -89.888253808, -89.8882524669}}, + {"9d", []float64{11.25, 16.875, -112.5, -101.25}}, + {"1s0tk4yp", []float64{-66.5608406067, -66.5606689453, -111.612854004, -111.612510681}}, + 
{"ss7yjqhs5su", []float64{24.9946086109, 24.994609952, 28.0104857683, 28.0104871094}}, + {"krqww1d0kp", []float64{-3.06785166264, -3.06784629822, 20.6572151184, 20.6572258472}}, + {"4yzg18c07qnm", []float64{-51.4997104369, -51.4997102693, -45.2841233835, -45.2841230482}}, + {"5wf241", []float64{-52.0257568359, -52.0202636719, -19.248046875, -19.2370605469}}, + {"gxrh60ce", []float64{86.5329551697, 86.5331268311, -12.5662994385, -12.5659561157}}, + {"5rzrgr9qf", []float64{-45.0015878677, -45.0015449524, -23.4100627899, -23.4100198746}}, + {"u3", []float64{50.625, 56.25, 11.25, 22.5}}, + {"8tm4uz7c", []float64{30.0546455383, 30.0548171997, -150.254859924, -150.254516602}}, + {"wz5f3", []float64{39.7705078125, 39.814453125, 129.067382812, 129.111328125}}, + {"ckz", []float64{71.71875, 73.125, -113.90625, -112.5}}, + {"h4s31", []float64{-75.76171875, -75.7177734375, 6.0205078125, 6.064453125}}, + {"hguje", []float64{-67.939453125, -67.8955078125, 39.5068359375, 39.55078125}}, + {"4rnfer", []float64{-50.1470947266, -50.1416015625, -69.1149902344, -69.1040039062}}, + {"gj", []float64{73.125, 78.75, -45.0, -33.75}}, + {"04", []float64{-78.75, -73.125, -180.0, -168.75}}, + {"kvj", []float64{-16.875, -15.46875, 40.78125, 42.1875}}, + {"7c3p", []float64{-36.73828125, -36.5625, -9.84375, -9.4921875}}, + {"rdw55", []float64{-30.41015625, -30.3662109375, 166.069335938, 166.11328125}}, + {"7spe18wk094b", []float64{-21.969217658, -21.9692174904, -11.8785988167, -11.8785984814}}, + {"uxumm", []float64{89.5166015625, 89.560546875, 28.6962890625, 28.740234375}}, + {"1n0sh0", []float64{-55.546875, -55.5413818359, -134.12109375, -134.110107422}}, + {"cphmy", []float64{85.3857421875, 85.4296875, -128.759765625, -128.715820312}}, + {"sd", []float64{11.25, 16.875, 22.5, 33.75}}, + {"h6jbb2gxyd0", []float64{-78.6127030849, -78.6127017438, 19.3520092964, 19.3520106375}}, + {"x3", []float64{5.625, 11.25, 146.25, 157.5}}, + {"yv289c0nbs", []float64{74.625813961, 74.6258193254, 124.530050755, 
124.530061483}}, + {"g1fxbp5", []float64{56.2445068359, 56.245880127, -41.480255127, -41.4788818359}}, + {"bqdh", []float64{82.265625, 82.44140625, -165.9375, -165.5859375}}, + {"w558neznn3", []float64{16.8966346979, 16.8966400623, 95.2174007893, 95.2174115181}}, + {"up", []float64{84.375, 90.0, 0.0, 11.25}}, + {"pmy73pm2r", []float64{-57.0450925827, -57.0450496674, 155.090517998, 155.090560913}}, + {"jzerkufsjnh", []float64{-46.5112745762, -46.5112732351, 83.5327059031, 83.5327072442}}, + {"8eks", []float64{18.984375, 19.16015625, -151.171875, -150.8203125}}, + {"3rryyvfxc", []float64{-2.99931049347, -2.99926757812, -112.551455498, -112.551412582}}, + {"vn0cz", []float64{79.0576171875, 79.1015625, 46.3623046875, 46.40625}}, + {"skb5cnez", []float64{27.4148368835, 27.4150085449, 11.2990951538, 11.2994384766}}, + {"3p5cu0yju", []float64{-5.31227588654, -5.31223297119, -129.542369843, -129.542326927}}, + {"8yrwss1y2s", []float64{36.3218951225, 36.3219004869, -135.502946377, -135.502935648}}, + {"7x9kys4ydp", []float64{-1.95441305637, -1.95440769196, -20.4526805878, -20.4526698589}}, + {"u7gscks3", []float64{66.9536018372, 66.9537734985, 16.2326431274, 16.2329864502}}, + {"30c8pz7", []float64{-40.7414245605, -40.7400512695, -132.545928955, -132.544555664}}, + {"umertwj6wxuc", []float64{77.2892892547, 77.2892894223, 16.0695068166, 16.0695071518}}, + {"n6", []float64{-78.75, -73.125, 101.25, 112.5}}, + {"br8dqrhg058f", []float64{87.6219940558, 87.6219942234, -167.765692659, -167.765692323}}, + {"tp", []float64{39.375, 45.0, 45.0, 56.25}}, + {"33uy", []float64{-34.1015625, -33.92578125, -117.0703125, -116.71875}}, + {"u3ps1jnxg", []float64{51.356921196, 51.3569641113, 21.8498754501, 21.8499183655}}, + {"nqwmqymbyz", []float64{-52.4801498652, -52.4801445007, 110.343879461, 110.34389019}}, + {"wfugcgd", []float64{16.1471557617, 16.1485290527, 130.509338379, 130.51071167}}, + {"vp7g2eg", []float64{86.3731384277, 86.3745117188, 50.2995300293, 50.3009033203}}, + {"zz", 
[]float64{84.375, 90.0, 168.75, 180.0}}, + {"859t0xmm6gwk", []float64{20.6071523577, 20.6071525253, -177.861316167, -177.861315832}}, + {"bvjnf49wp1hu", []float64{74.3262923509, 74.3262925185, -139.128492661, -139.128492326}}, + {"08ybs", []float64{-85.693359375, -85.6494140625, -147.83203125, -147.788085938}}, + {"dr908p7", []float64{42.3152160645, 42.3165893555, -77.339630127, -77.3382568359}}, + {"gufpbsu", []float64{73.1071472168, 73.1085205078, -8.41003417969, -8.40866088867}}, + {"623c388eg2t", []float64{-43.3706304431, -43.370629102, -76.2223117054, -76.2223103642}}, + {"8rc", []float64{43.59375, 45.0, -167.34375, -165.9375}}, + {"h4", []float64{-78.75, -73.125, 0.0, 11.25}}, + {"x84", []float64{0.0, 1.40625, 160.3125, 161.71875}}, + {"bbxgcx6nyzk", []float64{48.5127027333, 48.5127040744, -135.282602906, -135.282601565}}, + {"tqg06239nrht", []float64{38.014278654, 38.0142788216, 60.5699611455, 60.5699614808}}, + {"05fd", []float64{-68.5546875, -68.37890625, -176.484375, -176.1328125}}, + {"wuc", []float64{26.71875, 28.125, 125.15625, 126.5625}}, + {"m3hh", []float64{-38.671875, -38.49609375, 61.875, 62.2265625}}, + {"m2w4ru", []float64{-41.7700195312, -41.7645263672, 65.0280761719, 65.0390625}}, + {"4cz3k", []float64{-79.9365234375, -79.892578125, -45.87890625, -45.8349609375}}, + {"jqefz9v", []float64{-52.9444885254, -52.9431152344, 61.8598937988, 61.8612670898}}, + {"qqcnf60wjf", []float64{-5.83269953728, -5.83269417286, 102.756060362, 102.756071091}}, + {"n", []float64{-90.0, -45.0, 90.0, 135.0}}, + {"n8qen549x9vr", []float64{-88.0496587045, -88.0496585369, 121.908059008, 121.908059344}}, + {"g01nwm2wyj", []float64{46.1726027727, 46.1726081371, -43.3181476593, -43.3181369305}}, + {"6xc", []float64{-1.40625, 0.0, -66.09375, -64.6875}}, + {"v", []float64{45.0, 90.0, 45.0, 90.0}}, + {"nmjjnhz67", []float64{-60.9696149826, -60.9695720673, 108.555006981, 108.555049896}}, + {"2gzwv1zggqg", []float64{-22.7094335854, -22.7094322443, -135.472611934, 
-135.472610593}}, + {"dt", []float64{28.125, 33.75, -67.5, -56.25}}, + {"v33ysf2y0s", []float64{53.1872391701, 53.1872445345, 58.9207291603, 58.9207398891}}, + {"fwy6tccjp3", []float64{83.4186798334, 83.4186851978, -58.4565675259, -58.456556797}}, + {"qug6fjx4ue8k", []float64{-17.7671476454, -17.7671474777, 128.418009616, 128.418009952}}, + {"8y59duq21", []float64{34.0370178223, 34.0370607376, -141.198649406, -141.198606491}}, + {"pxgfrpkhdnk", []float64{-45.9701107442, -45.9701094031, 163.086639047, 163.086640388}}, + {"91b", []float64{9.84375, 11.25, -135.0, -133.59375}}, + {"v83mnvugmh", []float64{47.3173213005, 47.3173266649, 69.5611810684, 69.5611917973}}, + {"k76cem9", []float64{-26.4248657227, -26.4234924316, 15.2613830566, 15.2627563477}}, + {"ydjq0mk", []float64{57.3335266113, 57.3348999023, 119.899291992, 119.900665283}}, + {"cdsuf", []float64{59.8974609375, 59.94140625, -105.732421875, -105.688476562}}, + {"tuyz3", []float64{27.9931640625, 28.037109375, 88.2861328125, 88.330078125}}, + {"r7r2skwfj00", []float64{-26.605796814, -26.6057954729, 156.641564369, 156.64156571}}, + {"8hx507p9f5x", []float64{25.8566424251, 25.8566437662, -170.134868771, -170.13486743}}, + {"cc", []float64{50.625, 56.25, -101.25, -90.0}}, + {"pkuuvxz1z3", []float64{-62.4034112692, -62.4034059048, 153.181310892, 153.181321621}}, + {"4", []float64{-90.0, -45.0, -90.0, -45.0}}, + {"7vvz9gbf", []float64{-11.316947937, -11.3167762756, -3.08612823486, -3.08578491211}}, + {"54r", []float64{-77.34375, -75.9375, -35.15625, -33.75}}, + {"5r0mm0pwj1j", []float64{-49.7011131048, -49.7011117637, -33.1681899726, -33.1681886315}}, + {"mx", []float64{-5.625, 0.0, 67.5, 78.75}}, + {"rm39jwc", []float64{-15.2558898926, -15.2545166016, 148.60244751, 148.603820801}}, + {"x", []float64{0.0, 45.0, 135.0, 180.0}}, + {"k", []float64{-45.0, 0.0, 0.0, 45.0}}, + {"4gqrkyvqc", []float64{-70.4060983658, -70.4060554504, -47.2449445724, -47.2449016571}}, + {"9s36btfr9", []float64{24.4225215912, 24.4225645065, 
-110.717082024, -110.717039108}}, + {"dxt5", []float64{42.71484375, 42.890625, -60.46875, -60.1171875}}, + {"21x8tven9", []float64{-36.4432811737, -36.4432382584, -169.196276665, -169.196233749}}, + {"7hh", []float64{-22.5, -21.09375, -39.375, -37.96875}}, + {"v7nm8q7s", []float64{62.8768157959, 62.8769874573, 65.0548553467, 65.0551986694}}, + {"e9qxh", []float64{8.26171875, 8.3056640625, -13.18359375, -13.1396484375}}, + {"273", []float64{-26.71875, -25.3125, -167.34375, -165.9375}}, + {"p99qpkcvb", []float64{-80.4807329178, -80.4806900024, 159.578819275, 159.57886219}}, + {"q2062f9csybw", []float64{-44.5904645696, -44.590464402, 101.637129262, 101.637129597}}, + {"btsq32", []float64{77.0361328125, 77.0416259766, -151.468505859, -151.457519531}}, + {"1cj5", []float64{-83.84765625, -83.671875, -94.21875, -93.8671875}}, + {"j71pemzwqxt9", []float64{-71.7739416473, -71.7739414796, 57.8096582741, 57.8096586093}}, + {"qnqcbndu0nf", []float64{-9.49970439076, -9.49970304966, 99.4959667325, 99.4959680736}}, + {"2v7cm6junqb", []float64{-15.237314254, -15.2373129129, -140.737684965, -140.737683624}}, + {"e7", []float64{16.875, 22.5, -33.75, -22.5}}, + {"s91p45", []float64{6.87194824219, 6.87744140625, 23.994140625, 24.0051269531}}, + {"xwk1de36", []float64{35.438117981, 35.4382896423, 163.236579895, 163.236923218}}, + {"gf2", []float64{57.65625, 59.0625, -11.25, -9.84375}}, + {"d1m", []float64{7.03125, 8.4375, -82.96875, -81.5625}}, + {"0b", []float64{-90.0, -84.375, -146.25, -135.0}}, + {"rw4", []float64{-11.25, -9.84375, 160.3125, 161.71875}}, + {"74pk9xsb", []float64{-32.9177856445, -32.9176139832, -34.7322463989, -34.7319030762}}, + {"4ghe0cn8s", []float64{-72.5920772552, -72.5920343399, -49.8798179626, -49.8797750473}}, + {"tw4", []float64{33.75, 35.15625, 70.3125, 71.71875}}, + {"gevx3", []float64{67.3681640625, 67.412109375, -14.7216796875, -14.677734375}}, + {"kw8w6yqc1ks", []float64{-7.30433911085, -7.30433776975, 23.3333033323, 23.3333046734}}, + {"vbdjwzpg7", 
[]float64{48.8183069229, 48.8183498383, 81.8699026108, 81.8699455261}}, + {"tp", []float64{39.375, 45.0, 45.0, 56.25}}, + {"751q", []float64{-27.0703125, -26.89453125, -43.2421875, -42.890625}}, + {"yeurzu", []float64{67.4780273438, 67.4835205078, 118.817138672, 118.828125}}, + {"svx", []float64{30.9375, 32.34375, 43.59375, 45.0}}, + {"4yrg1w", []float64{-54.2834472656, -54.2779541016, -45.2856445312, -45.2746582031}}, + {"h6xmxenmzkc", []float64{-74.9532110989, -74.9532097578, 21.7837978899, 21.7837992311}}, + {"j5uzmqughwj1", []float64{-67.5942097418, -67.5942095742, 51.9171233475, 51.9171236828}}, + {"26trngxu", []float64{-29.6871185303, -29.6869468689, -161.059913635, -161.059570312}}, + {"9", []float64{0.0, 45.0, -135.0, -90.0}}, + {"h58jxs7g", []float64{-69.3218421936, -69.3216705322, 0.334739685059, 0.335083007812}}, + {"tg2kgbk3", []float64{19.1177558899, 19.1179275513, 79.2721939087, 79.2725372314}}, + {"x34hfu", []float64{6.48193359375, 6.48742675781, 149.183349609, 149.194335938}}, + {"774j", []float64{-27.24609375, -27.0703125, -30.9375, -30.5859375}}, + {"yf", []float64{56.25, 61.875, 123.75, 135.0}}, + {"0", []float64{-90.0, -45.0, -180.0, -135.0}}, + {"e273k4gje9s", []float64{1.64203494787, 1.64203628898, -28.9996308088, -28.9996294677}}, + {"e", []float64{0.0, 45.0, -45.0, 0.0}}, + {"hx", []float64{-50.625, -45.0, 22.5, 33.75}}, + {"wz", []float64{39.375, 45.0, 123.75, 135.0}}, + {"w7p94b", []float64{17.05078125, 17.0562744141, 111.917724609, 111.928710938}}, + {"69sgzyb46pbs", []float64{-35.8658129722, -35.8658128045, -60.4796498269, -60.4796494916}}, + {"7f7m8k2u5", []float64{-31.3529205322, -31.3528776169, -6.66754245758, -6.66749954224}}, + {"gq4n7m", []float64{79.8760986328, 79.8815917969, -30.7946777344, -30.7836914062}}, + {"c1srg1pz8kt", []float64{54.8066094518, 54.8066107929, -128.880941123, -128.880939782}}, + {"b2h1dy", []float64{45.2966308594, 45.3021240234, -163.004150391, -162.993164062}}, + {"7170zy", []float64{-37.8039550781, 
-37.7984619141, -40.4406738281, -40.4296875}}, + {"p09", []float64{-87.1875, -85.78125, 136.40625, 137.8125}}, + {"sw970ux6", []float64{37.114906311, 37.1150779724, 24.3007278442, 24.301071167}}, + {"rc8hsdptqn", []float64{-35.7595646381, -35.7595592737, 168.958311081, 168.95832181}}, + {"qp3jujqc1", []float64{-3.17899703979, -3.17895412445, 91.5913438797, 91.591386795}}, + {"dn7n9krj2tgg", []float64{36.3231066428, 36.3231068105, -85.7166788355, -85.7166785002}}, + {"23e7vwt", []float64{-35.8676147461, -35.8662414551, -163.931121826, -163.929748535}}, + {"um", []float64{73.125, 78.75, 11.25, 22.5}}, + {"p8ws1", []float64{-86.484375, -86.4404296875, 166.684570312, 166.728515625}}, + {"vqt63tm6xx", []float64{81.9873136282, 81.9873189926, 63.7062621117, 63.7062728405}}, + {"b", []float64{45.0, 90.0, -180.0, -135.0}}, + {"5", []float64{-90.0, -45.0, -45.0, 0.0}}, + {"bm20d", []float64{74.619140625, 74.6630859375, -168.662109375, -168.618164062}}, + {"p3t4k9c0", []float64{-81.1573791504, -81.157207489, 153.480377197, 153.48072052}}, + {"x", []float64{0.0, 45.0, 135.0, 180.0}}, + {"8rsnr3k0", []float64{43.2929992676, 43.293170929, -162.80090332, -162.800559998}}, + {"b739vykqw", []float64{63.6243152618, 63.6243581772, -166.381845474, -166.381802559}}, + {"m", []float64{-45.0, 0.0, 45.0, 90.0}}, + {"13zrrq", []float64{-78.8488769531, -78.8433837891, -113.236083984, -113.225097656}}, + {"6yk9jt", []float64{-9.64050292969, -9.63500976562, -49.6801757812, -49.6691894531}}, + {"zmn0", []float64{73.125, 73.30078125, 154.6875, 155.0390625}}, + {"k", []float64{-45.0, 0.0, 0.0, 45.0}}, + {"ps151cxkk8", []float64{-66.9636869431, -66.9636815786, 158.993303776, 158.993314505}}, + {"tn", []float64{33.75, 39.375, 45.0, 56.25}}, + {"u7", []float64{61.875, 67.5, 11.25, 22.5}}, + {"55yuz", []float64{-68.0712890625, -68.02734375, -35.2001953125, -35.15625}}, + {"x0p3grv8k", []float64{0.350232124329, 0.350275039673, 145.345859528, 145.345902443}}, + {"y5c6gjhshg", []float64{66.6053169966, 
66.605322361, 91.896032095, 91.8960428238}}, + {"6fd2j", []float64{-30.9375, -30.8935546875, -52.8662109375, -52.822265625}}, + {"gegbfkt19cj6", []float64{66.2505683675, 66.2505685352, -17.1207369491, -17.1207366139}}, + {"k7", []float64{-28.125, -22.5, 11.25, 22.5}}, + {"y1ydbr", []float64{55.3656005859, 55.37109375, 99.1516113281, 99.1625976562}}, + {"byqsd", []float64{80.947265625, 80.9912109375, -137.021484375, -136.977539062}}, + {"mfcbxhf", []float64{-29.4172668457, -29.4158935547, 81.5213012695, 81.5226745605}}, + {"yxwd5901", []float64{87.5447273254, 87.5448989868, 121.794433594, 121.794776917}}, + {"24k7s7y0gqgj", []float64{-31.7077504657, -31.7077502981, -173.828286678, -173.828286342}}, + {"9031fbu", []float64{1.71798706055, 1.71936035156, -133.467407227, -133.466033936}}, + {"xqusuduvmc", []float64{38.8197237253, 38.8197290897, 152.782648802, 152.782659531}}, + {"5bxw", []float64{-86.1328125, -85.95703125, -0.703125, -0.3515625}}, + {"xw593h52f", []float64{33.9918279648, 33.9918708801, 162.470369339, 162.470412254}}, + {"3", []float64{-45.0, 0.0, -135.0, -90.0}}, + {"9xxm2s", []float64{43.1323242188, 43.1378173828, -102.282714844, -102.271728516}}, + {"byf7t", []float64{83.583984375, 83.6279296875, -142.866210938, -142.822265625}}, + {"v6", []float64{56.25, 61.875, 56.25, 67.5}}, + {"yh", []float64{67.5, 73.125, 90.0, 101.25}}, + {"d6k43xp", []float64{13.0902099609, 13.091583252, -73.0494689941, -73.0480957031}}, + {"k4m5h", []float64{-31.81640625, -31.7724609375, 7.20703125, 7.2509765625}}, + {"r1t7", []float64{-36.03515625, -35.859375, 142.3828125, 142.734375}}, + {"cs4kvrjdkm7", []float64{68.3738274872, 68.3738288283, -109.097485095, -109.097483754}}, + {"unu", []float64{82.96875, 84.375, 5.625, 7.03125}}, + {"59xp", []float64{-80.33203125, -80.15625, -12.65625, -12.3046875}}, + {"542p4hbm", []float64{-76.0863304138, -76.0861587524, -44.9117660522, -44.9114227295}}, + {"5j", []float64{-61.875, -56.25, -45.0, -33.75}}, + {"v3", []float64{50.625, 
56.25, 56.25, 67.5}}, + {"mstr13c0", []float64{-18.4474182129, -18.4472465515, 74.9391174316, 74.9394607544}}, + {"wcvtdg61swv8", []float64{10.8286933601, 10.8286935277, 131.608171687, 131.608172022}}, + {"3cm40m0w91z3", []float64{-37.5885963254, -37.5885961577, -94.207024388, -94.2070240527}}, + {"m9fup0", []float64{-34.453125, -34.4476318359, 71.6748046875, 71.6857910156}}, + {"jx", []float64{-50.625, -45.0, 67.5, 78.75}}, + {"7myut8cg", []float64{-11.8605995178, -11.8604278564, -24.013710022, -24.0133666992}}, + {"d", []float64{0.0, 45.0, -90.0, -45.0}}, + {"5dgu11uuf12g", []float64{-73.8176893629, -73.8176891953, -17.1760072187, -17.1760068834}}, + {"qp1ens1zqg", []float64{-5.07442295551, -5.07441759109, 92.3977124691, 92.3977231979}}, + {"vusxu8ewwbrz", []float64{71.6786695831, 71.6786697507, 85.2809854969, 85.2809858322}}, + {"8qjtgd1q", []float64{34.7727584839, 34.7729301453, -160.860099792, -160.85975647}}, + {"60dppvw", []float64{-40.9268188477, -40.9254455566, -86.838684082, -86.837310791}}, + {"tygxz83s2", []float64{39.3331575394, 39.3332004547, 84.0035247803, 84.0035676956}}, + {"e0qwrc", []float64{2.51037597656, 2.51586914062, -35.5187988281, -35.5078125}}, + {"5wyh2qbz", []float64{-51.2458992004, -51.2457275391, -14.0504837036, -14.0501403809}}, + {"0zqs", []float64{-48.515625, -48.33984375, -137.109375, -136.7578125}}, + {"n5ss", []float64{-69.609375, -69.43359375, 96.328125, 96.6796875}}, + {"5", []float64{-90.0, -45.0, -45.0, 0.0}}, + {"v9q84x9n8ktx", []float64{52.0735898428, 52.0735900104, 76.7518796772, 76.7518800125}}, + {"4", []float64{-90.0, -45.0, -90.0, -45.0}}, + {"j", []float64{-90.0, -45.0, 45.0, 90.0}}, + {"2eh8wf4ktz", []float64{-28.0253130198, -28.0253076553, -150.871907473, -150.871896744}}, + {"cr96dg281x", []float64{87.6448434591, 87.6448488235, -121.870586872, -121.870576143}}, + {"56u", []float64{-74.53125, -73.125, -28.125, -26.71875}}, + {"628vt", []float64{-41.220703125, -41.1767578125, -77.4755859375, -77.431640625}}, + 
{"0heb", []float64{-64.6875, -64.51171875, -174.7265625, -174.375}}, + {"8q5skg4mk", []float64{34.5144510269, 34.5144939423, -163.616123199, -163.616080284}}, + {"7x0pw97495hw", []float64{-4.2993279174, -4.29932774976, -22.2101866454, -22.2101863101}}, + {"1j4vjyz1dqx", []float64{-60.9587225318, -60.9587211907, -130.870407969, -130.870406628}}, + {"u4q1szh", []float64{57.9583740234, 57.9597473145, 8.65173339844, 8.65310668945}}, + {"3v3dbm2uv2", []float64{-14.9556970596, -14.9556916952, -99.1283833981, -99.1283726692}}, + {"te3jtfwjnq", []float64{19.2626702785, 19.262675643, 69.1674435139, 69.1674542427}}, + {"sgvgx3ey6qe", []float64{21.7183318734, 21.7183332145, 42.1597914398, 42.1597927809}}, + {"2sw", []float64{-19.6875, -18.28125, -149.0625, -147.65625}}, + {"wzy2pqu8e", []float64{43.6309146881, 43.6309576035, 132.863974571, 132.864017487}}, + {"dk2849", []float64{23.9117431641, 23.9172363281, -77.9370117188, -77.9260253906}}, + {"0d65", []float64{-76.81640625, -76.640625, -154.6875, -154.3359375}}, + {"84", []float64{11.25, 16.875, -180.0, -168.75}}, + {"q17", []float64{-37.96875, -36.5625, 94.21875, 95.625}}, + {"x9wzer", []float64{9.79431152344, 9.7998046875, 167.135009766, 167.145996094}}, + {"7xk2s7425n6", []float64{-4.1143463552, -4.1143450141, -16.3334485888, -16.3334472477}}, + {"jv", []float64{-61.875, -56.25, 78.75, 90.0}}, + {"u5juvw", []float64{62.7429199219, 62.7484130859, 8.32763671875, 8.33862304688}}, + {"cnuczt0c", []float64{83.3040046692, 83.3041763306, -127.989692688, -127.989349365}}, + {"7f3jv330zuh", []float64{-31.3259911537, -31.3259898126, -9.61132586002, -9.61132451892}}, + {"5", []float64{-90.0, -45.0, -45.0, 0.0}}, + {"70jqgb8uv", []float64{-43.8099145889, -43.8098716736, -37.4511480331, -37.4511051178}}, + {"u5t5d155", []float64{65.3087425232, 65.3089141846, 7.12326049805, 7.1236038208}}, + {"r9hy1gz8m4p", []float64{-38.2996594906, -38.2996581495, 164.267115444, 164.267116785}}, + {"qwmurb9920", []float64{-9.09371852875, 
-9.09371316433, 120.928573608, 120.928584337}}, + {"d2pt693fw4", []float64{0.930157899857, 0.930163264275, -68.0906009674, -68.0905902386}}, + {"zfgbx", []float64{60.556640625, 60.6005859375, 174.331054688, 174.375}}, + {"c7mtdqkfuuc", []float64{64.2828767002, 64.2828780413, -115.910019726, -115.910018384}}, + {"9", []float64{0.0, 45.0, -135.0, -90.0}}, + {"w89eqd", []float64{3.39477539062, 3.40026855469, 114.895019531, 114.906005859}}, + {"75mtem68vx", []float64{-25.7229477167, -25.7229423523, -37.1191334724, -37.1191227436}}, + {"12e9fwr", []float64{-86.8455505371, -86.8441772461, -118.708648682, -118.707275391}}, + {"v", []float64{45.0, 90.0, 45.0, 90.0}}, + {"nq", []float64{-56.25, -50.625, 101.25, 112.5}}, + {"svnx", []float64{29.35546875, 29.53125, 42.890625, 43.2421875}}, + {"0e1kd5pxdgt", []float64{-72.316198647, -72.3161973059, -155.64387247, -155.643871129}}, + {"pgk", []float64{-71.71875, -70.3125, 174.375, 175.78125}}, + {"vry9q", []float64{88.8134765625, 88.857421875, 65.654296875, 65.6982421875}}, + {"6f2x91u", []float64{-31.0157775879, -31.0144042969, -55.4974365234, -55.4960632324}}, + {"3hefw", []float64{-19.248046875, -19.2041015625, -129.462890625, -129.418945312}}, + {"g", []float64{45.0, 90.0, -45.0, 0.0}}, + {"m66erp", []float64{-31.7340087891, -31.728515625, 60.0732421875, 60.0842285156}}, + {"5nwny", []float64{-52.2509765625, -52.20703125, -36.298828125, -36.2548828125}}, + {"d5", []float64{16.875, 22.5, -90.0, -78.75}}, + {"wuphy8", []float64{23.3349609375, 23.3404541016, 133.879394531, 133.890380859}}, + {"b8u631kf", []float64{49.6214675903, 49.6216392517, -151.472969055, -151.472625732}}, + {"34d8j", []float64{-30.9375, -30.8935546875, -131.264648438, -131.220703125}}, + {"x", []float64{0.0, 45.0, 135.0, 180.0}}, + {"he", []float64{-73.125, -67.5, 22.5, 33.75}}, + {"yec1yepjd", []float64{66.4187908173, 66.4188337326, 114.201593399, 114.201636314}}, + {"h0p58hpz", []float64{-89.3615913391, -89.3614196777, 9.85439300537, 9.85473632812}}, + 
{"8u", []float64{22.5, 28.125, -146.25, -135.0}}, + {"hg9sxf0mu", []float64{-69.509510994, -69.5094680786, 36.200466156, 36.2005090714}}, + {"7zdg4c9868", []float64{-2.27687358856, -2.27686822414, -7.25979566574, -7.2597849369}}, + {"1h", []float64{-67.5, -61.875, -135.0, -123.75}}, + {"k7h239zr4097", []float64{-28.0702368356, -28.070236668, 17.3025243357, 17.302524671}}, + {"9h", []float64{22.5, 28.125, -135.0, -123.75}}, + {"fx", []float64{84.375, 90.0, -67.5, -56.25}}, + {"sf66h05", []float64{13.0078125, 13.009185791, 37.093963623, 37.0953369141}}, + {"4ge39", []float64{-70.048828125, -70.0048828125, -51.6357421875, -51.591796875}}, + {"w", []float64{0.0, 45.0, 90.0, 135.0}}, + {"7", []float64{-45.0, 0.0, -45.0, 0.0}}, + {"ypd", []float64{87.1875, 88.59375, 92.8125, 94.21875}}, + {"gbzh2r", []float64{50.0042724609, 50.009765625, -1.39526367188, -1.38427734375}}, + {"h0", []float64{-90.0, -84.375, 0.0, 11.25}}, + {"nk7wkxwvku2", []float64{-64.952994436, -64.9529930949, 106.379102468, 106.37910381}}, + {"23qu", []float64{-37.265625, -37.08984375, -159.2578125, -158.90625}}, + {"n15b3", []float64{-84.3310546875, -84.287109375, 95.3173828125, 95.361328125}}, + {"8x9c6f1cjkn", []float64{42.4184060097, 42.4184073508, -154.915576279, -154.915574938}}, + {"5dr", []float64{-77.34375, -75.9375, -12.65625, -11.25}}, + {"022q4", []float64{-87.5390625, -87.4951171875, -168.310546875, -168.266601562}}, + {"52", []float64{-90.0, -84.375, -33.75, -22.5}}, + {"s0j8hb", []float64{0.0, 0.0054931640625, 7.94311523438, 7.9541015625}}, + {"58ygg9vn9nj", []float64{-85.1113092899, -85.1113079488, -12.8470878303, -12.8470864892}}, + {"ztzqs1", []float64{78.4918212891, 78.4973144531, 167.87109375, 167.882080078}}, + {"n5x", []float64{-70.3125, -68.90625, 99.84375, 101.25}}, + {"jh593g3fsf", []float64{-67.261980772, -67.2619754076, 50.001386404, 50.0013971329}}, + {"8vjg52364", []float64{28.6540603638, 28.6541032791, -138.01943779, -138.019394875}}, + {"dqeh1", []float64{37.265625, 
37.3095703125, -74.4873046875, -74.443359375}}, + {"2", []float64{-45.0, 0.0, -180.0, -135.0}}, + {"m1mjx", []float64{-37.001953125, -36.9580078125, 52.3388671875, 52.3828125}}, + {"w4fr", []float64{16.69921875, 16.875, 93.1640625, 93.515625}}, + {"k2v0kj", []float64{-40.7098388672, -40.7043457031, 18.45703125, 18.4680175781}}, + {"ytmdb30u", []float64{75.0208282471, 75.0209999084, 120.246391296, 120.246734619}}, + {"wzuhv", []float64{44.4287109375, 44.47265625, 129.594726562, 129.638671875}}, + {"84m2ue733hx1", []float64{12.8061776049, 12.8061777726, -172.414918095, -172.41491776}}, + {"02", []float64{-90.0, -84.375, -168.75, -157.5}}, + {"x8j5vd68s", []float64{0.671625137329, 0.671668052673, 164.776554108, 164.776597023}}, + {"7k", []float64{-22.5, -16.875, -33.75, -22.5}}, + {"4xtrffmbtxr", []float64{-46.4377109706, -46.4377096295, -59.9881960452, -59.9881947041}}, + {"k8x", []float64{-42.1875, -40.78125, 32.34375, 33.75}}, + {"yyxsh", []float64{82.265625, 82.3095703125, 134.47265625, 134.516601562}}, + {"f2rqpzxu", []float64{47.502822876, 47.5029945374, -68.2034683228, -68.203125}}, + {"d6h4jg", []float64{11.6180419922, 11.6235351562, -72.8723144531, -72.861328125}}, + {"e7d", []float64{19.6875, 21.09375, -30.9375, -29.53125}}, + {"bbs0tcr5wc9", []float64{47.9078659415, 47.9078672826, -140.362410396, -140.362409055}}, + {"7fuf", []float64{-29.1796875, -29.00390625, -4.5703125, -4.21875}}, + {"n", []float64{-90.0, -45.0, 90.0, 135.0}}, + {"tt9p6y", []float64{32.2448730469, 32.2503662109, 69.0270996094, 69.0380859375}}, + {"xypsv42g6pr", []float64{34.5979173481, 34.5979186893, 179.517726749, 179.51772809}}, + {"p4gvh3sr97h", []float64{-73.6428004503, -73.6427991092, 140.466100574, 140.466101915}}, + {"4", []float64{-90.0, -45.0, -90.0, -45.0}}, + {"6ecy4rk", []float64{-22.8117370605, -22.8103637695, -64.9346923828, -64.9333190918}}, + {"045xe38uj", []float64{-77.4227142334, -77.4226713181, -174.934058189, -174.934015274}}, + {"xyunrc48", []float64{39.0728759766, 
39.0730476379, 174.719009399, 174.719352722}}, + {"enm03", []float64{35.2001953125, 35.244140625, -37.9248046875, -37.880859375}}, + {"n7r9k6ecbhm", []float64{-71.4849673212, -71.4849659801, 111.988799125, 111.988800466}}, + {"2bpxnf2", []float64{-43.7571716309, -43.7557983398, -135.406494141, -135.40512085}}, + {"4", []float64{-90.0, -45.0, -90.0, -45.0}}, + {"vs65p7h9m", []float64{69.4502878189, 69.4503307343, 70.6374979019, 70.6375408173}}, + {"j6vt6yyjmms", []float64{-73.5703888535, -73.5703875124, 64.1136950254, 64.1136963665}}, + {"pp7z8s7", []float64{-47.8770446777, -47.8756713867, 140.299530029, 140.30090332}}, + {"skxsqyzdgn", []float64{26.0971534252, 26.0971587896, 22.103934288, 22.1039450169}}, + {"y1x7mdy", []float64{54.0238952637, 54.0252685547, 100.445251465, 100.446624756}}, + {"ck9n9vptne", []float64{71.4834183455, 71.4834237099, -122.256267071, -122.256256342}}, + {"fq7u12930mgt", []float64{80.862324927, 80.8623250946, -73.4198988229, -73.4198984876}}, + {"zs50j6", []float64{67.5109863281, 67.5164794922, 161.949462891, 161.960449219}}, + {"9jccmq0j", []float64{32.5972938538, 32.5974655151, -132.308349609, -132.308006287}}, + {"y", []float64{45.0, 90.0, 90.0, 135.0}}, + {"hhtn7", []float64{-63.5888671875, -63.544921875, 7.1630859375, 7.20703125}}, + {"480spdnxu", []float64{-89.2845582962, -89.2845153809, -66.4581871033, -66.4581441879}}, + {"ghz87yh", []float64{71.7956542969, 71.7970275879, -34.2828369141, -34.281463623}}, + {"cf", []float64{56.25, 61.875, -101.25, -90.0}}, + {"gd1n5pnc8p7", []float64{57.3434360325, 57.3434373736, -20.9526403248, -20.9526389837}}, + {"hef9dvw3q", []float64{-68.6121511459, -68.6121082306, 26.1453151703, 26.1453580856}}, + {"wbjwhe8rkyf0", []float64{1.07519432902, 1.07519449666, 131.682678759, 131.682679094}}, + {"pndb3wg", []float64{-53.3564758301, -53.3551025391, 138.937225342, 138.938598633}}, + {"bh2k52ewybs", []float64{69.6132829785, 69.6132843196, -179.500513673, -179.500512332}}, + {"t4r", []float64{12.65625, 
14.0625, 54.84375, 56.25}}, + {"s7215", []float64{18.45703125, 18.5009765625, 11.3818359375, 11.42578125}}, + {"u8", []float64{45.0, 50.625, 22.5, 33.75}}, + {"f2", []float64{45.0, 50.625, -78.75, -67.5}}, + {"3hwb5r0etbt", []float64{-19.6484443545, -19.6484430134, -125.36405012, -125.364048779}}, + {"wendsfh63", []float64{17.3258256912, 17.3258686066, 121.855244637, 121.855287552}}, + {"90n", []float64{0.0, 1.40625, -126.5625, -125.15625}}, + {"e5mx13zs4t2", []float64{19.5220465958, 19.5220479369, -37.2002863884, -37.2002850473}}, + {"6kngstqxn", []float64{-21.854724884, -21.8546819687, -69.0508747101, -69.0508317947}}, + {"rgs", []float64{-25.3125, -23.90625, 174.375, 175.78125}}, + {"sbd1n1z90", []float64{2.99806594849, 2.99810886383, 36.8364715576, 36.836514473}}, + {"693c7m26y6", []float64{-37.7197015285, -37.7196961641, -64.8956286907, -64.8956179619}}, + {"ynu", []float64{82.96875, 84.375, 95.625, 97.03125}}, + {"68t74uch2444", []float64{-41.6333230957, -41.6333229281, -59.9949619174, -59.9949615821}}, + {"jcmv2zscc", []float64{-82.0043992996, -82.0043563843, 86.875462532, 86.8755054474}}, + {"3c", []float64{-39.375, -33.75, -101.25, -90.0}}, + {"r76ymc", []float64{-25.6146240234, -25.6091308594, 150.369873047, 150.380859375}}, + {"kj9vj024cd", []float64{-13.1817376614, -13.1817322969, 2.68072843552, 2.68073916435}}, + {"scr", []float64{7.03125, 8.4375, 43.59375, 45.0}}, + {"57g8wvk", []float64{-68.7895202637, -68.7881469727, -28.5260009766, -28.5246276855}}, + {"8qde6", []float64{37.1337890625, 37.177734375, -165.146484375, -165.102539062}}, + {"7ve260", []float64{-14.0185546875, -14.0130615234, -6.591796875, -6.58081054688}}, + {"trcxzhy6", []float64{44.9824905396, 44.9826622009, 58.6755752563, 58.6759185791}}, + {"syqw7fyhx", []float64{36.2707614899, 36.2708044052, 43.0639600754, 43.0640029907}}, + {"tb0", []float64{0.0, 1.40625, 78.75, 80.15625}}, + {"dttugd1xtj5p", []float64{31.7847627215, 31.7847628891, -59.2579753697, -59.2579750344}}, + 
{"v899zsbyh7f2", []float64{48.1472598016, 48.1472599693, 69.9401802197, 69.940180555}}, + {"8e", []float64{16.875, 22.5, -157.5, -146.25}}, + {"s", []float64{0.0, 45.0, 0.0, 45.0}}, + {"ny4017r8x", []float64{-56.2320613861, -56.2320184708, 126.628031731, 126.628074646}}, + {"8ued", []float64{25.6640625, 25.83984375, -141.328125, -140.9765625}}, + {"bqpsbj0h9p1t", []float64{79.6132376231, 79.6132377908, -158.203080073, -158.203079738}}, + {"s", []float64{0.0, 45.0, 0.0, 45.0}}, + {"b5m", []float64{63.28125, 64.6875, -172.96875, -171.5625}}, + {"t", []float64{0.0, 45.0, 45.0, 90.0}}, + {"w722c", []float64{18.4130859375, 18.45703125, 101.645507812, 101.689453125}}, + {"ey", []float64{33.75, 39.375, -11.25, 0.0}}, + {"6ndrutd", []float64{-7.04498291016, -7.04360961914, -86.6354370117, -86.6340637207}}, + {"c", []float64{45.0, 90.0, -135.0, -90.0}}, + {"47jtykh", []float64{-72.0922851562, -72.0909118652, -70.7354736328, -70.7341003418}}, + {"krb5qvkb", []float64{-0.806121826172, -0.805950164795, 11.5531539917, 11.5534973145}}, + {"mtpgnes7uugs", []float64{-16.3277602941, -16.3277601264, 78.6901270598, 78.6901273951}}, + {"6062z", []float64{-43.4619140625, -43.41796875, -86.5283203125, -86.484375}}, + {"241quukp", []float64{-32.5389289856, -32.5387573242, -178.027954102, -178.027610779}}, + {"g4m49", []float64{58.095703125, 58.1396484375, -37.9248046875, -37.880859375}}, + {"wwq", []float64{35.15625, 36.5625, 120.9375, 122.34375}}, + {"beujb1c", []float64{67.1141052246, 67.1154785156, -151.873626709, -151.872253418}}, + {"h4", []float64{-78.75, -73.125, 0.0, 11.25}}, + {"69xq1n9v", []float64{-35.4712486267, -35.4710769653, -57.2583389282, -57.2579956055}}, + {"cjpu", []float64{73.828125, 74.00390625, -124.1015625, -123.75}}, + {"pmks2611hw", []float64{-59.7104895115, -59.7104841471, 152.590677738, 152.590688467}}, + {"zd78cuj300z8", []float64{57.8102342784, 57.8102344461, 162.505999133, 162.505999468}}, + {"g9h460p7719", []float64{51.0210737586, 51.0210750997, 
-16.777022928, -16.7770215869}}, + {"crncwf7g0c", []float64{84.6515518427, 84.6515572071, -113.955999613, -113.955988884}}, + {"mp4", []float64{-5.625, -4.21875, 47.8125, 49.21875}}, + {"3ghgz3gsjgr", []float64{-27.4555031955, -27.4555018544, -94.2466463149, -94.2466449738}}, + {"m2g9", []float64{-40.60546875, -40.4296875, 61.171875, 61.5234375}}, + {"ngt309w", []float64{-70.1284790039, -70.1271057129, 131.163024902, 131.164398193}}, + {"hgn9u3537p", []float64{-72.8116375208, -72.8116321564, 43.08198452, 43.0819952488}}, + {"6spnhu", []float64{-21.4233398438, -21.4178466797, -57.4475097656, -57.4365234375}}, + {"z22v4r0d82", []float64{47.3240375519, 47.3240429163, 147.404261827, 147.404272556}}, + {"ytypn", []float64{78.57421875, 78.6181640625, 121.201171875, 121.245117188}}, + {"qstc", []float64{-19.51171875, -19.3359375, 120.5859375, 120.9375}}, + {"y8r48c5", []float64{46.8511962891, 46.8525695801, 122.380828857, 122.382202148}}, + {"uq2xybqqg", []float64{81.5210866928, 81.5211296082, 12.2584676743, 12.2585105896}}, + {"95xm8dtz5u42", []float64{20.6692528725, 20.6692530401, -124.77465447, -124.774654135}}, + {"x0uqxwj", []float64{5.39428710938, 5.39566040039, 141.313018799, 141.31439209}}, + {"0kve9v", []float64{-62.6385498047, -62.6330566406, -160.938720703, -160.927734375}}, + {"szwv1er87", []float64{43.0843019485, 43.0843448639, 43.3185338974, 43.3185768127}}, + {"88zscymedp", []float64{5.08868157864, 5.08868694305, -146.868581772, -146.868571043}}, + {"pd", []float64{-78.75, -73.125, 157.5, 168.75}}, + {"g6", []float64{56.25, 61.875, -33.75, -22.5}}, + {"3pg6r201vv51", []float64{-1.01041479036, -1.01041462272, -130.110833198, -130.110832863}}, + {"y", []float64{45.0, 90.0, 90.0, 135.0}}, + {"g02", []float64{46.40625, 47.8125, -45.0, -43.59375}}, + {"jx", []float64{-50.625, -45.0, 67.5, 78.75}}, + {"zksxs2nz2j3", []float64{71.6321320832, 71.6321334243, 152.774163634, 152.774164975}}, + {"erqyg8ff4d", []float64{41.9722473621, 41.9722527266, -24.1001200676, 
-24.1001093388}}, + {"n3wm1r3p4jev", []float64{-80.6425363384, -80.6425361708, 110.095458291, 110.095458627}}, + {"sqfbq13pjp9", []float64{38.0208036304, 38.0208049715, 15.3824485838, 15.3824499249}}, + {"2", []float64{-45.0, 0.0, -180.0, -135.0}}, + {"y0", []float64{45.0, 50.625, 90.0, 101.25}}, + {"04q3tpcc68bq", []float64{-77.0372864977, -77.03728633, -170.988700055, -170.988699719}}, + {"81", []float64{5.625, 11.25, -180.0, -168.75}}, + {"nj", []float64{-61.875, -56.25, 90.0, 101.25}}, + {"b", []float64{45.0, 90.0, -180.0, -135.0}}, + {"8bxs0rfgp3n7", []float64{3.55871787295, 3.55871804059, -135.688042603, -135.688042268}}, + {"j0rm6", []float64{-87.6708984375, -87.626953125, 55.283203125, 55.3271484375}}, + {"86v9", []float64{15.64453125, 15.8203125, -161.015625, -160.6640625}}, + {"c", []float64{45.0, 90.0, -135.0, -90.0}}, + {"nmcsv", []float64{-56.8212890625, -56.77734375, 103.579101562, 103.623046875}}, + {"cqytkn1d56", []float64{83.9249145985, 83.9249199629, -114.431394339, -114.43138361}}, + {"jfpcxxud86r9", []float64{-78.4433147125, -78.4433145449, 89.9842279404, 89.9842282757}}, + {"zf8cm8fkrg", []float64{59.2870920897, 59.2870974541, 170.049809217, 170.049819946}}, + {"98x", []float64{2.8125, 4.21875, -102.65625, -101.25}}, + {"e5mbzuu", []float64{18.4391784668, 18.4405517578, -36.5679931641, -36.566619873}}, + {"03g5kqkcp", []float64{-79.5504570007, -79.5504140854, -164.337658882, -164.337615967}}, + {"b362q0b79x2", []float64{52.0799548924, 52.0799562335, -165.321857929, -165.321856588}}, + {"m3pq", []float64{-38.3203125, -38.14453125, 66.4453125, 66.796875}}, + {"6564m2w5b0", []float64{-26.3198518753, -26.3198465109, -86.9485473633, -86.9485366344}}, + {"m9puw71wrf", []float64{-38.5664212704, -38.566415906, 78.6754882336, 78.6754989624}}, + {"z", []float64{45.0, 90.0, 135.0, 180.0}}, + {"9t34u", []float64{30.0146484375, 30.05859375, -110.91796875, -110.874023438}}, + {"hewq3", []float64{-69.2138671875, -69.169921875, 31.3330078125, 31.376953125}}, + 
{"2m2uy1tgg0", []float64{-14.6249055862, -14.6249002218, -167.423615456, -167.423604727}}, + {"qu8dxgebd9wz", []float64{-19.22872575, -19.2287255824, 124.798967354, 124.798967689}}, + {"scwmj", []float64{9.31640625, 9.3603515625, 42.7587890625, 42.802734375}}, + {"9", []float64{0.0, 45.0, -135.0, -90.0}}, + {"zm02460g", []float64{73.1365013123, 73.1366729736, 146.701469421, 146.701812744}}, + {"cgtd84ky", []float64{65.1403427124, 65.1405143738, -93.5091018677, -93.5087585449}}, + {"eq", []float64{33.75, 39.375, -33.75, -22.5}}, + {"ht26dn8h", []float64{-59.9929046631, -59.9927330017, 22.939453125, 22.9397964478}}, + {"c", []float64{45.0, 90.0, -135.0, -90.0}}, + {"b", []float64{45.0, 90.0, -180.0, -135.0}}, + {"j70dzsu", []float64{-72.6155090332, -72.6141357422, 57.2882080078, 57.2895812988}}, + {"y241djby0dk", []float64{45.2962996066, 45.2963009477, 104.151447415, 104.151448756}}, + {"un9", []float64{81.5625, 82.96875, 1.40625, 2.8125}}, + {"2kzdj9", []float64{-17.9241943359, -17.9187011719, -157.961425781, -157.950439453}}, + {"m7b3gj70", []float64{-23.5697937012, -23.5696220398, 56.7375183105, 56.7378616333}}, + {"vvpmy3", []float64{74.1412353516, 74.1467285156, 89.2199707031, 89.2309570312}}, + {"ym0n0", []float64{74.1796875, 74.2236328125, 101.25, 101.293945312}}, + {"k6", []float64{-33.75, -28.125, 11.25, 22.5}}, + {"d96nj5sqmjfn", []float64{8.10626830906, 8.10626847669, -64.4617196918, -64.4617193565}}, + {"9yfrdwk", []float64{39.3214416504, 39.3228149414, -97.9705810547, -97.9692077637}}, + {"8vj712jd0kv", []float64{28.6527125537, 28.6527138948, -138.804685324, -138.804683983}}, + {"bk86yhd", []float64{70.8206176758, 70.8219909668, -168.132019043, -168.130645752}}, + {"6e3pb2s87q10", []float64{-25.3536236286, -25.353623461, -66.0764430463, -66.0764427111}}, + {"nxbn2n7yn", []float64{-45.2722549438, -45.2722120285, 112.505407333, 112.505450249}}, + {"cg4n", []float64{62.9296875, 63.10546875, -98.4375, -98.0859375}}, + {"4de6s37sk467", 
[]float64{-75.4904382862, -75.4904381186, -62.7379387245, -62.7379383892}}, + {"pzegg", []float64{-47.1533203125, -47.109375, 174.155273438, 174.19921875}}, + {"xytyx62", []float64{37.7174377441, 37.7188110352, 177.154541016, 177.155914307}}, + {"2x4", []float64{-5.625, -4.21875, -154.6875, -153.28125}}, + {"j11m8j1eywcu", []float64{-83.3800566941, -83.3800565265, 46.7601537332, 46.7601540685}}, + {"k2xw57e", []float64{-41.1135864258, -41.1122131348, 21.9438171387, 21.9451904297}}, + {"2q74", []float64{-9.4921875, -9.31640625, -164.53125, -164.1796875}}, + {"9tp1960t8", []float64{28.4006023407, 28.400645256, -102.600631714, -102.600588799}}, + {"0", []float64{-90.0, -45.0, -180.0, -135.0}}, + {"2b2mwb", []float64{-42.626953125, -42.6214599609, -145.601806641, -145.590820312}}, + {"b7ts80s", []float64{65.481262207, 65.482635498, -161.010131836, -161.008758545}}, + {"10x0", []float64{-87.1875, -87.01171875, -125.15625, -124.8046875}}, + {"c5p9s65qqch", []float64{62.1507364511, 62.1507377923, -124.261599183, -124.261597842}}, + {"n1j", []float64{-84.375, -82.96875, 97.03125, 98.4375}}, + {"gjy9e2g7hcvc", []float64{77.6120662875, 77.6120664552, -35.7118779793, -35.7118776441}}, + {"d0nngw26hnc0", []float64{1.22123524547, 1.2212354131, -81.408175081, -81.4081747457}}, + {"sqqh99ewn", []float64{35.9565353394, 35.9565782547, 19.7584819794, 19.7585248947}}, + {"27", []float64{-28.125, -22.5, -168.75, -157.5}}, + {"9", []float64{0.0, 45.0, -135.0, -90.0}}, + {"uv", []float64{73.125, 78.75, 33.75, 45.0}}, + {"nu6", []float64{-66.09375, -64.6875, 126.5625, 127.96875}}, + {"hjg9s97mjbfp", []float64{-57.3848481663, -57.3848479986, 5.12434154749, 5.12434188277}}, + {"cekd", []float64{63.6328125, 63.80859375, -106.171875, -105.8203125}}, + {"fx2s", []float64{86.484375, 86.66015625, -66.796875, -66.4453125}}, + {"zxx5n4wh37", []float64{87.7293223143, 87.7293276787, 167.615715265, 167.615725994}}, + {"k0redc", []float64{-42.9730224609, -42.9675292969, 10.6677246094, 
10.6787109375}}, + {"xhj810f1r7", []float64{22.504350543, 22.5043559074, 142.781378031, 142.78138876}}, + {"9j", []float64{28.125, 33.75, -135.0, -123.75}}, + {"3", []float64{-45.0, 0.0, -135.0, -90.0}}, + {"z59900erg", []float64{64.8673582077, 64.867401123, 137.113966942, 137.114009857}}, + {"6dnccesgx8ts", []float64{-33.4225525707, -33.4225524031, -57.9350421578, -57.9350418225}}, + {"jx", []float64{-50.625, -45.0, 67.5, 78.75}}, + {"312y4y56", []float64{-36.8807601929, -36.8805885315, -133.819999695, -133.819656372}}, + {"32vk3g9np", []float64{-40.013923645, -40.0138807297, -116.288609505, -116.288566589}}, + {"kn39", []float64{-9.66796875, -9.4921875, 2.109375, 2.4609375}}, + {"eed4ybdn4mpp", []float64{20.1747029833, 20.174703151, -19.3880166113, -19.3880162761}}, + {"st", []float64{28.125, 33.75, 22.5, 33.75}}, + {"7", []float64{-45.0, 0.0, -45.0, 0.0}}, + {"0t8kw4xyhu5x", []float64{-58.2566988654, -58.2566986978, -156.873914078, -156.873913743}}, + {"dcy", []float64{9.84375, 11.25, -47.8125, -46.40625}}, + {"p3r", []float64{-82.96875, -81.5625, 156.09375, 157.5}}, + {"wdc0w9tbd6", []float64{15.5649769306, 15.564982295, 114.199887514, 114.199898243}}, + {"j3jc", []float64{-84.19921875, -84.0234375, 64.3359375, 64.6875}}, + {"k5cw3t", []float64{-22.7801513672, -22.7746582031, 2.17529296875, 2.18627929688}}, + {"pd6m", []float64{-76.46484375, -76.2890625, 160.6640625, 161.015625}}, + {"hnfqkjqrqnh", []float64{-50.9025013447, -50.9025000036, 3.34868967533, 3.34869101644}}, + {"6qkwd", []float64{-8.701171875, -8.6572265625, -72.333984375, -72.2900390625}}, + {"1q2x", []float64{-53.61328125, -53.4375, -123.046875, -122.6953125}}, + {"3nps1et4nde", []float64{-10.527292192, -10.5272908509, -124.380057603, -124.380056262}}, + {"gq2c8qnkx2", []float64{80.4536533356, 80.4536587, -32.6754319668, -32.6754212379}}, + {"q7d", []float64{-25.3125, -23.90625, 104.0625, 105.46875}}, + {"560", []float64{-78.75, -77.34375, -33.75, -32.34375}}, + {"j855ffmuxv4s", 
[]float64{-89.3276607245, -89.3276605569, 71.8478319794, 71.8478323147}}, + {"sumfzt8z4", []float64{24.4210624695, 24.4211053848, 42.1666431427, 42.166686058}}, + {"bje9d2n0", []float64{76.201171875, 76.2013435364, -174.971008301, -174.970664978}}, + {"y943", []float64{50.80078125, 50.9765625, 115.6640625, 116.015625}}, + {"3", []float64{-45.0, 0.0, -135.0, -90.0}}, + {"gmn", []float64{73.125, 74.53125, -25.3125, -23.90625}}, + {"1djwe5s", []float64{-77.5881958008, -77.5868225098, -104.628295898, -104.626922607}}, + {"tb5wv3", []float64{1.19201660156, 1.19750976562, 83.9025878906, 83.9135742188}}, + {"58h3qnfv9m", []float64{-89.7422236204, -89.742218256, -16.2559354305, -16.2559247017}}, + {"f0du0sguugkg", []float64{48.5425508581, 48.5425510257, -86.1054797843, -86.105479449}}, + {"h6zyvwk", []float64{-73.3103942871, -73.3090209961, 22.3956298828, 22.3970031738}}, + {"vg2nu", []float64{64.4677734375, 64.51171875, 78.92578125, 78.9697265625}}, + {"j7r7de", []float64{-71.0870361328, -71.0815429688, 66.5551757812, 66.5661621094}}, + {"hjshn", []float64{-58.359375, -58.3154296875, 5.888671875, 5.9326171875}}, + {"46khrs1t", []float64{-76.5738487244, -76.573677063, -72.7933502197, -72.793006897}}, + {"g5p5p", []float64{62.40234375, 62.4462890625, -34.8486328125, -34.8046875}}, + {"7pxgy2sf", []float64{-2.15023040771, -2.15005874634, -33.8203811646, -33.8200378418}}, + {"vrz", []float64{88.59375, 90.0, 66.09375, 67.5}}, + {"bry3ngyuq", []float64{88.7908601761, 88.7909030914, -159.654779434, -159.654736519}}, + {"9cu9t1g3cs", []float64{10.1173567772, 10.1173621416, -94.6976208687, -94.6976101398}}, + {"82", []float64{0.0, 5.625, -168.75, -157.5}}, + {"m9yzmzsmxx", []float64{-33.8396555185, -33.8396501541, 77.2510313988, 77.2510421276}}, + {"4wv4wwxb2dr2", []float64{-51.5560363233, -51.5560361557, -60.1724312827, -60.1724309474}}, + {"j7ufy8", []float64{-68.4228515625, -68.4173583984, 63.2153320312, 63.2263183594}}, + {"hzrjzg9xjj", []float64{-48.1875532866, 
-48.1875479221, 43.9366006851, 43.936611414}}, + {"z5eh3ghtm7", []float64{65.4519671202, 65.4519724846, 139.302059412, 139.302070141}}, + {"6sjhet2cts", []float64{-21.6798663139, -21.6798609495, -60.3136754036, -60.3136646748}}, + {"vjyn6v", []float64{78.4698486328, 78.4753417969, 53.5583496094, 53.5693359375}}, + {"hysxqy94", []float64{-52.1270370483, -52.126865387, 40.3761291504, 40.3764724731}}, + {"8yd4qxm0d", []float64{36.9979190826, 36.997961998, -143.144903183, -143.144860268}}, + {"w43b42f", []float64{12.660369873, 12.6617431641, 92.5625610352, 92.5639343262}}, + {"7suc291x", []float64{-18.0548286438, -18.0546569824, -15.7962799072, -15.7959365845}}, + {"pq", []float64{-56.25, -50.625, 146.25, 157.5}}, + {"frd", []float64{87.1875, 88.59375, -75.9375, -74.53125}}, + {"r2", []float64{-45.0, -39.375, 146.25, 157.5}}, + {"5rs1x50fe5m", []float64{-47.531902045, -47.5319007039, -27.8162173927, -27.8162160516}}, + {"zjv30682s21k", []float64{77.5333506614, 77.533350829, 142.394326217, 142.394326553}}, + {"zbbtmw", []float64{50.1745605469, 50.1800537109, 169.694824219, 169.705810547}}, + {"e56k", []float64{18.984375, 19.16015625, -41.8359375, -41.484375}}, + {"v7dzp", []float64{65.91796875, 65.9619140625, 60.4248046875, 60.46875}}, + {"n2z8qt2hnzss", []float64{-85.707738027, -85.7077378593, 112.082815245, 112.08281558}}, + {"zbp8bt07bb5", []float64{45.159945488, 45.1599468291, 179.319227189, 179.31922853}}, + {"s551t", []float64{17.138671875, 17.1826171875, 4.4384765625, 4.482421875}}, + {"5zp7trjp", []float64{-49.9701118469, -49.9699401855, -0.817108154297, -0.816764831543}}, + {"81n2z7dz", []float64{5.77726364136, 5.77743530273, -170.888557434, -170.888214111}}, + {"dw", []float64{33.75, 39.375, -67.5, -56.25}}, + {"vvw35", []float64{76.11328125, 76.1572265625, 87.6708984375, 87.71484375}}, + {"zhtuc3b8bg5n", []float64{71.1572198197, 71.1572199874, 143.141591996, 143.141592331}}, + {"042w4qgx", []float64{-76.2507820129, -76.2506103516, -179.193191528, 
-179.192848206}}, + {"9sntb7", []float64{23.5272216797, 23.5327148438, -103.348388672, -103.337402344}}, + {"wt6x5nxk", []float64{30.7981109619, 30.7982826233, 116.157417297, 116.15776062}}, + {"spzwehn", []float64{44.7583007812, 44.7596740723, 10.6869506836, 10.6883239746}}, + {"hv70sbbpw64", []float64{-60.3754413128, -60.3754399717, 38.1777611375, 38.1777624786}}, + {"wxt", []float64{42.1875, 43.59375, 119.53125, 120.9375}}, + {"7dvqsskj8", []float64{-28.3643817902, -28.3643388748, -14.9139404297, -14.9138975143}}, + {"wm22", []float64{29.53125, 29.70703125, 101.6015625, 101.953125}}, + {"5t4kf", []float64{-61.0400390625, -60.99609375, -19.248046875, -19.2041015625}}, + {"5sy67z47fz1", []float64{-62.846608758, -62.8466074169, -13.542933315, -13.5429319739}}, + {"c36nw2msv0t", []float64{53.1760194898, 53.1760208309, -120.655067414, -120.655066073}}, + {"311v", []float64{-38.49609375, -38.3203125, -132.5390625, -132.1875}}, + {"x", []float64{0.0, 45.0, 135.0, 180.0}}, + {"xcykeys", []float64{10.6704711914, 10.6718444824, 177.709350586, 177.710723877}}, + {"gtmvbbv", []float64{75.5461120605, 75.5474853516, -14.3742370605, -14.3728637695}}, + {"5n1", []float64{-56.25, -54.84375, -43.59375, -42.1875}}, + {"y08uf", []float64{48.6474609375, 48.69140625, 91.142578125, 91.1865234375}}, + {"ds4", []float64{22.5, 23.90625, -64.6875, -63.28125}}, + {"1t49n9", []float64{-61.6937255859, -61.6882324219, -108.698730469, -108.687744141}}, + {"v81d55x758", []float64{45.3713035583, 45.3713089228, 69.7513175011, 69.7513282299}}, + {"39j80vxmjh", []float64{-39.3439078331, -39.3439024687, -104.722495079, -104.72248435}}, + {"x3vd66k2vj46", []float64{10.251773335, 10.2517735027, 154.089306034, 154.089306369}}, + {"vg", []float64{61.875, 67.5, 78.75, 90.0}}, + {"06q04jnnuyz", []float64{-77.3150892556, -77.3150879145, -160.216156393, -160.216155052}}, + {"cvws", []float64{76.640625, 76.81640625, -92.109375, -91.7578125}}, + {"9d5j84gmgbv", []float64{12.2328941524, 12.2328954935, 
-108.276619166, -108.276617825}}, + {"mjg", []float64{-12.65625, -11.25, 49.21875, 50.625}}, + {"k7gspu", []float64{-23.1811523438, -23.1756591797, 16.5124511719, 16.5234375}}, + {"13nrmggfx", []float64{-83.0795574188, -83.0795145035, -114.702801704, -114.702758789}}, + {"ypj759", []float64{84.9078369141, 84.9133300781, 97.5366210938, 97.5476074219}}, + {"hhy3z0zjn", []float64{-62.9686546326, -62.9686117172, 9.10655021667, 9.10659313202}}, + {"b0xhjv", []float64{48.5430908203, 48.5485839844, -169.903564453, -169.892578125}}, + {"xucn7t7t11", []float64{27.8470855951, 27.8470909595, 170.314908028, 170.314918756}}, + {"f0yqwpw6", []float64{50.4028701782, 50.4030418396, -80.9386825562, -80.9383392334}}, + {"hcud164f7z", []float64{-79.7932773829, -79.7932720184, 40.1369941235, 40.1370048523}}, + {"k7b09t", []float64{-23.7908935547, -23.7854003906, 11.3159179688, 11.3269042969}}, + {"gzttr69", []float64{88.1240844727, 88.1254577637, -3.19564819336, -3.19427490234}}, + {"z1", []float64{50.625, 56.25, 135.0, 146.25}}, + {"mt", []float64{-16.875, -11.25, 67.5, 78.75}}, + {"vgpm3pe", []float64{62.839050293, 62.840423584, 88.9933776855, 88.9947509766}}, + {"xc2xk", []float64{8.3056640625, 8.349609375, 169.62890625, 169.672851562}}, + {"7gpegjrrn", []float64{-27.4357795715, -27.4357366562, -0.561075210571, -0.561032295227}}, + {"5m00s41bg21m", []float64{-61.7759934627, -61.775993295, -33.5716743395, -33.5716740042}}, + {"ybz9un", []float64{49.5593261719, 49.5648193359, 134.47265625, 134.483642578}}, + {"5cxhpu5jy6y", []float64{-80.8364005387, -80.8363991976, -1.06127768755, -1.06127634645}}, + {"0gk7412", []float64{-71.1845397949, -71.1831665039, -140.185546875, -140.184173584}}, + {"zj2", []float64{74.53125, 75.9375, 135.0, 136.40625}}, + {"6jj5vt6zv5", []float64{-16.1856347322, -16.1856293678, -82.7230596542, -82.7230489254}}, + {"mdp7x0cy8x", []float64{-33.1294924021, -33.1294870377, 78.0053544044, 78.0053651333}}, + {"515", []float64{-84.375, -82.96875, -40.78125, 
-39.375}}, + {"rcrpxqxje", []float64{-36.613740921, -36.6136980057, 178.922095299, 178.922138214}}, + {"ydd5n3qr6tu", []float64{59.5979855955, 59.5979869366, 115.595853925, 115.595855266}}, + {"hd", []float64{-78.75, -73.125, 22.5, 33.75}}, + {"rj4uc", []float64{-16.0400390625, -15.99609375, 138.911132812, 138.955078125}}, + {"0r3x", []float64{-47.98828125, -47.8125, -166.640625, -166.2890625}}, + {"490rp8g4y", []float64{-83.1399393082, -83.1398963928, -66.8144702911, -66.8144273758}}, + {"v19nuj", []float64{54.6514892578, 54.6569824219, 46.58203125, 46.5930175781}}, + {"wrg3hx7", []float64{43.8093566895, 43.8107299805, 106.022186279, 106.02355957}}, + {"ntzjry4", []float64{-56.7004394531, -56.6990661621, 122.687072754, 122.688446045}}, + {"3s5c85t2d", []float64{-22.2170162201, -22.2169733047, -107.219266891, -107.219223976}}, + {"wyy", []float64{37.96875, 39.375, 132.1875, 133.59375}}, + {"6wk17", []float64{-9.6240234375, -9.580078125, -61.7431640625, -61.69921875}}, + {"7m5wqccz7meg", []float64{-15.7654795982, -15.7654794306, -28.5289463773, -28.5289460421}}, + {"sk8n5z1qd2", []float64{26.4067554474, 26.4067608118, 11.4166080952, 11.416618824}}, + {"kw8hyjjj384", []float64{-7.57417201996, -7.57417067885, 22.7706053853, 22.7706067264}}, + {"14rw6bt3vx9v", []float64{-76.2420291267, -76.2420289591, -124.324827231, -124.324826896}}, + {"9cdu", []float64{9.140625, 9.31640625, -97.3828125, -97.03125}}, + {"0cptj3e6crgm", []float64{-83.4873395227, -83.4873393551, -135.467890911, -135.467890576}}, + {"0m", []float64{-61.875, -56.25, -168.75, -157.5}}, + {"mx", []float64{-5.625, 0.0, 67.5, 78.75}}, + {"p1sfuc", []float64{-81.0736083984, -81.0681152344, 141.888427734, 141.899414062}}, + {"03nduncm", []float64{-83.8536643982, -83.8534927368, -159.431877136, -159.431533813}}, + {"0m0j", []float64{-60.99609375, -60.8203125, -168.75, -168.3984375}}, + {"7", []float64{-45.0, 0.0, -45.0, 0.0}}, + {"fn", []float64{78.75, 84.375, -90.0, -78.75}}, + {"srddrb0", 
[]float64{42.5830078125, 42.5843811035, 15.1062011719, 15.1075744629}}, + {"wpbf1dnj", []float64{43.957157135, 43.9573287964, 91.1288452148, 91.1291885376}}, + {"nv8k1f98", []float64{-58.3456420898, -58.3454704285, 124.180526733, 124.180870056}}, + {"7upj4ct1", []float64{-21.6126823425, -21.6125106812, -1.27853393555, -1.27819061279}}, + {"k312nzxpwm", []float64{-39.3324869871, -39.3324816227, 13.3143246174, 13.3143353462}}, + {"s", []float64{0.0, 45.0, 0.0, 45.0}}, + {"qure08zn2n", []float64{-20.5611813068, -20.5611759424, 134.328460693, 134.328471422}}, + {"m1", []float64{-39.375, -33.75, 45.0, 56.25}}, + {"q7wbtv20e", []float64{-25.195684433, -25.1956415176, 110.995001793, 110.995044708}}, + {"fsqx0ywr3s", []float64{70.1736903191, 70.1736956835, -58.3177685738, -58.3177578449}}, + {"rn9k7k2927vd", []float64{-7.66684871167, -7.66684854403, 136.901339516, 136.901339851}}, + {"8ypg", []float64{34.27734375, 34.453125, -135.3515625, -135.0}}, + {"42ru", []float64{-87.890625, -87.71484375, -67.8515625, -67.5}}, + {"z3efhtnprhw", []float64{53.8177970052, 53.8177983463, 151.729739606, 151.729740947}}, + {"c", []float64{45.0, 90.0, -135.0, -90.0}}, + {"fu", []float64{67.5, 73.125, -56.25, -45.0}}, + {"uz9", []float64{87.1875, 88.59375, 35.15625, 36.5625}}, + {"bqdnsr4y4", []float64{82.7445602417, 82.744603157, -165.746870041, -165.746827126}}, + {"rshpnngvd", []float64{-21.231508255, -21.2314653397, 163.393907547, 163.393950462}}, + {"4egtrvjfe", []float64{-67.9555034637, -67.9554605484, -62.2295236588, -62.2294807434}}, + {"w", []float64{0.0, 45.0, 90.0, 135.0}}, + {"7", []float64{-45.0, 0.0, -45.0, 0.0}}, + {"r2m2yc", []float64{-43.4564208984, -43.4509277344, 153.929443359, 153.940429688}}, + {"v9vps2z7", []float64{56.1667442322, 56.1669158936, 74.727973938, 74.7283172607}}, + {"dh", []float64{22.5, 28.125, -90.0, -78.75}}, + {"k", []float64{-45.0, 0.0, 0.0, 45.0}}, + {"7", []float64{-45.0, 0.0, -45.0, 0.0}}, + {"rrtp", []float64{-1.58203125, -1.40625, 153.28125, 
153.6328125}}, + {"57wdet", []float64{-69.8455810547, -69.8400878906, -24.4555664062, -24.4445800781}}, + {"sxm3rvsc1x0y", []float64{41.031399183, 41.0313993506, 30.229977183, 30.2299775183}}, + {"k", []float64{-45.0, 0.0, 0.0, 45.0}}, + {"fmwp70q5", []float64{77.2138023376, 77.213973999, -70.1724243164, -70.1720809937}}, + {"vq3whey", []float64{81.2315368652, 81.2329101562, 58.5653686523, 58.5667419434}}, + {"vm55829xumn", []float64{73.7443381548, 73.7443394959, 60.4819867015, 60.4819880426}}, + {"pc", []float64{-84.375, -78.75, 168.75, 180.0}}, + {"76j", []float64{-33.75, -32.34375, -26.71875, -25.3125}}, + {"du5md", []float64{23.466796875, 23.5107421875, -51.591796875, -51.5478515625}}, + {"0b800r9", []float64{-87.1463012695, -87.1449279785, -146.237640381, -146.23626709}}, + {"r96suj", []float64{-37.1063232422, -37.1008300781, 161.19140625, 161.202392578}}, + {"dqn86", []float64{33.7939453125, 33.837890625, -69.521484375, -69.4775390625}}, + {"62jjysq7fun", []float64{-43.9652466774, -43.9652453363, -71.4243963361, -71.424394995}}, + {"s623s4p3v", []float64{12.9312086105, 12.9312515259, 11.7875146866, 11.7875576019}}, + {"j9w5", []float64{-81.03515625, -80.859375, 75.9375, 76.2890625}}, + {"cku2qh5ee64", []float64{71.7852795124, 71.7852808535, -117.504816949, -117.504815608}}, + {"ypmy864vvgs", []float64{86.9358202815, 86.9358216226, 98.1009525061, 98.1009538472}}, + {"kwe", []float64{-8.4375, -7.03125, 26.71875, 28.125}}, + {"gmq7083dvewj", []float64{75.0604587235, 75.0604588911, -24.9366608262, -24.9366604909}}, + {"9er", []float64{18.28125, 19.6875, -102.65625, -101.25}}, + {"5p89tmjs9j5", []float64{-47.5205630064, -47.5205616653, -44.0585620701, -44.058560729}}, + {"x", []float64{0.0, 45.0, 135.0, 180.0}}, + {"ewy", []float64{37.96875, 39.375, -14.0625, -12.65625}}, + {"jtgef", []float64{-56.9970703125, -56.953125, 72.509765625, 72.5537109375}}, + {"9yjjw", []float64{34.716796875, 34.7607421875, -93.955078125, -93.9111328125}}, + {"926", []float64{1.40625, 
2.8125, -120.9375, -119.53125}}, + {"bz1", []float64{84.375, 85.78125, -144.84375, -143.4375}}, + {"yjjpq0ecnve", []float64{74.4023618102, 74.4023631513, 97.3003654182, 97.3003667593}}, + {"w5e", []float64{19.6875, 21.09375, 94.21875, 95.625}}, + {"hqcn9wtcr", []float64{-50.8527517319, -50.8527088165, 12.7303647995, 12.7304077148}}, + {"qfh6xphngs", []float64{-33.2709145546, -33.2709091902, 130.039823055, 130.039833784}}, + {"1he586fypp", []float64{-64.0560919046, -64.0560865402, -130.766186714, -130.766175985}}, + {"4cc5sh9n3s", []float64{-79.5152020454, -79.515196681, -54.666531086, -54.6665203571}}, + {"9y5wfm", []float64{34.9639892578, 34.9694824219, -96.2292480469, -96.2182617188}}, + {"c97809", []float64{52.0367431641, 52.0422363281, -107.556152344, -107.545166016}}, + {"k9g2nkbm3j5h", []float64{-35.1292287558, -35.1292285882, 27.3453609645, 27.3453612998}}, + {"thdwugw196t", []float64{26.5185204148, 26.5185217559, 48.7326653302, 48.7326666713}}, + {"34nm41n89c8v", []float64{-32.8655058704, -32.8655057028, -126.114044376, -126.11404404}}, + {"buf7qgu", []float64{72.3106384277, 72.3120117188, -142.783813477, -142.782440186}}, + {"mhvh0u7f4", []float64{-17.55443573, -17.5543928146, 52.0694446564, 52.0694875717}}, + {"t", []float64{0.0, 45.0, 45.0, 90.0}}, + {"f0vdwj1bu", []float64{49.6857976913, 49.6858406067, -81.9993782043, -81.999335289}}, + {"kcke59", []float64{-37.4359130859, -37.4304199219, 40.2319335938, 40.2429199219}}, + {"9rws4p0", []float64{42.9290771484, 42.9304504395, -114.521484375, -114.520111084}}, + {"fhj1u03epu", []float64{67.8095269203, 67.8095322847, -82.7905762196, -82.7905654907}}, + {"13296d9gwq1", []float64{-82.734657526, -82.7346561849, -122.934338897, -122.934337556}}, + {"4j", []float64{-61.875, -56.25, -90.0, -78.75}}, + {"gk5u1y2", []float64{68.2374572754, 68.2388305664, -28.3996582031, -28.3982849121}}, + {"9v6yrwx00", []float64{30.6655883789, 30.6656312943, -97.0436096191, -97.0435667038}}, + {"mc92", []float64{-36.5625, 
-36.38671875, 80.5078125, 80.859375}}, + {"m", []float64{-45.0, 0.0, 45.0, 90.0}}, + {"vtzr1we0jh5", []float64{78.6099457741, 78.6099471152, 77.7655689418, 77.7655702829}}, + {"ytmmrjr08p", []float64{75.4830640554, 75.4830694199, 120.200042725, 120.200053453}}, + {"y7q525c0mgkz", []float64{63.8731999509, 63.8732001185, 109.689126424, 109.68912676}}, + {"s5nc", []float64{17.05078125, 17.2265625, 9.4921875, 9.84375}}, + {"wk2", []float64{23.90625, 25.3125, 101.25, 102.65625}}, + {"f4beky4z04y", []float64{61.0742144287, 61.0742157698, -89.0843501687, -89.0843488276}}, + {"ywdu5yj95", []float64{82.2987556458, 82.2987985611, 116.539664268, 116.539707184}}, + {"n3", []float64{-84.375, -78.75, 101.25, 112.5}}, + {"0334vnb6", []float64{-82.4479293823, -82.4477577209, -167.123680115, -167.123336792}}, + {"xg65", []float64{18.80859375, 18.984375, 171.5625, 171.9140625}}, + {"0ebmse71br", []float64{-67.9212623835, -67.921257019, -156.946552992, -156.946542263}}, + {"ycwd9fc", []float64{53.8920593262, 53.8934326172, 132.968902588, 132.970275879}}, + {"0z2gsvd0tfzy", []float64{-48.573201634, -48.5732014664, -144.983568527, -144.983568192}}, + {"e041", []float64{0.17578125, 0.3515625, -42.1875, -41.8359375}}, + {"ntzdfpdcphj7", []float64{-57.1314592101, -57.1314590424, 123.138849624, 123.138849959}}, + {"jx1bfyyhgds1", []float64{-50.4552562349, -50.4552560672, 70.0901824236, 70.0901827589}}, + {"dhzuvhgspbew", []float64{27.5804938003, 27.580493968, -78.8766921312, -78.8766917959}}, + {"4", []float64{-90.0, -45.0, -90.0, -45.0}}, + {"teyjc173t", []float64{22.1116161346, 22.11165905, 75.986123085, 75.9861660004}}, + {"bg57uz4rxw5", []float64{62.5739514828, 62.5739528239, -141.467531472, -141.467530131}}, + {"52dtfpdc", []float64{-86.1353874207, -86.1352157593, -30.1427078247, -30.142364502}}, + {"vx1j39e", []float64{85.3060913086, 85.3074645996, 68.9762878418, 68.9776611328}}, + {"2", []float64{-45.0, 0.0, -180.0, -135.0}}, + {"psmpz5", []float64{-64.7149658203, -64.7094726562, 
164.838867188, 164.849853516}}, + {"4xr95eeee", []float64{-49.023141861, -49.0230989456, -56.7943811417, -56.7943382263}}, + {"5j", []float64{-61.875, -56.25, -45.0, -33.75}}, + {"kpb", []float64{-1.40625, 0.0, 0.0, 1.40625}}, + {"dsub48epk", []float64{26.722741127, 26.7227840424, -60.7061576843, -60.706114769}}, + {"2urtnwtdw17", []float64{-20.1787023246, -20.1787009835, -135.409665853, -135.409664512}}, + {"e6s30gwjxm", []float64{14.2584782839, 14.2584836483, -27.7319276333, -27.7319169044}}, + {"qtx", []float64{-14.0625, -12.65625, 122.34375, 123.75}}, + {"qj0qvndweq3k", []float64{-15.651620999, -15.6516208313, 90.5748634413, 90.5748637766}}, + {"ffetyh28uyj", []float64{60.0967490673, 60.0967504084, -51.0635559261, -51.063554585}}, + {"z56t8nwqq7", []float64{64.2848414183, 64.2848467827, 138.52447629, 138.524487019}}, + {"7h", []float64{-22.5, -16.875, -45.0, -33.75}}, + {"9tuuw1pkyh", []float64{33.1410956383, 33.1411010027, -105.546426773, -105.546416044}}, + {"2m", []float64{-16.875, -11.25, -168.75, -157.5}}, + {"h7qt", []float64{-70.83984375, -70.6640625, 20.390625, 20.7421875}}, + {"t832ztb6psn", []float64{1.57003641129, 1.57003775239, 69.5880755782, 69.5880769193}}, + {"wk", []float64{22.5, 28.125, 101.25, 112.5}}, + {"ndjbb8w3n", []float64{-78.6152458191, -78.6152029037, 120.616750717, 120.616793633}}, + {"14pf3eqg4zd5", []float64{-78.3360836841, -78.3360835165, -124.026254117, -124.026253782}}, + {"9j", []float64{28.125, 33.75, -135.0, -123.75}}, + {"fr6ng34", []float64{86.9732666016, 86.9746398926, -75.7919311523, -75.7905578613}}, + {"p3ggurx2c", []float64{-79.455742836, -79.4556999207, 151.720204353, 151.720247269}}, + {"1h1pg1myn06", []float64{-66.1297975481, -66.129796207, -133.453757465, -133.453756124}}, + {"cqsue", []float64{82.353515625, 82.3974609375, -116.938476562, -116.89453125}}, + {"w", []float64{0.0, 45.0, 90.0, 135.0}}, + {"s8jkw", []float64{0.791015625, 0.8349609375, 30.146484375, 30.1904296875}}, + {"67", []float64{-28.125, -22.5, 
-78.75, -67.5}}, + {"ywe4mn", []float64{81.9909667969, 81.9964599609, 116.938476562, 116.949462891}}, + {"0f5te71q9g", []float64{-77.7655917406, -77.7655863762, -141.183511019, -141.18350029}}, + {"v9s6tw70swwv", []float64{53.911406938, 53.9114071056, 73.7225837633, 73.7225840986}}, + {"0jbutv", []float64{-56.8377685547, -56.8322753906, -178.692626953, -178.681640625}}, + {"bn271bp", []float64{80.68359375, 80.684967041, -179.561920166, -179.560546875}}, + {"1vvyth", []float64{-56.4916992188, -56.4862060547, -92.9443359375, -92.9333496094}}, + {"7ruk94vup", []float64{-0.59944152832, -0.599398612976, -27.7212953568, -27.7212524414}}, + {"3hf", []float64{-18.28125, -16.875, -132.1875, -130.78125}}, + {"741rwgds3m4k", []float64{-32.4116574973, -32.4116573296, -42.9420667514, -42.9420664161}}, + {"2pye", []float64{-0.87890625, -0.703125, -170.859375, -170.5078125}}, + {"2", []float64{-45.0, 0.0, -180.0, -135.0}}, + {"e7", []float64{16.875, 22.5, -33.75, -22.5}}, + {"f", []float64{45.0, 90.0, -90.0, -45.0}}, + {"6", []float64{-45.0, 0.0, -90.0, -45.0}}, + {"4c5p5ke", []float64{-83.1198120117, -83.1184387207, -51.8843078613, -51.8829345703}}, + {"h7q", []float64{-71.71875, -70.3125, 19.6875, 21.09375}}, + {"4fjp8", []float64{-77.431640625, -77.3876953125, -49.21875, -49.1748046875}}, + {"p2cbvvvdt8", []float64{-85.6173992157, -85.6173938513, 148.971412182, 148.971422911}}, + {"xxjtqz46qmm", []float64{40.3367181122, 40.3367194533, 165.534370691, 165.534372032}}, + {"w1e", []float64{8.4375, 9.84375, 94.21875, 95.625}}, + {"fxpg4v3e", []float64{84.9316978455, 84.9318695068, -56.4786529541, -56.4783096313}}, + {"3be6u", []float64{-41.7041015625, -41.66015625, -96.50390625, -96.4599609375}}, + {"9", []float64{0.0, 45.0, -135.0, -90.0}}, + {"seqqvkuphz", []float64{19.4951051474, 19.4951105118, 31.5254724026, 31.5254831314}}, + {"txy2t7xx", []float64{43.7020683289, 43.7022399902, 76.5300750732, 76.530418396}}, + {"s2hc2d2s", []float64{0.232772827148, 0.232944488525, 
17.9523468018, 17.9526901245}}, + {"8zr0n4f62k", []float64{40.7967638969, 40.7967692614, -136.139477491, -136.139466763}}, + {"th1vxpfxnp9", []float64{23.5106107593, 23.5106121004, 47.7722467482, 47.7722480893}}, + {"n", []float64{-90.0, -45.0, 90.0, 135.0}}, + {"33", []float64{-39.375, -33.75, -123.75, -112.5}}, + {"gu", []float64{67.5, 73.125, -11.25, 0.0}}, + {"9vq49", []float64{29.970703125, 30.0146484375, -92.7685546875, -92.724609375}}, + {"tm", []float64{28.125, 33.75, 56.25, 67.5}}, + {"dpzw0p", []float64{44.6868896484, 44.6923828125, -79.453125, -79.4421386719}}, + {"gwg12", []float64{83.1884765625, 83.232421875, -18.28125, -18.2373046875}}, + {"b8vphv0m5k", []float64{50.4775643349, 50.4775696993, -150.259526968, -150.259516239}}, + {"pgpffhw1", []float64{-72.6167106628, -72.6165390015, 179.744567871, 179.744911194}}, + {"3r3w", []float64{-3.1640625, -2.98828125, -121.640625, -121.2890625}}, + {"u1d", []float64{53.4375, 54.84375, 2.8125, 4.21875}}, + {"mznb8v5xu6", []float64{-5.50830245018, -5.50829708576, 88.2801353931, 88.280146122}}, + {"8mb57vjrex4", []float64{32.9438298941, 32.9438312352, -168.577842414, -168.577841073}}, + {"zm", []float64{73.125, 78.75, 146.25, 157.5}}, + {"c9ef6tm74sg", []float64{53.8623873889, 53.86238873, -107.109378129, -107.109376788}}, + {"spww", []float64{43.2421875, 43.41796875, 9.140625, 9.4921875}}, + {"snp97n", []float64{34.0026855469, 34.0081787109, 10.6787109375, 10.6896972656}}, + {"zp9r6emsk8xx", []float64{88.4805002622, 88.4805004299, 136.875432059, 136.875432394}}, + {"v", []float64{45.0, 90.0, 45.0, 90.0}}, + {"18zsh9bg", []float64{-85.0679969788, -85.0678253174, -101.754341125, -101.753997803}}, + {"v28", []float64{47.8125, 49.21875, 56.25, 57.65625}}, + {"4e", []float64{-73.125, -67.5, -67.5, -56.25}}, + {"evn0wp56", []float64{28.2516860962, 28.2518577576, -2.5443649292, -2.54402160645}}, + {"uyf9v", []float64{83.2763671875, 83.3203125, 37.4853515625, 37.529296875}}, + {"d7", []float64{16.875, 22.5, -78.75, 
-67.5}}, + {"05", []float64{-73.125, -67.5, -180.0, -168.75}}, + {"ujj8", []float64{73.125, 73.30078125, 7.734375, 8.0859375}}, + {"wcb7n8", []float64{10.37109375, 10.3765869141, 124.387207031, 124.398193359}}, + {"r35s2y4e2", []float64{-38.5944128036, -38.5943698883, 151.208267212, 151.208310127}}, + {"k", []float64{-45.0, 0.0, 0.0, 45.0}}, + {"8tm3h7b1f", []float64{29.7279310226, 29.727973938, -149.930334091, -149.930291176}}, + {"3xecw9gsguw3", []float64{-2.53837538883, -2.53837522119, -106.935942136, -106.9359418}}, + {"hqs10v", []float64{-53.2342529297, -53.2287597656, 16.9079589844, 16.9189453125}}, + {"b21g", []float64{45.52734375, 45.703125, -166.2890625, -165.9375}}, + {"vphhpnjt5b", []float64{85.1119422913, 85.1119476557, 50.9403312206, 50.9403419495}}, + {"kbd", []float64{-42.1875, -40.78125, 36.5625, 37.96875}}, + {"2c", []float64{-39.375, -33.75, -146.25, -135.0}}, + {"07ur", []float64{-67.67578125, -67.5, -162.7734375, -162.421875}}, + {"8e5ky1", []float64{17.7154541016, 17.7209472656, -152.666015625, -152.655029297}}, + {"k2w84t", []float64{-42.1600341797, -42.1545410156, 20.5004882812, 20.5114746094}}, + {"p9t4ncex81m", []float64{-81.2014035881, -81.201402247, 164.832694083, 164.832695425}}, + {"q67rduzsu6uz", []float64{-30.9984667785, -30.9984666109, 105.951650552, 105.951650888}}, + {"udwkypp0v", []float64{59.936041832, 59.9360847473, 31.5625619888, 31.5626049042}}, + {"pjsu1q9qg", []float64{-58.3225107193, -58.322467804, 141.7364645, 141.736507416}}, + {"2kj2w9b021b", []float64{-22.4024440348, -22.4024426937, -161.081542969, -161.081541628}}, + {"5k0", []float64{-67.5, -66.09375, -33.75, -32.34375}}, + {"t626vs8j", []float64{13.1652259827, 13.165397644, 56.8432617188, 56.8436050415}}, + {"hd0z4zr73", []float64{-77.4791479111, -77.4791049957, 23.6855363846, 23.6855792999}}, + {"79gjppfekhhk", []float64{-34.2341917008, -34.2341915332, -17.9700222239, -17.9700218886}}, + {"u9u", []float64{54.84375, 56.25, 28.125, 29.53125}}, + {"5zbfmj3n30", 
[]float64{-45.9808301926, -45.9808248281, -9.97416973114, -9.9741590023}}, + {"1w1nt3g4t9pp", []float64{-55.0973731466, -55.0973729789, -110.858671814, -110.858671479}}, + {"f6bh910940", []float64{61.2654304504, 61.2654358149, -78.7052822113, -78.7052714825}}, + {"r65q38x", []float64{-32.6486206055, -32.6472473145, 150.895843506, 150.897216797}}, + {"xq2", []float64{35.15625, 36.5625, 146.25, 147.65625}}, + {"q87xbntvdv8d", []float64{-42.1947657689, -42.1947656013, 117.429890111, 117.429890446}}, + {"w1zhgmbpw", []float64{10.7115840912, 10.7116270065, 99.9868297577, 99.986872673}}, + {"5n", []float64{-56.25, -50.625, -45.0, -33.75}}, + {"9dz", []float64{15.46875, 16.875, -102.65625, -101.25}}, + {"n8r794hh15gv", []float64{-87.9668216966, -87.966821529, 122.744798921, 122.744799256}}, + {"px78re9", []float64{-49.1555786133, -49.1542053223, 162.752838135, 162.754211426}}, + {"3pps", []float64{-4.921875, -4.74609375, -124.453125, -124.1015625}}, + {"3s6um", []float64{-20.3466796875, -20.302734375, -108.413085938, -108.369140625}}, + {"9dj7zre6t7", []float64{11.9508236647, 11.9508290291, -104.793895483, -104.793884754}}, + {"4v1b", []float64{-61.875, -61.69921875, -53.7890625, -53.4375}}, + {"1k35z", []float64{-65.4345703125, -65.390625, -122.036132812, -121.9921875}}, + {"7z9n57", []float64{-1.74133300781, -1.73583984375, -9.70092773438, -9.68994140625}}, + {"3gzg", []float64{-23.37890625, -23.203125, -90.3515625, -90.0}}, + {"hy", []float64{-56.25, -50.625, 33.75, 45.0}}, + {"2rj6t", []float64{-5.185546875, -5.1416015625, -161.147460938, -161.103515625}}, + {"h", []float64{-90.0, -45.0, 0.0, 45.0}}, + {"dp44nc1t", []float64{39.7329139709, 39.7330856323, -86.8888092041, -86.8884658813}}, + {"0x1", []float64{-50.625, -49.21875, -156.09375, -154.6875}}, + {"dmwxxf", []float64{32.2668457031, 32.2723388672, -69.2687988281, -69.2578125}}, + {"khy29", []float64{-18.193359375, -18.1494140625, 8.8330078125, 8.876953125}}, + {"v", []float64{45.0, 90.0, 45.0, 90.0}}, + {"phn4", 
[]float64{-67.1484375, -66.97265625, 143.4375, 143.7890625}}, + {"qzhvp", []float64{-4.74609375, -4.7021484375, 130.737304688, 130.78125}}, + {"3n", []float64{-11.25, -5.625, -135.0, -123.75}}, + {"0nx", []float64{-53.4375, -52.03125, -170.15625, -168.75}}, + {"19uwx04h21", []float64{-79.0129369497, -79.0129315853, -105.86151123, -105.861500502}}, + {"7ur1q", []float64{-20.8740234375, -20.830078125, -1.142578125, -1.0986328125}}, + {"8yn6q9vmm", []float64{34.1560220718, 34.1560649872, -137.167868614, -137.167825699}}, + {"m4zk", []float64{-28.828125, -28.65234375, 55.1953125, 55.546875}}, + {"9bgzpypspd0", []float64{5.48287510872, 5.48287644982, -95.6253647804, -95.6253634393}}, + {"y1s", []float64{53.4375, 54.84375, 95.625, 97.03125}}, + {"qsyp207nvy", []float64{-17.0042717457, -17.0042663813, 120.941866636, 120.941877365}}, + {"rfb", []float64{-29.53125, -28.125, 168.75, 170.15625}}, + {"j", []float64{-90.0, -45.0, 45.0, 90.0}}, + {"5exm5p63", []float64{-69.3935966492, -69.3934249878, -12.1697616577, -12.169418335}}, + {"cnv22fdkruw", []float64{83.0271819234, 83.0271832645, -127.58079797, -127.580796629}}, + {"n7vg", []float64{-68.37890625, -68.203125, 109.3359375, 109.6875}}, + {"whvgd2h3sz9", []float64{27.3342821002, 27.3342834413, 98.1908561289, 98.19085747}}, + {"shbfuzk8vr", []float64{27.2421401739, 27.2421455383, 1.2698328495, 1.26984357834}}, + {"44vmk", []float64{-73.6083984375, -73.564453125, -82.44140625, -82.3974609375}}, + {"uhd1mfq", []float64{70.5445861816, 70.5459594727, 3.07342529297, 3.07479858398}}, + {"7bz", []float64{-40.78125, -39.375, -1.40625, 0.0}}, + {"h5b2wkdqpz", []float64{-68.7925726175, -68.7925672531, 0.629643201828, 0.629653930664}}, + {"h1", []float64{-84.375, -78.75, 0.0, 11.25}}, + {"3", []float64{-45.0, 0.0, -135.0, -90.0}}, + {"408bm1", []float64{-87.1380615234, -87.1325683594, -88.7255859375, -88.7145996094}}, + {"ggysyy5e2be6", []float64{66.9622308388, 66.9622310065, -1.80790107697, -1.8079007417}}, + {"w4u7dn8m9ndw", 
[]float64{16.1206699535, 16.1206701212, 96.0648427159, 96.0648430511}}, + {"yq", []float64{78.75, 84.375, 101.25, 112.5}}, + {"2nwuht4w", []float64{-7.70587921143, -7.70570755005, -170.306625366, -170.306282043}}, + {"v5gqe", []float64{67.236328125, 67.2802734375, 49.7021484375, 49.74609375}}, + {"0", []float64{-90.0, -45.0, -180.0, -135.0}}, + {"cmehghzjm1", []float64{76.7994600534, 76.7994654179, -119.389586449, -119.38957572}}, + {"u207q361d", []float64{45.5784130096, 45.578455925, 11.8790531158, 11.8790960312}}, + {"n4pgq345pp", []float64{-78.1726652384, -78.172659874, 101.176142693, 101.176153421}}, + {"b2mn8", []float64{47.548828125, 47.5927734375, -161.71875, -161.674804688}}, + {"qbe6mp0g8et3", []float64{-41.7529202811, -41.7529201135, 128.541097529, 128.541097865}}, + {"sr04m", []float64{39.7705078125, 39.814453125, 11.4697265625, 11.513671875}}, + {"hfr0y7u988jx", []float64{-77.1910560317, -77.1910558641, 43.8746168464, 43.8746171817}}, + {"jrgxg5qkg7p", []float64{-45.0252610445, -45.0252597034, 61.3124428689, 61.3124442101}}, + {"gryut", []float64{89.384765625, 89.4287109375, -24.0380859375, -23.994140625}}, + {"14d5b766ppxg", []float64{-75.2600834705, -75.2600833029, -132.173112966, -132.173112631}}, + {"0ede2rgwt2", []float64{-69.6975231171, -69.6975177526, -153.968356848, -153.968346119}}, + {"3", []float64{-45.0, 0.0, -135.0, -90.0}}, + {"dj", []float64{28.125, 33.75, -90.0, -78.75}}, + {"xf", []float64{11.25, 16.875, 168.75, 180.0}}, + {"szf3p5k9rmx", []float64{43.7876281142, 43.7876294553, 37.228180021, 37.2281813622}}, + {"b9fqkhfem7", []float64{55.9690493345, 55.9690546989, -154.156497717, -154.156486988}}, + {"t7zw8x5c", []float64{22.2749519348, 22.2751235962, 66.8239974976, 66.8243408203}}, + {"f87dmh", []float64{46.8237304688, 46.8292236328, -62.3583984375, -62.3474121094}}, + {"yrd1swq12", []float64{87.4857187271, 87.4857616425, 104.268493652, 104.268536568}}, + {"s2", []float64{0.0, 5.625, 11.25, 22.5}}, + {"q9dhkgwy4kum", 
[]float64{-35.7951473258, -35.7951471582, 115.530612208, 115.530612543}}, + {"7dr", []float64{-32.34375, -30.9375, -12.65625, -11.25}}, + {"v", []float64{45.0, 90.0, 45.0, 90.0}}, + {"2rpk2ey43dw", []float64{-4.85693067312, -4.85692933202, -158.524402678, -158.524401337}}, + {"3wfeg", []float64{-6.3720703125, -6.328125, -108.852539062, -108.80859375}}, + {"ke5k4j", []float64{-27.3944091797, -27.3889160156, 27.158203125, 27.1691894531}}, + {"z0xq", []float64{48.8671875, 49.04296875, 145.1953125, 145.546875}}, + {"w1sy", []float64{9.4921875, 9.66796875, 96.6796875, 97.03125}}, + {"eqm14", []float64{35.33203125, 35.3759765625, -26.630859375, -26.5869140625}}, + {"s", []float64{0.0, 45.0, 0.0, 45.0}}, + {"hjp9d9f", []float64{-61.6017150879, -61.6003417969, 10.6594848633, 10.6608581543}}, + {"p92v0", []float64{-82.08984375, -82.0458984375, 158.5546875, 158.598632812}}, + {"36m7g02m", []float64{-31.6823387146, -31.6821670532, -116.23500824, -116.234664917}}, + {"5g70e57zjrf", []float64{-71.6117633879, -71.6117620468, -6.89403623343, -6.89403489232}}, + {"65rkyfq4se", []float64{-25.8709841967, -25.8709788322, -79.4996237755, -79.4996130466}}, + {"eev1", []float64{21.26953125, 21.4453125, -15.46875, -15.1171875}}, + {"6", []float64{-45.0, 0.0, -90.0, -45.0}}, + {"m75926t", []float64{-27.8915405273, -27.8901672363, 61.1897277832, 61.1911010742}}, + {"1kjyeb", []float64{-66.357421875, -66.3519287109, -115.499267578, -115.48828125}}, + {"fb8rk2yfwmrp", []float64{49.0914924257, 49.0914925933, -55.7021225989, -55.7021222636}}, + {"y2qhd0j8x", []float64{47.1973514557, 47.197394371, 109.783244133, 109.783287048}}, + {"m2", []float64{-45.0, -39.375, 56.25, 67.5}}, + {"0543np5pgd23", []float64{-72.9094239883, -72.9094238207, -176.567995213, -176.567994878}}, + {"3", []float64{-45.0, 0.0, -135.0, -90.0}}, + {"d4h5zdhe5gy", []float64{11.9207011163, 11.9207024574, -84.0390613675, -84.0390600264}}, + {"9rcd", []float64{43.9453125, 44.12109375, -121.640625, -121.2890625}}, + 
{"ne9nrh75tq3", []float64{-69.1898868978, -69.1898855567, 114.218213707, 114.218215048}}, + {"7wk7", []float64{-9.31640625, -9.140625, -16.5234375, -16.171875}}, + {"995f97e", []float64{6.08367919922, 6.08505249023, -107.167510986, -107.166137695}}, + {"60kmung", []float64{-42.5459289551, -42.5445556641, -83.843536377, -83.8421630859}}, + {"845", []float64{11.25, 12.65625, -175.78125, -174.375}}, + {"n", []float64{-90.0, -45.0, 90.0, 135.0}}, + {"jehdxn0", []float64{-72.6525878906, -72.6512145996, 74.1357421875, 74.1371154785}}, + {"y", []float64{45.0, 90.0, 90.0, 135.0}}, + {"1d", []float64{-78.75, -73.125, -112.5, -101.25}}, + {"rbjy", []float64{-43.9453125, -43.76953125, 176.8359375, 177.1875}}, + {"r8qgzf4r9uy", []float64{-42.9222710431, -42.922269702, 167.335936725, 167.335938066}}, + {"k5p", []float64{-28.125, -26.71875, 9.84375, 11.25}}, + {"f4z7", []float64{60.99609375, 61.171875, -79.8046875, -79.453125}}, + {"7rp35b", []float64{-5.44921875, -5.44372558594, -23.3898925781, -23.37890625}}, + {"zn71yyn0pbc", []float64{80.4968301952, 80.4968315363, 139.52395454, 139.523955882}}, + {"ppj7", []float64{-50.09765625, -49.921875, 142.3828125, 142.734375}}, + {"mqv3q", []float64{-6.8115234375, -6.767578125, 63.896484375, 63.9404296875}}, + {"tsdtmfq", []float64{26.2477111816, 26.2490844727, 71.276550293, 71.277923584}}, + {"72ey8b14uynx", []float64{-41.0444164462, -41.0444162786, -28.4420176595, -28.4420173243}}, + {"7qrgb", []float64{-9.1845703125, -9.140625, -22.8515625, -22.8076171875}}, + {"w7zmkdpezcm", []float64{22.0282383263, 22.0282396674, 111.653705388, 111.653706729}}, + {"kqwr1dh9jdbc", []float64{-7.19585834071, -7.19585817307, 20.1113973185, 20.1113976538}}, + {"kv9jx", []float64{-13.095703125, -13.0517578125, 35.4638671875, 35.5078125}}, + {"09", []float64{-84.375, -78.75, -157.5, -146.25}}, + {"f8ztmmp0", []float64{50.1690673828, 50.1692390442, -56.7127990723, -56.7124557495}}, + {"k5dj8cuwbxjg", []float64{-24.3348933198, -24.3348931521, 
2.85166796297, 2.85166829824}}, + {"xd72j5qndwhn", []float64{12.6752517745, 12.6752519421, 162.298391461, 162.298391797}}, + {"esp42d", []float64{22.9064941406, 22.9119873047, -12.6342773438, -12.6232910156}}, + {"5sbfys", []float64{-62.7758789062, -62.7703857422, -21.1596679688, -21.1486816406}}, + {"8wsz02n", []float64{37.79296875, 37.794342041, -150.801086426, -150.799713135}}, + {"zeghw8", []float64{66.884765625, 66.8902587891, 162.004394531, 162.015380859}}, + {"u0xg7ug", []float64{48.4098815918, 48.4112548828, 11.0673522949, 11.0687255859}}, + {"0jb11", []float64{-57.48046875, -57.4365234375, -179.956054688, -179.912109375}}, + {"xv8cwtybm", []float64{31.2328004837, 31.232843399, 170.099816322, 170.099859238}}, + {"ef0cwqt7", []float64{11.5498924255, 11.5500640869, -9.91344451904, -9.91310119629}}, + {"hrh5k", []float64{-50.0537109375, -50.009765625, 17.05078125, 17.0947265625}}, + {"pnpdsx4eb", []float64{-55.7714509964, -55.7714080811, 145.748062134, 145.748105049}}, + {"8g2sx4gn", []float64{19.0884017944, 19.0885734558, -145.235137939, -145.234794617}}, + {"tsue3yr4z", []float64{27.3248434067, 27.324886322, 73.9149427414, 73.9149856567}}, + {"k4vq", []float64{-28.4765625, -28.30078125, 7.3828125, 7.734375}}, + {"mr1f1d430h", []float64{-5.26225805283, -5.26225268841, 58.7799453735, 58.7799561024}}, + {"dtuqkjybm", []float64{33.4740114212, 33.4740543365, -61.3381719589, -61.3381290436}}, + {"p00zpfbj5350", []float64{-88.7535613775, -88.7535612099, 136.39540717, 136.395407505}}, + {"n16jy8wrg38", []float64{-81.9539228082, -81.9539214671, 93.106867075, 93.1068684161}}, + {"3ckf9t6", []float64{-37.5004577637, -37.4990844727, -94.5016479492, -94.5002746582}}, + {"vvch78h7q7", []float64{78.0913943052, 78.0913996696, 80.3161633015, 80.3161740303}}, + {"x", []float64{0.0, 45.0, 135.0, 180.0}}, + {"qkr8dj44", []float64{-20.9780502319, -20.9778785706, 111.887512207, 111.88785553}}, + {"s5dw7s", []float64{20.8081054688, 20.8135986328, 3.66943359375, 3.68041992188}}, + 
{"tpt", []float64{42.1875, 43.59375, 52.03125, 53.4375}}, + {"6vqn07ep", []float64{-14.3936347961, -14.3934631348, -47.7973937988, -47.7970504761}}, + {"7zbup2", []float64{-0.703125, -0.697631835938, -9.87670898438, -9.86572265625}}, + {"xd0j0wrn39f8", []float64{12.1643207967, 12.1643209644, 157.531653419, 157.531653754}}, + {"254kywz4", []float64{-27.2526168823, -27.2524452209, -176.540679932, -176.540336609}}, + {"6pkmr1875rp", []float64{-3.28710615635, -3.28710481524, -83.7153281271, -83.715326786}}, + {"69bmhmbw0de", []float64{-34.2447146773, -34.2447133362, -66.9609577954, -66.9609564543}}, + {"47jd", []float64{-72.7734375, -72.59765625, -71.015625, -70.6640625}}, + {"mw3ngtnj", []float64{-8.6289024353, -8.62873077393, 69.0682983398, 69.0686416626}}, + {"v", []float64{45.0, 90.0, 45.0, 90.0}}, + {"4uyq1", []float64{-62.2265625, -62.1826171875, -47.4169921875, -47.373046875}}, + {"9748v3e", []float64{17.0150756836, 17.0164489746, -119.999542236, -119.998168945}}, + {"sjy7", []float64{32.87109375, 33.046875, 8.7890625, 9.140625}}, + {"nc1jb2kb", []float64{-83.3628845215, -83.3627128601, 125.17375946, 125.174102783}}, + {"ffryw", []float64{58.798828125, 58.8427734375, -45.087890625, -45.0439453125}}, + {"3qfr7scg5s", []float64{-5.7302069664, -5.73020160198, -120.429575443, -120.429564714}}, + {"1x", []float64{-50.625, -45.0, -112.5, -101.25}}, + {"y", []float64{45.0, 90.0, 90.0, 135.0}}, + {"0", []float64{-90.0, -45.0, -180.0, -135.0}}, + {"n4wk", []float64{-75.234375, -75.05859375, 98.7890625, 99.140625}}, + {"bw2d2", []float64{80.5517578125, 80.595703125, -156.796875, -156.752929688}}, + {"ztgehr9mpt", []float64{77.9131776094, 77.9131829739, 162.610681057, 162.610691786}}, + {"bnkb", []float64{80.15625, 80.33203125, -173.3203125, -172.96875}}, + {"q0fmcn", []float64{-39.7375488281, -39.7320556641, 93.2080078125, 93.2189941406}}, + {"e0e1sxt9gwm", []float64{3.11770454049, 3.1177058816, -40.5757860839, -40.5757847428}}, + {"9qc", []float64{37.96875, 39.375, 
-122.34375, -120.9375}}, + {"0cybm9snr", []float64{-80.1029920578, -80.1029491425, -136.51031971, -136.510276794}}, + {"fp", []float64{84.375, 90.0, -90.0, -78.75}}, + {"7u69k7", []float64{-20.8575439453, -20.8520507812, -7.54760742188, -7.53662109375}}, + {"guh3mbvnwv7y", []float64{67.7249914035, 67.7249915712, -5.01359079033, -5.01359045506}}, + {"vgw4wgnrd58e", []float64{65.1447393559, 65.1447395235, 87.4928004295, 87.4928007647}}, + {"rzk732w", []float64{-3.64471435547, -3.64334106445, 174.789733887, 174.791107178}}, + {"kf", []float64{-33.75, -28.125, 33.75, 45.0}}, + {"rcfr28t0", []float64{-33.8790893555, -33.8789176941, 171.942901611, 171.943244934}}, + {"5bqnms", []float64{-87.4731445312, -87.4676513672, -2.57080078125, -2.55981445312}}, + {"fs84w", []float64{70.751953125, 70.7958984375, -67.236328125, -67.1923828125}}, + {"mcjrsmx", []float64{-38.0264282227, -38.0250549316, 86.3291931152, 86.3305664062}}, + {"u84", []float64{45.0, 46.40625, 25.3125, 26.71875}}, + {"gkv4g14m", []float64{72.2084999084, 72.2086715698, -26.5838241577, -26.583480835}}, + {"27dhxu", []float64{-24.4995117188, -24.4940185547, -165.596923828, -165.5859375}}, + {"0v", []float64{-61.875, -56.25, -146.25, -135.0}}, + {"bpurn", []float64{89.82421875, 89.8681640625, -173.759765625, -173.715820312}}, + {"p5", []float64{-73.125, -67.5, 135.0, 146.25}}, + {"f3ffsuh", []float64{55.3051757812, 55.3065490723, -74.6685791016, -74.6672058105}}, + {"j0zbr0tb", []float64{-85.7345581055, -85.7343864441, 56.2139511108, 56.2142944336}}, + {"vyz", []float64{82.96875, 84.375, 88.59375, 90.0}}, + {"082p96b5ey", []float64{-87.2596514225, -87.2596460581, -157.444907427, -157.444896698}}, + {"y", []float64{45.0, 90.0, 90.0, 135.0}}, + {"g030qs", []float64{46.4721679688, 46.4776611328, -43.3081054688, -43.2971191406}}, + {"54", []float64{-78.75, -73.125, -45.0, -33.75}}, + {"fp5rcptn2gc", []float64{85.7795964181, 85.7795977592, -85.3788422048, -85.3788408637}}, + {"dk8z85s6516h", []float64{26.650436148, 
26.6504363157, -77.6893445849, -77.6893442497}}, + {"3v1ebh18qujh", []float64{-16.1937826127, -16.193782445, -99.1382686794, -99.1382683441}}, + {"un50j3xf9", []float64{78.7586688995, 78.7587118149, 4.46014881134, 4.46019172668}}, + {"4b8y3gh", []float64{-86.0723876953, -86.0710144043, -55.1129150391, -55.111541748}}, + {"efdgh", []float64{14.58984375, 14.6337890625, -7.20703125, -7.1630859375}}, + {"1xxuk2", []float64{-47.0654296875, -47.0599365234, -101.414794922, -101.403808594}}, + {"s", []float64{0.0, 45.0, 0.0, 45.0}}, + {"1z6", []float64{-49.21875, -47.8125, -98.4375, -97.03125}}, + {"s", []float64{0.0, 45.0, 0.0, 45.0}}, + {"mxkjr1jdu5ru", []float64{-3.28991509974, -3.2899149321, 73.440352343, 73.4403526783}}, + {"x", []float64{0.0, 45.0, 135.0, 180.0}}, + {"3kpcn3sm", []float64{-22.315120697, -22.3149490356, -112.57106781, -112.570724487}}, + {"buk3t0ctrmt0", []float64{69.1749724746, 69.1749726422, -140.051333159, -140.051332824}}, + {"pp", []float64{-50.625, -45.0, 135.0, 146.25}}, + {"4h", []float64{-67.5, -61.875, -90.0, -78.75}}, + {"fjw1kcg4", []float64{76.1671829224, 76.1673545837, -81.3496398926, -81.3492965698}}, + {"877wsvjfz5", []float64{19.4517821074, 19.4517874718, -163.611187935, -163.611177206}}, + {"ru3", []float64{-21.09375, -19.6875, 170.15625, 171.5625}}, + {"yr", []float64{84.375, 90.0, 101.25, 112.5}}, + {"cu5x6cxq", []float64{68.7836837769, 68.7838554382, -96.1973190308, -96.196975708}}, + {"w04vuf4bdzjm", []float64{1.02185273543, 1.02185290307, 94.0798293427, 94.0798296779}}, + {"8", []float64{0.0, 45.0, -180.0, -135.0}}, + {"4zdcmmp2p4", []float64{-47.5652968884, -47.5652915239, -52.1418428421, -52.1418321133}}, + {"eft02s1hu", []float64{14.1292333603, 14.1292762756, -4.19523239136, -4.19518947601}}, + {"zk4v9qdeg5q1", []float64{68.5031637736, 68.5031639412, 150.175689161, 150.175689496}}, + {"8xr", []float64{40.78125, 42.1875, -147.65625, -146.25}}, + {"3pxyrt", []float64{-1.68640136719, -1.68090820312, -123.771972656, 
-123.760986328}}, + {"cmh39xszs8", []float64{73.4311580658, 73.4311634302, -117.70080328, -117.700792551}}, + {"xrm9d0wb48", []float64{41.047668457, 41.0476738214, 154.081642628, 154.081653357}}, + {"d4bh0k", []float64{16.1938476562, 16.1993408203, -89.9890136719, -89.9780273438}}, + {"hk8", []float64{-64.6875, -63.28125, 11.25, 12.65625}}, + {"9hxqk54m0wn", []float64{26.4285027981, 26.4285041392, -124.625786841, -124.6257855}}, + {"mygnv0", []float64{-5.8447265625, -5.83923339844, 83.1884765625, 83.1994628906}}, + {"yrjmvs", []float64{85.4077148438, 85.4132080078, 108.874511719, 108.885498047}}, + {"52csyemvf12", []float64{-84.9274425209, -84.9274411798, -31.3469982147, -31.3469968736}}, + {"4jrvjj", []float64{-59.5623779297, -59.5568847656, -78.8818359375, -78.8708496094}}, + {"ys1", []float64{67.5, 68.90625, 113.90625, 115.3125}}, + {"unf91", []float64{83.14453125, 83.1884765625, 3.5595703125, 3.603515625}}, + {"h5che0vnt", []float64{-68.109998703, -68.1099557877, 1.5451669693, 1.54520988464}}, + {"ugrk3", []float64{64.0283203125, 64.072265625, 43.9892578125, 44.033203125}}, + {"9ush8c", []float64{26.1090087891, 26.1145019531, -95.5920410156, -95.5810546875}}, + {"q92pzb", []float64{-36.6064453125, -36.6009521484, 112.840576172, 112.8515625}}, + {"0e", []float64{-73.125, -67.5, -157.5, -146.25}}, + {"dbt1mchu", []float64{3.03840637207, 3.03857803345, -48.9595413208, -48.959197998}}, + {"98xv2m", []float64{3.76281738281, 3.76831054688, -101.590576172, -101.579589844}}, + {"rqd8u195kgu", []float64{-8.29684630036, -8.29684495926, 149.942988753, 149.942990094}}, + {"504wk58ccv", []float64{-88.8818138838, -88.8818085194, -41.3074886799, -41.307477951}}, + {"0dzjhbn", []float64{-73.65234375, -73.650970459, -147.43927002, -147.437896729}}, + {"sgcn", []float64{22.1484375, 22.32421875, 35.15625, 35.5078125}}, + {"46k78jw0x65w", []float64{-76.6982056573, -76.6982054897, -72.7648819238, -72.7648815885}}, + {"6w2cbxx3nf9", []float64{-9.49474900961, -9.4947476685, 
-66.4130924642, -66.4130911231}}, + {"zxmf4", []float64{86.1328125, 86.1767578125, 165.673828125, 165.717773438}}, + {"unf", []float64{82.96875, 84.375, 2.8125, 4.21875}}, + {"m4p", []float64{-33.75, -32.34375, 54.84375, 56.25}}, + {"dsc1rqss2w", []float64{26.9749438763, 26.9749492407, -65.7689452171, -65.7689344883}}, + {"cxp", []float64{84.375, 85.78125, -102.65625, -101.25}}, + {"zmh", []float64{73.125, 74.53125, 151.875, 153.28125}}, + {"tynvnjc8hdb", []float64{34.6605066955, 34.6605080366, 88.5081124306, 88.5081137717}}, + {"uk8hb", []float64{71.1474609375, 71.19140625, 11.25, 11.2939453125}}, + {"34d", []float64{-30.9375, -29.53125, -132.1875, -130.78125}}, + {"ts39vet4rzw5", []float64{24.2335202359, 24.2335204035, 69.8582813144, 69.8582816496}}, + {"3rt1fx5", []float64{-2.46643066406, -2.46505737305, -116.604766846, -116.603393555}}, + {"ujn8yhfpg", []float64{73.2842588425, 73.2843017578, 9.40717220306, 9.40721511841}}, + {"pdbvhzj", []float64{-73.6138916016, -73.6125183105, 158.770294189, 158.77166748}}, + {"q35", []float64{-39.375, -37.96875, 105.46875, 106.875}}, + {"szh5424hc", []float64{39.9031591415, 39.9032020569, 39.4766664505, 39.4767093658}}, + {"m", []float64{-45.0, 0.0, 45.0, 90.0}}, + {"tt1wjkr44e", []float64{29.2033928633, 29.2033982277, 69.8498082161, 69.8498189449}}, + {"1u3hdkn", []float64{-65.2807617188, -65.2793884277, -99.7366333008, -99.7352600098}}, + {"jc9", []float64{-81.5625, -80.15625, 80.15625, 81.5625}}, + {"627pp", []float64{-42.36328125, -42.3193359375, -74.2236328125, -74.1796875}}, + {"g46wqb4z", []float64{58.7560844421, 58.7562561035, -41.1839675903, -41.1836242676}}, + {"2407674", []float64{-33.1622314453, -33.1608581543, -179.546813965, -179.545440674}}, + {"3vbsrcxu", []float64{-11.9002532959, -11.9000816345, -100.195655823, -100.1953125}}, + {"u0mr9fpy", []float64{47.7366256714, 47.7367973328, 7.47035980225, 7.470703125}}, + {"p1s1", []float64{-81.38671875, -81.2109375, 140.625, 140.9765625}}, + {"ce7y6s1ugjpu", 
[]float64{64.4026983529, 64.4026985206, -107.11415682, -107.114156485}}, + {"tujn", []float64{23.5546875, 23.73046875, 85.78125, 86.1328125}}, + {"fes", []float64{64.6875, 66.09375, -61.875, -60.46875}}, + {"28te871t29y", []float64{-41.5548755229, -41.5548741817, -149.752549231, -149.75254789}}, + {"2z9j0591", []float64{-1.9141960144, -1.91402435303, -144.842376709, -144.842033386}}, + {"e", []float64{0.0, 45.0, -45.0, 0.0}}, + {"90", []float64{0.0, 5.625, -135.0, -123.75}}, + {"jbfm12r", []float64{-84.900970459, -84.899597168, 81.9786071777, 81.9799804688}}, + {"y0ws", []float64{48.515625, 48.69140625, 99.140625, 99.4921875}}, + {"m2", []float64{-45.0, -39.375, 56.25, 67.5}}, + {"gpspv95sz", []float64{88.5561132431, 88.5561561584, -39.1281938553, -39.1281509399}}, + {"7k8u95cyjdx6", []float64{-18.8748412952, -18.8748411275, -32.6487181708, -32.6487178355}}, + {"c1fe0r", []float64{55.4095458984, 55.4150390625, -131.473388672, -131.462402344}}, + {"668wjecj2d", []float64{-29.8613011837, -29.8612958193, -77.8037810326, -77.8037703037}}, + {"dnq3", []float64{35.33203125, 35.5078125, -81.2109375, -80.859375}}, + {"m3sxdxnvmrr", []float64{-35.2047483623, -35.2047470212, 62.6974926889, 62.69749403}}, + {"zz3qpfvqzu", []float64{86.8522238731, 86.8522292376, 170.855931044, 170.855941772}}, + {"98mjjx8bu", []float64{2.3264837265, 2.32652664185, -105.225849152, -105.225806236}}, + {"pkmusy0e4j35", []float64{-65.2692317404, -65.2692315727, 154.545451552, 154.545451887}}, + {"j3f9dtm5r5n", []float64{-79.8631650209, -79.8631636798, 59.8826631904, 59.8826645315}}, + {"67up3c0uh9jn", []float64{-22.6256497577, -22.62564959, -73.0468659103, -73.046865575}}, + {"6q0fd9wn2", []float64{-10.8012342453, -10.80119133, -77.5772094727, -77.5771665573}}, + {"t82e5zrs", []float64{1.97410583496, 1.97427749634, 68.3782196045, 68.3785629272}}, + {"0hstxh", []float64{-63.6987304688, -63.6932373047, -173.364257812, -173.353271484}}, + {"qe1egcuetqe", []float64{-27.4555715919, -27.4555702507, 
114.78057906, 114.780580401}}, + {"yhp25wc4v", []float64{67.5375509262, 67.5375938416, 100.350708961, 100.350751877}}, + {"z6uvby2nrt4k", []float64{61.5149248391, 61.5149250068, 152.962971367, 152.962971702}}, + {"29sd0863cx", []float64{-36.2092262506, -36.2092208862, -151.146748066, -151.146737337}}, + {"kvnx614", []float64{-15.5950927734, -15.5937194824, 42.981262207, 42.982635498}}, + {"mu1srk07", []float64{-21.7304420471, -21.7302703857, 81.1783218384, 81.1786651611}}, + {"5bz5bmq", []float64{-85.0932312012, -85.0918579102, -1.38702392578, -1.38565063477}}, + {"fu4yx9fr8gtk", []float64{68.6534980685, 68.6534982361, -52.0500935242, -52.0500931889}}, + {"3hyhj92rn", []float64{-17.5700569153, -17.5700139999, -126.320199966, -126.320157051}}, + {"345nw", []float64{-32.607421875, -32.5634765625, -130.517578125, -130.473632812}}, + {"q5f2p327mhy", []float64{-23.8988001645, -23.8987988234, 93.4832319617, 93.4832333028}}, + {"0wmufb9", []float64{-54.0060424805, -54.0046691895, -149.2918396, -149.290466309}}, + {"r", []float64{-45.0, 0.0, 135.0, 180.0}}, + {"07d2sde", []float64{-70.2108764648, -70.2095031738, -165.384063721, -165.38269043}}, + {"d0r2", []float64{1.40625, 1.58203125, -79.8046875, -79.453125}}, + {"znegsexfs23h", []float64{82.1973916143, 82.197391782, 140.482018143, 140.482018478}}, + {"sfr69qxg", []float64{13.1319236755, 13.1320953369, 44.010887146, 44.0112304688}}, + {"tr44b8brc", []float64{39.8638486862, 39.8638916016, 59.0848588943, 59.0849018097}}, + {"tbnqctecsf", []float64{1.21700406075, 1.21700942516, 87.6103341579, 87.6103448868}}, + {"jpfy538qu", []float64{-45.3421640396, -45.3421211243, 49.0105247498, 49.0105676651}}, + {"u", []float64{45.0, 90.0, 0.0, 45.0}}, + {"gskrg0z5e", []float64{70.2732753754, 70.2733182907, -16.3818597794, -16.381816864}}, + {"6cz", []float64{-35.15625, -33.75, -46.40625, -45.0}}, + {"u67hm7b47423", []float64{58.4243181534, 58.424318321, 15.6995919719, 15.6995923072}}, + {"j154zhnnkyt3", []float64{-83.8685209863, 
-83.8685208187, 49.5348178223, 49.5348181576}}, + {"muqdpev4smv", []float64{-20.7211281359, -20.7211267948, 88.2272703946, 88.2272717357}}, + {"47h3upynmsru", []float64{-72.7737144381, -72.7737142704, -72.589170076, -72.5891697407}}, + {"g6j200", []float64{56.25, 56.2554931641, -26.3671875, -26.3562011719}}, + {"tw", []float64{33.75, 39.375, 67.5, 78.75}}, + {"c0pjhr520q", []float64{45.9173905849, 45.9173959494, -124.965008497, -124.964997768}}, + {"8nx", []float64{36.5625, 37.96875, -170.15625, -168.75}}, + {"47b2wvtns", []float64{-68.7870311737, -68.7869882584, -78.0947685242, -78.0947256088}}, + {"vrsbq", []float64{87.2314453125, 87.275390625, 63.193359375, 63.2373046875}}, + {"sz", []float64{39.375, 45.0, 33.75, 45.0}}, + {"xe61b0bnw", []float64{18.5941028595, 18.5941457748, 160.312757492, 160.312800407}}, + {"dky6qedz3w", []float64{27.1347606182, 27.1347659826, -69.6714520454, -69.6714413166}}, + {"vmvkqx2hb", []float64{78.1314611435, 78.1315040588, 63.9184570312, 63.9184999466}}, + {"t96m49xgr1y", []float64{7.9189632833, 7.9189646244, 70.7848772407, 70.7848785818}}, + {"brw2urrqcfex", []float64{87.3603346758, 87.3603348434, -159.764133766, -159.764133431}}, + {"z7m8", []float64{63.28125, 63.45703125, 153.984375, 154.3359375}}, + {"wm6w7f38", []float64{30.6422424316, 30.642414093, 104.932479858, 104.932823181}}, + {"rxj23rtt4y", []float64{-5.53896546364, -5.53896009922, 164.945415258, 164.945425987}}, + {"sfr9xsyzfn", []float64{12.9473769665, 12.9473823309, 44.6358203888, 44.6358311176}}, + {"9ubf9uq02e", []float64{27.1816080809, 27.1816134453, -100.110146999, -100.110136271}}, + {"kj25zp1gb6j", []float64{-14.7704637051, -14.770462364, 0.310037881136, 0.31003922224}}, + {"x4f", []float64{15.46875, 16.875, 137.8125, 139.21875}}, + {"xnn27kkf4c", []float64{33.8176399469, 33.8176453114, 143.938525915, 143.938536644}}, + {"61bhs9byn4", []float64{-34.3545806408, -34.3545752764, -89.8009586334, -89.8009479046}}, + {"rv2sve92mngr", []float64{-14.6144826896, 
-14.614482522, 169.696759768, 169.696760103}}, + {"zkvq2w", []float64{72.8503417969, 72.8558349609, 153.654785156, 153.665771484}}, + {"qprmp68h7kcd", []float64{-3.32535546273, -3.32535529509, 100.514057502, 100.514057837}}, + {"77pmzubu", []float64{-27.0874786377, -27.0873069763, -23.2130813599, -23.2127380371}}, + {"q73t2sumh3b", []float64{-25.7689382136, -25.7689368725, 103.387366533, 103.387367874}}, + {"3kxch9c", []float64{-19.5021057129, -19.5007324219, -112.652435303, -112.651062012}}, + {"t", []float64{0.0, 45.0, 45.0, 90.0}}, + {"3um1y618chw", []float64{-20.7749935985, -20.7749922574, -93.9419808984, -93.9419795573}}, + {"45nj7sxww", []float64{-72.1763134003, -72.1762704849, -81.3981342316, -81.3980913162}}, + {"rnkyjdv404", []float64{-8.77360224724, -8.77359688282, 141.928253174, 141.928263903}}, + {"p3", []float64{-84.375, -78.75, 146.25, 157.5}}, + {"sxbz", []float64{44.82421875, 45.0, 23.5546875, 23.90625}}, + {"xuj2k", []float64{22.5439453125, 22.587890625, 176.30859375, 176.352539062}}, + {"yhp9", []float64{67.67578125, 67.8515625, 100.546875, 100.8984375}}, + {"1yq4", []float64{-54.4921875, -54.31640625, -92.8125, -92.4609375}}, + {"u4m2jkw", []float64{57.6809692383, 57.6823425293, 7.62176513672, 7.62313842773}}, + {"xb9", []float64{2.8125, 4.21875, 170.15625, 171.5625}}, + {"ebf4e478jp", []float64{4.67060029507, 4.67060565948, -8.30064296722, -8.30063223839}}, + {"y7venx9", []float64{66.6622924805, 66.6636657715, 109.271392822, 109.272766113}}, + {"8qu", []float64{37.96875, 39.375, -163.125, -161.71875}}, + {"jw2jbzms66", []float64{-53.7924420834, -53.7924367189, 67.5406086445, 67.5406193733}}, + {"n", []float64{-90.0, -45.0, 90.0, 135.0}}, + {"jbx", []float64{-87.1875, -85.78125, 88.59375, 90.0}}, + {"3v4n", []float64{-15.8203125, -15.64453125, -98.4375, -98.0859375}}, + {"0z1theg", []float64{-49.7254943848, -49.7241210938, -143.938751221, -143.93737793}}, + {"zbz00jf21m", []float64{49.2503625154, 49.2503678799, 178.596893549, 178.596904278}}, + 
{"dfpq2eg2", []float64{12.3692321777, 12.3694038391, -46.0282516479, -46.0279083252}}, + {"z2j5bc1ph562", []float64{45.6658919156, 45.6658920832, 153.315756954, 153.31575729}}, + {"3p3g", []float64{-3.69140625, -3.515625, -132.5390625, -132.1875}}, + {"4rfgeu3", []float64{-45.7676696777, -45.7662963867, -74.7166442871, -74.7152709961}}, + {"nykq", []float64{-53.7890625, -53.61328125, 129.7265625, 130.078125}}, + {"h", []float64{-90.0, -45.0, 0.0, 45.0}}, + {"85", []float64{16.875, 22.5, -180.0, -168.75}}, + {"bdsdxr", []float64{59.5404052734, 59.5458984375, -150.853271484, -150.842285156}}, + {"wsyt3duqg2", []float64{27.657866478, 27.6578718424, 121.71251893, 121.712529659}}, + {"90", []float64{0.0, 5.625, -135.0, -123.75}}, + {"butw", []float64{71.3671875, 71.54296875, -138.515625, -138.1640625}}, + {"ddhpjv6b7tqh", []float64{12.5093796104, 12.5093797781, -61.6183796525, -61.6183793172}}, + {"18ueqgd", []float64{-85.1907348633, -85.1893615723, -105.872497559, -105.871124268}}, + {"v2g8jh1", []float64{49.2407226562, 49.2420959473, 61.3929748535, 61.3943481445}}, + {"84umeh3gmupk", []float64{16.45947285, 16.4594730176, -173.888941817, -173.888941482}}, + {"s4g900", []float64{15.64453125, 15.6500244141, 4.921875, 4.93286132812}}, + {"0b313fz2", []float64{-88.3589172363, -88.358745575, -144.756889343, -144.756546021}}, + {"4q", []float64{-56.25, -50.625, -78.75, -67.5}}, + {"d61", []float64{11.25, 12.65625, -77.34375, -75.9375}}, + {"w5q5298pq", []float64{18.8620233536, 18.8620662689, 98.4597301483, 98.4597730637}}, + {"ushgx399", []float64{68.1236457825, 68.1238174438, 29.5003509521, 29.5006942749}}, + {"73ngt", []float64{-38.759765625, -38.7158203125, -24.0380859375, -23.994140625}}, + {"2f4smcem", []float64{-32.9938316345, -32.9936599731, -142.477226257, -142.476882935}}, + {"0", []float64{-90.0, -45.0, -180.0, -135.0}}, + {"8", []float64{0.0, 45.0, -180.0, -135.0}}, + {"5u14weqgz", []float64{-67.0420503616, -67.0420074463, -9.54853534698, -9.54849243164}}, + 
{"xxhuu8y4xb", []float64{40.214509964, 40.2145153284, 164.386013746, 164.386024475}}, + {"272xeqmj", []float64{-25.3652000427, -25.3650283813, -167.897186279, -167.896842957}}, + {"2trrhunrd1", []float64{-14.215015769, -14.2150104046, -147.087278366, -147.087267637}}, + {"e4", []float64{11.25, 16.875, -45.0, -33.75}}, + {"p5duz8tp", []float64{-69.4735908508, -69.4734191895, 139.203643799, 139.203987122}}, + {"5qprz78e", []float64{-54.8679542542, -54.8677825928, -23.2353973389, -23.2350540161}}, + {"ch5yq40qtu3", []float64{68.6107577384, 68.6107590795, -129.462299198, -129.462297857}}, + {"u", []float64{45.0, 90.0, 0.0, 45.0}}, + {"7qmv9c5", []float64{-8.87145996094, -8.87008666992, -25.5830383301, -25.5816650391}}, + {"c", []float64{45.0, 90.0, -135.0, -90.0}}, + {"hp8s7", []float64{-47.0654296875, -47.021484375, 0.8349609375, 0.87890625}}, + {"9e4y04d17", []float64{17.9436349869, 17.9436779022, -108.629937172, -108.629894257}}, + {"39nh", []float64{-38.671875, -38.49609375, -104.0625, -103.7109375}}, + {"6", []float64{-45.0, 0.0, -90.0, -45.0}}, + {"pjpxe1pvyzhm", []float64{-60.5501220189, -60.5501218513, 145.689649321, 145.689649656}}, + {"drx", []float64{42.1875, 43.59375, -68.90625, -67.5}}, + {"zu1c5qg0s", []float64{67.7129459381, 67.7129888535, 171.3580513, 171.358094215}}, + {"y", []float64{45.0, 90.0, 90.0, 135.0}}, + {"t8d", []float64{2.8125, 4.21875, 70.3125, 71.71875}}, + {"d47w70rwe23h", []float64{13.7573739141, 13.7573740818, -84.9358485639, -84.9358482286}}, + {"3t617", []float64{-15.2490234375, -15.205078125, -109.555664062, -109.51171875}}, + {"qnkq1pz", []float64{-8.74649047852, -8.7451171875, 96.0301208496, 96.0314941406}}, + {"fu", []float64{67.5, 73.125, -56.25, -45.0}}, + {"7vs", []float64{-14.0625, -12.65625, -5.625, -4.21875}}, + {"bztqz0h", []float64{88.3740234375, 88.3753967285, -138.554077148, -138.552703857}}, + {"b8j", []float64{45.0, 46.40625, -150.46875, -149.0625}}, + {"cetkxmq73", []float64{65.5079126358, 65.5079555511, -104.789958, 
-104.789915085}}, + {"p91", []float64{-84.375, -82.96875, 158.90625, 160.3125}}, + {"z4g7bn4w38", []float64{61.1619615555, 61.1619669199, 139.573810101, 139.573820829}}, + {"j", []float64{-90.0, -45.0, 45.0, 90.0}}, + {"g2eej64jbwj", []float64{48.3518493176, 48.3518506587, -28.5946373641, -28.594636023}}, + {"fwshzb30mj", []float64{82.398903966, 82.3989093304, -61.5328359604, -61.5328252316}}, + {"fv2mqt", []float64{75.4815673828, 75.4870605469, -55.6127929688, -55.6018066406}}, + {"bzr6m3zdun5", []float64{86.1868751049, 86.186876446, -135.813499242, -135.813497901}}, + {"et5rq77j8b", []float64{29.4182109833, 29.4182163477, -17.6508772373, -17.6508665085}}, + {"1c", []float64{-84.375, -78.75, -101.25, -90.0}}, + {"y1hyumh10jq", []float64{51.8391890824, 51.8391904235, 96.8719562888, 96.8719576299}}, + {"qd42djnqxq", []float64{-33.6334955692, -33.6334902048, 115.76084733, 115.760858059}}, + {"hsd9s", []float64{-64.423828125, -64.3798828125, 26.19140625, 26.2353515625}}, + {"8289gq947", []float64{3.156208992, 3.15625190735, -167.902550697, -167.902507782}}, + {"em37sw72zq4", []float64{30.1809775829, 30.180978924, -31.7896565795, -31.7896552384}}, + {"zms25", []float64{75.9375, 75.9814453125, 152.358398438, 152.40234375}}, + {"h25d54", []float64{-89.6374511719, -89.6319580078, 16.3037109375, 16.3146972656}}, + {"6qc7y2t4bb", []float64{-6.36885166168, -6.36884629726, -76.7106306553, -76.7106199265}}, + {"06vt5z8j", []float64{-73.6102867126, -73.6101150513, -160.850830078, -160.850486755}}, + {"37q3", []float64{-26.54296875, -26.3671875, -114.9609375, -114.609375}}, + {"sey9wu", []float64{21.3793945312, 21.3848876953, 31.9372558594, 31.9482421875}}, + {"qk0jrj", []float64{-21.5496826172, -21.5441894531, 101.557617188, 101.568603516}}, + {"8x6jjpm0", []float64{41.6999816895, 41.7001533508, -154.460906982, -154.46056366}}, + {"5j1etu", []float64{-61.2377929688, -61.2322998047, -42.6379394531, -42.626953125}}, + {"r6b", []float64{-29.53125, -28.125, 146.25, 147.65625}}, + 
{"ddu3vyj07", []float64{15.8093690872, 15.8094120026, -61.263756752, -61.2637138367}}, + {"m9fm5q2d91", []float64{-34.2425769567, -34.2425715923, 70.8076143265, 70.8076250553}}, + {"0pxdx", []float64{-47.373046875, -47.3291015625, -169.145507812, -169.1015625}}, + {"w", []float64{0.0, 45.0, 90.0, 135.0}}, + {"q1e", []float64{-36.5625, -35.15625, 94.21875, 95.625}}, + {"h3vxhm8tu", []float64{-78.8945817947, -78.8945388794, 19.172000885, 19.1720438004}}, + {"bcxsz", []float64{54.2724609375, 54.31640625, -135.395507812, -135.3515625}}, + {"crjh", []float64{85.078125, 85.25390625, -116.71875, -116.3671875}}, + {"bdqejqqgwj", []float64{58.2185536623, 58.2185590267, -148.119134903, -148.119124174}}, + {"x7zhc480u7", []float64{21.9425886869, 21.9425940514, 156.137877703, 156.137888432}}, + {"xhr7c9nd6js9", []float64{24.5713387616, 24.5713389292, 145.270248726, 145.270249061}}, + {"f25r3r", []float64{46.3128662109, 46.318359375, -74.1247558594, -74.1137695312}}, + {"b4v1e8zek", []float64{60.7370996475, 60.7371425629, -172.804470062, -172.804427147}}, + {"95cwh1k", []float64{22.1553039551, 22.1566772461, -132.709350586, -132.707977295}}, + {"kh1r", []float64{-21.26953125, -21.09375, 1.7578125, 2.109375}}, + {"7p", []float64{-5.625, 0.0, -45.0, -33.75}}, + {"mgsvj", []float64{-24.43359375, -24.3896484375, 85.6494140625, 85.693359375}}, + {"k70", []float64{-28.125, -26.71875, 11.25, 12.65625}}, + {"pxjr5g", []float64{-49.3780517578, -49.3725585938, 165.047607422, 165.05859375}}, + {"3", []float64{-45.0, 0.0, -135.0, -90.0}}, + {"81pv", []float64{6.50390625, 6.6796875, -169.1015625, -168.75}}, + {"jjg", []float64{-57.65625, -56.25, 49.21875, 50.625}}, + {"732kjtvw", []float64{-37.2330093384, -37.232837677, -33.1491851807, -33.1488418579}}, + {"kuc2", []float64{-18.28125, -18.10546875, 35.5078125, 35.859375}}, + {"wn91fmw18yp", []float64{36.9006192684, 36.9006206095, 91.5134082735, 91.5134096146}}, + {"5wdnzyz5r", []float64{-52.2133398056, -52.2132968903, -19.3370103836, 
-19.3369674683}}, + {"m682wkeu80", []float64{-30.8241176605, -30.8241122961, 56.8813705444, 56.8813812733}}, + {"r18jv9k", []float64{-35.5448913574, -35.5435180664, 135.247192383, 135.248565674}}, + {"zr079yhvttr", []float64{85.0241656601, 85.0241670012, 146.685235351, 146.685236692}}, + {"r4umz4vhvm", []float64{-28.5045593977, -28.5045540333, 141.291271448, 141.291282177}}, + {"58gdwpzc3zs", []float64{-85.2989700437, -85.2989687026, -17.3037296534, -17.3037283123}}, + {"64frgqpt0yj", []float64{-28.1350958347, -28.1350944936, -86.6827766597, -86.6827753186}}, + {"8n18eckkw5", []float64{33.8455456495, 33.8455510139, -177.719736099, -177.71972537}}, + {"mz326c81b1", []float64{-4.16625916958, -4.16625380516, 80.6286621094, 80.6286728382}}, + {"hx", []float64{-50.625, -45.0, 22.5, 33.75}}, + {"ush2juq", []float64{67.5233459473, 67.5247192383, 28.737487793, 28.738861084}}, + {"bp6h7m8fe", []float64{86.5589618683, 86.5590047836, -177.04351902, -177.043476105}}, + {"111", []float64{-84.375, -82.96875, -133.59375, -132.1875}}, + {"m9hwzz", []float64{-38.1500244141, -38.14453125, 74.1687011719, 74.1796875}}, + {"100u6e92zuk", []float64{-89.2335520685, -89.2335507274, -133.833394647, -133.833393306}}, + {"y", []float64{45.0, 90.0, 90.0, 135.0}}, + {"9wbmdmhu", []float64{38.9636993408, 38.9638710022, -112.043037415, -112.042694092}}, + {"9p1jg8k", []float64{40.3871154785, 40.3884887695, -133.434448242, -133.433074951}}, + {"vqf6kw", []float64{83.3972167969, 83.4027099609, 59.6118164062, 59.6228027344}}, + {"gw", []float64{78.75, 84.375, -22.5, -11.25}}, + {"h49v9", []float64{-74.970703125, -74.9267578125, 2.5048828125, 2.548828125}}, + {"23cmz", []float64{-34.1455078125, -34.1015625, -166.684570312, -166.640625}}, + {"71", []float64{-39.375, -33.75, -45.0, -33.75}}, + {"5x2kvmbu", []float64{-48.3515167236, -48.3513450623, -21.9166946411, -21.9163513184}}, + {"1nywfjs8e", []float64{-50.8144283295, -50.8143854141, -125.765175819, -125.765132904}}, + {"7u4vm3b9qy", 
[]float64{-21.5672886372, -21.5672832727, -7.15112328529, -7.15111255646}}, + {"rx4n750gn", []float64{-4.50937271118, -4.50932979584, 160.445623398, 160.445666313}}, + {"9", []float64{0.0, 45.0, -135.0, -90.0}}, + {"nxfyqng3", []float64{-45.2703666687, -45.2701950073, 116.635322571, 116.635665894}}, + {"tgnt", []float64{17.75390625, 17.9296875, 87.890625, 88.2421875}}, + {"qe2k5jtbm2c", []float64{-25.985365659, -25.9853643179, 112.991521508, 112.991522849}}, + {"d", []float64{0.0, 45.0, -90.0, -45.0}}, + {"1jhq", []float64{-60.8203125, -60.64453125, -129.0234375, -128.671875}}, + {"p874n4ubjm", []float64{-88.2270544767, -88.2270491123, 161.989170313, 161.989181042}}, + {"91h2qc", []float64{5.67443847656, 5.67993164062, -128.726806641, -128.715820312}}, + {"mzp8s0p", []float64{-5.537109375, -5.53573608398, 89.4822692871, 89.4836425781}}, + {"ptp0rem", []float64{-61.8132019043, -61.8118286133, 167.680206299, 167.68157959}}, + {"14", []float64{-78.75, -73.125, -135.0, -123.75}}, + {"s4dq", []float64{15.1171875, 15.29296875, 3.1640625, 3.515625}}, + {"uvs7", []float64{76.46484375, 76.640625, 39.7265625, 40.078125}}, + {"wh9xq3mqh", []float64{26.5948104858, 26.5948534012, 92.3914146423, 92.3914575577}}, + {"kz", []float64{-5.625, 0.0, 33.75, 45.0}}, + {"s8t4hkb3", []float64{3.19032669067, 3.19049835205, 29.7183609009, 29.7187042236}}, + {"3ry9w", []float64{-1.142578125, -1.0986328125, -114.345703125, -114.301757812}}, + {"mf1wt5", []float64{-32.5909423828, -32.5854492188, 81.0791015625, 81.0900878906}}, + {"e", []float64{0.0, 45.0, -45.0, 0.0}}, + {"mh", []float64{-22.5, -16.875, 45.0, 56.25}}, + {"75y3665k", []float64{-23.6748504639, -23.6746788025, -36.1075973511, -36.1072540283}}, + {"sts", []float64{30.9375, 32.34375, 28.125, 29.53125}}, + {"6fdmxb", []float64{-29.970703125, -29.9652099609, -52.7453613281, -52.734375}}, + {"xcvf9qbx13jx", []float64{10.3214901499, 10.3214903176, 176.891616806, 176.891617142}}, + {"n1r6pkxu86t", []float64{-82.5916823745, 
-82.5916810334, 100.524576455, 100.524577796}}, + {"1m2p", []float64{-59.23828125, -59.0625, -123.75, -123.3984375}}, + {"fz", []float64{84.375, 90.0, -56.25, -45.0}}, + {"hgw", []float64{-70.3125, -68.90625, 42.1875, 43.59375}}, + {"ssktp8e5hsub", []float64{24.7884432971, 24.7884434648, 29.1620342061, 29.1620345414}}, + {"8wbw4", []float64{39.0234375, 39.0673828125, -156.708984375, -156.665039062}}, + {"wbcdsqtpnkh9", []float64{4.69513194636, 4.69513211399, 126.053283289, 126.053283624}}, + {"f0md", []float64{46.7578125, 46.93359375, -82.265625, -81.9140625}}, + {"hngnmbt2pe4", []float64{-50.9298545122, -50.9298531711, 4.478969872, 4.4789712131}}, + {"gbkn8cgjewxc", []float64{47.559420336, 47.5594205037, -5.58776054531, -5.58776021004}}, + {"u", []float64{45.0, 90.0, 0.0, 45.0}}, + {"b5", []float64{61.875, 67.5, -180.0, -168.75}}, + {"w1r042nrqyk", []float64{7.0325280726, 7.0325294137, 99.951505065, 99.9515064061}}, + {"tv6r1uuh1h", []float64{30.7885193825, 30.7885247469, 81.9965028763, 81.9965136051}}, + {"r1hw8pqpr3", []float64{-38.1913465261, -38.1913411617, 141.336675882, 141.336686611}}, + {"j2bcyrxdgj", []float64{-85.4319351912, -85.4319298267, 57.5897741318, 57.5897848606}}, + {"m", []float64{-45.0, 0.0, 45.0, 90.0}}, + {"qtxvgfvmrbf", []float64{-13.0357463658, -13.0357450247, 123.570777476, 123.570778817}}, + {"jp", []float64{-50.625, -45.0, 45.0, 56.25}}, + {"f76", []float64{63.28125, 64.6875, -75.9375, -74.53125}}, + {"vz9", []float64{87.1875, 88.59375, 80.15625, 81.5625}}, + {"wm", []float64{28.125, 33.75, 101.25, 112.5}}, + {"c0wn5", []float64{48.8671875, 48.9111328125, -126.430664062, -126.38671875}}, + {"7pn7whg", []float64{-4.9836730957, -4.98229980469, -35.943145752, -35.9417724609}}, + {"s", []float64{0.0, 45.0, 0.0, 45.0}}, + {"txwr3", []float64{43.4619140625, 43.505859375, 76.3330078125, 76.376953125}}, + {"zc0", []float64{50.625, 52.03125, 168.75, 170.15625}}, + {"sq7pru6gr", []float64{36.4545679092, 36.4546108246, 15.8134031296, 
15.8134460449}}, + {"nu", []float64{-67.5, -61.875, 123.75, 135.0}}, + {"7dkt6vr", []float64{-31.3920593262, -31.3906860352, -16.0414123535, -16.0400390625}}, + {"xm2uwefdyf1", []float64{30.3433477879, 30.343349129, 147.594056278, 147.59405762}}, + {"mgmnc0", []float64{-25.5322265625, -25.5267333984, 85.8251953125, 85.8361816406}}, + {"jj1shq", []float64{-61.1389160156, -61.1334228516, 47.2961425781, 47.3071289062}}, + {"3", []float64{-45.0, 0.0, -135.0, -90.0}}, + {"0p4y3p8tgr", []float64{-49.4841438532, -49.4841384888, -176.088041067, -176.088030338}}, + {"gu", []float64{67.5, 73.125, -11.25, 0.0}}, + {"e94", []float64{5.625, 7.03125, -19.6875, -18.28125}}, + {"u7khr", []float64{64.0283203125, 64.072265625, 17.1826171875, 17.2265625}}, + {"k1k", []float64{-37.96875, -36.5625, 5.625, 7.03125}}, + {"wks48m7f", []float64{25.7811355591, 25.7813072205, 106.891136169, 106.891479492}}, + {"z91w3", []float64{51.7236328125, 51.767578125, 159.653320312, 159.697265625}}, + {"c2d6xmbp1", []float64{48.284740448, 48.2847833633, -120.267291069, -120.267248154}}, + {"s9yur", []float64{10.5908203125, 10.634765625, 32.2998046875, 32.34375}}, + {"7u09b46", []float64{-22.1800231934, -22.1786499023, -10.544128418, -10.542755127}}, + {"8sndxb", []float64{22.939453125, 22.9449462891, -148.018798828, -148.0078125}}, + {"j2g761bhsex", []float64{-85.1995566487, -85.1995553076, 60.9084056318, 60.9084069729}}, + {"5wg", []float64{-52.03125, -50.625, -18.28125, -16.875}}, + {"fzn", []float64{84.375, 85.78125, -47.8125, -46.40625}}, + {"ugdpz8p4mu", []float64{66.0502123833, 66.0502177477, 36.9019496441, 36.9019603729}}, + {"nx", []float64{-50.625, -45.0, 112.5, 123.75}}, + {"d", []float64{0.0, 45.0, -90.0, -45.0}}, + {"crr9e8mz59se", []float64{86.0475053452, 86.0475055128, -113.041263744, -113.041263409}}, + {"dgmpsg", []float64{19.6160888672, 19.6215820312, -49.0100097656, -48.9990234375}}, + {"jcfk52m03", []float64{-79.4517087936, -79.4516658783, 82.063794136, 82.0638370514}}, + 
{"d8trpccuk", []float64{4.05331134796, 4.05335426331, -59.7740364075, -59.7739934921}}, + {"93g9665", []float64{10.0744628906, 10.0758361816, -118.725128174, -118.723754883}}, + {"sqt7cuf8d0f", []float64{37.2478620708, 37.2478634119, 18.7132385373, 18.7132398784}}, + {"f9", []float64{50.625, 56.25, -67.5, -56.25}}, + {"k90", []float64{-39.375, -37.96875, 22.5, 23.90625}}, + {"k8xdhcv", []float64{-41.8263244629, -41.8249511719, 33.2624816895, 33.2638549805}}, + {"4989w4r926t7", []float64{-81.2862400152, -81.2862398475, -66.5228856727, -66.5228853375}}, + {"c3", []float64{50.625, 56.25, -123.75, -112.5}}, + {"bd908pg0", []float64{59.1929626465, 59.1931343079, -156.089630127, -156.089286804}}, + {"bq", []float64{78.75, 84.375, -168.75, -157.5}}, + {"chcdt", []float64{72.158203125, 72.2021484375, -132.670898438, -132.626953125}}, + {"hff8vsrzhy39", []float64{-74.3748327903, -74.3748326227, 37.5181730837, 37.5181734189}}, + {"9gef7g6ezj", []float64{20.101531148, 20.1015365124, -95.8080339432, -95.8080232143}}, + {"yc0u2dp", []float64{51.3830566406, 51.3844299316, 124.836273193, 124.837646484}}, + {"w0b41f7", []float64{4.58267211914, 4.58404541016, 90.0810241699, 90.0823974609}}, + {"8cdmwjc", []float64{9.43588256836, 9.43725585938, -142.820892334, -142.819519043}}, + {"p4ngqtjm2", []float64{-78.150343895, -78.1503009796, 144.785041809, 144.785084724}}, + {"5", []float64{-90.0, -45.0, -45.0, 0.0}}, + {"3qtzqdknx", []float64{-7.14961051941, -7.14956760406, -115.372624397, -115.372581482}}, + {"gzjhuv9xmkg5", []float64{85.2414438687, 85.2414440364, -4.00772050023, -4.00772016495}}, + {"8g8t7eh4y0fh", []float64{20.6273078173, 20.627307985, -145.387313068, -145.387312733}}, + {"39w", []float64{-36.5625, -35.15625, -104.0625, -102.65625}}, + {"z34rj8", []float64{51.85546875, 51.8609619141, 149.655761719, 149.666748047}}, + {"c9p0zv2", []float64{50.7856750488, 50.7870483398, -102.315673828, -102.314300537}}, + {"vh871y", []float64{70.8728027344, 70.8782958984, 45.4284667969, 
45.439453125}}, + {"7ggt8b4yfw", []float64{-22.9382622242, -22.9382568598, -6.29128217697, -6.29127144814}}, + {"3qz3d324z", []float64{-6.76023960114, -6.76019668579, -113.455510139, -113.455467224}}, + {"4sm2463w0w", []float64{-66.0803282261, -66.0803228617, -60.0162291527, -60.0162184238}}, + {"26ewbu0gw3", []float64{-29.728397727, -29.7283923626, -163.793867826, -163.793857098}}, + {"bre7js2w9z", []float64{87.7393430471, 87.7393484116, -163.937226534, -163.937215805}}, + {"sy5ug08bn", []float64{34.5877075195, 34.5877504349, 39.1565608978, 39.1566038132}}, + {"p4r", []float64{-77.34375, -75.9375, 144.84375, 146.25}}, + {"qb", []float64{-45.0, -39.375, 123.75, 135.0}}, + {"f4hj", []float64{57.12890625, 57.3046875, -84.375, -84.0234375}}, + {"5r0f5t7d5", []float64{-50.2442550659, -50.2442121506, -32.5365686417, -32.5365257263}}, + {"2j7n4r6r", []float64{-14.3730354309, -14.3728637695, -175.679283142, -175.678939819}}, + {"wu92egv2wsy", []float64{25.4211013019, 25.421102643, 125.680104196, 125.680105537}}, + {"vgtwey347bg", []float64{65.8648006618, 65.8648020029, 86.6507081687, 86.6507095098}}, + {"q2meny7pbd18", []float64{-43.0307328701, -43.0307327025, 109.285149202, 109.285149537}}, + {"rpe", []float64{-2.8125, -1.40625, 139.21875, 140.625}}, + {"m69", []float64{-30.9375, -29.53125, 57.65625, 59.0625}}, + {"w1zwd8", []float64{10.986328125, 10.9918212891, 100.656738281, 100.667724609}}, + {"fzpf", []float64{84.7265625, 84.90234375, -45.3515625, -45.0}}, + {"t3w", []float64{8.4375, 9.84375, 64.6875, 66.09375}}, + {"zb11", []float64{45.17578125, 45.3515625, 170.15625, 170.5078125}}, + {"r2prkmxpgtww", []float64{-43.6940126494, -43.6940124817, 156.641852036, 156.641852371}}, + {"zr34g1zcj", []float64{86.274433136, 86.2744760513, 147.79894352, 147.798986435}}, + {"19mgdk8", []float64{-82.3287963867, -82.3274230957, -104.315185547, -104.313812256}}, + {"mkp", []float64{-22.5, -21.09375, 66.09375, 67.5}}, + {"934qy86ssc", []float64{6.81367456913, 6.81367993355, 
-120.296655893, -120.296645164}}, + {"byydj4mrm8", []float64{83.3339166641, 83.3339220285, -136.882202625, -136.882191896}}, + {"j", []float64{-90.0, -45.0, 45.0, 90.0}}, + {"9cjzqv5n6", []float64{6.92795276642, 6.92799568176, -92.8632259369, -92.8631830215}}, + {"vkg1wg6s", []float64{72.0009613037, 72.0011329651, 60.7688140869, 60.7691574097}}, + {"ynp42e9x", []float64{79.1659355164, 79.1661071777, 99.8677825928, 99.8681259155}}, + {"uv9zwddtwn", []float64{77.2705686092, 77.2705739737, 36.5002727509, 36.5002834797}}, + {"t17zkzszpm", []float64{8.3480912447, 8.34809660912, 50.4890120029, 50.4890227318}}, + {"tuw779c04ukm", []float64{25.8934257366, 25.8934259042, 87.6943681017, 87.6943684369}}, + {"37rm598", []float64{-25.8316040039, -25.8302307129, -113.400878906, -113.399505615}}, + {"ymf18", []float64{77.607421875, 77.6513671875, 104.0625, 104.106445312}}, + {"gd", []float64{56.25, 61.875, -22.5, -11.25}}, + {"smz", []float64{32.34375, 33.75, 21.09375, 22.5}}, + {"p", []float64{-90.0, -45.0, 135.0, 180.0}}, + {"muzkh95w2u42", []float64{-17.5715374947, -17.571537327, 89.1479081288, 89.1479084641}}, + {"hh53c48eg", []float64{-67.1780061722, -67.1779632568, 4.61507320404, 4.61511611938}}, + {"739ewv", []float64{-35.9197998047, -35.9143066406, -31.3439941406, -31.3330078125}}, + {"cw883", []float64{81.6064453125, 81.650390625, -111.752929688, -111.708984375}}, + {"41xu1w37yf", []float64{-80.8243882656, -80.8243829012, -79.0336382389, -79.0336275101}}, + {"0y750v2k", []float64{-54.2868804932, -54.2867088318, -141.997947693, -141.99760437}}, + {"gqbgw", []float64{83.583984375, 83.6279296875, -32.431640625, -32.3876953125}}, + {"pej", []float64{-73.125, -71.71875, 164.53125, 165.9375}}, + {"r05t", []float64{-44.12109375, -43.9453125, 139.921875, 140.2734375}}, + {"qfuew7wk4f", []float64{-28.8960921764, -28.896086812, 130.361484289, 130.361495018}}, + {"nu357yhp72y", []float64{-65.4882533848, -65.4882520437, 125.326685607, 125.326686949}}, + {"8qt7rnggz", 
[]float64{37.1715116501, 37.1715545654, -161.054120064, -161.054077148}}, + {"dhjq2nz", []float64{23.6357116699, 23.6370849609, -82.6075744629, -82.6062011719}}, + {"5s4", []float64{-67.5, -66.09375, -19.6875, -18.28125}}, + {"ge8nq842", []float64{65.7861328125, 65.7863044739, -22.211265564, -22.2109222412}}, + {"71", []float64{-39.375, -33.75, -45.0, -33.75}}, + {"sz59kteks87q", []float64{39.625713788, 39.6257139556, 38.8742895797, 38.874289915}}, + {"ur2bh", []float64{85.78125, 85.8251953125, 12.48046875, 12.5244140625}}, + {"w140u2f", []float64{5.76095581055, 5.76232910156, 93.0020141602, 93.0033874512}}, + {"fpd3zkt6whz4", []float64{87.5202913955, 87.5202915631, -86.5098573267, -86.5098569915}}, + {"zmej764h8kb8", []float64{76.8721358478, 76.8721360154, 150.614330247, 150.614330582}}, + {"k4p6z", []float64{-33.2666015625, -33.22265625, 10.5029296875, 10.546875}}, + {"f8", []float64{45.0, 50.625, -67.5, -56.25}}, + {"utsy6pv17m", []float64{77.0789462328, 77.0789515972, 29.2745840549, 29.2745947838}}, + {"6z5", []float64{-5.625, -4.21875, -52.03125, -50.625}}, + {"mjdc1", []float64{-13.88671875, -13.8427734375, 48.9111328125, 48.955078125}}, + {"gjks4c2", []float64{75.2412414551, 75.2426147461, -38.5510253906, -38.5496520996}}, + {"fvkvvrh42ju", []float64{75.5808614194, 75.5808627605, -49.3341010809, -49.3340997398}}, + {"yp63x30p7u7", []float64{86.0516823828, 86.0516837239, 93.4828309715, 93.4828323126}}, + {"6rw", []float64{-2.8125, -1.40625, -70.3125, -68.90625}}, + {"28vsqm8sb6", []float64{-40.0031411648, -40.0031358004, -149.490269423, -149.490258694}}, + {"be72g2zcek5", []float64{63.4174847603, 63.4174861014, -152.776078731, -152.77607739}}, + {"xry6fn699f", []float64{44.1117489338, 44.1117542982, 155.130461454, 155.130472183}}, + {"3sf7bw01y4", []float64{-17.5888001919, -17.5887948275, -109.313707352, -109.313696623}}, + {"k729yrqr77", []float64{-26.3700467348, -26.3700413704, 12.2365057468, 12.2365164757}}, + {"e", []float64{0.0, 45.0, -45.0, 0.0}}, + 
{"63x6dd", []float64{-36.1120605469, -36.1065673828, -68.4448242188, -68.4338378906}}, + {"z", []float64{45.0, 90.0, 135.0, 180.0}}, + {"mzyj", []float64{-0.52734375, -0.3515625, 87.1875, 87.5390625}}, + {"3j6r", []float64{-14.23828125, -14.0625, -131.8359375, -131.484375}}, + {"u3q", []float64{52.03125, 53.4375, 19.6875, 21.09375}}, + {"nueu7mbnbn4", []float64{-63.9076530933, -63.9076517522, 129.166262448, 129.166263789}}, + {"cyq2pkr", []float64{80.1795959473, 80.1809692383, -92.1327209473, -92.1313476562}}, + {"gptzzke9hvh", []float64{88.5747224092, 88.5747237504, -36.5904432535, -36.5904419124}}, + {"khd", []float64{-19.6875, -18.28125, 2.8125, 4.21875}}, + {"ghm92hg6p5", []float64{69.1524285078, 69.1524338722, -37.2608613968, -37.260850668}}, + {"n9e20w0", []float64{-81.5295410156, -81.5281677246, 117.092285156, 117.093658447}}, + {"826dzs0k", []float64{1.91230773926, 1.91247940063, -164.904441833, -164.904098511}}, + {"0d5f2", []float64{-78.3544921875, -78.310546875, -152.2265625, -152.182617188}}, + {"70zsyg3", []float64{-39.9284362793, -39.9270629883, -34.1551208496, -34.1537475586}}, + {"zykh8gwy", []float64{80.9675216675, 80.9676933289, 174.417228699, 174.417572021}}, + {"4spd3s", []float64{-67.0825195312, -67.0770263672, -56.8872070312, -56.8762207031}}, + {"r9p6f2t", []float64{-38.8888549805, -38.8874816895, 167.801055908, 167.802429199}}, + {"6q3merq7w", []float64{-8.83652687073, -8.83648395538, -76.8405246735, -76.8404817581}}, + {"qx1zr32", []float64{-4.34371948242, -4.34234619141, 115.279541016, 115.280914307}}, + {"3zfnk", []float64{-0.3076171875, -0.263671875, -98.26171875, -98.2177734375}}, + {"kd2e3", []float64{-31.7724609375, -31.728515625, 23.2470703125, 23.291015625}}, + {"stkhr6", []float64{30.2893066406, 30.2947998047, 28.4436035156, 28.4545898438}}, + {"nh4pzbm", []float64{-66.1363220215, -66.1349487305, 93.159942627, 93.161315918}}, + {"zt8tf", []float64{76.9482421875, 76.9921875, 158.291015625, 158.334960938}}, + {"gd37", 
[]float64{58.18359375, 58.359375, -20.7421875, -20.390625}}, + {"gnx5b45dd", []float64{82.2330951691, 82.2331380844, -35.1513576508, -35.1513147354}}, + {"2qm7", []float64{-9.31640625, -9.140625, -161.3671875, -161.015625}}, + {"4g0zf0kq9cpp", []float64{-71.7601996846, -71.760199517, -55.1015008986, -55.1015005633}}, + {"977tgt", []float64{19.3194580078, 19.3249511719, -118.674316406, -118.663330078}}, + {"md0k", []float64{-33.046875, -32.87109375, 67.8515625, 68.203125}}, + {"v1q3nvfb3h", []float64{52.2386813164, 52.2386866808, 54.089512825, 54.0895235538}}, + {"z96pt6u96w", []float64{53.3649623394, 53.3649677038, 160.549499989, 160.549510717}}, + {"pu", []float64{-67.5, -61.875, 168.75, 180.0}}, + {"6uydh", []float64{-17.9296875, -17.8857421875, -46.93359375, -46.8896484375}}, + {"nx5mt4sk", []float64{-49.6437835693, -49.643611908, 117.295875549, 117.296218872}}, + {"nk8jt", []float64{-63.720703125, -63.6767578125, 101.469726562, 101.513671875}}, + {"kec1015b", []float64{-23.7249755859, -23.7248039246, 23.9113998413, 23.9117431641}}, + {"fk388b", []float64{68.994140625, 68.9996337891, -76.6076660156, -76.5966796875}}, + {"nsb", []float64{-63.28125, -61.875, 112.5, 113.90625}}, + {"ndbws", []float64{-73.388671875, -73.3447265625, 113.37890625, 113.422851562}}, + {"fs5", []float64{67.5, 68.90625, -63.28125, -61.875}}, + {"h6x0kbec6p15", []float64{-75.8905554749, -75.8905553073, 21.3077272475, 21.3077275828}}, + {"hy78", []float64{-54.84375, -54.66796875, 38.671875, 39.0234375}}, + {"vpun9j4ce9", []float64{89.7640568018, 89.7640621662, 50.6728720665, 50.6728827953}}, + {"6tyevvqrj665", []float64{-11.9670169987, -11.967016831, -58.0978783965, -58.0978780612}}, + {"d3ktekr48n", []float64{8.02185416222, 8.02185952663, -72.2694396973, -72.2694289684}}, + {"heyfm2up5", []float64{-68.5054206848, -68.5053777695, 32.2285223007, 32.2285652161}}, + {"mn3f7qjr22v", []float64{-9.41403463483, -9.41403329372, 47.6109869778, 47.6109883189}}, + {"1wngqx9b", []float64{-55.637512207, 
-55.6373405457, -102.719764709, -102.719421387}}, + {"xc9jv49jej", []float64{9.46294605732, 9.46295142174, 170.3774786, 170.377489328}}, + {"27", []float64{-28.125, -22.5, -168.75, -157.5}}, + {"6yhqnw", []float64{-10.1623535156, -10.1568603516, -49.9877929688, -49.9768066406}}, + {"rmhhu3qd", []float64{-16.0328292847, -16.0326576233, 152.07069397, 152.071037292}}, + {"y00b8mhby", []float64{45.1154851913, 45.1155281067, 91.0724544525, 91.0724973679}}, + {"yq5tr", []float64{79.6728515625, 79.716796875, 106.479492188, 106.5234375}}, + {"cuvxw", []float64{73.037109375, 73.0810546875, -93.251953125, -93.2080078125}}, + {"exvb4bcn", []float64{43.5988998413, 43.5990715027, -14.2918395996, -14.2914962769}}, + {"uhdpsyx75n0", []float64{71.667112112, 71.6671134531, 3.03132534027, 3.03132668138}}, + {"y", []float64{45.0, 90.0, 90.0, 135.0}}, + {"qvwbzgw7q", []float64{-13.9108800888, -13.9108371735, 133.591604233, 133.591647148}}, + {"u0rqp0bres", []float64{47.466366291, 47.4663716555, 10.503423214, 10.5034339428}}, + {"43k2u0vbnrbd", []float64{-82.8327522799, -82.8327521123, -72.5894909352, -72.5894905999}}, + {"7r7kwz7xxr", []float64{-3.38658392429, -3.38657855988, -28.8779389858, -28.877928257}}, + {"fu2f", []float64{69.2578125, 69.43359375, -55.1953125, -54.84375}}, + {"9tkjsvzxw6", []float64{30.5309307575, 30.5309361219, -106.655691862, -106.655681133}}, + {"y9x0z", []float64{53.5693359375, 53.61328125, 122.651367188, 122.6953125}}, + {"y4rk4b25g", []float64{58.3613920212, 58.3614349365, 100.316290855, 100.316333771}}, + {"sxmdmu9xcn", []float64{41.202839613, 41.2028449774, 30.4891633987, 30.4891741276}}, + {"fb0", []float64{45.0, 46.40625, -56.25, -54.84375}}, + {"7ffkxt5", []float64{-28.7127685547, -28.7113952637, -7.7522277832, -7.75085449219}}, + {"n1x5jn4dr", []float64{-81.0018110275, -81.0017681122, 100.067210197, 100.067253113}}, + {"uxcdctgmj", []float64{89.1095924377, 89.1096353531, 24.6799707413, 24.6800136566}}, + {"h7", []float64{-73.125, -67.5, 11.25, 
22.5}}, + {"b", []float64{45.0, 90.0, -180.0, -135.0}}, + {"us1n4udk", []float64{68.5800933838, 68.5802650452, 24.0301895142, 24.0305328369}}, + {"zjtptsj2vn", []float64{77.2779929638, 77.2779983282, 142.280373573, 142.280384302}}, + {"x6utsqz", []float64{16.4726257324, 16.4739990234, 152.774505615, 152.775878906}}, + {"cs9dn901rqn", []float64{70.6698024273, 70.6698037684, -110.104661286, -110.104659945}}, + {"sjbn", []float64{33.3984375, 33.57421875, 0.0, 0.3515625}}, + {"0fwxjyc3g9q", []float64{-74.6696452796, -74.6696439385, -136.854814589, -136.854813248}}, + {"fk", []float64{67.5, 73.125, -78.75, -67.5}}, + {"75hq9jh", []float64{-26.9549560547, -26.9535827637, -38.9739990234, -38.9726257324}}, + {"kr3hg1q1", []float64{-3.37675094604, -3.37657928467, 12.7963256836, 12.7966690063}}, + {"hfq4d2wu", []float64{-76.9008636475, -76.9006919861, 42.2956466675, 42.2959899902}}, + {"rg6ygh", []float64{-25.5102539062, -25.5047607422, 172.749023438, 172.760009766}}, + {"995pvrg", []float64{7.02987670898, 7.03125, -108.046417236, -108.045043945}}, + {"s5ys", []float64{21.796875, 21.97265625, 9.140625, 9.4921875}}, + {"289ucubzj6", []float64{-41.3252341747, -41.3252288103, -154.960902929, -154.9608922}}, + {"4", []float64{-90.0, -45.0, -90.0, -45.0}}, + {"7g0e3gp7cz", []float64{-27.5365501642, -27.5365447998, -10.4599392414, -10.4599285126}}, + {"9suuudg", []float64{27.5688171387, 27.5701904297, -105.618438721, -105.61706543}}, + {"8vdt3j8zb0", []float64{31.8918943405, 31.8918997049, -142.689399719, -142.68938899}}, + {"cf", []float64{56.25, 61.875, -101.25, -90.0}}, + {"jnp33f5pr9", []float64{-56.0180372, -56.0180318356, 55.276658535, 55.2766692638}}, + {"czgmgyb", []float64{89.6415710449, 89.6429443359, -96.5148925781, -96.5135192871}}, + {"c1kk", []float64{52.734375, 52.91015625, -129.0234375, -128.671875}}, + {"kfm4hfe8cp4s", []float64{-31.9782876223, -31.9782874547, 40.994843021, 40.9948433563}}, + {"9mnws4hc8h", []float64{29.2788434029, 29.2788487673, -114.427070618, 
-114.427059889}}, + {"t0j7chwg", []float64{0.684413909912, 0.684585571289, 52.4360275269, 52.4363708496}}, + {"y", []float64{45.0, 90.0, 90.0, 135.0}}, + {"suj", []float64{22.5, 23.90625, 40.78125, 42.1875}}, + } + + for _, test := range tests { + lat, lon := DecodeGeoHash(test.hash) + + if !compareLatitude(test.box, lat) { + t.Errorf("expected lat %f, got %f, hash %s", (test.box[0]+test.box[1])/2, lat, test.hash) + } + if !compareLogitude(test.box, lon) { + t.Errorf("expected lon %f, got %f, hash %s", (test.box[2]+test.box[3])/2, lon, test.hash) + } + } +} + +func compareLatitude(box []float64, v float64) bool { + avg := (box[0] + box[1]) / 2 + if compareGeo(avg, v) != 0 { + return false + } + return true +} + +func compareLogitude(box []float64, v float64) bool { + avg := (box[2] + box[3]) / 2 + if compareGeo(avg, v) != 0 { + return false + } + return true +} From 83347b91127f3d89a6d37918a9a273bb704ce0b1 Mon Sep 17 00:00:00 2001 From: Sreekanth Sivasankaran Date: Fri, 10 May 2019 09:49:31 +0530 Subject: [PATCH 598/728] cleaning the unused GeoHashDecode method --- geo/benchmark_geohash_test.go | 24 ----- geo/geohash.go | 164 +++------------------------------- geo/geohash_test.go | 25 ------ geo/versus_test.go | 2 +- 4 files changed, 15 insertions(+), 200 deletions(-) diff --git a/geo/benchmark_geohash_test.go b/geo/benchmark_geohash_test.go index 5ed5d6b76..2cdeb1d47 100644 --- a/geo/benchmark_geohash_test.go +++ b/geo/benchmark_geohash_test.go @@ -18,14 +18,6 @@ import ( "testing" ) -func BenchmarkGeoHashLen5Decode(b *testing.B) { - b.ResetTimer() - hash := "d3hn3" - for i := 0; i < b.N; i++ { - _, _ = GeoHashDecode(hash) - } -} - func BenchmarkGeoHashLen5NewDecode(b *testing.B) { b.ResetTimer() hash := "d3hn3" @@ -34,14 +26,6 @@ func BenchmarkGeoHashLen5NewDecode(b *testing.B) { } } -func BenchmarkGeoHashLen6Decode(b *testing.B) { - b.ResetTimer() - hash := "u4pruy" - for i := 0; i < b.N; i++ { - _, _ = GeoHashDecode(hash) - } -} - func 
BenchmarkGeoHashLen6NewDecode(b *testing.B) { b.ResetTimer() hash := "u4pruy" @@ -50,14 +34,6 @@ func BenchmarkGeoHashLen6NewDecode(b *testing.B) { } } -func BenchmarkGeoHashLen7Decode(b *testing.B) { - b.ResetTimer() - hash := "u4pruyd" - for i := 0; i < b.N; i++ { - _, _ = GeoHashDecode(hash) - } -} - func BenchmarkGeoHashLen7NewDecode(b *testing.B) { b.ResetTimer() hash := "u4pruyd" diff --git a/geo/geohash.go b/geo/geohash.go index 4fac3a877..598a63cba 100644 --- a/geo/geohash.go +++ b/geo/geohash.go @@ -1,32 +1,19 @@ -// The code here was obtained from: -// https://github.com/mmcloughlin/geohash - -// The MIT License (MIT) -// Copyright (c) 2015 Michael McLoughlin -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: - -// The above copyright notice and this permission notice shall be included in all -// copies or substantial portions of the Software. - -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -// SOFTWARE. +// Copyright (c) 2019 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. package geo -import ( - "math" -) - // encoding encapsulates an encoding defined by a given base32 alphabet. type encoding struct { enc string @@ -47,132 +34,9 @@ func newEncoding(encoder string) *encoding { return e } -// Decode string into bits of a 64-bit word. The string s may be at most 12 -// characters. -func (e *encoding) decode(s string) uint64 { - x := uint64(0) - for i := 0; i < len(s); i++ { - x = (x << 5) | uint64(e.dec[s[i]]) - } - return x -} - -// Encode bits of 64-bit word into a string. -func (e *encoding) encode(x uint64) string { - b := [12]byte{} - for i := 0; i < 12; i++ { - b[11-i] = e.enc[x&0x1f] - x >>= 5 - } - return string(b[:]) -} - -// Base32Encoding with the Geohash alphabet. +// base32encoding with the Geohash alphabet. var base32encoding = newEncoding("0123456789bcdefghjkmnpqrstuvwxyz") -// BoundingBox returns the region encoded by the given string geohash. -func geoBoundingBox(hash string) geoBox { - bits := uint(5 * len(hash)) - inthash := base32encoding.decode(hash) - return geoBoundingBoxIntWithPrecision(inthash, bits) -} - -// Box represents a rectangle in latitude/longitude space. -type geoBox struct { - minLat float64 - maxLat float64 - minLng float64 - maxLng float64 -} - -// Round returns a point inside the box, making an effort to round to minimal -// precision. 
-func (b geoBox) round() (lat, lng float64) { - x := maxDecimalPower(b.maxLat - b.minLat) - lat = math.Ceil(b.minLat/x) * x - x = maxDecimalPower(b.maxLng - b.minLng) - lng = math.Ceil(b.minLng/x) * x - return -} - -// precalculated for performance -var exp232 = math.Exp2(32) - -// errorWithPrecision returns the error range in latitude and longitude for in -// integer geohash with bits of precision. -func errorWithPrecision(bits uint) (latErr, lngErr float64) { - b := int(bits) - latBits := b / 2 - lngBits := b - latBits - latErr = math.Ldexp(180.0, -latBits) - lngErr = math.Ldexp(360.0, -lngBits) - return -} - -// minDecimalPlaces returns the minimum number of decimal places such that -// there must exist an number with that many places within any range of width -// r. This is intended for returning minimal precision coordinates inside a -// box. -func maxDecimalPower(r float64) float64 { - m := int(math.Floor(math.Log10(r))) - return math.Pow10(m) -} - -// Encode the position of x within the range -r to +r as a 32-bit integer. -func encodeRange(x, r float64) uint32 { - p := (x + r) / (2 * r) - return uint32(p * exp232) -} - -// Decode the 32-bit range encoding X back to a value in the range -r to +r. -func decodeRange(X uint32, r float64) float64 { - p := float64(X) / exp232 - x := 2*r*p - r - return x -} - -// Squash the even bitlevels of X into a 32-bit word. Odd bitlevels of X are -// ignored, and may take any value. -func squash(X uint64) uint32 { - X &= 0x5555555555555555 - X = (X | (X >> 1)) & 0x3333333333333333 - X = (X | (X >> 2)) & 0x0f0f0f0f0f0f0f0f - X = (X | (X >> 4)) & 0x00ff00ff00ff00ff - X = (X | (X >> 8)) & 0x0000ffff0000ffff - X = (X | (X >> 16)) & 0x00000000ffffffff - return uint32(X) -} - -// Deinterleave the bits of X into 32-bit words containing the even and odd -// bitlevels of X, respectively. 
-func deinterleave(X uint64) (uint32, uint32) { - return squash(X), squash(X >> 1) -} - -// BoundingBoxIntWithPrecision returns the region encoded by the integer -// geohash with the specified precision. -func geoBoundingBoxIntWithPrecision(hash uint64, bits uint) geoBox { - fullHash := hash << (64 - bits) - latInt, lngInt := deinterleave(fullHash) - lat := decodeRange(latInt, 90) - lng := decodeRange(lngInt, 180) - latErr, lngErr := errorWithPrecision(bits) - return geoBox{ - minLat: lat, - maxLat: lat + latErr, - minLng: lng, - maxLng: lng + lngErr, - } -} - -// ---------------------------------------------------------------------- - -// Decode the string geohash to a (lat, lng) point. -func GeoHashDecode(hash string) (lat, lng float64) { - box := geoBoundingBox(hash) - return box.round() -} - var masks = []uint64{16, 8, 4, 2, 1} // DecodeGeoHash decodes the string geohash faster with diff --git a/geo/geohash_test.go b/geo/geohash_test.go index d0bec329d..11b920ded 100644 --- a/geo/geohash_test.go +++ b/geo/geohash_test.go @@ -19,31 +19,6 @@ import ( "testing" ) -func TestGeoHashDecode(t *testing.T) { - tests := []struct { - hash string - lon float64 - lat float64 - }{ - {"d3hn3", -73.080000, 6.730000}, // -73.05908203, 6.74560547 as per http://geohash.co/ - {"u4pru", 10.380000, 57.620000}, // 10.39306641, 57.63427734 - {"u4pruy", 10.410000, 57.646000}, // 10.40954590, 57.64801025 - {"u4pruyd", 10.407000, 57.649000}, // 10.40748596, 57.64869690 - {"u4pruydqqvj", 10.40744, 57.64911}, // 10.40743969, 57.64911063 - } - - for _, test := range tests { - lat, lon := GeoHashDecode(test.hash) - - if compareGeo(test.lon, lon) != 0 { - t.Errorf("expected lon %f, got %f, hash %s", test.lon, lon, test.hash) - } - if compareGeo(test.lat, lat) != 0 { - t.Errorf("expected lat %f, got %f, hash %s", test.lat, lat, test.hash) - } - } -} - func TestDecodeGeoHash(t *testing.T) { tests := []struct { hash string diff --git a/geo/versus_test.go b/geo/versus_test.go index 
0a067dae7..cadf96208 100644 --- a/geo/versus_test.go +++ b/geo/versus_test.go @@ -20,7 +20,7 @@ import ( // This test basically confirms the dimensions of the // bounded box computed between the DecodeGeoHash method -// and the existing original implementation from +// and the popular implementation from // https://github.com/mmcloughlin/geohash. // DecodeGeoHash method returns the centre of the rectangle // than returning the box dimensions. From 2f920ab09a5a94515984d64b6a2821fb1eda2f7c Mon Sep 17 00:00:00 2001 From: Sreekanth Sivasankaran Date: Fri, 10 May 2019 19:58:20 +0530 Subject: [PATCH 599/728] updating copyright header adding reference to source --- geo/geohash.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/geo/geohash.go b/geo/geohash.go index 598a63cba..d3d4dfa8b 100644 --- a/geo/geohash.go +++ b/geo/geohash.go @@ -11,6 +11,8 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. +// This implementation is inspired from the geohash-js +// ref: https://github.com/davetroy/geohash-js package geo From 10662df9cf02d544913847ccc827e345085c662d Mon Sep 17 00:00:00 2001 From: Sreekanth Sivasankaran Date: Tue, 28 May 2019 15:38:37 +0530 Subject: [PATCH 600/728] MB-33364 - perf wrinkles with geo queries Geo queries are reported to have high memory consumption and higher latencies. One plasubile root cause for this could be, -Very high number of candidate terms computed during the bounded box queries. (as unlike other queries, looks like these terms are just mathematically created without actual existence in the bleve index) -Too many candidate terms keeps more TermFieldDictionaries live and managed over the Disjunction heap as well as the term searcher objects. 
This fix tries to filter out the onBoundary/notOnBoundary terms early in the computation cycle so that it creates lesser garbage from TFDs, term byte searchers and lighter disjunction heap management. Fix have shown good improvement with numbers in local/mac environment against the functional tests with 1000 documents and num_queries=100 Average FTS latency in ms for 100 queries 63.20892059(new) Vs 189.9222353(existing) ~3X improvment in latency is observed. And the latency spikes got better normalised. eg: New mills Vs Old mills 80.392502 427.297942 306.575176 1555.539272 435.612816 2431.383857 307.829132 1402.138768 148.749478 936.793944 293.825909 944.349282 390.313823 469.521313 620.221195 2414.491882 --- search/searcher/search_geoboundingbox.go | 31 +++++++++++++++---- search/searcher/search_geoboundingbox_test.go | 4 +-- .../searcher/search_geopointdistance_test.go | 2 +- 3 files changed, 28 insertions(+), 9 deletions(-) diff --git a/search/searcher/search_geoboundingbox.go b/search/searcher/search_geoboundingbox.go index ed5424f1f..ad20d1a0f 100644 --- a/search/searcher/search_geoboundingbox.go +++ b/search/searcher/search_geoboundingbox.go @@ -40,7 +40,7 @@ func NewGeoBoundingBoxSearcher(indexReader index.IndexReader, minLon, minLat, // do math to produce list of terms needed for this search onBoundaryTerms, notOnBoundaryTerms, err := ComputeGeoRange(0, GeoBitsShift1Minus1, - minLon, minLat, maxLon, maxLat, checkBoundaries) + minLon, minLat, maxLon, maxLat, checkBoundaries, indexReader, field) if err != nil { return nil, err } @@ -100,7 +100,8 @@ var geoMaxShift = document.GeoPrecisionStep * 4 var geoDetailLevel = ((geo.GeoBits << 1) - geoMaxShift) / 2 func ComputeGeoRange(term uint64, shift uint, - sminLon, sminLat, smaxLon, smaxLat float64, checkBoundaries bool) ( + sminLon, sminLat, smaxLon, smaxLat float64, checkBoundaries bool, + indexReader index.IndexReader, field string) ( onBoundary [][]byte, notOnBoundary [][]byte, err error) { preallocBytesLen := 
32 preallocBytes := make([]byte, preallocBytesLen) @@ -117,6 +118,21 @@ func ComputeGeoRange(term uint64, shift uint, return rv } + isIndexed := func(term []byte) bool { + if indexReader != nil { + reader, err := indexReader.TermFieldReader(term, field, false, false, false) + if err != nil || reader == nil { + return false + } + if reader.Count() == 0 { + _ = reader.Close() + return false + } + _ = reader.Close() + } + return true + } + var computeGeoRange func(term uint64, shift uint) // declare for recursion relateAndRecurse := func(start, end uint64, res, level uint) { @@ -131,10 +147,13 @@ func ComputeGeoRange(term uint64, shift uint, if within || (level == geoDetailLevel && geo.RectIntersects(minLon, minLat, maxLon, maxLat, sminLon, sminLat, smaxLon, smaxLat)) { - if !within && checkBoundaries { - onBoundary = append(onBoundary, makePrefixCoded(int64(start), res)) - } else { - notOnBoundary = append(notOnBoundary, makePrefixCoded(int64(start), res)) + codedTerm := makePrefixCoded(int64(start), res) + if isIndexed(codedTerm) { + if !within && checkBoundaries { + onBoundary = append(onBoundary, makePrefixCoded(int64(start), res)) + } else { + notOnBoundary = append(notOnBoundary, makePrefixCoded(int64(start), res)) + } } } else if level < geoDetailLevel && geo.RectIntersects(minLon, minLat, maxLon, maxLat, diff --git a/search/searcher/search_geoboundingbox_test.go b/search/searcher/search_geoboundingbox_test.go index cae803412..6b3072868 100644 --- a/search/searcher/search_geoboundingbox_test.go +++ b/search/searcher/search_geoboundingbox_test.go @@ -215,7 +215,7 @@ func TestComputeGeoRange(t *testing.T) { for testi, test := range tests { onBoundaryRes, offBoundaryRes, err := ComputeGeoRange(0, GeoBitsShift1Minus1, - -1.0*test.degs, -1.0*test.degs, test.degs, test.degs, true) + -1.0*test.degs, -1.0*test.degs, test.degs, test.degs, true, nil, "") if (err != nil) != (test.err != "") { t.Errorf("test: %+v, err: %v", test, err) } @@ -275,7 +275,7 @@ func 
benchmarkComputeGeoRange(b *testing.B, for i := 0; i < b.N; i++ { onBoundaryRes, offBoundaryRes, err := - ComputeGeoRange(0, GeoBitsShift1Minus1, minLon, minLat, maxLon, maxLat, checkBoundaries) + ComputeGeoRange(0, GeoBitsShift1Minus1, minLon, minLat, maxLon, maxLat, checkBoundaries, nil, "") if err != nil { b.Fatalf("expected no err") } diff --git a/search/searcher/search_geopointdistance_test.go b/search/searcher/search_geopointdistance_test.go index eaf8216a3..8b7f1c968 100644 --- a/search/searcher/search_geopointdistance_test.go +++ b/search/searcher/search_geopointdistance_test.go @@ -113,7 +113,7 @@ func TestGeoPointDistanceCompare(t *testing.T) { minLon, minLat, maxLon, maxLat float64, checkBoundaries bool) { // do math to produce list of terms needed for this search onBoundaryRes, offBoundaryRes, err := ComputeGeoRange(0, GeoBitsShift1Minus1, - minLon, minLat, maxLon, maxLat, checkBoundaries) + minLon, minLat, maxLon, maxLat, checkBoundaries, nil, "") if err != nil { t.Fatal(err) } From 7ad4bbc2adec301f007f328101f0bcf19f371e8e Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Wed, 29 May 2019 15:25:24 -0400 Subject: [PATCH 601/728] support search after style pagination (#1182) * support search after style pagination Typically deep pagination becomes quite expensive when using the size+from mechanism on distributed indexes, due to each child index having to build large result sets, and return them over the network for merging at the coordinating node. Alternatively, the search after approach allows for the client to maintain state, in this case, the sort key of the last result for the current page. By running the same search again, but this time providing the last hit's sort key as this new requests search after key, we allow for significantly more efficient pagination. TotalHits and Facets are unaffected by using this parameter, because all hits are still seen by the collector. 
* fix incorrectly named method * add support for SearchBefore Adds a capability to go one page backwards in deep pagination of search results, similar to going forwards using SearchAfter. * reset request back to original * refactor integration tests to allow for testing aliases added integration tests for search_before and search_after which go through index aliases * fix typo * fix incorrect type for method receiver --- index_alias_impl.go | 46 ++++---- index_impl.go | 64 +++++++++-- search.go | 35 ++++++ search/collector/topn.go | 29 +++++ search/sort.go | 29 +++++ test/integration_test.go | 147 ++++++++++++++++-------- test/tests/alias/datasets/shard0/a.json | 3 + test/tests/alias/datasets/shard0/c.json | 3 + test/tests/alias/datasets/shard1/b.json | 3 + test/tests/alias/datasets/shard1/d.json | 3 + test/tests/alias/mapping.json | 3 + test/tests/alias/searches.json | 76 ++++++++++++ test/tests/sort/searches.json | 54 ++++++++- 13 files changed, 415 insertions(+), 80 deletions(-) create mode 100644 test/tests/alias/datasets/shard0/a.json create mode 100644 test/tests/alias/datasets/shard0/c.json create mode 100644 test/tests/alias/datasets/shard1/b.json create mode 100644 test/tests/alias/datasets/shard1/d.json create mode 100644 test/tests/alias/mapping.json create mode 100644 test/tests/alias/searches.json diff --git a/index_alias_impl.go b/index_alias_impl.go index 335fcade2..4366fc795 100644 --- a/index_alias_impl.go +++ b/index_alias_impl.go @@ -434,6 +434,8 @@ func createChildSearchRequest(req *SearchRequest) *SearchRequest { Sort: req.Sort.Copy(), IncludeLocations: req.IncludeLocations, Score: req.Score, + SearchAfter: req.SearchAfter, + SearchBefore: req.SearchBefore, } return &rv } @@ -451,6 +453,14 @@ func MultiSearch(ctx context.Context, req *SearchRequest, indexes ...Index) (*Se searchStart := time.Now() asyncResults := make(chan *asyncSearchResult, len(indexes)) + var reverseQueryExecution bool + if req.SearchBefore != nil { + reverseQueryExecution = 
true + req.Sort.Reverse() + req.SearchAfter = req.SearchBefore + req.SearchBefore = nil + } + // run search on each index in separate go routine var waitGroup sync.WaitGroup @@ -503,7 +513,7 @@ func MultiSearch(ctx context.Context, req *SearchRequest, indexes ...Index) (*Se // sort all hits with the requested order if len(req.Sort) > 0 { - sorter := newMultiSearchHitSorter(req.Sort, sr.Hits) + sorter := newSearchHitSorter(req.Sort, sr.Hits) sort.Sort(sorter) } @@ -524,6 +534,17 @@ func MultiSearch(ctx context.Context, req *SearchRequest, indexes ...Index) (*Se sr.Facets.Fixup(name, fr.Size) } + if reverseQueryExecution { + // reverse the sort back to the original + req.Sort.Reverse() + // resort using the original order + mhs := newSearchHitSorter(req.Sort, sr.Hits) + sort.Sort(mhs) + // reset request + req.SearchBefore = req.SearchAfter + req.SearchAfter = nil + } + // fix up original request sr.Request = req searchDuration := time.Since(searchStart) @@ -581,26 +602,3 @@ func (f *indexAliasImplFieldDict) Close() error { defer f.index.mutex.RUnlock() return f.fieldDict.Close() } - -type multiSearchHitSorter struct { - hits search.DocumentMatchCollection - sort search.SortOrder - cachedScoring []bool - cachedDesc []bool -} - -func newMultiSearchHitSorter(sort search.SortOrder, hits search.DocumentMatchCollection) *multiSearchHitSorter { - return &multiSearchHitSorter{ - sort: sort, - hits: hits, - cachedScoring: sort.CacheIsScore(), - cachedDesc: sort.CacheDescending(), - } -} - -func (m *multiSearchHitSorter) Len() int { return len(m.hits) } -func (m *multiSearchHitSorter) Swap(i, j int) { m.hits[i], m.hits[j] = m.hits[j], m.hits[i] } -func (m *multiSearchHitSorter) Less(i, j int) bool { - c := m.sort.Compare(m.cachedScoring, m.cachedDesc, m.hits[i], m.hits[j]) - return c < 0 -} diff --git a/index_impl.go b/index_impl.go index 63fe39ccb..6324d960e 100644 --- a/index_impl.go +++ b/index_impl.go @@ -19,6 +19,7 @@ import ( "encoding/json" "fmt" "os" + "sort" "sync" 
"sync/atomic" "time" @@ -442,7 +443,20 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr return nil, ErrorIndexClosed } - collector := collector.NewTopNCollector(req.Size, req.From, req.Sort) + var reverseQueryExecution bool + if req.SearchBefore != nil { + reverseQueryExecution = true + req.Sort.Reverse() + req.SearchAfter = req.SearchBefore + req.SearchBefore = nil + } + + var coll *collector.TopNCollector + if req.SearchAfter != nil { + coll = collector.NewTopNCollectorAfter(req.Size, req.Sort, req.SearchAfter) + } else { + coll = collector.NewTopNCollector(req.Size, req.From, req.Sort) + } // open a reader for this search indexReader, err := i.i.Reader() @@ -494,10 +508,10 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr facetsBuilder.Add(facetName, facetBuilder) } } - collector.SetFacetsBuilder(facetsBuilder) + coll.SetFacetsBuilder(facetsBuilder) } - memNeeded := memNeededForSearch(req, searcher, collector) + memNeeded := memNeededForSearch(req, searcher, coll) if cb := ctx.Value(SearchQueryStartCallbackKey); cb != nil { if cbF, ok := cb.(SearchQueryStartCallbackFn); ok { err = cbF(memNeeded) @@ -515,12 +529,12 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr } } - err = collector.Collect(ctx, searcher, indexReader) + err = coll.Collect(ctx, searcher, indexReader) if err != nil { return nil, err } - hits := collector.Results() + hits := coll.Results() var highlighter highlight.Highlighter @@ -560,6 +574,17 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr logger.Printf("slow search took %s - %v", searchDuration, req) } + if reverseQueryExecution { + // reverse the sort back to the original + req.Sort.Reverse() + // resort using the original order + mhs := newSearchHitSorter(req.Sort, hits) + sort.Sort(mhs) + // reset request + req.SearchBefore = req.SearchAfter + req.SearchAfter = nil + } + return &SearchResult{ Status: 
&SearchStatus{ Total: 1, @@ -567,10 +592,10 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr }, Request: req, Hits: hits, - Total: collector.Total(), - MaxScore: collector.MaxScore(), + Total: coll.Total(), + MaxScore: coll.MaxScore(), Took: searchDuration, - Facets: collector.FacetResults(), + Facets: coll.FacetResults(), }, nil } @@ -865,3 +890,26 @@ func deDuplicate(fields []string) []string { } return ret } + +type searchHitSorter struct { + hits search.DocumentMatchCollection + sort search.SortOrder + cachedScoring []bool + cachedDesc []bool +} + +func newSearchHitSorter(sort search.SortOrder, hits search.DocumentMatchCollection) *searchHitSorter { + return &searchHitSorter{ + sort: sort, + hits: hits, + cachedScoring: sort.CacheIsScore(), + cachedDesc: sort.CacheDescending(), + } +} + +func (m *searchHitSorter) Len() int { return len(m.hits) } +func (m *searchHitSorter) Swap(i, j int) { m.hits[i], m.hits[j] = m.hits[j], m.hits[i] } +func (m *searchHitSorter) Less(i, j int) bool { + c := m.sort.Compare(m.cachedScoring, m.cachedDesc, m.hits[i], m.hits[j]) + return c < 0 +} diff --git a/search.go b/search.go index ebd69971e..e8d66f80a 100644 --- a/search.go +++ b/search.go @@ -262,6 +262,8 @@ func (h *HighlightRequest) AddField(field string) { // result score explanations. // Sort describes the desired order for the results to be returned. // Score controls the kind of scoring performed +// SearchAfter supports deep paging by providing a minimum sort key +// SearchBefore supports deep paging by providing a maximum sort key // // A special field named "*" can be used to return all fields. 
type SearchRequest struct { @@ -275,6 +277,8 @@ type SearchRequest struct { Sort search.SortOrder `json:"sort"` IncludeLocations bool `json:"includeLocations"` Score string `json:"score,omitempty"` + SearchAfter []string `json:"search_after"` + SearchBefore []string `json:"search_before"` } func (r *SearchRequest) Validate() error { @@ -285,6 +289,27 @@ func (r *SearchRequest) Validate() error { } } + if r.SearchAfter != nil && r.SearchBefore != nil { + return fmt.Errorf("cannot use search after and search before together") + } + + if r.SearchAfter != nil { + if r.From != 0 { + return fmt.Errorf("cannot use search after with from !=0") + } + if len(r.SearchAfter) != len(r.Sort) { + return fmt.Errorf("search after must have same size as sort order") + } + } + if r.SearchBefore != nil { + if r.From != 0 { + return fmt.Errorf("cannot use search before with from !=0") + } + if len(r.SearchBefore) != len(r.Sort) { + return fmt.Errorf("search before must have same size as sort order") + } + } + return r.Facets.Validate() } @@ -311,6 +336,12 @@ func (r *SearchRequest) SortByCustom(order search.SortOrder) { r.Sort = order } +// SetSearchAfter sets the request to skip over hits with a sort +// value less than the provided sort after key +func (r *SearchRequest) SetSearchAfter(after []string) { + r.SearchAfter = after +} + // UnmarshalJSON deserializes a JSON representation of // a SearchRequest func (r *SearchRequest) UnmarshalJSON(input []byte) error { @@ -325,6 +356,8 @@ func (r *SearchRequest) UnmarshalJSON(input []byte) error { Sort []json.RawMessage `json:"sort"` IncludeLocations bool `json:"includeLocations"` Score string `json:"score"` + SearchAfter []string `json:"search_after"` + SearchBefore []string `json:"search_before"` } err := json.Unmarshal(input, &temp) @@ -352,6 +385,8 @@ func (r *SearchRequest) UnmarshalJSON(input []byte) error { r.Facets = temp.Facets r.IncludeLocations = temp.IncludeLocations r.Score = temp.Score + r.SearchAfter = temp.SearchAfter + 
r.SearchBefore = temp.SearchBefore r.Query, err = query.ParseQuery(temp.Q) if err != nil { return err diff --git a/search/collector/topn.go b/search/collector/topn.go index b30bd6ecf..a027a12c2 100644 --- a/search/collector/topn.go +++ b/search/collector/topn.go @@ -69,6 +69,7 @@ type TopNCollector struct { lowestMatchOutsideResults *search.DocumentMatch updateFieldVisitor index.DocumentFieldTermVisitor dvReader index.DocValueReader + searchAfter *search.DocumentMatch } // CheckDoneEvery controls how frequently we check the context deadline @@ -78,6 +79,21 @@ const CheckDoneEvery = uint64(1024) // skipping over the first 'skip' hits // ordering hits by the provided sort order func NewTopNCollector(size int, skip int, sort search.SortOrder) *TopNCollector { + return newTopNCollector(size, skip, sort) +} + +// NewTopNCollector builds a collector to find the top 'size' hits +// skipping over the first 'skip' hits +// ordering hits by the provided sort order +func NewTopNCollectorAfter(size int, sort search.SortOrder, after []string) *TopNCollector { + rv := newTopNCollector(size, 0, sort) + rv.searchAfter = &search.DocumentMatch{ + Sort: after, + } + return rv +} + +func newTopNCollector(size int, skip int, sort search.SortOrder) *TopNCollector { hc := &TopNCollector{size: size, skip: skip, sort: sort} // pre-allocate space on the store to avoid reslicing @@ -266,6 +282,19 @@ func MakeTopNDocumentMatchHandler( if d == nil { return nil } + + // support search after based pagination, + // if this hit is <= the search after sort key + // we should skip it + if hc.searchAfter != nil { + // exact sort order matches use hit number to break tie + // but we want to allow for exact match, so we pretend + hc.searchAfter.HitNumber = d.HitNumber + if hc.sort.Compare(hc.cachedScoring, hc.cachedDesc, d, hc.searchAfter) <= 0 { + return nil + } + } + // optimization, we track lowest sorting hit already removed from heap // with this one comparison, we can avoid all heap operations if 
// this hit would have been added and then immediately removed diff --git a/search/sort.go b/search/sort.go index e17f70787..6e4ed80fa 100644 --- a/search/sort.go +++ b/search/sort.go @@ -38,6 +38,8 @@ type SearchSort interface { RequiresScoring() bool RequiresFields() []string + Reverse() + Copy() SearchSort } @@ -293,6 +295,12 @@ func (so SortOrder) CacheDescending() []bool { return rv } +func (so SortOrder) Reverse() { + for _, soi := range so { + soi.Reverse() + } +} + // SortFieldType lets you control some internal sort behavior // normally leaving this to the zero-value of SortFieldAuto is fine type SortFieldType int @@ -492,6 +500,15 @@ func (s *SortField) Copy() SearchSort { return &rv } +func (s *SortField) Reverse() { + s.Desc = !s.Desc + if s.Missing == SortFieldMissingFirst { + s.Missing = SortFieldMissingLast + } else { + s.Missing = SortFieldMissingFirst + } +} + // SortDocID will sort results by the document identifier type SortDocID struct { Desc bool @@ -533,6 +550,10 @@ func (s *SortDocID) Copy() SearchSort { return &rv } +func (s *SortDocID) Reverse() { + s.Desc = !s.Desc +} + // SortScore will sort results by the document match score type SortScore struct { Desc bool @@ -574,6 +595,10 @@ func (s *SortScore) Copy() SearchSort { return &rv } +func (s *SortScore) Reverse() { + s.Desc = !s.Desc +} + var maxDistance = string(numeric.MustNewPrefixCodedInt64(math.MaxInt64, 0)) // NewSortGeoDistance creates SearchSort instance for sorting documents by @@ -705,6 +730,10 @@ func (s *SortGeoDistance) Copy() SearchSort { return &rv } +func (s *SortGeoDistance) Reverse() { + s.Desc = !s.Desc +} + type BytesSlice [][]byte func (p BytesSlice) Len() int { return len(p) } diff --git a/test/integration_test.go b/test/integration_test.go index 8cbac6f30..ed59323ff 100644 --- a/test/integration_test.go +++ b/test/integration_test.go @@ -17,6 +17,7 @@ package test import ( "encoding/json" "flag" + "fmt" "io/ioutil" "math" "os" @@ -87,53 +88,29 @@ func runTestDir(t 
*testing.T, dir, datasetName string) { return } - // open new index - if !*keepIndex { - defer func() { - err := os.RemoveAll("test.bleve") - if err != nil { - t.Fatal(err) - } - }() - } - index, err := bleve.New("test.bleve", &mapping) - if err != nil { - t.Errorf("error creating new index: %v", err) - return - } - // set a custom index name - index.SetName(datasetName) - defer func() { - err := index.Close() - if err != nil { - t.Fatal(err) - } - }() + var index bleve.Index + var cleanup func() - // index data - fis, err := ioutil.ReadDir(dir + string(filepath.Separator) + "data") - if err != nil { - t.Errorf("error reading data dir: %v", err) - return - } - for _, fi := range fis { - fileBytes, err := ioutil.ReadFile(dir + string(filepath.Separator) + "data" + string(filepath.Separator) + fi.Name()) + // if there is a dir named 'data' open single index + _, err = os.Stat(dir + string(filepath.Separator) + "data") + if !os.IsNotExist(err) { + + index, cleanup, err = loadDataSet(t, datasetName, mapping, dir+string(filepath.Separator)+"data") if err != nil { - t.Errorf("error reading data file: %v", err) + t.Errorf("error loading dataset: %v", err) return } - var fileDoc interface{} - err = json.Unmarshal(fileBytes, &fileDoc) - if err != nil { - t.Errorf("error parsing data file as json: %v", err) - } - filename := fi.Name() - ext := filepath.Ext(filename) - id := filename[0 : len(filename)-len(ext)] - err = index.Index(id, fileDoc) - if err != nil { - t.Errorf("error indexing data: %v", err) - return + defer cleanup() + } else { + // if there is a dir named 'datasets' build alias over each index + _, err = os.Stat(dir + string(filepath.Separator) + "datasets") + if !os.IsNotExist(err) { + index, cleanup, err = loadDataSets(t, datasetName, mapping, dir+string(filepath.Separator)+"datasets") + if err != nil { + t.Errorf("error loading dataset: %v", err) + return + } + defer cleanup() } } @@ -165,6 +142,7 @@ func runTestDir(t *testing.T, dir, datasetName string) { if 
len(res.Hits) != len(search.Result.Hits) { t.Errorf("test error - %s", search.Comment) t.Errorf("test %d - expected hits len: %d got %d", testNum, len(search.Result.Hits), len(res.Hits)) + t.Errorf("got hits: %v", res.Hits) continue } for hi, hit := range search.Result.Hits { @@ -202,12 +180,87 @@ func runTestDir(t *testing.T, dir, datasetName string) { t.Errorf("test %d - expected facets: %#v got %#v", testNum, search.Result.Facets, res.Facets) } } - // check that custom index name is in results - for _, hit := range res.Hits { - if hit.Index != datasetName { - t.Fatalf("expected name: %s, got: %s", datasetName, hit.Index) + if _, ok := index.(bleve.IndexAlias); !ok { + // check that custom index name is in results + for _, hit := range res.Hits { + if hit.Index != datasetName { + t.Fatalf("expected name: %s, got: %s", datasetName, hit.Index) + } } } } } } + +func loadDataSet(t *testing.T, datasetName string, mapping mapping.IndexMappingImpl, path string) (bleve.Index, func(), error) { + idxPath := fmt.Sprintf("test-%s.bleve", datasetName) + index, err := bleve.New(idxPath, &mapping) + if err != nil { + return nil, nil, fmt.Errorf("error creating new index: %v", err) + } + // set a custom index name + index.SetName(datasetName) + + // index data + fis, err := ioutil.ReadDir(path) + if err != nil { + return nil, nil, fmt.Errorf("error reading data dir: %v", err) + } + for _, fi := range fis { + fileBytes, err := ioutil.ReadFile(path + string(filepath.Separator) + fi.Name()) + if err != nil { + return nil, nil, fmt.Errorf("error reading data file: %v", err) + } + var fileDoc interface{} + err = json.Unmarshal(fileBytes, &fileDoc) + if err != nil { + return nil, nil, fmt.Errorf("error parsing data file as json: %v", err) + } + filename := fi.Name() + ext := filepath.Ext(filename) + id := filename[0 : len(filename)-len(ext)] + err = index.Index(id, fileDoc) + if err != nil { + return nil, nil, fmt.Errorf("error indexing data: %v", err) + } + } + cleanup := func() { + 
err := index.Close() + if err != nil { + t.Fatalf("error closing index: %v", err) + } + if !*keepIndex { + err := os.RemoveAll(idxPath) + if err != nil { + t.Fatalf("error removing index: %v", err) + } + } + } + return index, cleanup, nil +} + +func loadDataSets(t *testing.T, datasetName string, mapping mapping.IndexMappingImpl, path string) (bleve.Index, func(), error) { + fis, err := ioutil.ReadDir(path) + if err != nil { + return nil, nil, fmt.Errorf("error reading datasets dir: %v", err) + } + var cleanups []func() + alias := bleve.NewIndexAlias() + for _, fi := range fis { + idx, idxCleanup, err := loadDataSet(t, fi.Name(), mapping, path+string(filepath.Separator)+fi.Name()) + if err != nil { + return nil, nil, fmt.Errorf("error loading dataset: %v", err) + } + cleanups = append(cleanups, idxCleanup) + alias.Add(idx) + } + alias.SetName(datasetName) + + cleanupAll := func() { + for _, cleanup := range cleanups { + cleanup() + } + } + + return alias, cleanupAll, nil +} diff --git a/test/tests/alias/datasets/shard0/a.json b/test/tests/alias/datasets/shard0/a.json new file mode 100644 index 000000000..1aeaf2c81 --- /dev/null +++ b/test/tests/alias/datasets/shard0/a.json @@ -0,0 +1,3 @@ +{ + "name": "a" +} \ No newline at end of file diff --git a/test/tests/alias/datasets/shard0/c.json b/test/tests/alias/datasets/shard0/c.json new file mode 100644 index 000000000..a584a5486 --- /dev/null +++ b/test/tests/alias/datasets/shard0/c.json @@ -0,0 +1,3 @@ +{ + "name": "c" +} \ No newline at end of file diff --git a/test/tests/alias/datasets/shard1/b.json b/test/tests/alias/datasets/shard1/b.json new file mode 100644 index 000000000..1c67e0d00 --- /dev/null +++ b/test/tests/alias/datasets/shard1/b.json @@ -0,0 +1,3 @@ +{ + "name": "b" +} \ No newline at end of file diff --git a/test/tests/alias/datasets/shard1/d.json b/test/tests/alias/datasets/shard1/d.json new file mode 100644 index 000000000..0a478db4d --- /dev/null +++ b/test/tests/alias/datasets/shard1/d.json @@ -0,0 
+1,3 @@ +{ + "name": "d" +} \ No newline at end of file diff --git a/test/tests/alias/mapping.json b/test/tests/alias/mapping.json new file mode 100644 index 000000000..7f9bae748 --- /dev/null +++ b/test/tests/alias/mapping.json @@ -0,0 +1,3 @@ +{ + "default_analyzer": "keyword" +} diff --git a/test/tests/alias/searches.json b/test/tests/alias/searches.json new file mode 100644 index 000000000..fb8dd06d4 --- /dev/null +++ b/test/tests/alias/searches.json @@ -0,0 +1,76 @@ +[ + { + "comment": "match all across shards", + "search": { + "from": 0, + "size": 10, + "sort": ["-_score", "_id"], + "query": { + "match_all": {} + } + }, + "result": { + "total_hits": 4, + "hits": [ + { + "id": "a" + }, + { + "id": "b" + }, + { + "id": "c" + }, + { + "id": "d" + } + ] + } + }, + { + "comment": "search after b (page 2 when size=2)", + "search": { + "from": 0, + "size": 2, + "sort": ["name"], + "search_after": ["b"], + "query": { + "match_all": {} + } + }, + "result": { + "total_hits": 4, + "hits": [ + { + "id": "c" + }, + { + "id": "d" + } + ] + } + }, + { + "comment": "search before c (page 1 when size=2)", + "search": { + "from": 0, + "size": 2, + "sort": ["name"], + "search_before": ["c"], + "query": { + "match_all": {} + } + }, + "result": { + "total_hits": 4, + "hits": [ + { + "id": "a" + }, + { + "id": "b" + } + ] + } + } +] \ No newline at end of file diff --git a/test/tests/sort/searches.json b/test/tests/sort/searches.json index bb51720a8..8b32df5d0 100644 --- a/test/tests/sort/searches.json +++ b/test/tests/sort/searches.json @@ -406,5 +406,57 @@ } ] } - } + }, + { + "comment": "sort by name, ascending, after marty", + "search": { + "from": 0, + "size": 10, + "query": { + "match_all":{} + }, + "sort": ["name"], + "search_after": ["marty"] + }, + "result": { + "total_hits": 6, + "hits": [ + { + "id": "e" + }, + { + "id": "b" + }, + { + "id": "d" + } + ] + } + }, + { + "comment": "sort by name, ascending, before nancy", + "search": { + "from": 0, + "size": 10, + "query": 
{ + "match_all":{} + }, + "sort": ["name"], + "search_before": ["nancy"] + }, + "result": { + "total_hits": 6, + "hits": [ + { + "id": "c" + }, + { + "id": "f" + }, + { + "id": "a" + } + ] + } + } ] From 6225a102a716e2cacf9e6b1e5faf692b7dcc790f Mon Sep 17 00:00:00 2001 From: Sreekanth Sivasankaran Date: Thu, 30 May 2019 12:08:42 +0530 Subject: [PATCH 602/728] fix to avoid double prefix coding --- search/searcher/search_geoboundingbox.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/search/searcher/search_geoboundingbox.go b/search/searcher/search_geoboundingbox.go index ad20d1a0f..6ec5e72dd 100644 --- a/search/searcher/search_geoboundingbox.go +++ b/search/searcher/search_geoboundingbox.go @@ -150,9 +150,9 @@ func ComputeGeoRange(term uint64, shift uint, codedTerm := makePrefixCoded(int64(start), res) if isIndexed(codedTerm) { if !within && checkBoundaries { - onBoundary = append(onBoundary, makePrefixCoded(int64(start), res)) + onBoundary = append(onBoundary, codedTerm) } else { - notOnBoundary = append(notOnBoundary, makePrefixCoded(int64(start), res)) + notOnBoundary = append(notOnBoundary, codedTerm) } } } else if level < geoDetailLevel && From 4e0f481955fd1cf003ee0baf796be2e230c18ff7 Mon Sep 17 00:00:00 2001 From: abhinavdangeti Date: Thu, 30 May 2019 13:04:39 -0700 Subject: [PATCH 603/728] MB-33617: Supporting the "reverse" token filter This token filter will simply reverse each token. --- analysis/token/reverse/reverse.go | 66 ++++++++++ analysis/token/reverse/reverse_test.go | 163 +++++++++++++++++++++++++ config/config.go | 1 + 3 files changed, 230 insertions(+) create mode 100644 analysis/token/reverse/reverse.go create mode 100644 analysis/token/reverse/reverse_test.go diff --git a/analysis/token/reverse/reverse.go b/analysis/token/reverse/reverse.go new file mode 100644 index 000000000..2dd16a531 --- /dev/null +++ b/analysis/token/reverse/reverse.go @@ -0,0 +1,66 @@ +// Copyright (c) 2019 Couchbase, Inc. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package reverse + +import ( + "unicode/utf8" + + "github.com/blevesearch/bleve/analysis" + "github.com/blevesearch/bleve/registry" +) + +// Name is the name used to register ReverseFilter in the bleve registry +const Name = "reverse" + +type ReverseFilter struct { +} + +func NewReverseFilter() *ReverseFilter { + return &ReverseFilter{} +} + +func (f *ReverseFilter) Filter(input analysis.TokenStream) analysis.TokenStream { + for _, token := range input { + token.Term = reverse(token.Term) + } + return input +} + +func ReverseFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) { + return NewReverseFilter(), nil +} + +func init() { + registry.RegisterTokenFilter(Name, ReverseFilterConstructor) +} + +// reverse(..) will generate a reversed version of the provided +// utf-8 encoded byte array and return it back to its caller. +func reverse(s []byte) []byte { + j := len(s) + rv := make([]byte, len(s)) + for i := 0; i < len(s); { + wid := 1 + r := rune(s[i]) + if r >= utf8.RuneSelf { + r, wid = utf8.DecodeRune(s[i:]) + } + + copy(rv[j-wid:j], s[i:i+wid]) + i += wid + j -= wid + } + return rv +} diff --git a/analysis/token/reverse/reverse_test.go b/analysis/token/reverse/reverse_test.go new file mode 100644 index 000000000..6437ecdd3 --- /dev/null +++ b/analysis/token/reverse/reverse_test.go @@ -0,0 +1,163 @@ +// Copyright (c) 2019 Couchbase, Inc. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package reverse + +import ( + "bytes" + "testing" + + "github.com/blevesearch/bleve/analysis" +) + +func TestReverseFilter(t *testing.T) { + inputTokenStream := analysis.TokenStream{ + &analysis.Token{ + Term: []byte("one"), + }, + &analysis.Token{ + Term: []byte("TWo"), + }, + &analysis.Token{ + Term: []byte("thRee"), + }, + &analysis.Token{ + Term: []byte("four's"), + }, + &analysis.Token{ + Term: []byte("what's this in reverse"), + }, + &analysis.Token{ + Term: []byte("œ∑´®†"), + }, + &analysis.Token{ + Term: []byte("İȺȾCAT÷≥≤µ123"), + }, + &analysis.Token{ + Term: []byte("!@#$%^&*()"), + }, + &analysis.Token{}, + } + + expectedTokenStream := analysis.TokenStream{ + &analysis.Token{ + Term: []byte("eno"), + }, + &analysis.Token{ + Term: []byte("oWT"), + }, + &analysis.Token{ + Term: []byte("eeRht"), + }, + &analysis.Token{ + Term: []byte("s'ruof"), + }, + &analysis.Token{ + Term: []byte("esrever ni siht s'tahw"), + }, + &analysis.Token{ + Term: []byte("†®´∑œ"), + }, + &analysis.Token{ + Term: []byte("321µ≤≥÷TACȾȺİ"), + }, + &analysis.Token{ + Term: []byte(")(*&^%$#@!"), + }, + &analysis.Token{}, + } + + filter := NewReverseFilter() + outputTokenStream := filter.Filter(inputTokenStream) + for i := 0; i < len(expectedTokenStream); i++ { + if !bytes.Equal(outputTokenStream[i].Term, expectedTokenStream[i].Term) { + t.Errorf("[%d] expected %s got %s", + i+1, expectedTokenStream[i].Term, 
outputTokenStream[i].Term) + } + } +} + +func BenchmarkReverseFilter(b *testing.B) { + input := analysis.TokenStream{ + &analysis.Token{ + Term: []byte("A"), + }, + &analysis.Token{ + Term: []byte("boiling"), + }, + &analysis.Token{ + Term: []byte("liquid"), + }, + &analysis.Token{ + Term: []byte("expanding"), + }, + &analysis.Token{ + Term: []byte("vapor"), + }, + &analysis.Token{ + Term: []byte("explosion"), + }, + &analysis.Token{ + Term: []byte("caused"), + }, + &analysis.Token{ + Term: []byte("by"), + }, + &analysis.Token{ + Term: []byte("the"), + }, + &analysis.Token{ + Term: []byte("rupture"), + }, + &analysis.Token{ + Term: []byte("of"), + }, + &analysis.Token{ + Term: []byte("a"), + }, + &analysis.Token{ + Term: []byte("vessel"), + }, + &analysis.Token{ + Term: []byte("containing"), + }, + &analysis.Token{ + Term: []byte("pressurized"), + }, + &analysis.Token{ + Term: []byte("liquid"), + }, + &analysis.Token{ + Term: []byte("above"), + }, + &analysis.Token{ + Term: []byte("its"), + }, + &analysis.Token{ + Term: []byte("boiling"), + }, + &analysis.Token{ + Term: []byte("point"), + }, + &analysis.Token{ + Term: []byte("İȺȾCAT"), + }, + } + filter := NewReverseFilter() + + b.ResetTimer() + for i := 0; i < b.N; i++ { + filter.Filter(input) + } +} diff --git a/config/config.go b/config/config.go index cf2827a5c..5550d130b 100644 --- a/config/config.go +++ b/config/config.go @@ -52,6 +52,7 @@ import ( _ "github.com/blevesearch/bleve/analysis/token/length" _ "github.com/blevesearch/bleve/analysis/token/lowercase" _ "github.com/blevesearch/bleve/analysis/token/ngram" + _ "github.com/blevesearch/bleve/analysis/token/reverse" _ "github.com/blevesearch/bleve/analysis/token/shingle" _ "github.com/blevesearch/bleve/analysis/token/stop" _ "github.com/blevesearch/bleve/analysis/token/truncate" From 99bcef21a044bf98f1f8ab07042bf4b25236f210 Mon Sep 17 00:00:00 2001 From: Darren Hoo Date: Sat, 1 Jun 2019 00:30:26 +0800 Subject: [PATCH 604/728] makes Scorch and cachedDocs 
aligned so that scorch run properly on arm Update: https://github.com/blevesearch/bleve/issues/1231 --- index/scorch/scorch.go | 8 ++++---- index/scorch/snapshot_segment.go | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/index/scorch/scorch.go b/index/scorch/scorch.go index 2641b3b19..522feca19 100644 --- a/index/scorch/scorch.go +++ b/index/scorch/scorch.go @@ -41,12 +41,14 @@ const Version uint8 = 2 var ErrClosed = fmt.Errorf("scorch closed") type Scorch struct { + nextSegmentID uint64 + stats Stats + iStats internalStats + readOnly bool version uint8 config map[string]interface{} analysisQueue *index.AnalysisQueue - stats Stats - nextSegmentID uint64 path string unsafeBatch bool @@ -73,8 +75,6 @@ type Scorch struct { onEvent func(event Event) onAsyncError func(err error) - iStats internalStats - pauseLock sync.RWMutex pauseCount uint64 diff --git a/index/scorch/snapshot_segment.go b/index/scorch/snapshot_segment.go index f3a2c56a9..96742b4f9 100644 --- a/index/scorch/snapshot_segment.go +++ b/index/scorch/snapshot_segment.go @@ -183,9 +183,9 @@ func (cfd *cachedFieldDocs) prepareField(field string, ss *SegmentSnapshot) { } type cachedDocs struct { + size uint64 m sync.Mutex // As the cache is asynchronously prepared, need a lock cache map[string]*cachedFieldDocs // Keyed by field - size uint64 } func (c *cachedDocs) prepareFields(wantedFields []string, ss *SegmentSnapshot) error { From 2d7b31a60fcb28149a6bf3c426cea06f349f3ca1 Mon Sep 17 00:00:00 2001 From: Sreekanth Sivasankaran Date: Wed, 29 May 2019 18:03:01 +0530 Subject: [PATCH 605/728] dictionary random Lookup api draft version of having a new Exists iterator api implementation for the FieldDictionary --- index/index.go | 10 +++++++ index/scorch/segment/empty.go | 14 ++++++++++ index/scorch/segment/segment.go | 6 ++++ index/scorch/segment/zap/dict.go | 31 +++++++++++++++++++++ index/scorch/snapshot_index.go | 43 +++++++++++++++++++++++++++++ index/scorch/snapshot_index_dict.go | 15 
++++++++++ 6 files changed, 119 insertions(+) diff --git a/index/index.go b/index/index.go index 6aa444cfd..aadd8f5e2 100644 --- a/index/index.go +++ b/index/index.go @@ -121,6 +121,11 @@ type IndexReaderOnly interface { FieldDictOnly(field string, onlyTerms [][]byte, includeCount bool) (FieldDict, error) } +// better naming everywhere +type IndexReaderRandom interface { + FieldDictRandom(field string) (AdvFieldDict, error) +} + // FieldTerms contains the terms used by a document, keyed by field type FieldTerms map[string][]string @@ -230,6 +235,11 @@ type FieldDict interface { Close() error } +type AdvFieldDict interface { + FieldDict + Exists(key []byte) (error, bool) +} + // DocIDReader is the interface exposing enumeration of documents identifiers. // Close the reader to release associated resources. type DocIDReader interface { diff --git a/index/scorch/segment/empty.go b/index/scorch/segment/empty.go index 165a01bc1..6163eb577 100644 --- a/index/scorch/segment/empty.go +++ b/index/scorch/segment/empty.go @@ -91,12 +91,26 @@ func (e *EmptyDictionary) OnlyIterator(onlyTerms [][]byte, return &EmptyDictionaryIterator{} } +func (e *EmptyDictionary) ExistsIterator() AdvDictionaryIterator { + return &EmptyDictionaryLookupIterator{} +} + type EmptyDictionaryIterator struct{} func (e *EmptyDictionaryIterator) Next() (*index.DictEntry, error) { return nil, nil } +type EmptyDictionaryLookupIterator struct{} + +func (e *EmptyDictionaryLookupIterator) Next() (*index.DictEntry, error) { + return nil, nil +} + +func (e *EmptyDictionaryLookupIterator) Exists(key []byte) (error, bool) { + return nil, false +} + func (e *EmptyPostingsIterator) Advance(uint64) (Posting, error) { return nil, nil } diff --git a/index/scorch/segment/segment.go b/index/scorch/segment/segment.go index b94d6f979..87f608a69 100644 --- a/index/scorch/segment/segment.go +++ b/index/scorch/segment/segment.go @@ -59,12 +59,18 @@ type TermDictionary interface { AutomatonIterator(a vellum.Automaton, 
startKeyInclusive, endKeyExclusive []byte) DictionaryIterator OnlyIterator(onlyTerms [][]byte, includeCount bool) DictionaryIterator + ExistsIterator() AdvDictionaryIterator } type DictionaryIterator interface { Next() (*index.DictEntry, error) } +type AdvDictionaryIterator interface { + DictionaryIterator + Exists(key []byte) (error, bool) +} + type PostingsList interface { Iterator(includeFreq, includeNorm, includeLocations bool, prealloc PostingsIterator) PostingsIterator diff --git a/index/scorch/segment/zap/dict.go b/index/scorch/segment/zap/dict.go index 190265d6e..08f89739f 100644 --- a/index/scorch/segment/zap/dict.go +++ b/index/scorch/segment/zap/dict.go @@ -228,6 +228,24 @@ func (d *Dictionary) OnlyIterator(onlyTerms [][]byte, return rv } +// ExistsIterator returns an exists iterator for this dictionary +func (d *Dictionary) ExistsIterator() segment.AdvDictionaryIterator { + rv := &DictionaryIterator{ + d: d, + } + + if d.fst != nil { + itr, err := d.fst.Iterator(nil, nil) + if err == nil { + rv.itr = itr + } else if err != vellum.ErrIteratorDone { + rv.err = err + } + } + + return rv +} + // DictionaryIterator is an iterator for term dictionary type DictionaryIterator struct { d *Dictionary @@ -257,3 +275,16 @@ func (i *DictionaryIterator) Next() (*index.DictEntry, error) { i.err = i.itr.Next() return &i.entry, nil } + +func (i *DictionaryIterator) Exists(key []byte) (error, bool) { + if i.err != nil && i.err != vellum.ErrIteratorDone { + return i.err, false + } else if i.itr == nil || i.err == vellum.ErrIteratorDone { + return nil, false + } + if advItr, ok := i.itr.(vellum.AdvIterator); ok { + return advItr.Exists(key) + } + + return fmt.Errorf("no implementation found"), false +} diff --git a/index/scorch/snapshot_index.go b/index/scorch/snapshot_index.go index 8babb31fa..094227f2d 100644 --- a/index/scorch/snapshot_index.go +++ b/index/scorch/snapshot_index.go @@ -172,6 +172,43 @@ func (i *IndexSnapshot) newIndexSnapshotFieldDict(field string, 
makeItr func(i s return rv, nil } +func (i *IndexSnapshot) newIndexSnapshotFieldDictRandom(field string, makeItr func(i segment.TermDictionary) segment.AdvDictionaryIterator) (*IndexSnapshotFieldDict, error) { + results := make(chan *asynchSegmentResult) + for index, segment := range i.segment { + go func(index int, segment *SegmentSnapshot) { + dict, err := segment.segment.Dictionary(field) + if err != nil { + results <- &asynchSegmentResult{err: err} + } else { + results <- &asynchSegmentResult{dictItr: makeItr(dict)} + } + }(index, segment) + } + + var err error + rv := &IndexSnapshotFieldDict{ + snapshot: i, + cursors: make([]*segmentDictCursor, 0, len(i.segment)), + } + for count := 0; count < len(i.segment); count++ { + asr := <-results + if asr.err != nil && err == nil { + err = asr.err + } else { + rv.cursors = append(rv.cursors, &segmentDictCursor{ + itr: asr.dictItr, + }) + } + + } + // after ensuring we've read all items on channel + if err != nil { + return nil, err + } + + return rv, nil +} + func (i *IndexSnapshot) FieldDict(field string) (index.FieldDict, error) { return i.newIndexSnapshotFieldDict(field, func(i segment.TermDictionary) segment.DictionaryIterator { return i.Iterator() @@ -242,6 +279,12 @@ func (i *IndexSnapshot) FieldDictOnly(field string, }) } +func (i *IndexSnapshot) FieldDictRandom(field string) (index.AdvFieldDict, error) { + return i.newIndexSnapshotFieldDictRandom(field, func(i segment.TermDictionary) segment.AdvDictionaryIterator { + return i.ExistsIterator() + }) +} + func (i *IndexSnapshot) DocIDReaderAll() (index.DocIDReader, error) { results := make(chan *asynchSegmentResult) for index, segment := range i.segment { diff --git a/index/scorch/snapshot_index_dict.go b/index/scorch/snapshot_index_dict.go index abd3bde8c..33e13fc8a 100644 --- a/index/scorch/snapshot_index_dict.go +++ b/index/scorch/snapshot_index_dict.go @@ -91,3 +91,18 @@ func (i *IndexSnapshotFieldDict) Next() (*index.DictEntry, error) { func (i 
*IndexSnapshotFieldDict) Close() error { return nil } + +func (i *IndexSnapshotFieldDict) Exists(key []byte) (error, bool) { + if len(i.cursors) == 0 { + return nil, false + } + + for _, cursor := range i.cursors { + if itr, ok := cursor.itr.(segment.AdvDictionaryIterator); ok { + if _, found := itr.Exists(key); found { + return nil, true + } + } + } + return nil, false +} From 7ca4ea2cd8bb55fa4578b89a0731f83f83b27f68 Mon Sep 17 00:00:00 2001 From: abhinavdangeti Date: Mon, 3 Jun 2019 12:00:40 -0700 Subject: [PATCH 606/728] MB-33617: Support unicode strings with combining characters Context: reverse-token-filter --- analysis/token/reverse/reverse.go | 35 ++++++++++++++++---------- analysis/token/reverse/reverse_test.go | 25 ++++++++++++++++-- 2 files changed, 45 insertions(+), 15 deletions(-) diff --git a/analysis/token/reverse/reverse.go b/analysis/token/reverse/reverse.go index 2dd16a531..671ed8992 100644 --- a/analysis/token/reverse/reverse.go +++ b/analysis/token/reverse/reverse.go @@ -15,6 +15,7 @@ package reverse import ( + "unicode" "unicode/utf8" "github.com/blevesearch/bleve/analysis" @@ -47,20 +48,28 @@ func init() { } // reverse(..) will generate a reversed version of the provided -// utf-8 encoded byte array and return it back to its caller. +// unicode array and return it back to its caller. 
func reverse(s []byte) []byte { - j := len(s) - rv := make([]byte, len(s)) - for i := 0; i < len(s); { - wid := 1 - r := rune(s[i]) - if r >= utf8.RuneSelf { - r, wid = utf8.DecodeRune(s[i:]) + cursorIn := 0 + inputRunes := []rune(string(s)) + cursorOut := len(s) + output := make([]byte, len(s)) + for i := 0; i < len(inputRunes); { + wid := utf8.RuneLen(inputRunes[i]) + i++ + for i < len(inputRunes) { + r := inputRunes[i] + if unicode.Is(unicode.Mn, r) || unicode.Is(unicode.Me, r) || unicode.Is(unicode.Mc, r) { + wid += utf8.RuneLen(r) + i++ + } else { + break + } } - - copy(rv[j-wid:j], s[i:i+wid]) - i += wid - j -= wid + copy(output[cursorOut-wid:cursorOut], s[cursorIn:cursorIn+wid]) + cursorIn += wid + cursorOut -= wid } - return rv + + return output } diff --git a/analysis/token/reverse/reverse_test.go b/analysis/token/reverse/reverse_test.go index 6437ecdd3..8e84ab983 100644 --- a/analysis/token/reverse/reverse_test.go +++ b/analysis/token/reverse/reverse_test.go @@ -23,6 +23,7 @@ import ( func TestReverseFilter(t *testing.T) { inputTokenStream := analysis.TokenStream{ + &analysis.Token{}, &analysis.Token{ Term: []byte("one"), }, @@ -47,10 +48,19 @@ func TestReverseFilter(t *testing.T) { &analysis.Token{ Term: []byte("!@#$%^&*()"), }, - &analysis.Token{}, + &analysis.Token{ + Term: []byte("cafés"), + }, + &analysis.Token{ + Term: []byte("¿Dónde estás?"), + }, + &analysis.Token{ + Term: []byte("Me gustaría una cerveza."), + }, } expectedTokenStream := analysis.TokenStream{ + &analysis.Token{}, &analysis.Token{ Term: []byte("eno"), }, @@ -75,7 +85,15 @@ func TestReverseFilter(t *testing.T) { &analysis.Token{ Term: []byte(")(*&^%$#@!"), }, - &analysis.Token{}, + &analysis.Token{ + Term: []byte("séfac"), + }, + &analysis.Token{ + Term: []byte("?sátse ednóD¿"), + }, + &analysis.Token{ + Term: []byte(".azevrec anu aíratsug eM"), + }, } filter := NewReverseFilter() @@ -153,6 +171,9 @@ func BenchmarkReverseFilter(b *testing.B) { &analysis.Token{ Term: 
[]byte("İȺȾCAT"), }, + &analysis.Token{ + Term: []byte("Me gustaría una cerveza."), + }, } filter := NewReverseFilter() From 0d9d5dee5b5c82639402eb3fc1ef957178d5a78f Mon Sep 17 00:00:00 2001 From: Sreekanth Sivasankaran Date: Mon, 3 Jun 2019 11:31:13 +0530 Subject: [PATCH 607/728] refactoring to handle errs, save castings etc --- index/index.go | 1 - index/scorch/segment/empty.go | 12 +--- index/scorch/segment/segment.go | 6 +- index/scorch/segment/zap/dict.go | 14 ++--- index/scorch/snapshot_index.go | 80 ++++++++---------------- index/scorch/snapshot_index_dict.go | 7 +-- search/searcher/search_geoboundingbox.go | 31 ++++++--- 7 files changed, 61 insertions(+), 90 deletions(-) diff --git a/index/index.go b/index/index.go index aadd8f5e2..5c604a133 100644 --- a/index/index.go +++ b/index/index.go @@ -121,7 +121,6 @@ type IndexReaderOnly interface { FieldDictOnly(field string, onlyTerms [][]byte, includeCount bool) (FieldDict, error) } -// better naming everywhere type IndexReaderRandom interface { FieldDictRandom(field string) (AdvFieldDict, error) } diff --git a/index/scorch/segment/empty.go b/index/scorch/segment/empty.go index 6163eb577..c9f003311 100644 --- a/index/scorch/segment/empty.go +++ b/index/scorch/segment/empty.go @@ -91,8 +91,8 @@ func (e *EmptyDictionary) OnlyIterator(onlyTerms [][]byte, return &EmptyDictionaryIterator{} } -func (e *EmptyDictionary) ExistsIterator() AdvDictionaryIterator { - return &EmptyDictionaryLookupIterator{} +func (e *EmptyDictionary) ExistsIterator() DictionaryIterator { + return &EmptyDictionaryIterator{} } type EmptyDictionaryIterator struct{} @@ -101,13 +101,7 @@ func (e *EmptyDictionaryIterator) Next() (*index.DictEntry, error) { return nil, nil } -type EmptyDictionaryLookupIterator struct{} - -func (e *EmptyDictionaryLookupIterator) Next() (*index.DictEntry, error) { - return nil, nil -} - -func (e *EmptyDictionaryLookupIterator) Exists(key []byte) (error, bool) { +func (e *EmptyDictionaryIterator) Exists(key []byte) 
(error, bool) { return nil, false } diff --git a/index/scorch/segment/segment.go b/index/scorch/segment/segment.go index 87f608a69..f8b6e3edb 100644 --- a/index/scorch/segment/segment.go +++ b/index/scorch/segment/segment.go @@ -59,15 +59,11 @@ type TermDictionary interface { AutomatonIterator(a vellum.Automaton, startKeyInclusive, endKeyExclusive []byte) DictionaryIterator OnlyIterator(onlyTerms [][]byte, includeCount bool) DictionaryIterator - ExistsIterator() AdvDictionaryIterator + ExistsIterator() DictionaryIterator } type DictionaryIterator interface { Next() (*index.DictEntry, error) -} - -type AdvDictionaryIterator interface { - DictionaryIterator Exists(key []byte) (error, bool) } diff --git a/index/scorch/segment/zap/dict.go b/index/scorch/segment/zap/dict.go index 08f89739f..9a9ee6b86 100644 --- a/index/scorch/segment/zap/dict.go +++ b/index/scorch/segment/zap/dict.go @@ -229,9 +229,10 @@ func (d *Dictionary) OnlyIterator(onlyTerms [][]byte, } // ExistsIterator returns an exists iterator for this dictionary -func (d *Dictionary) ExistsIterator() segment.AdvDictionaryIterator { +func (d *Dictionary) ExistsIterator() segment.DictionaryIterator { rv := &DictionaryIterator{ - d: d, + d: d, + omitCount: true, } if d.fst != nil { @@ -279,12 +280,9 @@ func (i *DictionaryIterator) Next() (*index.DictEntry, error) { func (i *DictionaryIterator) Exists(key []byte) (error, bool) { if i.err != nil && i.err != vellum.ErrIteratorDone { return i.err, false - } else if i.itr == nil || i.err == vellum.ErrIteratorDone { - return nil, false } - if advItr, ok := i.itr.(vellum.AdvIterator); ok { - return advItr.Exists(key) + if i.itr == nil || i.err == vellum.ErrIteratorDone { + return nil, false } - - return fmt.Errorf("no implementation found"), false + return i.itr.Exists(key) } diff --git a/index/scorch/snapshot_index.go b/index/scorch/snapshot_index.go index 094227f2d..6da417881 100644 --- a/index/scorch/snapshot_index.go +++ b/index/scorch/snapshot_index.go @@ -126,7 
+126,9 @@ func (i *IndexSnapshot) updateSize() { } } -func (i *IndexSnapshot) newIndexSnapshotFieldDict(field string, makeItr func(i segment.TermDictionary) segment.DictionaryIterator) (*IndexSnapshotFieldDict, error) { +func (i *IndexSnapshot) newIndexSnapshotFieldDict(field string, + makeItr func(i segment.TermDictionary) segment.DictionaryIterator, + randomLookup bool) (*IndexSnapshotFieldDict, error) { results := make(chan *asynchSegmentResult) for index, segment := range i.segment { @@ -150,14 +152,20 @@ func (i *IndexSnapshot) newIndexSnapshotFieldDict(field string, makeItr func(i s if asr.err != nil && err == nil { err = asr.err } else { - next, err2 := asr.dictItr.Next() - if err2 != nil && err == nil { - err = err2 - } - if next != nil { + if !randomLookup { + next, err2 := asr.dictItr.Next() + if err2 != nil && err == nil { + err = err2 + } + if next != nil { + rv.cursors = append(rv.cursors, &segmentDictCursor{ + itr: asr.dictItr, + curr: *next, + }) + } + } else { rv.cursors = append(rv.cursors, &segmentDictCursor{ - itr: asr.dictItr, - curr: *next, + itr: asr.dictItr, }) } } @@ -166,44 +174,10 @@ func (i *IndexSnapshot) newIndexSnapshotFieldDict(field string, makeItr func(i s if err != nil { return nil, err } - // prepare heap - heap.Init(rv) - - return rv, nil -} - -func (i *IndexSnapshot) newIndexSnapshotFieldDictRandom(field string, makeItr func(i segment.TermDictionary) segment.AdvDictionaryIterator) (*IndexSnapshotFieldDict, error) { - results := make(chan *asynchSegmentResult) - for index, segment := range i.segment { - go func(index int, segment *SegmentSnapshot) { - dict, err := segment.segment.Dictionary(field) - if err != nil { - results <- &asynchSegmentResult{err: err} - } else { - results <- &asynchSegmentResult{dictItr: makeItr(dict)} - } - }(index, segment) - } - - var err error - rv := &IndexSnapshotFieldDict{ - snapshot: i, - cursors: make([]*segmentDictCursor, 0, len(i.segment)), - } - for count := 0; count < len(i.segment); count++ { 
- asr := <-results - if asr.err != nil && err == nil { - err = asr.err - } else { - rv.cursors = append(rv.cursors, &segmentDictCursor{ - itr: asr.dictItr, - }) - } - } - // after ensuring we've read all items on channel - if err != nil { - return nil, err + if !randomLookup { + // prepare heap + heap.Init(rv) } return rv, nil @@ -212,21 +186,21 @@ func (i *IndexSnapshot) newIndexSnapshotFieldDictRandom(field string, makeItr fu func (i *IndexSnapshot) FieldDict(field string) (index.FieldDict, error) { return i.newIndexSnapshotFieldDict(field, func(i segment.TermDictionary) segment.DictionaryIterator { return i.Iterator() - }) + }, false) } func (i *IndexSnapshot) FieldDictRange(field string, startTerm []byte, endTerm []byte) (index.FieldDict, error) { return i.newIndexSnapshotFieldDict(field, func(i segment.TermDictionary) segment.DictionaryIterator { return i.RangeIterator(string(startTerm), string(endTerm)) - }) + }, false) } func (i *IndexSnapshot) FieldDictPrefix(field string, termPrefix []byte) (index.FieldDict, error) { return i.newIndexSnapshotFieldDict(field, func(i segment.TermDictionary) segment.DictionaryIterator { return i.PrefixIterator(string(termPrefix)) - }) + }, false) } func (i *IndexSnapshot) FieldDictRegexp(field string, @@ -241,7 +215,7 @@ func (i *IndexSnapshot) FieldDictRegexp(field string, return i.newIndexSnapshotFieldDict(field, func(i segment.TermDictionary) segment.DictionaryIterator { return i.AutomatonIterator(a, prefixBeg, prefixEnd) - }) + }, false) } func (i *IndexSnapshot) getLevAutomaton(term string, @@ -269,20 +243,20 @@ func (i *IndexSnapshot) FieldDictFuzzy(field string, return i.newIndexSnapshotFieldDict(field, func(i segment.TermDictionary) segment.DictionaryIterator { return i.AutomatonIterator(a, prefixBeg, prefixEnd) - }) + }, false) } func (i *IndexSnapshot) FieldDictOnly(field string, onlyTerms [][]byte, includeCount bool) (index.FieldDict, error) { return i.newIndexSnapshotFieldDict(field, func(i segment.TermDictionary) 
segment.DictionaryIterator { return i.OnlyIterator(onlyTerms, includeCount) - }) + }, false) } func (i *IndexSnapshot) FieldDictRandom(field string) (index.AdvFieldDict, error) { - return i.newIndexSnapshotFieldDictRandom(field, func(i segment.TermDictionary) segment.AdvDictionaryIterator { + return i.newIndexSnapshotFieldDict(field, func(i segment.TermDictionary) segment.DictionaryIterator { return i.ExistsIterator() - }) + }, true) } func (i *IndexSnapshot) DocIDReaderAll() (index.DocIDReader, error) { diff --git a/index/scorch/snapshot_index_dict.go b/index/scorch/snapshot_index_dict.go index 33e13fc8a..2d1614d46 100644 --- a/index/scorch/snapshot_index_dict.go +++ b/index/scorch/snapshot_index_dict.go @@ -98,11 +98,10 @@ func (i *IndexSnapshotFieldDict) Exists(key []byte) (error, bool) { } for _, cursor := range i.cursors { - if itr, ok := cursor.itr.(segment.AdvDictionaryIterator); ok { - if _, found := itr.Exists(key); found { - return nil, true - } + if _, found := cursor.itr.Exists(key); found { + return nil, true } } + return nil, false } diff --git a/search/searcher/search_geoboundingbox.go b/search/searcher/search_geoboundingbox.go index 6ec5e72dd..ee9c3e061 100644 --- a/search/searcher/search_geoboundingbox.go +++ b/search/searcher/search_geoboundingbox.go @@ -118,19 +118,30 @@ func ComputeGeoRange(term uint64, shift uint, return rv } - isIndexed := func(term []byte) bool { - if indexReader != nil { - reader, err := indexReader.TermFieldReader(term, field, false, false, false) - if err != nil || reader == nil { - return false + var fieldDict index.AdvFieldDict + if irr, ok := indexReader.(index.IndexReaderRandom); ok { + fieldDict, err = irr.FieldDictRandom(field) + if err != nil { + return nil, nil, err + } + } + + defer func() { + if fieldDict != nil { + cerr := fieldDict.Close() + if cerr != nil { + err = cerr } - if reader.Count() == 0 { - _ = reader.Close() - return false + } + }() + + isIndexed := func(term []byte) bool { + if fieldDict != nil { + 
if err, found := fieldDict.Exists(term); found && err == nil { + return true } - _ = reader.Close() } - return true + return false } var computeGeoRange func(term uint64, shift uint) // declare for recursion From fc8e1392425eebbfe6143fb3d7313bee789b759c Mon Sep 17 00:00:00 2001 From: Sreekanth Sivasankaran Date: Thu, 6 Jun 2019 10:30:34 +0530 Subject: [PATCH 608/728] vellum SHA bump - new Exists API --- vendor/manifest | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vendor/manifest b/vendor/manifest index 729d8a64f..f4be538bb 100644 --- a/vendor/manifest +++ b/vendor/manifest @@ -137,7 +137,7 @@ "importpath": "github.com/couchbase/vellum", "repository": "https://github.com/couchbase/vellum", "vcs": "git", - "revision": "28880ab96d9361ab5a74f0e12000f8fe0cd20712", + "revision": "5f4edc22838b1433cdb9793bcac21c9376bb4753", "branch": "master", "notests": true } From eb619881fe5c7bbf95ce30059ef5cc5989e5cbe4 Mon Sep 17 00:00:00 2001 From: Sreekanth Sivasankaran Date: Thu, 6 Jun 2019 13:58:58 +0530 Subject: [PATCH 609/728] EnumeratorTests adopting Exists method -implementing the new Exists method from vellum Iterator interface --- index/scorch/segment/zap/enumerator_test.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/index/scorch/segment/zap/enumerator_test.go b/index/scorch/segment/zap/enumerator_test.go index b27788923..4ba4aa4c0 100644 --- a/index/scorch/segment/zap/enumerator_test.go +++ b/index/scorch/segment/zap/enumerator_test.go @@ -231,3 +231,7 @@ func (m *testIterator) Close() error { m.curr = 654321 return nil } + +func (m *testIterator) Exists(key []byte) (bool, error) { + return false, fmt.Errorf("not implemented for enumerator unit tests") +} From 72eacd7c7687d1c6ba34a15bbf82585d7e6b2632 Mon Sep 17 00:00:00 2001 From: Sreekanth Sivasankaran Date: Thu, 6 Jun 2019 14:14:06 +0530 Subject: [PATCH 610/728] support for upside_down indexType too e Please enter the commit message for your changes. 
Lines starting --- index/index.go | 9 +++--- index/scorch/segment/empty.go | 4 +-- index/scorch/segment/segment.go | 2 +- index/scorch/segment/zap/dict.go | 6 ++-- index/scorch/snapshot_index.go | 2 +- index/scorch/snapshot_index_dict.go | 10 +++--- search/searcher/search_geoboundingbox.go | 40 +++++++++++++++++------- 7 files changed, 45 insertions(+), 28 deletions(-) diff --git a/index/index.go b/index/index.go index 5c604a133..a470f5669 100644 --- a/index/index.go +++ b/index/index.go @@ -121,8 +121,8 @@ type IndexReaderOnly interface { FieldDictOnly(field string, onlyTerms [][]byte, includeCount bool) (FieldDict, error) } -type IndexReaderRandom interface { - FieldDictRandom(field string) (AdvFieldDict, error) +type IndexReaderExists interface { + FieldDictExists(field string) (FieldDictExists, error) } // FieldTerms contains the terms used by a document, keyed by field @@ -234,9 +234,8 @@ type FieldDict interface { Close() error } -type AdvFieldDict interface { - FieldDict - Exists(key []byte) (error, bool) +type FieldDictExists interface { + Exists(key []byte) (bool, error) } // DocIDReader is the interface exposing enumeration of documents identifiers. 
diff --git a/index/scorch/segment/empty.go b/index/scorch/segment/empty.go index c9f003311..020790ca1 100644 --- a/index/scorch/segment/empty.go +++ b/index/scorch/segment/empty.go @@ -101,8 +101,8 @@ func (e *EmptyDictionaryIterator) Next() (*index.DictEntry, error) { return nil, nil } -func (e *EmptyDictionaryIterator) Exists(key []byte) (error, bool) { - return nil, false +func (e *EmptyDictionaryIterator) Exists(key []byte) (bool, error) { + return false, nil } func (e *EmptyPostingsIterator) Advance(uint64) (Posting, error) { diff --git a/index/scorch/segment/segment.go b/index/scorch/segment/segment.go index f8b6e3edb..9261c2282 100644 --- a/index/scorch/segment/segment.go +++ b/index/scorch/segment/segment.go @@ -64,7 +64,7 @@ type TermDictionary interface { type DictionaryIterator interface { Next() (*index.DictEntry, error) - Exists(key []byte) (error, bool) + Exists(key []byte) (bool, error) } type PostingsList interface { diff --git a/index/scorch/segment/zap/dict.go b/index/scorch/segment/zap/dict.go index 9a9ee6b86..cea0fe307 100644 --- a/index/scorch/segment/zap/dict.go +++ b/index/scorch/segment/zap/dict.go @@ -277,12 +277,12 @@ func (i *DictionaryIterator) Next() (*index.DictEntry, error) { return &i.entry, nil } -func (i *DictionaryIterator) Exists(key []byte) (error, bool) { +func (i *DictionaryIterator) Exists(key []byte) (bool, error) { if i.err != nil && i.err != vellum.ErrIteratorDone { - return i.err, false + return false, i.err } if i.itr == nil || i.err == vellum.ErrIteratorDone { - return nil, false + return false, nil } return i.itr.Exists(key) } diff --git a/index/scorch/snapshot_index.go b/index/scorch/snapshot_index.go index 6da417881..434d94ea4 100644 --- a/index/scorch/snapshot_index.go +++ b/index/scorch/snapshot_index.go @@ -253,7 +253,7 @@ func (i *IndexSnapshot) FieldDictOnly(field string, }, false) } -func (i *IndexSnapshot) FieldDictRandom(field string) (index.AdvFieldDict, error) { +func (i *IndexSnapshot) 
FieldDictExists(field string) (index.FieldDictExists, error) { return i.newIndexSnapshotFieldDict(field, func(i segment.TermDictionary) segment.DictionaryIterator { return i.ExistsIterator() }, true) diff --git a/index/scorch/snapshot_index_dict.go b/index/scorch/snapshot_index_dict.go index 2d1614d46..028c401e4 100644 --- a/index/scorch/snapshot_index_dict.go +++ b/index/scorch/snapshot_index_dict.go @@ -92,16 +92,16 @@ func (i *IndexSnapshotFieldDict) Close() error { return nil } -func (i *IndexSnapshotFieldDict) Exists(key []byte) (error, bool) { +func (i *IndexSnapshotFieldDict) Exists(key []byte) (bool, error) { if len(i.cursors) == 0 { - return nil, false + return false, nil } for _, cursor := range i.cursors { - if _, found := cursor.itr.Exists(key); found { - return nil, true + if found, _ := cursor.itr.Exists(key); found { + return true, nil } } - return nil, false + return false, nil } diff --git a/search/searcher/search_geoboundingbox.go b/search/searcher/search_geoboundingbox.go index ee9c3e061..c81cdd9d8 100644 --- a/search/searcher/search_geoboundingbox.go +++ b/search/searcher/search_geoboundingbox.go @@ -22,6 +22,8 @@ import ( "github.com/blevesearch/bleve/search" ) +type filterFunc func(key []byte) bool + var GeoBitsShift1 = (geo.GeoBits << 1) var GeoBitsShift1Minus1 = GeoBitsShift1 - 1 @@ -118,30 +120,46 @@ func ComputeGeoRange(term uint64, shift uint, return rv } - var fieldDict index.AdvFieldDict - if irr, ok := indexReader.(index.IndexReaderRandom); ok { - fieldDict, err = irr.FieldDictRandom(field) + var fieldDict index.FieldDictExists + var isIndexed filterFunc + if irr, ok := indexReader.(index.IndexReaderExists); ok { + fieldDict, err = irr.FieldDictExists(field) if err != nil { return nil, nil, err } + + isIndexed = func(term []byte) bool { + found, err := fieldDict.Exists(term) + return err == nil && found + } } defer func() { if fieldDict != nil { - cerr := fieldDict.Close() - if cerr != nil { - err = cerr + if fd, ok := 
fieldDict.(index.FieldDict); ok { + cerr := fd.Close() + if cerr != nil { + err = cerr + } } } }() - isIndexed := func(term []byte) bool { - if fieldDict != nil { - if err, found := fieldDict.Exists(term); found && err == nil { - return true + if isIndexed == nil { + isIndexed = func(term []byte) bool { + if indexReader != nil { + reader, err := indexReader.TermFieldReader(term, field, false, false, false) + if err != nil || reader == nil { + return false + } + if reader.Count() == 0 { + _ = reader.Close() + return false + } + _ = reader.Close() } + return true } - return false } var computeGeoRange func(term uint64, shift uint) // declare for recursion From 7e5b7c200faf569915e2417393916bc36126a6e7 Mon Sep 17 00:00:00 2001 From: Sreekanth Sivasankaran Date: Thu, 6 Jun 2019 14:42:54 +0530 Subject: [PATCH 611/728] Numeric range query adopts Exists Iterator Numeric range query to use the new Exists iterator from Vellum for filtering the candidate terms for scorch index types. Empirical evidence shows that this brought about 44% throughput improvements for qps. 
--- search/searcher/search_numeric_range.go | 53 ++++++++++++++++---- search/searcher/search_numeric_range_test.go | 2 +- 2 files changed, 45 insertions(+), 10 deletions(-) diff --git a/search/searcher/search_numeric_range.go b/search/searcher/search_numeric_range.go index e52ef9a82..0bc1f6e4a 100644 --- a/search/searcher/search_numeric_range.go +++ b/search/searcher/search_numeric_range.go @@ -53,20 +53,49 @@ func NewNumericRangeSearcher(indexReader index.IndexReader, if !*inclusiveMax && maxInt64 != math.MinInt64 { maxInt64-- } + + var fieldDict index.FieldDictExists + var isIndexed filterFunc + var err error + if irr, ok := indexReader.(index.IndexReaderExists); ok { + fieldDict, err = irr.FieldDictExists(field) + if err != nil { + return nil, err + } + + isIndexed = func(term []byte) bool { + found, err := fieldDict.Exists(term) + return err == nil && found + } + } + // FIXME hard-coded precision, should match field declaration termRanges := splitInt64Range(minInt64, maxInt64, 4) - terms := termRanges.Enumerate() + terms := termRanges.Enumerate(isIndexed) + if fieldDict != nil { + if fd, ok := fieldDict.(index.FieldDict); ok { + cerr := fd.Close() + if cerr != nil { + err = cerr + } + } + } + if len(terms) < 1 { // cannot return MatchNoneSearcher because of interaction with // commit f391b991c20f02681bacd197afc6d8aed444e132 return NewMultiTermSearcherBytes(indexReader, terms, field, boost, options, true) } - var err error - terms, err = filterCandidateTerms(indexReader, terms, field) - if err != nil { - return nil, err + + // for upside_down + if isIndexed == nil { + terms, err = filterCandidateTerms(indexReader, terms, field) + if err != nil { + return nil, err + } } + if tooManyClauses(len(terms)) { return nil, tooManyClausesErr(len(terms)) } @@ -125,11 +154,17 @@ type termRange struct { endTerm []byte } -func (t *termRange) Enumerate() [][]byte { +func (t *termRange) Enumerate(filter filterFunc) [][]byte { var rv [][]byte next := t.startTerm for 
bytes.Compare(next, t.endTerm) <= 0 { - rv = append(rv, next) + if filter != nil { + if filter(next) { + rv = append(rv, next) + } + } else { + rv = append(rv, next) + } next = incrementBytes(next) } return rv @@ -150,10 +185,10 @@ func incrementBytes(in []byte) []byte { type termRanges []*termRange -func (tr termRanges) Enumerate() [][]byte { +func (tr termRanges) Enumerate(filter filterFunc) [][]byte { var rv [][]byte for _, tri := range tr { - trie := tri.Enumerate() + trie := tri.Enumerate(filter) rv = append(rv, trie...) } return rv diff --git a/search/searcher/search_numeric_range_test.go b/search/searcher/search_numeric_range_test.go index 9a04fd62e..b19e1b24a 100644 --- a/search/searcher/search_numeric_range_test.go +++ b/search/searcher/search_numeric_range_test.go @@ -25,7 +25,7 @@ func TestSplitRange(t *testing.T) { min := numeric.Float64ToInt64(1.0) max := numeric.Float64ToInt64(5.0) ranges := splitInt64Range(min, max, 4) - enumerated := ranges.Enumerate() + enumerated := ranges.Enumerate(nil) if len(enumerated) != 135 { t.Errorf("expected 135 terms, got %d", len(enumerated)) } From 75cdaca51a2ad20227868a152dfb599981b93623 Mon Sep 17 00:00:00 2001 From: Tyler Kovacs Date: Mon, 10 Jun 2019 11:04:07 -0700 Subject: [PATCH 612/728] add missing setter method for SetSearchBefore (#1237) --- search.go | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/search.go b/search.go index e8d66f80a..b337edc9e 100644 --- a/search.go +++ b/search.go @@ -342,6 +342,12 @@ func (r *SearchRequest) SetSearchAfter(after []string) { r.SearchAfter = after } +// SetSearchBefore sets the request to skip over hits with a sort +// value greater than the provided sort before key +func (r *SearchRequest) SetSearchBefore(before []string) { + r.SearchBefore = before +} + // UnmarshalJSON deserializes a JSON representation of // a SearchRequest func (r *SearchRequest) UnmarshalJSON(input []byte) error { From aaf4e4546fe6c312d9e26438c4e40f0cdd4cac09 Mon Sep 17 00:00:00 2001 From: 
Marty Schoch Date: Mon, 10 Jun 2019 16:16:44 -0400 Subject: [PATCH 613/728] bump to latest vellum --- vendor/manifest | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vendor/manifest b/vendor/manifest index f4be538bb..2bba989a2 100644 --- a/vendor/manifest +++ b/vendor/manifest @@ -137,7 +137,7 @@ "importpath": "github.com/couchbase/vellum", "repository": "https://github.com/couchbase/vellum", "vcs": "git", - "revision": "5f4edc22838b1433cdb9793bcac21c9376bb4753", + "revision": "ec7b775d247f63c2ba2aee9dc9c4872a1b30058b", "branch": "master", "notests": true } From bfa42c077826f889a57797f7927c704ffbdfa9ad Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Tue, 11 Jun 2019 07:21:36 -0400 Subject: [PATCH 614/728] fix scorch regex literal prefix for case insensitive (#1245) the previous version incorrectly returned a literal prefix when case-insensitive matching was enabled --- index/scorch/segment/regexp.go | 2 +- index/scorch/segment/regexp_test.go | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/index/scorch/segment/regexp.go b/index/scorch/segment/regexp.go index 3aa151d64..3a31f4149 100644 --- a/index/scorch/segment/regexp.go +++ b/index/scorch/segment/regexp.go @@ -55,7 +55,7 @@ func LiteralPrefix(s *syntax.Regexp) string { s = s.Sub[0] } - if s.Op == syntax.OpLiteral { + if s.Op == syntax.OpLiteral && (s.Flags&syntax.FoldCase == 0) { return string(s.Rune) } diff --git a/index/scorch/segment/regexp_test.go b/index/scorch/segment/regexp_test.go index b4731d6b8..887476b28 100644 --- a/index/scorch/segment/regexp_test.go +++ b/index/scorch/segment/regexp_test.go @@ -40,6 +40,7 @@ func TestLiteralPrefix(t *testing.T) { {`^hello`, ""}, {`^`, ""}, {`$`, ""}, + {`(?i)mArTy`, ""}, } for i, test := range tests { From 0fc6e671a53546934073d041903bfbe52a61b4de Mon Sep 17 00:00:00 2001 From: abhinavdangeti Date: Wed, 12 Jun 2019 13:49:39 -0700 Subject: [PATCH 615/728] Disable errcheck on only go1.10 See.. 
https://github.com/blevesearch/bleve/issues/1183 --- .travis.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.travis.yml b/.travis.yml index 22612c966..e00e7b994 100644 --- a/.travis.yml +++ b/.travis.yml @@ -16,10 +16,10 @@ script: - go test -race -v $(go list ./... | grep -v vendor/) - go vet $(go list ./... | grep -v vendor/) - go test ./test -v -indexType scorch - - if [[ ${TRAVIS_GO_VERSION} =~ ^1\.11 ]]; then - errcheck -ignorepkg fmt $(go list ./... | grep -v vendor/); - else + - if [[ ${TRAVIS_GO_VERSION} =~ ^1\.10 ]]; then echo "errcheck skipped for go version" $TRAVIS_GO_VERSION; + else + errcheck -ignorepkg fmt $(go list ./... | grep -v vendor/); fi - docs/project-code-coverage.sh - docs/build_children.sh From 152c60bf035def2ba8327c17fc17bbb97476fc08 Mon Sep 17 00:00:00 2001 From: Sreekanth Sivasankaran Date: Tue, 18 Jun 2019 17:40:44 +0530 Subject: [PATCH 616/728] Adding Contains method to TermDictionary Dictionary's Contains api implmentation to use the FST's Contains method directly, and there is no need for a ContainsIterator for TD. 
--- index/index.go | 8 +++--- index/scorch/segment/empty.go | 6 ++--- index/scorch/segment/segment.go | 4 +-- index/scorch/segment/zap/dict.go | 33 +++--------------------- index/scorch/snapshot_index.go | 15 ++++++----- index/scorch/snapshot_index_dict.go | 5 ++-- search/searcher/search_geoboundingbox.go | 8 +++--- search/searcher/search_numeric_range.go | 8 +++--- 8 files changed, 33 insertions(+), 54 deletions(-) diff --git a/index/index.go b/index/index.go index a470f5669..3e866f3aa 100644 --- a/index/index.go +++ b/index/index.go @@ -121,8 +121,8 @@ type IndexReaderOnly interface { FieldDictOnly(field string, onlyTerms [][]byte, includeCount bool) (FieldDict, error) } -type IndexReaderExists interface { - FieldDictExists(field string) (FieldDictExists, error) +type IndexReaderContains interface { + FieldDictContains(field string) (FieldDictContains, error) } // FieldTerms contains the terms used by a document, keyed by field @@ -234,8 +234,8 @@ type FieldDict interface { Close() error } -type FieldDictExists interface { - Exists(key []byte) (bool, error) +type FieldDictContains interface { + Contains(key []byte) (bool, error) } // DocIDReader is the interface exposing enumeration of documents identifiers. 
diff --git a/index/scorch/segment/empty.go b/index/scorch/segment/empty.go index 020790ca1..fdc407a74 100644 --- a/index/scorch/segment/empty.go +++ b/index/scorch/segment/empty.go @@ -91,8 +91,8 @@ func (e *EmptyDictionary) OnlyIterator(onlyTerms [][]byte, return &EmptyDictionaryIterator{} } -func (e *EmptyDictionary) ExistsIterator() DictionaryIterator { - return &EmptyDictionaryIterator{} +func (e *EmptyDictionary) Contains(key []byte) (bool, error) { + return false, nil } type EmptyDictionaryIterator struct{} @@ -101,7 +101,7 @@ func (e *EmptyDictionaryIterator) Next() (*index.DictEntry, error) { return nil, nil } -func (e *EmptyDictionaryIterator) Exists(key []byte) (bool, error) { +func (e *EmptyDictionaryIterator) Contains(key []byte) (bool, error) { return false, nil } diff --git a/index/scorch/segment/segment.go b/index/scorch/segment/segment.go index 9261c2282..34c2bc204 100644 --- a/index/scorch/segment/segment.go +++ b/index/scorch/segment/segment.go @@ -59,12 +59,12 @@ type TermDictionary interface { AutomatonIterator(a vellum.Automaton, startKeyInclusive, endKeyExclusive []byte) DictionaryIterator OnlyIterator(onlyTerms [][]byte, includeCount bool) DictionaryIterator - ExistsIterator() DictionaryIterator + + Contains(key []byte) (bool, error) } type DictionaryIterator interface { Next() (*index.DictEntry, error) - Exists(key []byte) (bool, error) } type PostingsList interface { diff --git a/index/scorch/segment/zap/dict.go b/index/scorch/segment/zap/dict.go index cea0fe307..ad4a8f8dc 100644 --- a/index/scorch/segment/zap/dict.go +++ b/index/scorch/segment/zap/dict.go @@ -95,6 +95,10 @@ func (d *Dictionary) postingsListInit(rv *PostingsList, except *roaring.Bitmap) return rv } +func (d *Dictionary) Contains(key []byte) (bool, error) { + return d.fst.Contains(key) +} + // Iterator returns an iterator for this dictionary func (d *Dictionary) Iterator() segment.DictionaryIterator { rv := &DictionaryIterator{ @@ -228,25 +232,6 @@ func (d *Dictionary) 
OnlyIterator(onlyTerms [][]byte, return rv } -// ExistsIterator returns an exists iterator for this dictionary -func (d *Dictionary) ExistsIterator() segment.DictionaryIterator { - rv := &DictionaryIterator{ - d: d, - omitCount: true, - } - - if d.fst != nil { - itr, err := d.fst.Iterator(nil, nil) - if err == nil { - rv.itr = itr - } else if err != vellum.ErrIteratorDone { - rv.err = err - } - } - - return rv -} - // DictionaryIterator is an iterator for term dictionary type DictionaryIterator struct { d *Dictionary @@ -276,13 +261,3 @@ func (i *DictionaryIterator) Next() (*index.DictEntry, error) { i.err = i.itr.Next() return &i.entry, nil } - -func (i *DictionaryIterator) Exists(key []byte) (bool, error) { - if i.err != nil && i.err != vellum.ErrIteratorDone { - return false, i.err - } - if i.itr == nil || i.err == vellum.ErrIteratorDone { - return false, nil - } - return i.itr.Exists(key) -} diff --git a/index/scorch/snapshot_index.go b/index/scorch/snapshot_index.go index 434d94ea4..dada6b76c 100644 --- a/index/scorch/snapshot_index.go +++ b/index/scorch/snapshot_index.go @@ -35,6 +35,7 @@ import ( var lb1, lb2 *lev2.LevenshteinAutomatonBuilder type asynchSegmentResult struct { + dict segment.TermDictionary dictItr segment.DictionaryIterator index int @@ -137,7 +138,11 @@ func (i *IndexSnapshot) newIndexSnapshotFieldDict(field string, if err != nil { results <- &asynchSegmentResult{err: err} } else { - results <- &asynchSegmentResult{dictItr: makeItr(dict)} + if randomLookup { + results <- &asynchSegmentResult{dict: dict} + } else { + results <- &asynchSegmentResult{dictItr: makeItr(dict)} + } } }(index, segment) } @@ -165,7 +170,7 @@ func (i *IndexSnapshot) newIndexSnapshotFieldDict(field string, } } else { rv.cursors = append(rv.cursors, &segmentDictCursor{ - itr: asr.dictItr, + dict: asr.dict, }) } } @@ -253,10 +258,8 @@ func (i *IndexSnapshot) FieldDictOnly(field string, }, false) } -func (i *IndexSnapshot) FieldDictExists(field string) 
(index.FieldDictExists, error) { - return i.newIndexSnapshotFieldDict(field, func(i segment.TermDictionary) segment.DictionaryIterator { - return i.ExistsIterator() - }, true) +func (i *IndexSnapshot) FieldDictContains(field string) (index.FieldDictContains, error) { + return i.newIndexSnapshotFieldDict(field, nil, true) } func (i *IndexSnapshot) DocIDReaderAll() (index.DocIDReader, error) { diff --git a/index/scorch/snapshot_index_dict.go b/index/scorch/snapshot_index_dict.go index 028c401e4..47486c255 100644 --- a/index/scorch/snapshot_index_dict.go +++ b/index/scorch/snapshot_index_dict.go @@ -22,6 +22,7 @@ import ( ) type segmentDictCursor struct { + dict segment.TermDictionary itr segment.DictionaryIterator curr index.DictEntry } @@ -92,13 +93,13 @@ func (i *IndexSnapshotFieldDict) Close() error { return nil } -func (i *IndexSnapshotFieldDict) Exists(key []byte) (bool, error) { +func (i *IndexSnapshotFieldDict) Contains(key []byte) (bool, error) { if len(i.cursors) == 0 { return false, nil } for _, cursor := range i.cursors { - if found, _ := cursor.itr.Exists(key); found { + if found, _ := cursor.dict.Contains(key); found { return true, nil } } diff --git a/search/searcher/search_geoboundingbox.go b/search/searcher/search_geoboundingbox.go index c81cdd9d8..38cb6467f 100644 --- a/search/searcher/search_geoboundingbox.go +++ b/search/searcher/search_geoboundingbox.go @@ -120,16 +120,16 @@ func ComputeGeoRange(term uint64, shift uint, return rv } - var fieldDict index.FieldDictExists + var fieldDict index.FieldDictContains var isIndexed filterFunc - if irr, ok := indexReader.(index.IndexReaderExists); ok { - fieldDict, err = irr.FieldDictExists(field) + if irr, ok := indexReader.(index.IndexReaderContains); ok { + fieldDict, err = irr.FieldDictContains(field) if err != nil { return nil, nil, err } isIndexed = func(term []byte) bool { - found, err := fieldDict.Exists(term) + found, err := fieldDict.Contains(term) return err == nil && found } } diff --git 
a/search/searcher/search_numeric_range.go b/search/searcher/search_numeric_range.go index 0bc1f6e4a..83107f020 100644 --- a/search/searcher/search_numeric_range.go +++ b/search/searcher/search_numeric_range.go @@ -54,17 +54,17 @@ func NewNumericRangeSearcher(indexReader index.IndexReader, maxInt64-- } - var fieldDict index.FieldDictExists + var fieldDict index.FieldDictContains var isIndexed filterFunc var err error - if irr, ok := indexReader.(index.IndexReaderExists); ok { - fieldDict, err = irr.FieldDictExists(field) + if irr, ok := indexReader.(index.IndexReaderContains); ok { + fieldDict, err = irr.FieldDictContains(field) if err != nil { return nil, err } isIndexed = func(term []byte) bool { - found, err := fieldDict.Exists(term) + found, err := fieldDict.Contains(term) return err == nil && found } } From 0f78b62e3372d9e8b49f18d5787c6498f1de1ecf Mon Sep 17 00:00:00 2001 From: Sreekanth Sivasankaran Date: Wed, 26 Jun 2019 21:03:19 +0530 Subject: [PATCH 617/728] bump to latest vellum SHA --- vendor/manifest | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vendor/manifest b/vendor/manifest index 2bba989a2..20788833f 100644 --- a/vendor/manifest +++ b/vendor/manifest @@ -137,7 +137,7 @@ "importpath": "github.com/couchbase/vellum", "repository": "https://github.com/couchbase/vellum", "vcs": "git", - "revision": "ec7b775d247f63c2ba2aee9dc9c4872a1b30058b", + "revision": "41f2deade2cfab59facd263e918d7c05f656c2e9", "branch": "master", "notests": true } From a74500143a531ba1b2a7518fea0611863848a08a Mon Sep 17 00:00:00 2001 From: abhinavdangeti Date: Mon, 1 Jul 2019 15:15:27 -0700 Subject: [PATCH 618/728] Preserve persistedCallbacks of an unpersisted snapshot until needed The persistedCallbacks of a snapshot that failed persistence need to be preserved until one of the two events has occurred: - a new snapshot was introduced - the retry-attempt of the exact same snapshot succeeded --- index/scorch/persister.go | 21 +++++++++++++++++++++ 1 file changed, 
21 insertions(+) diff --git a/index/scorch/persister.go b/index/scorch/persister.go index e15fa2ab6..7c3e179fc 100644 --- a/index/scorch/persister.go +++ b/index/scorch/persister.go @@ -90,6 +90,10 @@ func (s *Scorch) persisterLoop() { var persistWatchers []*epochWatcher var lastPersistedEpoch, lastMergedEpoch uint64 var ew *epochWatcher + + var lastUnpersistedEpoch uint64 + var unpersistedCallbacks []index.BatchCallback + po, err := s.parsePersisterOptions() if err != nil { s.fireAsyncError(fmt.Errorf("persisterOptions json parsing err: %v", err)) @@ -149,11 +153,28 @@ OUTER: _ = ourSnapshot.DecRef() break OUTER } + + // save this current snapshot's epoch and persistedCallbacks, for + // a possible retry attempt in persisting the same snapshot again. + lastUnpersistedEpoch = ourSnapshot.epoch + unpersistedCallbacks = ourPersistedCallbacks + s.fireAsyncError(fmt.Errorf("got err persisting snapshot: %v", err)) _ = ourSnapshot.DecRef() atomic.AddUint64(&s.stats.TotPersistLoopErr, 1) continue OUTER } + + if ourPersistedCallbacks == nil && + lastUnpersistedEpoch == ourSnapshot.epoch { + // in the event of this being a retry attempt for persisting a snapshot + // that had earlier failed, retrieve the persistedCallbacks associated + // with that snapshot. + ourPersistedCallbacks = unpersistedCallbacks + lastUnpersistedEpoch = 0 + unpersistedCallbacks = nil + } + for i := range ourPersistedCallbacks { ourPersistedCallbacks[i](err) } From 732150df0a0890fbcb96ca9f8f4162502f31c82c Mon Sep 17 00:00:00 2001 From: abhinavdangeti Date: Tue, 2 Jul 2019 14:24:34 -0700 Subject: [PATCH 619/728] Prepend earlier segment's persisted callbacks to latest This is so that no persistedCallbacks of older segments are dropped when persisting a snapshot of segments finally does succeed. 
--- index/scorch/persister.go | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/index/scorch/persister.go b/index/scorch/persister.go index 7c3e179fc..5af6ad532 100644 --- a/index/scorch/persister.go +++ b/index/scorch/persister.go @@ -91,7 +91,6 @@ func (s *Scorch) persisterLoop() { var lastPersistedEpoch, lastMergedEpoch uint64 var ew *epochWatcher - var lastUnpersistedEpoch uint64 var unpersistedCallbacks []index.BatchCallback po, err := s.parsePersisterOptions() @@ -154,10 +153,9 @@ OUTER: break OUTER } - // save this current snapshot's epoch and persistedCallbacks, for - // a possible retry attempt in persisting the same snapshot again. - lastUnpersistedEpoch = ourSnapshot.epoch - unpersistedCallbacks = ourPersistedCallbacks + // save this current snapshot's persistedCallbacks, to invoke during + // the retry attempt + unpersistedCallbacks = append(unpersistedCallbacks, ourPersistedCallbacks...) s.fireAsyncError(fmt.Errorf("got err persisting snapshot: %v", err)) _ = ourSnapshot.DecRef() @@ -165,13 +163,11 @@ OUTER: continue OUTER } - if ourPersistedCallbacks == nil && - lastUnpersistedEpoch == ourSnapshot.epoch { + if unpersistedCallbacks != nil { // in the event of this being a retry attempt for persisting a snapshot - // that had earlier failed, retrieve the persistedCallbacks associated - // with that snapshot. - ourPersistedCallbacks = unpersistedCallbacks - lastUnpersistedEpoch = 0 + // that had earlier failed, prepend the persistedCallbacks associated + // with earlier segment(s) to the latest persistedCallbacks + ourPersistedCallbacks = append(unpersistedCallbacks, ourPersistedCallbacks...) 
unpersistedCallbacks = nil } From 41fd936002da36ce8d9589dac39cea3526784507 Mon Sep 17 00:00:00 2001 From: abhinavdangeti Date: Mon, 1 Jul 2019 17:20:39 -0700 Subject: [PATCH 620/728] Include missing token filters + camelCase + unique --- config/config.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/config/config.go b/config/config.go index 5550d130b..5ed2d9c3f 100644 --- a/config/config.go +++ b/config/config.go @@ -45,6 +45,7 @@ import ( // token filters _ "github.com/blevesearch/bleve/analysis/token/apostrophe" + _ "github.com/blevesearch/bleve/analysis/token/camelcase" _ "github.com/blevesearch/bleve/analysis/token/compound" _ "github.com/blevesearch/bleve/analysis/token/edgengram" _ "github.com/blevesearch/bleve/analysis/token/elision" @@ -57,6 +58,7 @@ import ( _ "github.com/blevesearch/bleve/analysis/token/stop" _ "github.com/blevesearch/bleve/analysis/token/truncate" _ "github.com/blevesearch/bleve/analysis/token/unicodenorm" + _ "github.com/blevesearch/bleve/analysis/token/unique" // tokenizers _ "github.com/blevesearch/bleve/analysis/tokenizer/exception" From e5efb60815dfa171f6daacc120fc1c10494ff447 Mon Sep 17 00:00:00 2001 From: Sreekanth Sivasankaran Date: Wed, 31 Jul 2019 16:04:30 +0530 Subject: [PATCH 621/728] Tightening the scorch stale file clean ups Fix for #1269. It's observed in an under-resourced situation that the merger was treading ahead of the persister and the scorch files were piling up. It was reaching up to 70K with an update-heavy workload. The intention of this fix is to unflip any ineligible files which are genuinely eligible for clean up. Also, adding stats to track this for future. 
--- index/scorch/merge.go | 16 ++++++++++++++++ index/scorch/persister.go | 9 ++++----- index/scorch/scorch.go | 3 +++ index/scorch/stats.go | 3 +++ 4 files changed, 26 insertions(+), 5 deletions(-) diff --git a/index/scorch/merge.go b/index/scorch/merge.go index 83f98aab0..d7144772f 100644 --- a/index/scorch/merge.go +++ b/index/scorch/merge.go @@ -18,6 +18,7 @@ import ( "encoding/json" "fmt" "os" + "strings" "sync/atomic" "time" @@ -157,6 +158,7 @@ func (s *Scorch) planMergeAtSnapshot(ourSnapshot *IndexSnapshot, // process tasks in serial for now var notifications []chan *IndexSnapshot + var filenames []string for _, task := range resultMergePlan.Tasks { if len(task.Segments) == 0 { atomic.AddUint64(&s.stats.TotFileMergePlanTasksSegmentsEmpty, 1) @@ -181,6 +183,12 @@ func (s *Scorch) planMergeAtSnapshot(ourSnapshot *IndexSnapshot, segmentsToMerge = append(segmentsToMerge, zapSeg) docsToDrop = append(docsToDrop, segSnapshot.deleted) } + // track the files getting merged for unsetting the + // removal ineligibility. This helps to unflip files + // even with fast merger, slow persister work flows. 
+ path := zapSeg.Path() + filenames = append(filenames, + strings.TrimPrefix(path, s.path+string(os.PathSeparator))) } } } @@ -222,6 +230,7 @@ func (s *Scorch) planMergeAtSnapshot(ourSnapshot *IndexSnapshot, } err = zap.ValidateMerge(segmentsToMerge, nil, docsToDrop, seg.(*zap.Segment)) if err != nil { + s.unmarkIneligibleForRemoval(filename) return fmt.Errorf("merge validation failed: %v", err) } oldNewDocNums = make(map[uint64][]uint64) @@ -266,6 +275,13 @@ func (s *Scorch) planMergeAtSnapshot(ourSnapshot *IndexSnapshot, } } + // once all the newly merged segment introductions are done, + // its safe to unflip the removal ineligibility for the replaced + // older segments + for _, f := range filenames { + s.unmarkIneligibleForRemoval(f) + } + return nil } diff --git a/index/scorch/persister.go b/index/scorch/persister.go index 5af6ad532..b38ebcdec 100644 --- a/index/scorch/persister.go +++ b/index/scorch/persister.go @@ -764,12 +764,11 @@ func (s *Scorch) removeOldData() { if err != nil { s.fireAsyncError(fmt.Errorf("got err removing old bolt snapshots: %v", err)) } + atomic.AddUint64(&s.stats.TotSnapshotsRemovedFromMetaStore, uint64(removed)) - if removed > 0 { - err = s.removeOldZapFiles() - if err != nil { - s.fireAsyncError(fmt.Errorf("got err removing old zap files: %v", err)) - } + err = s.removeOldZapFiles() + if err != nil { + s.fireAsyncError(fmt.Errorf("got err removing old zap files: %v", err)) } } diff --git a/index/scorch/scorch.go b/index/scorch/scorch.go index 522feca19..44a97d1ea 100644 --- a/index/scorch/scorch.go +++ b/index/scorch/scorch.go @@ -490,6 +490,9 @@ func (s *Scorch) StatsMap() map[string]interface{} { m["CurOnDiskBytes"] = numBytesUsedDisk m["CurOnDiskFiles"] = numFilesOnDisk + s.rootLock.RLock() + m["CurFilesIneligibleForRemoval"] = uint64(len(s.ineligibleForRemoval)) + s.rootLock.RUnlock() // TODO: consider one day removing these backwards compatible // names for apps using the old names m["updates"] = m["TotUpdates"] diff --git 
a/index/scorch/stats.go b/index/scorch/stats.go index 2eb832f2c..6549fddf5 100644 --- a/index/scorch/stats.go +++ b/index/scorch/stats.go @@ -107,6 +107,9 @@ type Stats struct { TotFileMergeIntroductionsDone uint64 TotFileMergeIntroductionsSkipped uint64 + CurFilesIneligibleForRemoval uint64 + TotSnapshotsRemovedFromMetaStore uint64 + TotMemMergeBeg uint64 TotMemMergeErr uint64 TotMemMergeDone uint64 From 7965de170e4905501df6178cbfeb2eafda3aec2e Mon Sep 17 00:00:00 2001 From: Alexander Petrov Date: Mon, 5 Aug 2019 09:05:33 +0100 Subject: [PATCH 622/728] Replace brute force for-loop that calls HasNext/Next() with roaring.IntPeekable --- index/scorch/segment/zap/posting.go | 25 +++++++++++-------------- vendor/manifest | 2 +- 2 files changed, 12 insertions(+), 15 deletions(-) diff --git a/index/scorch/segment/zap/posting.go b/index/scorch/segment/zap/posting.go index 417e89b4d..4c43fdb9b 100644 --- a/index/scorch/segment/zap/posting.go +++ b/index/scorch/segment/zap/posting.go @@ -324,8 +324,8 @@ func (rv *PostingsList) init1Hit(fstVal uint64) error { // PostingsIterator provides a way to iterate through the postings list type PostingsIterator struct { postings *PostingsList - all roaring.IntIterable - Actual roaring.IntIterable + all roaring.IntPeekable + Actual roaring.IntPeekable ActualBM *roaring.Bitmap currChunk uint32 @@ -662,14 +662,14 @@ func (i *PostingsIterator) nextDocNumAtOrAfter(atOrAfter uint64) (uint64, bool, return i.nextDocNumAtOrAfterClean(atOrAfter) } - n := i.Actual.Next() - for uint64(n) < atOrAfter && i.Actual.HasNext() { - n = i.Actual.Next() - } - if uint64(n) < atOrAfter { + i.Actual.AdvanceIfNeeded(uint32(atOrAfter)) + + if !i.Actual.HasNext() { // couldn't find anything return 0, false, nil } + + n := i.Actual.Next() allN := i.all.Next() nChunk := n / i.postings.sb.chunkFactor @@ -706,23 +706,20 @@ func (i *PostingsIterator) nextDocNumAtOrAfter(atOrAfter uint64) (uint64, bool, // no deletions) where the all bitmap is the same as the actual 
bitmap func (i *PostingsIterator) nextDocNumAtOrAfterClean( atOrAfter uint64) (uint64, bool, error) { - n := i.Actual.Next() if !i.includeFreqNorm { - for uint64(n) < atOrAfter && i.Actual.HasNext() { - n = i.Actual.Next() - } + i.Actual.AdvanceIfNeeded(uint32(atOrAfter)) - if uint64(n) < atOrAfter { + if !i.Actual.HasNext() { return 0, false, nil // couldn't find anything } - return uint64(n), true, nil + return uint64(i.Actual.Next()), true, nil } // freq-norm's needed, so maintain freq-norm chunk reader sameChunkNexts := 0 // # of times we called Next() in the same chunk - + n := i.Actual.Next() nChunk := n / i.postings.sb.chunkFactor for uint64(n) < atOrAfter && i.Actual.HasNext() { diff --git a/vendor/manifest b/vendor/manifest index 20788833f..c600bde8a 100644 --- a/vendor/manifest +++ b/vendor/manifest @@ -78,7 +78,7 @@ "importpath": "github.com/RoaringBitmap/roaring", "repository": "https://github.com/RoaringBitmap/roaring", "vcs": "", - "revision": "01d244c43a7e8d1191a4f369f5908ea9eb9bc9ac", + "revision": "d0ce1763c3526f65703c395da50da7a7fb2138d5", "branch": "master", "notests": true }, From ee6b126383160d11cf538a751a3a62e066a40daa Mon Sep 17 00:00:00 2001 From: Sreekanth Sivasankaran Date: Wed, 31 Jul 2019 15:46:09 +0530 Subject: [PATCH 623/728] MB-35333 - Potential removeOldData call starve issue If the persister is busy doing the catch up with the ongoing mutations then there is a chance for the older snapshot clean up call/removeOldData to wait for a longer time to get invoked. This can result in unbounded growth in the number of scorch files. This change attempt to invoke removeOldData whenever the number of files grow beyond the thresholds. 
--- index/scorch/persister.go | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/index/scorch/persister.go b/index/scorch/persister.go index 5af6ad532..c05e1331a 100644 --- a/index/scorch/persister.go +++ b/index/scorch/persister.go @@ -245,20 +245,19 @@ func notifyMergeWatchers(lastPersistedEpoch uint64, return watchersNext } -func (s *Scorch) pausePersisterForMergerCatchUp(lastPersistedEpoch uint64, lastMergedEpoch uint64, - persistWatchers []*epochWatcher, po *persisterOptions) (uint64, []*epochWatcher) { +func (s *Scorch) pausePersisterForMergerCatchUp(lastPersistedEpoch uint64, + lastMergedEpoch uint64, persistWatchers []*epochWatcher, + po *persisterOptions) (uint64, []*epochWatcher) { - // first, let the watchers proceed if they lag behind + // First, let the watchers proceed if they lag behind persistWatchers = notifyMergeWatchers(lastPersistedEpoch, persistWatchers) - // check the merger lag by counting the segment files on disk, + // Check the merger lag by counting the segment files on disk, + numFilesOnDisk, _ := s.diskFileStats() + // On finding fewer files on disk, persister takes a short pause // for sufficient in-memory segments to pile up for the next // memory merge cum persist loop. - // On finding too many files on disk, persister pause until the merger - // catches up to reduce the segment file count under the threshold. - // But if there is memory pressure, then skip this sleep maneuvers. - numFilesOnDisk, _ := s.diskFileStats() if numFilesOnDisk < uint64(po.PersisterNapUnderNumFiles) && po.PersisterNapTimeMSec > 0 && s.paused() == 0 { select { @@ -276,6 +275,17 @@ func (s *Scorch) pausePersisterForMergerCatchUp(lastPersistedEpoch uint64, lastM return lastMergedEpoch, persistWatchers } + // Finding too many files on disk could be due to two reasons. + // 1. Too many older snapshots awaiting the clean up. + // 2. The merger could be lagging behind on merging the disk files. 
+ if numFilesOnDisk > uint64(po.PersisterNapUnderNumFiles) { + s.removeOldData() + numFilesOnDisk, _ = s.diskFileStats() + } + + // Persister pause until the merger catches up to reduce the segment + // file count under the threshold. + // But if there is memory pressure, then skip this sleep maneuvers. OUTER: for po.PersisterNapUnderNumFiles > 0 && numFilesOnDisk >= uint64(po.PersisterNapUnderNumFiles) && From c997533a776fe2f236621c653512f0678e88db1f Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Mon, 12 Aug 2019 13:43:55 -0400 Subject: [PATCH 624/728] add method to access analyzer of a text field (#1276) this change allows applications to see the analyzer set for a text field, useful if applications want to modify the document after mapping, but prior to indexing. --- document/field_text.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/document/field_text.go b/document/field_text.go index c8e871c9d..6bd74c712 100644 --- a/document/field_text.go +++ b/document/field_text.go @@ -86,6 +86,10 @@ func (t *TextField) Analyze() (int, analysis.TokenFrequencies) { return fieldLength, tokenFreqs } +func (t *TextField) Analyzer() *analysis.Analyzer { + return t.analyzer +} + func (t *TextField) Value() []byte { return t.value } From f1c9cb5659531140daeb291a3d9406b42821880d Mon Sep 17 00:00:00 2001 From: Sreekanth Sivasankaran Date: Fri, 23 Aug 2019 15:46:01 +0530 Subject: [PATCH 625/728] bumping the vellum SHA adopting the levenshtein package changes in vellum --- index/scorch/segment/zap/dict_test.go | 20 ++++++++++++++++---- index/scorch/snapshot_index.go | 8 ++++---- vendor/manifest | 2 +- 3 files changed, 21 insertions(+), 9 deletions(-) diff --git a/index/scorch/segment/zap/dict_test.go b/index/scorch/segment/zap/dict_test.go index 378abd854..c632917bc 100644 --- a/index/scorch/segment/zap/dict_test.go +++ b/index/scorch/segment/zap/dict_test.go @@ -181,6 +181,14 @@ func TestDictionary(t *testing.T) { } func TestDictionaryError(t *testing.T) { + hash := 
make(map[uint8]levenshtein.LevenshteinAutomatonBuilder, 4) + for i := 1; i <= 3; i++ { + lb, err := levenshtein.NewLevenshteinAutomatonBuilder(uint8(i), false) + if err != nil { + t.Errorf("NewLevenshteinAutomatonBuilder(%d, false) failed, err: %v", i, err) + } + hash[uint8(i)] = *lb + } _ = os.RemoveAll("/tmp/scorch.zap") @@ -206,7 +214,8 @@ func TestDictionaryError(t *testing.T) { t.Fatal(err) } - a, err := levenshtein.New("summer", 2) + lb := hash[uint8(2)] + a, err := lb.BuildDfa("summer", 2) if err != nil { t.Fatal(err) } @@ -222,7 +231,8 @@ func TestDictionaryError(t *testing.T) { t.Fatalf("expected nil error from iterator, got: %v", err) } - a, err = levenshtein.New("cat", 1) // cat & bat + lb = hash[uint8(1)] + a, err = lb.BuildDfa("cat", 1) // cat & bat if err != nil { t.Fatal(err) } @@ -241,7 +251,8 @@ func TestDictionaryError(t *testing.T) { t.Fatalf("expected nil next and nil err, got: %v, %v", nxt, err) } - a, err = levenshtein.New("cat", 2) // cat & bat + lb = hash[uint8(2)] + a, err = lb.BuildDfa("cat", 2) // cat & bat if err != nil { t.Fatal(err) } @@ -260,7 +271,8 @@ func TestDictionaryError(t *testing.T) { t.Fatalf("expected nil next and nil err, got: %v, %v", nxt, err) } - a, err = levenshtein.New("cat", 3) + lb = hash[uint8(3)] + a, err = lb.BuildDfa("cat", 3) if err != nil { t.Fatal(err) } diff --git a/index/scorch/snapshot_index.go b/index/scorch/snapshot_index.go index dada6b76c..47cc809b2 100644 --- a/index/scorch/snapshot_index.go +++ b/index/scorch/snapshot_index.go @@ -28,11 +28,11 @@ import ( "github.com/blevesearch/bleve/index" "github.com/blevesearch/bleve/index/scorch/segment" "github.com/couchbase/vellum" - lev2 "github.com/couchbase/vellum/levenshtein2" + lev "github.com/couchbase/vellum/levenshtein" ) // re usable, threadsafe levenshtein builders -var lb1, lb2 *lev2.LevenshteinAutomatonBuilder +var lb1, lb2 *lev.LevenshteinAutomatonBuilder type asynchSegmentResult struct { dict segment.TermDictionary @@ -52,11 +52,11 @@ func init() 
{ var is interface{} = IndexSnapshot{} reflectStaticSizeIndexSnapshot = int(reflect.TypeOf(is).Size()) var err error - lb1, err = lev2.NewLevenshteinAutomatonBuilder(1, true) + lb1, err = lev.NewLevenshteinAutomatonBuilder(1, true) if err != nil { panic(fmt.Errorf("Levenshtein automaton ed1 builder err: %v", err)) } - lb2, err = lev2.NewLevenshteinAutomatonBuilder(2, true) + lb2, err = lev.NewLevenshteinAutomatonBuilder(2, true) if err != nil { panic(fmt.Errorf("Levenshtein automaton ed2 builder err: %v", err)) } diff --git a/vendor/manifest b/vendor/manifest index c600bde8a..e6f985136 100644 --- a/vendor/manifest +++ b/vendor/manifest @@ -137,7 +137,7 @@ "importpath": "github.com/couchbase/vellum", "repository": "https://github.com/couchbase/vellum", "vcs": "git", - "revision": "41f2deade2cfab59facd263e918d7c05f656c2e9", + "revision": "dc222902e86f298bfae0b3dec6ba8b9d874ad5f8", "branch": "master", "notests": true } From ad3191238ac2f57961ba55130eef292bcb3570a9 Mon Sep 17 00:00:00 2001 From: Sreekanth Sivasankaran Date: Mon, 16 Sep 2019 14:34:18 +0530 Subject: [PATCH 626/728] =?UTF-8?q?Support=20for=20bounded=20polygon=20que?= =?UTF-8?q?ry=E2=80=A6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Attempt to introduce GeoBoundingPolygonQuery to enable the bounded polygon queries. Polygon searcher internally finds the bounded rectangle for the given polygon points and apply the filter which performs the point-in-polygon checks. In this iteration, as simplicity is preferred over precision(on boundary points), the point-in-polygon check is based on ray-casting algorithm. 
--- geo/geo.go | 38 +++ search/query/geo_boundingpolygon.go | 94 +++++++ search/query/query.go | 9 + search/searcher/search_geopointdistance.go | 11 +- search/searcher/search_geopolygon.go | 110 ++++++++ search/searcher/search_geopolygon_test.go | 291 +++++++++++++++++++++ 6 files changed, 548 insertions(+), 5 deletions(-) create mode 100644 search/query/geo_boundingpolygon.go create mode 100644 search/searcher/search_geopolygon.go create mode 100644 search/searcher/search_geopolygon_test.go diff --git a/geo/geo.go b/geo/geo.go index 86861b4f3..583451e30 100644 --- a/geo/geo.go +++ b/geo/geo.go @@ -37,6 +37,12 @@ var geoTolerance = 1E-6 var lonScale = float64((uint64(0x1)< lat) != (polygon[i-1].Lat > lat) && + lon < (polygon[i-1].Lon-polygon[i].Lon)*(lat-polygon[i].Lat)/ + (polygon[i-1].Lat-polygon[i].Lat)+polygon[i].Lon { + inside = !inside + } + } + return inside + + } + return false + } +} diff --git a/search/searcher/search_geopolygon_test.go b/search/searcher/search_geopolygon_test.go new file mode 100644 index 000000000..015d82611 --- /dev/null +++ b/search/searcher/search_geopolygon_test.go @@ -0,0 +1,291 @@ +// Copyright (c) 2019 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package searcher + +import ( + "reflect" + "testing" + + "github.com/blevesearch/bleve/document" + "github.com/blevesearch/bleve/geo" + "github.com/blevesearch/bleve/index" + "github.com/blevesearch/bleve/index/store/gtreap" + "github.com/blevesearch/bleve/index/upsidedown" + "github.com/blevesearch/bleve/search" +) + +func TestSimpleGeoPolygons(t *testing.T) { + + tests := []struct { + polygon []geo.Point + field string + want []string + }{ + // test points inside a triangle & on vertices + // r, s - inside and t,u - on vertices. + {[]geo.Point{{Lon: 1.0, Lat: 1.0}, {Lon: 2.0, Lat: 1.9}, {Lon: 2.0, Lat: 1.0}}, "loc", []string{"r", "s", "t", "u"}}, + // non overlapping polygon for the indexed documents + {[]geo.Point{{Lon: 3.0, Lat: 1.0}, {Lon: 4.0, Lat: 2.5}, {Lon: 3.0, Lat: 2}}, "loc", nil}, + } + i := setupGeoPolygonPoints(t) + indexReader, err := i.Reader() + if err != nil { + t.Error(err) + } + defer func() { + err = indexReader.Close() + if err != nil { + t.Fatal(err) + } + }() + + for _, test := range tests { + got, err := testGeoPolygonSearch(indexReader, test.polygon, test.field) + if err != nil { + t.Fatal(err) + } + if !reflect.DeepEqual(got, test.want) { + t.Errorf("expected %v, got %v for polygon: %+v", test.want, got, test.polygon) + } + } +} + +func TestRealGeoPolygons(t *testing.T) { + + tests := []struct { + polygon []geo.Point + field string + want []string + }{ + {[]geo.Point{{Lon: -80.881, Lat: 35.282}, {Lon: -80.858, Lat: 35.281}, + {Lon: -80.864, Lat: 35.270}}, "loc", []string{"k", "l"}}, + {[]geo.Point{{Lon: -82.467, Lat: 36.356}, {Lon: -78.127, Lat: 36.321}, {Lon: -80.555, Lat: 32.932}, + {Lon: -84.807, Lat: 33.111}}, "loc", []string{"k", "l", "m"}}, + // same polygon vertices + {[]geo.Point{{Lon: -82.467, Lat: 36.356}, {Lon: -82.467, Lat: 36.356}, {Lon: -82.467, Lat: 36.356}, {Lon: -82.467, Lat: 36.356}}, "loc", nil}, + // non-overlaping polygon + {[]geo.Point{{Lon: -89.113, Lat: 36.400}, {Lon: -93.947, Lat: 36.471}, {Lon: -93.947, Lat: 
34.031}}, "loc", nil}, + // concave polygon with a document `n` residing inside the hands, but outside the polygon + {[]geo.Point{{Lon: -71.65, Lat: 42.446}, {Lon: -71.649, Lat: 42.428}, {Lon: -71.640, Lat: 42.445}, {Lon: -71.649, Lat: 42.435}}, "loc", nil}, + // V like concave polygon with a document 'p' residing inside the bottom corner + {[]geo.Point{{Lon: -80.304, Lat: 40.740}, {Lon: -80.038, Lat: 40.239}, {Lon: -79.562, Lat: 40.786}, {Lon: -80.018, Lat: 40.328}}, "loc", []string{"p"}}, + {[]geo.Point{{Lon: -111.918, Lat: 33.515}, {Lon: -111.938, Lat: 33.494}, {Lon: -111.944, Lat: 33.481}, {Lon: -111.886, Lat: 33.517}, + {Lon: -111.919, Lat: 33.468}, {Lon: -111.929, Lat: 33.508}}, "loc", []string{"q"}}, + } + + i := setupGeoPolygonPoints(t) + indexReader, err := i.Reader() + if err != nil { + t.Error(err) + } + defer func() { + err = indexReader.Close() + if err != nil { + t.Fatal(err) + } + }() + + for _, test := range tests { + got, err := testGeoPolygonSearch(indexReader, test.polygon, test.field) + if err != nil { + t.Fatal(err) + } + if !reflect.DeepEqual(got, test.want) { + t.Errorf("expected %v, got %v for polygon: %+v", test.want, got, test.polygon) + } + } +} + +func TestGeoRectanglePolygon(t *testing.T) { + + tests := []struct { + polygon []geo.Point + field string + want []string + }{ + {[]geo.Point{{Lon: 0.001, Lat: 0.001}, {Lon: 85.002, Lat: 38.002}}, "loc", + []string{"a", "b", "c", "d", "e", "f", "g", "h", "i", "j"}}, + } + + i := setupGeo(t) + indexReader, err := i.Reader() + if err != nil { + t.Error(err) + } + defer func() { + err = indexReader.Close() + if err != nil { + t.Fatal(err) + } + }() + + for _, test := range tests { + got, err := testGeoPolygonSearch(indexReader, test.polygon, test.field) + if err != nil { + t.Fatal(err) + } + if !reflect.DeepEqual(got, test.want) { + t.Errorf("expected %v, got %v for polygon: %+v", test.want, got, test.polygon) + } + } +} + +func testGeoPolygonSearch(i index.IndexReader, polygon []geo.Point, field 
string) ([]string, error) { + var rv []string + gbs, err := NewGeoBoundedPolygonSearcher(i, polygon, field, 1.0, search.SearcherOptions{}) + if err != nil { + return nil, err + } + ctx := &search.SearchContext{ + DocumentMatchPool: search.NewDocumentMatchPool(gbs.DocumentMatchPoolSize(), 0), + } + docMatch, err := gbs.Next(ctx) + for docMatch != nil && err == nil { + rv = append(rv, string(docMatch.IndexInternalID)) + docMatch, err = gbs.Next(ctx) + } + if err != nil { + return nil, err + } + return rv, nil +} + +func setupGeoPolygonPoints(t *testing.T) index.Index { + analysisQueue := index.NewAnalysisQueue(1) + i, err := upsidedown.NewUpsideDownCouch( + gtreap.Name, + map[string]interface{}{ + "path": "", + }, + analysisQueue) + if err != nil { + t.Fatal(err) + } + err = i.Open() + if err != nil { + t.Fatal(err) + } + + err = i.Update(&document.Document{ + ID: "k", + Fields: []document.Field{ + document.NewGeoPointField("loc", []uint64{}, -80.86469327, 35.2782), + }, + }) + if err != nil { + t.Fatal(err) + } + + err = i.Update(&document.Document{ + ID: "l", + Fields: []document.Field{ + document.NewGeoPointField("loc", []uint64{}, -80.8713, 35.28138), + }, + }) + if err != nil { + t.Fatal(err) + } + + err = i.Update(&document.Document{ + ID: "m", + Fields: []document.Field{ + document.NewGeoPointField("loc", []uint64{}, -84.25, 33.153), + }, + }) + if err != nil { + t.Fatal(err) + } + + err = i.Update(&document.Document{ + ID: "n", + Fields: []document.Field{ + document.NewGeoPointField("loc", []uint64{}, -89.992, 35.063), + }, + }) + if err != nil { + t.Fatal(err) + } + + err = i.Update(&document.Document{ + ID: "o", + Fields: []document.Field{ + document.NewGeoPointField("loc", []uint64{}, -71.648, 42.437), + }, + }) + if err != nil { + t.Fatal(err) + } + + err = i.Update(&document.Document{ + ID: "p", + Fields: []document.Field{ + document.NewGeoPointField("loc", []uint64{}, -80.016, 40.314), + }, + }) + if err != nil { + t.Fatal(err) + } + + err = 
i.Update(&document.Document{ + ID: "q", + Fields: []document.Field{ + document.NewGeoPointField("loc", []uint64{}, -111.919, 33.494), + }, + }) + if err != nil { + t.Fatal(err) + } + + err = i.Update(&document.Document{ + ID: "r", + Fields: []document.Field{ + document.NewGeoPointField("loc", []uint64{}, 1.5, 1.1), + }, + }) + if err != nil { + t.Fatal(err) + } + + err = i.Update(&document.Document{ + ID: "s", + Fields: []document.Field{ + document.NewGeoPointField("loc", []uint64{}, 2, 1.5), + }, + }) + if err != nil { + t.Fatal(err) + } + + err = i.Update(&document.Document{ + ID: "t", + Fields: []document.Field{ + document.NewGeoPointField("loc", []uint64{}, 2.0, 1.9), + }, + }) + if err != nil { + t.Fatal(err) + } + + err = i.Update(&document.Document{ + ID: "u", + Fields: []document.Field{ + document.NewGeoPointField("loc", []uint64{}, 2.0, 1.0), + }, + }) + if err != nil { + t.Fatal(err) + } + return i +} From 39c1fb6b4bcdc34ec4910d138ac3c120fb8752e2 Mon Sep 17 00:00:00 2001 From: Sreekanth Sivasankaran Date: Tue, 17 Sep 2019 19:15:11 +0530 Subject: [PATCH 627/728] updating integration tests for geo polygon queries --- search/query/geo_boundingpolygon.go | 4 +- search/query/query.go | 2 +- search/searcher/search_geopolygon_test.go | 25 +++++ test/tests/geo/data/amoeba_brewery.json | 1 + test/tests/geo/data/communiti_brewery.json | 1 + test/tests/geo/data/social_brewery.json | 1 + test/tests/geo/searches.json | 102 +++++++++++++++++++++ 7 files changed, 133 insertions(+), 3 deletions(-) create mode 100644 test/tests/geo/data/amoeba_brewery.json create mode 100644 test/tests/geo/data/communiti_brewery.json create mode 100644 test/tests/geo/data/social_brewery.json diff --git a/search/query/geo_boundingpolygon.go b/search/query/geo_boundingpolygon.go index c227f8cda..41c7f7f3a 100644 --- a/search/query/geo_boundingpolygon.go +++ b/search/query/geo_boundingpolygon.go @@ -26,7 +26,7 @@ import ( ) type GeoBoundingPolygonQuery struct { - Points []geo.Point 
`json:"points"` + Points []geo.Point `json:"polygon_points"` FieldVal string `json:"field,omitempty"` BoostVal *Boost `json:"boost,omitempty"` } @@ -69,7 +69,7 @@ func (q *GeoBoundingPolygonQuery) Validate() error { func (q *GeoBoundingPolygonQuery) UnmarshalJSON(data []byte) error { tmp := struct { - Points []interface{} `json:"points"` + Points []interface{} `json:"polygon_points"` FieldVal string `json:"field,omitempty"` BoostVal *Boost `json:"boost,omitempty"` }{} diff --git a/search/query/query.go b/search/query/query.go index 0c926508e..18aca228d 100644 --- a/search/query/query.go +++ b/search/query/query.go @@ -273,7 +273,7 @@ func ParseQuery(input []byte) (Query, error) { } return &rv, nil } - _, hasPoints := tmp["points"] + _, hasPoints := tmp["polygon_points"] if hasPoints { var rv GeoBoundingPolygonQuery err := json.Unmarshal(input, &rv) diff --git a/search/searcher/search_geopolygon_test.go b/search/searcher/search_geopolygon_test.go index 015d82611..8a6ef8f5a 100644 --- a/search/searcher/search_geopolygon_test.go +++ b/search/searcher/search_geopolygon_test.go @@ -83,6 +83,10 @@ func TestRealGeoPolygons(t *testing.T) { {[]geo.Point{{Lon: -80.304, Lat: 40.740}, {Lon: -80.038, Lat: 40.239}, {Lon: -79.562, Lat: 40.786}, {Lon: -80.018, Lat: 40.328}}, "loc", []string{"p"}}, {[]geo.Point{{Lon: -111.918, Lat: 33.515}, {Lon: -111.938, Lat: 33.494}, {Lon: -111.944, Lat: 33.481}, {Lon: -111.886, Lat: 33.517}, {Lon: -111.919, Lat: 33.468}, {Lon: -111.929, Lat: 33.508}}, "loc", []string{"q"}}, + // real points near cb bangalore + {[]geo.Point{{Lat: 12.974872, Lon: 77.607749}, {Lat: 12.971725, Lon: 77.610110}, + {Lat: 12.972530, Lon: 77.606912}, {Lat: 12.975112, Lon: 77.603780}, + }, "loc", []string{"amoeba", "communiti"}}, } i := setupGeoPolygonPoints(t) @@ -287,5 +291,26 @@ func setupGeoPolygonPoints(t *testing.T) index.Index { if err != nil { t.Fatal(err) } + + err = i.Update(&document.Document{ + ID: "amoeba", + Fields: []document.Field{ + 
document.NewGeoPointField("loc", []uint64{}, 77.60490, 12.97467), + }, + }) + if err != nil { + t.Fatal(err) + } + + err = i.Update(&document.Document{ + ID: "communiti", + Fields: []document.Field{ + document.NewGeoPointField("loc", []uint64{}, 77.608237, 12.97237), + }, + }) + if err != nil { + t.Fatal(err) + } + return i } diff --git a/test/tests/geo/data/amoeba_brewery.json b/test/tests/geo/data/amoeba_brewery.json new file mode 100644 index 000000000..8997a2415 --- /dev/null +++ b/test/tests/geo/data/amoeba_brewery.json @@ -0,0 +1 @@ +{"name":"amoeba brewery","city":"bangalore","state":"KAR","code":"","country":"India","phone":"","website":"","type":"brewery","updated":"2019-09-17 20:00:20","description":"brewery near cb office","address":[],"geo":{"accuracy":"APPROXIMATE","lat":12.97467,"lon":77.60490}} \ No newline at end of file diff --git a/test/tests/geo/data/communiti_brewery.json b/test/tests/geo/data/communiti_brewery.json new file mode 100644 index 000000000..832ae9147 --- /dev/null +++ b/test/tests/geo/data/communiti_brewery.json @@ -0,0 +1 @@ +{"name":"communiti brewery","city":"bangalore","state":"KAR","code":"","country":"India","phone":"","website":"","type":"brewery","updated":"2019-09-17 20:00:20","description":"brewery near cb office","address":[],"geo":{"accuracy":"APPROXIMATE","lat":12.97237,"lon":77.608237}} \ No newline at end of file diff --git a/test/tests/geo/data/social_brewery.json b/test/tests/geo/data/social_brewery.json new file mode 100644 index 000000000..ae636ad81 --- /dev/null +++ b/test/tests/geo/data/social_brewery.json @@ -0,0 +1 @@ +{"name":"social brewery","city":"bangalore","state":"KAR","code":"","country":"India","phone":"","website":"","type":"brewery","updated":"2019-09-17 20:00:20","description":"brewery near cb office, but outside the polygon","address":[],"geo":{"accuracy":"APPROXIMATE","lat":12.9736946,"lon":77.6042133}} \ No newline at end of file diff --git a/test/tests/geo/searches.json 
b/test/tests/geo/searches.json index 1570cc0bd..20646b44f 100644 --- a/test/tests/geo/searches.json +++ b/test/tests/geo/searches.json @@ -220,5 +220,107 @@ } ] } + }, + { + "comment": "polygon around cb office area, using GeoJSON lat/lon as array", + "search": { + "from": 0, + "size": 10, + "query": { + "polygon_points": [[77.607749,12.974872],[77.6101101,12.971725],[77.606912,12.972530],[77.603780,12.975112]], + "field": "geo" + }, + "sort": [ + "name" + ] + }, + "result": { + "total_hits": 2, + "hits": [ + { + "id": "amoeba_brewery" + }, + { + "id": "communiti_brewery" + } + ] + } + }, + { + "comment": "polygon around cb office area, using GeoJSON lat/lon as string", + "search": { + "from": 0, + "size": 10, + "query": { + "polygon_points": ["12.974872, 77.607749","12.971725, 77.6101101","12.972530, 77.606912","12.975112, 77.603780"], + "field": "geo" + }, + "sort": [ + "name" + ] + }, + "result": { + "total_hits": 2, + "hits": [ + { + "id": "amoeba_brewery" + }, + { + "id": "communiti_brewery" + } + ] + } + }, + { + "comment": "polygon around cb office area", + "search": { + "from": 0, + "size": 10, + "query": { + "polygon_points": [{"lat":12.974872, "lon":77.607749}, {"lat":12.971725, "lon":77.6101101}, + {"lat":12.972530, "lon":77.606912}, {"lat":12.975112, "lon":77.603780}], + "field": "geo" + }, + "sort": [ + "name" + ] + }, + "result": { + "total_hits": 2, + "hits": [ + { + "id": "amoeba_brewery" + }, + { + "id": "communiti_brewery" + } + ] + } + }, + { + "comment": "polygon around cb office area as geohash", + "search": { + "from": 0, + "size": 10, + "query": { + "polygon_points": ["tdr1y40", "tdr1y13", "tdr1vcx", "tdr1vfj"], + "field": "geo" + }, + "sort": [ + "name" + ] + }, + "result": { + "total_hits": 2, + "hits": [ + { + "id": "amoeba_brewery" + }, + { + "id": "communiti_brewery" + } + ] + } } ] + From f84273e7fe5c015a56d9b52ab7b7cd654000aa75 Mon Sep 17 00:00:00 2001 From: Sreekanth Sivasankaran Date: Thu, 19 Sep 2019 16:13:28 +0530 Subject: [PATCH 
628/728] compute any score only when its really needed --- search/scorer/scorer_term.go | 87 +++++++++++++++++++----------------- 1 file changed, 45 insertions(+), 42 deletions(-) diff --git a/search/scorer/scorer_term.go b/search/scorer/scorer_term.go index a0e02e438..718de2ea5 100644 --- a/search/scorer/scorer_term.go +++ b/search/scorer/scorer_term.go @@ -115,58 +115,61 @@ func (s *TermQueryScorer) SetQueryNorm(qnorm float64) { } func (s *TermQueryScorer) Score(ctx *search.SearchContext, termMatch *index.TermFieldDoc) *search.DocumentMatch { - var scoreExplanation *search.Explanation - - // need to compute score - var tf float64 - if termMatch.Freq < MaxSqrtCache { - tf = SqrtCache[int(termMatch.Freq)] - } else { - tf = math.Sqrt(float64(termMatch.Freq)) - } - score := tf * termMatch.Norm * s.idf - - if s.options.Explain { - childrenExplanations := make([]*search.Explanation, 3) - childrenExplanations[0] = &search.Explanation{ - Value: tf, - Message: fmt.Sprintf("tf(termFreq(%s:%s)=%d", s.queryField, s.queryTerm, termMatch.Freq), - } - childrenExplanations[1] = &search.Explanation{ - Value: termMatch.Norm, - Message: fmt.Sprintf("fieldNorm(field=%s, doc=%s)", s.queryField, termMatch.ID), - } - childrenExplanations[2] = s.idfExplanation - scoreExplanation = &search.Explanation{ - Value: score, - Message: fmt.Sprintf("fieldWeight(%s:%s in %s), product of:", s.queryField, s.queryTerm, termMatch.ID), - Children: childrenExplanations, + rv := ctx.DocumentMatchPool.Get() + // perform any score computations only when needed + if s.includeScore || s.options.Explain { + var scoreExplanation *search.Explanation + var tf float64 + if termMatch.Freq < MaxSqrtCache { + tf = SqrtCache[int(termMatch.Freq)] + } else { + tf = math.Sqrt(float64(termMatch.Freq)) } - } + score := tf * termMatch.Norm * s.idf - // if the query weight isn't 1, multiply - if s.queryWeight != 1.0 { - score = score * s.queryWeight if s.options.Explain { - childExplanations := make([]*search.Explanation, 
2) - childExplanations[0] = s.queryWeightExplanation - childExplanations[1] = scoreExplanation + childrenExplanations := make([]*search.Explanation, 3) + childrenExplanations[0] = &search.Explanation{ + Value: tf, + Message: fmt.Sprintf("tf(termFreq(%s:%s)=%d", s.queryField, s.queryTerm, termMatch.Freq), + } + childrenExplanations[1] = &search.Explanation{ + Value: termMatch.Norm, + Message: fmt.Sprintf("fieldNorm(field=%s, doc=%s)", s.queryField, termMatch.ID), + } + childrenExplanations[2] = s.idfExplanation scoreExplanation = &search.Explanation{ Value: score, - Message: fmt.Sprintf("weight(%s:%s^%f in %s), product of:", s.queryField, s.queryTerm, s.queryBoost, termMatch.ID), - Children: childExplanations, + Message: fmt.Sprintf("fieldWeight(%s:%s in %s), product of:", s.queryField, s.queryTerm, termMatch.ID), + Children: childrenExplanations, } } + + // if the query weight isn't 1, multiply + if s.queryWeight != 1.0 { + score = score * s.queryWeight + if s.options.Explain { + childExplanations := make([]*search.Explanation, 2) + childExplanations[0] = s.queryWeightExplanation + childExplanations[1] = scoreExplanation + scoreExplanation = &search.Explanation{ + Value: score, + Message: fmt.Sprintf("weight(%s:%s^%f in %s), product of:", s.queryField, s.queryTerm, s.queryBoost, termMatch.ID), + Children: childExplanations, + } + } + } + + if s.includeScore { + rv.Score = score + } + + if s.options.Explain { + rv.Expl = scoreExplanation + } } - rv := ctx.DocumentMatchPool.Get() rv.IndexInternalID = append(rv.IndexInternalID, termMatch.ID...) 
- if s.includeScore { - rv.Score = score - } - if s.options.Explain { - rv.Expl = scoreExplanation - } if len(termMatch.Vectors) > 0 { if cap(rv.FieldTermLocations) < len(termMatch.Vectors) { From 7f04e653c572c81ffd94b743a3f33b989647d39b Mon Sep 17 00:00:00 2001 From: abhinavdangeti Date: Thu, 10 Oct 2019 16:23:15 -0700 Subject: [PATCH 629/728] Update .travis to test on the last 3 releases of golang 1.13.x, 1.12.x, 1.11.x --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index e00e7b994..8f3340510 100644 --- a/.travis.yml +++ b/.travis.yml @@ -3,9 +3,9 @@ sudo: false language: go go: - - "1.10.x" - "1.11.x" - "1.12.x" + - "1.13.x" script: - go get golang.org/x/tools/cmd/cover From bba4f5a488bedac60776d61526249c3cc8ede979 Mon Sep 17 00:00:00 2001 From: Sreekanth Sivasankaran Date: Fri, 4 Oct 2019 18:36:37 +0530 Subject: [PATCH 630/728] adding a disk size stats for the root index The new "num_bytes_used_disk_by_root" stats shows the disk size used by the latest root index. The idea is that this should help in suggesting a sizing recommendation for RAM quota driven by an index size resident ratio. 
--- index/scorch/persister.go | 6 ++--- index/scorch/scorch.go | 46 +++++++++++++++++++++++++++++++-------- 2 files changed, 40 insertions(+), 12 deletions(-) diff --git a/index/scorch/persister.go b/index/scorch/persister.go index 064e9e6a8..b141cfda6 100644 --- a/index/scorch/persister.go +++ b/index/scorch/persister.go @@ -253,7 +253,7 @@ func (s *Scorch) pausePersisterForMergerCatchUp(lastPersistedEpoch uint64, persistWatchers = notifyMergeWatchers(lastPersistedEpoch, persistWatchers) // Check the merger lag by counting the segment files on disk, - numFilesOnDisk, _ := s.diskFileStats() + numFilesOnDisk, _, _ := s.diskFileStats(nil) // On finding fewer files on disk, persister takes a short pause // for sufficient in-memory segments to pile up for the next @@ -280,7 +280,7 @@ func (s *Scorch) pausePersisterForMergerCatchUp(lastPersistedEpoch uint64, // 2. The merger could be lagging behind on merging the disk files. if numFilesOnDisk > uint64(po.PersisterNapUnderNumFiles) { s.removeOldData() - numFilesOnDisk, _ = s.diskFileStats() + numFilesOnDisk, _, _ = s.diskFileStats(nil) } // Persister pause until the merger catches up to reduce the segment @@ -305,7 +305,7 @@ OUTER: // let the watchers proceed if they lag behind persistWatchers = notifyMergeWatchers(lastPersistedEpoch, persistWatchers) - numFilesOnDisk, _ = s.diskFileStats() + numFilesOnDisk, _, _ = s.diskFileStats(nil) } return lastMergedEpoch, persistWatchers diff --git a/index/scorch/scorch.go b/index/scorch/scorch.go index 44a97d1ea..7a1046fc5 100644 --- a/index/scorch/scorch.go +++ b/index/scorch/scorch.go @@ -466,8 +466,9 @@ func (s *Scorch) Stats() json.Marshaler { return &s.stats } -func (s *Scorch) diskFileStats() (uint64, uint64) { - var numFilesOnDisk, numBytesUsedDisk uint64 +func (s *Scorch) diskFileStats(rootSegmentPaths map[string]struct{}) (uint64, + uint64, uint64) { + var numFilesOnDisk, numBytesUsedDisk, numBytesOnDiskByRoot uint64 if s.path != "" { finfos, err := ioutil.ReadDir(s.path) 
if err == nil { @@ -475,24 +476,48 @@ func (s *Scorch) diskFileStats() (uint64, uint64) { if !finfo.IsDir() { numBytesUsedDisk += uint64(finfo.Size()) numFilesOnDisk++ + if rootSegmentPaths != nil { + fname := s.path + string(os.PathSeparator) + finfo.Name() + if _, fileAtRoot := rootSegmentPaths[fname]; fileAtRoot { + numBytesOnDiskByRoot += uint64(finfo.Size()) + } + } } } } } - return numFilesOnDisk, numBytesUsedDisk + // if no root files path given, then consider all disk files. + if rootSegmentPaths == nil { + return numFilesOnDisk, numBytesUsedDisk, numBytesUsedDisk + } + + return numFilesOnDisk, numBytesUsedDisk, numBytesOnDiskByRoot +} + +func (s *Scorch) rootDiskSegmentsPaths() map[string]struct{} { + rv := make(map[string]struct{}, len(s.root.segment)) + for _, segmentSnapshot := range s.root.segment { + switch seg := segmentSnapshot.segment.(type) { + case *zap.Segment: + rv[seg.Path()] = struct{}{} + } + } + return rv } func (s *Scorch) StatsMap() map[string]interface{} { m := s.stats.ToMap() - numFilesOnDisk, numBytesUsedDisk := s.diskFileStats() + s.rootLock.RLock() + rootSegPaths := s.rootDiskSegmentsPaths() + m["CurFilesIneligibleForRemoval"] = uint64(len(s.ineligibleForRemoval)) + s.rootLock.RUnlock() + + numFilesOnDisk, numBytesUsedDisk, numBytesOnDiskByRoot := s.diskFileStats(rootSegPaths) m["CurOnDiskBytes"] = numBytesUsedDisk m["CurOnDiskFiles"] = numFilesOnDisk - s.rootLock.RLock() - m["CurFilesIneligibleForRemoval"] = uint64(len(s.ineligibleForRemoval)) - s.rootLock.RUnlock() // TODO: consider one day removing these backwards compatible // names for apps using the old names m["updates"] = m["TotUpdates"] @@ -507,8 +532,11 @@ func (s *Scorch) StatsMap() map[string]interface{} { m["num_items_introduced"] = m["TotIntroducedItems"] m["num_items_persisted"] = m["TotPersistedItems"] m["num_recs_to_persist"] = m["TotItemsToPersist"] - m["num_bytes_used_disk"] = m["CurOnDiskBytes"] - m["num_files_on_disk"] = m["CurOnDiskFiles"] + // total disk bytes 
found in index directory inclusive of older snapshots + m["num_bytes_used_disk"] = numBytesUsedDisk + // total disk bytes by the latest root index, exclusive of older snapshots + m["num_bytes_used_disk_by_root"] = numBytesOnDiskByRoot + m["num_files_on_disk"] = numFilesOnDisk m["num_root_memorysegments"] = m["TotMemorySegmentsAtRoot"] m["num_root_filesegments"] = m["TotFileSegmentsAtRoot"] m["num_persister_nap_pause_completed"] = m["TotPersisterNapPauseCompleted"] From e46f498c353272ae060b9f1bcd77750e069ffd1a Mon Sep 17 00:00:00 2001 From: Sreekanth Sivasankaran Date: Fri, 25 Oct 2019 06:16:40 +0530 Subject: [PATCH 631/728] Unit tests for a single segment merge policy Its been observed that using an aggressive merge policy, we are able to create a single segment scorch index. --- index/scorch/mergeplan/merge_plan_test.go | 39 +++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/index/scorch/mergeplan/merge_plan_test.go b/index/scorch/mergeplan/merge_plan_test.go index 3adc1f4b8..2791d09c5 100644 --- a/index/scorch/mergeplan/merge_plan_test.go +++ b/index/scorch/mergeplan/merge_plan_test.go @@ -232,6 +232,45 @@ func TestCalcBudget(t *testing.T) { } } +func TestCalcBudgetForSingleSegmentMergePolicy(t *testing.T) { + mpolicy := MergePlanOptions{ + MaxSegmentsPerTier: 1, + MaxSegmentSize: 1 << 30, // ~ 1 Billion + SegmentsPerMergeTask: 10, + FloorSegmentSize: 1 << 30, + } + + tests := []struct { + totalSize int64 + firstTierSize int64 + o MergePlanOptions + expect int + }{ + {0, mpolicy.RaiseToFloorSegmentSize(0), mpolicy, 0}, + {1, mpolicy.RaiseToFloorSegmentSize(1), mpolicy, 1}, + {9, mpolicy.RaiseToFloorSegmentSize(0), mpolicy, 1}, + {1, mpolicy.RaiseToFloorSegmentSize(1), mpolicy, 1}, + {21, mpolicy.RaiseToFloorSegmentSize(21), mpolicy, 1}, + {21, mpolicy.RaiseToFloorSegmentSize(21), mpolicy, 1}, + {1000, mpolicy.RaiseToFloorSegmentSize(2000), mpolicy, 1}, + {5000, mpolicy.RaiseToFloorSegmentSize(5000), mpolicy, 1}, + {10000, 
mpolicy.RaiseToFloorSegmentSize(10000), mpolicy, 1}, + {30000, mpolicy.RaiseToFloorSegmentSize(30000), mpolicy, 1}, + {1000000, mpolicy.RaiseToFloorSegmentSize(1000000), mpolicy, 1}, + {1000000000, 1 << 30, mpolicy, 1}, + {1013423541, 1 << 30, mpolicy, 1}, + {98765442, 1 << 30, mpolicy, 1}, + } + + for testi, test := range tests { + res := CalcBudget(test.totalSize, test.firstTierSize, &test.o) + if res != test.expect { + t.Errorf("testi: %d, test: %#v, res: %v", + testi, test, res) + } + } +} + // ---------------------------------------- func TestInsert1SameSizedSegmentBetweenMerges(t *testing.T) { From 919079be42ccaa600ff03d61abbc1c0236bea7e5 Mon Sep 17 00:00:00 2001 From: abhinavdangeti Date: Mon, 28 Oct 2019 15:34:38 -0700 Subject: [PATCH 632/728] [GEO] Search all numeric type terms to match geo points If a field (for example _all) were to index multiple numeric terms (of type numeric and geopoint morton hash), verify all numeric matches rather than just the last encountered entry to determine if the entry morton unhashes into a geo point match. 
Fixes: https://github.com/blevesearch/bleve/issues/1301 --- search/searcher/search_geoboundingbox.go | 15 +++++--- search/searcher/search_geopointdistance.go | 15 +++++--- search_test.go | 45 ++++++++++++++++++++++ 3 files changed, 64 insertions(+), 11 deletions(-) diff --git a/search/searcher/search_geoboundingbox.go b/search/searcher/search_geoboundingbox.go index 38cb6467f..c4b8af927 100644 --- a/search/searcher/search_geoboundingbox.go +++ b/search/searcher/search_geoboundingbox.go @@ -224,7 +224,8 @@ func ComputeGeoRange(term uint64, shift uint, func buildRectFilter(dvReader index.DocValueReader, field string, minLon, minLat, maxLon, maxLat float64) FilterFunc { return func(d *search.DocumentMatch) bool { - var lon, lat float64 + // check geo matches against all numeric type terms indexed + var lons, lats []float64 var found bool err := dvReader.VisitDocValues(d.IndexInternalID, func(field string, term []byte) { // only consider the values which are shifted 0 @@ -234,15 +235,19 @@ func buildRectFilter(dvReader index.DocValueReader, field string, var i64 int64 i64, err = prefixCoded.Int64() if err == nil { - lon = geo.MortonUnhashLon(uint64(i64)) - lat = geo.MortonUnhashLat(uint64(i64)) + lons = append(lons, geo.MortonUnhashLon(uint64(i64))) + lats = append(lats, geo.MortonUnhashLat(uint64(i64))) found = true } } }) if err == nil && found { - return geo.BoundingBoxContains(lon, lat, - minLon, minLat, maxLon, maxLat) + for i := range lons { + if geo.BoundingBoxContains(lons[i], lats[i], + minLon, minLat, maxLon, maxLat) { + return true + } + } } return false } diff --git a/search/searcher/search_geopointdistance.go b/search/searcher/search_geopointdistance.go index b01ae6a0a..2444a8423 100644 --- a/search/searcher/search_geopointdistance.go +++ b/search/searcher/search_geopointdistance.go @@ -96,7 +96,8 @@ func boxSearcher(indexReader index.IndexReader, func buildDistFilter(dvReader index.DocValueReader, field string, centerLon, centerLat, maxDist float64) 
FilterFunc { return func(d *search.DocumentMatch) bool { - var lon, lat float64 + // check geo matches against all numeric tpe terms indexed + var lons, lats []float64 var found bool err := dvReader.VisitDocValues(d.IndexInternalID, func(field string, term []byte) { @@ -106,16 +107,18 @@ func buildDistFilter(dvReader index.DocValueReader, field string, if err == nil && shift == 0 { i64, err := prefixCoded.Int64() if err == nil { - lon = geo.MortonUnhashLon(uint64(i64)) - lat = geo.MortonUnhashLat(uint64(i64)) + lons = append(lons, geo.MortonUnhashLon(uint64(i64))) + lats = append(lats, geo.MortonUnhashLat(uint64(i64))) found = true } } }) if err == nil && found { - dist := geo.Haversin(lon, lat, centerLon, centerLat) - if dist <= maxDist/1000 { - return true + for i := range lons { + dist := geo.Haversin(lons[i], lats[i], centerLon, centerLat) + if dist <= maxDist/1000 { + return true + } } } return false diff --git a/search_test.go b/search_test.go index 8c78d641f..5eede44c4 100644 --- a/search_test.go +++ b/search_test.go @@ -1594,3 +1594,48 @@ func TestSearchScoreNone(t *testing.T) { t.Fatal("unexpected score for the hit") } } + +func TestGeoDistanceIssue1301(t *testing.T) { + shopMapping := NewDocumentMapping() + shopMapping.AddFieldMappingsAt("GEO", NewGeoPointFieldMapping()) + shopIndexMapping := NewIndexMapping() + shopIndexMapping.DefaultMapping = shopMapping + + idx, err := NewUsing("testidx", shopIndexMapping, scorch.Name, Config.DefaultKVStore, nil) + if err != nil { + t.Fatal(err) + } + + defer func() { + err := os.RemoveAll("testidx") + if err != nil { + t.Fatal(err) + } + }() + + for i, g := range []string{"wecpkbeddsmf", "wecpk8tne453", "wecpkb80s09t"} { + if err = idx.Index(strconv.Itoa(i), map[string]interface{}{ + "ID": i, + "GEO": g, + }); err != nil { + t.Fatal(err) + } + } + + // Not setting "Field" for the following query is returning inconsistent + // results, when there's another field indexed which is numeric and both + // these fields are 
included within _all. + // As reported in: https://github.com/blevesearch/bleve/issues/1301 + lat, lon := 22.371154, 114.112603 + q := NewGeoDistanceQuery(lon, lat, "1km") + + req := NewSearchRequest(q) + sr, err := idx.Search(req) + if err != nil { + t.Fatal(err) + } + + if sr.Total != 3 { + t.Fatalf("Size expected: 3, actual %d\n", sr.Total) + } +} From c0dcea943e3f6e31be3481c3c7788737626ba688 Mon Sep 17 00:00:00 2001 From: abhinavdangeti Date: Mon, 28 Oct 2019 15:49:36 -0700 Subject: [PATCH 633/728] Fix up geo point matches for within a geo-polygon as well --- search/searcher/search_geopolygon.go | 42 +++++++++++++++------------- 1 file changed, 23 insertions(+), 19 deletions(-) diff --git a/search/searcher/search_geopolygon.go b/search/searcher/search_geopolygon.go index 3bb47519d..b0d6d552a 100644 --- a/search/searcher/search_geopolygon.go +++ b/search/searcher/search_geopolygon.go @@ -63,7 +63,8 @@ func almostEqual(a, b float64) bool { func buildPolygonFilter(dvReader index.DocValueReader, field string, polygon []geo.Point) FilterFunc { return func(d *search.DocumentMatch) bool { - var lon, lat float64 + // check geo matches against all numeric type terms indexed + var lons, lats []float64 var found bool err := dvReader.VisitDocValues(d.IndexInternalID, func(field string, term []byte) { @@ -73,8 +74,8 @@ func buildPolygonFilter(dvReader index.DocValueReader, field string, if err == nil && shift == 0 { i64, err := prefixCoded.Int64() if err == nil { - lon = geo.MortonUnhashLon(uint64(i64)) - lat = geo.MortonUnhashLat(uint64(i64)) + lons = append(lons, geo.MortonUnhashLon(uint64(i64))) + lats = append(lats, geo.MortonUnhashLat(uint64(i64))) found = true } } @@ -84,26 +85,29 @@ func buildPolygonFilter(dvReader index.DocValueReader, field string, // the polygon. ie it might fail for certain points on the polygon boundaries. 
if err == nil && found { nVertices := len(polygon) - var inside bool - // check for a direct vertex match - if almostEqual(polygon[0].Lat, lat) && - almostEqual(polygon[0].Lon, lon) { - return true - } - - for i := 1; i < nVertices; i++ { - if almostEqual(polygon[i].Lat, lat) && - almostEqual(polygon[i].Lon, lon) { + for i := range lons { + var inside bool + // check for a direct vertex match + if almostEqual(polygon[0].Lat, lats[i]) && + almostEqual(polygon[0].Lon, lons[i]) { return true } - if (polygon[i].Lat > lat) != (polygon[i-1].Lat > lat) && - lon < (polygon[i-1].Lon-polygon[i].Lon)*(lat-polygon[i].Lat)/ - (polygon[i-1].Lat-polygon[i].Lat)+polygon[i].Lon { - inside = !inside + + for j := 1; j < nVertices; j++ { + if almostEqual(polygon[j].Lat, lats[i]) && + almostEqual(polygon[j].Lon, lons[i]) { + return true + } + if (polygon[j].Lat > lats[i]) != (polygon[j-1].Lat > lats[i]) && + lons[i] < (polygon[j-1].Lon-polygon[j].Lon)*(lats[i]-polygon[j].Lat)/ + (polygon[j-1].Lat-polygon[j].Lat)+polygon[j].Lon { + inside = !inside + } + } + if inside { + return true } } - return inside - } return false } From 1e56045b0dfbbbdf20f34b1fd4ca761b6035c8ec Mon Sep 17 00:00:00 2001 From: abhinavdangeti Date: Mon, 28 Oct 2019 15:54:35 -0700 Subject: [PATCH 634/728] Fix typos/commentary --- search/searcher/search_geopointdistance.go | 2 +- search_test.go | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/search/searcher/search_geopointdistance.go b/search/searcher/search_geopointdistance.go index 2444a8423..5be781fd6 100644 --- a/search/searcher/search_geopointdistance.go +++ b/search/searcher/search_geopointdistance.go @@ -96,7 +96,7 @@ func boxSearcher(indexReader index.IndexReader, func buildDistFilter(dvReader index.DocValueReader, field string, centerLon, centerLat, maxDist float64) FilterFunc { return func(d *search.DocumentMatch) bool { - // check geo matches against all numeric tpe terms indexed + // check geo matches against all numeric type terms 
indexed var lons, lats []float64 var found bool diff --git a/search_test.go b/search_test.go index 5eede44c4..7cfd44737 100644 --- a/search_test.go +++ b/search_test.go @@ -1622,9 +1622,9 @@ func TestGeoDistanceIssue1301(t *testing.T) { } } - // Not setting "Field" for the following query is returning inconsistent - // results, when there's another field indexed which is numeric and both - // these fields are included within _all. + // Not setting "Field" for the following query, targets it against the _all + // field and this is returning inconsistent results, when there's another + // field indexed along with the geopoint which is numeric. // As reported in: https://github.com/blevesearch/bleve/issues/1301 lat, lon := 22.371154, 114.112603 q := NewGeoDistanceQuery(lon, lat, "1km") From a8dd7894f6394e168fc7e307aca5db3f1e295c4c Mon Sep 17 00:00:00 2001 From: Sreekanth Sivasankaran Date: Wed, 30 Oct 2019 12:33:39 +0530 Subject: [PATCH 635/728] fix typo in comments --- search/searcher/search_geopointdistance.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/search/searcher/search_geopointdistance.go b/search/searcher/search_geopointdistance.go index b01ae6a0a..7409196a5 100644 --- a/search/searcher/search_geopointdistance.go +++ b/search/searcher/search_geopointdistance.go @@ -83,7 +83,7 @@ func boxSearcher(indexReader index.IndexReader, return boxSearcher, nil } - // build geoboundinggox searcher for that bounding box + // build geoboundingbox searcher for that bounding box boxSearcher, err := NewGeoBoundingBoxSearcher(indexReader, topLeftLon, bottomRightLat, bottomRightLon, topLeftLat, field, boost, options, checkBoundaries) From 13483401f70a7b9a453acc54f0ead439f72330fa Mon Sep 17 00:00:00 2001 From: Sreekanth Sivasankaran Date: Fri, 3 Jan 2020 12:46:31 +0530 Subject: [PATCH 636/728] Adding json tags for the geo point struct -adding UTs for complex self-intersecting polygons --- geo/geo.go | 4 +- search/searcher/search_geopolygon_test.go | 90 
+++++++++++++++++++++++ 2 files changed, 92 insertions(+), 2 deletions(-) diff --git a/geo/geo.go b/geo/geo.go index 583451e30..b3d6909d9 100644 --- a/geo/geo.go +++ b/geo/geo.go @@ -39,8 +39,8 @@ var latScale = float64((uint64(0x1)< Date: Fri, 3 Jan 2020 13:40:08 -0800 Subject: [PATCH 637/728] Update .travis.yml to run for go versions 1.12.x and 1.13.x (latest) >> etcd-io/bbolt complains with go version 1.11.x --- .travis.yml | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/.travis.yml b/.travis.yml index 8f3340510..dffe4b8fd 100644 --- a/.travis.yml +++ b/.travis.yml @@ -3,7 +3,6 @@ sudo: false language: go go: - - "1.11.x" - "1.12.x" - "1.13.x" @@ -16,11 +15,7 @@ script: - go test -race -v $(go list ./... | grep -v vendor/) - go vet $(go list ./... | grep -v vendor/) - go test ./test -v -indexType scorch - - if [[ ${TRAVIS_GO_VERSION} =~ ^1\.10 ]]; then - echo "errcheck skipped for go version" $TRAVIS_GO_VERSION; - else - errcheck -ignorepkg fmt $(go list ./... | grep -v vendor/); - fi + - errcheck -ignorepkg fmt $(go list ./... | grep -v vendor/); - docs/project-code-coverage.sh - docs/build_children.sh From e7d28f663caeffefc07e4b04d6391522a1584df0 Mon Sep 17 00:00:00 2001 From: Sreekanth Sivasankaran Date: Mon, 6 Jan 2020 08:29:38 +0530 Subject: [PATCH 638/728] MB-37392 - incorrect results with closed polygons Polygon query returns less results after adding initial point as last point. This fix aims to address that by extending the ray casting checks. --- search/searcher/search_geopolygon.go | 15 +++++++++++---- search/searcher/search_geopolygon_test.go | 5 +++-- 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/search/searcher/search_geopolygon.go b/search/searcher/search_geopolygon.go index b0d6d552a..35f3ba2a1 100644 --- a/search/searcher/search_geopolygon.go +++ b/search/searcher/search_geopolygon.go @@ -85,8 +85,17 @@ func buildPolygonFilter(dvReader index.DocValueReader, field string, // the polygon. 
ie it might fail for certain points on the polygon boundaries. if err == nil && found { nVertices := len(polygon) + if len(polygon) < 3 { + return false + } + rayIntersectsSegment := func(point, a, b geo.Point) bool { + return (a.Lat > point.Lat) != (b.Lat > point.Lat) && + point.Lon < (b.Lon-a.Lon)*(point.Lat-a.Lat)/(b.Lat-a.Lat)+a.Lon + } + for i := range lons { - var inside bool + pt := geo.Point{Lon: lons[i], Lat: lats[i]} + inside := rayIntersectsSegment(pt, polygon[len(polygon)-1], polygon[0]) // check for a direct vertex match if almostEqual(polygon[0].Lat, lats[i]) && almostEqual(polygon[0].Lon, lons[i]) { @@ -98,9 +107,7 @@ func buildPolygonFilter(dvReader index.DocValueReader, field string, almostEqual(polygon[j].Lon, lons[i]) { return true } - if (polygon[j].Lat > lats[i]) != (polygon[j-1].Lat > lats[i]) && - lons[i] < (polygon[j-1].Lon-polygon[j].Lon)*(lats[i]-polygon[j].Lat)/ - (polygon[j-1].Lat-polygon[j].Lat)+polygon[j].Lon { + if rayIntersectsSegment(pt, polygon[j-1], polygon[j]) { inside = !inside } } diff --git a/search/searcher/search_geopolygon_test.go b/search/searcher/search_geopolygon_test.go index 8a6ef8f5a..e5e5c3815 100644 --- a/search/searcher/search_geopolygon_test.go +++ b/search/searcher/search_geopolygon_test.go @@ -119,8 +119,9 @@ func TestGeoRectanglePolygon(t *testing.T) { field string want []string }{ - {[]geo.Point{{Lon: 0.001, Lat: 0.001}, {Lon: 85.002, Lat: 38.002}}, "loc", - []string{"a", "b", "c", "d", "e", "f", "g", "h", "i", "j"}}, + {[]geo.Point{{Lon: 0, Lat: 0}, {Lon: 0, Lat: 50}, {Lon: 50, Lat: 50}, {Lon: 50, Lat: 0}, {Lon: 0, Lat: 0}}, "loc", + []string{"a", "b", "c", "d", "e", "f", "g", "h", "i", "j"}, + }, } i := setupGeo(t) From 8606b51b5d2d57f0bba8a1896b308739d0261abb Mon Sep 17 00:00:00 2001 From: Sreekanth Sivasankaran Date: Sat, 11 Jan 2020 10:31:31 +0530 Subject: [PATCH 639/728] MB-37471 - Incomplete polygon points results emtpy response Adding input validation checks for polygon points --- 
search/searcher/search_geopolygon.go | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/search/searcher/search_geopolygon.go b/search/searcher/search_geopolygon.go index 35f3ba2a1..5f16aa8d2 100644 --- a/search/searcher/search_geopolygon.go +++ b/search/searcher/search_geopolygon.go @@ -15,6 +15,7 @@ package searcher import ( + "fmt" "github.com/blevesearch/bleve/geo" "github.com/blevesearch/bleve/index" "github.com/blevesearch/bleve/numeric" @@ -26,6 +27,10 @@ func NewGeoBoundedPolygonSearcher(indexReader index.IndexReader, polygon []geo.Point, field string, boost float64, options search.SearcherOptions) (search.Searcher, error) { + if len(polygon) < 3 { + return nil, fmt.Errorf("Too few points specified for the polygon boundary") + } + // compute the bounding box enclosing the polygon topLeftLon, topLeftLat, bottomRightLon, bottomRightLat, err := geo.BoundingRectangleForPolygon(polygon) From f99438d3d4c760ae2f79967b2663b063dd3b5a3e Mon Sep 17 00:00:00 2001 From: Sreekanth Sivasankaran Date: Tue, 14 Jan 2020 16:39:16 +0530 Subject: [PATCH 640/728] MB-37500 - Polygon query with invalid geohash gives results Adding geohash length validation checks --- geo/geo.go | 2 ++ geo/parse.go | 8 +++++--- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/geo/geo.go b/geo/geo.go index 583451e30..2f11bd8dd 100644 --- a/geo/geo.go +++ b/geo/geo.go @@ -37,6 +37,8 @@ var geoTolerance = 1E-6 var lonScale = float64((uint64(0x1)< Date: Thu, 16 Jan 2020 11:50:05 +0200 Subject: [PATCH 641/728] Fix concurrent read write on analysis This is the stack trace we got in our tests ``` fatal error: concurrent map read and map write goroutine 7 [running]: runtime.throw(0x17b1b17, 0x21) /usr/local/go/src/runtime/panic.go:774 +0x72 fp=0xc00c97f8b8 sp=0xc00c97f888 pc=0x431422 runtime.mapaccess2_faststr(0x15f4040, 0xc011202f90, 0x257c531, 0x1, 0xc00c97fa70, 0xcb5919) /usr/local/go/src/runtime/map_faststr.go:116 +0x48f fp=0xc00c97f928 sp=0xc00c97f8b8 pc=0x4150ff 
github.com/blevesearch/bleve/analysis.TokenFrequencies.MergeAll(0xc011202f90, 0x14488dd, 0x2, 0xc01121cc00) /home/circleci/.go_workspace/pkg/mod/github.com/blevesearch/bleve@v0.0.0-20190812174355-c997533a776f/ +0x100 fp=0xc00c97fa38 sp=0xc00c97f928 pc=0x9d6770 github.com/blevesearch/bleve/document.(*CompositeField).Compose(0xc00e499d80, 0x14488dd, 0x2, 0x1, 0xc01121cc00) /home/circleci/.go_workspace/pkg/mod/github.com/blevesearch/bleve@v0.0.0-20190812174355-c997533a776f/document/field_composite.go:122 +0xf2 fp=0xc00c97fa80 sp=0xc00c97fa38 pc=0x9dd902 github.com/blevesearch/bleve/index/upsidedown.(*UpsideDownCouch).Analyze(0xc001dbda80, 0xc00e499d00, 0x2) /home/circleci/.go_workspace/pkg/mod/github.com/blevesearch/bleve@v0.0.0-20190812174355-c997533a776f/index/upsidedown/analysis.go:77 +0xc10 fp=0xc00c97ff28 sp=0xc00c97fa80 pc=0xc9b630 github.com/blevesearch/bleve/index.AnalysisWorker(0xc0004f1e00, 0xc0004f1e60) /home/circleci/.go_workspace/pkg/mod/github.com/blevesearch/bleve@v0.0.0-20190812174355-c997533a776f/index/analysis.go:106 +0x55 fp=0xc00c97ffd0 sp=0xc00c97ff28 pc=0x9e12b5 runtime.goexit() /usr/local/go/src/runtime/asm_amd64.s:1357 +0x1 fp=0xc00c97ffd8 sp=0xc00c97ffd0 pc=0x460741 created by github.com/blevesearch/bleve/index.NewAnalysisQueue /home/circleci/.go_workspace/pkg/mod/github.com/blevesearch/bleve@v0.0.0-20190812174355-c997533a776f/index/analysis.go:94 +0xc8 ``` I think that the root cause is that iterating on the map, pointers change under the hood, which will lead to analysing the same document twice --- index/upsidedown/upsidedown.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/index/upsidedown/upsidedown.go b/index/upsidedown/upsidedown.go index 24f5aae94..8e915c6ad 100644 --- a/index/upsidedown/upsidedown.go +++ b/index/upsidedown/upsidedown.go @@ -820,7 +820,8 @@ func (udc *UpsideDownCouch) Batch(batch *index.Batch) (err error) { if numUpdates > 0 { go func() { - for _, doc := range batch.IndexOps { + for k := range 
batch.IndexOps { + doc := batch.IndexOps[k] if doc != nil { aw := index.NewAnalysisWork(udc, doc, resultChan) // put the work on the queue From 86d3ceeecdb3da770084a5e087a9b64f05d2b5a8 Mon Sep 17 00:00:00 2001 From: Sreekanth Sivasankaran Date: Wed, 5 Feb 2020 16:42:00 +0530 Subject: [PATCH 642/728] Enabling score/id based search_after/before paginations This change attempts to enable users provide the last fetched document's score or ID values as keys in the search after/before fields to make the deep pagination works for the default _score or _id based sort orders. --- search/collector/topn.go | 13 +++++ test/tests/sort/data/d.json | 2 +- test/tests/sort/data/e.json | 2 +- test/tests/sort/data/f.json | 2 +- test/tests/sort/searches.json | 92 +++++++++++++++++++++++++++++++++++ 5 files changed, 108 insertions(+), 3 deletions(-) diff --git a/search/collector/topn.go b/search/collector/topn.go index a027a12c2..8d4afb63a 100644 --- a/search/collector/topn.go +++ b/search/collector/topn.go @@ -17,6 +17,7 @@ package collector import ( "context" "reflect" + "strconv" "time" "github.com/blevesearch/bleve/index" @@ -90,6 +91,18 @@ func NewTopNCollectorAfter(size int, sort search.SortOrder, after []string) *Top rv.searchAfter = &search.DocumentMatch{ Sort: after, } + + for pos, ss := range sort { + if ss.RequiresDocID() { + rv.searchAfter.ID = after[pos] + } + if ss.RequiresScoring() { + if score, err := strconv.ParseFloat(after[pos], 64); err == nil { + rv.searchAfter.Score = score + } + } + } + return rv } diff --git a/test/tests/sort/data/d.json b/test/tests/sort/data/d.json index 926869a87..febddcafa 100644 --- a/test/tests/sort/data/d.json +++ b/test/tests/sort/data/d.json @@ -2,6 +2,6 @@ "id": "d", "age": 65, "born": "1978-12-02", - "title": "agent", + "title": "agent d is desperately trying out to be successful rapster!", "tags": ["cats"] } diff --git a/test/tests/sort/data/e.json b/test/tests/sort/data/e.json index 436f010bd..9f1c4f9b2 100644 --- 
a/test/tests/sort/data/e.json +++ b/test/tests/sort/data/e.json @@ -2,6 +2,6 @@ "id": "e", "name": "nancy", "born": "1954-10-22", - "title": "rapstar", + "title": "rapstar nancy rapster", "tags": ["pain"] } diff --git a/test/tests/sort/data/f.json b/test/tests/sort/data/f.json index 14f0921a6..37618bc1a 100644 --- a/test/tests/sort/data/f.json +++ b/test/tests/sort/data/f.json @@ -2,6 +2,6 @@ "id": "f", "name": "frank", "age": 1, - "title": "taxman", + "title": "frank the taxman of cb, Rapster!", "tags": ["vitamin","purple"] } diff --git a/test/tests/sort/searches.json b/test/tests/sort/searches.json index 8b32df5d0..6d34b25c0 100644 --- a/test/tests/sort/searches.json +++ b/test/tests/sort/searches.json @@ -458,5 +458,97 @@ } ] } + }, + { + "comment": "sort by ID, after doc d", + "search": { + "from": 0, + "size": 10, + "query": { + "match_all":{} + }, + "sort": ["_id"], + "search_after": ["d"] + }, + "result": { + "total_hits": 6, + "hits": [ + { + "id": "e" + }, + { + "id": "f" + } + ] + } + }, + { + "comment": "sort by ID, before doc d", + "search": { + "from": 0, + "size": 10, + "query": { + "match_all":{} + }, + "sort": ["_id"], + "search_before": ["d"] + }, + "result": { + "total_hits": 6, + "hits": [ + { + "id": "a" + }, + { + "id": "b" + }, + { + "id": "c" + } + ] + } + }, + { + "comment": "sort by score, after score 0.286889[ e(299646) > f(286889) > d(222224)]", + "search": { + "from": 0, + "size": 10, + "query": { + "query":"rapster" + }, + "sort": ["_score"], + "search_after": ["0.286889"] + }, + "result": { + "total_hits": 3, + "hits": [ + { + "id": "f" + }, + { + "id": "e" + } + ] + } + }, + { + "comment": "sort by score, before score f/0.286889[ e(299646) > f(286889) > d(222224)]", + "search": { + "from": 0, + "size": 10, + "query": { + "query":"rapster" + }, + "sort": ["_score"], + "search_before": ["0.286889"] + }, + "result": { + "total_hits": 3, + "hits": [ + { + "id": "d" + } + ] + } } ] From 5d3ddd2760afdbff717be25440d94c5200fe623b Mon Sep 17 
00:00:00 2001 From: Marty Schoch Date: Mon, 17 Feb 2020 14:39:53 -0500 Subject: [PATCH 643/728] update manifest to fix issue with vellum (#1338) fixes #1336 --- vendor/manifest | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vendor/manifest b/vendor/manifest index e6f985136..3d906dd66 100644 --- a/vendor/manifest +++ b/vendor/manifest @@ -78,7 +78,7 @@ "importpath": "github.com/RoaringBitmap/roaring", "repository": "https://github.com/RoaringBitmap/roaring", "vcs": "", - "revision": "d0ce1763c3526f65703c395da50da7a7fb2138d5", + "revision": "4208ad825dda03a6a3d2197df8ec57948aebcc12", "branch": "master", "notests": true }, From 78a985bede29cc9e5707a73bc9b94d75a4ce2717 Mon Sep 17 00:00:00 2001 From: Sreekanth Sivasankaran Date: Thu, 27 Feb 2020 14:39:09 +0530 Subject: [PATCH 644/728] Adding snowball stemmers for en,fr,it,es,de --- analysis/lang/de/stemmer_de_snowball.go | 49 +++++++++++ analysis/lang/de/stemmer_de_test.go | 91 ++++++++++++++++++++ analysis/lang/en/stemmer_en_snowball.go | 49 +++++++++++ analysis/lang/en/stemmer_en_test.go | 79 +++++++++++++++++ analysis/lang/es/stemmer_es_snowball.go | 49 +++++++++++ analysis/lang/es/stemmer_es_snowball_test.go | 79 +++++++++++++++++ analysis/lang/fr/stemmer_fr_snowball.go | 49 +++++++++++ analysis/lang/fr/stemmer_fr_snowball_test.go | 79 +++++++++++++++++ analysis/lang/it/stemmer_it_snowball.go | 49 +++++++++++ analysis/lang/it/stemmer_it_snowball_test.go | 79 +++++++++++++++++ 10 files changed, 652 insertions(+) create mode 100644 analysis/lang/de/stemmer_de_snowball.go create mode 100644 analysis/lang/de/stemmer_de_test.go create mode 100644 analysis/lang/en/stemmer_en_snowball.go create mode 100644 analysis/lang/en/stemmer_en_test.go create mode 100644 analysis/lang/es/stemmer_es_snowball.go create mode 100644 analysis/lang/es/stemmer_es_snowball_test.go create mode 100644 analysis/lang/fr/stemmer_fr_snowball.go create mode 100644 analysis/lang/fr/stemmer_fr_snowball_test.go create mode 100644 
analysis/lang/it/stemmer_it_snowball.go create mode 100644 analysis/lang/it/stemmer_it_snowball_test.go diff --git a/analysis/lang/de/stemmer_de_snowball.go b/analysis/lang/de/stemmer_de_snowball.go new file mode 100644 index 000000000..efda0660b --- /dev/null +++ b/analysis/lang/de/stemmer_de_snowball.go @@ -0,0 +1,49 @@ +// Copyright (c) 2020 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package de + +import ( + "github.com/blevesearch/bleve/analysis" + "github.com/blevesearch/bleve/registry" + + "github.com/blevesearch/snowballstem" + "github.com/blevesearch/snowballstem/german" +) + +const SnowballStemmerName = "stemmer_de_snowball" + +type GermanStemmerFilter struct { +} + +func NewGermanStemmerFilter() *GermanStemmerFilter { + return &GermanStemmerFilter{} +} + +func (s *GermanStemmerFilter) Filter(input analysis.TokenStream) analysis.TokenStream { + for _, token := range input { + env := snowballstem.NewEnv(string(token.Term)) + german.Stem(env) + token.Term = []byte(env.Current()) + } + return input +} + +func GermanStemmerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) { + return NewGermanStemmerFilter(), nil +} + +func init() { + registry.RegisterTokenFilter(SnowballStemmerName, GermanStemmerFilterConstructor) +} diff --git a/analysis/lang/de/stemmer_de_test.go b/analysis/lang/de/stemmer_de_test.go new file mode 100644 index 000000000..a319f7273 --- /dev/null +++ 
b/analysis/lang/de/stemmer_de_test.go @@ -0,0 +1,91 @@ +// Copyright (c) 2020 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package de + +import ( + "reflect" + "testing" + + "github.com/blevesearch/bleve/analysis" + "github.com/blevesearch/bleve/registry" +) + +func TestSnowballGermanStemmer(t *testing.T) { + tests := []struct { + input analysis.TokenStream + output analysis.TokenStream + }{ + { + input: analysis.TokenStream{ + &analysis.Token{ + Term: []byte("abzuschrecken"), + }, + }, + output: analysis.TokenStream{ + &analysis.Token{ + Term: []byte("abzuschreck"), + }, + }, + }, + { + input: analysis.TokenStream{ + &analysis.Token{ + Term: []byte("abzuwarten"), + }, + }, + output: analysis.TokenStream{ + &analysis.Token{ + Term: []byte("abzuwart"), + }, + }, + }, + { + input: analysis.TokenStream{ + &analysis.Token{ + Term: []byte("zwirnfabrik"), + }, + }, + output: analysis.TokenStream{ + &analysis.Token{ + Term: []byte("zwirnfabr"), + }, + }, + }, + { + input: analysis.TokenStream{ + &analysis.Token{ + Term: []byte("zyniker"), + }, + }, + output: analysis.TokenStream{ + &analysis.Token{ + Term: []byte("zynik"), + }, + }, + }, + } + + cache := registry.NewCache() + filter, err := cache.TokenFilterNamed(SnowballStemmerName) + if err != nil { + t.Fatal(err) + } + for _, test := range tests { + actual := filter.Filter(test.input) + if !reflect.DeepEqual(actual, test.output) { + t.Errorf("expected %s, got %s", test.output[0].Term, 
actual[0].Term) + } + } +} diff --git a/analysis/lang/en/stemmer_en_snowball.go b/analysis/lang/en/stemmer_en_snowball.go new file mode 100644 index 000000000..225bb0664 --- /dev/null +++ b/analysis/lang/en/stemmer_en_snowball.go @@ -0,0 +1,49 @@ +// Copyright (c) 2020 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package en + +import ( + "github.com/blevesearch/bleve/analysis" + "github.com/blevesearch/bleve/registry" + + "github.com/blevesearch/snowballstem" + "github.com/blevesearch/snowballstem/english" +) + +const SnowballStemmerName = "stemmer_en_snowball" + +type EnglishStemmerFilter struct { +} + +func NewEnglishStemmerFilter() *EnglishStemmerFilter { + return &EnglishStemmerFilter{} +} + +func (s *EnglishStemmerFilter) Filter(input analysis.TokenStream) analysis.TokenStream { + for _, token := range input { + env := snowballstem.NewEnv(string(token.Term)) + english.Stem(env) + token.Term = []byte(env.Current()) + } + return input +} + +func EnglishStemmerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) { + return NewEnglishStemmerFilter(), nil +} + +func init() { + registry.RegisterTokenFilter(SnowballStemmerName, EnglishStemmerFilterConstructor) +} diff --git a/analysis/lang/en/stemmer_en_test.go b/analysis/lang/en/stemmer_en_test.go new file mode 100644 index 000000000..bc5016d07 --- /dev/null +++ b/analysis/lang/en/stemmer_en_test.go @@ -0,0 +1,79 @@ +// Copyright (c) 2020 
Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package en + +import ( + "reflect" + "testing" + + "github.com/blevesearch/bleve/analysis" + "github.com/blevesearch/bleve/registry" +) + +func TestSnowballEnglishStemmer(t *testing.T) { + tests := []struct { + input analysis.TokenStream + output analysis.TokenStream + }{ + { + input: analysis.TokenStream{ + &analysis.Token{ + Term: []byte("enjoy"), + }, + }, + output: analysis.TokenStream{ + &analysis.Token{ + Term: []byte("enjoy"), + }, + }, + }, + { + input: analysis.TokenStream{ + &analysis.Token{ + Term: []byte("enjoyed"), + }, + }, + output: analysis.TokenStream{ + &analysis.Token{ + Term: []byte("enjoy"), + }, + }, + }, + { + input: analysis.TokenStream{ + &analysis.Token{ + Term: []byte("enjoyable"), + }, + }, + output: analysis.TokenStream{ + &analysis.Token{ + Term: []byte("enjoy"), + }, + }, + }, + } + + cache := registry.NewCache() + filter, err := cache.TokenFilterNamed(SnowballStemmerName) + if err != nil { + t.Fatal(err) + } + for _, test := range tests { + actual := filter.Filter(test.input) + if !reflect.DeepEqual(actual, test.output) { + t.Errorf("expected %s, got %s", test.output[0].Term, actual[0].Term) + } + } +} diff --git a/analysis/lang/es/stemmer_es_snowball.go b/analysis/lang/es/stemmer_es_snowball.go new file mode 100644 index 000000000..9ee768a9d --- /dev/null +++ b/analysis/lang/es/stemmer_es_snowball.go @@ -0,0 +1,49 @@ +// Copyright (c) 2020 Couchbase, Inc. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package es + +import ( + "github.com/blevesearch/bleve/analysis" + "github.com/blevesearch/bleve/registry" + + "github.com/blevesearch/snowballstem" + "github.com/blevesearch/snowballstem/spanish" +) + +const SnowballStemmerName = "stemmer_es_snowball" + +type SpanishStemmerFilter struct { +} + +func NewSpanishStemmerFilter() *SpanishStemmerFilter { + return &SpanishStemmerFilter{} +} + +func (s *SpanishStemmerFilter) Filter(input analysis.TokenStream) analysis.TokenStream { + for _, token := range input { + env := snowballstem.NewEnv(string(token.Term)) + spanish.Stem(env) + token.Term = []byte(env.Current()) + } + return input +} + +func SpanishStemmerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) { + return NewSpanishStemmerFilter(), nil +} + +func init() { + registry.RegisterTokenFilter(SnowballStemmerName, SpanishStemmerFilterConstructor) +} diff --git a/analysis/lang/es/stemmer_es_snowball_test.go b/analysis/lang/es/stemmer_es_snowball_test.go new file mode 100644 index 000000000..d976fc821 --- /dev/null +++ b/analysis/lang/es/stemmer_es_snowball_test.go @@ -0,0 +1,79 @@ +// Copyright (c) 2020 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package es + +import ( + "reflect" + "testing" + + "github.com/blevesearch/bleve/analysis" + "github.com/blevesearch/bleve/registry" +) + +func TestSnowballSpanishStemmer(t *testing.T) { + tests := []struct { + input analysis.TokenStream + output analysis.TokenStream + }{ + { + input: analysis.TokenStream{ + &analysis.Token{ + Term: []byte("agresivos"), + }, + }, + output: analysis.TokenStream{ + &analysis.Token{ + Term: []byte("agres"), + }, + }, + }, + { + input: analysis.TokenStream{ + &analysis.Token{ + Term: []byte("agresivamente"), + }, + }, + output: analysis.TokenStream{ + &analysis.Token{ + Term: []byte("agres"), + }, + }, + }, + { + input: analysis.TokenStream{ + &analysis.Token{ + Term: []byte("agresividad"), + }, + }, + output: analysis.TokenStream{ + &analysis.Token{ + Term: []byte("agres"), + }, + }, + }, + } + + cache := registry.NewCache() + filter, err := cache.TokenFilterNamed(SnowballStemmerName) + if err != nil { + t.Fatal(err) + } + for _, test := range tests { + actual := filter.Filter(test.input) + if !reflect.DeepEqual(actual, test.output) { + t.Errorf("expected %s, got %s", test.output[0].Term, actual[0].Term) + } + } +} diff --git a/analysis/lang/fr/stemmer_fr_snowball.go b/analysis/lang/fr/stemmer_fr_snowball.go new file mode 100644 index 000000000..e137ce211 --- /dev/null +++ b/analysis/lang/fr/stemmer_fr_snowball.go @@ -0,0 +1,49 @@ +// Copyright (c) 2020 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package fr + +import ( + "github.com/blevesearch/bleve/analysis" + "github.com/blevesearch/bleve/registry" + + "github.com/blevesearch/snowballstem" + "github.com/blevesearch/snowballstem/french" +) + +const SnowballStemmerName = "stemmer_fr_snowball" + +type FrenchStemmerFilter struct { +} + +func NewFrenchStemmerFilter() *FrenchStemmerFilter { + return &FrenchStemmerFilter{} +} + +func (s *FrenchStemmerFilter) Filter(input analysis.TokenStream) analysis.TokenStream { + for _, token := range input { + env := snowballstem.NewEnv(string(token.Term)) + french.Stem(env) + token.Term = []byte(env.Current()) + } + return input +} + +func FrenchStemmerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) { + return NewFrenchStemmerFilter(), nil +} + +func init() { + registry.RegisterTokenFilter(SnowballStemmerName, FrenchStemmerFilterConstructor) +} diff --git a/analysis/lang/fr/stemmer_fr_snowball_test.go b/analysis/lang/fr/stemmer_fr_snowball_test.go new file mode 100644 index 000000000..aeafa4317 --- /dev/null +++ b/analysis/lang/fr/stemmer_fr_snowball_test.go @@ -0,0 +1,79 @@ +// Copyright (c) 2020 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package fr + +import ( + "reflect" + "testing" + + "github.com/blevesearch/bleve/analysis" + "github.com/blevesearch/bleve/registry" +) + +func TestSnowballFrenchStemmer(t *testing.T) { + tests := []struct { + input analysis.TokenStream + output analysis.TokenStream + }{ + { + input: analysis.TokenStream{ + &analysis.Token{ + Term: []byte("antagoniste"), + }, + }, + output: analysis.TokenStream{ + &analysis.Token{ + Term: []byte("antagon"), + }, + }, + }, + { + input: analysis.TokenStream{ + &analysis.Token{ + Term: []byte("barbouillait"), + }, + }, + output: analysis.TokenStream{ + &analysis.Token{ + Term: []byte("barbouill"), + }, + }, + }, + { + input: analysis.TokenStream{ + &analysis.Token{ + Term: []byte("calculateur"), + }, + }, + output: analysis.TokenStream{ + &analysis.Token{ + Term: []byte("calcul"), + }, + }, + }, + } + + cache := registry.NewCache() + filter, err := cache.TokenFilterNamed(SnowballStemmerName) + if err != nil { + t.Fatal(err) + } + for _, test := range tests { + actual := filter.Filter(test.input) + if !reflect.DeepEqual(actual, test.output) { + t.Errorf("expected %s, got %s", test.output[0].Term, actual[0].Term) + } + } +} diff --git a/analysis/lang/it/stemmer_it_snowball.go b/analysis/lang/it/stemmer_it_snowball.go new file mode 100644 index 000000000..04c6bd701 --- /dev/null +++ b/analysis/lang/it/stemmer_it_snowball.go @@ -0,0 +1,49 @@ +// Copyright (c) 2020 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package it + +import ( + "github.com/blevesearch/bleve/analysis" + "github.com/blevesearch/bleve/registry" + + "github.com/blevesearch/snowballstem" + "github.com/blevesearch/snowballstem/italian" +) + +const SnowballStemmerName = "stemmer_it_snowball" + +type ItalianStemmerFilter struct { +} + +func NewItalianStemmerFilter() *ItalianStemmerFilter { + return &ItalianStemmerFilter{} +} + +func (s *ItalianStemmerFilter) Filter(input analysis.TokenStream) analysis.TokenStream { + for _, token := range input { + env := snowballstem.NewEnv(string(token.Term)) + italian.Stem(env) + token.Term = []byte(env.Current()) + } + return input +} + +func ItalianStemmerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) { + return NewItalianStemmerFilter(), nil +} + +func init() { + registry.RegisterTokenFilter(SnowballStemmerName, ItalianStemmerFilterConstructor) +} diff --git a/analysis/lang/it/stemmer_it_snowball_test.go b/analysis/lang/it/stemmer_it_snowball_test.go new file mode 100644 index 000000000..844f2f543 --- /dev/null +++ b/analysis/lang/it/stemmer_it_snowball_test.go @@ -0,0 +1,79 @@ +// Copyright (c) 2020 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package it + +import ( + "reflect" + "testing" + + "github.com/blevesearch/bleve/analysis" + "github.com/blevesearch/bleve/registry" +) + +func TestSnowballItalianStemmer(t *testing.T) { + tests := []struct { + input analysis.TokenStream + output analysis.TokenStream + }{ + { + input: analysis.TokenStream{ + &analysis.Token{ + Term: []byte("aizzata"), + }, + }, + output: analysis.TokenStream{ + &analysis.Token{ + Term: []byte("aizz"), + }, + }, + }, + { + input: analysis.TokenStream{ + &analysis.Token{ + Term: []byte("aizzargli"), + }, + }, + output: analysis.TokenStream{ + &analysis.Token{ + Term: []byte("aizz"), + }, + }, + }, + { + input: analysis.TokenStream{ + &analysis.Token{ + Term: []byte("aizzasse"), + }, + }, + output: analysis.TokenStream{ + &analysis.Token{ + Term: []byte("aizz"), + }, + }, + }, + } + + cache := registry.NewCache() + filter, err := cache.TokenFilterNamed(SnowballStemmerName) + if err != nil { + t.Fatal(err) + } + for _, test := range tests { + actual := filter.Filter(test.input) + if !reflect.DeepEqual(actual, test.output) { + t.Errorf("expected %s, got %s", test.output[0].Term, actual[0].Term) + } + } +} From c12c3e7b3130d86f7fd7ee9311225997616bf8fc Mon Sep 17 00:00:00 2001 From: Sreekanth Sivasankaran Date: Mon, 16 Mar 2020 15:09:55 +0530 Subject: [PATCH 645/728] Initialise the root snapshot's DecRef errs with Close() During the scorch's Close call, the root snapshot's DecRef return errs were ignored. Fixing that to bubble up the errors so that the higher levels may listen and act on it. 
--- index/scorch/scorch.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/index/scorch/scorch.go b/index/scorch/scorch.go index 7a1046fc5..a88486e27 100644 --- a/index/scorch/scorch.go +++ b/index/scorch/scorch.go @@ -263,7 +263,7 @@ func (s *Scorch) Close() (err error) { err = s.rootBolt.Close() s.rootLock.Lock() if s.root != nil { - _ = s.root.DecRef() + err = s.root.DecRef() } s.root = nil s.rootLock.Unlock() From 1b6b97982689cabf814a151a6f4827db67b1d9c4 Mon Sep 17 00:00:00 2001 From: Sreekanth Sivasankaran Date: Tue, 17 Mar 2020 08:35:34 +0530 Subject: [PATCH 646/728] Fixing the err override for bolt's Close --- index/scorch/scorch.go | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/index/scorch/scorch.go b/index/scorch/scorch.go index a88486e27..796713148 100644 --- a/index/scorch/scorch.go +++ b/index/scorch/scorch.go @@ -263,7 +263,10 @@ func (s *Scorch) Close() (err error) { err = s.rootBolt.Close() s.rootLock.Lock() if s.root != nil { - err = s.root.DecRef() + err2 := s.root.DecRef() + if err == nil { + err = err2 + } } s.root = nil s.rootLock.Unlock() From dce4ebbcd24dbf56bb2211188ca368dc572189ec Mon Sep 17 00:00:00 2001 From: Sreekanth Sivasankaran Date: Tue, 17 Mar 2020 19:56:05 +0530 Subject: [PATCH 647/728] Fix for issue #1352 Resetting the closeCh in Open api so that scorch index's main routine trio remains in their active work loops. 
--- index/scorch/scorch.go | 1 + index/scorch/scorch_test.go | 110 ++++++++++++++++++++++++++++++++++++ 2 files changed, 111 insertions(+) diff --git a/index/scorch/scorch.go b/index/scorch/scorch.go index 796713148..8db3fa966 100644 --- a/index/scorch/scorch.go +++ b/index/scorch/scorch.go @@ -223,6 +223,7 @@ func (s *Scorch) openBolt() error { s.introducerNotifier = make(chan *epochWatcher, 1) s.revertToSnapshots = make(chan *snapshotReversion) s.persisterNotifier = make(chan *epochWatcher, 1) + s.closeCh = make(chan struct{}) if !s.readOnly && s.path != "" { err := s.removeOldZapFiles() // Before persister or merger create any new files. diff --git a/index/scorch/scorch_test.go b/index/scorch/scorch_test.go index 550a584ae..302a86a5e 100644 --- a/index/scorch/scorch_test.go +++ b/index/scorch/scorch_test.go @@ -163,6 +163,116 @@ func TestIndexOpenReopen(t *testing.T) { } } +func TestIndexOpenReopenWithInsert(t *testing.T) { + cfg := CreateConfig("TestIndexOpenReopen") + err := InitTest(cfg) + if err != nil { + t.Fatal(err) + } + defer func() { + err := DestroyTest(cfg) + if err != nil { + t.Log(err) + } + }() + + analysisQueue := index.NewAnalysisQueue(1) + idx, err := NewScorch(Name, cfg, analysisQueue) + if err != nil { + t.Fatal(err) + } + err = idx.Open() + if err != nil { + t.Errorf("error opening index: %v", err) + } + + var expectedCount uint64 + reader, err := idx.Reader() + if err != nil { + t.Fatal(err) + } + docCount, err := reader.DocCount() + if err != nil { + t.Error(err) + } + if docCount != expectedCount { + t.Errorf("Expected document count to be %d got %d", expectedCount, docCount) + } + err = reader.Close() + if err != nil { + t.Fatal(err) + } + + // insert a doc + doc := document.NewDocument("1") + doc.AddField(document.NewTextField("name", []uint64{}, []byte("test"))) + err = idx.Update(doc) + if err != nil { + t.Errorf("Error updating index: %v", err) + } + expectedCount++ + + reader, err = idx.Reader() + if err != nil { + t.Fatal(err) + } 
+ docCount, err = reader.DocCount() + if err != nil { + t.Error(err) + } + if docCount != expectedCount { + t.Errorf("Expected document count to be %d got %d", expectedCount, docCount) + } + err = reader.Close() + if err != nil { + t.Fatal(err) + } + + // now close it + err = idx.Close() + if err != nil { + t.Fatal(err) + } + + // try to open the index and insert data + err = idx.Open() + if err != nil { + t.Errorf("error opening index: %v", err) + } + + // insert a doc + doc = document.NewDocument("2") + doc.AddField(document.NewTextField("name", []uint64{}, []byte("test2"))) + err = idx.Update(doc) + if err != nil { + t.Errorf("Error updating index: %v", err) + } + expectedCount++ + + // check the doc count again after reopening it + reader, err = idx.Reader() + if err != nil { + t.Fatal(err) + } + docCount, err = reader.DocCount() + if err != nil { + t.Error(err) + } + if docCount != expectedCount { + t.Errorf("Expected document count to be %d got %d", expectedCount, docCount) + } + err = reader.Close() + if err != nil { + t.Fatal(err) + } + + // now close it + err = idx.Close() + if err != nil { + t.Fatal(err) + } +} + func TestIndexInsert(t *testing.T) { cfg := CreateConfig("TestIndexInsert") err := InitTest(cfg) From bac2f33954b316485be0f796e2964a9e773ca204 Mon Sep 17 00:00:00 2001 From: Sreekanth Sivasankaran Date: Mon, 16 Mar 2020 13:11:49 +0530 Subject: [PATCH 648/728] MB-38303 - merger leaks index snapshot refCounts on index closure Upon an index closure event, the file merger skips the reference count decrement operations at line no: https://github.com/blevesearch/ bleve/blob/05d86ea8f6e30456949f612cf68cf4a27ce8c9c5/index/scorch/merge.go#L261 leading to the leaking of index snapshots. This will further lead to the segment file leaking as those segment files will never get ceremoniously un-mapped or handle closed in the event of an index closure. 
details ref - https://issues.couchbase.com/browse/MB-38303 --- index/scorch/merge.go | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/index/scorch/merge.go b/index/scorch/merge.go index d7144772f..c39e3117d 100644 --- a/index/scorch/merge.go +++ b/index/scorch/merge.go @@ -159,6 +159,21 @@ func (s *Scorch) planMergeAtSnapshot(ourSnapshot *IndexSnapshot, // process tasks in serial for now var notifications []chan *IndexSnapshot var filenames []string + // clean up any pending notifications from introducer on exit + // from an index closure. + defer func() { + for _, notification := range notifications { + select { + case newSnapshot := <-notification: + atomic.AddUint64(&s.stats.TotFileMergeIntroductionsDone, 1) + if newSnapshot != nil { + _ = newSnapshot.DecRef() + } + default: + } + } + }() + for _, task := range resultMergePlan.Tasks { if len(task.Segments) == 0 { atomic.AddUint64(&s.stats.TotFileMergePlanTasksSegmentsEmpty, 1) @@ -217,6 +232,7 @@ func (s *Scorch) planMergeAtSnapshot(ourSnapshot *IndexSnapshot, s.unmarkIneligibleForRemoval(filename) atomic.AddUint64(&s.stats.TotFileMergePlanTasksErr, 1) if err == segment.ErrClosed { + // handle any pending index snapshot introduction notifications on exit return err } return fmt.Errorf("merging failed: %v", err) @@ -253,6 +269,7 @@ func (s *Scorch) planMergeAtSnapshot(ourSnapshot *IndexSnapshot, // give it to the introducer select { case <-s.closeCh: + // handle any pending index snapshot introduction notifications on exit _ = seg.Close() return segment.ErrClosed case s.merges <- sm: @@ -265,6 +282,7 @@ func (s *Scorch) planMergeAtSnapshot(ourSnapshot *IndexSnapshot, for _, notification := range notifications { select { case <-s.closeCh: + // handle any pending index snapshot introduction notifications on exit atomic.AddUint64(&s.stats.TotFileMergeIntroductionsSkipped, 1) return segment.ErrClosed case newSnapshot := <-notification: From 0b8c270ec5ef44df759e8c9a33fe348f7ca3fde8 Mon Sep 17 
00:00:00 2001 From: Sreekanth Sivasankaran Date: Wed, 18 Mar 2020 17:15:42 +0530 Subject: [PATCH 649/728] Offline RollbackPoints/Rollback apis for scorch index This change comprises of enabling the rollback related scorch APIs to work only with closed/offline indexes. Both the API signatures have modified to accept an index path pointing to the scorch index location to accommodate this change. The two API approach(RollbackPoints api for fetching the rollback points and the Rollback api for performing the actual rollback) is retained in favour of modularity and simplicity of usage. The Rollback api would make the meta/bolt store with the given rollback point as the most recent epoch to be loaded. During the process of restoring the index state to the relevant rollback point, the new Rollback implementation cleans up the irrelevant epoch data from the bolt store. This is done to favour a lean version of persisted epochs in root bolt anytime to further lead a more efficient disk clean up/utilisation. Also, this helps to better safe guard against any potential recursive rollback request as we aren't introducing any newer data into the system. --- index/scorch/snapshot_rollback.go | 135 ++++++++++++++++--------- index/scorch/snapshot_rollback_test.go | 74 +++++++++----- 2 files changed, 138 insertions(+), 71 deletions(-) diff --git a/index/scorch/snapshot_rollback.go b/index/scorch/snapshot_rollback.go index 470868d0e..d9f89f46f 100644 --- a/index/scorch/snapshot_rollback.go +++ b/index/scorch/snapshot_rollback.go @@ -17,6 +17,7 @@ package scorch import ( "fmt" "log" + "os" "github.com/blevesearch/bleve/index/scorch/segment" bolt "github.com/etcd-io/bbolt" @@ -34,13 +35,22 @@ func (r *RollbackPoint) GetInternal(key []byte) []byte { // RollbackPoints returns an array of rollback points available for // the application to rollback to, with more recent rollback points // (higher epochs) coming first. 
-func (s *Scorch) RollbackPoints() ([]*RollbackPoint, error) { - if s.rootBolt == nil { - return nil, fmt.Errorf("RollbackPoints: root is nil") +func RollbackPoints(path string) ([]*RollbackPoint, error) { + if len(path) == 0 { + return nil, fmt.Errorf("RollbackPoints: invalid path") + } + + rootBoltPath := path + string(os.PathSeparator) + "root.bolt" + rootBoltOpt := &bolt.Options{ + ReadOnly: true, + } + rootBolt, err := bolt.Open(rootBoltPath, 0600, rootBoltOpt) + if err != nil || rootBolt == nil { + return nil, err } // start a read-only bolt transaction - tx, err := s.rootBolt.Begin(false) + tx, err := rootBolt.Begin(false) if err != nil { return nil, fmt.Errorf("RollbackPoints: failed to start" + " read-only transaction") @@ -49,6 +59,7 @@ func (s *Scorch) RollbackPoints() ([]*RollbackPoint, error) { // read-only bolt transactions to be rolled back defer func() { _ = tx.Rollback() + _ = rootBolt.Close() }() snapshots := tx.Bucket(boltSnapshotsBucket) @@ -105,69 +116,97 @@ func (s *Scorch) RollbackPoints() ([]*RollbackPoint, error) { return rollbackPoints, nil } -// Rollback atomically and durably (if unsafeBatch is unset) brings -// the store back to the point in time as represented by the -// RollbackPoint. Rollback() should only be passed a RollbackPoint -// that came from the same store using the RollbackPoints() API. -func (s *Scorch) Rollback(to *RollbackPoint) error { +// Rollback atomically and durably brings the store back to the point +// in time as represented by the RollbackPoint. +// Rollback() should only be passed a RollbackPoint that came from the +// same store using the RollbackPoints() API along with the index path. 
+func Rollback(path string, to *RollbackPoint) error { if to == nil { return fmt.Errorf("Rollback: RollbackPoint is nil") } - - if s.rootBolt == nil { - return fmt.Errorf("Rollback: root is nil") + if len(path) == 0 { + return fmt.Errorf("Rollback: index path is empty") } - revert := &snapshotReversion{} - - s.rootLock.Lock() + rootBoltPath := path + string(os.PathSeparator) + "root.bolt" + rootBoltOpt := &bolt.Options{ + ReadOnly: false, + } + rootBolt, err := bolt.Open(rootBoltPath, 0600, rootBoltOpt) + if err != nil || rootBolt == nil { + return err + } + defer func() { + err1 := rootBolt.Close() + if err1 != nil && err == nil { + err = err1 + } + }() - err := s.rootBolt.View(func(tx *bolt.Tx) error { + // pick all the persisted epochs in bolt store + var found bool + var persistedEpochs []uint64 + err = rootBolt.View(func(tx *bolt.Tx) error { snapshots := tx.Bucket(boltSnapshotsBucket) if snapshots == nil { - return fmt.Errorf("Rollback: no snapshots available") - } - - pos := segment.EncodeUvarintAscending(nil, to.epoch) - - snapshot := snapshots.Bucket(pos) - if snapshot == nil { - return fmt.Errorf("Rollback: snapshot not found") + return nil } - - indexSnapshot, err := s.loadSnapshot(snapshot) - if err != nil { - return fmt.Errorf("Rollback: unable to load snapshot: %v", err) - } - - // add segments referenced by loaded index snapshot to the - // ineligibleForRemoval map - for _, segSnap := range indexSnapshot.segment { - filename := zapFileName(segSnap.id) - s.ineligibleForRemoval[filename] = true + sc := snapshots.Cursor() + for sk, _ := sc.Last(); sk != nil; sk, _ = sc.Prev() { + _, snapshotEpoch, err := segment.DecodeUvarintAscending(sk) + if err != nil { + continue + } + if snapshotEpoch == to.epoch { + found = true + } + persistedEpochs = append(persistedEpochs, snapshotEpoch) } - - revert.snapshot = indexSnapshot - revert.applied = make(chan error) - revert.persisted = make(chan error) - return nil }) - s.rootLock.Unlock() + if len(persistedEpochs) 
== 0 { + return fmt.Errorf("Rollback: no persisted epochs found in bolt") + } + if !found { + return fmt.Errorf("Rollback: target epoch %d not found in bolt", to.epoch) + } + // start a write transaction + tx, err := rootBolt.Begin(true) if err != nil { return err } - // introduce the reversion - s.revertToSnapshots <- revert + defer func() { + if err == nil { + err = tx.Commit() + } else { + _ = tx.Rollback() + } + if err == nil { + err = rootBolt.Sync() + } + }() - // block until this snapshot is applied - err = <-revert.applied - if err != nil { - return fmt.Errorf("Rollback: failed with err: %v", err) + snapshots := tx.Bucket(boltSnapshotsBucket) + if snapshots == nil { + return nil + } + for _, epoch := range persistedEpochs { + k := segment.EncodeUvarintAscending(nil, epoch) + if err != nil { + continue + } + if epoch == to.epoch { + // return here as it already processed until the given epoch + return nil + } + err = snapshots.DeleteBucket(k) + if err == bolt.ErrBucketNotFound { + err = nil + } } - return <-revert.persisted + return err } diff --git a/index/scorch/snapshot_rollback_test.go b/index/scorch/snapshot_rollback_test.go index 73523a0ba..e137e0f09 100644 --- a/index/scorch/snapshot_rollback_test.go +++ b/index/scorch/snapshot_rollback_test.go @@ -44,38 +44,26 @@ func TestIndexRollback(t *testing.T) { if err != nil { t.Fatal(err) } - defer func() { - err := idx.Close() - if err != nil { - t.Fatal(err) - } - }() - sh, ok := idx.(*Scorch) + _, ok := idx.(*Scorch) if !ok { t.Fatalf("Not a scorch index?") } - err = sh.openBolt() - if err != nil { - t.Fatalf("error opening index: %v", err) - } - - // start background goroutines except for the merger, which - // simulates a super slow merger - sh.asyncTasks.Add(2) - go sh.mainLoop() - go sh.persisterLoop() - + indexPath, _ := cfg["path"].(string) // should have no rollback points initially - rollbackPoints, err := sh.RollbackPoints() - if err != nil { + rollbackPoints, err := RollbackPoints(indexPath) + if 
err == nil { t.Fatalf("expected no err, got: %v, %d", err, len(rollbackPoints)) } if len(rollbackPoints) != 0 { t.Fatalf("expected no rollbackPoints, got %d", len(rollbackPoints)) } + err = idx.Open() + if err != nil { + t.Fatal(err) + } // create a batch, insert 2 new documents batch := index.NewBatch() doc := document.NewDocument("1") @@ -98,8 +86,13 @@ func TestIndexRollback(t *testing.T) { _ = readerSlow.Close() }() + err = idx.Close() + if err != nil { + t.Fatal(err) + } + // fetch rollback points after first batch - rollbackPoints, err = sh.RollbackPoints() + rollbackPoints, err = RollbackPoints(indexPath) if err != nil { t.Fatalf("expected no err, got: %v, %d", err, len(rollbackPoints)) } @@ -110,6 +103,10 @@ func TestIndexRollback(t *testing.T) { // set this as a rollback point for the future rollbackPoint := rollbackPoints[0] + err = idx.Open() + if err != nil { + t.Fatal(err) + } // create another batch, insert 2 new documents, and delete an existing one batch = index.NewBatch() doc = document.NewDocument("3") @@ -125,7 +122,12 @@ func TestIndexRollback(t *testing.T) { t.Fatal(err) } - rollbackPointsB, err := sh.RollbackPoints() + err = idx.Close() + if err != nil { + t.Fatal(err) + } + + rollbackPointsB, err := RollbackPoints(indexPath) if err != nil || len(rollbackPointsB) <= len(rollbackPoints) { t.Fatalf("expected no err, got: %v, %d", err, len(rollbackPointsB)) } @@ -140,6 +142,11 @@ func TestIndexRollback(t *testing.T) { t.Fatalf("expected rollbackPoint epoch to still be available") } + err = idx.Open() + if err != nil { + t.Fatal(err) + } + reader, err := idx.Reader() if err != nil { t.Fatal(err) @@ -176,8 +183,24 @@ func TestIndexRollback(t *testing.T) { t.Fatal(err) } + err = idx.Close() + if err != nil { + t.Fatal(err) + } + + // rollback to a non existing rollback point + err = Rollback(indexPath, &RollbackPoint{epoch: 100}) + if err == nil { + t.Fatalf("expected err: Rollback: target epoch 100 not found in bolt") + } + // rollback to the 
selected rollback point - err = sh.Rollback(rollbackPoint) + err = Rollback(indexPath, rollbackPoint) + if err != nil { + t.Fatal(err) + } + + err = idx.Open() if err != nil { t.Fatal(err) } @@ -217,4 +240,9 @@ func TestIndexRollback(t *testing.T) { if err != nil { t.Fatal(err) } + + err = idx.Close() + if err != nil { + t.Fatal(err) + } } From a9895fdf9c72cfaa202128a963697d9a98765369 Mon Sep 17 00:00:00 2001 From: Mohsen Samiei <34813843+mohsensamiei@users.noreply.github.com> Date: Wed, 25 Mar 2020 06:31:57 +0430 Subject: [PATCH 650/728] chore: fixes bbolt url (#1357) --- index/scorch/persister.go | 2 +- index/scorch/scorch.go | 2 +- index/scorch/snapshot_rollback.go | 2 +- index/store/boltdb/iterator.go | 2 +- index/store/boltdb/reader.go | 2 +- index/store/boltdb/store.go | 2 +- index/store/boltdb/store_test.go | 2 +- vendor/manifest | 2 +- 8 files changed, 8 insertions(+), 8 deletions(-) diff --git a/index/scorch/persister.go b/index/scorch/persister.go index b141cfda6..d7c335047 100644 --- a/index/scorch/persister.go +++ b/index/scorch/persister.go @@ -33,7 +33,7 @@ import ( "github.com/blevesearch/bleve/index" "github.com/blevesearch/bleve/index/scorch/segment" "github.com/blevesearch/bleve/index/scorch/segment/zap" - bolt "github.com/etcd-io/bbolt" + bolt "go.etcd.io/bbolt" ) var DefaultChunkFactor uint32 = 1024 diff --git a/index/scorch/scorch.go b/index/scorch/scorch.go index 8db3fa966..67154544b 100644 --- a/index/scorch/scorch.go +++ b/index/scorch/scorch.go @@ -31,7 +31,7 @@ import ( "github.com/blevesearch/bleve/index/scorch/segment/zap" "github.com/blevesearch/bleve/index/store" "github.com/blevesearch/bleve/registry" - bolt "github.com/etcd-io/bbolt" + bolt "go.etcd.io/bbolt" ) const Name = "scorch" diff --git a/index/scorch/snapshot_rollback.go b/index/scorch/snapshot_rollback.go index 470868d0e..4e13aea9a 100644 --- a/index/scorch/snapshot_rollback.go +++ b/index/scorch/snapshot_rollback.go @@ -19,7 +19,7 @@ import ( "log" 
"github.com/blevesearch/bleve/index/scorch/segment" - bolt "github.com/etcd-io/bbolt" + bolt "go.etcd.io/bbolt" ) type RollbackPoint struct { diff --git a/index/store/boltdb/iterator.go b/index/store/boltdb/iterator.go index 4b5019f1f..cf4da87c3 100644 --- a/index/store/boltdb/iterator.go +++ b/index/store/boltdb/iterator.go @@ -17,7 +17,7 @@ package boltdb import ( "bytes" - bolt "github.com/etcd-io/bbolt" + bolt "go.etcd.io/bbolt" ) type Iterator struct { diff --git a/index/store/boltdb/reader.go b/index/store/boltdb/reader.go index 4cd94183c..7977ebbe5 100644 --- a/index/store/boltdb/reader.go +++ b/index/store/boltdb/reader.go @@ -16,7 +16,7 @@ package boltdb import ( "github.com/blevesearch/bleve/index/store" - bolt "github.com/etcd-io/bbolt" + bolt "go.etcd.io/bbolt" ) type Reader struct { diff --git a/index/store/boltdb/store.go b/index/store/boltdb/store.go index 56613d531..3c749693c 100644 --- a/index/store/boltdb/store.go +++ b/index/store/boltdb/store.go @@ -30,7 +30,7 @@ import ( "github.com/blevesearch/bleve/index/store" "github.com/blevesearch/bleve/registry" - bolt "github.com/etcd-io/bbolt" + bolt "go.etcd.io/bbolt" ) const ( diff --git a/index/store/boltdb/store_test.go b/index/store/boltdb/store_test.go index 6411c239c..ae4cca3e0 100644 --- a/index/store/boltdb/store_test.go +++ b/index/store/boltdb/store_test.go @@ -20,7 +20,7 @@ import ( "github.com/blevesearch/bleve/index/store" "github.com/blevesearch/bleve/index/store/test" - bolt "github.com/etcd-io/bbolt" + bolt "go.etcd.io/bbolt" ) func open(t *testing.T, mo store.MergeOperator) store.KVStore { diff --git a/vendor/manifest b/vendor/manifest index 3d906dd66..431363eef 100644 --- a/vendor/manifest +++ b/vendor/manifest @@ -26,7 +26,7 @@ "notests": true }, { - "importpath": "github.com/etcd-io/bbolt", + "importpath": "go.etcd.io/bbolt", "repository": "https://github.com/etcd-io/bbolt", "vcs": "", "revision": "7ee3ded59d4835e10f3e7d0f7603c42aa5e83820", From 
7da5cb5eb65ac807e3e87b86251622362baa6686 Mon Sep 17 00:00:00 2001 From: Sreekanth Sivasankaran Date: Mon, 23 Mar 2020 14:23:08 +0530 Subject: [PATCH 651/728] Improving ownership handling during merge introductions As a part of notifying the merger during the merge introductions, the introducer bumps the newer snapshot's reference count. But in situations where the index is getting closed, there is a chance that the merge requested merger(file/in memory) routines would have already exited. This would cause the newly introduced, ref count bumped snapshot to leak as there is none to decrement the bumped up reference count. The fix is about tighening the merge introduction notifications handling at the merger side. As the introducer non-preemptively handles the intro notify channel, its safe for the merger to blockingly awaits for each of those merge introduction notifications and decrements the reference counts where applicable. Unlike earlier, merger is processing each of the merge tasks and awaits its introductions to complete before proceeding to the next merge task. We don't expect any serious performance wrinkles here as the introducer ought to be faster with the merge introductions compared to the merge tasks. Added a few introduction related stats to track any performance considerations raising out of this. --- index/scorch/introducer.go | 2 ++ index/scorch/merge.go | 59 +++++++++++++------------------------- index/scorch/stats.go | 10 ++++--- 3 files changed, 28 insertions(+), 43 deletions(-) diff --git a/index/scorch/introducer.go b/index/scorch/introducer.go index ac627796f..d3f2ce086 100644 --- a/index/scorch/introducer.go +++ b/index/scorch/introducer.go @@ -312,6 +312,8 @@ func (s *Scorch) introducePersist(persist *persistIntroduction) { close(persist.applied) } +// The introducer should definitely handle the segmentMerge.notify +// channel before exiting the introduceMerge. 
func (s *Scorch) introduceMerge(nextMerge *segmentMerge) { atomic.AddUint64(&s.stats.TotIntroduceMergeBeg, 1) defer atomic.AddUint64(&s.stats.TotIntroduceMergeEnd, 1) diff --git a/index/scorch/merge.go b/index/scorch/merge.go index c39e3117d..6faad6809 100644 --- a/index/scorch/merge.go +++ b/index/scorch/merge.go @@ -157,22 +157,7 @@ func (s *Scorch) planMergeAtSnapshot(ourSnapshot *IndexSnapshot, atomic.AddUint64(&s.stats.TotFileMergePlanTasks, uint64(len(resultMergePlan.Tasks))) // process tasks in serial for now - var notifications []chan *IndexSnapshot var filenames []string - // clean up any pending notifications from introducer on exit - // from an index closure. - defer func() { - for _, notification := range notifications { - select { - case newSnapshot := <-notification: - atomic.AddUint64(&s.stats.TotFileMergeIntroductionsDone, 1) - if newSnapshot != nil { - _ = newSnapshot.DecRef() - } - default: - } - } - }() for _, task := range resultMergePlan.Tasks { if len(task.Segments) == 0 { @@ -232,7 +217,6 @@ func (s *Scorch) planMergeAtSnapshot(ourSnapshot *IndexSnapshot, s.unmarkIneligibleForRemoval(filename) atomic.AddUint64(&s.stats.TotFileMergePlanTasksErr, 1) if err == segment.ErrClosed { - // handle any pending index snapshot introduction notifications on exit return err } return fmt.Errorf("merging failed: %v", err) @@ -262,35 +246,33 @@ func (s *Scorch) planMergeAtSnapshot(ourSnapshot *IndexSnapshot, old: oldMap, oldNewDocNums: oldNewDocNums, new: seg, - notify: make(chan *IndexSnapshot, 1), + notify: make(chan *IndexSnapshot), } - notifications = append(notifications, sm.notify) // give it to the introducer select { case <-s.closeCh: - // handle any pending index snapshot introduction notifications on exit _ = seg.Close() return segment.ErrClosed case s.merges <- sm: atomic.AddUint64(&s.stats.TotFileMergeIntroductions, 1) } - atomic.AddUint64(&s.stats.TotFileMergePlanTasksDone, 1) - } - - for _, notification := range notifications { - select { - case 
<-s.closeCh: - // handle any pending index snapshot introduction notifications on exit - atomic.AddUint64(&s.stats.TotFileMergeIntroductionsSkipped, 1) - return segment.ErrClosed - case newSnapshot := <-notification: - atomic.AddUint64(&s.stats.TotFileMergeIntroductionsDone, 1) - if newSnapshot != nil { - _ = newSnapshot.DecRef() - } + introStartTime := time.Now() + // it is safe to blockingly wait for the merge introduction + // here as the introducer is bound to handle the notify channel. + newSnapshot := <-sm.notify + introTime := uint64(time.Since(introStartTime)) + atomic.AddUint64(&s.stats.TotFileMergeZapIntroductionTime, introTime) + if atomic.LoadUint64(&s.stats.MaxFileMergeZapIntroductionTime) < introTime { + atomic.StoreUint64(&s.stats.MaxFileMergeZapIntroductionTime, introTime) + } + atomic.AddUint64(&s.stats.TotFileMergeIntroductionsDone, 1) + if newSnapshot != nil { + _ = newSnapshot.DecRef() } + + atomic.AddUint64(&s.stats.TotFileMergePlanTasksDone, 1) } // once all the newly merged segment introductions are done, @@ -362,7 +344,7 @@ func (s *Scorch) mergeSegmentBases(snapshot *IndexSnapshot, old: make(map[uint64]*SegmentSnapshot), oldNewDocNums: make(map[uint64][]uint64), new: seg, - notify: make(chan *IndexSnapshot, 1), + notify: make(chan *IndexSnapshot), } for i, idx := range sbsIndexes { @@ -378,14 +360,13 @@ func (s *Scorch) mergeSegmentBases(snapshot *IndexSnapshot, case s.merges <- sm: } - select { // wait for introduction to complete - case <-s.closeCh: - return nil, 0, segment.ErrClosed - case newSnapshot := <-sm.notify: + // blockingly wait for the introduction to complete + newSnapshot := <-sm.notify + if newSnapshot != nil { atomic.AddUint64(&s.stats.TotMemMergeSegments, uint64(len(sbs))) atomic.AddUint64(&s.stats.TotMemMergeDone, 1) - return newSnapshot, newSegmentID, nil } + return newSnapshot, newSegmentID, nil } func (s *Scorch) ReportBytesWritten(bytesWritten uint64) { diff --git a/index/scorch/stats.go b/index/scorch/stats.go index 
6549fddf5..e638362a7 100644 --- a/index/scorch/stats.go +++ b/index/scorch/stats.go @@ -98,10 +98,12 @@ type Stats struct { TotFileSegmentsAtRoot uint64 TotFileMergeWrittenBytes uint64 - TotFileMergeZapBeg uint64 - TotFileMergeZapEnd uint64 - TotFileMergeZapTime uint64 - MaxFileMergeZapTime uint64 + TotFileMergeZapBeg uint64 + TotFileMergeZapEnd uint64 + TotFileMergeZapTime uint64 + MaxFileMergeZapTime uint64 + TotFileMergeZapIntroductionTime uint64 + MaxFileMergeZapIntroductionTime uint64 TotFileMergeIntroductions uint64 TotFileMergeIntroductionsDone uint64 From 9c779503146ab4f0080943e4f14c64bbb4aa397b Mon Sep 17 00:00:00 2001 From: Sreekanth Sivasankaran Date: Wed, 25 Mar 2020 15:23:16 +0530 Subject: [PATCH 652/728] cleaning up the online rollback logic --- index/scorch/introducer.go | 80 -------------------------------------- index/scorch/scorch.go | 2 - 2 files changed, 82 deletions(-) diff --git a/index/scorch/introducer.go b/index/scorch/introducer.go index ac627796f..9a27de94e 100644 --- a/index/scorch/introducer.go +++ b/index/scorch/introducer.go @@ -77,11 +77,6 @@ OUTER: case persist := <-s.persists: s.introducePersist(persist) - case revertTo := <-s.revertToSnapshots: - err := s.revertToSnapshot(revertTo) - if err != nil { - continue OUTER - } } var epochCurr uint64 @@ -443,81 +438,6 @@ func (s *Scorch) introduceMerge(nextMerge *segmentMerge) { close(nextMerge.notify) } -func (s *Scorch) revertToSnapshot(revertTo *snapshotReversion) error { - atomic.AddUint64(&s.stats.TotIntroduceRevertBeg, 1) - defer atomic.AddUint64(&s.stats.TotIntroduceRevertEnd, 1) - - if revertTo.snapshot == nil { - err := fmt.Errorf("Cannot revert to a nil snapshot") - revertTo.applied <- err - return err - } - - // acquire lock - s.rootLock.Lock() - - // prepare a new index snapshot, based on next snapshot - newSnapshot := &IndexSnapshot{ - parent: s, - segment: make([]*SegmentSnapshot, len(revertTo.snapshot.segment)), - offsets: revertTo.snapshot.offsets, - internal: 
revertTo.snapshot.internal, - epoch: s.nextSnapshotEpoch, - refs: 1, - creator: "revertToSnapshot", - } - s.nextSnapshotEpoch++ - - var docsToPersistCount, memSegments, fileSegments uint64 - // iterate through segments - for i, segmentSnapshot := range revertTo.snapshot.segment { - newSnapshot.segment[i] = &SegmentSnapshot{ - id: segmentSnapshot.id, - segment: segmentSnapshot.segment, - deleted: segmentSnapshot.deleted, - cachedDocs: segmentSnapshot.cachedDocs, - creator: segmentSnapshot.creator, - } - newSnapshot.segment[i].segment.AddRef() - - // remove segment from ineligibleForRemoval map - filename := zapFileName(segmentSnapshot.id) - delete(s.ineligibleForRemoval, filename) - - if isMemorySegment(segmentSnapshot) { - docsToPersistCount += segmentSnapshot.Count() - memSegments++ - } else { - fileSegments++ - } - } - - atomic.StoreUint64(&s.stats.TotItemsToPersist, docsToPersistCount) - atomic.StoreUint64(&s.stats.TotMemorySegmentsAtRoot, memSegments) - atomic.StoreUint64(&s.stats.TotFileSegmentsAtRoot, fileSegments) - - if revertTo.persisted != nil { - s.rootPersisted = append(s.rootPersisted, revertTo.persisted) - } - - newSnapshot.updateSize() - // swap in new snapshot - rootPrev := s.root - s.root = newSnapshot - - atomic.StoreUint64(&s.stats.CurRootEpoch, s.root.epoch) - // release lock - s.rootLock.Unlock() - - if rootPrev != nil { - _ = rootPrev.DecRef() - } - - close(revertTo.applied) - - return nil -} - func isMemorySegment(s *SegmentSnapshot) bool { switch s.segment.(type) { case *zap.SegmentBase: diff --git a/index/scorch/scorch.go b/index/scorch/scorch.go index 7a1046fc5..884c85845 100644 --- a/index/scorch/scorch.go +++ b/index/scorch/scorch.go @@ -67,7 +67,6 @@ type Scorch struct { persists chan *persistIntroduction merges chan *segmentMerge introducerNotifier chan *epochWatcher - revertToSnapshots chan *snapshotReversion persisterNotifier chan *epochWatcher rootBolt *bolt.DB asyncTasks sync.WaitGroup @@ -221,7 +220,6 @@ func (s *Scorch) 
openBolt() error { s.persists = make(chan *persistIntroduction) s.merges = make(chan *segmentMerge) s.introducerNotifier = make(chan *epochWatcher, 1) - s.revertToSnapshots = make(chan *snapshotReversion) s.persisterNotifier = make(chan *epochWatcher, 1) if !s.readOnly && s.path != "" { From d9d505af5115efbe19e4e17f1c89bdfdf655c2d3 Mon Sep 17 00:00:00 2001 From: Sreekanth Sivasankaran Date: Thu, 26 Mar 2020 10:11:56 +0530 Subject: [PATCH 653/728] -pick only younger epochs including the target one while iterating over the persisted epochs. --- index/scorch/snapshot_rollback.go | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/index/scorch/snapshot_rollback.go b/index/scorch/snapshot_rollback.go index d9f89f46f..c22b112f7 100644 --- a/index/scorch/snapshot_rollback.go +++ b/index/scorch/snapshot_rollback.go @@ -143,16 +143,17 @@ func Rollback(path string, to *RollbackPoint) error { } }() - // pick all the persisted epochs in bolt store + // pick all the younger persisted epochs in bolt store + // including the target one. 
var found bool - var persistedEpochs []uint64 + var eligibleEpochs []uint64 err = rootBolt.View(func(tx *bolt.Tx) error { snapshots := tx.Bucket(boltSnapshotsBucket) if snapshots == nil { return nil } sc := snapshots.Cursor() - for sk, _ := sc.Last(); sk != nil; sk, _ = sc.Prev() { + for sk, _ := sc.Last(); sk != nil && !found; sk, _ = sc.Prev() { _, snapshotEpoch, err := segment.DecodeUvarintAscending(sk) if err != nil { continue @@ -160,12 +161,12 @@ func Rollback(path string, to *RollbackPoint) error { if snapshotEpoch == to.epoch { found = true } - persistedEpochs = append(persistedEpochs, snapshotEpoch) + eligibleEpochs = append(eligibleEpochs, snapshotEpoch) } return nil }) - if len(persistedEpochs) == 0 { + if len(eligibleEpochs) == 0 { return fmt.Errorf("Rollback: no persisted epochs found in bolt") } if !found { @@ -193,7 +194,7 @@ func Rollback(path string, to *RollbackPoint) error { if snapshots == nil { return nil } - for _, epoch := range persistedEpochs { + for _, epoch := range eligibleEpochs { k := segment.EncodeUvarintAscending(nil, epoch) if err != nil { continue From 314af131849d2adcfab4620b73c3c194c486e5e8 Mon Sep 17 00:00:00 2001 From: Abhinav Dangeti Date: Wed, 1 Apr 2020 17:43:29 -0700 Subject: [PATCH 654/728] Update .travis.yml to include testing on go version 1.14.x + Also, go fmt ./.. 
--- .travis.yml | 1 + geo/geo.go | 2 +- geo/geo_dist_test.go | 2 +- geo/geo_test.go | 4 ++-- geo/sloppy_test.go | 4 ++-- 5 files changed, 7 insertions(+), 6 deletions(-) diff --git a/.travis.yml b/.travis.yml index dffe4b8fd..7b7297afe 100644 --- a/.travis.yml +++ b/.travis.yml @@ -5,6 +5,7 @@ language: go go: - "1.12.x" - "1.13.x" + - "1.14.x" script: - go get golang.org/x/tools/cmd/cover diff --git a/geo/geo.go b/geo/geo.go index 077e73bf0..b18ace433 100644 --- a/geo/geo.go +++ b/geo/geo.go @@ -33,7 +33,7 @@ var minLonRad = minLon * degreesToRadian var minLatRad = minLat * degreesToRadian var maxLonRad = maxLon * degreesToRadian var maxLatRad = maxLat * degreesToRadian -var geoTolerance = 1E-6 +var geoTolerance = 1e-6 var lonScale = float64((uint64(0x1)< 1E-2 { + if !math.IsNaN(test.want) && math.Abs(got-test.want) > 1e-2 { t.Errorf("expected %f got %f", test.want, got) } } diff --git a/geo/geo_test.go b/geo/geo_test.go index 3b707d75b..52a38e273 100644 --- a/geo/geo_test.go +++ b/geo/geo_test.go @@ -89,10 +89,10 @@ func TestRectFromPointDistance(t *testing.T) { if err != nil { t.Fatal(err) } - if math.Abs(upperLeftLat-1) > 1E-2 { + if math.Abs(upperLeftLat-1) > 1e-2 { t.Errorf("expected bounding box upper left lat to be almost 1, got %f", upperLeftLat) } - if math.Abs(lowerRightLat+1) > 1E-2 { + if math.Abs(lowerRightLat+1) > 1e-2 { t.Errorf("expected bounding box lower right lat to be almost -1, got %f", lowerRightLat) } } diff --git a/geo/sloppy_test.go b/geo/sloppy_test.go index 4fefdc5b9..0f74e375b 100644 --- a/geo/sloppy_test.go +++ b/geo/sloppy_test.go @@ -21,7 +21,7 @@ import ( func TestCos(t *testing.T) { - cosDelta := 1E-15 + cosDelta := 1e-15 tests := []struct { in float64 @@ -55,7 +55,7 @@ func TestCos(t *testing.T) { func TestAsin(t *testing.T) { - asinDelta := 1E-7 + asinDelta := 1e-7 tests := []struct { in float64 From ca3bfc95ff942e5846c005e05c6d0b5236e30de0 Mon Sep 17 00:00:00 2001 From: Abhinav Dangeti Date: Fri, 3 Apr 2020 11:58:09 -0700 
Subject: [PATCH 655/728] Upgrade etcd-io/bbolt SHA to v1.3.4 --- vendor/manifest | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vendor/manifest b/vendor/manifest index 431363eef..1c0b482ef 100644 --- a/vendor/manifest +++ b/vendor/manifest @@ -29,7 +29,7 @@ "importpath": "go.etcd.io/bbolt", "repository": "https://github.com/etcd-io/bbolt", "vcs": "", - "revision": "7ee3ded59d4835e10f3e7d0f7603c42aa5e83820", + "revision": "68cc10a767ea1c6b9e8dcb9847317ff192d6d974", "branch": "master", "notests": true }, From ff2708cf7f5025afa81c670299339b527b48131a Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Sun, 5 Apr 2020 17:03:56 -0400 Subject: [PATCH 656/728] adopt Go modules As of this commit, the Bleve project has adopted Go modules. Further, the zap file format used by scorch has been broken out into it's own module: github.com/blevesearch/zap This allows us to develop breaking changes to the file format as new major versions of the zap module. Applications can now also build Bleve with support for multiple versions of zap. This allows compatibility with existing indexes, while still allowing for new indexes to take advantage of newer file formats. Applications that want to directly control which versions are supported can use two new functions inside scorch: func RegisterPlugin(plugin segment.Plugin, makeDefault bool) func ResetPlugins() The bleve command-line tool no longer includes the zap sub-command. This functionality is now available as it's own command-line tool in the zap repository. Finally, this commit also ends support for traditional GOPATH builds using the standard 'go get' tool. 
Closes #1350 --- cmd/bleve/cmd/scorch/snapshot.go | 4 +- cmd/bleve/cmd/zap.go | 25 - cmd/bleve/cmd/zap/dict.go | 116 --- cmd/bleve/cmd/zap/docvalue.go | 280 ------ cmd/bleve/cmd/zap/explore.go | 142 --- cmd/bleve/cmd/zap/fields.go | 66 -- cmd/bleve/cmd/zap/footer.go | 44 - cmd/bleve/cmd/zap/root.go | 58 -- cmd/bleve/cmd/zap/stored.go | 78 -- go.mod | 26 + index/scorch/introducer.go | 13 +- index/scorch/merge.go | 42 +- index/scorch/optimize.go | 74 +- index/scorch/persister.go | 61 +- index/scorch/scorch.go | 26 +- index/scorch/scorch_test.go | 29 + index/scorch/segment/empty.go | 8 +- index/scorch/segment/plugin.go | 58 ++ index/scorch/segment/segment.go | 16 + index/scorch/segment/unadorned.go | 148 +++ index/scorch/segment/zap/README.md | 158 --- index/scorch/segment/zap/build.go | 151 --- index/scorch/segment/zap/build_test.go | 556 ----------- index/scorch/segment/zap/contentcoder.go | 230 ----- index/scorch/segment/zap/contentcoder_test.go | 125 --- index/scorch/segment/zap/count.go | 61 -- index/scorch/segment/zap/dict.go | 263 ----- index/scorch/segment/zap/dict_test.go | 337 ------- index/scorch/segment/zap/docvalues.go | 311 ------ index/scorch/segment/zap/enumerator.go | 126 --- index/scorch/segment/zap/enumerator_test.go | 237 ----- index/scorch/segment/zap/intcoder.go | 172 ---- index/scorch/segment/zap/intcoder_test.go | 269 ------ index/scorch/segment/zap/merge.go | 862 ----------------- index/scorch/segment/zap/merge_test.go | 870 ----------------- index/scorch/segment/zap/new.go | 839 ---------------- index/scorch/segment/zap/posting.go | 897 ------------------ index/scorch/segment/zap/read.go | 43 - index/scorch/segment/zap/segment.go | 572 ----------- index/scorch/segment/zap/segment_test.go | 737 -------------- index/scorch/segment/zap/write.go | 145 --- index/scorch/segment/zap/write_test.go | 86 -- index/scorch/segment/zap/zap.md | 177 ---- index/scorch/segment_plugin.go | 77 ++ vendor/manifest | 145 --- 45 files changed, 468 insertions(+), 
9292 deletions(-) delete mode 100644 cmd/bleve/cmd/zap.go delete mode 100644 cmd/bleve/cmd/zap/dict.go delete mode 100644 cmd/bleve/cmd/zap/docvalue.go delete mode 100644 cmd/bleve/cmd/zap/explore.go delete mode 100644 cmd/bleve/cmd/zap/fields.go delete mode 100644 cmd/bleve/cmd/zap/footer.go delete mode 100644 cmd/bleve/cmd/zap/root.go delete mode 100644 cmd/bleve/cmd/zap/stored.go create mode 100644 go.mod create mode 100644 index/scorch/segment/plugin.go create mode 100644 index/scorch/segment/unadorned.go delete mode 100644 index/scorch/segment/zap/README.md delete mode 100644 index/scorch/segment/zap/build.go delete mode 100644 index/scorch/segment/zap/build_test.go delete mode 100644 index/scorch/segment/zap/contentcoder.go delete mode 100644 index/scorch/segment/zap/contentcoder_test.go delete mode 100644 index/scorch/segment/zap/count.go delete mode 100644 index/scorch/segment/zap/dict.go delete mode 100644 index/scorch/segment/zap/dict_test.go delete mode 100644 index/scorch/segment/zap/docvalues.go delete mode 100644 index/scorch/segment/zap/enumerator.go delete mode 100644 index/scorch/segment/zap/enumerator_test.go delete mode 100644 index/scorch/segment/zap/intcoder.go delete mode 100644 index/scorch/segment/zap/intcoder_test.go delete mode 100644 index/scorch/segment/zap/merge.go delete mode 100644 index/scorch/segment/zap/merge_test.go delete mode 100644 index/scorch/segment/zap/new.go delete mode 100644 index/scorch/segment/zap/posting.go delete mode 100644 index/scorch/segment/zap/read.go delete mode 100644 index/scorch/segment/zap/segment.go delete mode 100644 index/scorch/segment/zap/segment_test.go delete mode 100644 index/scorch/segment/zap/write.go delete mode 100644 index/scorch/segment/zap/write_test.go delete mode 100644 index/scorch/segment/zap/zap.md create mode 100644 index/scorch/segment_plugin.go delete mode 100644 vendor/manifest diff --git a/cmd/bleve/cmd/scorch/snapshot.go b/cmd/bleve/cmd/scorch/snapshot.go index 
df13d4901..3dee55895 100644 --- a/cmd/bleve/cmd/scorch/snapshot.go +++ b/cmd/bleve/cmd/scorch/snapshot.go @@ -18,7 +18,7 @@ import ( "fmt" "strconv" - "github.com/blevesearch/bleve/index/scorch/segment/zap" + seg "github.com/blevesearch/bleve/index/scorch/segment" "github.com/spf13/cobra" ) @@ -49,7 +49,7 @@ var snapshotCmd = &cobra.Command{ segments := snapshot.Segments() for i, segmentSnap := range segments { segment := segmentSnap.Segment() - if segment, ok := segment.(*zap.Segment); ok { + if segment, ok := segment.(seg.PersistedSegment); ok { fmt.Printf("%d %s\n", i, segment.Path()) } } diff --git a/cmd/bleve/cmd/zap.go b/cmd/bleve/cmd/zap.go deleted file mode 100644 index b84d5f2b3..000000000 --- a/cmd/bleve/cmd/zap.go +++ /dev/null @@ -1,25 +0,0 @@ -// Copyright (c) 2017 Couchbase, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package cmd - -import ( - "github.com/blevesearch/bleve/cmd/bleve/cmd/zap" -) - -// make zap command-line tool a bleve sub-command - -func init() { - RootCmd.AddCommand(zap.RootCmd) -} diff --git a/cmd/bleve/cmd/zap/dict.go b/cmd/bleve/cmd/zap/dict.go deleted file mode 100644 index 3cd256fa5..000000000 --- a/cmd/bleve/cmd/zap/dict.go +++ /dev/null @@ -1,116 +0,0 @@ -// Copyright (c) 2017 Couchbase, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package zap - -import ( - "encoding/binary" - "fmt" - "math" - - "github.com/RoaringBitmap/roaring" - "github.com/blevesearch/bleve/index/scorch/segment/zap" - "github.com/couchbase/vellum" - "github.com/spf13/cobra" -) - -// dictCmd represents the dict command -var dictCmd = &cobra.Command{ - Use: "dict [path] [field]", - Short: "dict prints the term dictionary for the specified field", - Long: `The dict command lets you print the term dictionary for the specified field.`, - RunE: func(cmd *cobra.Command, args []string) error { - if len(args) < 2 { - return fmt.Errorf("must specify field") - } - - data := segment.Data() - - addr, err := segment.DictAddr(args[1]) - if err != nil { - return fmt.Errorf("error determining address: %v", err) - } - fmt.Printf("dictionary for field starts at %d (%x)\n", addr, addr) - - vellumLen, read := binary.Uvarint(data[addr : addr+binary.MaxVarintLen64]) - fmt.Printf("vellum length: %d\n", vellumLen) - fstBytes := data[addr+uint64(read) : addr+uint64(read)+vellumLen] - fmt.Printf("raw vellum data:\n % x\n", fstBytes) - fmt.Printf("dictionary:\n") - var termsCount, hit1Count int64 - if fstBytes != nil { - fst, err := vellum.Load(fstBytes) - if err != nil { - return fmt.Errorf("dictionary field %s vellum err: %v", args[1], err) - } - - itr, err := fst.Iterator(nil, nil) - for err == nil { - currTerm, currVal := itr.Current() - extra := "" - if currVal&zap.FSTValEncodingMask == zap.FSTValEncoding1Hit { - docNum, normBits := zap.FSTValDecode1Hit(currVal) - norm := math.Float32frombits(uint32(normBits)) - extra = 
fmt.Sprintf("-- docNum: %d, norm: %f", docNum, norm) - fmt.Printf(" %s - %d (%x) %s\n", currTerm, currVal, currVal, extra) - hit1Count++ - } else { - // fetch the postings size, cardinality in case of non 1 hits - l, c := readPostingCardinality(currVal, data) - fmt.Printf(" %s - %d (%x) posting byteSize: %d cardinality: %d\n", - currTerm, currVal, currVal, l, c) - } - termsCount++ - err = itr.Next() - } - if err != nil && err != vellum.ErrIteratorDone { - return fmt.Errorf("error iterating dictionary: %v", err) - } - fmt.Printf("Total terms in dictionary : %d 1hit count: %d\n", termsCount, hit1Count) - } - - return nil - }, -} - -func init() { - RootCmd.AddCommand(dictCmd) -} - -func readPostingCardinality(postingsOffset uint64, data []byte) (int, uint64) { - // read the location of the freq/norm details - var n uint64 - var read int - - _, read = binary.Uvarint(data[postingsOffset+n : postingsOffset+binary.MaxVarintLen64]) - n += uint64(read) - - _, read = binary.Uvarint(data[postingsOffset+n : postingsOffset+n+binary.MaxVarintLen64]) - n += uint64(read) - - var postingsLen uint64 - postingsLen, read = binary.Uvarint(data[postingsOffset+n : postingsOffset+n+binary.MaxVarintLen64]) - n += uint64(read) - - roaringBytes := data[postingsOffset+n : postingsOffset+n+postingsLen] - - r := roaring.NewBitmap() - - _, err := r.FromBuffer(roaringBytes) - if err != nil { - fmt.Printf("error loading roaring bitmap: %v", err) - } - - return len(roaringBytes), r.GetCardinality() -} diff --git a/cmd/bleve/cmd/zap/docvalue.go b/cmd/bleve/cmd/zap/docvalue.go deleted file mode 100644 index b8563be93..000000000 --- a/cmd/bleve/cmd/zap/docvalue.go +++ /dev/null @@ -1,280 +0,0 @@ -// Copyright (c) 2017 Couchbase, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package zap - -import ( - "bytes" - "encoding/binary" - "fmt" - "log" - "math" - "sort" - "strconv" - - "github.com/blevesearch/bleve/index/scorch/segment/zap" - "github.com/golang/snappy" - "github.com/spf13/cobra" -) - -// docvalueCmd represents the docvalue command -var docvalueCmd = &cobra.Command{ - Use: "docvalue [path] optional optional", - Short: "docvalue prints the docvalue details by field, and docNum", - Long: `The docvalue command lets you explore the docValues in order of field and by doc number.`, - RunE: func(cmd *cobra.Command, args []string) error { - if len(args) < 1 { - return fmt.Errorf("must specify index file path") - } - - data := segment.Data() - crcOffset := len(data) - 4 - verOffset := crcOffset - 4 - chunkOffset := verOffset - 4 - fieldsOffset := chunkOffset - 16 - fieldsIndexOffset := binary.BigEndian.Uint64(data[fieldsOffset : fieldsOffset+8]) - fieldsIndexEnd := uint64(len(data) - zap.FooterSize) - - // iterate through fields index - var fieldInv []string - var id uint64 - for fieldsIndexOffset+(8*id) < fieldsIndexEnd { - addr := binary.BigEndian.Uint64( - data[fieldsIndexOffset+(8*id) : fieldsIndexOffset+(8*id)+8]) - var n uint64 - _, read := binary.Uvarint(data[addr+n : fieldsIndexEnd]) - n += uint64(read) - - var nameLen uint64 - nameLen, read = binary.Uvarint(data[addr+n : fieldsIndexEnd]) - n += uint64(read) - - name := string(data[addr+n : addr+n+nameLen]) - - id++ - fieldInv = append(fieldInv, name) - } - - dvLoc := segment.DocValueOffset() - var n int - var fieldName string - var fieldID uint16 - var 
fieldDvSize float64 - var read, fieldStartLoc, fieldEndLoc uint64 - var fieldChunkCount, fieldDvStart, fieldDvEnd, totalDvSize uint64 - var fieldChunkLens []uint64 - - // if no fields are specified then print the docValue offsets for all fields set - for id, field := range fieldInv { - fieldStartLoc, n = binary.Uvarint( - data[dvLoc+read : dvLoc+read+binary.MaxVarintLen64]) - if n <= 0 { - return fmt.Errorf("loadDvIterators: failed to read the "+ - " docvalue offsets for field %d", fieldID) - } - - read += uint64(n) - fieldEndLoc, n = binary.Uvarint( - data[dvLoc+read : dvLoc+read+binary.MaxVarintLen64]) - if n <= 0 { - return fmt.Errorf("Failed to read the docvalue offset "+ - "end for field %d", fieldID) - } - - read += uint64(n) - if fieldStartLoc == math.MaxUint64 && len(args) == 1 { - fmt.Printf("FieldID: %d '%s' docvalue at %d (%x) not "+ - " persisted \n", id, field, fieldStartLoc, fieldStartLoc) - continue - } - - var chunkOffsetsPosition, offset, numChunks uint64 - if fieldEndLoc-fieldStartLoc > 16 { - numChunks = binary.BigEndian.Uint64(data[fieldEndLoc-8 : fieldEndLoc]) - // read the length of chunk offsets - chunkOffsetsLen := binary.BigEndian.Uint64(data[fieldEndLoc-16 : fieldEndLoc-8]) - // acquire position of chunk offsets - chunkOffsetsPosition = (fieldEndLoc - 16) - chunkOffsetsLen - } - - // read the chunk offsets - chunkLens := make([]uint64, numChunks) - dvSize := uint64(0) - for i := 0; i < int(numChunks); i++ { - length, read := binary.Uvarint( - data[chunkOffsetsPosition+offset : chunkOffsetsPosition+offset+ - binary.MaxVarintLen64]) - if read <= 0 { - return fmt.Errorf("Corrupted chunk offset during segment load") - } - - offset += uint64(read) - chunkLens[i] = length - dvSize += length - } - - totalDvSize += dvSize - // if no field args are given, then print out the dv locations for all fields - if len(args) == 1 { - mbsize := float64(dvSize) / (1024 * 1024) - fmt.Printf("FieldID: %d '%s' docvalue at %d (%x) numChunks "+ - "%d diskSize %.6f 
MB\n", id, field, fieldStartLoc, - fieldStartLoc, numChunks, mbsize) - continue - } - - // if the field is the requested one for more details, - // then remember the details - if field == args[1] { - fieldDvStart = fieldStartLoc - fieldDvEnd = fieldEndLoc - fieldName = field - fieldID = uint16(id) - fieldDvSize = float64(dvSize) / (1024 * 1024) - fieldChunkLens = append(fieldChunkLens, chunkLens...) - fieldChunkCount = numChunks - } - } - - mbsize := float64(totalDvSize) / (1024 * 1024) - fmt.Printf("Total Doc Values Size on Disk: %.6f MB\n", mbsize) - - // done with the fields dv locs printing for the given zap file - if len(args) == 1 { - return nil - } - - if fieldName == "" || fieldDvEnd == 0 { - return fmt.Errorf("No docvalue persisted for given field arg: %s", - args[1]) - } - - if len(args) == 2 { - fmt.Printf("FieldID: %d '%s' docvalue at %d (%x) numChunks "+ - "%d diskSize %.6f MB\n", fieldID, fieldName, fieldDvStart, - fieldDvStart, fieldChunkCount, fieldDvSize) - fmt.Printf("Number of docvalue chunks: %d\n", fieldChunkCount) - return nil - } - - localDocNum, err := strconv.Atoi(args[2]) - if err != nil { - return fmt.Errorf("Unable to parse doc number: %v", err) - } - - if localDocNum >= int(segment.NumDocs()) { - return fmt.Errorf("Invalid doc number %d (valid 0 - %d)", - localDocNum, segment.NumDocs()-1) - } - - // find the chunkNumber where the docValues are stored - docInChunk := uint64(localDocNum) / uint64(segment.ChunkFactor()) - - if fieldChunkCount < docInChunk { - return fmt.Errorf("No chunk exists for chunk number: %d for "+ - "localDocNum: %d", docInChunk, localDocNum) - } - - start, end := readChunkBoundary(int(docInChunk), fieldChunkLens) - destChunkDataLoc := fieldDvStart + start - curChunkEnd := fieldDvStart + end - - // read the number of docs reside in the chunk - var numDocs uint64 - var nr int - numDocs, nr = binary.Uvarint( - data[destChunkDataLoc : destChunkDataLoc+binary.MaxVarintLen64]) - if nr <= 0 { - return fmt.Errorf("Failed 
to read the chunk") - } - - chunkMetaLoc := destChunkDataLoc + uint64(nr) - curChunkHeader := make([]zap.MetaData, int(numDocs)) - offset := uint64(0) - for i := 0; i < int(numDocs); i++ { - curChunkHeader[i].DocNum, nr = binary.Uvarint( - data[chunkMetaLoc+offset : chunkMetaLoc+offset+binary.MaxVarintLen64]) - offset += uint64(nr) - curChunkHeader[i].DocDvOffset, nr = binary.Uvarint( - data[chunkMetaLoc+offset : chunkMetaLoc+offset+binary.MaxVarintLen64]) - offset += uint64(nr) - } - - compressedDataLoc := chunkMetaLoc + offset - dataLength := curChunkEnd - compressedDataLoc - curChunkData := data[compressedDataLoc : compressedDataLoc+dataLength] - - start, end = getDocValueLocs(uint64(localDocNum), curChunkHeader) - if start == math.MaxUint64 || end == math.MaxUint64 { - fmt.Printf("No field values found for localDocNum: %d\n", - localDocNum) - fmt.Printf("Try docNums present in chunk: %s\n", - metaDataDocNums(curChunkHeader)) - return nil - } - // uncompress the already loaded data - uncompressed, err := snappy.Decode(nil, curChunkData) - if err != nil { - log.Printf("snappy err %+v ", err) - return err - } - - var termSeparator byte = 0xff - var termSeparatorSplitSlice = []byte{termSeparator} - - // pick the terms for the given docNum - uncompressed = uncompressed[start:end] - for { - i := bytes.Index(uncompressed, termSeparatorSplitSlice) - if i < 0 { - break - } - - fmt.Printf(" %s ", uncompressed[0:i]) - uncompressed = uncompressed[i+1:] - } - fmt.Printf(" \n ") - return nil - }, -} - -func getDocValueLocs(docNum uint64, metaHeader []zap.MetaData) (uint64, uint64) { - i := sort.Search(len(metaHeader), func(i int) bool { - return metaHeader[i].DocNum >= docNum - }) - if i < len(metaHeader) && metaHeader[i].DocNum == docNum { - return zap.ReadDocValueBoundary(i, metaHeader) - } - return math.MaxUint64, math.MaxUint64 -} - -func metaDataDocNums(metaHeader []zap.MetaData) string { - docNums := "" - for _, meta := range metaHeader { - docNums += fmt.Sprintf("%d", 
meta.DocNum) + ", " - } - return docNums -} - -func readChunkBoundary(chunk int, offsets []uint64) (uint64, uint64) { - var start uint64 - if chunk > 0 { - start = offsets[chunk-1] - } - return start, offsets[chunk] -} - -func init() { - RootCmd.AddCommand(docvalueCmd) -} diff --git a/cmd/bleve/cmd/zap/explore.go b/cmd/bleve/cmd/zap/explore.go deleted file mode 100644 index deac086cb..000000000 --- a/cmd/bleve/cmd/zap/explore.go +++ /dev/null @@ -1,142 +0,0 @@ -// Copyright (c) 2017 Couchbase, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package zap - -import ( - "encoding/binary" - "fmt" - "math" - - "github.com/RoaringBitmap/roaring" - "github.com/blevesearch/bleve/index/scorch/segment/zap" - "github.com/couchbase/vellum" - "github.com/spf13/cobra" -) - -// exploreCmd represents the explore command -var exploreCmd = &cobra.Command{ - Use: "explore [path] [field] ", - Short: "explores the index by field, then term (optional), and then docNum (optional)", - Long: `The explore command lets you explore the index in order of field, then optionally by term, then optionally again by doc number.`, - RunE: func(cmd *cobra.Command, args []string) error { - if len(args) < 2 { - return fmt.Errorf("must specify field") - } - - data := segment.Data() - - addr, err := segment.DictAddr(args[1]) - if err != nil { - return fmt.Errorf("error determining address: %v", err) - } - fmt.Printf("dictionary for field starts at %d (%x)\n", addr, addr) - - vellumLen, read := binary.Uvarint(data[addr : addr+binary.MaxVarintLen64]) - fmt.Printf("vellum length: %d\n", vellumLen) - fstBytes := data[addr+uint64(read) : addr+uint64(read)+vellumLen] - fmt.Printf("raw vellum data:\n % x\n", fstBytes) - - if len(args) >= 3 { - if fstBytes != nil { - fst, err := vellum.Load(fstBytes) - if err != nil { - return fmt.Errorf("dictionary field %s vellum err: %v", args[1], err) - } - postingsAddr, exists, err := fst.Get([]byte(args[2])) - if err != nil { - return fmt.Errorf("error looking for term : %v", err) - } - if exists { - fmt.Printf("FST val is %d (%x)\n", postingsAddr, postingsAddr) - - if postingsAddr&zap.FSTValEncodingMask == zap.FSTValEncoding1Hit { - docNum, normBits := zap.FSTValDecode1Hit(postingsAddr) - norm := math.Float32frombits(uint32(normBits)) - fmt.Printf("Posting List is 1-hit encoded, docNum: %d, norm: %f\n", - docNum, norm) - return nil - } - - if postingsAddr&zap.FSTValEncodingMask != zap.FSTValEncodingGeneral { - return fmt.Errorf("unknown fst val encoding") - } - - var n uint64 - freqAddr, read := 
binary.Uvarint(data[postingsAddr : postingsAddr+binary.MaxVarintLen64]) - n += uint64(read) - - var locAddr uint64 - locAddr, read = binary.Uvarint(data[postingsAddr+n : postingsAddr+n+binary.MaxVarintLen64]) - n += uint64(read) - - var postingListLen uint64 - postingListLen, read = binary.Uvarint(data[postingsAddr+n : postingsAddr+n+binary.MaxVarintLen64]) - n += uint64(read) - - fmt.Printf("Posting List Length: %d\n", postingListLen) - bitmap := roaring.New() - _, err = bitmap.FromBuffer(data[postingsAddr+n : postingsAddr+n+postingListLen]) - if err != nil { - return err - } - fmt.Printf("Posting List: %v\n", bitmap) - - fmt.Printf("Freq details at: %d (%x)\n", freqAddr, freqAddr) - numChunks, r2 := binary.Uvarint(data[freqAddr : freqAddr+binary.MaxVarintLen64]) - n = uint64(r2) - - var freqOffsets []uint64 - for j := uint64(0); j < numChunks; j++ { - chunkLen, r3 := binary.Uvarint(data[freqAddr+n : freqAddr+n+binary.MaxVarintLen64]) - n += uint64(r3) - freqOffsets = append(freqOffsets, chunkLen) - } - running := freqAddr + n - for k, offset := range freqOffsets { - fmt.Printf("freq chunk: %d, len %d, start at %d (%x) end %d (%x)\n", k, offset, running, running, running+offset, running+offset) - running += offset - } - - fmt.Printf("Loc details at: %d (%x)\n", locAddr, locAddr) - numLChunks, r4 := binary.Uvarint(data[locAddr : locAddr+binary.MaxVarintLen64]) - n = uint64(r4) - fmt.Printf("there are %d loc chunks\n", numLChunks) - - var locOffsets []uint64 - for j := uint64(0); j < numLChunks; j++ { - lchunkLen, r4 := binary.Uvarint(data[locAddr+n : locAddr+n+binary.MaxVarintLen64]) - n += uint64(r4) - locOffsets = append(locOffsets, lchunkLen) - } - - running2 := locAddr + n - for k, offset := range locOffsets { - fmt.Printf("loc chunk: %d, len %d(%x), start at %d (%x) end %d (%x)\n", k, offset, offset, running2, running2, running2+offset, running2+offset) - running2 += offset - } - - } else { - fmt.Printf("dictionary does not contain term '%s'\n", args[2]) - } - 
} - } - - return nil - }, -} - -func init() { - RootCmd.AddCommand(exploreCmd) -} diff --git a/cmd/bleve/cmd/zap/fields.go b/cmd/bleve/cmd/zap/fields.go deleted file mode 100644 index cf8cc3d86..000000000 --- a/cmd/bleve/cmd/zap/fields.go +++ /dev/null @@ -1,66 +0,0 @@ -// Copyright (c) 2017 Couchbase, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package zap - -import ( - "encoding/binary" - "fmt" - - "github.com/blevesearch/bleve/index/scorch/segment/zap" - "github.com/spf13/cobra" -) - -// fieldsCmd represents the fields command -var fieldsCmd = &cobra.Command{ - Use: "fields [path]", - Short: "fields prints the fields in the specified file", - Long: `The fields command lets you print the fields in the specified file.`, - RunE: func(cmd *cobra.Command, args []string) error { - - data := segment.Data() - - crcOffset := len(data) - 4 - verOffset := crcOffset - 4 - chunkOffset := verOffset - 4 - fieldsOffset := chunkOffset - 16 - fieldsIndexOffset := binary.BigEndian.Uint64(data[fieldsOffset : fieldsOffset+8]) - fieldsIndexEnd := uint64(len(data) - zap.FooterSize) - - // iterate through fields index - var fieldID uint64 - for fieldsIndexOffset+(8*fieldID) < fieldsIndexEnd { - addr := binary.BigEndian.Uint64(data[fieldsIndexOffset+(8*fieldID) : fieldsIndexOffset+(8*fieldID)+8]) - var n uint64 - dictLoc, read := binary.Uvarint(data[addr+n : fieldsIndexEnd]) - n += uint64(read) - - var nameLen uint64 - nameLen, read = binary.Uvarint(data[addr+n : 
fieldsIndexEnd]) - n += uint64(read) - - name := string(data[addr+n : addr+n+nameLen]) - - fmt.Printf("field %d '%s' starts at %d (%x)\n", fieldID, name, dictLoc, dictLoc) - - fieldID++ - } - - return nil - }, -} - -func init() { - RootCmd.AddCommand(fieldsCmd) -} diff --git a/cmd/bleve/cmd/zap/footer.go b/cmd/bleve/cmd/zap/footer.go deleted file mode 100644 index 96078ded6..000000000 --- a/cmd/bleve/cmd/zap/footer.go +++ /dev/null @@ -1,44 +0,0 @@ -// Copyright (c) 2017 Couchbase, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package zap - -import ( - "fmt" - - "github.com/spf13/cobra" -) - -// footerCmd represents the footer command -var footerCmd = &cobra.Command{ - Use: "footer [path]", - Short: "prints the contents of the zap footer", - Long: `The footer command will print the contents of the footer.`, - RunE: func(cmd *cobra.Command, args []string) error { - data := segment.Data() - fmt.Printf("Length: %d\n", len(data)) - fmt.Printf("CRC: %#x\n", segment.CRC()) - fmt.Printf("Version: %d\n", segment.Version()) - fmt.Printf("Chunk Factor: %d\n", segment.ChunkFactor()) - fmt.Printf("Fields Idx: %d (%#x)\n", segment.FieldsIndexOffset(), segment.FieldsIndexOffset()) - fmt.Printf("Stored Idx: %d (%#x)\n", segment.StoredIndexOffset(), segment.StoredIndexOffset()) - fmt.Printf("DocValue Idx: %d (%#x)\n", segment.DocValueOffset(), segment.DocValueOffset()) - fmt.Printf("Num Docs: %d\n", segment.NumDocs()) - return nil - }, -} - -func init() { - RootCmd.AddCommand(footerCmd) -} diff --git a/cmd/bleve/cmd/zap/root.go b/cmd/bleve/cmd/zap/root.go deleted file mode 100644 index ee2b62602..000000000 --- a/cmd/bleve/cmd/zap/root.go +++ /dev/null @@ -1,58 +0,0 @@ -// Copyright (c) 2017 Couchbase, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package zap - -import ( - "fmt" - "os" - - "github.com/blevesearch/bleve/index/scorch/segment/zap" - "github.com/spf13/cobra" -) - -var segment *zap.Segment - -// RootCmd represents the base command when called without any subcommands -var RootCmd = &cobra.Command{ - Use: "zap", - Short: "command-line tool to interact with a zap file", - Long: `Zap is a command-line tool to interact with a zap file.`, - PersistentPreRunE: func(cmd *cobra.Command, args []string) error { - - if len(args) < 1 { - return fmt.Errorf("must specify path to zap file") - } - - segInf, err := zap.Open(args[0]) - if err != nil { - return fmt.Errorf("error opening zap file: %v", err) - } - segment = segInf.(*zap.Segment) - - return nil - }, - PersistentPostRunE: func(cmd *cobra.Command, args []string) error { - return nil - }, -} - -// Execute adds all child commands to the root command sets flags appropriately. -// This is called by main.main(). It only needs to happen once to the rootCmd. -func Execute() { - if err := RootCmd.Execute(); err != nil { - fmt.Println(err) - os.Exit(-1) - } -} diff --git a/cmd/bleve/cmd/zap/stored.go b/cmd/bleve/cmd/zap/stored.go deleted file mode 100644 index 28d62c0cb..000000000 --- a/cmd/bleve/cmd/zap/stored.go +++ /dev/null @@ -1,78 +0,0 @@ -// Copyright (c) 2017 Couchbase, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package zap - -import ( - "encoding/binary" - "fmt" - "strconv" - - "github.com/golang/snappy" - "github.com/spf13/cobra" -) - -// storedCmd represents the stored command -var storedCmd = &cobra.Command{ - Use: "stored [path] [docNum]", - Short: "prints the stored section for a doc number", - Long: `The stored command will print the raw stored data bytes for the specified document number.`, - RunE: func(cmd *cobra.Command, args []string) error { - if len(args) < 2 { - return fmt.Errorf("must specify doc number") - } - docNum, err := strconv.Atoi(args[1]) - if err != nil { - return fmt.Errorf("unable to parse doc number: %v", err) - } - if docNum >= int(segment.NumDocs()) { - return fmt.Errorf("invalid doc number %d (valid 0 - %d)", docNum, segment.NumDocs()-1) - } - data := segment.Data() - storedIdx := segment.StoredIndexOffset() - // read docNum entry in the index - indexPos := storedIdx + (8 * uint64(docNum)) - storedStartAddr := binary.BigEndian.Uint64(data[indexPos : indexPos+8]) - fmt.Printf("Stored field starts at %d (%#x)\n", storedStartAddr, storedStartAddr) - - var n uint64 - metaLen, read := binary.Uvarint(data[storedStartAddr : storedStartAddr+binary.MaxVarintLen64]) - n += uint64(read) - fmt.Printf("Meta Len: %d\n", metaLen) - var dataLen uint64 - dataLen, read = binary.Uvarint(data[storedStartAddr+n : storedStartAddr+n+binary.MaxVarintLen64]) - n += uint64(read) - fmt.Printf("Data Len: %d\n", dataLen) - meta := data[storedStartAddr+n : storedStartAddr+n+metaLen] - fmt.Printf("Raw meta: % x\n", meta) - raw := data[storedStartAddr+n+metaLen : storedStartAddr+n+metaLen+dataLen] - fmt.Printf("Raw data (len %d): % x\n", len(raw), raw) - - // handle _id field special case - idFieldValLen, _ := binary.Uvarint(meta) - fmt.Printf("Raw _id (len %d): % x\n", idFieldValLen, raw[:idFieldValLen]) - fmt.Printf("Raw fields (len %d): % x\n", dataLen-idFieldValLen, raw[idFieldValLen:]) - uncompressed, err := snappy.Decode(nil, raw[idFieldValLen:]) - if err != nil { 
- panic(err) - } - fmt.Printf("Uncompressed fields (len %d): % x\n", len(uncompressed), uncompressed) - - return nil - }, -} - -func init() { - RootCmd.AddCommand(storedCmd) -} diff --git a/go.mod b/go.mod new file mode 100644 index 000000000..7a1483824 --- /dev/null +++ b/go.mod @@ -0,0 +1,26 @@ +module github.com/blugelabs/bleve + +go 1.13 + +require ( + github.com/RoaringBitmap/roaring v0.4.21 + github.com/blevesearch/bleve v1.0.0 + github.com/blevesearch/blevex v0.0.0-20190916190636-152f0fe5c040 + github.com/blevesearch/go-porterstemmer v1.0.3 + github.com/blevesearch/segment v0.9.0 + github.com/blevesearch/snowballstem v0.9.0 + github.com/blevesearch/zap/v11 v11.0.1 + github.com/blevesearch/zap/v12 v12.0.1 + github.com/couchbase/ghistogram v0.1.0 // indirect + github.com/couchbase/moss v0.0.0-20190322010551-a0cae174c498 + github.com/couchbase/vellum v1.0.0 + github.com/golang/protobuf v1.3.2 + github.com/kljensen/snowball v0.6.0 + github.com/rcrowley/go-metrics v0.0.0-20190826022208-cac0b30c2563 + github.com/spf13/cobra v0.0.5 + github.com/steveyen/gtreap v0.0.0-20150807155958-0abe01ef9be2 + github.com/syndtr/goleveldb v1.0.0 + github.com/willf/bitset v1.1.10 + go.etcd.io/bbolt v1.3.4 + golang.org/x/text v0.3.0 +) diff --git a/index/scorch/introducer.go b/index/scorch/introducer.go index d0f1f2318..e5f00f80e 100644 --- a/index/scorch/introducer.go +++ b/index/scorch/introducer.go @@ -21,7 +21,6 @@ import ( "github.com/RoaringBitmap/roaring" "github.com/blevesearch/bleve/index" "github.com/blevesearch/bleve/index/scorch/segment" - "github.com/blevesearch/bleve/index/scorch/segment/zap" ) type segmentIntroduction struct { @@ -406,11 +405,11 @@ func (s *Scorch) introduceMerge(nextMerge *segmentMerge) { atomic.AddUint64(&s.stats.TotIntroducedSegmentsMerge, 1) switch nextMerge.new.(type) { - case *zap.SegmentBase: + case segment.PersistedSegment: + fileSegments++ + default: docsToPersistCount += nextMerge.new.Count() - newSegmentDeleted.GetCardinality() 
memSegments++ - case *zap.Segment: - fileSegments++ } } @@ -442,9 +441,9 @@ func (s *Scorch) introduceMerge(nextMerge *segmentMerge) { func isMemorySegment(s *SegmentSnapshot) bool { switch s.segment.(type) { - case *zap.SegmentBase: - return true - default: + case segment.PersistedSegment: return false + default: + return true } } diff --git a/index/scorch/merge.go b/index/scorch/merge.go index 6faad6809..37dca529a 100644 --- a/index/scorch/merge.go +++ b/index/scorch/merge.go @@ -25,7 +25,6 @@ import ( "github.com/RoaringBitmap/roaring" "github.com/blevesearch/bleve/index/scorch/mergeplan" "github.com/blevesearch/bleve/index/scorch/segment" - "github.com/blevesearch/bleve/index/scorch/segment/zap" ) func (s *Scorch) mergerLoop() { @@ -131,18 +130,18 @@ func (s *Scorch) parseMergePlannerOptions() (*mergeplan.MergePlanOptions, func (s *Scorch) planMergeAtSnapshot(ourSnapshot *IndexSnapshot, options *mergeplan.MergePlanOptions) error { - // build list of zap segments in this snapshot - var onlyZapSnapshots []mergeplan.Segment + // build list of persisted segments in this snapshot + var onlyPersistedSnapshots []mergeplan.Segment for _, segmentSnapshot := range ourSnapshot.segment { - if _, ok := segmentSnapshot.segment.(*zap.Segment); ok { - onlyZapSnapshots = append(onlyZapSnapshots, segmentSnapshot) + if _, ok := segmentSnapshot.segment.(segment.PersistedSegment); ok { + onlyPersistedSnapshots = append(onlyPersistedSnapshots, segmentSnapshot) } } atomic.AddUint64(&s.stats.TotFileMergePlan, 1) // give this list to the planner - resultMergePlan, err := mergeplan.Plan(onlyZapSnapshots, options) + resultMergePlan, err := mergeplan.Plan(onlyPersistedSnapshots, options) if err != nil { atomic.AddUint64(&s.stats.TotFileMergePlanErr, 1) return fmt.Errorf("merge planning err: %v", err) @@ -169,24 +168,24 @@ func (s *Scorch) planMergeAtSnapshot(ourSnapshot *IndexSnapshot, oldMap := make(map[uint64]*SegmentSnapshot) newSegmentID := atomic.AddUint64(&s.nextSegmentID, 1) - 
segmentsToMerge := make([]*zap.Segment, 0, len(task.Segments)) + segmentsToMerge := make([]segment.Segment, 0, len(task.Segments)) docsToDrop := make([]*roaring.Bitmap, 0, len(task.Segments)) for _, planSegment := range task.Segments { if segSnapshot, ok := planSegment.(*SegmentSnapshot); ok { oldMap[segSnapshot.id] = segSnapshot - if zapSeg, ok := segSnapshot.segment.(*zap.Segment); ok { + if persistedSeg, ok := segSnapshot.segment.(segment.PersistedSegment); ok { if segSnapshot.LiveSize() == 0 { atomic.AddUint64(&s.stats.TotFileMergeSegmentsEmpty, 1) oldMap[segSnapshot.id] = nil } else { - segmentsToMerge = append(segmentsToMerge, zapSeg) + segmentsToMerge = append(segmentsToMerge, segSnapshot.segment) docsToDrop = append(docsToDrop, segSnapshot.deleted) } // track the files getting merged for unsetting the // removal ineligibility. This helps to unflip files // even with fast merger, slow persister work flows. - path := zapSeg.Path() + path := persistedSeg.Path() filenames = append(filenames, strings.TrimPrefix(path, s.path+string(os.PathSeparator))) } @@ -203,8 +202,8 @@ func (s *Scorch) planMergeAtSnapshot(ourSnapshot *IndexSnapshot, fileMergeZapStartTime := time.Now() atomic.AddUint64(&s.stats.TotFileMergeZapBeg, 1) - newDocNums, _, err := zap.Merge(segmentsToMerge, docsToDrop, path, - DefaultChunkFactor, s.closeCh, s) + newDocNums, _, err := s.segPlugin.Merge(segmentsToMerge, docsToDrop, path, + s.closeCh, s) atomic.AddUint64(&s.stats.TotFileMergeZapEnd, 1) fileMergeZapTime := uint64(time.Since(fileMergeZapStartTime)) @@ -222,17 +221,12 @@ func (s *Scorch) planMergeAtSnapshot(ourSnapshot *IndexSnapshot, return fmt.Errorf("merging failed: %v", err) } - seg, err = zap.Open(path) + seg, err = s.segPlugin.Open(path) if err != nil { s.unmarkIneligibleForRemoval(filename) atomic.AddUint64(&s.stats.TotFileMergePlanTasksErr, 1) return err } - err = zap.ValidateMerge(segmentsToMerge, nil, docsToDrop, seg.(*zap.Segment)) - if err != nil { - 
s.unmarkIneligibleForRemoval(filename) - return fmt.Errorf("merge validation failed: %v", err) - } oldNewDocNums = make(map[uint64][]uint64) for i, segNewDocNums := range newDocNums { oldNewDocNums[task.Segments[i].Id()] = segNewDocNums @@ -297,8 +291,8 @@ type segmentMerge struct { // persisted segment, and synchronously introduce that new segment // into the root func (s *Scorch) mergeSegmentBases(snapshot *IndexSnapshot, - sbs []*zap.SegmentBase, sbsDrops []*roaring.Bitmap, sbsIndexes []int, - chunkFactor uint32) (*IndexSnapshot, uint64, error) { + sbs []segment.Segment, sbsDrops []*roaring.Bitmap, + sbsIndexes []int) (*IndexSnapshot, uint64, error) { atomic.AddUint64(&s.stats.TotMemMergeBeg, 1) memMergeZapStartTime := time.Now() @@ -310,7 +304,7 @@ func (s *Scorch) mergeSegmentBases(snapshot *IndexSnapshot, path := s.path + string(os.PathSeparator) + filename newDocNums, _, err := - zap.MergeSegmentBases(sbs, sbsDrops, path, chunkFactor, s.closeCh, s) + s.segPlugin.Merge(sbs, sbsDrops, path, s.closeCh, s) atomic.AddUint64(&s.stats.TotMemMergeZapEnd, 1) @@ -325,15 +319,11 @@ func (s *Scorch) mergeSegmentBases(snapshot *IndexSnapshot, return nil, 0, err } - seg, err := zap.Open(path) + seg, err := s.segPlugin.Open(path) if err != nil { atomic.AddUint64(&s.stats.TotMemMergeErr, 1) return nil, 0, err } - err = zap.ValidateMerge(nil, sbs, sbsDrops, seg.(*zap.Segment)) - if err != nil { - return nil, 0, fmt.Errorf("in-memory merge validation failed: %v", err) - } // update persisted stats atomic.AddUint64(&s.stats.TotPersistedItems, seg.Count()) diff --git a/index/scorch/optimize.go b/index/scorch/optimize.go index b33e3be3d..b9cb9228a 100644 --- a/index/scorch/optimize.go +++ b/index/scorch/optimize.go @@ -18,10 +18,8 @@ import ( "fmt" "github.com/RoaringBitmap/roaring" - "github.com/blevesearch/bleve/index" "github.com/blevesearch/bleve/index/scorch/segment" - "github.com/blevesearch/bleve/index/scorch/segment/zap" ) var OptimizeConjunction = true @@ -81,25 +79,25 
@@ func (o *OptimizeTFRConjunction) Finish() (index.Optimized, error) { } for i := range o.snapshot.segment { - itr0, ok := o.tfrs[0].iterators[i].(*zap.PostingsIterator) - if !ok || itr0.ActualBM == nil { + itr0, ok := o.tfrs[0].iterators[i].(segment.OptimizablePostingsIterator) + if !ok || itr0.ActualBitmap() == nil { continue } - itr1, ok := o.tfrs[1].iterators[i].(*zap.PostingsIterator) - if !ok || itr1.ActualBM == nil { + itr1, ok := o.tfrs[1].iterators[i].(segment.OptimizablePostingsIterator) + if !ok || itr1.ActualBitmap() == nil { continue } - bm := roaring.And(itr0.ActualBM, itr1.ActualBM) + bm := roaring.And(itr0.ActualBitmap(), itr1.ActualBitmap()) for _, tfr := range o.tfrs[2:] { - itr, ok := tfr.iterators[i].(*zap.PostingsIterator) - if !ok || itr.ActualBM == nil { + itr, ok := tfr.iterators[i].(segment.OptimizablePostingsIterator) + if !ok || itr.ActualBitmap() == nil { continue } - bm.And(itr.ActualBM) + bm.And(itr.ActualBitmap()) } // in this conjunction optimization, the postings iterators @@ -107,10 +105,9 @@ func (o *OptimizeTFRConjunction) Finish() (index.Optimized, error) { // regular conjunction searcher machinery will still be used, // but the underlying bitmap will be smaller. for _, tfr := range o.tfrs { - itr, ok := tfr.iterators[i].(*zap.PostingsIterator) - if ok && itr.ActualBM != nil { - itr.ActualBM = bm - itr.Actual = bm.Iterator() + itr, ok := tfr.iterators[i].(segment.OptimizablePostingsIterator) + if ok && itr.ActualBitmap() != nil { + itr.ReplaceActual(bm) } } } @@ -191,9 +188,9 @@ OUTER: continue OUTER } - itr, ok := tfr.iterators[i].(*zap.PostingsIterator) + itr, ok := tfr.iterators[i].(segment.OptimizablePostingsIterator) if !ok { - // We optimize zap postings iterators only. + // We only optimize postings iterators that support this operation. return nil, nil } @@ -201,12 +198,6 @@ OUTER: // can perform several optimizations up-front here. 
docNum1Hit, ok := itr.DocNum1Hit() if ok { - if docNum1Hit == zap.DocNum1HitFinished { - // An empty docNum here means the entire AND is empty. - oTFR.iterators[i] = segment.AnEmptyPostingsIterator - continue OUTER - } - if docNum1HitLastOk && docNum1HitLast != docNum1Hit { // The docNum1Hit doesn't match the previous // docNum1HitLast, so the entire AND is empty. @@ -220,14 +211,14 @@ OUTER: continue } - if itr.ActualBM == nil { + if itr.ActualBitmap() == nil { // An empty actual bitmap means the entire AND is empty. oTFR.iterators[i] = segment.AnEmptyPostingsIterator continue OUTER } // Collect the actual bitmap for more processing later. - actualBMs = append(actualBMs, itr.ActualBM) + actualBMs = append(actualBMs, itr.ActualBitmap()) } if docNum1HitLastOk { @@ -245,11 +236,7 @@ OUTER: // The actual bitmaps and docNum1Hits all contain or have // the same 1-hit docNum, so that's our AND'ed result. - oTFR.iterators[i], err = zap.PostingsIteratorFrom1Hit( - docNum1HitLast, zap.NormBits1Hit, false, false) - if err != nil { - return nil, nil - } + oTFR.iterators[i] = segment.NewUnadornedPostingsIteratorFrom1Hit(docNum1HitLast) continue OUTER } @@ -263,11 +250,7 @@ OUTER: if len(actualBMs) == 1 { // If we've only 1 actual bitmap, then that's our result. 
- oTFR.iterators[i], err = zap.PostingsIteratorFromBitmap( - actualBMs[0], false, false) - if err != nil { - return nil, nil - } + oTFR.iterators[i] = segment.NewUnadornedPostingsIteratorFromBitmap(actualBMs[0]) continue OUTER } @@ -279,11 +262,7 @@ OUTER: bm.And(actualBM) } - oTFR.iterators[i], err = zap.PostingsIteratorFromBitmap( - bm, false, false) - if err != nil { - return nil, nil - } + oTFR.iterators[i] = segment.NewUnadornedPostingsIteratorFromBitmap(bm) } return oTFR, nil @@ -337,13 +316,13 @@ func (o *OptimizeTFRDisjunctionUnadorned) Finish() (rv index.Optimized, err erro var cMax uint64 for _, tfr := range o.tfrs { - itr, ok := tfr.iterators[i].(*zap.PostingsIterator) + itr, ok := tfr.iterators[i].(segment.OptimizablePostingsIterator) if !ok { return nil, nil } - if itr.ActualBM != nil { - c := itr.ActualBM.GetCardinality() + if itr.ActualBitmap() != nil { + c := itr.ActualBitmap().GetCardinality() if cMax < c { cMax = c } @@ -379,7 +358,7 @@ func (o *OptimizeTFRDisjunctionUnadorned) Finish() (rv index.Optimized, err erro actualBMs = actualBMs[:0] for _, tfr := range o.tfrs { - itr, ok := tfr.iterators[i].(*zap.PostingsIterator) + itr, ok := tfr.iterators[i].(segment.OptimizablePostingsIterator) if !ok { return nil, nil } @@ -390,8 +369,8 @@ func (o *OptimizeTFRDisjunctionUnadorned) Finish() (rv index.Optimized, err erro continue } - if itr.ActualBM != nil { - actualBMs = append(actualBMs, itr.ActualBM) + if itr.ActualBitmap() != nil { + actualBMs = append(actualBMs, itr.ActualBitmap()) } } @@ -410,10 +389,7 @@ func (o *OptimizeTFRDisjunctionUnadorned) Finish() (rv index.Optimized, err erro bm.AddMany(docNums) - oTFR.iterators[i], err = zap.PostingsIteratorFromBitmap(bm, false, false) - if err != nil { - return nil, nil - } + oTFR.iterators[i] = segment.NewUnadornedPostingsIteratorFromBitmap(bm) } return oTFR, nil diff --git a/index/scorch/persister.go b/index/scorch/persister.go index d7c335047..30e75df77 100644 --- a/index/scorch/persister.go +++ 
b/index/scorch/persister.go @@ -32,12 +32,9 @@ import ( "github.com/RoaringBitmap/roaring" "github.com/blevesearch/bleve/index" "github.com/blevesearch/bleve/index/scorch/segment" - "github.com/blevesearch/bleve/index/scorch/segment/zap" bolt "go.etcd.io/bbolt" ) -var DefaultChunkFactor uint32 = 1024 - // DefaultPersisterNapTimeMSec is kept to zero as this helps in direct // persistence of segments with the default safe batch option. // If the default safe batch option results in high number of @@ -360,13 +357,13 @@ var DefaultMinSegmentsForInMemoryMerge = 2 func (s *Scorch) persistSnapshotMaybeMerge(snapshot *IndexSnapshot) ( bool, error) { // collect the in-memory zap segments (SegmentBase instances) - var sbs []*zap.SegmentBase + var sbs []segment.Segment var sbsDrops []*roaring.Bitmap var sbsIndexes []int for i, segmentSnapshot := range snapshot.segment { - if sb, ok := segmentSnapshot.segment.(*zap.SegmentBase); ok { - sbs = append(sbs, sb) + if _, ok := segmentSnapshot.segment.(segment.PersistedSegment); !ok { + sbs = append(sbs, segmentSnapshot.segment) sbsDrops = append(sbsDrops, segmentSnapshot.deleted) sbsIndexes = append(sbsIndexes, i) } @@ -377,7 +374,7 @@ func (s *Scorch) persistSnapshotMaybeMerge(snapshot *IndexSnapshot) ( } newSnapshot, newSegmentID, err := s.mergeSegmentBases( - snapshot, sbs, sbsDrops, sbsIndexes, DefaultChunkFactor) + snapshot, sbs, sbsDrops, sbsIndexes) if err != nil { return false, err } @@ -459,13 +456,13 @@ func (s *Scorch) persistSnapshotDirect(snapshot *IndexSnapshot) (err error) { if err != nil { return err } - err = metaBucket.Put([]byte("type"), []byte(zap.Type)) + err = metaBucket.Put(boltMetaDataSegmentTypeKey, []byte(s.segPlugin.Type())) if err != nil { return err } buf := make([]byte, binary.MaxVarintLen32) - binary.BigEndian.PutUint32(buf, zap.Version) - err = metaBucket.Put([]byte("version"), buf) + binary.BigEndian.PutUint32(buf, s.segPlugin.Version()) + err = metaBucket.Put(boltMetaDataSegmentVersionKey, buf) if 
err != nil { return err } @@ -494,11 +491,19 @@ func (s *Scorch) persistSnapshotDirect(snapshot *IndexSnapshot) (err error) { return err } switch seg := segmentSnapshot.segment.(type) { - case *zap.SegmentBase: + case segment.PersistedSegment: + path := seg.Path() + filename := strings.TrimPrefix(path, s.path+string(os.PathSeparator)) + err = snapshotSegmentBucket.Put(boltPathKey, []byte(filename)) + if err != nil { + return err + } + filenames = append(filenames, filename) + case segment.UnpersistedSegment: // need to persist this to disk filename := zapFileName(segmentSnapshot.id) path := s.path + string(os.PathSeparator) + filename - err = zap.PersistSegmentBase(seg, path) + err = seg.Persist(path) if err != nil { return fmt.Errorf("error persisting segment: %v", err) } @@ -508,14 +513,7 @@ func (s *Scorch) persistSnapshotDirect(snapshot *IndexSnapshot) (err error) { return err } filenames = append(filenames, filename) - case *zap.Segment: - path := seg.Path() - filename := strings.TrimPrefix(path, s.path+string(os.PathSeparator)) - err = snapshotSegmentBucket.Put(boltPathKey, []byte(filename)) - if err != nil { - return err - } - filenames = append(filenames, filename) + default: return fmt.Errorf("unknown segment type: %T", seg) } @@ -553,7 +551,7 @@ func (s *Scorch) persistSnapshotDirect(snapshot *IndexSnapshot) (err error) { } }() for segmentID, path := range newSegmentPaths { - newSegments[segmentID], err = zap.Open(path) + newSegments[segmentID], err = s.segPlugin.Open(path) if err != nil { return fmt.Errorf("error opening new segment at %s, %v", path, err) } @@ -609,6 +607,8 @@ var boltPathKey = []byte{'p'} var boltDeletedKey = []byte{'d'} var boltInternalKey = []byte{'i'} var boltMetaDataKey = []byte{'m'} +var boltMetaDataSegmentTypeKey = []byte("type") +var boltMetaDataSegmentVersionKey = []byte("version") func (s *Scorch) loadFromBolt() error { return s.rootBolt.View(func(tx *bolt.Tx) error { @@ -693,6 +693,23 @@ func (s *Scorch) loadSnapshot(snapshot 
*bolt.Bucket) (*IndexSnapshot, error) { refs: 1, creator: "loadSnapshot", } + // first we look for the meta-data bucket, this will tell us + // which segment type/version was used for this snapshot + // all operations for this scorch will use this type/version + metaBucket := snapshot.Bucket(boltMetaDataKey) + if metaBucket == nil { + _ = rv.DecRef() + return nil, fmt.Errorf("meta-data bucket missing") + } + segmentType := string(metaBucket.Get(boltMetaDataSegmentTypeKey)) + segmentVersion := binary.BigEndian.Uint32( + metaBucket.Get(boltMetaDataSegmentVersionKey)) + err := s.loadSegmentPlugin(segmentType, segmentVersion) + if err != nil { + _ = rv.DecRef() + return nil, fmt.Errorf( + "unable to load correct segment wrapper: %v", err) + } var running uint64 c := snapshot.Cursor() for k, _ := c.First(); k != nil; k, _ = c.Next() { @@ -737,7 +754,7 @@ func (s *Scorch) loadSegment(segmentBucket *bolt.Bucket) (*SegmentSnapshot, erro return nil, fmt.Errorf("segment path missing") } segmentPath := s.path + string(os.PathSeparator) + string(pathBytes) - segment, err := zap.Open(segmentPath) + segment, err := s.segPlugin.Open(segmentPath) if err != nil { return nil, fmt.Errorf("error opening bolt segment: %v", err) } diff --git a/index/scorch/scorch.go b/index/scorch/scorch.go index da6a53357..80f9e3a79 100644 --- a/index/scorch/scorch.go +++ b/index/scorch/scorch.go @@ -28,7 +28,6 @@ import ( "github.com/blevesearch/bleve/document" "github.com/blevesearch/bleve/index" "github.com/blevesearch/bleve/index/scorch/segment" - "github.com/blevesearch/bleve/index/scorch/segment/zap" "github.com/blevesearch/bleve/index/store" "github.com/blevesearch/bleve/registry" bolt "go.etcd.io/bbolt" @@ -77,6 +76,8 @@ type Scorch struct { pauseLock sync.RWMutex pauseCount uint64 + + segPlugin segment.Plugin } type internalStats struct { @@ -100,7 +101,25 @@ func NewScorch(storeName string, nextSnapshotEpoch: 1, closeCh: make(chan struct{}), ineligibleForRemoval: map[string]bool{}, + 
segPlugin: defaultSegmentPlugin, } + + // check if the caller has requested a specific segment type/version + forcedSegmentVersion, ok := config["forceSegmentVersion"].(int) + if ok { + forcedSegmentType, ok2 := config["forceSegmentType"].(string) + if !ok2 { + return nil, fmt.Errorf( + "forceSegmentVersion set to %d, must also specify forceSegmentType", forcedSegmentVersion) + } + + err := rv.loadSegmentPlugin(forcedSegmentType, + uint32(forcedSegmentVersion)) + if err != nil { + return nil, err + } + } + rv.root = &IndexSnapshot{parent: rv, refs: 1, creator: "NewScorch"} ro, ok := config["read_only"].(bool) if ok { @@ -351,7 +370,7 @@ func (s *Scorch) Batch(batch *index.Batch) (err error) { var newSegment segment.Segment var bufBytes uint64 if len(analysisResults) > 0 { - newSegment, bufBytes, err = zap.AnalysisResultsToSegmentBase(analysisResults, DefaultChunkFactor) + newSegment, bufBytes, err = s.segPlugin.New(analysisResults) if err != nil { return err } @@ -499,8 +518,7 @@ func (s *Scorch) diskFileStats(rootSegmentPaths map[string]struct{}) (uint64, func (s *Scorch) rootDiskSegmentsPaths() map[string]struct{} { rv := make(map[string]struct{}, len(s.root.segment)) for _, segmentSnapshot := range s.root.segment { - switch seg := segmentSnapshot.segment.(type) { - case *zap.Segment: + if seg, ok := segmentSnapshot.segment.(segment.PersistedSegment); ok { rv[seg.Path()] = struct{}{} } } diff --git a/index/scorch/scorch_test.go b/index/scorch/scorch_test.go index 302a86a5e..b168728ee 100644 --- a/index/scorch/scorch_test.go +++ b/index/scorch/scorch_test.go @@ -2121,3 +2121,32 @@ func TestAllFieldWithDifferentTermVectorsEnabled(t *testing.T) { t.Errorf("Error updating index: %v", err) } } + +func TestForceVersion(t *testing.T) { + cfg := map[string]interface{}{} + cfg["forceSegmentType"] = "zap" + cfg["forceSegmentVersion"] = 11 + analysisQueue := index.NewAnalysisQueue(1) + idx, err := NewScorch(Name, cfg, analysisQueue) + if err != nil { + t.Fatalf("error 
opening a supported version: %v", err) + } + s := idx.(*Scorch) + if s.segPlugin.Version() != 11 { + t.Fatalf("wrong segment wrapper version loaded, expected %d got %d", 11, s.segPlugin.Version()) + } + cfg["forceSegmentVersion"] = 12 + idx, err = NewScorch(Name, cfg, analysisQueue) + if err != nil { + t.Fatalf("error opening a supported version: %v", err) + } + s = idx.(*Scorch) + if s.segPlugin.Version() != 12 { + t.Fatalf("wrong segment wrapper version loaded, expected %d got %d", 12, s.segPlugin.Version()) + } + cfg["forceSegmentVersion"] = 10 + idx, err = NewScorch(Name, cfg, analysisQueue) + if err == nil { + t.Fatalf("expected an error opening an unsupported version, got nil") + } +} \ No newline at end of file diff --git a/index/scorch/segment/empty.go b/index/scorch/segment/empty.go index fdc407a74..340db73a6 100644 --- a/index/scorch/segment/empty.go +++ b/index/scorch/segment/empty.go @@ -105,10 +105,6 @@ func (e *EmptyDictionaryIterator) Contains(key []byte) (bool, error) { return false, nil } -func (e *EmptyPostingsIterator) Advance(uint64) (Posting, error) { - return nil, nil - } - type EmptyPostingsList struct{} func (e *EmptyPostingsList) Iterator(includeFreq, includeNorm, includeLocations bool, @@ -130,6 +126,10 @@ func (e *EmptyPostingsIterator) Next() (Posting, error) { return nil, nil } +func (e *EmptyPostingsIterator) Advance(uint64) (Posting, error) { + return nil, nil +} + func (e *EmptyPostingsIterator) Size() int { return 0 } diff --git a/index/scorch/segment/plugin.go b/index/scorch/segment/plugin.go new file mode 100644 index 000000000..d8aaa0b6d --- /dev/null +++ b/index/scorch/segment/plugin.go @@ -0,0 +1,58 @@ +// Copyright (c) 2020 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package segment + +import ( + "github.com/RoaringBitmap/roaring" + "github.com/blevesearch/bleve/index" +) + +// Plugin represents the essential functions required by a package to plug in +// its segment implementation +type Plugin interface { + + // Type is the name for this segment plugin + Type() string + + // Version is a numeric value identifying a specific version of this type. + // When incompatible changes are made to a particular type of plugin, the + // version must be incremented. + Version() uint32 + + // New takes a set of AnalysisResults and turns them into a new Segment + New(results []*index.AnalysisResult) (Segment, uint64, error) + + // Open attempts to open the file at the specified path and + // return the corresponding Segment + Open(path string) (Segment, error) + + // Merge takes a set of Segments, and creates a new segment on disk at + // the specified path. + // Drops is a set of bitmaps (one for each segment) indicating which + // documents can be dropped from the segments during the merge. + // If the closeCh channel is closed, Merge will cease doing work at + // the next opportunity, and return an error (closed). + // StatsReporter can optionally be provided, in which case progress + // made during the merge is reported while operation continues. + // Returns: + // A slice of new document numbers (one for each input segment), + // this allows the caller to know a particular document's new + // document number in the newly merged segment. + // The number of bytes written to the new segment file. 
+ // An error, if any occurred. + Merge(segments []Segment, drops []*roaring.Bitmap, path string, + closeCh chan struct{}, s StatsReporter) ( + [][]uint64, uint64, error) +} diff --git a/index/scorch/segment/segment.go b/index/scorch/segment/segment.go index 34c2bc204..ddd0d0910 100644 --- a/index/scorch/segment/segment.go +++ b/index/scorch/segment/segment.go @@ -50,6 +50,16 @@ type Segment interface { DecRef() error } +type UnpersistedSegment interface { + Segment + Persist(path string) error +} + +type PersistedSegment interface { + Segment + Path() string +} + type TermDictionary interface { PostingsList(term []byte, except *roaring.Bitmap, prealloc PostingsList) (PostingsList, error) @@ -96,6 +106,12 @@ type PostingsIterator interface { Size() int } +type OptimizablePostingsIterator interface { + ActualBitmap() *roaring.Bitmap + DocNum1Hit() (uint64, bool) + ReplaceActual(*roaring.Bitmap) +} + type Posting interface { Number() uint64 diff --git a/index/scorch/segment/unadorned.go b/index/scorch/segment/unadorned.go new file mode 100644 index 000000000..9a4d6c76c --- /dev/null +++ b/index/scorch/segment/unadorned.go @@ -0,0 +1,148 @@ +// Copyright (c) 2020 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package segment + +import ( + "github.com/RoaringBitmap/roaring" + "math" + "reflect" +) + +var reflectStaticSizeUnadornedPostingsIteratorBitmap int +var reflectStaticSizeUnadornedPostingsIterator1Hit int +var reflectStaticSizeUnadornedPosting int + + +func init() { + var pib UnadornedPostingsIteratorBitmap + reflectStaticSizeUnadornedPostingsIteratorBitmap = int(reflect.TypeOf(pib).Size()) + var pi1h UnadornedPostingsIterator1Hit + reflectStaticSizeUnadornedPostingsIterator1Hit = int(reflect.TypeOf(pi1h).Size()) + var up UnadornedPosting + reflectStaticSizeUnadornedPosting = int(reflect.TypeOf(up).Size()) +} + +type UnadornedPostingsIteratorBitmap struct{ + actual roaring.IntPeekable + actualBM *roaring.Bitmap +} + +func (i *UnadornedPostingsIteratorBitmap) Next() (Posting, error) { + return i.nextAtOrAfter(0) +} + +func (i *UnadornedPostingsIteratorBitmap) Advance(docNum uint64) (Posting, error) { + return i.nextAtOrAfter(docNum) +} + +func (i *UnadornedPostingsIteratorBitmap) nextAtOrAfter(atOrAfter uint64) (Posting, error) { + docNum, exists := i.nextDocNumAtOrAfter(atOrAfter) + if !exists { + return nil, nil + } + return UnadornedPosting(docNum), nil +} + +func (i *UnadornedPostingsIteratorBitmap) nextDocNumAtOrAfter(atOrAfter uint64) (uint64, bool) { + if i.actual == nil || !i.actual.HasNext() { + return 0, false + } + i.actual.AdvanceIfNeeded(uint32(atOrAfter)) + + if !i.actual.HasNext() { + return 0, false // couldn't find anything + } + + return uint64(i.actual.Next()), true +} + +func (i *UnadornedPostingsIteratorBitmap) Size() int { + return reflectStaticSizeUnadornedPostingsIteratorBitmap +} + +func NewUnadornedPostingsIteratorFromBitmap(bm *roaring.Bitmap) PostingsIterator { + return &UnadornedPostingsIteratorBitmap{ + actualBM: bm, + actual: bm.Iterator(), + } +} + +const docNum1HitFinished = math.MaxUint64 + +type UnadornedPostingsIterator1Hit struct{ + docNum uint64 +} + +func (i *UnadornedPostingsIterator1Hit) Next() (Posting, error) { + return 
i.nextAtOrAfter(0) +} + +func (i *UnadornedPostingsIterator1Hit) Advance(docNum uint64) (Posting, error) { + return i.nextAtOrAfter(docNum) +} + +func (i *UnadornedPostingsIterator1Hit) nextAtOrAfter(atOrAfter uint64) (Posting, error) { + docNum, exists := i.nextDocNumAtOrAfter(atOrAfter) + if !exists { + return nil, nil + } + return UnadornedPosting(docNum), nil +} + +func (i *UnadornedPostingsIterator1Hit) nextDocNumAtOrAfter(atOrAfter uint64) (uint64, bool) { + if i.docNum == docNum1HitFinished { + return 0, false + } + if i.docNum < atOrAfter { + // advanced past our 1-hit + i.docNum = docNum1HitFinished // consume our 1-hit docNum + return 0, false + } + docNum := i.docNum + i.docNum = docNum1HitFinished // consume our 1-hit docNum + return docNum, true +} + +func (i *UnadornedPostingsIterator1Hit) Size() int { + return reflectStaticSizeUnadornedPostingsIterator1Hit +} + +func NewUnadornedPostingsIteratorFrom1Hit(docNum1Hit uint64) PostingsIterator { + return &UnadornedPostingsIterator1Hit{ + docNum1Hit, + } +} + +type UnadornedPosting uint64 + +func (p UnadornedPosting) Number() uint64 { + return uint64(p) +} + +func (p UnadornedPosting) Frequency() uint64 { + return 0 +} + +func (p UnadornedPosting) Norm() float64 { + return 0 +} + +func (p UnadornedPosting) Locations() []Location { + return nil +} + +func (p UnadornedPosting) Size() int { + return reflectStaticSizeUnadornedPosting +} \ No newline at end of file diff --git a/index/scorch/segment/zap/README.md b/index/scorch/segment/zap/README.md deleted file mode 100644 index 0facb669f..000000000 --- a/index/scorch/segment/zap/README.md +++ /dev/null @@ -1,158 +0,0 @@ -# zap file format - -Advanced ZAP File Format Documentation is [here](zap.md). - -The file is written in the reverse order that we typically access data. This helps us write in one pass since later sections of the file require file offsets of things we've already written. 
- -Current usage: - -- mmap the entire file -- crc-32 bytes and version are in fixed position at end of the file -- reading remainder of footer could be version specific -- remainder of footer gives us: - - 3 important offsets (docValue , fields index and stored data index) - - 2 important values (number of docs and chunk factor) -- field data is processed once and memoized onto the heap so that we never have to go back to disk for it -- access to stored data by doc number means first navigating to the stored data index, then accessing a fixed position offset into that slice, which gives us the actual address of the data. the first bytes of that section tell us the size of data so that we know where it ends. -- access to all other indexed data follows the following pattern: - - first know the field name -> convert to id - - next navigate to term dictionary for that field - - some operations stop here and do dictionary ops - - next use dictionary to navigate to posting list for a specific term - - walk posting list - - if necessary, walk posting details as we go - - if location info is desired, consult location bitmap to see if it is there - -## stored fields section - -- for each document - - preparation phase: - - produce a slice of metadata bytes and data bytes - - produce these slices in field id order - - field value is appended to the data slice - - metadata slice is varint encoded with the following values for each field value - - field id (uint16) - - field type (byte) - - field value start offset in uncompressed data slice (uint64) - - field value length (uint64) - - field number of array positions (uint64) - - one additional value for each array position (uint64) - - compress the data slice using snappy - - file writing phase: - - remember the start offset for this document - - write out meta data length (varint uint64) - - write out compressed data length (varint uint64) - - write out the metadata bytes - - write out the compressed data bytes - -## stored 
fields idx - -- for each document - - write start offset (remembered from previous section) of stored data (big endian uint64) - -With this index and a known document number, we have direct access to all the stored field data. - -## posting details (freq/norm) section - -- for each posting list - - produce a slice containing multiple consecutive chunks (each chunk is varint stream) - - produce a slice remembering offsets of where each chunk starts - - preparation phase: - - for each hit in the posting list - - if this hit is in next chunk close out encoding of last chunk and record offset start of next - - encode term frequency (uint64) - - encode norm factor (float32) - - file writing phase: - - remember start position for this posting list details - - write out number of chunks that follow (varint uint64) - - write out length of each chunk (each a varint uint64) - - write out the byte slice containing all the chunk data - -If you know the doc number you're interested in, this format lets you jump to the correct chunk (docNum/chunkFactor) directly and then seek within that chunk until you find it. 
- -## posting details (location) section - -- for each posting list - - produce a slice containing multiple consecutive chunks (each chunk is varint stream) - - produce a slice remembering offsets of where each chunk starts - - preparation phase: - - for each hit in the posting list - - if this hit is in next chunk close out encoding of last chunk and record offset start of next - - encode field (uint16) - - encode field pos (uint64) - - encode field start (uint64) - - encode field end (uint64) - - encode number of array positions to follow (uint64) - - encode each array position (each uint64) - - file writing phase: - - remember start position for this posting list details - - write out number of chunks that follow (varint uint64) - - write out length of each chunk (each a varint uint64) - - write out the byte slice containing all the chunk data - -If you know the doc number you're interested in, this format lets you jump to the correct chunk (docNum/chunkFactor) directly and then seek within that chunk until you find it. 
- -## postings list section - -- for each posting list - - preparation phase: - - encode roaring bitmap posting list to bytes (so we know the length) - - file writing phase: - - remember the start position for this posting list - - write freq/norm details offset (remembered from previous, as varint uint64) - - write location details offset (remembered from previous, as varint uint64) - - write length of encoded roaring bitmap - - write the serialized roaring bitmap data - -## dictionary - -- for each field - - preparation phase: - - encode vellum FST with dictionary data pointing to file offset of posting list (remembered from previous) - - file writing phase: - - remember the start position of this persistDictionary - - write length of vellum data (varint uint64) - - write out vellum data - -## fields section - -- for each field - - file writing phase: - - remember start offset for each field - - write dictionary address (remembered from previous) (varint uint64) - - write length of field name (varint uint64) - - write field name bytes - -## fields idx - -- for each field - - file writing phase: - - write big endian uint64 of start offset for each field - -NOTE: currently we don't know or record the length of this fields index. Instead we rely on the fact that we know it immediately precedes a footer of known size. 
- -## fields DocValue - -- for each field - - preparation phase: - - produce a slice containing multiple consecutive chunks, where each chunk is composed of a meta section followed by compressed columnar field data - - produce a slice remembering the length of each chunk - - file writing phase: - - remember the start position of this first field DocValue offset in the footer - - write out number of chunks that follow (varint uint64) - - write out length of each chunk (each a varint uint64) - - write out the byte slice containing all the chunk data - -NOTE: currently the meta header inside each chunk gives clue to the location offsets and size of the data pertaining to a given docID and any -read operation leverage that meta information to extract the document specific data from the file. - -## footer - -- file writing phase - - write number of docs (big endian uint64) - - write stored field index location (big endian uint64) - - write field index location (big endian uint64) - - write field docValue location (big endian uint64) - - write out chunk factor (big endian uint32) - - write out version (big endian uint32) - - write out file CRC of everything preceding this (big endian uint32) diff --git a/index/scorch/segment/zap/build.go b/index/scorch/segment/zap/build.go deleted file mode 100644 index c02333cee..000000000 --- a/index/scorch/segment/zap/build.go +++ /dev/null @@ -1,151 +0,0 @@ -// Copyright (c) 2017 Couchbase, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-// See the License for the specific language governing permissions and -// limitations under the License. - -package zap - -import ( - "bufio" - "github.com/couchbase/vellum" - "math" - "os" -) - -const Version uint32 = 11 - -const Type string = "zap" - -const fieldNotUninverted = math.MaxUint64 - -// PersistSegmentBase persists SegmentBase in the zap file format. -func PersistSegmentBase(sb *SegmentBase, path string) error { - flag := os.O_RDWR | os.O_CREATE - - f, err := os.OpenFile(path, flag, 0600) - if err != nil { - return err - } - - cleanup := func() { - _ = f.Close() - _ = os.Remove(path) - } - - br := bufio.NewWriter(f) - - _, err = br.Write(sb.mem) - if err != nil { - cleanup() - return err - } - - err = persistFooter(sb.numDocs, sb.storedIndexOffset, sb.fieldsIndexOffset, sb.docValueOffset, - sb.chunkFactor, sb.memCRC, br) - if err != nil { - cleanup() - return err - } - - err = br.Flush() - if err != nil { - cleanup() - return err - } - - err = f.Sync() - if err != nil { - cleanup() - return err - } - - err = f.Close() - if err != nil { - cleanup() - return err - } - - return nil -} - -func persistStoredFieldValues(fieldID int, - storedFieldValues [][]byte, stf []byte, spf [][]uint64, - curr int, metaEncode varintEncoder, data []byte) ( - int, []byte, error) { - for i := 0; i < len(storedFieldValues); i++ { - // encode field - _, err := metaEncode(uint64(fieldID)) - if err != nil { - return 0, nil, err - } - // encode type - _, err = metaEncode(uint64(stf[i])) - if err != nil { - return 0, nil, err - } - // encode start offset - _, err = metaEncode(uint64(curr)) - if err != nil { - return 0, nil, err - } - // end len - _, err = metaEncode(uint64(len(storedFieldValues[i]))) - if err != nil { - return 0, nil, err - } - // encode number of array pos - _, err = metaEncode(uint64(len(spf[i]))) - if err != nil { - return 0, nil, err - } - // encode all array positions - for _, pos := range spf[i] { - _, err = metaEncode(pos) - if err != nil { - return 0, 
nil, err - } - } - - data = append(data, storedFieldValues[i]...) - curr += len(storedFieldValues[i]) - } - - return curr, data, nil -} - -func InitSegmentBase(mem []byte, memCRC uint32, chunkFactor uint32, - fieldsMap map[string]uint16, fieldsInv []string, numDocs uint64, - storedIndexOffset uint64, fieldsIndexOffset uint64, docValueOffset uint64, - dictLocs []uint64) (*SegmentBase, error) { - sb := &SegmentBase{ - mem: mem, - memCRC: memCRC, - chunkFactor: chunkFactor, - fieldsMap: fieldsMap, - fieldsInv: fieldsInv, - numDocs: numDocs, - storedIndexOffset: storedIndexOffset, - fieldsIndexOffset: fieldsIndexOffset, - docValueOffset: docValueOffset, - dictLocs: dictLocs, - fieldDvReaders: make(map[uint16]*docValueReader), - fieldFSTs: make(map[uint16]*vellum.FST), - } - sb.updateSize() - - err := sb.loadDvReaders() - if err != nil { - return nil, err - } - - return sb, nil -} diff --git a/index/scorch/segment/zap/build_test.go b/index/scorch/segment/zap/build_test.go deleted file mode 100644 index 9d1b584f2..000000000 --- a/index/scorch/segment/zap/build_test.go +++ /dev/null @@ -1,556 +0,0 @@ -// Copyright (c) 2017 Couchbase, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package zap - -import ( - "os" - "testing" - - "github.com/blevesearch/bleve/analysis" - "github.com/blevesearch/bleve/document" - "github.com/blevesearch/bleve/index" -) - -func TestBuild(t *testing.T) { - _ = os.RemoveAll("/tmp/scorch.zap") - - sb, _, err := buildTestSegment() - if err != nil { - t.Fatal(err) - } - err = PersistSegmentBase(sb, "/tmp/scorch.zap") - if err != nil { - t.Fatal(err) - } -} - -func buildTestSegment() (*SegmentBase, uint64, error) { - doc := &document.Document{ - ID: "a", - Fields: []document.Field{ - document.NewTextFieldCustom("_id", nil, []byte("a"), document.IndexField|document.StoreField, nil), - document.NewTextFieldCustom("name", nil, []byte("wow"), document.IndexField|document.StoreField|document.IncludeTermVectors, nil), - document.NewTextFieldCustom("desc", nil, []byte("some thing"), document.IndexField|document.StoreField|document.IncludeTermVectors, nil), - document.NewTextFieldCustom("tag", []uint64{0}, []byte("cold"), document.IndexField|document.StoreField|document.IncludeTermVectors, nil), - document.NewTextFieldCustom("tag", []uint64{1}, []byte("dark"), document.IndexField|document.StoreField|document.IncludeTermVectors, nil), - }, - CompositeFields: []*document.CompositeField{ - document.NewCompositeField("_all", true, nil, []string{"_id"}), - }, - } - - // forge analyzed docs - results := []*index.AnalysisResult{ - &index.AnalysisResult{ - Document: doc, - Analyzed: []analysis.TokenFrequencies{ - analysis.TokenFrequency(analysis.TokenStream{ - &analysis.Token{ - Start: 0, - End: 1, - Position: 1, - Term: []byte("a"), - }, - }, nil, false), - analysis.TokenFrequency(analysis.TokenStream{ - &analysis.Token{ - Start: 0, - End: 3, - Position: 1, - Term: []byte("wow"), - }, - }, nil, true), - analysis.TokenFrequency(analysis.TokenStream{ - &analysis.Token{ - Start: 0, - End: 4, - Position: 1, - Term: []byte("some"), - }, - &analysis.Token{ - Start: 5, - End: 10, - Position: 2, - Term: []byte("thing"), - }, - }, nil, 
true), - analysis.TokenFrequency(analysis.TokenStream{ - &analysis.Token{ - Start: 0, - End: 4, - Position: 1, - Term: []byte("cold"), - }, - }, []uint64{0}, true), - analysis.TokenFrequency(analysis.TokenStream{ - &analysis.Token{ - Start: 0, - End: 4, - Position: 1, - Term: []byte("dark"), - }, - }, []uint64{1}, true), - }, - Length: []int{ - 1, - 1, - 2, - 1, - 1, - }, - }, - } - - // fix up composite fields - for _, ar := range results { - for i, f := range ar.Document.Fields { - for _, cf := range ar.Document.CompositeFields { - cf.Compose(f.Name(), ar.Length[i], ar.Analyzed[i]) - } - } - } - - return AnalysisResultsToSegmentBase(results, 1024) -} - -func buildTestSegmentMulti() (*SegmentBase, uint64, error) { - results := buildTestAnalysisResultsMulti() - - return AnalysisResultsToSegmentBase(results, 1024) -} - -func buildTestSegmentMultiWithChunkFactor(chunkFactor uint32) (*SegmentBase, uint64, error) { - results := buildTestAnalysisResultsMulti() - - return AnalysisResultsToSegmentBase(results, chunkFactor) -} - -func buildTestSegmentMultiWithDifferentFields(includeDocA, includeDocB bool) (*SegmentBase, uint64, error) { - results := buildTestAnalysisResultsMultiWithDifferentFields(includeDocA, includeDocB) - - return AnalysisResultsToSegmentBase(results, 1024) -} - -func buildTestAnalysisResultsMulti() []*index.AnalysisResult { - doc := &document.Document{ - ID: "a", - Fields: []document.Field{ - document.NewTextFieldCustom("_id", nil, []byte("a"), document.IndexField|document.StoreField, nil), - document.NewTextFieldCustom("name", nil, []byte("wow"), document.IndexField|document.StoreField|document.IncludeTermVectors, nil), - document.NewTextFieldCustom("desc", nil, []byte("some thing"), document.IndexField|document.StoreField|document.IncludeTermVectors, nil), - document.NewTextFieldCustom("tag", []uint64{0}, []byte("cold"), document.IndexField|document.StoreField|document.IncludeTermVectors, nil), - document.NewTextFieldCustom("tag", []uint64{1}, 
[]byte("dark"), document.IndexField|document.StoreField|document.IncludeTermVectors, nil), - }, - CompositeFields: []*document.CompositeField{ - document.NewCompositeField("_all", true, nil, []string{"_id"}), - }, - } - - doc2 := &document.Document{ - ID: "b", - Fields: []document.Field{ - document.NewTextFieldCustom("_id", nil, []byte("b"), document.IndexField|document.StoreField, nil), - document.NewTextFieldCustom("name", nil, []byte("who"), document.IndexField|document.StoreField|document.IncludeTermVectors, nil), - document.NewTextFieldCustom("desc", nil, []byte("some thing"), document.IndexField|document.StoreField|document.IncludeTermVectors, nil), - document.NewTextFieldCustom("tag", []uint64{0}, []byte("cold"), document.IndexField|document.StoreField|document.IncludeTermVectors, nil), - document.NewTextFieldCustom("tag", []uint64{1}, []byte("dark"), document.IndexField|document.StoreField|document.IncludeTermVectors, nil), - }, - CompositeFields: []*document.CompositeField{ - document.NewCompositeField("_all", true, nil, []string{"_id"}), - }, - } - - // forge analyzed docs - results := []*index.AnalysisResult{ - &index.AnalysisResult{ - Document: doc, - Analyzed: []analysis.TokenFrequencies{ - analysis.TokenFrequency(analysis.TokenStream{ - &analysis.Token{ - Start: 0, - End: 1, - Position: 1, - Term: []byte("a"), - }, - }, nil, false), - analysis.TokenFrequency(analysis.TokenStream{ - &analysis.Token{ - Start: 0, - End: 3, - Position: 1, - Term: []byte("wow"), - }, - }, nil, true), - analysis.TokenFrequency(analysis.TokenStream{ - &analysis.Token{ - Start: 0, - End: 4, - Position: 1, - Term: []byte("some"), - }, - &analysis.Token{ - Start: 5, - End: 10, - Position: 2, - Term: []byte("thing"), - }, - }, nil, true), - analysis.TokenFrequency(analysis.TokenStream{ - &analysis.Token{ - Start: 0, - End: 4, - Position: 1, - Term: []byte("cold"), - }, - }, []uint64{0}, true), - analysis.TokenFrequency(analysis.TokenStream{ - &analysis.Token{ - Start: 0, - End: 
4, - Position: 1, - Term: []byte("dark"), - }, - }, []uint64{1}, true), - }, - Length: []int{ - 1, - 1, - 2, - 1, - 1, - }, - }, - &index.AnalysisResult{ - Document: doc2, - Analyzed: []analysis.TokenFrequencies{ - analysis.TokenFrequency(analysis.TokenStream{ - &analysis.Token{ - Start: 0, - End: 1, - Position: 1, - Term: []byte("b"), - }, - }, nil, false), - analysis.TokenFrequency(analysis.TokenStream{ - &analysis.Token{ - Start: 0, - End: 3, - Position: 1, - Term: []byte("who"), - }, - }, nil, true), - analysis.TokenFrequency(analysis.TokenStream{ - &analysis.Token{ - Start: 0, - End: 4, - Position: 1, - Term: []byte("some"), - }, - &analysis.Token{ - Start: 5, - End: 10, - Position: 2, - Term: []byte("thing"), - }, - }, nil, true), - analysis.TokenFrequency(analysis.TokenStream{ - &analysis.Token{ - Start: 0, - End: 4, - Position: 1, - Term: []byte("cold"), - }, - }, []uint64{0}, true), - analysis.TokenFrequency(analysis.TokenStream{ - &analysis.Token{ - Start: 0, - End: 4, - Position: 1, - Term: []byte("dark"), - }, - }, []uint64{1}, true), - }, - Length: []int{ - 1, - 1, - 2, - 1, - 1, - }, - }, - } - - // fix up composite fields - for _, ar := range results { - for i, f := range ar.Document.Fields { - for _, cf := range ar.Document.CompositeFields { - cf.Compose(f.Name(), ar.Length[i], ar.Analyzed[i]) - } - } - } - - return results -} - -func buildTestAnalysisResultsMultiWithDifferentFields(includeDocA, includeDocB bool) []*index.AnalysisResult { - results := []*index.AnalysisResult{} - - if includeDocA { - doc := &document.Document{ - ID: "a", - Fields: []document.Field{ - document.NewTextFieldCustom("_id", nil, []byte("a"), - document.IndexField|document.StoreField, nil), - document.NewTextField("name", []uint64{}, []byte("ABC")), - document.NewTextField("dept", []uint64{}, []byte("ABC dept")), - document.NewTextField("manages.id", []uint64{}, []byte("XYZ")), - document.NewTextField("manages.count", []uint64{}, []byte("1")), - }, - CompositeFields: 
[]*document.CompositeField{ - document.NewCompositeField("_all", true, nil, []string{"_id"}), - }, - } - - result := &index.AnalysisResult{ - Document: doc, - Analyzed: []analysis.TokenFrequencies{ - analysis.TokenFrequency(analysis.TokenStream{ - &analysis.Token{ - Start: 0, - End: 1, - Position: 1, - Term: []byte("a"), - }, - }, nil, false), - analysis.TokenFrequency(analysis.TokenStream{ - &analysis.Token{ - Start: 0, - End: 3, - Position: 1, - Term: []byte("ABC"), - }, - }, nil, true), - analysis.TokenFrequency(analysis.TokenStream{ - &analysis.Token{ - Start: 0, - End: 3, - Position: 1, - Term: []byte("ABC"), - }, - &analysis.Token{ - Start: 4, - End: 8, - Position: 2, - Term: []byte("dept"), - }, - }, nil, true), - analysis.TokenFrequency(analysis.TokenStream{ - &analysis.Token{ - Start: 0, - End: 3, - Position: 1, - Term: []byte("XYZ"), - }, - }, []uint64{0}, true), - analysis.TokenFrequency(analysis.TokenStream{ - &analysis.Token{ - Start: 0, - End: 1, - Position: 1, - Term: []byte("1"), - }, - }, []uint64{1}, true), - }, - Length: []int{ - 1, - 1, - 2, - 1, - 1, - }, - } - - results = append(results, result) - } - - if includeDocB { - doc := &document.Document{ - ID: "b", - Fields: []document.Field{ - document.NewTextFieldCustom("_id", nil, []byte("b"), - document.IndexField|document.StoreField, nil), - document.NewTextField("name", []uint64{}, []byte("XYZ")), - document.NewTextField("dept", []uint64{}, []byte("ABC dept")), - document.NewTextField("reportsTo.id", []uint64{}, []byte("ABC")), - }, - CompositeFields: []*document.CompositeField{ - document.NewCompositeField("_all", true, nil, []string{"_id"}), - }, - } - - result := &index.AnalysisResult{ - Document: doc, - Analyzed: []analysis.TokenFrequencies{ - analysis.TokenFrequency(analysis.TokenStream{ - &analysis.Token{ - Start: 0, - End: 1, - Position: 1, - Term: []byte("b"), - }, - }, nil, false), - analysis.TokenFrequency(analysis.TokenStream{ - &analysis.Token{ - Start: 0, - End: 3, - Position: 1, 
- Term: []byte("XYZ"), - }, - }, nil, true), - analysis.TokenFrequency(analysis.TokenStream{ - &analysis.Token{ - Start: 0, - End: 3, - Position: 1, - Term: []byte("ABC"), - }, - &analysis.Token{ - Start: 4, - End: 8, - Position: 2, - Term: []byte("dept"), - }, - }, nil, true), - analysis.TokenFrequency(analysis.TokenStream{ - &analysis.Token{ - Start: 0, - End: 3, - Position: 1, - Term: []byte("ABC"), - }, - }, []uint64{0}, true), - }, - Length: []int{ - 1, - 1, - 2, - 1, - }, - } - - results = append(results, result) - } - - // fix up composite fields - for _, ar := range results { - for i, f := range ar.Document.Fields { - for _, cf := range ar.Document.CompositeFields { - cf.Compose(f.Name(), ar.Length[i], ar.Analyzed[i]) - } - } - } - - return results -} - -func buildTestSegmentWithDefaultFieldMapping(chunkFactor uint32) ( - *SegmentBase, []string, error) { - doc := &document.Document{ - ID: "a", - Fields: []document.Field{ - document.NewTextFieldCustom("_id", nil, []byte("a"), - document.IndexField|document.StoreField, nil), - document.NewTextField("name", nil, []byte("wow")), - document.NewTextField("desc", nil, []byte("some thing")), - document.NewTextField("tag", []uint64{0}, []byte("cold")), - }, - CompositeFields: []*document.CompositeField{ - document.NewCompositeField("_all", true, nil, []string{"_id"}), - }, - } - - var fields []string - fields = append(fields, "_id") - fields = append(fields, "name") - fields = append(fields, "desc") - fields = append(fields, "tag") - - // forge analyzed docs - results := []*index.AnalysisResult{ - &index.AnalysisResult{ - Document: doc, - Analyzed: []analysis.TokenFrequencies{ - analysis.TokenFrequency(analysis.TokenStream{ - &analysis.Token{ - Start: 0, - End: 1, - Position: 1, - Term: []byte("a"), - }, - }, nil, false), - analysis.TokenFrequency(analysis.TokenStream{ - &analysis.Token{ - Start: 0, - End: 3, - Position: 1, - Term: []byte("wow"), - }, - }, nil, true), - analysis.TokenFrequency(analysis.TokenStream{ 
- &analysis.Token{ - Start: 0, - End: 4, - Position: 1, - Term: []byte("some"), - }, - &analysis.Token{ - Start: 5, - End: 10, - Position: 2, - Term: []byte("thing"), - }, - }, nil, true), - analysis.TokenFrequency(analysis.TokenStream{ - &analysis.Token{ - Start: 0, - End: 4, - Position: 1, - Term: []byte("cold"), - }, - }, []uint64{0}, true), - }, - Length: []int{ - 1, - 1, - 2, - 1, - 1, - }, - }, - } - - // fix up composite fields - for _, ar := range results { - for i, f := range ar.Document.Fields { - for _, cf := range ar.Document.CompositeFields { - cf.Compose(f.Name(), ar.Length[i], ar.Analyzed[i]) - } - } - } - - sb, _, err := AnalysisResultsToSegmentBase(results, chunkFactor) - - return sb, fields, err -} diff --git a/index/scorch/segment/zap/contentcoder.go b/index/scorch/segment/zap/contentcoder.go deleted file mode 100644 index b9ff8179b..000000000 --- a/index/scorch/segment/zap/contentcoder.go +++ /dev/null @@ -1,230 +0,0 @@ -// Copyright (c) 2017 Couchbase, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package zap - -import ( - "bytes" - "encoding/binary" - "io" - "reflect" - - "github.com/golang/snappy" -) - -var reflectStaticSizeMetaData int - -func init() { - var md MetaData - reflectStaticSizeMetaData = int(reflect.TypeOf(md).Size()) -} - -var termSeparator byte = 0xff -var termSeparatorSplitSlice = []byte{termSeparator} - -type chunkedContentCoder struct { - final []byte - chunkSize uint64 - currChunk uint64 - chunkLens []uint64 - - w io.Writer - progressiveWrite bool - - chunkMetaBuf bytes.Buffer - chunkBuf bytes.Buffer - - chunkMeta []MetaData - - compressed []byte // temp buf for snappy compression -} - -// MetaData represents the data information inside a -// chunk. -type MetaData struct { - DocNum uint64 // docNum of the data inside the chunk - DocDvOffset uint64 // offset of data inside the chunk for the given docid -} - -// newChunkedContentCoder returns a new chunk content coder which -// packs data into chunks based on the provided chunkSize -func newChunkedContentCoder(chunkSize uint64, maxDocNum uint64, - w io.Writer, progressiveWrite bool) *chunkedContentCoder { - total := maxDocNum/chunkSize + 1 - rv := &chunkedContentCoder{ - chunkSize: chunkSize, - chunkLens: make([]uint64, total), - chunkMeta: make([]MetaData, 0, total), - w: w, - progressiveWrite: progressiveWrite, - } - - return rv -} - -// Reset lets you reuse this chunked content coder. Buffers are reset -// and re used. You cannot change the chunk size. -func (c *chunkedContentCoder) Reset() { - c.currChunk = 0 - c.final = c.final[:0] - c.chunkBuf.Reset() - c.chunkMetaBuf.Reset() - for i := range c.chunkLens { - c.chunkLens[i] = 0 - } - c.chunkMeta = c.chunkMeta[:0] -} - -// Close indicates you are done calling Add() this allows -// the final chunk to be encoded. 
-func (c *chunkedContentCoder) Close() error { - return c.flushContents() -} - -func (c *chunkedContentCoder) flushContents() error { - // flush the contents, with meta information at first - buf := make([]byte, binary.MaxVarintLen64) - n := binary.PutUvarint(buf, uint64(len(c.chunkMeta))) - _, err := c.chunkMetaBuf.Write(buf[:n]) - if err != nil { - return err - } - - // write out the metaData slice - for _, meta := range c.chunkMeta { - _, err := writeUvarints(&c.chunkMetaBuf, meta.DocNum, meta.DocDvOffset) - if err != nil { - return err - } - } - - // write the metadata to final data - metaData := c.chunkMetaBuf.Bytes() - c.final = append(c.final, c.chunkMetaBuf.Bytes()...) - // write the compressed data to the final data - c.compressed = snappy.Encode(c.compressed[:cap(c.compressed)], c.chunkBuf.Bytes()) - c.final = append(c.final, c.compressed...) - - c.chunkLens[c.currChunk] = uint64(len(c.compressed) + len(metaData)) - - if c.progressiveWrite { - _, err := c.w.Write(c.final) - if err != nil { - return err - } - c.final = c.final[:0] - } - - return nil -} - -// Add encodes the provided byte slice into the correct chunk for the provided -// doc num. You MUST call Add() with increasing docNums. -func (c *chunkedContentCoder) Add(docNum uint64, vals []byte) error { - chunk := docNum / c.chunkSize - if chunk != c.currChunk { - // flush out the previous chunk details - err := c.flushContents() - if err != nil { - return err - } - // clearing the chunk specific meta for next chunk - c.chunkBuf.Reset() - c.chunkMetaBuf.Reset() - c.chunkMeta = c.chunkMeta[:0] - c.currChunk = chunk - } - - // get the starting offset for this doc - dvOffset := c.chunkBuf.Len() - dvSize, err := c.chunkBuf.Write(vals) - if err != nil { - return err - } - - c.chunkMeta = append(c.chunkMeta, MetaData{ - DocNum: docNum, - DocDvOffset: uint64(dvOffset + dvSize), - }) - return nil -} - -// Write commits all the encoded chunked contents to the provided writer. -// -// | ..... data ..... 
| chunk offsets (varints) -// | position of chunk offsets (uint64) | number of offsets (uint64) | -// -func (c *chunkedContentCoder) Write() (int, error) { - var tw int - - if c.final != nil { - // write out the data section first - nw, err := c.w.Write(c.final) - tw += nw - if err != nil { - return tw, err - } - } - - chunkOffsetsStart := uint64(tw) - - if cap(c.final) < binary.MaxVarintLen64 { - c.final = make([]byte, binary.MaxVarintLen64) - } else { - c.final = c.final[0:binary.MaxVarintLen64] - } - chunkOffsets := modifyLengthsToEndOffsets(c.chunkLens) - // write out the chunk offsets - for _, chunkOffset := range chunkOffsets { - n := binary.PutUvarint(c.final, chunkOffset) - nw, err := c.w.Write(c.final[:n]) - tw += nw - if err != nil { - return tw, err - } - } - - chunkOffsetsLen := uint64(tw) - chunkOffsetsStart - - c.final = c.final[0:8] - // write out the length of chunk offsets - binary.BigEndian.PutUint64(c.final, chunkOffsetsLen) - nw, err := c.w.Write(c.final) - tw += nw - if err != nil { - return tw, err - } - - // write out the number of chunks - binary.BigEndian.PutUint64(c.final, uint64(len(c.chunkLens))) - nw, err = c.w.Write(c.final) - tw += nw - if err != nil { - return tw, err - } - - c.final = c.final[:0] - - return tw, nil -} - -// ReadDocValueBoundary elicits the start, end offsets from a -// metaData header slice -func ReadDocValueBoundary(chunk int, metaHeaders []MetaData) (uint64, uint64) { - var start uint64 - if chunk > 0 { - start = metaHeaders[chunk-1].DocDvOffset - } - return start, metaHeaders[chunk].DocDvOffset -} diff --git a/index/scorch/segment/zap/contentcoder_test.go b/index/scorch/segment/zap/contentcoder_test.go deleted file mode 100644 index 62ffde413..000000000 --- a/index/scorch/segment/zap/contentcoder_test.go +++ /dev/null @@ -1,125 +0,0 @@ -// Copyright (c) 2017 Couchbase, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package zap - -import ( - "bytes" - "testing" -) - -func TestChunkedContentCoder(t *testing.T) { - - tests := []struct { - maxDocNum uint64 - chunkSize uint64 - docNums []uint64 - vals [][]byte - expected []byte - }{ - { - maxDocNum: 0, - chunkSize: 1, - docNums: []uint64{0}, - vals: [][]byte{[]byte("bleve")}, - // 1 chunk, chunk-0 length 11(b), value - expected: []byte{0x1, 0x0, 0x5, 0x5, 0x10, 0x62, 0x6c, 0x65, 0x76, 0x65, - 0xa, - 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1, - 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1}, - }, - { - maxDocNum: 1, - chunkSize: 1, - docNums: []uint64{0, 1}, - vals: [][]byte{ - []byte("upside"), - []byte("scorch"), - }, - - expected: []byte{0x1, 0x0, 0x6, 0x6, 0x14, 0x75, 0x70, 0x73, 0x69, 0x64, - 0x65, 0x1, 0x1, 0x6, 0x6, 0x14, 0x73, 0x63, 0x6f, 0x72, 0x63, 0x68, - 0xb, 0x16, - 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x2, - 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x2}, - }, - } - - for _, test := range tests { - - var actual bytes.Buffer - cic := newChunkedContentCoder(test.chunkSize, test.maxDocNum, &actual, false) - for i, docNum := range test.docNums { - err := cic.Add(docNum, test.vals[i]) - if err != nil { - t.Fatalf("error adding to intcoder: %v", err) - } - } - _ = cic.Close() - _, err := cic.Write() - if err != nil { - t.Fatalf("error writing: %v", err) - } - - if !bytes.Equal(test.expected, actual.Bytes()) { - t.Errorf("got:%s, expected:%s", string(actual.Bytes()), string(test.expected)) - } - } -} - -func TestChunkedContentCoders(t *testing.T) { - maxDocNum := uint64(5) - chunkSize := uint64(1) - docNums := []uint64{0, 
1, 2, 3, 4, 5} - vals := [][]byte{ - []byte("scorch"), - []byte("does"), - []byte("better"), - []byte("than"), - []byte("upside"), - []byte("down"), - } - - var actual1, actual2 bytes.Buffer - // chunkedContentCoder that writes out at the end - cic1 := newChunkedContentCoder(chunkSize, maxDocNum, &actual1, false) - // chunkedContentCoder that writes out in chunks - cic2 := newChunkedContentCoder(chunkSize, maxDocNum, &actual2, true) - - for i, docNum := range docNums { - err := cic1.Add(docNum, vals[i]) - if err != nil { - t.Fatalf("error adding to intcoder: %v", err) - } - err = cic2.Add(docNum, vals[i]) - if err != nil { - t.Fatalf("error adding to intcoder: %v", err) - } - } - _ = cic1.Close() - _ = cic2.Close() - - _, err := cic1.Write() - if err != nil { - t.Fatalf("error writing: %v", err) - } - _, err = cic2.Write() - if err != nil { - t.Fatalf("error writing: %v", err) - } - - if !bytes.Equal(actual1.Bytes(), actual2.Bytes()) { - t.Errorf("%s != %s", string(actual1.Bytes()), string(actual2.Bytes())) - } -} diff --git a/index/scorch/segment/zap/count.go b/index/scorch/segment/zap/count.go deleted file mode 100644 index 50290f888..000000000 --- a/index/scorch/segment/zap/count.go +++ /dev/null @@ -1,61 +0,0 @@ -// Copyright (c) 2017 Couchbase, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package zap - -import ( - "hash/crc32" - "io" - - "github.com/blevesearch/bleve/index/scorch/segment" -) - -// CountHashWriter is a wrapper around a Writer which counts the number of -// bytes which have been written and computes a crc32 hash -type CountHashWriter struct { - w io.Writer - crc uint32 - n int - s segment.StatsReporter -} - -// NewCountHashWriter returns a CountHashWriter which wraps the provided Writer -func NewCountHashWriter(w io.Writer) *CountHashWriter { - return &CountHashWriter{w: w} -} - -func NewCountHashWriterWithStatsReporter(w io.Writer, s segment.StatsReporter) *CountHashWriter { - return &CountHashWriter{w: w, s: s} -} - -// Write writes the provided bytes to the wrapped writer and counts the bytes -func (c *CountHashWriter) Write(b []byte) (int, error) { - n, err := c.w.Write(b) - c.crc = crc32.Update(c.crc, crc32.IEEETable, b[:n]) - c.n += n - if c.s != nil { - c.s.ReportBytesWritten(uint64(n)) - } - return n, err -} - -// Count returns the number of bytes written -func (c *CountHashWriter) Count() int { - return c.n -} - -// Sum32 returns the CRC-32 hash of the content written to this writer -func (c *CountHashWriter) Sum32() uint32 { - return c.crc -} diff --git a/index/scorch/segment/zap/dict.go b/index/scorch/segment/zap/dict.go deleted file mode 100644 index ad4a8f8dc..000000000 --- a/index/scorch/segment/zap/dict.go +++ /dev/null @@ -1,263 +0,0 @@ -// Copyright (c) 2017 Couchbase, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-// See the License for the specific language governing permissions and -// limitations under the License. - -package zap - -import ( - "bytes" - "fmt" - - "github.com/RoaringBitmap/roaring" - "github.com/blevesearch/bleve/index" - "github.com/blevesearch/bleve/index/scorch/segment" - "github.com/couchbase/vellum" -) - -// Dictionary is the zap representation of the term dictionary -type Dictionary struct { - sb *SegmentBase - field string - fieldID uint16 - fst *vellum.FST - fstReader *vellum.Reader -} - -// PostingsList returns the postings list for the specified term -func (d *Dictionary) PostingsList(term []byte, except *roaring.Bitmap, - prealloc segment.PostingsList) (segment.PostingsList, error) { - var preallocPL *PostingsList - pl, ok := prealloc.(*PostingsList) - if ok && pl != nil { - preallocPL = pl - } - return d.postingsList(term, except, preallocPL) -} - -func (d *Dictionary) postingsList(term []byte, except *roaring.Bitmap, rv *PostingsList) (*PostingsList, error) { - if d.fstReader == nil { - if rv == nil || rv == emptyPostingsList { - return emptyPostingsList, nil - } - return d.postingsListInit(rv, except), nil - } - - postingsOffset, exists, err := d.fstReader.Get(term) - if err != nil { - return nil, fmt.Errorf("vellum err: %v", err) - } - if !exists { - if rv == nil || rv == emptyPostingsList { - return emptyPostingsList, nil - } - return d.postingsListInit(rv, except), nil - } - - return d.postingsListFromOffset(postingsOffset, except, rv) -} - -func (d *Dictionary) postingsListFromOffset(postingsOffset uint64, except *roaring.Bitmap, rv *PostingsList) (*PostingsList, error) { - rv = d.postingsListInit(rv, except) - - err := rv.read(postingsOffset, d) - if err != nil { - return nil, err - } - - return rv, nil -} - -func (d *Dictionary) postingsListInit(rv *PostingsList, except *roaring.Bitmap) *PostingsList { - if rv == nil || rv == emptyPostingsList { - rv = &PostingsList{} - } else { - postings := rv.postings - if postings != nil { - 
postings.Clear() - } - - *rv = PostingsList{} // clear the struct - - rv.postings = postings - } - rv.sb = d.sb - rv.except = except - return rv -} - -func (d *Dictionary) Contains(key []byte) (bool, error) { - return d.fst.Contains(key) -} - -// Iterator returns an iterator for this dictionary -func (d *Dictionary) Iterator() segment.DictionaryIterator { - rv := &DictionaryIterator{ - d: d, - } - - if d.fst != nil { - itr, err := d.fst.Iterator(nil, nil) - if err == nil { - rv.itr = itr - } else if err != vellum.ErrIteratorDone { - rv.err = err - } - } - - return rv -} - -// PrefixIterator returns an iterator which only visits terms having the -// the specified prefix -func (d *Dictionary) PrefixIterator(prefix string) segment.DictionaryIterator { - rv := &DictionaryIterator{ - d: d, - } - - kBeg := []byte(prefix) - kEnd := segment.IncrementBytes(kBeg) - - if d.fst != nil { - itr, err := d.fst.Iterator(kBeg, kEnd) - if err == nil { - rv.itr = itr - } else if err != vellum.ErrIteratorDone { - rv.err = err - } - } - - return rv -} - -// RangeIterator returns an iterator which only visits terms between the -// start and end terms. NOTE: bleve.index API specifies the end is inclusive. 
-func (d *Dictionary) RangeIterator(start, end string) segment.DictionaryIterator { - rv := &DictionaryIterator{ - d: d, - } - - // need to increment the end position to be inclusive - var endBytes []byte - if len(end) > 0 { - endBytes = []byte(end) - if endBytes[len(endBytes)-1] < 0xff { - endBytes[len(endBytes)-1]++ - } else { - endBytes = append(endBytes, 0xff) - } - } - - if d.fst != nil { - itr, err := d.fst.Iterator([]byte(start), endBytes) - if err == nil { - rv.itr = itr - } else if err != vellum.ErrIteratorDone { - rv.err = err - } - } - - return rv -} - -// AutomatonIterator returns an iterator which only visits terms -// having the the vellum automaton and start/end key range -func (d *Dictionary) AutomatonIterator(a vellum.Automaton, - startKeyInclusive, endKeyExclusive []byte) segment.DictionaryIterator { - rv := &DictionaryIterator{ - d: d, - } - - if d.fst != nil { - itr, err := d.fst.Search(a, startKeyInclusive, endKeyExclusive) - if err == nil { - rv.itr = itr - } else if err != vellum.ErrIteratorDone { - rv.err = err - } - } - - return rv -} - -func (d *Dictionary) OnlyIterator(onlyTerms [][]byte, - includeCount bool) segment.DictionaryIterator { - - rv := &DictionaryIterator{ - d: d, - omitCount: !includeCount, - } - - var buf bytes.Buffer - builder, err := vellum.New(&buf, nil) - if err != nil { - rv.err = err - return rv - } - for _, term := range onlyTerms { - err = builder.Insert(term, 0) - if err != nil { - rv.err = err - return rv - } - } - err = builder.Close() - if err != nil { - rv.err = err - return rv - } - - onlyFST, err := vellum.Load(buf.Bytes()) - if err != nil { - rv.err = err - return rv - } - - itr, err := d.fst.Search(onlyFST, nil, nil) - if err == nil { - rv.itr = itr - } else if err != vellum.ErrIteratorDone { - rv.err = err - } - - return rv -} - -// DictionaryIterator is an iterator for term dictionary -type DictionaryIterator struct { - d *Dictionary - itr vellum.Iterator - err error - tmp PostingsList - entry 
index.DictEntry - omitCount bool -} - -// Next returns the next entry in the dictionary -func (i *DictionaryIterator) Next() (*index.DictEntry, error) { - if i.err != nil && i.err != vellum.ErrIteratorDone { - return nil, i.err - } else if i.itr == nil || i.err == vellum.ErrIteratorDone { - return nil, nil - } - term, postingsOffset := i.itr.Current() - i.entry.Term = string(term) - if !i.omitCount { - i.err = i.tmp.read(postingsOffset, i.d) - if i.err != nil { - return nil, i.err - } - i.entry.Count = i.tmp.Count() - } - i.err = i.itr.Next() - return &i.entry, nil -} diff --git a/index/scorch/segment/zap/dict_test.go b/index/scorch/segment/zap/dict_test.go deleted file mode 100644 index c632917bc..000000000 --- a/index/scorch/segment/zap/dict_test.go +++ /dev/null @@ -1,337 +0,0 @@ -// Copyright (c) 2017 Couchbase, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package zap - -import ( - "os" - "reflect" - "testing" - - "github.com/blevesearch/bleve/analysis" - "github.com/blevesearch/bleve/document" - "github.com/blevesearch/bleve/index" - "github.com/couchbase/vellum/levenshtein" -) - -func buildTestSegmentForDict() (*SegmentBase, uint64, error) { - doc := &document.Document{ - ID: "a", - Fields: []document.Field{ - document.NewTextFieldCustom("_id", nil, []byte("a"), document.IndexField|document.StoreField, nil), - document.NewTextFieldCustom("desc", nil, []byte("apple ball cat dog egg fish bat"), document.IndexField|document.StoreField|document.IncludeTermVectors, nil), - }, - } - - // forge analyzed docs - results := []*index.AnalysisResult{ - &index.AnalysisResult{ - Document: doc, - Analyzed: []analysis.TokenFrequencies{ - analysis.TokenFrequency(analysis.TokenStream{ - &analysis.Token{ - Start: 0, - End: 1, - Position: 1, - Term: []byte("a"), - }, - }, nil, false), - analysis.TokenFrequency(analysis.TokenStream{ - &analysis.Token{ - Start: 0, - End: 5, - Position: 1, - Term: []byte("apple"), - }, - &analysis.Token{ - Start: 6, - End: 10, - Position: 2, - Term: []byte("ball"), - }, - &analysis.Token{ - Start: 11, - End: 14, - Position: 3, - Term: []byte("cat"), - }, - &analysis.Token{ - Start: 15, - End: 18, - Position: 4, - Term: []byte("dog"), - }, - &analysis.Token{ - Start: 19, - End: 22, - Position: 5, - Term: []byte("egg"), - }, - &analysis.Token{ - Start: 20, - End: 24, - Position: 6, - Term: []byte("fish"), - }, - &analysis.Token{ - Start: 25, - End: 28, - Position: 7, - Term: []byte("bat"), - }, - }, nil, true), - }, - Length: []int{ - 1, - 7, - }, - }, - } - - return AnalysisResultsToSegmentBase(results, 1024) -} - -func TestDictionary(t *testing.T) { - - _ = os.RemoveAll("/tmp/scorch.zap") - - testSeg, _, _ := buildTestSegmentForDict() - err := PersistSegmentBase(testSeg, "/tmp/scorch.zap") - if err != nil { - t.Fatalf("error persisting segment: %v", err) - } - - segment, err := Open("/tmp/scorch.zap") 
- if err != nil { - t.Fatalf("error opening segment: %v", err) - } - defer func() { - cerr := segment.Close() - if cerr != nil { - t.Fatalf("error closing segment: %v", err) - } - }() - - dict, err := segment.Dictionary("desc") - if err != nil { - t.Fatal(err) - } - - // test basic full iterator - expected := []string{"apple", "ball", "bat", "cat", "dog", "egg", "fish"} - var got []string - itr := dict.Iterator() - next, err := itr.Next() - for next != nil && err == nil { - got = append(got, next.Term) - next, err = itr.Next() - } - if err != nil { - t.Fatalf("dict itr error: %v", err) - } - - if !reflect.DeepEqual(expected, got) { - t.Errorf("expected: %v, got: %v", expected, got) - } - - // test prefix iterator - expected = []string{"ball", "bat"} - got = got[:0] - itr = dict.PrefixIterator("b") - next, err = itr.Next() - for next != nil && err == nil { - got = append(got, next.Term) - next, err = itr.Next() - } - if err != nil { - t.Fatalf("dict itr error: %v", err) - } - - if !reflect.DeepEqual(expected, got) { - t.Errorf("expected: %v, got: %v", expected, got) - } - - // test range iterator - expected = []string{"cat", "dog", "egg"} - got = got[:0] - itr = dict.RangeIterator("cat", "egg") - next, err = itr.Next() - for next != nil && err == nil { - got = append(got, next.Term) - next, err = itr.Next() - } - if err != nil { - t.Fatalf("dict itr error: %v", err) - } - - if !reflect.DeepEqual(expected, got) { - t.Errorf("expected: %v, got: %v", expected, got) - } -} - -func TestDictionaryError(t *testing.T) { - hash := make(map[uint8]levenshtein.LevenshteinAutomatonBuilder, 4) - for i := 1; i <= 3; i++ { - lb, err := levenshtein.NewLevenshteinAutomatonBuilder(uint8(i), false) - if err != nil { - t.Errorf("NewLevenshteinAutomatonBuilder(%d, false) failed, err: %v", i, err) - } - hash[uint8(i)] = *lb - } - - _ = os.RemoveAll("/tmp/scorch.zap") - - testSeg, _, _ := buildTestSegmentForDict() - err := PersistSegmentBase(testSeg, "/tmp/scorch.zap") - if err != nil { - 
t.Fatalf("error persisting segment: %v", err) - } - - segment, err := Open("/tmp/scorch.zap") - if err != nil { - t.Fatalf("error opening segment: %v", err) - } - defer func() { - cerr := segment.Close() - if cerr != nil { - t.Fatalf("error closing segment: %v", err) - } - }() - - dict, err := segment.Dictionary("desc") - if err != nil { - t.Fatal(err) - } - - lb := hash[uint8(2)] - a, err := lb.BuildDfa("summer", 2) - if err != nil { - t.Fatal(err) - } - itr := dict.AutomatonIterator(a, nil, nil) - if itr == nil { - t.Fatalf("got nil itr") - } - nxt, err := itr.Next() - if nxt != nil { - t.Fatalf("expected nil next") - } - if err != nil { - t.Fatalf("expected nil error from iterator, got: %v", err) - } - - lb = hash[uint8(1)] - a, err = lb.BuildDfa("cat", 1) // cat & bat - if err != nil { - t.Fatal(err) - } - itr = dict.AutomatonIterator(a, nil, nil) - if itr == nil { - t.Fatalf("got nil itr") - } - for i := 0; i < 2; i++ { - nxt, err = itr.Next() - if nxt == nil || err != nil { - t.Fatalf("expected non-nil next and nil err, got: %v, %v", nxt, err) - } - } - nxt, err = itr.Next() - if nxt != nil || err != nil { - t.Fatalf("expected nil next and nil err, got: %v, %v", nxt, err) - } - - lb = hash[uint8(2)] - a, err = lb.BuildDfa("cat", 2) // cat & bat - if err != nil { - t.Fatal(err) - } - itr = dict.AutomatonIterator(a, nil, nil) - if itr == nil { - t.Fatalf("got nil itr") - } - for i := 0; i < 2; i++ { - nxt, err = itr.Next() - if nxt == nil || err != nil { - t.Fatalf("expected non-nil next and nil err, got: %v, %v", nxt, err) - } - } - nxt, err = itr.Next() - if nxt != nil || err != nil { - t.Fatalf("expected nil next and nil err, got: %v, %v", nxt, err) - } - - lb = hash[uint8(3)] - a, err = lb.BuildDfa("cat", 3) - if err != nil { - t.Fatal(err) - } - itr = dict.AutomatonIterator(a, nil, nil) - if itr == nil { - t.Fatalf("got nil itr") - } - for i := 0; i < 5; i++ { - nxt, err = itr.Next() - if nxt == nil || err != nil { - t.Fatalf("expected non-nil next and nil 
err, got: %v, %v", nxt, err) - } - } - nxt, err = itr.Next() - if nxt != nil || err != nil { - t.Fatalf("expected nil next and nil err, got: %v, %v", nxt, err) - } -} - -func TestDictionaryBug1156(t *testing.T) { - - _ = os.RemoveAll("/tmp/scorch.zap") - - testSeg, _, _ := buildTestSegmentForDict() - err := PersistSegmentBase(testSeg, "/tmp/scorch.zap") - if err != nil { - t.Fatalf("error persisting segment: %v", err) - } - - segment, err := Open("/tmp/scorch.zap") - if err != nil { - t.Fatalf("error opening segment: %v", err) - } - defer func() { - cerr := segment.Close() - if cerr != nil { - t.Fatalf("error closing segment: %v", err) - } - }() - - dict, err := segment.Dictionary("desc") - if err != nil { - t.Fatal(err) - } - - // test range iterator - expected := []string{"cat", "dog", "egg", "fish"} - var got []string - itr := dict.RangeIterator("cat", "") - next, err := itr.Next() - for next != nil && err == nil { - got = append(got, next.Term) - next, err = itr.Next() - } - if err != nil { - t.Fatalf("dict itr error: %v", err) - } - - if !reflect.DeepEqual(expected, got) { - t.Errorf("expected: %v, got: %v", expected, got) - } -} diff --git a/index/scorch/segment/zap/docvalues.go b/index/scorch/segment/zap/docvalues.go deleted file mode 100644 index a819ca239..000000000 --- a/index/scorch/segment/zap/docvalues.go +++ /dev/null @@ -1,311 +0,0 @@ -// Copyright (c) 2017 Couchbase, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package zap - -import ( - "bytes" - "encoding/binary" - "fmt" - "math" - "reflect" - "sort" - - "github.com/blevesearch/bleve/index" - "github.com/blevesearch/bleve/index/scorch/segment" - "github.com/blevesearch/bleve/size" - "github.com/golang/snappy" -) - -var reflectStaticSizedocValueReader int - -func init() { - var dvi docValueReader - reflectStaticSizedocValueReader = int(reflect.TypeOf(dvi).Size()) -} - -type docNumTermsVisitor func(docNum uint64, terms []byte) error - -type docVisitState struct { - dvrs map[uint16]*docValueReader - segment *SegmentBase -} - -type docValueReader struct { - field string - curChunkNum uint64 - chunkOffsets []uint64 - dvDataLoc uint64 - curChunkHeader []MetaData - curChunkData []byte // compressed data cache - uncompressed []byte // temp buf for snappy decompression -} - -func (di *docValueReader) size() int { - return reflectStaticSizedocValueReader + size.SizeOfPtr + - len(di.field) + - len(di.chunkOffsets)*size.SizeOfUint64 + - len(di.curChunkHeader)*reflectStaticSizeMetaData + - len(di.curChunkData) -} - -func (di *docValueReader) cloneInto(rv *docValueReader) *docValueReader { - if rv == nil { - rv = &docValueReader{} - } - - rv.field = di.field - rv.curChunkNum = math.MaxUint64 - rv.chunkOffsets = di.chunkOffsets // immutable, so it's sharable - rv.dvDataLoc = di.dvDataLoc - rv.curChunkHeader = rv.curChunkHeader[:0] - rv.curChunkData = nil - rv.uncompressed = rv.uncompressed[:0] - - return rv -} - -func (di *docValueReader) fieldName() string { - return di.field -} - -func (di *docValueReader) curChunkNumber() uint64 { - return di.curChunkNum -} - -func (s *SegmentBase) loadFieldDocValueReader(field string, - fieldDvLocStart, fieldDvLocEnd uint64) (*docValueReader, error) { - // get the docValue offset for the given fields - if fieldDvLocStart == fieldNotUninverted { - // no docValues found, nothing to do - return nil, nil - } - - // read the number of chunks, and chunk offsets position - var numChunks, 
chunkOffsetsPosition uint64 - - if fieldDvLocEnd-fieldDvLocStart > 16 { - numChunks = binary.BigEndian.Uint64(s.mem[fieldDvLocEnd-8 : fieldDvLocEnd]) - // read the length of chunk offsets - chunkOffsetsLen := binary.BigEndian.Uint64(s.mem[fieldDvLocEnd-16 : fieldDvLocEnd-8]) - // acquire position of chunk offsets - chunkOffsetsPosition = (fieldDvLocEnd - 16) - chunkOffsetsLen - } else { - return nil, fmt.Errorf("loadFieldDocValueReader: fieldDvLoc too small: %d-%d", fieldDvLocEnd, fieldDvLocStart) - } - - fdvIter := &docValueReader{ - curChunkNum: math.MaxUint64, - field: field, - chunkOffsets: make([]uint64, int(numChunks)), - } - - // read the chunk offsets - var offset uint64 - for i := 0; i < int(numChunks); i++ { - loc, read := binary.Uvarint(s.mem[chunkOffsetsPosition+offset : chunkOffsetsPosition+offset+binary.MaxVarintLen64]) - if read <= 0 { - return nil, fmt.Errorf("corrupted chunk offset during segment load") - } - fdvIter.chunkOffsets[i] = loc - offset += uint64(read) - } - - // set the data offset - fdvIter.dvDataLoc = fieldDvLocStart - - return fdvIter, nil -} - -func (di *docValueReader) loadDvChunk(chunkNumber uint64, s *SegmentBase) error { - // advance to the chunk where the docValues - // reside for the given docNum - destChunkDataLoc, curChunkEnd := di.dvDataLoc, di.dvDataLoc - start, end := readChunkBoundary(int(chunkNumber), di.chunkOffsets) - if start >= end { - di.curChunkHeader = di.curChunkHeader[:0] - di.curChunkData = nil - di.curChunkNum = chunkNumber - di.uncompressed = di.uncompressed[:0] - return nil - } - - destChunkDataLoc += start - curChunkEnd += end - - // read the number of docs reside in the chunk - numDocs, read := binary.Uvarint(s.mem[destChunkDataLoc : destChunkDataLoc+binary.MaxVarintLen64]) - if read <= 0 { - return fmt.Errorf("failed to read the chunk") - } - chunkMetaLoc := destChunkDataLoc + uint64(read) - - offset := uint64(0) - if cap(di.curChunkHeader) < int(numDocs) { - di.curChunkHeader = make([]MetaData, 
int(numDocs)) - } else { - di.curChunkHeader = di.curChunkHeader[:int(numDocs)] - } - for i := 0; i < int(numDocs); i++ { - di.curChunkHeader[i].DocNum, read = binary.Uvarint(s.mem[chunkMetaLoc+offset : chunkMetaLoc+offset+binary.MaxVarintLen64]) - offset += uint64(read) - di.curChunkHeader[i].DocDvOffset, read = binary.Uvarint(s.mem[chunkMetaLoc+offset : chunkMetaLoc+offset+binary.MaxVarintLen64]) - offset += uint64(read) - } - - compressedDataLoc := chunkMetaLoc + offset - dataLength := curChunkEnd - compressedDataLoc - di.curChunkData = s.mem[compressedDataLoc : compressedDataLoc+dataLength] - di.curChunkNum = chunkNumber - di.uncompressed = di.uncompressed[:0] - return nil -} - -func (di *docValueReader) iterateAllDocValues(s *SegmentBase, visitor docNumTermsVisitor) error { - for i := 0; i < len(di.chunkOffsets); i++ { - err := di.loadDvChunk(uint64(i), s) - if err != nil { - return err - } - if di.curChunkData == nil || len(di.curChunkHeader) == 0 { - continue - } - - // uncompress the already loaded data - uncompressed, err := snappy.Decode(di.uncompressed[:cap(di.uncompressed)], di.curChunkData) - if err != nil { - return err - } - di.uncompressed = uncompressed - - start := uint64(0) - for _, entry := range di.curChunkHeader { - err = visitor(entry.DocNum, uncompressed[start:entry.DocDvOffset]) - if err != nil { - return err - } - - start = entry.DocDvOffset - } - } - - return nil -} - -func (di *docValueReader) visitDocValues(docNum uint64, - visitor index.DocumentFieldTermVisitor) error { - // binary search the term locations for the docNum - start, end := di.getDocValueLocs(docNum) - if start == math.MaxUint64 || end == math.MaxUint64 || start == end { - return nil - } - - var uncompressed []byte - var err error - // use the uncompressed copy if available - if len(di.uncompressed) > 0 { - uncompressed = di.uncompressed - } else { - // uncompress the already loaded data - uncompressed, err = snappy.Decode(di.uncompressed[:cap(di.uncompressed)], 
di.curChunkData) - if err != nil { - return err - } - di.uncompressed = uncompressed - } - - // pick the terms for the given docNum - uncompressed = uncompressed[start:end] - for { - i := bytes.Index(uncompressed, termSeparatorSplitSlice) - if i < 0 { - break - } - - visitor(di.field, uncompressed[0:i]) - uncompressed = uncompressed[i+1:] - } - - return nil -} - -func (di *docValueReader) getDocValueLocs(docNum uint64) (uint64, uint64) { - i := sort.Search(len(di.curChunkHeader), func(i int) bool { - return di.curChunkHeader[i].DocNum >= docNum - }) - if i < len(di.curChunkHeader) && di.curChunkHeader[i].DocNum == docNum { - return ReadDocValueBoundary(i, di.curChunkHeader) - } - return math.MaxUint64, math.MaxUint64 -} - -// VisitDocumentFieldTerms is an implementation of the -// DocumentFieldTermVisitable interface -func (s *SegmentBase) VisitDocumentFieldTerms(localDocNum uint64, fields []string, - visitor index.DocumentFieldTermVisitor, dvsIn segment.DocVisitState) ( - segment.DocVisitState, error) { - dvs, ok := dvsIn.(*docVisitState) - if !ok || dvs == nil { - dvs = &docVisitState{} - } else { - if dvs.segment != s { - dvs.segment = s - dvs.dvrs = nil - } - } - - var fieldIDPlus1 uint16 - if dvs.dvrs == nil { - dvs.dvrs = make(map[uint16]*docValueReader, len(fields)) - for _, field := range fields { - if fieldIDPlus1, ok = s.fieldsMap[field]; !ok { - continue - } - fieldID := fieldIDPlus1 - 1 - if dvIter, exists := s.fieldDvReaders[fieldID]; exists && - dvIter != nil { - dvs.dvrs[fieldID] = dvIter.cloneInto(dvs.dvrs[fieldID]) - } - } - } - - // find the chunkNumber where the docValues are stored - docInChunk := localDocNum / uint64(s.chunkFactor) - var dvr *docValueReader - for _, field := range fields { - if fieldIDPlus1, ok = s.fieldsMap[field]; !ok { - continue - } - fieldID := fieldIDPlus1 - 1 - if dvr, ok = dvs.dvrs[fieldID]; ok && dvr != nil { - // check if the chunk is already loaded - if docInChunk != dvr.curChunkNumber() { - err := 
dvr.loadDvChunk(docInChunk, s) - if err != nil { - return dvs, err - } - } - - _ = dvr.visitDocValues(localDocNum, visitor) - } - } - return dvs, nil -} - -// VisitableDocValueFields returns the list of fields with -// persisted doc value terms ready to be visitable using the -// VisitDocumentFieldTerms method. -func (s *SegmentBase) VisitableDocValueFields() ([]string, error) { - return s.fieldDvNames, nil -} diff --git a/index/scorch/segment/zap/enumerator.go b/index/scorch/segment/zap/enumerator.go deleted file mode 100644 index cd6ff73c7..000000000 --- a/index/scorch/segment/zap/enumerator.go +++ /dev/null @@ -1,126 +0,0 @@ -// Copyright (c) 2018 Couchbase, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package zap - -import ( - "bytes" - - "github.com/couchbase/vellum" -) - -// enumerator provides an ordered traversal of multiple vellum -// iterators. Like JOIN of iterators, the enumerator produces a -// sequence of (key, iteratorIndex, value) tuples, sorted by key ASC, -// then iteratorIndex ASC, where the same key might be seen or -// repeated across multiple child iterators. 
-type enumerator struct { - itrs []vellum.Iterator - currKs [][]byte - currVs []uint64 - - lowK []byte - lowIdxs []int - lowCurr int -} - -// newEnumerator returns a new enumerator over the vellum Iterators -func newEnumerator(itrs []vellum.Iterator) (*enumerator, error) { - rv := &enumerator{ - itrs: itrs, - currKs: make([][]byte, len(itrs)), - currVs: make([]uint64, len(itrs)), - lowIdxs: make([]int, 0, len(itrs)), - } - for i, itr := range rv.itrs { - rv.currKs[i], rv.currVs[i] = itr.Current() - } - rv.updateMatches(false) - if rv.lowK == nil && len(rv.lowIdxs) == 0 { - return rv, vellum.ErrIteratorDone - } - return rv, nil -} - -// updateMatches maintains the low key matches based on the currKs -func (m *enumerator) updateMatches(skipEmptyKey bool) { - m.lowK = nil - m.lowIdxs = m.lowIdxs[:0] - m.lowCurr = 0 - - for i, key := range m.currKs { - if (key == nil && m.currVs[i] == 0) || // in case of empty iterator - (len(key) == 0 && skipEmptyKey) { // skip empty keys - continue - } - - cmp := bytes.Compare(key, m.lowK) - if cmp < 0 || len(m.lowIdxs) == 0 { - // reached a new low - m.lowK = key - m.lowIdxs = m.lowIdxs[:0] - m.lowIdxs = append(m.lowIdxs, i) - } else if cmp == 0 { - m.lowIdxs = append(m.lowIdxs, i) - } - } -} - -// Current returns the enumerator's current key, iterator-index, and -// value. If the enumerator is not pointing at a valid value (because -// Next returned an error previously), Current will return nil,0,0. -func (m *enumerator) Current() ([]byte, int, uint64) { - var i int - var v uint64 - if m.lowCurr < len(m.lowIdxs) { - i = m.lowIdxs[m.lowCurr] - v = m.currVs[i] - } - return m.lowK, i, v -} - -// Next advances the enumerator to the next key/iterator/value result, -// else vellum.ErrIteratorDone is returned. 
-func (m *enumerator) Next() error { - m.lowCurr += 1 - if m.lowCurr >= len(m.lowIdxs) { - // move all the current low iterators forwards - for _, vi := range m.lowIdxs { - err := m.itrs[vi].Next() - if err != nil && err != vellum.ErrIteratorDone { - return err - } - m.currKs[vi], m.currVs[vi] = m.itrs[vi].Current() - } - // can skip any empty keys encountered at this point - m.updateMatches(true) - } - if m.lowK == nil && len(m.lowIdxs) == 0 { - return vellum.ErrIteratorDone - } - return nil -} - -// Close all the underlying Iterators. The first error, if any, will -// be returned. -func (m *enumerator) Close() error { - var rv error - for _, itr := range m.itrs { - err := itr.Close() - if rv == nil { - rv = err - } - } - return rv -} diff --git a/index/scorch/segment/zap/enumerator_test.go b/index/scorch/segment/zap/enumerator_test.go deleted file mode 100644 index 4ba4aa4c0..000000000 --- a/index/scorch/segment/zap/enumerator_test.go +++ /dev/null @@ -1,237 +0,0 @@ -// Copyright (c) 2018 Couchbase, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or -// implied. See the License for the specific language governing -// permissions and limitations under the License. 
- -package zap - -import ( - "fmt" - "testing" - - "github.com/couchbase/vellum" -) - -type enumTestEntry struct { - key string - val uint64 -} - -type enumTestWant struct { - key string - idx int - val uint64 -} - -func TestEnumerator(t *testing.T) { - tests := []struct { - desc string - in [][]enumTestEntry - want []enumTestWant - }{ - { - desc: "two non-empty enumerators with no duplicate keys", - in: [][]enumTestEntry{ - []enumTestEntry{ - {"a", 1}, - {"c", 3}, - {"e", 5}, - }, - []enumTestEntry{ - {"b", 2}, - {"d", 4}, - {"f", 6}, - }, - }, - want: []enumTestWant{ - {"a", 0, 1}, - {"b", 1, 2}, - {"c", 0, 3}, - {"d", 1, 4}, - {"e", 0, 5}, - {"f", 1, 6}, - }, - }, - { - desc: "two non-empty enumerators with duplicate keys", - in: [][]enumTestEntry{ - []enumTestEntry{ - {"a", 1}, - {"c", 3}, - {"e", 5}, - }, - []enumTestEntry{ - {"a", 2}, - {"c", 4}, - {"e", 6}, - }, - }, - want: []enumTestWant{ - {"a", 0, 1}, - {"a", 1, 2}, - {"c", 0, 3}, - {"c", 1, 4}, - {"e", 0, 5}, - {"e", 1, 6}, - }, - }, - { - desc: "first iterator is empty", - in: [][]enumTestEntry{ - []enumTestEntry{}, - []enumTestEntry{ - {"a", 2}, - {"c", 4}, - {"e", 6}, - }, - }, - want: []enumTestWant{ - {"a", 1, 2}, - {"c", 1, 4}, - {"e", 1, 6}, - }, - }, - { - desc: "last iterator is empty", - in: [][]enumTestEntry{ - []enumTestEntry{ - {"a", 1}, - {"c", 3}, - {"e", 5}, - }, - []enumTestEntry{}, - }, - want: []enumTestWant{ - {"a", 0, 1}, - {"c", 0, 3}, - {"e", 0, 5}, - }, - }, - { - desc: "two different length enumerators with duplicate keys", - in: [][]enumTestEntry{ - []enumTestEntry{ - {"a", 1}, - {"c", 3}, - {"e", 5}, - }, - []enumTestEntry{ - {"a", 2}, - {"b", 4}, - {"d", 1000}, - {"e", 6}, - }, - }, - want: []enumTestWant{ - {"a", 0, 1}, - {"a", 1, 2}, - {"b", 1, 4}, - {"c", 0, 3}, - {"d", 1, 1000}, - {"e", 0, 5}, - {"e", 1, 6}, - }, - }, - } - - for _, test := range tests { - var itrs []vellum.Iterator - for _, entries := range test.in { - itrs = append(itrs, &testIterator{entries: entries}) 
- } - - enumerator, err := newEnumerator(itrs) - if err != nil { - t.Fatalf("%s - expected no err on newNumerator, got: %v", test.desc, err) - } - - wanti := 0 - for wanti < len(test.want) { - if err != nil { - t.Fatalf("%s - wanted no err, got: %v", test.desc, err) - } - - currK, currIdx, currV := enumerator.Current() - - want := test.want[wanti] - if want.key != string(currK) { - t.Fatalf("%s - wrong key, wanted: %#v, got: %q, %d, %d", test.desc, - want, currK, currIdx, currV) - } - if want.idx != currIdx { - t.Fatalf("%s - wrong idx, wanted: %#v, got: %q, %d, %d", test.desc, - want, currK, currIdx, currV) - } - if want.val != currV { - t.Fatalf("%s - wrong val, wanted: %#v, got: %q, %d, %d", test.desc, - want, currK, currIdx, currV) - } - - wanti += 1 - - err = enumerator.Next() - } - - if err != vellum.ErrIteratorDone { - t.Fatalf("%s - expected ErrIteratorDone, got: %v", test.desc, err) - } - - err = enumerator.Close() - if err != nil { - t.Fatalf("%s - expected nil err on close, got: %v", test.desc, err) - } - - for _, itr := range itrs { - if itr.(*testIterator).curr != 654321 { - t.Fatalf("%s - expected child iter to be closed", test.desc) - } - } - } -} - -type testIterator struct { - entries []enumTestEntry - curr int -} - -func (m *testIterator) Current() ([]byte, uint64) { - if m.curr >= len(m.entries) { - return nil, 0 - } - return []byte(m.entries[m.curr].key), m.entries[m.curr].val -} - -func (m *testIterator) Next() error { - m.curr++ - if m.curr >= len(m.entries) { - return vellum.ErrIteratorDone - } - return nil -} - -func (m *testIterator) Seek(key []byte) error { - return fmt.Errorf("not implemented for enumerator unit tests") -} - -func (m *testIterator) Reset(f *vellum.FST, - startKeyInclusive, endKeyExclusive []byte, aut vellum.Automaton) error { - return fmt.Errorf("not implemented for enumerator unit tests") -} - -func (m *testIterator) Close() error { - m.curr = 654321 - return nil -} - -func (m *testIterator) Exists(key []byte) (bool, 
error) { - return false, fmt.Errorf("not implemented for enumerator unit tests") -} diff --git a/index/scorch/segment/zap/intcoder.go b/index/scorch/segment/zap/intcoder.go deleted file mode 100644 index 571d06edb..000000000 --- a/index/scorch/segment/zap/intcoder.go +++ /dev/null @@ -1,172 +0,0 @@ -// Copyright (c) 2017 Couchbase, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package zap - -import ( - "bytes" - "encoding/binary" - "io" -) - -type chunkedIntCoder struct { - final []byte - chunkSize uint64 - chunkBuf bytes.Buffer - chunkLens []uint64 - currChunk uint64 - - buf []byte -} - -// newChunkedIntCoder returns a new chunk int coder which packs data into -// chunks based on the provided chunkSize and supports up to the specified -// maxDocNum -func newChunkedIntCoder(chunkSize uint64, maxDocNum uint64) *chunkedIntCoder { - total := maxDocNum/chunkSize + 1 - rv := &chunkedIntCoder{ - chunkSize: chunkSize, - chunkLens: make([]uint64, total), - final: make([]byte, 0, 64), - } - - return rv -} - -// Reset lets you reuse this chunked int coder. buffers are reset and reused -// from previous use. you cannot change the chunk size or max doc num. -func (c *chunkedIntCoder) Reset() { - c.final = c.final[:0] - c.chunkBuf.Reset() - c.currChunk = 0 - for i := range c.chunkLens { - c.chunkLens[i] = 0 - } -} - -// Add encodes the provided integers into the correct chunk for the provided -// doc num. You MUST call Add() with increasing docNums. 
-func (c *chunkedIntCoder) Add(docNum uint64, vals ...uint64) error { - chunk := docNum / c.chunkSize - if chunk != c.currChunk { - // starting a new chunk - c.Close() - c.chunkBuf.Reset() - c.currChunk = chunk - } - - if len(c.buf) < binary.MaxVarintLen64 { - c.buf = make([]byte, binary.MaxVarintLen64) - } - - for _, val := range vals { - wb := binary.PutUvarint(c.buf, val) - _, err := c.chunkBuf.Write(c.buf[:wb]) - if err != nil { - return err - } - } - - return nil -} - -func (c *chunkedIntCoder) AddBytes(docNum uint64, buf []byte) error { - chunk := docNum / c.chunkSize - if chunk != c.currChunk { - // starting a new chunk - c.Close() - c.chunkBuf.Reset() - c.currChunk = chunk - } - - _, err := c.chunkBuf.Write(buf) - return err -} - -// Close indicates you are done calling Add() this allows the final chunk -// to be encoded. -func (c *chunkedIntCoder) Close() { - encodingBytes := c.chunkBuf.Bytes() - c.chunkLens[c.currChunk] = uint64(len(encodingBytes)) - c.final = append(c.final, encodingBytes...) - c.currChunk = uint64(cap(c.chunkLens)) // sentinel to detect double close -} - -// Write commits all the encoded chunked integers to the provided writer. 
-func (c *chunkedIntCoder) Write(w io.Writer) (int, error) { - bufNeeded := binary.MaxVarintLen64 * (1 + len(c.chunkLens)) - if len(c.buf) < bufNeeded { - c.buf = make([]byte, bufNeeded) - } - buf := c.buf - - // convert the chunk lengths into chunk offsets - chunkOffsets := modifyLengthsToEndOffsets(c.chunkLens) - - // write out the number of chunks & each chunk offsets - n := binary.PutUvarint(buf, uint64(len(chunkOffsets))) - for _, chunkOffset := range chunkOffsets { - n += binary.PutUvarint(buf[n:], chunkOffset) - } - - tw, err := w.Write(buf[:n]) - if err != nil { - return tw, err - } - - // write out the data - nw, err := w.Write(c.final) - tw += nw - if err != nil { - return tw, err - } - return tw, nil -} - -func (c *chunkedIntCoder) FinalSize() int { - return len(c.final) -} - -// modifyLengthsToEndOffsets converts the chunk length array -// to a chunk offset array. The readChunkBoundary -// will figure out the start and end of every chunk from -// these offsets. Starting offset of i'th index is stored -// in i-1'th position except for 0'th index and ending offset -// is stored at i'th index position. -// For 0'th element, starting position is always zero. 
-// eg: -// Lens -> 5 5 5 5 => 5 10 15 20 -// Lens -> 0 5 0 5 => 0 5 5 10 -// Lens -> 0 0 0 5 => 0 0 0 5 -// Lens -> 5 0 0 0 => 5 5 5 5 -// Lens -> 0 5 0 0 => 0 5 5 5 -// Lens -> 0 0 5 0 => 0 0 5 5 -func modifyLengthsToEndOffsets(lengths []uint64) []uint64 { - var runningOffset uint64 - var index, i int - for i = 1; i <= len(lengths); i++ { - runningOffset += lengths[i-1] - lengths[index] = runningOffset - index++ - } - return lengths -} - -func readChunkBoundary(chunk int, offsets []uint64) (uint64, uint64) { - var start uint64 - if chunk > 0 { - start = offsets[chunk-1] - } - return start, offsets[chunk] -} diff --git a/index/scorch/segment/zap/intcoder_test.go b/index/scorch/segment/zap/intcoder_test.go deleted file mode 100644 index 952e0669d..000000000 --- a/index/scorch/segment/zap/intcoder_test.go +++ /dev/null @@ -1,269 +0,0 @@ -// Copyright (c) 2017 Couchbase, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package zap - -import ( - "bytes" - "reflect" - "testing" -) - -func TestChunkIntCoder(t *testing.T) { - tests := []struct { - maxDocNum uint64 - chunkSize uint64 - docNums []uint64 - vals [][]uint64 - expected []byte - }{ - { - maxDocNum: 0, - chunkSize: 1, - docNums: []uint64{0}, - vals: [][]uint64{ - []uint64{3}, - }, - // 1 chunk, chunk-0 length 1, value 3 - expected: []byte{0x1, 0x1, 0x3}, - }, - { - maxDocNum: 1, - chunkSize: 1, - docNums: []uint64{0, 1}, - vals: [][]uint64{ - []uint64{3}, - []uint64{7}, - }, - // 2 chunks, chunk-0 offset 1, chunk-1 offset 2, value 3, value 7 - expected: []byte{0x2, 0x1, 0x2, 0x3, 0x7}, - }, - } - - for _, test := range tests { - - cic := newChunkedIntCoder(test.chunkSize, test.maxDocNum) - for i, docNum := range test.docNums { - err := cic.Add(docNum, test.vals[i]...) - if err != nil { - t.Fatalf("error adding to intcoder: %v", err) - } - } - cic.Close() - var actual bytes.Buffer - _, err := cic.Write(&actual) - if err != nil { - t.Fatalf("error writing: %v", err) - } - if !reflect.DeepEqual(test.expected, actual.Bytes()) { - t.Errorf("got % x, expected % x", actual.Bytes(), test.expected) - } - } -} - -func TestChunkLengthToOffsets(t *testing.T) { - - tests := []struct { - lengths []uint64 - expectedOffsets []uint64 - }{ - { - lengths: []uint64{5, 5, 5, 5, 5}, - expectedOffsets: []uint64{5, 10, 15, 20, 25}, - }, - { - lengths: []uint64{0, 5, 0, 5, 0}, - expectedOffsets: []uint64{0, 5, 5, 10, 10}, - }, - { - lengths: []uint64{0, 0, 0, 0, 5}, - expectedOffsets: []uint64{0, 0, 0, 0, 5}, - }, - { - lengths: []uint64{5, 0, 0, 0, 0}, - expectedOffsets: []uint64{5, 5, 5, 5, 5}, - }, - { - lengths: []uint64{0, 5, 0, 0, 0}, - expectedOffsets: []uint64{0, 5, 5, 5, 5}, - }, - { - lengths: []uint64{0, 0, 0, 5, 0}, - expectedOffsets: []uint64{0, 0, 0, 5, 5}, - }, - { - lengths: []uint64{0, 0, 0, 5, 5}, - expectedOffsets: []uint64{0, 0, 0, 5, 10}, - }, - { - lengths: []uint64{5, 5, 5, 0, 0}, - expectedOffsets: []uint64{5, 10, 15, 15, 
15}, - }, - { - lengths: []uint64{5}, - expectedOffsets: []uint64{5}, - }, - { - lengths: []uint64{5, 5}, - expectedOffsets: []uint64{5, 10}, - }, - } - - for i, test := range tests { - modifyLengthsToEndOffsets(test.lengths) - if !reflect.DeepEqual(test.expectedOffsets, test.lengths) { - t.Errorf("Test: %d failed, got %+v, expected %+v", i, test.lengths, test.expectedOffsets) - } - } -} - -func TestChunkReadBoundaryFromOffsets(t *testing.T) { - - tests := []struct { - chunkNumber int - offsets []uint64 - expectedStart uint64 - expectedEnd uint64 - }{ - { - offsets: []uint64{5, 10, 15, 20, 25}, - chunkNumber: 4, - expectedStart: 20, - expectedEnd: 25, - }, - { - offsets: []uint64{5, 10, 15, 20, 25}, - chunkNumber: 0, - expectedStart: 0, - expectedEnd: 5, - }, - { - offsets: []uint64{5, 10, 15, 20, 25}, - chunkNumber: 2, - expectedStart: 10, - expectedEnd: 15, - }, - { - offsets: []uint64{0, 5, 5, 10, 10}, - chunkNumber: 4, - expectedStart: 10, - expectedEnd: 10, - }, - { - offsets: []uint64{0, 5, 5, 10, 10}, - chunkNumber: 1, - expectedStart: 0, - expectedEnd: 5, - }, - { - offsets: []uint64{5, 5, 5, 5, 5}, - chunkNumber: 0, - expectedStart: 0, - expectedEnd: 5, - }, - { - offsets: []uint64{5, 5, 5, 5, 5}, - chunkNumber: 4, - expectedStart: 5, - expectedEnd: 5, - }, - { - offsets: []uint64{5, 5, 5, 5, 5}, - chunkNumber: 1, - expectedStart: 5, - expectedEnd: 5, - }, - { - offsets: []uint64{0, 5, 5, 5, 5}, - chunkNumber: 1, - expectedStart: 0, - expectedEnd: 5, - }, - { - offsets: []uint64{0, 5, 5, 5, 5}, - chunkNumber: 0, - expectedStart: 0, - expectedEnd: 0, - }, - { - offsets: []uint64{0, 0, 0, 5, 5}, - chunkNumber: 2, - expectedStart: 0, - expectedEnd: 0, - }, - { - offsets: []uint64{0, 0, 0, 5, 5}, - chunkNumber: 1, - expectedStart: 0, - expectedEnd: 0, - }, - { - offsets: []uint64{0, 0, 0, 0, 5}, - chunkNumber: 4, - expectedStart: 0, - expectedEnd: 5, - }, - { - offsets: []uint64{0, 0, 0, 0, 5}, - chunkNumber: 2, - expectedStart: 0, - expectedEnd: 0, - }, - { - 
offsets: []uint64{5, 10, 15, 15, 15}, - chunkNumber: 0, - expectedStart: 0, - expectedEnd: 5, - }, - { - offsets: []uint64{5, 10, 15, 15, 15}, - chunkNumber: 1, - expectedStart: 5, - expectedEnd: 10, - }, - { - offsets: []uint64{5, 10, 15, 15, 15}, - chunkNumber: 2, - expectedStart: 10, - expectedEnd: 15, - }, - { - offsets: []uint64{5, 10, 15, 15, 15}, - chunkNumber: 3, - expectedStart: 15, - expectedEnd: 15, - }, - { - offsets: []uint64{5, 10, 15, 15, 15}, - chunkNumber: 4, - expectedStart: 15, - expectedEnd: 15, - }, - { - offsets: []uint64{5}, - chunkNumber: 0, - expectedStart: 0, - expectedEnd: 5, - }, - } - - for i, test := range tests { - s, e := readChunkBoundary(test.chunkNumber, test.offsets) - if test.expectedStart != s || test.expectedEnd != e { - t.Errorf("Test: %d failed for chunkNumber: %d got start: %d end: %d,"+ - " expected start: %d end: %d", i, test.chunkNumber, s, e, - test.expectedStart, test.expectedEnd) - } - } -} diff --git a/index/scorch/segment/zap/merge.go b/index/scorch/segment/zap/merge.go deleted file mode 100644 index 50bd7207a..000000000 --- a/index/scorch/segment/zap/merge.go +++ /dev/null @@ -1,862 +0,0 @@ -// Copyright (c) 2017 Couchbase, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package zap - -import ( - "bufio" - "bytes" - "encoding/binary" - "fmt" - "math" - "os" - "sort" - - "github.com/RoaringBitmap/roaring" - seg "github.com/blevesearch/bleve/index/scorch/segment" - "github.com/couchbase/vellum" - "github.com/golang/snappy" -) - -var DefaultFileMergerBufferSize = 1024 * 1024 - -// ValidateMerge can be set by applications to perform additional checks -// on a new segment produced by a merge, by default this does nothing. -// Caller should provide EITHER segments or memSegments, but not both. -// This API is experimental and may be removed at any time. -var ValidateMerge = func(segments []*Segment, memSegments []*SegmentBase, drops []*roaring.Bitmap, newSegment *Segment) error { - return nil -} - -const docDropped = math.MaxUint64 // sentinel docNum to represent a deleted doc - -// Merge takes a slice of zap segments and bit masks describing which -// documents may be dropped, and creates a new segment containing the -// remaining data. This new segment is built at the specified path, -// with the provided chunkFactor. 
-func Merge(segments []*Segment, drops []*roaring.Bitmap, path string, - chunkFactor uint32, closeCh chan struct{}, s seg.StatsReporter) ( - [][]uint64, uint64, error) { - segmentBases := make([]*SegmentBase, len(segments)) - for segmenti, segment := range segments { - segmentBases[segmenti] = &segment.SegmentBase - } - - return MergeSegmentBases(segmentBases, drops, path, chunkFactor, closeCh, s) -} - -func MergeSegmentBases(segmentBases []*SegmentBase, drops []*roaring.Bitmap, path string, - chunkFactor uint32, closeCh chan struct{}, s seg.StatsReporter) ( - [][]uint64, uint64, error) { - flag := os.O_RDWR | os.O_CREATE - - f, err := os.OpenFile(path, flag, 0600) - if err != nil { - return nil, 0, err - } - - cleanup := func() { - _ = f.Close() - _ = os.Remove(path) - } - - // buffer the output - br := bufio.NewWriterSize(f, DefaultFileMergerBufferSize) - - // wrap it for counting (tracking offsets) - cr := NewCountHashWriterWithStatsReporter(br, s) - - newDocNums, numDocs, storedIndexOffset, fieldsIndexOffset, docValueOffset, _, _, _, err := - MergeToWriter(segmentBases, drops, chunkFactor, cr, closeCh) - if err != nil { - cleanup() - return nil, 0, err - } - - err = persistFooter(numDocs, storedIndexOffset, fieldsIndexOffset, - docValueOffset, chunkFactor, cr.Sum32(), cr) - if err != nil { - cleanup() - return nil, 0, err - } - - err = br.Flush() - if err != nil { - cleanup() - return nil, 0, err - } - - err = f.Sync() - if err != nil { - cleanup() - return nil, 0, err - } - - err = f.Close() - if err != nil { - cleanup() - return nil, 0, err - } - - return newDocNums, uint64(cr.Count()), nil -} - -func MergeToWriter(segments []*SegmentBase, drops []*roaring.Bitmap, - chunkFactor uint32, cr *CountHashWriter, closeCh chan struct{}) ( - newDocNums [][]uint64, - numDocs, storedIndexOffset, fieldsIndexOffset, docValueOffset uint64, - dictLocs []uint64, fieldsInv []string, fieldsMap map[string]uint16, - err error) { - docValueOffset = uint64(fieldNotUninverted) - - 
var fieldsSame bool - fieldsSame, fieldsInv = mergeFields(segments) - fieldsMap = mapFields(fieldsInv) - - numDocs = computeNewDocCount(segments, drops) - - if isClosed(closeCh) { - return nil, 0, 0, 0, 0, nil, nil, nil, seg.ErrClosed - } - - if numDocs > 0 { - storedIndexOffset, newDocNums, err = mergeStoredAndRemap(segments, drops, - fieldsMap, fieldsInv, fieldsSame, numDocs, cr, closeCh) - if err != nil { - return nil, 0, 0, 0, 0, nil, nil, nil, err - } - - dictLocs, docValueOffset, err = persistMergedRest(segments, drops, - fieldsInv, fieldsMap, fieldsSame, - newDocNums, numDocs, chunkFactor, cr, closeCh) - if err != nil { - return nil, 0, 0, 0, 0, nil, nil, nil, err - } - } else { - dictLocs = make([]uint64, len(fieldsInv)) - } - - fieldsIndexOffset, err = persistFields(fieldsInv, cr, dictLocs) - if err != nil { - return nil, 0, 0, 0, 0, nil, nil, nil, err - } - - return newDocNums, numDocs, storedIndexOffset, fieldsIndexOffset, docValueOffset, dictLocs, fieldsInv, fieldsMap, nil -} - -// mapFields takes the fieldsInv list and returns a map of fieldName -// to fieldID+1 -func mapFields(fields []string) map[string]uint16 { - rv := make(map[string]uint16, len(fields)) - for i, fieldName := range fields { - rv[fieldName] = uint16(i) + 1 - } - return rv -} - -// computeNewDocCount determines how many documents will be in the newly -// merged segment when obsoleted docs are dropped -func computeNewDocCount(segments []*SegmentBase, drops []*roaring.Bitmap) uint64 { - var newDocCount uint64 - for segI, segment := range segments { - newDocCount += segment.numDocs - if drops[segI] != nil { - newDocCount -= drops[segI].GetCardinality() - } - } - return newDocCount -} - -func persistMergedRest(segments []*SegmentBase, dropsIn []*roaring.Bitmap, - fieldsInv []string, fieldsMap map[string]uint16, fieldsSame bool, - newDocNumsIn [][]uint64, newSegDocCount uint64, chunkFactor uint32, - w *CountHashWriter, closeCh chan struct{}) ([]uint64, uint64, error) { - - var 
bufMaxVarintLen64 []byte = make([]byte, binary.MaxVarintLen64) - var bufLoc []uint64 - - var postings *PostingsList - var postItr *PostingsIterator - - rv := make([]uint64, len(fieldsInv)) - fieldDvLocsStart := make([]uint64, len(fieldsInv)) - fieldDvLocsEnd := make([]uint64, len(fieldsInv)) - - tfEncoder := newChunkedIntCoder(uint64(chunkFactor), newSegDocCount-1) - locEncoder := newChunkedIntCoder(uint64(chunkFactor), newSegDocCount-1) - - var vellumBuf bytes.Buffer - newVellum, err := vellum.New(&vellumBuf, nil) - if err != nil { - return nil, 0, err - } - - newRoaring := roaring.NewBitmap() - - // for each field - for fieldID, fieldName := range fieldsInv { - - // collect FST iterators from all active segments for this field - var newDocNums [][]uint64 - var drops []*roaring.Bitmap - var dicts []*Dictionary - var itrs []vellum.Iterator - - var segmentsInFocus []*SegmentBase - - for segmentI, segment := range segments { - - // check for the closure in meantime - if isClosed(closeCh) { - return nil, 0, seg.ErrClosed - } - - dict, err2 := segment.dictionary(fieldName) - if err2 != nil { - return nil, 0, err2 - } - if dict != nil && dict.fst != nil { - itr, err2 := dict.fst.Iterator(nil, nil) - if err2 != nil && err2 != vellum.ErrIteratorDone { - return nil, 0, err2 - } - if itr != nil { - newDocNums = append(newDocNums, newDocNumsIn[segmentI]) - if dropsIn[segmentI] != nil && !dropsIn[segmentI].IsEmpty() { - drops = append(drops, dropsIn[segmentI]) - } else { - drops = append(drops, nil) - } - dicts = append(dicts, dict) - itrs = append(itrs, itr) - segmentsInFocus = append(segmentsInFocus, segment) - } - } - } - - var prevTerm []byte - - newRoaring.Clear() - - var lastDocNum, lastFreq, lastNorm uint64 - - // determines whether to use "1-hit" encoding optimization - // when a term appears in only 1 doc, with no loc info, - // has freq of 1, and the docNum fits into 31-bits - use1HitEncoding := func(termCardinality uint64) (bool, uint64, uint64) { - if 
termCardinality == uint64(1) && locEncoder.FinalSize() <= 0 { - docNum := uint64(newRoaring.Minimum()) - if under32Bits(docNum) && docNum == lastDocNum && lastFreq == 1 { - return true, docNum, lastNorm - } - } - return false, 0, 0 - } - - finishTerm := func(term []byte) error { - tfEncoder.Close() - locEncoder.Close() - - postingsOffset, err := writePostings(newRoaring, - tfEncoder, locEncoder, use1HitEncoding, w, bufMaxVarintLen64) - if err != nil { - return err - } - - if postingsOffset > 0 { - err = newVellum.Insert(term, postingsOffset) - if err != nil { - return err - } - } - - newRoaring.Clear() - - tfEncoder.Reset() - locEncoder.Reset() - - lastDocNum = 0 - lastFreq = 0 - lastNorm = 0 - - return nil - } - - enumerator, err := newEnumerator(itrs) - - for err == nil { - term, itrI, postingsOffset := enumerator.Current() - - if !bytes.Equal(prevTerm, term) { - // check for the closure in meantime - if isClosed(closeCh) { - return nil, 0, seg.ErrClosed - } - - // if the term changed, write out the info collected - // for the previous term - err = finishTerm(prevTerm) - if err != nil { - return nil, 0, err - } - } - - postings, err = dicts[itrI].postingsListFromOffset( - postingsOffset, drops[itrI], postings) - if err != nil { - return nil, 0, err - } - - postItr = postings.iterator(true, true, true, postItr) - - if fieldsSame { - // can optimize by copying freq/norm/loc bytes directly - lastDocNum, lastFreq, lastNorm, err = mergeTermFreqNormLocsByCopying( - term, postItr, newDocNums[itrI], newRoaring, - tfEncoder, locEncoder) - } else { - lastDocNum, lastFreq, lastNorm, bufLoc, err = mergeTermFreqNormLocs( - fieldsMap, term, postItr, newDocNums[itrI], newRoaring, - tfEncoder, locEncoder, bufLoc) - } - if err != nil { - return nil, 0, err - } - - prevTerm = prevTerm[:0] // copy to prevTerm in case Next() reuses term mem - prevTerm = append(prevTerm, term...) 
- - err = enumerator.Next() - } - if err != vellum.ErrIteratorDone { - return nil, 0, err - } - - err = finishTerm(prevTerm) - if err != nil { - return nil, 0, err - } - - dictOffset := uint64(w.Count()) - - err = newVellum.Close() - if err != nil { - return nil, 0, err - } - vellumData := vellumBuf.Bytes() - - // write out the length of the vellum data - n := binary.PutUvarint(bufMaxVarintLen64, uint64(len(vellumData))) - _, err = w.Write(bufMaxVarintLen64[:n]) - if err != nil { - return nil, 0, err - } - - // write this vellum to disk - _, err = w.Write(vellumData) - if err != nil { - return nil, 0, err - } - - rv[fieldID] = dictOffset - - // get the field doc value offset (start) - fieldDvLocsStart[fieldID] = uint64(w.Count()) - - // update the field doc values - fdvEncoder := newChunkedContentCoder(uint64(chunkFactor), newSegDocCount-1, w, true) - - fdvReadersAvailable := false - var dvIterClone *docValueReader - for segmentI, segment := range segmentsInFocus { - // check for the closure in meantime - if isClosed(closeCh) { - return nil, 0, seg.ErrClosed - } - - fieldIDPlus1 := uint16(segment.fieldsMap[fieldName]) - if dvIter, exists := segment.fieldDvReaders[fieldIDPlus1-1]; exists && - dvIter != nil { - fdvReadersAvailable = true - dvIterClone = dvIter.cloneInto(dvIterClone) - err = dvIterClone.iterateAllDocValues(segment, func(docNum uint64, terms []byte) error { - if newDocNums[segmentI][docNum] == docDropped { - return nil - } - err := fdvEncoder.Add(newDocNums[segmentI][docNum], terms) - if err != nil { - return err - } - return nil - }) - if err != nil { - return nil, 0, err - } - } - } - - if fdvReadersAvailable { - err = fdvEncoder.Close() - if err != nil { - return nil, 0, err - } - - // persist the doc value details for this field - _, err = fdvEncoder.Write() - if err != nil { - return nil, 0, err - } - - // get the field doc value offset (end) - fieldDvLocsEnd[fieldID] = uint64(w.Count()) - } else { - fieldDvLocsStart[fieldID] = fieldNotUninverted 
- fieldDvLocsEnd[fieldID] = fieldNotUninverted - } - - // reset vellum buffer and vellum builder - vellumBuf.Reset() - err = newVellum.Reset(&vellumBuf) - if err != nil { - return nil, 0, err - } - } - - fieldDvLocsOffset := uint64(w.Count()) - - buf := bufMaxVarintLen64 - for i := 0; i < len(fieldDvLocsStart); i++ { - n := binary.PutUvarint(buf, fieldDvLocsStart[i]) - _, err := w.Write(buf[:n]) - if err != nil { - return nil, 0, err - } - n = binary.PutUvarint(buf, fieldDvLocsEnd[i]) - _, err = w.Write(buf[:n]) - if err != nil { - return nil, 0, err - } - } - - return rv, fieldDvLocsOffset, nil -} - -func mergeTermFreqNormLocs(fieldsMap map[string]uint16, term []byte, postItr *PostingsIterator, - newDocNums []uint64, newRoaring *roaring.Bitmap, - tfEncoder *chunkedIntCoder, locEncoder *chunkedIntCoder, bufLoc []uint64) ( - lastDocNum uint64, lastFreq uint64, lastNorm uint64, bufLocOut []uint64, err error) { - next, err := postItr.Next() - for next != nil && err == nil { - hitNewDocNum := newDocNums[next.Number()] - if hitNewDocNum == docDropped { - return 0, 0, 0, nil, fmt.Errorf("see hit with dropped docNum") - } - - newRoaring.Add(uint32(hitNewDocNum)) - - nextFreq := next.Frequency() - nextNorm := uint64(math.Float32bits(float32(next.Norm()))) - - locs := next.Locations() - - err = tfEncoder.Add(hitNewDocNum, - encodeFreqHasLocs(nextFreq, len(locs) > 0), nextNorm) - if err != nil { - return 0, 0, 0, nil, err - } - - if len(locs) > 0 { - numBytesLocs := 0 - for _, loc := range locs { - ap := loc.ArrayPositions() - numBytesLocs += totalUvarintBytes(uint64(fieldsMap[loc.Field()]-1), - loc.Pos(), loc.Start(), loc.End(), uint64(len(ap)), ap) - } - - err = locEncoder.Add(hitNewDocNum, uint64(numBytesLocs)) - if err != nil { - return 0, 0, 0, nil, err - } - - for _, loc := range locs { - ap := loc.ArrayPositions() - if cap(bufLoc) < 5+len(ap) { - bufLoc = make([]uint64, 0, 5+len(ap)) - } - args := bufLoc[0:5] - args[0] = uint64(fieldsMap[loc.Field()] - 1) - args[1] = 
loc.Pos() - args[2] = loc.Start() - args[3] = loc.End() - args[4] = uint64(len(ap)) - args = append(args, ap...) - err = locEncoder.Add(hitNewDocNum, args...) - if err != nil { - return 0, 0, 0, nil, err - } - } - } - - lastDocNum = hitNewDocNum - lastFreq = nextFreq - lastNorm = nextNorm - - next, err = postItr.Next() - } - - return lastDocNum, lastFreq, lastNorm, bufLoc, err -} - -func mergeTermFreqNormLocsByCopying(term []byte, postItr *PostingsIterator, - newDocNums []uint64, newRoaring *roaring.Bitmap, - tfEncoder *chunkedIntCoder, locEncoder *chunkedIntCoder) ( - lastDocNum uint64, lastFreq uint64, lastNorm uint64, err error) { - nextDocNum, nextFreq, nextNorm, nextFreqNormBytes, nextLocBytes, err := - postItr.nextBytes() - for err == nil && len(nextFreqNormBytes) > 0 { - hitNewDocNum := newDocNums[nextDocNum] - if hitNewDocNum == docDropped { - return 0, 0, 0, fmt.Errorf("see hit with dropped doc num") - } - - newRoaring.Add(uint32(hitNewDocNum)) - err = tfEncoder.AddBytes(hitNewDocNum, nextFreqNormBytes) - if err != nil { - return 0, 0, 0, err - } - - if len(nextLocBytes) > 0 { - err = locEncoder.AddBytes(hitNewDocNum, nextLocBytes) - if err != nil { - return 0, 0, 0, err - } - } - - lastDocNum = hitNewDocNum - lastFreq = nextFreq - lastNorm = nextNorm - - nextDocNum, nextFreq, nextNorm, nextFreqNormBytes, nextLocBytes, err = - postItr.nextBytes() - } - - return lastDocNum, lastFreq, lastNorm, err -} - -func writePostings(postings *roaring.Bitmap, tfEncoder, locEncoder *chunkedIntCoder, - use1HitEncoding func(uint64) (bool, uint64, uint64), - w *CountHashWriter, bufMaxVarintLen64 []byte) ( - offset uint64, err error) { - termCardinality := postings.GetCardinality() - if termCardinality <= 0 { - return 0, nil - } - - if use1HitEncoding != nil { - encodeAs1Hit, docNum1Hit, normBits1Hit := use1HitEncoding(termCardinality) - if encodeAs1Hit { - return FSTValEncode1Hit(docNum1Hit, normBits1Hit), nil - } - } - - tfOffset := uint64(w.Count()) - _, err = 
tfEncoder.Write(w) - if err != nil { - return 0, err - } - - locOffset := uint64(w.Count()) - _, err = locEncoder.Write(w) - if err != nil { - return 0, err - } - - postingsOffset := uint64(w.Count()) - - n := binary.PutUvarint(bufMaxVarintLen64, tfOffset) - _, err = w.Write(bufMaxVarintLen64[:n]) - if err != nil { - return 0, err - } - - n = binary.PutUvarint(bufMaxVarintLen64, locOffset) - _, err = w.Write(bufMaxVarintLen64[:n]) - if err != nil { - return 0, err - } - - _, err = writeRoaringWithLen(postings, w, bufMaxVarintLen64) - if err != nil { - return 0, err - } - - return postingsOffset, nil -} - -type varintEncoder func(uint64) (int, error) - -func mergeStoredAndRemap(segments []*SegmentBase, drops []*roaring.Bitmap, - fieldsMap map[string]uint16, fieldsInv []string, fieldsSame bool, newSegDocCount uint64, - w *CountHashWriter, closeCh chan struct{}) (uint64, [][]uint64, error) { - var rv [][]uint64 // The remapped or newDocNums for each segment. - - var newDocNum uint64 - - var curr int - var data, compressed []byte - var metaBuf bytes.Buffer - varBuf := make([]byte, binary.MaxVarintLen64) - metaEncode := func(val uint64) (int, error) { - wb := binary.PutUvarint(varBuf, val) - return metaBuf.Write(varBuf[:wb]) - } - - vals := make([][][]byte, len(fieldsInv)) - typs := make([][]byte, len(fieldsInv)) - poss := make([][][]uint64, len(fieldsInv)) - - var posBuf []uint64 - - docNumOffsets := make([]uint64, newSegDocCount) - - vdc := visitDocumentCtxPool.Get().(*visitDocumentCtx) - defer visitDocumentCtxPool.Put(vdc) - - // for each segment - for segI, segment := range segments { - // check for the closure in meantime - if isClosed(closeCh) { - return 0, nil, seg.ErrClosed - } - - segNewDocNums := make([]uint64, segment.numDocs) - - dropsI := drops[segI] - - // optimize when the field mapping is the same across all - // segments and there are no deletions, via byte-copying - // of stored docs bytes directly to the writer - if fieldsSame && (dropsI == nil || 
dropsI.GetCardinality() == 0) { - err := segment.copyStoredDocs(newDocNum, docNumOffsets, w) - if err != nil { - return 0, nil, err - } - - for i := uint64(0); i < segment.numDocs; i++ { - segNewDocNums[i] = newDocNum - newDocNum++ - } - rv = append(rv, segNewDocNums) - - continue - } - - // for each doc num - for docNum := uint64(0); docNum < segment.numDocs; docNum++ { - // TODO: roaring's API limits docNums to 32-bits? - if dropsI != nil && dropsI.Contains(uint32(docNum)) { - segNewDocNums[docNum] = docDropped - continue - } - - segNewDocNums[docNum] = newDocNum - - curr = 0 - metaBuf.Reset() - data = data[:0] - - posTemp := posBuf - - // collect all the data - for i := 0; i < len(fieldsInv); i++ { - vals[i] = vals[i][:0] - typs[i] = typs[i][:0] - poss[i] = poss[i][:0] - } - err := segment.visitDocument(vdc, docNum, func(field string, typ byte, value []byte, pos []uint64) bool { - fieldID := int(fieldsMap[field]) - 1 - vals[fieldID] = append(vals[fieldID], value) - typs[fieldID] = append(typs[fieldID], typ) - - // copy array positions to preserve them beyond the scope of this callback - var curPos []uint64 - if len(pos) > 0 { - if cap(posTemp) < len(pos) { - posBuf = make([]uint64, len(pos)*len(fieldsInv)) - posTemp = posBuf - } - curPos = posTemp[0:len(pos)] - copy(curPos, pos) - posTemp = posTemp[len(pos):] - } - poss[fieldID] = append(poss[fieldID], curPos) - - return true - }) - if err != nil { - return 0, nil, err - } - - // _id field special case optimizes ExternalID() lookups - idFieldVal := vals[uint16(0)][0] - _, err = metaEncode(uint64(len(idFieldVal))) - if err != nil { - return 0, nil, err - } - - // now walk the non-"_id" fields in order - for fieldID := 1; fieldID < len(fieldsInv); fieldID++ { - storedFieldValues := vals[fieldID] - - stf := typs[fieldID] - spf := poss[fieldID] - - var err2 error - curr, data, err2 = persistStoredFieldValues(fieldID, - storedFieldValues, stf, spf, curr, metaEncode, data) - if err2 != nil { - return 0, nil, err2 - } 
- } - - metaBytes := metaBuf.Bytes() - - compressed = snappy.Encode(compressed[:cap(compressed)], data) - - // record where we're about to start writing - docNumOffsets[newDocNum] = uint64(w.Count()) - - // write out the meta len and compressed data len - _, err = writeUvarints(w, - uint64(len(metaBytes)), - uint64(len(idFieldVal)+len(compressed))) - if err != nil { - return 0, nil, err - } - // now write the meta - _, err = w.Write(metaBytes) - if err != nil { - return 0, nil, err - } - // now write the _id field val (counted as part of the 'compressed' data) - _, err = w.Write(idFieldVal) - if err != nil { - return 0, nil, err - } - // now write the compressed data - _, err = w.Write(compressed) - if err != nil { - return 0, nil, err - } - - newDocNum++ - } - - rv = append(rv, segNewDocNums) - } - - // return value is the start of the stored index - storedIndexOffset := uint64(w.Count()) - - // now write out the stored doc index - for _, docNumOffset := range docNumOffsets { - err := binary.Write(w, binary.BigEndian, docNumOffset) - if err != nil { - return 0, nil, err - } - } - - return storedIndexOffset, rv, nil -} - -// copyStoredDocs writes out a segment's stored doc info, optimized by -// using a single Write() call for the entire set of bytes. The -// newDocNumOffsets is filled with the new offsets for each doc. 
-func (s *SegmentBase) copyStoredDocs(newDocNum uint64, newDocNumOffsets []uint64, - w *CountHashWriter) error { - if s.numDocs <= 0 { - return nil - } - - indexOffset0, storedOffset0, _, _, _ := - s.getDocStoredOffsets(0) // the segment's first doc - - indexOffsetN, storedOffsetN, readN, metaLenN, dataLenN := - s.getDocStoredOffsets(s.numDocs - 1) // the segment's last doc - - storedOffset0New := uint64(w.Count()) - - storedBytes := s.mem[storedOffset0 : storedOffsetN+readN+metaLenN+dataLenN] - _, err := w.Write(storedBytes) - if err != nil { - return err - } - - // remap the storedOffset's for the docs into new offsets relative - // to storedOffset0New, filling the given docNumOffsetsOut array - for indexOffset := indexOffset0; indexOffset <= indexOffsetN; indexOffset += 8 { - storedOffset := binary.BigEndian.Uint64(s.mem[indexOffset : indexOffset+8]) - storedOffsetNew := storedOffset - storedOffset0 + storedOffset0New - newDocNumOffsets[newDocNum] = storedOffsetNew - newDocNum += 1 - } - - return nil -} - -// mergeFields builds a unified list of fields used across all the -// input segments, and computes whether the fields are the same across -// segments (which depends on fields to be sorted in the same way -// across segments) -func mergeFields(segments []*SegmentBase) (bool, []string) { - fieldsSame := true - - var segment0Fields []string - if len(segments) > 0 { - segment0Fields = segments[0].Fields() - } - - fieldsExist := map[string]struct{}{} - for _, segment := range segments { - fields := segment.Fields() - for fieldi, field := range fields { - fieldsExist[field] = struct{}{} - if len(segment0Fields) != len(fields) || segment0Fields[fieldi] != field { - fieldsSame = false - } - } - } - - rv := make([]string, 0, len(fieldsExist)) - // ensure _id stays first - rv = append(rv, "_id") - for k := range fieldsExist { - if k != "_id" { - rv = append(rv, k) - } - } - - sort.Strings(rv[1:]) // leave _id as first - - return fieldsSame, rv -} - -func 
isClosed(closeCh chan struct{}) bool { - select { - case <-closeCh: - return true - default: - return false - } -} diff --git a/index/scorch/segment/zap/merge_test.go b/index/scorch/segment/zap/merge_test.go deleted file mode 100644 index 175671226..000000000 --- a/index/scorch/segment/zap/merge_test.go +++ /dev/null @@ -1,870 +0,0 @@ -// Copyright (c) 2017 Couchbase, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package zap - -import ( - "fmt" - "os" - "reflect" - "sort" - "strings" - "testing" - - "github.com/RoaringBitmap/roaring" - "github.com/blevesearch/bleve/analysis" - "github.com/blevesearch/bleve/document" - "github.com/blevesearch/bleve/index" -) - -func TestMerge(t *testing.T) { - _ = os.RemoveAll("/tmp/scorch.zap") - _ = os.RemoveAll("/tmp/scorch2.zap") - _ = os.RemoveAll("/tmp/scorch3.zap") - - testSeg, _, _ := buildTestSegmentMulti() - err := PersistSegmentBase(testSeg, "/tmp/scorch.zap") - if err != nil { - t.Fatal(err) - } - - testSeg2, _, _ := buildTestSegmentMulti2() - err = PersistSegmentBase(testSeg2, "/tmp/scorch2.zap") - if err != nil { - t.Fatal(err) - } - - segment, err := Open("/tmp/scorch.zap") - if err != nil { - t.Fatalf("error opening segment: %v", err) - } - defer func() { - cerr := segment.Close() - if cerr != nil { - t.Fatalf("error closing segment: %v", err) - } - }() - - segment2, err := Open("/tmp/scorch2.zap") - if err != nil { - t.Fatalf("error opening segment: %v", err) - } - defer func() { - cerr := 
segment2.Close() - if cerr != nil { - t.Fatalf("error closing segment: %v", err) - } - }() - - segsToMerge := make([]*Segment, 2) - segsToMerge[0] = segment.(*Segment) - segsToMerge[1] = segment2.(*Segment) - - _, _, err = Merge(segsToMerge, []*roaring.Bitmap{nil, nil}, "/tmp/scorch3.zap", 1024, nil, nil) - if err != nil { - t.Fatal(err) - } - - segm, err := Open("/tmp/scorch3.zap") - if err != nil { - t.Fatalf("error opening merged segment: %v", err) - } - seg3 := segm.(*Segment) - defer func() { - cerr := seg3.Close() - if cerr != nil { - t.Fatalf("error closing segment: %v", err) - } - }() - - if seg3.Path() != "/tmp/scorch3.zap" { - t.Fatalf("wrong path") - } - if seg3.Count() != 4 { - t.Fatalf("wrong count") - } - if len(seg3.Fields()) != 5 { - t.Fatalf("wrong # fields: %#v\n", seg3.Fields()) - } - - testMergeWithSelf(t, seg3, 4) -} - -func TestMergeWithEmptySegment(t *testing.T) { - testMergeWithEmptySegments(t, true, 1) -} - -func TestMergeWithEmptySegments(t *testing.T) { - testMergeWithEmptySegments(t, true, 5) -} - -func TestMergeWithEmptySegmentFirst(t *testing.T) { - testMergeWithEmptySegments(t, false, 1) -} - -func TestMergeWithEmptySegmentsFirst(t *testing.T) { - testMergeWithEmptySegments(t, false, 5) -} - -func testMergeWithEmptySegments(t *testing.T, before bool, numEmptySegments int) { - _ = os.RemoveAll("/tmp/scorch.zap") - - testSeg, _, _ := buildTestSegmentMulti() - err := PersistSegmentBase(testSeg, "/tmp/scorch.zap") - if err != nil { - t.Fatal(err) - } - segment, err := Open("/tmp/scorch.zap") - if err != nil { - t.Fatalf("error opening segment: %v", err) - } - defer func() { - cerr := segment.Close() - if cerr != nil { - t.Fatalf("error closing segment: %v", err) - } - }() - - var segsToMerge []*Segment - - if before { - segsToMerge = append(segsToMerge, segment.(*Segment)) - } - - for i := 0; i < numEmptySegments; i++ { - fname := fmt.Sprintf("scorch-empty-%d.zap", i) - - _ = os.RemoveAll("/tmp/" + fname) - - emptySegment, _, err := 
AnalysisResultsToSegmentBase([]*index.AnalysisResult{}, 1024) - if err != nil { - t.Fatal(err) - } - err = PersistSegmentBase(emptySegment, "/tmp/"+fname) - if err != nil { - t.Fatal(err) - } - - emptyFileSegment, err := Open("/tmp/" + fname) - if err != nil { - t.Fatalf("error opening segment: %v", err) - } - defer func(emptyFileSegment *Segment) { - cerr := emptyFileSegment.Close() - if cerr != nil { - t.Fatalf("error closing segment: %v", err) - } - }(emptyFileSegment.(*Segment)) - - segsToMerge = append(segsToMerge, emptyFileSegment.(*Segment)) - } - - if !before { - segsToMerge = append(segsToMerge, segment.(*Segment)) - } - - _ = os.RemoveAll("/tmp/scorch3.zap") - - drops := make([]*roaring.Bitmap, len(segsToMerge)) - - _, _, err = Merge(segsToMerge, drops, "/tmp/scorch3.zap", 1024, nil, nil) - if err != nil { - t.Fatal(err) - } - - segm, err := Open("/tmp/scorch3.zap") - if err != nil { - t.Fatalf("error opening merged segment: %v", err) - } - segCur := segm.(*Segment) - defer func() { - cerr := segCur.Close() - if cerr != nil { - t.Fatalf("error closing segment: %v", err) - } - }() - - if segCur.Path() != "/tmp/scorch3.zap" { - t.Fatalf("wrong path") - } - if segCur.Count() != 2 { - t.Fatalf("wrong count, numEmptySegments: %d, got count: %d", numEmptySegments, segCur.Count()) - } - if len(segCur.Fields()) != 5 { - t.Fatalf("wrong # fields: %#v\n", segCur.Fields()) - } - - testMergeWithSelf(t, segCur, 2) -} - -func testMergeWithSelf(t *testing.T, segCur *Segment, expectedCount uint64) { - // trying merging the segment with itself for a few rounds - var diffs []string - - for i := 0; i < 10; i++ { - fname := fmt.Sprintf("scorch-self-%d.zap", i) - - _ = os.RemoveAll("/tmp/" + fname) - - segsToMerge := make([]*Segment, 1) - segsToMerge[0] = segCur - - _, _, err := Merge(segsToMerge, []*roaring.Bitmap{nil, nil}, "/tmp/"+fname, 1024, nil, nil) - if err != nil { - t.Fatal(err) - } - - segm, err := Open("/tmp/" + fname) - if err != nil { - t.Fatalf("error opening 
merged segment: %v", err) - } - segNew := segm.(*Segment) - defer func(s *Segment) { - cerr := s.Close() - if cerr != nil { - t.Fatalf("error closing segment: %v", err) - } - }(segNew) - - if segNew.Count() != expectedCount { - t.Fatalf("wrong count") - } - if len(segNew.Fields()) != 5 { - t.Fatalf("wrong # fields: %#v\n", segNew.Fields()) - } - - diff := compareSegments(segCur, segNew) - if diff != "" { - diffs = append(diffs, fname+" is different than previous:\n"+diff) - } - - segCur = segNew - } - - if len(diffs) > 0 { - t.Errorf("mismatches after repeated self-merging: %v", strings.Join(diffs, "\n")) - } -} - -func compareSegments(a, b *Segment) string { - var rv []string - - if a.Count() != b.Count() { - return "counts" - } - - afields := append([]string(nil), a.Fields()...) - bfields := append([]string(nil), b.Fields()...) - sort.Strings(afields) - sort.Strings(bfields) - if !reflect.DeepEqual(afields, bfields) { - return "fields" - } - - for _, fieldName := range afields { - adict, err := a.Dictionary(fieldName) - if err != nil { - return fmt.Sprintf("adict err: %v", err) - } - bdict, err := b.Dictionary(fieldName) - if err != nil { - return fmt.Sprintf("bdict err: %v", err) - } - - if adict.(*Dictionary).fst.Len() != bdict.(*Dictionary).fst.Len() { - rv = append(rv, fmt.Sprintf("field %s, dict fst Len()'s different: %v %v", - fieldName, adict.(*Dictionary).fst.Len(), bdict.(*Dictionary).fst.Len())) - } - - aitr := adict.Iterator() - bitr := bdict.Iterator() - for { - anext, aerr := aitr.Next() - bnext, berr := bitr.Next() - if aerr != berr { - rv = append(rv, fmt.Sprintf("field %s, dict iterator Next() errors different: %v %v", - fieldName, aerr, berr)) - break - } - if !reflect.DeepEqual(anext, bnext) { - rv = append(rv, fmt.Sprintf("field %s, dict iterator Next() results different: %#v %#v", - fieldName, anext, bnext)) - // keep going to try to see more diff details at the postingsList level - } - if aerr != nil || anext == nil || - berr != nil || bnext 
== nil { - break - } - - for _, next := range []*index.DictEntry{anext, bnext} { - if next == nil { - continue - } - - aplist, aerr := adict.(*Dictionary).postingsList([]byte(next.Term), nil, nil) - bplist, berr := bdict.(*Dictionary).postingsList([]byte(next.Term), nil, nil) - if aerr != berr { - rv = append(rv, fmt.Sprintf("field %s, term: %s, postingsList() errors different: %v %v", - fieldName, next.Term, aerr, berr)) - } - - if (aplist != nil) != (bplist != nil) { - rv = append(rv, fmt.Sprintf("field %s, term: %s, postingsList() results different: %v %v", - fieldName, next.Term, aplist, bplist)) - break - } - - if aerr != nil || aplist == nil || - berr != nil || bplist == nil { - break - } - - if aplist.Count() != bplist.Count() { - rv = append(rv, fmt.Sprintf("field %s, term: %s, postingsList().Count()'s different: %v %v", - fieldName, next.Term, aplist.Count(), bplist.Count())) - } - - apitr := aplist.Iterator(true, true, true, nil) - bpitr := bplist.Iterator(true, true, true, nil) - if (apitr != nil) != (bpitr != nil) { - rv = append(rv, fmt.Sprintf("field %s, term: %s, postingsList.Iterator() results different: %v %v", - fieldName, next.Term, apitr, bpitr)) - break - } - - for { - apitrn, aerr := apitr.Next() - bpitrn, berr := bpitr.Next() - if aerr != berr { - rv = append(rv, fmt.Sprintf("field %s, term: %s, postingsListIterator Next() errors different: %v %v", - fieldName, next.Term, aerr, berr)) - } - - if (apitrn != nil) != (bpitrn != nil) { - rv = append(rv, fmt.Sprintf("field %s, term: %s, postingsListIterator Next() results different: %v %v", - fieldName, next.Term, apitrn, bpitrn)) - break - } - - if aerr != nil || apitrn == nil || - berr != nil || bpitrn == nil { - break - } - - if apitrn.Number() != bpitrn.Number() { - rv = append(rv, fmt.Sprintf("field %s, term: %s, postingsListIterator Next() Number()'s different: %v %v", - fieldName, next.Term, apitrn.Number(), bpitrn.Number())) - } - - if apitrn.Frequency() != bpitrn.Frequency() { - rv = 
append(rv, fmt.Sprintf("field %s, term: %s, postingsListIterator Next() Frequency()'s different: %v %v", - fieldName, next.Term, apitrn.Frequency(), bpitrn.Frequency())) - } - - if apitrn.Norm() != bpitrn.Norm() { - rv = append(rv, fmt.Sprintf("field %s, term: %s, postingsListIterator Next() Norm()'s different: %v %v", - fieldName, next.Term, apitrn.Norm(), bpitrn.Norm())) - } - - if len(apitrn.Locations()) != len(bpitrn.Locations()) { - rv = append(rv, fmt.Sprintf("field %s, term: %s, postingsListIterator Next() Locations() len's different: %v %v", - fieldName, next.Term, len(apitrn.Locations()), len(bpitrn.Locations()))) - } - - for loci, aloc := range apitrn.Locations() { - bloc := bpitrn.Locations()[loci] - - if (aloc != nil) != (bloc != nil) { - rv = append(rv, fmt.Sprintf("field %s, term: %s, postingsListIterator Next() loc different: %v %v", - fieldName, next.Term, aloc, bloc)) - break - } - - if aloc.Field() != bloc.Field() || - aloc.Start() != bloc.Start() || - aloc.End() != bloc.End() || - aloc.Pos() != bloc.Pos() || - !reflect.DeepEqual(aloc.ArrayPositions(), bloc.ArrayPositions()) { - rv = append(rv, fmt.Sprintf("field %s, term: %s, postingsListIterator Next() loc details different: %v %v", - fieldName, next.Term, aloc, bloc)) - } - } - - if fieldName == "_id" { - docId := next.Term - docNumA := apitrn.Number() - docNumB := bpitrn.Number() - afields := map[string]interface{}{} - err = a.VisitDocument(apitrn.Number(), - func(field string, typ byte, value []byte, pos []uint64) bool { - afields[field+"-typ"] = typ - afields[field+"-value"] = append([]byte(nil), value...) - afields[field+"-pos"] = append([]uint64(nil), pos...) - return true - }) - if err != nil { - rv = append(rv, fmt.Sprintf("a.VisitDocument err: %v", err)) - } - bfields := map[string]interface{}{} - err = b.VisitDocument(bpitrn.Number(), - func(field string, typ byte, value []byte, pos []uint64) bool { - bfields[field+"-typ"] = typ - bfields[field+"-value"] = append([]byte(nil), value...) 
- bfields[field+"-pos"] = append([]uint64(nil), pos...) - return true - }) - if err != nil { - rv = append(rv, fmt.Sprintf("b.VisitDocument err: %v", err)) - } - if !reflect.DeepEqual(afields, bfields) { - rv = append(rv, fmt.Sprintf("afields != bfields,"+ - " id: %s, docNumA: %d, docNumB: %d,"+ - " afields: %#v, bfields: %#v", - docId, docNumA, docNumB, afields, bfields)) - } - } - } - } - } - } - - return strings.Join(rv, "\n") -} - -func TestMergeAndDrop(t *testing.T) { - docsToDrop := make([]*roaring.Bitmap, 2) - docsToDrop[0] = roaring.NewBitmap() - docsToDrop[0].AddInt(1) - docsToDrop[1] = roaring.NewBitmap() - docsToDrop[1].AddInt(1) - testMergeAndDrop(t, docsToDrop) -} - -func TestMergeAndDropAllFromOneSegment(t *testing.T) { - docsToDrop := make([]*roaring.Bitmap, 2) - docsToDrop[0] = roaring.NewBitmap() - docsToDrop[0].AddInt(0) - docsToDrop[0].AddInt(1) - docsToDrop[1] = roaring.NewBitmap() - testMergeAndDrop(t, docsToDrop) -} - -func testMergeAndDrop(t *testing.T, docsToDrop []*roaring.Bitmap) { - _ = os.RemoveAll("/tmp/scorch.zap") - _ = os.RemoveAll("/tmp/scorch2.zap") - - testSeg, _, _ := buildTestSegmentMulti() - err := PersistSegmentBase(testSeg, "/tmp/scorch.zap") - if err != nil { - t.Fatal(err) - } - segment, err := Open("/tmp/scorch.zap") - if err != nil { - t.Fatalf("error opening segment: %v", err) - } - defer func() { - cerr := segment.Close() - if cerr != nil { - t.Fatalf("error closing segment: %v", err) - } - }() - - testSeg2, _, _ := buildTestSegmentMulti2() - err = PersistSegmentBase(testSeg2, "/tmp/scorch2.zap") - if err != nil { - t.Fatal(err) - } - - segment2, err := Open("/tmp/scorch2.zap") - if err != nil { - t.Fatalf("error opening segment: %v", err) - } - defer func() { - cerr := segment2.Close() - if cerr != nil { - t.Fatalf("error closing segment: %v", err) - } - }() - - segsToMerge := make([]*Segment, 2) - segsToMerge[0] = segment.(*Segment) - segsToMerge[1] = segment2.(*Segment) - - testMergeAndDropSegments(t, segsToMerge, 
docsToDrop, 2) -} - -func TestMergeWithUpdates(t *testing.T) { - segmentDocIds := [][]string{ - []string{"a", "b"}, - []string{"b", "c"}, // doc "b" updated - } - - docsToDrop := make([]*roaring.Bitmap, 2) - docsToDrop[0] = roaring.NewBitmap() - docsToDrop[0].AddInt(1) // doc "b" updated - docsToDrop[1] = roaring.NewBitmap() - - testMergeWithUpdates(t, segmentDocIds, docsToDrop, 3) -} - -func TestMergeWithUpdatesOnManySegments(t *testing.T) { - segmentDocIds := [][]string{ - []string{"a", "b"}, - []string{"b", "c"}, // doc "b" updated - []string{"c", "d"}, // doc "c" updated - []string{"d", "e"}, // doc "d" updated - } - - docsToDrop := make([]*roaring.Bitmap, 4) - docsToDrop[0] = roaring.NewBitmap() - docsToDrop[0].AddInt(1) // doc "b" updated - docsToDrop[1] = roaring.NewBitmap() - docsToDrop[1].AddInt(1) // doc "c" updated - docsToDrop[2] = roaring.NewBitmap() - docsToDrop[2].AddInt(1) // doc "d" updated - docsToDrop[3] = roaring.NewBitmap() - - testMergeWithUpdates(t, segmentDocIds, docsToDrop, 5) -} - -func TestMergeWithUpdatesOnOneDoc(t *testing.T) { - segmentDocIds := [][]string{ - []string{"a", "b"}, - []string{"a", "c"}, // doc "a" updated - []string{"a", "d"}, // doc "a" updated - []string{"a", "e"}, // doc "a" updated - } - - docsToDrop := make([]*roaring.Bitmap, 4) - docsToDrop[0] = roaring.NewBitmap() - docsToDrop[0].AddInt(0) // doc "a" updated - docsToDrop[1] = roaring.NewBitmap() - docsToDrop[1].AddInt(0) // doc "a" updated - docsToDrop[2] = roaring.NewBitmap() - docsToDrop[2].AddInt(0) // doc "a" updated - docsToDrop[3] = roaring.NewBitmap() - - testMergeWithUpdates(t, segmentDocIds, docsToDrop, 5) -} - -func testMergeWithUpdates(t *testing.T, segmentDocIds [][]string, docsToDrop []*roaring.Bitmap, expectedNumDocs uint64) { - var segsToMerge []*Segment - - // convert segmentDocIds to segsToMerge - for i, docIds := range segmentDocIds { - fname := fmt.Sprintf("scorch%d.zap", i) - - _ = os.RemoveAll("/tmp/" + fname) - - testSeg, _, _ := 
buildTestSegmentMultiHelper(docIds) - err := PersistSegmentBase(testSeg, "/tmp/"+fname) - if err != nil { - t.Fatal(err) - } - segment, err := Open("/tmp/" + fname) - if err != nil { - t.Fatalf("error opening segment: %v", err) - } - defer func(segment *Segment) { - cerr := segment.Close() - if cerr != nil { - t.Fatalf("error closing segment: %v", err) - } - }(segment.(*Segment)) - - segsToMerge = append(segsToMerge, segment.(*Segment)) - } - - testMergeAndDropSegments(t, segsToMerge, docsToDrop, expectedNumDocs) -} - -func testMergeAndDropSegments(t *testing.T, segsToMerge []*Segment, docsToDrop []*roaring.Bitmap, expectedNumDocs uint64) { - _ = os.RemoveAll("/tmp/scorch-merged.zap") - - _, _, err := Merge(segsToMerge, docsToDrop, "/tmp/scorch-merged.zap", 1024, nil, nil) - if err != nil { - t.Fatal(err) - } - - segm, err := Open("/tmp/scorch-merged.zap") - if err != nil { - t.Fatalf("error opening merged segment: %v", err) - } - defer func() { - cerr := segm.Close() - if cerr != nil { - t.Fatalf("error closing segment: %v", err) - } - }() - - if segm.Count() != expectedNumDocs { - t.Fatalf("wrong count, got: %d, wanted: %d", segm.Count(), expectedNumDocs) - } - if len(segm.Fields()) != 5 { - t.Fatalf("wrong # fields: %#v\n", segm.Fields()) - } - - testMergeWithSelf(t, segm.(*Segment), expectedNumDocs) -} - -func buildTestSegmentMulti2() (*SegmentBase, uint64, error) { - return buildTestSegmentMultiHelper([]string{"c", "d"}) -} - -func buildTestSegmentMultiHelper(docIds []string) (*SegmentBase, uint64, error) { - doc := &document.Document{ - ID: "c", - Fields: []document.Field{ - document.NewTextFieldCustom("_id", nil, []byte(docIds[0]), document.IndexField|document.StoreField, nil), - document.NewTextFieldCustom("name", nil, []byte("mat"), document.IndexField|document.StoreField|document.IncludeTermVectors, nil), - document.NewTextFieldCustom("desc", nil, []byte("some thing"), document.IndexField|document.StoreField|document.IncludeTermVectors, nil), - 
document.NewTextFieldCustom("tag", []uint64{0}, []byte("cold"), document.IndexField|document.StoreField|document.IncludeTermVectors, nil), - document.NewTextFieldCustom("tag", []uint64{1}, []byte("dark"), document.IndexField|document.StoreField|document.IncludeTermVectors, nil), - }, - CompositeFields: []*document.CompositeField{ - document.NewCompositeField("_all", true, nil, []string{"_id"}), - }, - } - - doc2 := &document.Document{ - ID: "d", - Fields: []document.Field{ - document.NewTextFieldCustom("_id", nil, []byte(docIds[1]), document.IndexField|document.StoreField, nil), - document.NewTextFieldCustom("name", nil, []byte("joa"), document.IndexField|document.StoreField|document.IncludeTermVectors, nil), - document.NewTextFieldCustom("desc", nil, []byte("some thing"), document.IndexField|document.StoreField|document.IncludeTermVectors, nil), - document.NewTextFieldCustom("tag", []uint64{0}, []byte("cold"), document.IndexField|document.StoreField|document.IncludeTermVectors, nil), - document.NewTextFieldCustom("tag", []uint64{1}, []byte("dark"), document.IndexField|document.StoreField|document.IncludeTermVectors, nil), - }, - CompositeFields: []*document.CompositeField{ - document.NewCompositeField("_all", true, nil, []string{"_id"}), - }, - } - - // forge analyzed docs - results := []*index.AnalysisResult{ - &index.AnalysisResult{ - Document: doc, - Analyzed: []analysis.TokenFrequencies{ - analysis.TokenFrequency(analysis.TokenStream{ - &analysis.Token{ - Start: 0, - End: 1, - Position: 1, - Term: []byte(docIds[0]), - }, - }, nil, false), - analysis.TokenFrequency(analysis.TokenStream{ - &analysis.Token{ - Start: 0, - End: 3, - Position: 1, - Term: []byte("mat"), - }, - }, nil, true), - analysis.TokenFrequency(analysis.TokenStream{ - &analysis.Token{ - Start: 0, - End: 4, - Position: 1, - Term: []byte("some"), - }, - &analysis.Token{ - Start: 5, - End: 10, - Position: 2, - Term: []byte("thing"), - }, - }, nil, true), - 
analysis.TokenFrequency(analysis.TokenStream{ - &analysis.Token{ - Start: 0, - End: 4, - Position: 1, - Term: []byte("cold"), - }, - }, []uint64{0}, true), - analysis.TokenFrequency(analysis.TokenStream{ - &analysis.Token{ - Start: 0, - End: 4, - Position: 1, - Term: []byte("dark"), - }, - }, []uint64{1}, true), - }, - Length: []int{ - 1, - 1, - 2, - 1, - 1, - }, - }, - &index.AnalysisResult{ - Document: doc2, - Analyzed: []analysis.TokenFrequencies{ - analysis.TokenFrequency(analysis.TokenStream{ - &analysis.Token{ - Start: 0, - End: 1, - Position: 1, - Term: []byte(docIds[1]), - }, - }, nil, false), - analysis.TokenFrequency(analysis.TokenStream{ - &analysis.Token{ - Start: 0, - End: 3, - Position: 1, - Term: []byte("joa"), - }, - }, nil, true), - analysis.TokenFrequency(analysis.TokenStream{ - &analysis.Token{ - Start: 0, - End: 4, - Position: 1, - Term: []byte("some"), - }, - &analysis.Token{ - Start: 5, - End: 10, - Position: 2, - Term: []byte("thing"), - }, - }, nil, true), - analysis.TokenFrequency(analysis.TokenStream{ - &analysis.Token{ - Start: 0, - End: 4, - Position: 1, - Term: []byte("cold"), - }, - }, []uint64{0}, true), - analysis.TokenFrequency(analysis.TokenStream{ - &analysis.Token{ - Start: 0, - End: 4, - Position: 1, - Term: []byte("dark"), - }, - }, []uint64{1}, true), - }, - Length: []int{ - 1, - 1, - 2, - 1, - 1, - }, - }, - } - - // fix up composite fields - for _, ar := range results { - for i, f := range ar.Document.Fields { - for _, cf := range ar.Document.CompositeFields { - cf.Compose(f.Name(), ar.Length[i], ar.Analyzed[i]) - } - } - } - - return AnalysisResultsToSegmentBase(results, 1024) -} - -func TestMergeBytesWritten(t *testing.T) { - _ = os.RemoveAll("/tmp/scorch.zap") - _ = os.RemoveAll("/tmp/scorch2.zap") - _ = os.RemoveAll("/tmp/scorch3.zap") - - testSeg, _, _ := buildTestSegmentMulti() - err := PersistSegmentBase(testSeg, "/tmp/scorch.zap") - if err != nil { - t.Fatal(err) - } - - testSeg2, _, _ := buildTestSegmentMulti2() - 
err = PersistSegmentBase(testSeg2, "/tmp/scorch2.zap") - if err != nil { - t.Fatal(err) - } - - segment, err := Open("/tmp/scorch.zap") - if err != nil { - t.Fatalf("error opening segment: %v", err) - } - defer func() { - cerr := segment.Close() - if cerr != nil { - t.Fatalf("error closing segment: %v", err) - } - }() - - segment2, err := Open("/tmp/scorch2.zap") - if err != nil { - t.Fatalf("error opening segment: %v", err) - } - defer func() { - cerr := segment2.Close() - if cerr != nil { - t.Fatalf("error closing segment: %v", err) - } - }() - - segsToMerge := make([]*Segment, 2) - segsToMerge[0] = segment.(*Segment) - segsToMerge[1] = segment2.(*Segment) - - _, nBytes, err := Merge(segsToMerge, []*roaring.Bitmap{nil, nil}, "/tmp/scorch3.zap", 1024, nil, nil) - if err != nil { - t.Fatal(err) - } - - if nBytes == 0 { - t.Fatalf("expected a non zero total_compaction_written_bytes") - } - - segm, err := Open("/tmp/scorch3.zap") - if err != nil { - t.Fatalf("error opening merged segment: %v", err) - } - seg3 := segm.(*Segment) - defer func() { - cerr := seg3.Close() - if cerr != nil { - t.Fatalf("error closing segment: %v", err) - } - }() - - if seg3.Path() != "/tmp/scorch3.zap" { - t.Fatalf("wrong path") - } - if seg3.Count() != 4 { - t.Fatalf("wrong count") - } - if len(seg3.Fields()) != 5 { - t.Fatalf("wrong # fields: %#v\n", seg3.Fields()) - } - - testMergeWithSelf(t, seg3, 4) -} - -func TestUnder32Bits(t *testing.T) { - if !under32Bits(0) || !under32Bits(uint64(0x7fffffff)) { - t.Errorf("under32Bits bad") - } - if under32Bits(uint64(0x80000000)) || under32Bits(uint64(0x80000001)) { - t.Errorf("under32Bits wrong") - } -} diff --git a/index/scorch/segment/zap/new.go b/index/scorch/segment/zap/new.go deleted file mode 100644 index c108ec16d..000000000 --- a/index/scorch/segment/zap/new.go +++ /dev/null @@ -1,839 +0,0 @@ -// Copyright (c) 2018 Couchbase, Inc. 
-// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package zap - -import ( - "bytes" - "encoding/binary" - "math" - "sort" - "sync" - - "github.com/RoaringBitmap/roaring" - "github.com/blevesearch/bleve/analysis" - "github.com/blevesearch/bleve/document" - "github.com/blevesearch/bleve/index" - "github.com/couchbase/vellum" - "github.com/golang/snappy" -) - -var NewSegmentBufferNumResultsBump int = 100 -var NewSegmentBufferNumResultsFactor float64 = 1.0 -var NewSegmentBufferAvgBytesPerDocFactor float64 = 1.0 - -// ValidateDocFields can be set by applications to perform additional checks -// on fields in a document being added to a new segment, by default it does -// nothing. -// This API is experimental and may be removed at any time. 
-var ValidateDocFields = func(field document.Field) error { - return nil -} - -// AnalysisResultsToSegmentBase produces an in-memory zap-encoded -// SegmentBase from analysis results -func AnalysisResultsToSegmentBase(results []*index.AnalysisResult, - chunkFactor uint32) (*SegmentBase, uint64, error) { - s := interimPool.Get().(*interim) - - var br bytes.Buffer - if s.lastNumDocs > 0 { - // use previous results to initialize the buf with an estimate - // size, but note that the interim instance comes from a - // global interimPool, so multiple scorch instances indexing - // different docs can lead to low quality estimates - estimateAvgBytesPerDoc := int(float64(s.lastOutSize/s.lastNumDocs) * - NewSegmentBufferNumResultsFactor) - estimateNumResults := int(float64(len(results)+NewSegmentBufferNumResultsBump) * - NewSegmentBufferAvgBytesPerDocFactor) - br.Grow(estimateAvgBytesPerDoc * estimateNumResults) - } - - s.results = results - s.chunkFactor = chunkFactor - s.w = NewCountHashWriter(&br) - - storedIndexOffset, fieldsIndexOffset, fdvIndexOffset, dictOffsets, - err := s.convert() - if err != nil { - return nil, uint64(0), err - } - - sb, err := InitSegmentBase(br.Bytes(), s.w.Sum32(), chunkFactor, - s.FieldsMap, s.FieldsInv, uint64(len(results)), - storedIndexOffset, fieldsIndexOffset, fdvIndexOffset, dictOffsets) - - if err == nil && s.reset() == nil { - s.lastNumDocs = len(results) - s.lastOutSize = len(br.Bytes()) - interimPool.Put(s) - } - - return sb, uint64(len(br.Bytes())), err -} - -var interimPool = sync.Pool{New: func() interface{} { return &interim{} }} - -// interim holds temporary working data used while converting from -// analysis results to a zap-encoded segment -type interim struct { - results []*index.AnalysisResult - - chunkFactor uint32 - - w *CountHashWriter - - // FieldsMap adds 1 to field id to avoid zero value issues - // name -> field id + 1 - FieldsMap map[string]uint16 - - // FieldsInv is the inverse of FieldsMap - // field id -> name - 
FieldsInv []string - - // Term dictionaries for each field - // field id -> term -> postings list id + 1 - Dicts []map[string]uint64 - - // Terms for each field, where terms are sorted ascending - // field id -> []term - DictKeys [][]string - - // Fields whose IncludeDocValues is true - // field id -> bool - IncludeDocValues []bool - - // postings id -> bitmap of docNums - Postings []*roaring.Bitmap - - // postings id -> freq/norm's, one for each docNum in postings - FreqNorms [][]interimFreqNorm - freqNormsBacking []interimFreqNorm - - // postings id -> locs, one for each freq - Locs [][]interimLoc - locsBacking []interimLoc - - numTermsPerPostingsList []int // key is postings list id - numLocsPerPostingsList []int // key is postings list id - - builder *vellum.Builder - builderBuf bytes.Buffer - - metaBuf bytes.Buffer - - tmp0 []byte - tmp1 []byte - - lastNumDocs int - lastOutSize int -} - -func (s *interim) reset() (err error) { - s.results = nil - s.chunkFactor = 0 - s.w = nil - s.FieldsMap = nil - s.FieldsInv = nil - for i := range s.Dicts { - s.Dicts[i] = nil - } - s.Dicts = s.Dicts[:0] - for i := range s.DictKeys { - s.DictKeys[i] = s.DictKeys[i][:0] - } - s.DictKeys = s.DictKeys[:0] - for i := range s.IncludeDocValues { - s.IncludeDocValues[i] = false - } - s.IncludeDocValues = s.IncludeDocValues[:0] - for _, idn := range s.Postings { - idn.Clear() - } - s.Postings = s.Postings[:0] - s.FreqNorms = s.FreqNorms[:0] - for i := range s.freqNormsBacking { - s.freqNormsBacking[i] = interimFreqNorm{} - } - s.freqNormsBacking = s.freqNormsBacking[:0] - s.Locs = s.Locs[:0] - for i := range s.locsBacking { - s.locsBacking[i] = interimLoc{} - } - s.locsBacking = s.locsBacking[:0] - s.numTermsPerPostingsList = s.numTermsPerPostingsList[:0] - s.numLocsPerPostingsList = s.numLocsPerPostingsList[:0] - s.builderBuf.Reset() - if s.builder != nil { - err = s.builder.Reset(&s.builderBuf) - } - s.metaBuf.Reset() - s.tmp0 = s.tmp0[:0] - s.tmp1 = s.tmp1[:0] - s.lastNumDocs = 0 - 
s.lastOutSize = 0 - - return err -} - -func (s *interim) grabBuf(size int) []byte { - buf := s.tmp0 - if cap(buf) < size { - buf = make([]byte, size) - s.tmp0 = buf - } - return buf[0:size] -} - -type interimStoredField struct { - vals [][]byte - typs []byte - arrayposs [][]uint64 // array positions -} - -type interimFreqNorm struct { - freq uint64 - norm float32 - numLocs int -} - -type interimLoc struct { - fieldID uint16 - pos uint64 - start uint64 - end uint64 - arrayposs []uint64 -} - -func (s *interim) convert() (uint64, uint64, uint64, []uint64, error) { - s.FieldsMap = map[string]uint16{} - - s.getOrDefineField("_id") // _id field is fieldID 0 - - for _, result := range s.results { - for _, field := range result.Document.CompositeFields { - s.getOrDefineField(field.Name()) - } - for _, field := range result.Document.Fields { - s.getOrDefineField(field.Name()) - } - } - - sort.Strings(s.FieldsInv[1:]) // keep _id as first field - - for fieldID, fieldName := range s.FieldsInv { - s.FieldsMap[fieldName] = uint16(fieldID + 1) - } - - if cap(s.IncludeDocValues) >= len(s.FieldsInv) { - s.IncludeDocValues = s.IncludeDocValues[:len(s.FieldsInv)] - } else { - s.IncludeDocValues = make([]bool, len(s.FieldsInv)) - } - - s.prepareDicts() - - for _, dict := range s.DictKeys { - sort.Strings(dict) - } - - s.processDocuments() - - storedIndexOffset, err := s.writeStoredFields() - if err != nil { - return 0, 0, 0, nil, err - } - - var fdvIndexOffset uint64 - var dictOffsets []uint64 - - if len(s.results) > 0 { - fdvIndexOffset, dictOffsets, err = s.writeDicts() - if err != nil { - return 0, 0, 0, nil, err - } - } else { - dictOffsets = make([]uint64, len(s.FieldsInv)) - } - - fieldsIndexOffset, err := persistFields(s.FieldsInv, s.w, dictOffsets) - if err != nil { - return 0, 0, 0, nil, err - } - - return storedIndexOffset, fieldsIndexOffset, fdvIndexOffset, dictOffsets, nil -} - -func (s *interim) getOrDefineField(fieldName string) int { - fieldIDPlus1, exists := 
s.FieldsMap[fieldName] - if !exists { - fieldIDPlus1 = uint16(len(s.FieldsInv) + 1) - s.FieldsMap[fieldName] = fieldIDPlus1 - s.FieldsInv = append(s.FieldsInv, fieldName) - - s.Dicts = append(s.Dicts, make(map[string]uint64)) - - n := len(s.DictKeys) - if n < cap(s.DictKeys) { - s.DictKeys = s.DictKeys[:n+1] - s.DictKeys[n] = s.DictKeys[n][:0] - } else { - s.DictKeys = append(s.DictKeys, []string(nil)) - } - } - - return int(fieldIDPlus1 - 1) -} - -// fill Dicts and DictKeys from analysis results -func (s *interim) prepareDicts() { - var pidNext int - - var totTFs int - var totLocs int - - visitField := func(fieldID uint16, tfs analysis.TokenFrequencies) { - dict := s.Dicts[fieldID] - dictKeys := s.DictKeys[fieldID] - - for term, tf := range tfs { - pidPlus1, exists := dict[term] - if !exists { - pidNext++ - pidPlus1 = uint64(pidNext) - - dict[term] = pidPlus1 - dictKeys = append(dictKeys, term) - - s.numTermsPerPostingsList = append(s.numTermsPerPostingsList, 0) - s.numLocsPerPostingsList = append(s.numLocsPerPostingsList, 0) - } - - pid := pidPlus1 - 1 - - s.numTermsPerPostingsList[pid] += 1 - s.numLocsPerPostingsList[pid] += len(tf.Locations) - - totLocs += len(tf.Locations) - } - - totTFs += len(tfs) - - s.DictKeys[fieldID] = dictKeys - } - - for _, result := range s.results { - // walk each composite field - for _, field := range result.Document.CompositeFields { - fieldID := uint16(s.getOrDefineField(field.Name())) - _, tf := field.Analyze() - visitField(fieldID, tf) - } - - // walk each field - for i, field := range result.Document.Fields { - fieldID := uint16(s.getOrDefineField(field.Name())) - tf := result.Analyzed[i] - visitField(fieldID, tf) - } - } - - numPostingsLists := pidNext - - if cap(s.Postings) >= numPostingsLists { - s.Postings = s.Postings[:numPostingsLists] - } else { - postings := make([]*roaring.Bitmap, numPostingsLists) - copy(postings, s.Postings[:cap(s.Postings)]) - for i := 0; i < numPostingsLists; i++ { - if postings[i] == nil { - 
postings[i] = roaring.New() - } - } - s.Postings = postings - } - - if cap(s.FreqNorms) >= numPostingsLists { - s.FreqNorms = s.FreqNorms[:numPostingsLists] - } else { - s.FreqNorms = make([][]interimFreqNorm, numPostingsLists) - } - - if cap(s.freqNormsBacking) >= totTFs { - s.freqNormsBacking = s.freqNormsBacking[:totTFs] - } else { - s.freqNormsBacking = make([]interimFreqNorm, totTFs) - } - - freqNormsBacking := s.freqNormsBacking - for pid, numTerms := range s.numTermsPerPostingsList { - s.FreqNorms[pid] = freqNormsBacking[0:0] - freqNormsBacking = freqNormsBacking[numTerms:] - } - - if cap(s.Locs) >= numPostingsLists { - s.Locs = s.Locs[:numPostingsLists] - } else { - s.Locs = make([][]interimLoc, numPostingsLists) - } - - if cap(s.locsBacking) >= totLocs { - s.locsBacking = s.locsBacking[:totLocs] - } else { - s.locsBacking = make([]interimLoc, totLocs) - } - - locsBacking := s.locsBacking - for pid, numLocs := range s.numLocsPerPostingsList { - s.Locs[pid] = locsBacking[0:0] - locsBacking = locsBacking[numLocs:] - } -} - -func (s *interim) processDocuments() { - numFields := len(s.FieldsInv) - reuseFieldLens := make([]int, numFields) - reuseFieldTFs := make([]analysis.TokenFrequencies, numFields) - - for docNum, result := range s.results { - for i := 0; i < numFields; i++ { // clear these for reuse - reuseFieldLens[i] = 0 - reuseFieldTFs[i] = nil - } - - s.processDocument(uint64(docNum), result, - reuseFieldLens, reuseFieldTFs) - } -} - -func (s *interim) processDocument(docNum uint64, - result *index.AnalysisResult, - fieldLens []int, fieldTFs []analysis.TokenFrequencies) { - visitField := func(fieldID uint16, fieldName string, - ln int, tf analysis.TokenFrequencies) { - fieldLens[fieldID] += ln - - existingFreqs := fieldTFs[fieldID] - if existingFreqs != nil { - existingFreqs.MergeAll(fieldName, tf) - } else { - fieldTFs[fieldID] = tf - } - } - - // walk each composite field - for _, field := range result.Document.CompositeFields { - fieldID := 
uint16(s.getOrDefineField(field.Name())) - ln, tf := field.Analyze() - visitField(fieldID, field.Name(), ln, tf) - } - - // walk each field - for i, field := range result.Document.Fields { - fieldID := uint16(s.getOrDefineField(field.Name())) - ln := result.Length[i] - tf := result.Analyzed[i] - visitField(fieldID, field.Name(), ln, tf) - } - - // now that it's been rolled up into fieldTFs, walk that - for fieldID, tfs := range fieldTFs { - dict := s.Dicts[fieldID] - norm := float32(1.0 / math.Sqrt(float64(fieldLens[fieldID]))) - - for term, tf := range tfs { - pid := dict[term] - 1 - bs := s.Postings[pid] - bs.Add(uint32(docNum)) - - s.FreqNorms[pid] = append(s.FreqNorms[pid], - interimFreqNorm{ - freq: uint64(tf.Frequency()), - norm: norm, - numLocs: len(tf.Locations), - }) - - if len(tf.Locations) > 0 { - locs := s.Locs[pid] - - for _, loc := range tf.Locations { - var locf = uint16(fieldID) - if loc.Field != "" { - locf = uint16(s.getOrDefineField(loc.Field)) - } - var arrayposs []uint64 - if len(loc.ArrayPositions) > 0 { - arrayposs = loc.ArrayPositions - } - locs = append(locs, interimLoc{ - fieldID: locf, - pos: uint64(loc.Position), - start: uint64(loc.Start), - end: uint64(loc.End), - arrayposs: arrayposs, - }) - } - - s.Locs[pid] = locs - } - } - } -} - -func (s *interim) writeStoredFields() ( - storedIndexOffset uint64, err error) { - varBuf := make([]byte, binary.MaxVarintLen64) - metaEncode := func(val uint64) (int, error) { - wb := binary.PutUvarint(varBuf, val) - return s.metaBuf.Write(varBuf[:wb]) - } - - data, compressed := s.tmp0[:0], s.tmp1[:0] - defer func() { s.tmp0, s.tmp1 = data, compressed }() - - // keyed by docNum - docStoredOffsets := make([]uint64, len(s.results)) - - // keyed by fieldID, for the current doc in the loop - docStoredFields := map[uint16]interimStoredField{} - - for docNum, result := range s.results { - for fieldID := range docStoredFields { // reset for next doc - delete(docStoredFields, fieldID) - } - - for _, field := 
range result.Document.Fields { - fieldID := uint16(s.getOrDefineField(field.Name())) - - opts := field.Options() - - if opts.IsStored() { - isf := docStoredFields[fieldID] - isf.vals = append(isf.vals, field.Value()) - isf.typs = append(isf.typs, encodeFieldType(field)) - isf.arrayposs = append(isf.arrayposs, field.ArrayPositions()) - docStoredFields[fieldID] = isf - } - - if opts.IncludeDocValues() { - s.IncludeDocValues[fieldID] = true - } - - err := ValidateDocFields(field) - if err != nil { - return 0, err - } - } - - var curr int - - s.metaBuf.Reset() - data = data[:0] - - // _id field special case optimizes ExternalID() lookups - idFieldVal := docStoredFields[uint16(0)].vals[0] - _, err = metaEncode(uint64(len(idFieldVal))) - if err != nil { - return 0, err - } - - // handle non-"_id" fields - for fieldID := 1; fieldID < len(s.FieldsInv); fieldID++ { - isf, exists := docStoredFields[uint16(fieldID)] - if exists { - curr, data, err = persistStoredFieldValues( - fieldID, isf.vals, isf.typs, isf.arrayposs, - curr, metaEncode, data) - if err != nil { - return 0, err - } - } - } - - metaBytes := s.metaBuf.Bytes() - - compressed = snappy.Encode(compressed[:cap(compressed)], data) - - docStoredOffsets[docNum] = uint64(s.w.Count()) - - _, err := writeUvarints(s.w, - uint64(len(metaBytes)), - uint64(len(idFieldVal)+len(compressed))) - if err != nil { - return 0, err - } - - _, err = s.w.Write(metaBytes) - if err != nil { - return 0, err - } - - _, err = s.w.Write(idFieldVal) - if err != nil { - return 0, err - } - - _, err = s.w.Write(compressed) - if err != nil { - return 0, err - } - } - - storedIndexOffset = uint64(s.w.Count()) - - for _, docStoredOffset := range docStoredOffsets { - err = binary.Write(s.w, binary.BigEndian, docStoredOffset) - if err != nil { - return 0, err - } - } - - return storedIndexOffset, nil -} - -func (s *interim) writeDicts() (fdvIndexOffset uint64, dictOffsets []uint64, err error) { - dictOffsets = make([]uint64, len(s.FieldsInv)) - - 
fdvOffsetsStart := make([]uint64, len(s.FieldsInv)) - fdvOffsetsEnd := make([]uint64, len(s.FieldsInv)) - - buf := s.grabBuf(binary.MaxVarintLen64) - - tfEncoder := newChunkedIntCoder(uint64(s.chunkFactor), uint64(len(s.results)-1)) - locEncoder := newChunkedIntCoder(uint64(s.chunkFactor), uint64(len(s.results)-1)) - fdvEncoder := newChunkedContentCoder(uint64(s.chunkFactor), uint64(len(s.results)-1), s.w, false) - - var docTermMap [][]byte - - if s.builder == nil { - s.builder, err = vellum.New(&s.builderBuf, nil) - if err != nil { - return 0, nil, err - } - } - - for fieldID, terms := range s.DictKeys { - if cap(docTermMap) < len(s.results) { - docTermMap = make([][]byte, len(s.results)) - } else { - docTermMap = docTermMap[0:len(s.results)] - for docNum := range docTermMap { // reset the docTermMap - docTermMap[docNum] = docTermMap[docNum][:0] - } - } - - dict := s.Dicts[fieldID] - - for _, term := range terms { // terms are already sorted - pid := dict[term] - 1 - - postingsBS := s.Postings[pid] - - freqNorms := s.FreqNorms[pid] - freqNormOffset := 0 - - locs := s.Locs[pid] - locOffset := 0 - - postingsItr := postingsBS.Iterator() - for postingsItr.HasNext() { - docNum := uint64(postingsItr.Next()) - - freqNorm := freqNorms[freqNormOffset] - - err = tfEncoder.Add(docNum, - encodeFreqHasLocs(freqNorm.freq, freqNorm.numLocs > 0), - uint64(math.Float32bits(freqNorm.norm))) - if err != nil { - return 0, nil, err - } - - if freqNorm.numLocs > 0 { - numBytesLocs := 0 - for _, loc := range locs[locOffset : locOffset+freqNorm.numLocs] { - numBytesLocs += totalUvarintBytes( - uint64(loc.fieldID), loc.pos, loc.start, loc.end, - uint64(len(loc.arrayposs)), loc.arrayposs) - } - - err = locEncoder.Add(docNum, uint64(numBytesLocs)) - if err != nil { - return 0, nil, err - } - - for _, loc := range locs[locOffset : locOffset+freqNorm.numLocs] { - err = locEncoder.Add(docNum, - uint64(loc.fieldID), loc.pos, loc.start, loc.end, - uint64(len(loc.arrayposs))) - if err != nil { - 
return 0, nil, err - } - - err = locEncoder.Add(docNum, loc.arrayposs...) - if err != nil { - return 0, nil, err - } - } - - locOffset += freqNorm.numLocs - } - - freqNormOffset++ - - docTermMap[docNum] = append( - append(docTermMap[docNum], term...), - termSeparator) - } - - tfEncoder.Close() - locEncoder.Close() - - postingsOffset, err := - writePostings(postingsBS, tfEncoder, locEncoder, nil, s.w, buf) - if err != nil { - return 0, nil, err - } - - if postingsOffset > uint64(0) { - err = s.builder.Insert([]byte(term), postingsOffset) - if err != nil { - return 0, nil, err - } - } - - tfEncoder.Reset() - locEncoder.Reset() - } - - err = s.builder.Close() - if err != nil { - return 0, nil, err - } - - // record where this dictionary starts - dictOffsets[fieldID] = uint64(s.w.Count()) - - vellumData := s.builderBuf.Bytes() - - // write out the length of the vellum data - n := binary.PutUvarint(buf, uint64(len(vellumData))) - _, err = s.w.Write(buf[:n]) - if err != nil { - return 0, nil, err - } - - // write this vellum to disk - _, err = s.w.Write(vellumData) - if err != nil { - return 0, nil, err - } - - // reset vellum for reuse - s.builderBuf.Reset() - - err = s.builder.Reset(&s.builderBuf) - if err != nil { - return 0, nil, err - } - - // write the field doc values - if s.IncludeDocValues[fieldID] { - for docNum, docTerms := range docTermMap { - if len(docTerms) > 0 { - err = fdvEncoder.Add(uint64(docNum), docTerms) - if err != nil { - return 0, nil, err - } - } - } - err = fdvEncoder.Close() - if err != nil { - return 0, nil, err - } - - fdvOffsetsStart[fieldID] = uint64(s.w.Count()) - - _, err = fdvEncoder.Write() - if err != nil { - return 0, nil, err - } - - fdvOffsetsEnd[fieldID] = uint64(s.w.Count()) - - fdvEncoder.Reset() - } else { - fdvOffsetsStart[fieldID] = fieldNotUninverted - fdvOffsetsEnd[fieldID] = fieldNotUninverted - } - } - - fdvIndexOffset = uint64(s.w.Count()) - - for i := 0; i < len(fdvOffsetsStart); i++ { - n := binary.PutUvarint(buf, 
fdvOffsetsStart[i]) - _, err := s.w.Write(buf[:n]) - if err != nil { - return 0, nil, err - } - n = binary.PutUvarint(buf, fdvOffsetsEnd[i]) - _, err = s.w.Write(buf[:n]) - if err != nil { - return 0, nil, err - } - } - - return fdvIndexOffset, dictOffsets, nil -} - -func encodeFieldType(f document.Field) byte { - fieldType := byte('x') - switch f.(type) { - case *document.TextField: - fieldType = 't' - case *document.NumericField: - fieldType = 'n' - case *document.DateTimeField: - fieldType = 'd' - case *document.BooleanField: - fieldType = 'b' - case *document.GeoPointField: - fieldType = 'g' - case *document.CompositeField: - fieldType = 'c' - } - return fieldType -} - -// returns the total # of bytes needed to encode the given uint64's -// into binary.PutUVarint() encoding -func totalUvarintBytes(a, b, c, d, e uint64, more []uint64) (n int) { - n = numUvarintBytes(a) - n += numUvarintBytes(b) - n += numUvarintBytes(c) - n += numUvarintBytes(d) - n += numUvarintBytes(e) - for _, v := range more { - n += numUvarintBytes(v) - } - return n -} - -// returns # of bytes needed to encode x in binary.PutUvarint() encoding -func numUvarintBytes(x uint64) (n int) { - for x >= 0x80 { - x >>= 7 - n++ - } - return n + 1 -} diff --git a/index/scorch/segment/zap/posting.go b/index/scorch/segment/zap/posting.go deleted file mode 100644 index 4c43fdb9b..000000000 --- a/index/scorch/segment/zap/posting.go +++ /dev/null @@ -1,897 +0,0 @@ -// Copyright (c) 2017 Couchbase, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-// See the License for the specific language governing permissions and -// limitations under the License. - -package zap - -import ( - "encoding/binary" - "fmt" - "math" - "reflect" - - "github.com/RoaringBitmap/roaring" - "github.com/blevesearch/bleve/index/scorch/segment" - "github.com/blevesearch/bleve/size" -) - -var reflectStaticSizePostingsList int -var reflectStaticSizePostingsIterator int -var reflectStaticSizePosting int -var reflectStaticSizeLocation int - -func init() { - var pl PostingsList - reflectStaticSizePostingsList = int(reflect.TypeOf(pl).Size()) - var pi PostingsIterator - reflectStaticSizePostingsIterator = int(reflect.TypeOf(pi).Size()) - var p Posting - reflectStaticSizePosting = int(reflect.TypeOf(p).Size()) - var l Location - reflectStaticSizeLocation = int(reflect.TypeOf(l).Size()) -} - -// FST or vellum value (uint64) encoding is determined by the top two -// highest-order or most significant bits... -// -// encoding : MSB -// name : 63 62 61...to...bit #0 (LSB) -// ----------+---+---+--------------------------------------------------- -// general : 0 | 0 | 62-bits of postingsOffset. -// ~ : 0 | 1 | reserved for future. -// 1-hit : 1 | 0 | 31-bits of positive float31 norm | 31-bits docNum. -// ~ : 1 | 1 | reserved for future. -// -// Encoding "general" is able to handle all cases, where the -// postingsOffset points to more information about the postings for -// the term. -// -// Encoding "1-hit" is used to optimize a commonly seen case when a -// term has only a single hit. For example, a term in the _id field -// will have only 1 hit. The "1-hit" encoding is used for a term -// in a field when... -// -// - term vector info is disabled for that field; -// - and, the term appears in only a single doc for that field; -// - and, the term's freq is exactly 1 in that single doc for that field; -// - and, the docNum must fit into 31-bits; -// -// Otherwise, the "general" encoding is used instead. 
-// -// In the "1-hit" encoding, the field in that single doc may have -// other terms, which is supported in the "1-hit" encoding by the -// positive float31 norm. - -const FSTValEncodingMask = uint64(0xc000000000000000) -const FSTValEncodingGeneral = uint64(0x0000000000000000) -const FSTValEncoding1Hit = uint64(0x8000000000000000) - -func FSTValEncode1Hit(docNum uint64, normBits uint64) uint64 { - return FSTValEncoding1Hit | ((mask31Bits & normBits) << 31) | (mask31Bits & docNum) -} - -func FSTValDecode1Hit(v uint64) (docNum uint64, normBits uint64) { - return (mask31Bits & v), (mask31Bits & (v >> 31)) -} - -const mask31Bits = uint64(0x000000007fffffff) - -func under32Bits(x uint64) bool { - return x <= mask31Bits -} - -const DocNum1HitFinished = math.MaxUint64 - -var NormBits1Hit = uint64(math.Float32bits(float32(1))) - -// PostingsList is an in-memory representation of a postings list -type PostingsList struct { - sb *SegmentBase - postingsOffset uint64 - freqOffset uint64 - locOffset uint64 - postings *roaring.Bitmap - except *roaring.Bitmap - - // when normBits1Hit != 0, then this postings list came from a - // 1-hit encoding, and only the docNum1Hit & normBits1Hit apply - docNum1Hit uint64 - normBits1Hit uint64 -} - -// represents an immutable, empty postings list -var emptyPostingsList = &PostingsList{} - -func (p *PostingsList) Size() int { - sizeInBytes := reflectStaticSizePostingsList + size.SizeOfPtr - - if p.except != nil { - sizeInBytes += int(p.except.GetSizeInBytes()) - } - - return sizeInBytes -} - -func (p *PostingsList) OrInto(receiver *roaring.Bitmap) { - if p.normBits1Hit != 0 { - receiver.Add(uint32(p.docNum1Hit)) - return - } - - if p.postings != nil { - receiver.Or(p.postings) - } -} - -// Iterator returns an iterator for this postings list -func (p *PostingsList) Iterator(includeFreq, includeNorm, includeLocs bool, - prealloc segment.PostingsIterator) segment.PostingsIterator { - if p.normBits1Hit == 0 && p.postings == nil { - return 
emptyPostingsIterator - } - - var preallocPI *PostingsIterator - pi, ok := prealloc.(*PostingsIterator) - if ok && pi != nil { - preallocPI = pi - } - if preallocPI == emptyPostingsIterator { - preallocPI = nil - } - - return p.iterator(includeFreq, includeNorm, includeLocs, preallocPI) -} - -func (p *PostingsList) iterator(includeFreq, includeNorm, includeLocs bool, - rv *PostingsIterator) *PostingsIterator { - if rv == nil { - rv = &PostingsIterator{} - } else { - freqNormReader := rv.freqNormReader - if freqNormReader != nil { - freqNormReader.Reset([]byte(nil)) - } - - locReader := rv.locReader - if locReader != nil { - locReader.Reset([]byte(nil)) - } - - freqChunkOffsets := rv.freqChunkOffsets[:0] - locChunkOffsets := rv.locChunkOffsets[:0] - - nextLocs := rv.nextLocs[:0] - nextSegmentLocs := rv.nextSegmentLocs[:0] - - buf := rv.buf - - *rv = PostingsIterator{} // clear the struct - - rv.freqNormReader = freqNormReader - rv.locReader = locReader - - rv.freqChunkOffsets = freqChunkOffsets - rv.locChunkOffsets = locChunkOffsets - - rv.nextLocs = nextLocs - rv.nextSegmentLocs = nextSegmentLocs - - rv.buf = buf - } - - rv.postings = p - rv.includeFreqNorm = includeFreq || includeNorm || includeLocs - rv.includeLocs = includeLocs - - if p.normBits1Hit != 0 { - // "1-hit" encoding - rv.docNum1Hit = p.docNum1Hit - rv.normBits1Hit = p.normBits1Hit - - if p.except != nil && p.except.Contains(uint32(rv.docNum1Hit)) { - rv.docNum1Hit = DocNum1HitFinished - } - - return rv - } - - // "general" encoding, check if empty - if p.postings == nil { - return rv - } - - var n uint64 - var read int - - // prepare the freq chunk details - if rv.includeFreqNorm { - var numFreqChunks uint64 - numFreqChunks, read = binary.Uvarint(p.sb.mem[p.freqOffset+n : p.freqOffset+n+binary.MaxVarintLen64]) - n += uint64(read) - if cap(rv.freqChunkOffsets) >= int(numFreqChunks) { - rv.freqChunkOffsets = rv.freqChunkOffsets[:int(numFreqChunks)] - } else { - rv.freqChunkOffsets = make([]uint64, 
int(numFreqChunks)) - } - for i := 0; i < int(numFreqChunks); i++ { - rv.freqChunkOffsets[i], read = binary.Uvarint(p.sb.mem[p.freqOffset+n : p.freqOffset+n+binary.MaxVarintLen64]) - n += uint64(read) - } - rv.freqChunkStart = p.freqOffset + n - } - - // prepare the loc chunk details - if rv.includeLocs { - n = 0 - var numLocChunks uint64 - numLocChunks, read = binary.Uvarint(p.sb.mem[p.locOffset+n : p.locOffset+n+binary.MaxVarintLen64]) - n += uint64(read) - if cap(rv.locChunkOffsets) >= int(numLocChunks) { - rv.locChunkOffsets = rv.locChunkOffsets[:int(numLocChunks)] - } else { - rv.locChunkOffsets = make([]uint64, int(numLocChunks)) - } - for i := 0; i < int(numLocChunks); i++ { - rv.locChunkOffsets[i], read = binary.Uvarint(p.sb.mem[p.locOffset+n : p.locOffset+n+binary.MaxVarintLen64]) - n += uint64(read) - } - rv.locChunkStart = p.locOffset + n - } - - rv.all = p.postings.Iterator() - if p.except != nil { - rv.ActualBM = roaring.AndNot(p.postings, p.except) - rv.Actual = rv.ActualBM.Iterator() - } else { - rv.ActualBM = p.postings - rv.Actual = rv.all // Optimize to use same iterator for all & Actual. 
- } - - return rv -} - -// Count returns the number of items on this postings list -func (p *PostingsList) Count() uint64 { - var n, e uint64 - if p.normBits1Hit != 0 { - n = 1 - if p.except != nil && p.except.Contains(uint32(p.docNum1Hit)) { - e = 1 - } - } else if p.postings != nil { - n = p.postings.GetCardinality() - if p.except != nil { - e = p.postings.AndCardinality(p.except) - } - } - return n - e -} - -func (rv *PostingsList) read(postingsOffset uint64, d *Dictionary) error { - rv.postingsOffset = postingsOffset - - // handle "1-hit" encoding special case - if rv.postingsOffset&FSTValEncodingMask == FSTValEncoding1Hit { - return rv.init1Hit(postingsOffset) - } - - // read the location of the freq/norm details - var n uint64 - var read int - - rv.freqOffset, read = binary.Uvarint(d.sb.mem[postingsOffset+n : postingsOffset+binary.MaxVarintLen64]) - n += uint64(read) - - rv.locOffset, read = binary.Uvarint(d.sb.mem[postingsOffset+n : postingsOffset+n+binary.MaxVarintLen64]) - n += uint64(read) - - var postingsLen uint64 - postingsLen, read = binary.Uvarint(d.sb.mem[postingsOffset+n : postingsOffset+n+binary.MaxVarintLen64]) - n += uint64(read) - - roaringBytes := d.sb.mem[postingsOffset+n : postingsOffset+n+postingsLen] - - if rv.postings == nil { - rv.postings = roaring.NewBitmap() - } - _, err := rv.postings.FromBuffer(roaringBytes) - if err != nil { - return fmt.Errorf("error loading roaring bitmap: %v", err) - } - - return nil -} - -func (rv *PostingsList) init1Hit(fstVal uint64) error { - docNum, normBits := FSTValDecode1Hit(fstVal) - - rv.docNum1Hit = docNum - rv.normBits1Hit = normBits - - return nil -} - -// PostingsIterator provides a way to iterate through the postings list -type PostingsIterator struct { - postings *PostingsList - all roaring.IntPeekable - Actual roaring.IntPeekable - ActualBM *roaring.Bitmap - - currChunk uint32 - currChunkFreqNorm []byte - currChunkLoc []byte - - freqNormReader *segment.MemUvarintReader - locReader 
*segment.MemUvarintReader - - freqChunkOffsets []uint64 - freqChunkStart uint64 - - locChunkOffsets []uint64 - locChunkStart uint64 - - next Posting // reused across Next() calls - nextLocs []Location // reused across Next() calls - nextSegmentLocs []segment.Location // reused across Next() calls - - docNum1Hit uint64 - normBits1Hit uint64 - - buf []byte - - includeFreqNorm bool - includeLocs bool -} - -var emptyPostingsIterator = &PostingsIterator{} - -func (i *PostingsIterator) Size() int { - sizeInBytes := reflectStaticSizePostingsIterator + size.SizeOfPtr + - len(i.currChunkFreqNorm) + - len(i.currChunkLoc) + - len(i.freqChunkOffsets)*size.SizeOfUint64 + - len(i.locChunkOffsets)*size.SizeOfUint64 + - i.next.Size() - - for _, entry := range i.nextLocs { - sizeInBytes += entry.Size() - } - - return sizeInBytes -} - -func (i *PostingsIterator) loadChunk(chunk int) error { - if i.includeFreqNorm { - if chunk >= len(i.freqChunkOffsets) { - return fmt.Errorf("tried to load freq chunk that doesn't exist %d/(%d)", - chunk, len(i.freqChunkOffsets)) - } - - end, start := i.freqChunkStart, i.freqChunkStart - s, e := readChunkBoundary(chunk, i.freqChunkOffsets) - start += s - end += e - i.currChunkFreqNorm = i.postings.sb.mem[start:end] - if i.freqNormReader == nil { - i.freqNormReader = segment.NewMemUvarintReader(i.currChunkFreqNorm) - } else { - i.freqNormReader.Reset(i.currChunkFreqNorm) - } - } - - if i.includeLocs { - if chunk >= len(i.locChunkOffsets) { - return fmt.Errorf("tried to load loc chunk that doesn't exist %d/(%d)", - chunk, len(i.locChunkOffsets)) - } - - end, start := i.locChunkStart, i.locChunkStart - s, e := readChunkBoundary(chunk, i.locChunkOffsets) - start += s - end += e - i.currChunkLoc = i.postings.sb.mem[start:end] - if i.locReader == nil { - i.locReader = segment.NewMemUvarintReader(i.currChunkLoc) - } else { - i.locReader.Reset(i.currChunkLoc) - } - } - - i.currChunk = uint32(chunk) - return nil -} - -func (i *PostingsIterator) 
readFreqNormHasLocs() (uint64, uint64, bool, error) { - if i.normBits1Hit != 0 { - return 1, i.normBits1Hit, false, nil - } - - freqHasLocs, err := i.freqNormReader.ReadUvarint() - if err != nil { - return 0, 0, false, fmt.Errorf("error reading frequency: %v", err) - } - - freq, hasLocs := decodeFreqHasLocs(freqHasLocs) - - normBits, err := i.freqNormReader.ReadUvarint() - if err != nil { - return 0, 0, false, fmt.Errorf("error reading norm: %v", err) - } - - return freq, normBits, hasLocs, nil -} - -func (i *PostingsIterator) skipFreqNormReadHasLocs() (bool, error) { - if i.normBits1Hit != 0 { - return false, nil - } - - freqHasLocs, err := i.freqNormReader.ReadUvarint() - if err != nil { - return false, fmt.Errorf("error reading freqHasLocs: %v", err) - } - - i.freqNormReader.SkipUvarint() // Skip normBits. - - return freqHasLocs&0x01 != 0, nil // See decodeFreqHasLocs() / hasLocs. -} - -func encodeFreqHasLocs(freq uint64, hasLocs bool) uint64 { - rv := freq << 1 - if hasLocs { - rv = rv | 0x01 // 0'th LSB encodes whether there are locations - } - return rv -} - -func decodeFreqHasLocs(freqHasLocs uint64) (uint64, bool) { - freq := freqHasLocs >> 1 - hasLocs := freqHasLocs&0x01 != 0 - return freq, hasLocs -} - -// readLocation processes all the integers on the stream representing a single -// location. 
-func (i *PostingsIterator) readLocation(l *Location) error { - // read off field - fieldID, err := i.locReader.ReadUvarint() - if err != nil { - return fmt.Errorf("error reading location field: %v", err) - } - // read off pos - pos, err := i.locReader.ReadUvarint() - if err != nil { - return fmt.Errorf("error reading location pos: %v", err) - } - // read off start - start, err := i.locReader.ReadUvarint() - if err != nil { - return fmt.Errorf("error reading location start: %v", err) - } - // read off end - end, err := i.locReader.ReadUvarint() - if err != nil { - return fmt.Errorf("error reading location end: %v", err) - } - // read off num array pos - numArrayPos, err := i.locReader.ReadUvarint() - if err != nil { - return fmt.Errorf("error reading location num array pos: %v", err) - } - - l.field = i.postings.sb.fieldsInv[fieldID] - l.pos = pos - l.start = start - l.end = end - - if cap(l.ap) < int(numArrayPos) { - l.ap = make([]uint64, int(numArrayPos)) - } else { - l.ap = l.ap[:int(numArrayPos)] - } - - // read off array positions - for k := 0; k < int(numArrayPos); k++ { - ap, err := i.locReader.ReadUvarint() - if err != nil { - return fmt.Errorf("error reading array position: %v", err) - } - - l.ap[k] = ap - } - - return nil -} - -// Next returns the next posting on the postings list, or nil at the end -func (i *PostingsIterator) Next() (segment.Posting, error) { - return i.nextAtOrAfter(0) -} - -// Advance returns the posting at the specified docNum or it is not present -// the next posting, or if the end is reached, nil -func (i *PostingsIterator) Advance(docNum uint64) (segment.Posting, error) { - return i.nextAtOrAfter(docNum) -} - -// Next returns the next posting on the postings list, or nil at the end -func (i *PostingsIterator) nextAtOrAfter(atOrAfter uint64) (segment.Posting, error) { - docNum, exists, err := i.nextDocNumAtOrAfter(atOrAfter) - if err != nil || !exists { - return nil, err - } - - i.next = Posting{} // clear the struct - rv := &i.next 
- rv.docNum = docNum - - if !i.includeFreqNorm { - return rv, nil - } - - var normBits uint64 - var hasLocs bool - - rv.freq, normBits, hasLocs, err = i.readFreqNormHasLocs() - if err != nil { - return nil, err - } - - rv.norm = math.Float32frombits(uint32(normBits)) - - if i.includeLocs && hasLocs { - // prepare locations into reused slices, where we assume - // rv.freq >= "number of locs", since in a composite field, - // some component fields might have their IncludeTermVector - // flags disabled while other component fields are enabled - if cap(i.nextLocs) >= int(rv.freq) { - i.nextLocs = i.nextLocs[0:rv.freq] - } else { - i.nextLocs = make([]Location, rv.freq, rv.freq*2) - } - if cap(i.nextSegmentLocs) < int(rv.freq) { - i.nextSegmentLocs = make([]segment.Location, rv.freq, rv.freq*2) - } - rv.locs = i.nextSegmentLocs[:0] - - numLocsBytes, err := i.locReader.ReadUvarint() - if err != nil { - return nil, fmt.Errorf("error reading location numLocsBytes: %v", err) - } - - j := 0 - startBytesRemaining := i.locReader.Len() // # bytes remaining in the locReader - for startBytesRemaining-i.locReader.Len() < int(numLocsBytes) { - err := i.readLocation(&i.nextLocs[j]) - if err != nil { - return nil, err - } - rv.locs = append(rv.locs, &i.nextLocs[j]) - j++ - } - } - - return rv, nil -} - -var freqHasLocs1Hit = encodeFreqHasLocs(1, false) - -// nextBytes returns the docNum and the encoded freq & loc bytes for -// the next posting -func (i *PostingsIterator) nextBytes() ( - docNumOut uint64, freq uint64, normBits uint64, - bytesFreqNorm []byte, bytesLoc []byte, err error) { - docNum, exists, err := i.nextDocNumAtOrAfter(0) - if err != nil || !exists { - return 0, 0, 0, nil, nil, err - } - - if i.normBits1Hit != 0 { - if i.buf == nil { - i.buf = make([]byte, binary.MaxVarintLen64*2) - } - n := binary.PutUvarint(i.buf, freqHasLocs1Hit) - n += binary.PutUvarint(i.buf[n:], i.normBits1Hit) - return docNum, uint64(1), i.normBits1Hit, i.buf[:n], nil, nil - } - - startFreqNorm 
:= len(i.currChunkFreqNorm) - i.freqNormReader.Len() - - var hasLocs bool - - freq, normBits, hasLocs, err = i.readFreqNormHasLocs() - if err != nil { - return 0, 0, 0, nil, nil, err - } - - endFreqNorm := len(i.currChunkFreqNorm) - i.freqNormReader.Len() - bytesFreqNorm = i.currChunkFreqNorm[startFreqNorm:endFreqNorm] - - if hasLocs { - startLoc := len(i.currChunkLoc) - i.locReader.Len() - - numLocsBytes, err := i.locReader.ReadUvarint() - if err != nil { - return 0, 0, 0, nil, nil, - fmt.Errorf("error reading location nextBytes numLocs: %v", err) - } - - // skip over all the location bytes - i.locReader.SkipBytes(int(numLocsBytes)) - - endLoc := len(i.currChunkLoc) - i.locReader.Len() - bytesLoc = i.currChunkLoc[startLoc:endLoc] - } - - return docNum, freq, normBits, bytesFreqNorm, bytesLoc, nil -} - -// nextDocNum returns the next docNum on the postings list, and also -// sets up the currChunk / loc related fields of the iterator. -func (i *PostingsIterator) nextDocNumAtOrAfter(atOrAfter uint64) (uint64, bool, error) { - if i.normBits1Hit != 0 { - if i.docNum1Hit == DocNum1HitFinished { - return 0, false, nil - } - if i.docNum1Hit < atOrAfter { - // advanced past our 1-hit - i.docNum1Hit = DocNum1HitFinished // consume our 1-hit docNum - return 0, false, nil - } - docNum := i.docNum1Hit - i.docNum1Hit = DocNum1HitFinished // consume our 1-hit docNum - return docNum, true, nil - } - - if i.Actual == nil || !i.Actual.HasNext() { - return 0, false, nil - } - - if i.postings == nil || i.postings.postings == i.ActualBM { - return i.nextDocNumAtOrAfterClean(atOrAfter) - } - - i.Actual.AdvanceIfNeeded(uint32(atOrAfter)) - - if !i.Actual.HasNext() { - // couldn't find anything - return 0, false, nil - } - - n := i.Actual.Next() - allN := i.all.Next() - - nChunk := n / i.postings.sb.chunkFactor - - // when allN becomes >= to here, then allN is in the same chunk as nChunk. 
- allNReachesNChunk := nChunk * i.postings.sb.chunkFactor - - // n is the next actual hit (excluding some postings), and - // allN is the next hit in the full postings, and - // if they don't match, move 'all' forwards until they do - for allN != n { - // we've reached same chunk, so move the freq/norm/loc decoders forward - if i.includeFreqNorm && allN >= allNReachesNChunk { - err := i.currChunkNext(nChunk) - if err != nil { - return 0, false, err - } - } - - allN = i.all.Next() - } - - if i.includeFreqNorm && (i.currChunk != nChunk || i.currChunkFreqNorm == nil) { - err := i.loadChunk(int(nChunk)) - if err != nil { - return 0, false, fmt.Errorf("error loading chunk: %v", err) - } - } - - return uint64(n), true, nil -} - -// optimization when the postings list is "clean" (e.g., no updates & -// no deletions) where the all bitmap is the same as the actual bitmap -func (i *PostingsIterator) nextDocNumAtOrAfterClean( - atOrAfter uint64) (uint64, bool, error) { - - if !i.includeFreqNorm { - i.Actual.AdvanceIfNeeded(uint32(atOrAfter)) - - if !i.Actual.HasNext() { - return 0, false, nil // couldn't find anything - } - - return uint64(i.Actual.Next()), true, nil - } - - // freq-norm's needed, so maintain freq-norm chunk reader - sameChunkNexts := 0 // # of times we called Next() in the same chunk - n := i.Actual.Next() - nChunk := n / i.postings.sb.chunkFactor - - for uint64(n) < atOrAfter && i.Actual.HasNext() { - n = i.Actual.Next() - - nChunkPrev := nChunk - nChunk = n / i.postings.sb.chunkFactor - - if nChunk != nChunkPrev { - sameChunkNexts = 0 - } else { - sameChunkNexts += 1 - } - } - - if uint64(n) < atOrAfter { - // couldn't find anything - return 0, false, nil - } - - for j := 0; j < sameChunkNexts; j++ { - err := i.currChunkNext(nChunk) - if err != nil { - return 0, false, fmt.Errorf("error optimized currChunkNext: %v", err) - } - } - - if i.currChunk != nChunk || i.currChunkFreqNorm == nil { - err := i.loadChunk(int(nChunk)) - if err != nil { - return 0, 
false, fmt.Errorf("error loading chunk: %v", err) - } - } - - return uint64(n), true, nil -} - -func (i *PostingsIterator) currChunkNext(nChunk uint32) error { - if i.currChunk != nChunk || i.currChunkFreqNorm == nil { - err := i.loadChunk(int(nChunk)) - if err != nil { - return fmt.Errorf("error loading chunk: %v", err) - } - } - - // read off freq/offsets even though we don't care about them - hasLocs, err := i.skipFreqNormReadHasLocs() - if err != nil { - return err - } - - if i.includeLocs && hasLocs { - numLocsBytes, err := i.locReader.ReadUvarint() - if err != nil { - return fmt.Errorf("error reading location numLocsBytes: %v", err) - } - - // skip over all the location bytes - i.locReader.SkipBytes(int(numLocsBytes)) - } - - return nil -} - -// DocNum1Hit returns the docNum and true if this is "1-hit" optimized -// and the docNum is available. -func (p *PostingsIterator) DocNum1Hit() (uint64, bool) { - if p.normBits1Hit != 0 && p.docNum1Hit != DocNum1HitFinished { - return p.docNum1Hit, true - } - return 0, false -} - -// PostingsIteratorFromBitmap constructs a PostingsIterator given an -// "actual" bitmap. -func PostingsIteratorFromBitmap(bm *roaring.Bitmap, - includeFreqNorm, includeLocs bool) (*PostingsIterator, error) { - return &PostingsIterator{ - ActualBM: bm, - Actual: bm.Iterator(), - includeFreqNorm: includeFreqNorm, - includeLocs: includeLocs, - }, nil -} - -// PostingsIteratorFrom1Hit constructs a PostingsIterator given a -// 1-hit docNum. 
-func PostingsIteratorFrom1Hit(docNum1Hit, normBits1Hit uint64, - includeFreqNorm, includeLocs bool) (*PostingsIterator, error) { - return &PostingsIterator{ - docNum1Hit: docNum1Hit, - normBits1Hit: normBits1Hit, - includeFreqNorm: includeFreqNorm, - includeLocs: includeLocs, - }, nil -} - -// Posting is a single entry in a postings list -type Posting struct { - docNum uint64 - freq uint64 - norm float32 - locs []segment.Location -} - -func (p *Posting) Size() int { - sizeInBytes := reflectStaticSizePosting - - for _, entry := range p.locs { - sizeInBytes += entry.Size() - } - - return sizeInBytes -} - -// Number returns the document number of this posting in this segment -func (p *Posting) Number() uint64 { - return p.docNum -} - -// Frequency returns the frequencies of occurrence of this term in this doc/field -func (p *Posting) Frequency() uint64 { - return p.freq -} - -// Norm returns the normalization factor for this posting -func (p *Posting) Norm() float64 { - return float64(p.norm) -} - -// Locations returns the location information for each occurrence -func (p *Posting) Locations() []segment.Location { - return p.locs -} - -// Location represents the location of a single occurrence -type Location struct { - field string - pos uint64 - start uint64 - end uint64 - ap []uint64 -} - -func (l *Location) Size() int { - return reflectStaticSizeLocation + - len(l.field) + - len(l.ap)*size.SizeOfUint64 -} - -// Field returns the name of the field (useful in composite fields to know -// which original field the value came from) -func (l *Location) Field() string { - return l.field -} - -// Start returns the start byte offset of this occurrence -func (l *Location) Start() uint64 { - return l.start -} - -// End returns the end byte offset of this occurrence -func (l *Location) End() uint64 { - return l.end -} - -// Pos returns the 1-based phrase position of this occurrence -func (l *Location) Pos() uint64 { - return l.pos -} - -// ArrayPositions returns the array 
position vector associated with this occurrence -func (l *Location) ArrayPositions() []uint64 { - return l.ap -} diff --git a/index/scorch/segment/zap/read.go b/index/scorch/segment/zap/read.go deleted file mode 100644 index e47d4c6ab..000000000 --- a/index/scorch/segment/zap/read.go +++ /dev/null @@ -1,43 +0,0 @@ -// Copyright (c) 2017 Couchbase, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package zap - -import "encoding/binary" - -func (s *SegmentBase) getDocStoredMetaAndCompressed(docNum uint64) ([]byte, []byte) { - _, storedOffset, n, metaLen, dataLen := s.getDocStoredOffsets(docNum) - - meta := s.mem[storedOffset+n : storedOffset+n+metaLen] - data := s.mem[storedOffset+n+metaLen : storedOffset+n+metaLen+dataLen] - - return meta, data -} - -func (s *SegmentBase) getDocStoredOffsets(docNum uint64) ( - uint64, uint64, uint64, uint64, uint64) { - indexOffset := s.storedIndexOffset + (8 * docNum) - - storedOffset := binary.BigEndian.Uint64(s.mem[indexOffset : indexOffset+8]) - - var n uint64 - - metaLen, read := binary.Uvarint(s.mem[storedOffset : storedOffset+binary.MaxVarintLen64]) - n += uint64(read) - - dataLen, read := binary.Uvarint(s.mem[storedOffset+n : storedOffset+n+binary.MaxVarintLen64]) - n += uint64(read) - - return indexOffset, storedOffset, n, metaLen, dataLen -} diff --git a/index/scorch/segment/zap/segment.go b/index/scorch/segment/zap/segment.go deleted file mode 100644 index 5aa33a26c..000000000 --- 
a/index/scorch/segment/zap/segment.go +++ /dev/null @@ -1,572 +0,0 @@ -// Copyright (c) 2017 Couchbase, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package zap - -import ( - "bytes" - "encoding/binary" - "fmt" - "io" - "os" - "sync" - "unsafe" - - "github.com/RoaringBitmap/roaring" - "github.com/blevesearch/bleve/index/scorch/segment" - "github.com/blevesearch/bleve/size" - "github.com/couchbase/vellum" - mmap "github.com/edsrzf/mmap-go" - "github.com/golang/snappy" -) - -var reflectStaticSizeSegmentBase int - -func init() { - var sb SegmentBase - reflectStaticSizeSegmentBase = int(unsafe.Sizeof(sb)) -} - -// Open returns a zap impl of a segment -func Open(path string) (segment.Segment, error) { - f, err := os.Open(path) - if err != nil { - return nil, err - } - mm, err := mmap.Map(f, mmap.RDONLY, 0) - if err != nil { - // mmap failed, try to close the file - _ = f.Close() - return nil, err - } - - rv := &Segment{ - SegmentBase: SegmentBase{ - mem: mm[0 : len(mm)-FooterSize], - fieldsMap: make(map[string]uint16), - fieldDvReaders: make(map[uint16]*docValueReader), - fieldFSTs: make(map[uint16]*vellum.FST), - }, - f: f, - mm: mm, - path: path, - refs: 1, - } - rv.SegmentBase.updateSize() - - err = rv.loadConfig() - if err != nil { - _ = rv.Close() - return nil, err - } - - err = rv.loadFields() - if err != nil { - _ = rv.Close() - return nil, err - } - - err = rv.loadDvReaders() - if err != nil { - _ = rv.Close() - return nil, err - } - - return 
rv, nil -} - -// SegmentBase is a memory only, read-only implementation of the -// segment.Segment interface, using zap's data representation. -type SegmentBase struct { - mem []byte - memCRC uint32 - chunkFactor uint32 - fieldsMap map[string]uint16 // fieldName -> fieldID+1 - fieldsInv []string // fieldID -> fieldName - numDocs uint64 - storedIndexOffset uint64 - fieldsIndexOffset uint64 - docValueOffset uint64 - dictLocs []uint64 - fieldDvReaders map[uint16]*docValueReader // naive chunk cache per field - fieldDvNames []string // field names cached in fieldDvReaders - size uint64 - - m sync.Mutex - fieldFSTs map[uint16]*vellum.FST -} - -func (sb *SegmentBase) Size() int { - return int(sb.size) -} - -func (sb *SegmentBase) updateSize() { - sizeInBytes := reflectStaticSizeSegmentBase + - cap(sb.mem) - - // fieldsMap - for k, _ := range sb.fieldsMap { - sizeInBytes += (len(k) + size.SizeOfString) + size.SizeOfUint16 - } - - // fieldsInv, dictLocs - for _, entry := range sb.fieldsInv { - sizeInBytes += len(entry) + size.SizeOfString - } - sizeInBytes += len(sb.dictLocs) * size.SizeOfUint64 - - // fieldDvReaders - for _, v := range sb.fieldDvReaders { - sizeInBytes += size.SizeOfUint16 + size.SizeOfPtr - if v != nil { - sizeInBytes += v.size() - } - } - - sb.size = uint64(sizeInBytes) -} - -func (sb *SegmentBase) AddRef() {} -func (sb *SegmentBase) DecRef() (err error) { return nil } -func (sb *SegmentBase) Close() (err error) { return nil } - -// Segment implements a persisted segment.Segment interface, by -// embedding an mmap()'ed SegmentBase. -type Segment struct { - SegmentBase - - f *os.File - mm mmap.MMap - path string - version uint32 - crc uint32 - - m sync.Mutex // Protects the fields that follow. 
- refs int64 -} - -func (s *Segment) Size() int { - // 8 /* size of file pointer */ - // 4 /* size of version -> uint32 */ - // 4 /* size of crc -> uint32 */ - sizeOfUints := 16 - - sizeInBytes := (len(s.path) + size.SizeOfString) + sizeOfUints - - // mutex, refs -> int64 - sizeInBytes += 16 - - // do not include the mmap'ed part - return sizeInBytes + s.SegmentBase.Size() - cap(s.mem) -} - -func (s *Segment) AddRef() { - s.m.Lock() - s.refs++ - s.m.Unlock() -} - -func (s *Segment) DecRef() (err error) { - s.m.Lock() - s.refs-- - if s.refs == 0 { - err = s.closeActual() - } - s.m.Unlock() - return err -} - -func (s *Segment) loadConfig() error { - crcOffset := len(s.mm) - 4 - s.crc = binary.BigEndian.Uint32(s.mm[crcOffset : crcOffset+4]) - - verOffset := crcOffset - 4 - s.version = binary.BigEndian.Uint32(s.mm[verOffset : verOffset+4]) - if s.version != Version { - return fmt.Errorf("unsupported version %d", s.version) - } - - chunkOffset := verOffset - 4 - s.chunkFactor = binary.BigEndian.Uint32(s.mm[chunkOffset : chunkOffset+4]) - - docValueOffset := chunkOffset - 8 - s.docValueOffset = binary.BigEndian.Uint64(s.mm[docValueOffset : docValueOffset+8]) - - fieldsIndexOffset := docValueOffset - 8 - s.fieldsIndexOffset = binary.BigEndian.Uint64(s.mm[fieldsIndexOffset : fieldsIndexOffset+8]) - - storedIndexOffset := fieldsIndexOffset - 8 - s.storedIndexOffset = binary.BigEndian.Uint64(s.mm[storedIndexOffset : storedIndexOffset+8]) - - numDocsOffset := storedIndexOffset - 8 - s.numDocs = binary.BigEndian.Uint64(s.mm[numDocsOffset : numDocsOffset+8]) - return nil -} - -func (s *SegmentBase) loadFields() error { - // NOTE for now we assume the fields index immediately precedes - // the footer, and if this changes, need to adjust accordingly (or - // store explicit length), where s.mem was sliced from s.mm in Open(). 
- fieldsIndexEnd := uint64(len(s.mem)) - - // iterate through fields index - var fieldID uint64 - for s.fieldsIndexOffset+(8*fieldID) < fieldsIndexEnd { - addr := binary.BigEndian.Uint64(s.mem[s.fieldsIndexOffset+(8*fieldID) : s.fieldsIndexOffset+(8*fieldID)+8]) - - dictLoc, read := binary.Uvarint(s.mem[addr:fieldsIndexEnd]) - n := uint64(read) - s.dictLocs = append(s.dictLocs, dictLoc) - - var nameLen uint64 - nameLen, read = binary.Uvarint(s.mem[addr+n : fieldsIndexEnd]) - n += uint64(read) - - name := string(s.mem[addr+n : addr+n+nameLen]) - s.fieldsInv = append(s.fieldsInv, name) - s.fieldsMap[name] = uint16(fieldID + 1) - - fieldID++ - } - return nil -} - -// Dictionary returns the term dictionary for the specified field -func (s *SegmentBase) Dictionary(field string) (segment.TermDictionary, error) { - dict, err := s.dictionary(field) - if err == nil && dict == nil { - return &segment.EmptyDictionary{}, nil - } - return dict, err -} - -func (sb *SegmentBase) dictionary(field string) (rv *Dictionary, err error) { - fieldIDPlus1 := sb.fieldsMap[field] - if fieldIDPlus1 > 0 { - rv = &Dictionary{ - sb: sb, - field: field, - fieldID: fieldIDPlus1 - 1, - } - - dictStart := sb.dictLocs[rv.fieldID] - if dictStart > 0 { - var ok bool - sb.m.Lock() - if rv.fst, ok = sb.fieldFSTs[rv.fieldID]; !ok { - // read the length of the vellum data - vellumLen, read := binary.Uvarint(sb.mem[dictStart : dictStart+binary.MaxVarintLen64]) - fstBytes := sb.mem[dictStart+uint64(read) : dictStart+uint64(read)+vellumLen] - rv.fst, err = vellum.Load(fstBytes) - if err != nil { - sb.m.Unlock() - return nil, fmt.Errorf("dictionary field %s vellum err: %v", field, err) - } - - sb.fieldFSTs[rv.fieldID] = rv.fst - } - - sb.m.Unlock() - rv.fstReader, err = rv.fst.Reader() - if err != nil { - return nil, fmt.Errorf("dictionary field %s vellum reader err: %v", field, err) - } - - } - } - - return rv, nil -} - -// visitDocumentCtx holds data structures that are reusable across -// multiple 
VisitDocument() calls to avoid memory allocations -type visitDocumentCtx struct { - buf []byte - reader bytes.Reader - arrayPos []uint64 -} - -var visitDocumentCtxPool = sync.Pool{ - New: func() interface{} { - reuse := &visitDocumentCtx{} - return reuse - }, -} - -// VisitDocument invokes the DocFieldValueVistor for each stored field -// for the specified doc number -func (s *SegmentBase) VisitDocument(num uint64, visitor segment.DocumentFieldValueVisitor) error { - vdc := visitDocumentCtxPool.Get().(*visitDocumentCtx) - defer visitDocumentCtxPool.Put(vdc) - return s.visitDocument(vdc, num, visitor) -} - -func (s *SegmentBase) visitDocument(vdc *visitDocumentCtx, num uint64, - visitor segment.DocumentFieldValueVisitor) error { - // first make sure this is a valid number in this segment - if num < s.numDocs { - meta, compressed := s.getDocStoredMetaAndCompressed(num) - - vdc.reader.Reset(meta) - - // handle _id field special case - idFieldValLen, err := binary.ReadUvarint(&vdc.reader) - if err != nil { - return err - } - idFieldVal := compressed[:idFieldValLen] - - keepGoing := visitor("_id", byte('t'), idFieldVal, nil) - if !keepGoing { - visitDocumentCtxPool.Put(vdc) - return nil - } - - // handle non-"_id" fields - compressed = compressed[idFieldValLen:] - - uncompressed, err := snappy.Decode(vdc.buf[:cap(vdc.buf)], compressed) - if err != nil { - return err - } - - for keepGoing { - field, err := binary.ReadUvarint(&vdc.reader) - if err == io.EOF { - break - } - if err != nil { - return err - } - typ, err := binary.ReadUvarint(&vdc.reader) - if err != nil { - return err - } - offset, err := binary.ReadUvarint(&vdc.reader) - if err != nil { - return err - } - l, err := binary.ReadUvarint(&vdc.reader) - if err != nil { - return err - } - numap, err := binary.ReadUvarint(&vdc.reader) - if err != nil { - return err - } - var arrayPos []uint64 - if numap > 0 { - if cap(vdc.arrayPos) < int(numap) { - vdc.arrayPos = make([]uint64, numap) - } - arrayPos = 
vdc.arrayPos[:numap] - for i := 0; i < int(numap); i++ { - ap, err := binary.ReadUvarint(&vdc.reader) - if err != nil { - return err - } - arrayPos[i] = ap - } - } - - value := uncompressed[offset : offset+l] - keepGoing = visitor(s.fieldsInv[field], byte(typ), value, arrayPos) - } - - vdc.buf = uncompressed - } - return nil -} - -// DocID returns the value of the _id field for the given docNum -func (s *SegmentBase) DocID(num uint64) ([]byte, error) { - if num >= s.numDocs { - return nil, nil - } - - vdc := visitDocumentCtxPool.Get().(*visitDocumentCtx) - - meta, compressed := s.getDocStoredMetaAndCompressed(num) - - vdc.reader.Reset(meta) - - // handle _id field special case - idFieldValLen, err := binary.ReadUvarint(&vdc.reader) - if err != nil { - return nil, err - } - idFieldVal := compressed[:idFieldValLen] - - visitDocumentCtxPool.Put(vdc) - - return idFieldVal, nil -} - -// Count returns the number of documents in this segment. -func (s *SegmentBase) Count() uint64 { - return s.numDocs -} - -// DocNumbers returns a bitset corresponding to the doc numbers of all the -// provided _id strings -func (s *SegmentBase) DocNumbers(ids []string) (*roaring.Bitmap, error) { - rv := roaring.New() - - if len(s.fieldsMap) > 0 { - idDict, err := s.dictionary("_id") - if err != nil { - return nil, err - } - - postingsList := emptyPostingsList - - sMax, err := idDict.fst.GetMaxKey() - if err != nil { - return nil, err - } - sMaxStr := string(sMax) - filteredIds := make([]string, 0, len(ids)) - for _, id := range ids { - if id <= sMaxStr { - filteredIds = append(filteredIds, id) - } - } - - for _, id := range filteredIds { - postingsList, err = idDict.postingsList([]byte(id), nil, postingsList) - if err != nil { - return nil, err - } - postingsList.OrInto(rv) - } - } - - return rv, nil -} - -// Fields returns the field names used in this segment -func (s *SegmentBase) Fields() []string { - return s.fieldsInv -} - -// Path returns the path of this segment on disk -func (s 
*Segment) Path() string { - return s.path -} - -// Close releases all resources associated with this segment -func (s *Segment) Close() (err error) { - return s.DecRef() -} - -func (s *Segment) closeActual() (err error) { - if s.mm != nil { - err = s.mm.Unmap() - } - // try to close file even if unmap failed - if s.f != nil { - err2 := s.f.Close() - if err == nil { - // try to return first error - err = err2 - } - } - return -} - -// some helpers i started adding for the command-line utility - -// Data returns the underlying mmaped data slice -func (s *Segment) Data() []byte { - return s.mm -} - -// CRC returns the CRC value stored in the file footer -func (s *Segment) CRC() uint32 { - return s.crc -} - -// Version returns the file version in the file footer -func (s *Segment) Version() uint32 { - return s.version -} - -// ChunkFactor returns the chunk factor in the file footer -func (s *Segment) ChunkFactor() uint32 { - return s.chunkFactor -} - -// FieldsIndexOffset returns the fields index offset in the file footer -func (s *Segment) FieldsIndexOffset() uint64 { - return s.fieldsIndexOffset -} - -// StoredIndexOffset returns the stored value index offset in the file footer -func (s *Segment) StoredIndexOffset() uint64 { - return s.storedIndexOffset -} - -// DocValueOffset returns the docValue offset in the file footer -func (s *Segment) DocValueOffset() uint64 { - return s.docValueOffset -} - -// NumDocs returns the number of documents in the file footer -func (s *Segment) NumDocs() uint64 { - return s.numDocs -} - -// DictAddr is a helper function to compute the file offset where the -// dictionary is stored for the specified field. 
-func (s *Segment) DictAddr(field string) (uint64, error) { - fieldIDPlus1, ok := s.fieldsMap[field] - if !ok { - return 0, fmt.Errorf("no such field '%s'", field) - } - - return s.dictLocs[fieldIDPlus1-1], nil -} - -func (s *SegmentBase) loadDvReaders() error { - if s.docValueOffset == fieldNotUninverted || s.numDocs == 0 { - return nil - } - - var read uint64 - for fieldID, field := range s.fieldsInv { - var fieldLocStart, fieldLocEnd uint64 - var n int - fieldLocStart, n = binary.Uvarint(s.mem[s.docValueOffset+read : s.docValueOffset+read+binary.MaxVarintLen64]) - if n <= 0 { - return fmt.Errorf("loadDvReaders: failed to read the docvalue offset start for field %d", fieldID) - } - read += uint64(n) - fieldLocEnd, n = binary.Uvarint(s.mem[s.docValueOffset+read : s.docValueOffset+read+binary.MaxVarintLen64]) - if n <= 0 { - return fmt.Errorf("loadDvReaders: failed to read the docvalue offset end for field %d", fieldID) - } - read += uint64(n) - - fieldDvReader, err := s.loadFieldDocValueReader(field, fieldLocStart, fieldLocEnd) - if err != nil { - return err - } - if fieldDvReader != nil { - s.fieldDvReaders[uint16(fieldID)] = fieldDvReader - s.fieldDvNames = append(s.fieldDvNames, field) - } - } - - return nil -} diff --git a/index/scorch/segment/zap/segment_test.go b/index/scorch/segment/zap/segment_test.go deleted file mode 100644 index ffe4c7c80..000000000 --- a/index/scorch/segment/zap/segment_test.go +++ /dev/null @@ -1,737 +0,0 @@ -// Copyright (c) 2017 Couchbase, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-// See the License for the specific language governing permissions and -// limitations under the License. - -package zap - -import ( - "math" - "os" - "reflect" - "testing" - - "github.com/RoaringBitmap/roaring" - "github.com/blevesearch/bleve/index" - "github.com/blevesearch/bleve/index/scorch/segment" -) - -func TestOpen(t *testing.T) { - _ = os.RemoveAll("/tmp/scorch.zap") - - testSeg, _, _ := buildTestSegment() - err := PersistSegmentBase(testSeg, "/tmp/scorch.zap") - if err != nil { - t.Fatalf("error persisting segment: %v", err) - } - - segment, err := Open("/tmp/scorch.zap") - if err != nil { - t.Fatalf("error opening segment: %v", err) - } - defer func() { - cerr := segment.Close() - if cerr != nil { - t.Fatalf("error closing segment: %v", cerr) - } - }() - - expectFields := map[string]struct{}{ - "_id": struct{}{}, - "_all": struct{}{}, - "name": struct{}{}, - "desc": struct{}{}, - "tag": struct{}{}, - } - fields := segment.Fields() - if len(fields) != len(expectFields) { - t.Errorf("expected %d fields, only got %d", len(expectFields), len(fields)) - } - for _, field := range fields { - if _, ok := expectFields[field]; !ok { - t.Errorf("got unexpected field: %s", field) - } - } - - docCount := segment.Count() - if docCount != 1 { - t.Errorf("expected count 1, got %d", docCount) - } - - // check the _id field - dict, err := segment.Dictionary("_id") - if err != nil { - t.Fatal(err) - } - if dict == nil { - t.Fatal("got nil dict, expected non-nil") - } - - postingsList, err := dict.PostingsList([]byte("a"), nil, nil) - if err != nil { - t.Fatal(err) - } - if postingsList == nil { - t.Fatal("got nil postings list, expected non-nil") - } - - postingsItr := postingsList.Iterator(true, true, true, nil) - if postingsItr == nil { - t.Fatal("got nil iterator, expected non-nil") - } - - count := 0 - nextPosting, err := postingsItr.Next() - for nextPosting != nil && err == nil { - count++ - if nextPosting.Frequency() != 1 { - t.Errorf("expected frequency 1, got %d", 
nextPosting.Frequency()) - } - if nextPosting.Number() != 0 { - t.Errorf("expected doc number 0, got %d", nextPosting.Number()) - } - if nextPosting.Norm() != 1.0 { - t.Errorf("expected norm 1.0, got %f", nextPosting.Norm()) - } - - nextPosting, err = postingsItr.Next() - } - if err != nil { - t.Fatal(err) - } - - if count != 1 { - t.Errorf("expected count to be 1, got %d", count) - } - - // check the name field - dict, err = segment.Dictionary("name") - if err != nil { - t.Fatal(err) - } - if dict == nil { - t.Fatal("got nil dict, expected non-nil") - } - - postingsList, err = dict.PostingsList([]byte("wow"), nil, nil) - if err != nil { - t.Fatal(err) - } - if postingsList == nil { - t.Fatal("got nil postings list, expected non-nil") - } - - postingsItr = postingsList.Iterator(true, true, true, nil) - if postingsItr == nil { - t.Fatal("got nil iterator, expected non-nil") - } - - count = 0 - nextPosting, err = postingsItr.Next() - for nextPosting != nil && err == nil { - count++ - if nextPosting.Frequency() != 1 { - t.Errorf("expected frequency 1, got %d", nextPosting.Frequency()) - } - if nextPosting.Number() != 0 { - t.Errorf("expected doc number 0, got %d", nextPosting.Number()) - } - if nextPosting.Norm() != 1.0 { - t.Errorf("expected norm 1.0, got %f", nextPosting.Norm()) - } - var numLocs uint64 - for _, loc := range nextPosting.Locations() { - numLocs++ - if loc.Field() != "name" { - t.Errorf("expected loc field to be 'name', got '%s'", loc.Field()) - } - if loc.Start() != 0 { - t.Errorf("expected loc start to be 0, got %d", loc.Start()) - } - if loc.End() != 3 { - t.Errorf("expected loc end to be 3, got %d", loc.End()) - } - if loc.Pos() != 1 { - t.Errorf("expected loc pos to be 1, got %d", loc.Pos()) - } - if loc.ArrayPositions() != nil { - t.Errorf("expect loc array pos to be nil, got %v", loc.ArrayPositions()) - } - } - if numLocs != nextPosting.Frequency() { - t.Errorf("expected %d locations, got %d", nextPosting.Frequency(), numLocs) - } - - 
nextPosting, err = postingsItr.Next() - } - if err != nil { - t.Fatal(err) - } - - if count != 1 { - t.Errorf("expected count to be 1, got %d", count) - } - - // check the _all field (composite) - dict, err = segment.Dictionary("_all") - if err != nil { - t.Fatal(err) - } - if dict == nil { - t.Fatal("got nil dict, expected non-nil") - } - - postingsList, err = dict.PostingsList([]byte("wow"), nil, nil) - if err != nil { - t.Fatal(err) - } - if postingsList == nil { - t.Fatal("got nil postings list, expected non-nil") - } - - postingsItr = postingsList.Iterator(true, true, true, nil) - if postingsItr == nil { - t.Fatal("got nil iterator, expected non-nil") - } - - count = 0 - nextPosting, err = postingsItr.Next() - for nextPosting != nil && err == nil { - count++ - if nextPosting.Frequency() != 1 { - t.Errorf("expected frequency 1, got %d", nextPosting.Frequency()) - } - if nextPosting.Number() != 0 { - t.Errorf("expected doc number 0, got %d", nextPosting.Number()) - } - expectedNorm := float32(1.0 / math.Sqrt(float64(5))) - if nextPosting.Norm() != float64(expectedNorm) { - t.Errorf("expected norm %f, got %f", expectedNorm, nextPosting.Norm()) - } - var numLocs uint64 - for _, loc := range nextPosting.Locations() { - numLocs++ - if loc.Field() != "name" { - t.Errorf("expected loc field to be 'name', got '%s'", loc.Field()) - } - if loc.Start() != 0 { - t.Errorf("expected loc start to be 0, got %d", loc.Start()) - } - if loc.End() != 3 { - t.Errorf("expected loc end to be 3, got %d", loc.End()) - } - if loc.Pos() != 1 { - t.Errorf("expected loc pos to be 1, got %d", loc.Pos()) - } - if loc.ArrayPositions() != nil { - t.Errorf("expect loc array pos to be nil, got %v", loc.ArrayPositions()) - } - } - if numLocs != nextPosting.Frequency() { - t.Errorf("expected %d locations, got %d", nextPosting.Frequency(), numLocs) - } - - nextPosting, err = postingsItr.Next() - } - if err != nil { - t.Fatal(err) - } - - if count != 1 { - t.Errorf("expected count to be 1, got %d", 
count) - } - - // now try a field with array positions - dict, err = segment.Dictionary("tag") - if err != nil { - t.Fatal(err) - } - if dict == nil { - t.Fatal("got nil dict, expected non-nil") - } - - postingsList, err = dict.PostingsList([]byte("dark"), nil, nil) - if err != nil { - t.Fatal(err) - } - if postingsList == nil { - t.Fatal("got nil postings list, expected non-nil") - } - - postingsItr = postingsList.Iterator(true, true, true, nil) - if postingsItr == nil { - t.Fatal("got nil iterator, expected non-nil") - } - - nextPosting, err = postingsItr.Next() - for nextPosting != nil && err == nil { - - if nextPosting.Frequency() != 1 { - t.Errorf("expected frequency 1, got %d", nextPosting.Frequency()) - } - if nextPosting.Number() != 0 { - t.Errorf("expected doc number 0, got %d", nextPosting.Number()) - } - var numLocs uint64 - for _, loc := range nextPosting.Locations() { - numLocs++ - if loc.Field() != "tag" { - t.Errorf("expected loc field to be 'name', got '%s'", loc.Field()) - } - if loc.Start() != 0 { - t.Errorf("expected loc start to be 0, got %d", loc.Start()) - } - if loc.End() != 4 { - t.Errorf("expected loc end to be 3, got %d", loc.End()) - } - if loc.Pos() != 1 { - t.Errorf("expected loc pos to be 1, got %d", loc.Pos()) - } - expectArrayPos := []uint64{1} - if !reflect.DeepEqual(loc.ArrayPositions(), expectArrayPos) { - t.Errorf("expect loc array pos to be %v, got %v", expectArrayPos, loc.ArrayPositions()) - } - } - if numLocs != nextPosting.Frequency() { - t.Errorf("expected %d locations, got %d", nextPosting.Frequency(), numLocs) - } - - nextPosting, err = postingsItr.Next() - } - if err != nil { - t.Fatal(err) - } - - // now try and visit a document - var fieldValuesSeen int - err = segment.VisitDocument(0, func(field string, typ byte, value []byte, pos []uint64) bool { - fieldValuesSeen++ - return true - }) - if err != nil { - t.Fatal(err) - } - if fieldValuesSeen != 5 { - t.Errorf("expected 5 field values, got %d", fieldValuesSeen) - } -} 
- -func TestOpenMulti(t *testing.T) { - _ = os.RemoveAll("/tmp/scorch.zap") - - testSeg, _, _ := buildTestSegmentMulti() - err := PersistSegmentBase(testSeg, "/tmp/scorch.zap") - if err != nil { - t.Fatalf("error persisting segment: %v", err) - } - - segment, err := Open("/tmp/scorch.zap") - if err != nil { - t.Fatalf("error opening segment: %v", err) - } - defer func() { - cerr := segment.Close() - if cerr != nil { - t.Fatalf("error closing segment: %v", cerr) - } - }() - - if segment.Count() != 2 { - t.Errorf("expected count 2, got %d", segment.Count()) - } - - // check the desc field - dict, err := segment.Dictionary("desc") - if err != nil { - t.Fatal(err) - } - if dict == nil { - t.Fatal("got nil dict, expected non-nil") - } - - postingsList, err := dict.PostingsList([]byte("thing"), nil, nil) - if err != nil { - t.Fatal(err) - } - if postingsList == nil { - t.Fatal("got nil postings list, expected non-nil") - } - - postingsItr := postingsList.Iterator(true, true, true, nil) - if postingsItr == nil { - t.Fatal("got nil iterator, expected non-nil") - } - - count := 0 - nextPosting, err := postingsItr.Next() - for nextPosting != nil && err == nil { - count++ - nextPosting, err = postingsItr.Next() - } - if err != nil { - t.Fatal(err) - } - - if count != 2 { - t.Errorf("expected count to be 2, got %d", count) - } - - // get docnum of a - exclude, err := segment.DocNumbers([]string{"a"}) - if err != nil { - t.Fatal(err) - } - - // look for term 'thing' excluding doc 'a' - postingsListExcluding, err := dict.PostingsList([]byte("thing"), exclude, nil) - if err != nil { - t.Fatal(err) - } - if postingsList == nil { - t.Fatal("got nil postings list, expected non-nil") - } - - postingsListExcludingCount := postingsListExcluding.Count() - if postingsListExcludingCount != 1 { - t.Errorf("expected count from postings list to be 1, got %d", postingsListExcludingCount) - } - - postingsItrExcluding := postingsListExcluding.Iterator(true, true, true, nil) - if postingsItr == 
nil { - t.Fatal("got nil iterator, expected non-nil") - } - - count = 0 - nextPosting, err = postingsItrExcluding.Next() - for nextPosting != nil && err == nil { - count++ - nextPosting, err = postingsItrExcluding.Next() - } - if err != nil { - t.Fatal(err) - } - - if count != 1 { - t.Errorf("expected count to be 1, got %d", count) - } -} - -func TestOpenMultiWithTwoChunks(t *testing.T) { - _ = os.RemoveAll("/tmp/scorch.zap") - - testSeg, _, _ := buildTestSegmentMultiWithChunkFactor(1) - err := PersistSegmentBase(testSeg, "/tmp/scorch.zap") - if err != nil { - t.Fatalf("error persisting segment: %v", err) - } - - segment, err := Open("/tmp/scorch.zap") - if err != nil { - t.Fatalf("error opening segment: %v", err) - } - defer func() { - cerr := segment.Close() - if cerr != nil { - t.Fatalf("error closing segment: %v", cerr) - } - }() - - if segment.Count() != 2 { - t.Errorf("expected count 2, got %d", segment.Count()) - } - - // check the desc field - dict, err := segment.Dictionary("desc") - if err != nil { - t.Fatal(err) - } - if dict == nil { - t.Fatal("got nil dict, expected non-nil") - } - - postingsList, err := dict.PostingsList([]byte("thing"), nil, nil) - if err != nil { - t.Fatal(err) - } - if postingsList == nil { - t.Fatal("got nil postings list, expected non-nil") - } - - postingsItr := postingsList.Iterator(true, true, true, nil) - if postingsItr == nil { - t.Fatal("got nil iterator, expected non-nil") - } - - count := 0 - nextPosting, err := postingsItr.Next() - for nextPosting != nil && err == nil { - count++ - nextPosting, err = postingsItr.Next() - } - if err != nil { - t.Fatal(err) - } - - if count != 2 { - t.Errorf("expected count to be 2, got %d", count) - } - - // get docnum of a - exclude, err := segment.DocNumbers([]string{"a"}) - if err != nil { - t.Fatal(err) - } - - // look for term 'thing' excluding doc 'a' - postingsListExcluding, err := dict.PostingsList([]byte("thing"), exclude, nil) - if err != nil { - t.Fatal(err) - } - if 
postingsList == nil { - t.Fatal("got nil postings list, expected non-nil") - } - - postingsItrExcluding := postingsListExcluding.Iterator(true, true, true, nil) - if postingsItr == nil { - t.Fatal("got nil iterator, expected non-nil") - } - - count = 0 - nextPosting, err = postingsItrExcluding.Next() - for nextPosting != nil && err == nil { - count++ - nextPosting, err = postingsItrExcluding.Next() - } - if err != nil { - t.Fatal(err) - } - - if count != 1 { - t.Errorf("expected count to be 1, got %d", count) - } -} - -func TestSegmentVisitableDocValueFieldsList(t *testing.T) { - _ = os.RemoveAll("/tmp/scorch.zap") - - testSeg, _, _ := buildTestSegmentMultiWithChunkFactor(1) - err := PersistSegmentBase(testSeg, "/tmp/scorch.zap") - if err != nil { - t.Fatalf("error persisting segment: %v", err) - } - - seg, err := Open("/tmp/scorch.zap") - if err != nil { - t.Fatalf("error opening segment: %v", err) - } - - if zaps, ok := seg.(segment.DocumentFieldTermVisitable); ok { - fields, err := zaps.VisitableDocValueFields() - if err != nil { - t.Fatalf("segment VisitableDocValueFields err: %v", err) - } - // no persisted doc value fields - if len(fields) != 0 { - t.Errorf("expected no persisted fields for doc values, got: %#v", fields) - } - } - - err = seg.Close() - if err != nil { - t.Fatalf("error closing segment: %v", err) - } - _ = os.RemoveAll("/tmp/scorch.zap") - - testSeg, _, _ = buildTestSegmentWithDefaultFieldMapping(1) - err = PersistSegmentBase(testSeg, "/tmp/scorch.zap") - if err != nil { - t.Fatalf("error persisting segment: %v", err) - } - - seg, err = Open("/tmp/scorch.zap") - if err != nil { - t.Fatalf("error opening segment: %v", err) - } - - defer func() { - cerr := seg.Close() - if cerr != nil { - t.Fatalf("error closing segment: %v", cerr) - } - }() - - if zaps, ok := seg.(segment.DocumentFieldTermVisitable); ok { - fields, err := zaps.VisitableDocValueFields() - if err != nil { - t.Fatalf("segment VisitableDocValueFields err: %v", err) - } - - 
expectedFields := []string{"desc", "name", "tag"} - if !reflect.DeepEqual(fields, expectedFields) { - t.Errorf("expected field terms: %#v, got: %#v", expectedFields, fields) - } - - fieldTerms := make(index.FieldTerms) - _, err = zaps.VisitDocumentFieldTerms(0, fields, func(field string, term []byte) { - fieldTerms[field] = append(fieldTerms[field], string(term)) - }, nil) - if err != nil { - t.Error(err) - } - - expectedFieldTerms := index.FieldTerms{ - "name": []string{"wow"}, - "desc": []string{"some", "thing"}, - "tag": []string{"cold"}, - } - if !reflect.DeepEqual(fieldTerms, expectedFieldTerms) { - t.Errorf("expected field terms: %#v, got: %#v", expectedFieldTerms, fieldTerms) - } - - } -} - -func TestSegmentDocsWithNonOverlappingFields(t *testing.T) { - _ = os.RemoveAll("/tmp/scorch.zap") - - testSeg, _, err := buildTestSegmentMultiWithDifferentFields(true, true) - if err != nil { - t.Fatalf("error building segment: %v", err) - } - err = PersistSegmentBase(testSeg, "/tmp/scorch.zap") - if err != nil { - t.Fatalf("error persisting segment: %v", err) - } - - segment, err := Open("/tmp/scorch.zap") - if err != nil { - t.Fatalf("error opening segment: %v", err) - } - defer func() { - cerr := segment.Close() - if cerr != nil { - t.Fatalf("error closing segment: %v", cerr) - } - }() - - if segment.Count() != 2 { - t.Errorf("expected 2, got %d", segment.Count()) - } - - expectFields := map[string]struct{}{ - "_id": struct{}{}, - "_all": struct{}{}, - "name": struct{}{}, - "dept": struct{}{}, - "manages.id": struct{}{}, - "manages.count": struct{}{}, - "reportsTo.id": struct{}{}, - } - - fields := segment.Fields() - if len(fields) != len(expectFields) { - t.Errorf("expected %d fields, only got %d", len(expectFields), len(fields)) - } - for _, field := range fields { - if _, ok := expectFields[field]; !ok { - t.Errorf("got unexpected field: %s", field) - } - } -} - -func TestMergedSegmentDocsWithNonOverlappingFields(t *testing.T) { - _ = 
os.RemoveAll("/tmp/scorch1.zap") - _ = os.RemoveAll("/tmp/scorch2.zap") - _ = os.RemoveAll("/tmp/scorch3.zap") - - testSeg1, _, _ := buildTestSegmentMultiWithDifferentFields(true, false) - err := PersistSegmentBase(testSeg1, "/tmp/scorch1.zap") - if err != nil { - t.Fatalf("error persisting segment: %v", err) - } - - testSeg2, _, _ := buildTestSegmentMultiWithDifferentFields(false, true) - err = PersistSegmentBase(testSeg2, "/tmp/scorch2.zap") - if err != nil { - t.Fatalf("error persisting segment: %v", err) - } - - segment1, err := Open("/tmp/scorch1.zap") - if err != nil { - t.Fatalf("error opening segment: %v", err) - } - defer func() { - cerr := segment1.Close() - if cerr != nil { - t.Fatalf("error closing segment: %v", cerr) - } - }() - - segment2, err := Open("/tmp/scorch2.zap") - if err != nil { - t.Fatalf("error opening segment: %v", err) - } - defer func() { - cerr := segment2.Close() - if cerr != nil { - t.Fatalf("error closing segment: %v", cerr) - } - }() - - segsToMerge := make([]*Segment, 2) - segsToMerge[0] = segment1.(*Segment) - segsToMerge[1] = segment2.(*Segment) - - _, nBytes, err := Merge(segsToMerge, []*roaring.Bitmap{nil, nil}, "/tmp/scorch3.zap", 1024, nil, nil) - if err != nil { - t.Fatal(err) - } - - if nBytes == 0 { - t.Fatalf("expected a non zero total_compaction_written_bytes") - } - - segmentM, err := Open("/tmp/scorch3.zap") - if err != nil { - t.Fatalf("error opening merged segment: %v", err) - } - defer func() { - cerr := segmentM.Close() - if cerr != nil { - t.Fatalf("error closing segment: %v", cerr) - } - }() - - if segmentM.Count() != 2 { - t.Errorf("expected 2, got %d", segmentM.Count()) - } - - expectFields := map[string]struct{}{ - "_id": struct{}{}, - "_all": struct{}{}, - "name": struct{}{}, - "dept": struct{}{}, - "manages.id": struct{}{}, - "manages.count": struct{}{}, - "reportsTo.id": struct{}{}, - } - - fields := segmentM.Fields() - if len(fields) != len(expectFields) { - t.Errorf("expected %d fields, only got %d", 
len(expectFields), len(fields)) - } - for _, field := range fields { - if _, ok := expectFields[field]; !ok { - t.Errorf("got unexpected field: %s", field) - } - } -} diff --git a/index/scorch/segment/zap/write.go b/index/scorch/segment/zap/write.go deleted file mode 100644 index cddaedd00..000000000 --- a/index/scorch/segment/zap/write.go +++ /dev/null @@ -1,145 +0,0 @@ -// Copyright (c) 2017 Couchbase, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package zap - -import ( - "encoding/binary" - "io" - - "github.com/RoaringBitmap/roaring" -) - -// writes out the length of the roaring bitmap in bytes as varint -// then writes out the roaring bitmap itself -func writeRoaringWithLen(r *roaring.Bitmap, w io.Writer, - reuseBufVarint []byte) (int, error) { - buf, err := r.ToBytes() - if err != nil { - return 0, err - } - - var tw int - - // write out the length - n := binary.PutUvarint(reuseBufVarint, uint64(len(buf))) - nw, err := w.Write(reuseBufVarint[:n]) - tw += nw - if err != nil { - return tw, err - } - - // write out the roaring bytes - nw, err = w.Write(buf) - tw += nw - if err != nil { - return tw, err - } - - return tw, nil -} - -func persistFields(fieldsInv []string, w *CountHashWriter, dictLocs []uint64) (uint64, error) { - var rv uint64 - var fieldsOffsets []uint64 - - for fieldID, fieldName := range fieldsInv { - // record start of this field - fieldsOffsets = append(fieldsOffsets, uint64(w.Count())) - - // write out the dict location and 
field name length - _, err := writeUvarints(w, dictLocs[fieldID], uint64(len(fieldName))) - if err != nil { - return 0, err - } - - // write out the field name - _, err = w.Write([]byte(fieldName)) - if err != nil { - return 0, err - } - } - - // now write out the fields index - rv = uint64(w.Count()) - for fieldID := range fieldsInv { - err := binary.Write(w, binary.BigEndian, fieldsOffsets[fieldID]) - if err != nil { - return 0, err - } - } - - return rv, nil -} - -// FooterSize is the size of the footer record in bytes -// crc + ver + chunk + field offset + stored offset + num docs + docValueOffset -const FooterSize = 4 + 4 + 4 + 8 + 8 + 8 + 8 - -func persistFooter(numDocs, storedIndexOffset, fieldsIndexOffset, docValueOffset uint64, - chunkFactor uint32, crcBeforeFooter uint32, writerIn io.Writer) error { - w := NewCountHashWriter(writerIn) - w.crc = crcBeforeFooter - - // write out the number of docs - err := binary.Write(w, binary.BigEndian, numDocs) - if err != nil { - return err - } - // write out the stored field index location: - err = binary.Write(w, binary.BigEndian, storedIndexOffset) - if err != nil { - return err - } - // write out the field index location - err = binary.Write(w, binary.BigEndian, fieldsIndexOffset) - if err != nil { - return err - } - // write out the fieldDocValue location - err = binary.Write(w, binary.BigEndian, docValueOffset) - if err != nil { - return err - } - // write out 32-bit chunk factor - err = binary.Write(w, binary.BigEndian, chunkFactor) - if err != nil { - return err - } - // write out 32-bit version - err = binary.Write(w, binary.BigEndian, Version) - if err != nil { - return err - } - // write out CRC-32 of everything upto but not including this CRC - err = binary.Write(w, binary.BigEndian, w.crc) - if err != nil { - return err - } - return nil -} - -func writeUvarints(w io.Writer, vals ...uint64) (tw int, err error) { - buf := make([]byte, binary.MaxVarintLen64) - for _, val := range vals { - n := 
binary.PutUvarint(buf, val) - var nw int - nw, err = w.Write(buf[:n]) - tw += nw - if err != nil { - return tw, err - } - } - return tw, err -} diff --git a/index/scorch/segment/zap/write_test.go b/index/scorch/segment/zap/write_test.go deleted file mode 100644 index 2e72d4b82..000000000 --- a/index/scorch/segment/zap/write_test.go +++ /dev/null @@ -1,86 +0,0 @@ -// Copyright (c) 2018 Couchbase, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package zap - -import ( - "math" - "testing" - - "github.com/RoaringBitmap/roaring" -) - -func TestRoaringSizes(t *testing.T) { - tests := []struct { - vals []uint32 - expectedSize int // expected serialized # bytes - optimizedSize int // after calling roaring's RunOptimize() API - }{ - {[]uint32{}, 8, 8}, // empty roaring is 8 bytes - - {[]uint32{0}, 18, 18}, // single entry roaring is 18 bytes - {[]uint32{1}, 18, 18}, - {[]uint32{4}, 18, 18}, - {[]uint32{4000}, 18, 18}, - {[]uint32{40000000}, 18, 18}, - {[]uint32{math.MaxUint32}, 18, 18}, - {[]uint32{math.MaxUint32 - 1}, 18, 18}, - - {[]uint32{0, 1}, 20, 20}, - {[]uint32{0, 10000000}, 28, 28}, - - {[]uint32{0, 1, 2}, 22, 15}, - {[]uint32{0, 1, 20000000}, 30, 30}, - - {[]uint32{0, 1, 2, 3}, 24, 15}, - {[]uint32{0, 1, 2, 30000000}, 32, 21}, - } - - for _, test := range tests { - bm := roaring.New() - for _, val := range test.vals { - bm.Add(val) - } - - b, err := bm.ToBytes() - if err != nil { - t.Errorf("expected no ToBytes() err, got: %v", err) - } - if 
len(b) != test.expectedSize { - t.Errorf("size did not match,"+ - " got: %d, test: %#v", len(b), test) - } - if int(bm.GetSerializedSizeInBytes()) != test.expectedSize { - t.Errorf("GetSerializedSizeInBytes did not match,"+ - " got: %d, test: %#v", - bm.GetSerializedSizeInBytes(), test) - } - - bm.RunOptimize() - - b, err = bm.ToBytes() - if err != nil { - t.Errorf("expected no ToBytes() err, got: %v", err) - } - if len(b) != test.optimizedSize { - t.Errorf("optimized size did not match,"+ - " got: %d, test: %#v", len(b), test) - } - if int(bm.GetSerializedSizeInBytes()) != test.optimizedSize { - t.Errorf("optimized GetSerializedSizeInBytes did not match,"+ - " got: %d, test: %#v", - bm.GetSerializedSizeInBytes(), test) - } - } -} diff --git a/index/scorch/segment/zap/zap.md b/index/scorch/segment/zap/zap.md deleted file mode 100644 index d74dc548b..000000000 --- a/index/scorch/segment/zap/zap.md +++ /dev/null @@ -1,177 +0,0 @@ -# ZAP File Format - -## Legend - -### Sections - - |========| - | | section - |========| - -### Fixed-size fields - - |--------| |----| |--| |-| - | | uint64 | | uint32 | | uint16 | | uint8 - |--------| |----| |--| |-| - -### Varints - - |~~~~~~~~| - | | varint(up to uint64) - |~~~~~~~~| - -### Arbitrary-length fields - - |--------...---| - | | arbitrary-length field (string, vellum, roaring bitmap) - |--------...---| - -### Chunked data - - [--------] - [ ] - [--------] - -## Overview - -Footer section describes the configuration of particular ZAP file. The format of footer is version-dependent, so it is necessary to check `V` field before the parsing. 
- - |==================================================| - | Stored Fields | - |==================================================| - |-----> | Stored Fields Index | - | |==================================================| - | | Dictionaries + Postings + DocValues | - | |==================================================| - | |---> | DocValues Index | - | | |==================================================| - | | | Fields | - | | |==================================================| - | | |-> | Fields Index | - | | | |========|========|========|========|====|====|====| - | | | | D# | SF | F | FDV | CF | V | CC | (Footer) - | | | |========|====|===|====|===|====|===|====|====|====| - | | | | | | - |-+-+-----------------| | | - | |--------------------------| | - |-------------------------------------| - - D#. Number of Docs. - SF. Stored Fields Index Offset. - F. Field Index Offset. - FDV. Field DocValue Offset. - CF. Chunk Factor. - V. Version. - CC. CRC32. - -## Stored Fields - -Stored Fields Index is `D#` consecutive 64-bit unsigned integers - offsets, where relevant Stored Fields Data records are located. - - 0 [SF] [SF + D# * 8] - | Stored Fields | Stored Fields Index | - |================================|==================================| - | | | - | |--------------------| ||--------|--------|. . .|--------|| - | |-> | Stored Fields Data | || 0 | 1 | | D# - 1 || - | | |--------------------| ||--------|----|---|. . .|--------|| - | | | | | - |===|============================|==============|===================| - | | - |-------------------------------------------| - -Stored Fields Data is an arbitrary size record, which consists of metadata and [Snappy](https://github.com/golang/snappy)-compressed data. - - Stored Fields Data - |~~~~~~~~|~~~~~~~~|~~~~~~~~...~~~~~~~~|~~~~~~~~...~~~~~~~~| - | MDS | CDS | MD | CD | - |~~~~~~~~|~~~~~~~~|~~~~~~~~...~~~~~~~~|~~~~~~~~...~~~~~~~~| - - MDS. Metadata size. - CDS. Compressed data size. - MD. Metadata. - CD. 
Snappy-compressed data. - -## Fields - -Fields Index section located between addresses `F` and `len(file) - len(footer)` and consist of `uint64` values (`F1`, `F2`, ...) which are offsets to records in Fields section. We have `F# = (len(file) - len(footer) - F) / sizeof(uint64)` fields. - - - (...) [F] [F + F#] - | Fields | Fields Index. | - |================================|================================| - | | | - | |~~~~~~~~|~~~~~~~~|---...---|||--------|--------|...|--------|| - ||->| Dict | Length | Name ||| 0 | 1 | | F# - 1 || - || |~~~~~~~~|~~~~~~~~|---...---|||--------|----|---|...|--------|| - || | | | - ||===============================|==============|=================| - | | - |----------------------------------------------| - - -## Dictionaries + Postings - -Each of fields has its own dictionary, encoded in [Vellum](https://github.com/couchbase/vellum) format. Dictionary consists of pairs `(term, offset)`, where `offset` indicates the position of postings (list of documents) for this particular term. - - |================================================================|- Dictionaries + - | | Postings + - | | DocValues - | Freq/Norm (chunked) | - | [~~~~~~|~~~~~~~~~~~~~~~~~~~~~~~~~~~~~] | - | |->[ Freq | Norm (float32 under varint) ] | - | | [~~~~~~|~~~~~~~~~~~~~~~~~~~~~~~~~~~~~] | - | | | - | |------------------------------------------------------------| | - | Location Details (chunked) | | - | [~~~~~~|~~~~~|~~~~~~~|~~~~~|~~~~~~|~~~~~~~~|~~~~~] | | - | |->[ Size | Pos | Start | End | Arr# | ArrPos | ... 
] | | - | | [~~~~~~|~~~~~|~~~~~~~|~~~~~|~~~~~~|~~~~~~~~|~~~~~] | | - | | | | - | |----------------------| | | - | Postings List | | | - | |~~~~~~~~|~~~~~|~~|~~~~~~~~|-----------...--| | | - | |->| F/N | LD | Length | ROARING BITMAP | | | - | | |~~~~~|~~|~~~~~~~~|~~~~~~~~|-----------...--| | | - | | |----------------------------------------------| | - | |--------------------------------------| | - | Dictionary | | - | |~~~~~~~~|--------------------------|-...-| | - | |->| Length | VELLUM DATA : (TERM -> OFFSET) | | - | | |~~~~~~~~|----------------------------...-| | - | | | - |======|=========================================================|- DocValues Index - | | | - |======|=========================================================|- Fields - | | | - | |~~~~|~~~|~~~~~~~~|---...---| | - | | Dict | Length | Name | | - | |~~~~~~~~|~~~~~~~~|---...---| | - | | - |================================================================| - -## DocValues - -DocValues Index is `F#` pairs of varints, one pair per field. Each pair of varints indicates start and end point of DocValues slice. - - |================================================================| - | |------...--| | - | |->| DocValues |<-| | - | | |------...--| | | - |==|=================|===========================================|- DocValues Index - ||~|~~~~~~~~~|~~~~~~~|~~| |~~~~~~~~~~~~~~|~~~~~~~~~~~~|| - || DV1 START | DV1 STOP | . . . . . | DV(F#) START | DV(F#) END || - ||~~~~~~~~~~~|~~~~~~~~~~| |~~~~~~~~~~~~~~|~~~~~~~~~~~~|| - |================================================================| - -DocValues is chunked Snappy-compressed values for each document and field. - - [~~~~~~~~~~~~~~~|~~~~~~|~~~~~~~~~|-...-|~~~~~~|~~~~~~~~~|--------------------...-] - [ Doc# in Chunk | Doc1 | Offset1 | ... | DocN | OffsetN | SNAPPY COMPRESSED DATA ] - [~~~~~~~~~~~~~~~|~~~~~~|~~~~~~~~~|-...-|~~~~~~|~~~~~~~~~|--------------------...-] - -Last 16 bytes are description of chunks. 
- - |~~~~~~~~~~~~...~|----------------|----------------| - | Chunk Sizes | Chunk Size Arr | Chunk# | - |~~~~~~~~~~~~...~|----------------|----------------| diff --git a/index/scorch/segment_plugin.go b/index/scorch/segment_plugin.go new file mode 100644 index 000000000..01eda7fbd --- /dev/null +++ b/index/scorch/segment_plugin.go @@ -0,0 +1,77 @@ +// Copyright (c) 2019 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package scorch + +import ( + "fmt" + + "github.com/blevesearch/bleve/index/scorch/segment" + + zapv11 "github.com/blevesearch/zap/v11" + zapv12 "github.com/blevesearch/zap/v12" +) + +var supportedSegmentPlugins map[string]map[uint32]segment.Plugin +var defaultSegmentPlugin segment.Plugin + +func init() { + ResetPlugins() + RegisterPlugin(zapv12.Plugin(), false) + RegisterPlugin(zapv11.Plugin(), true) +} + +func ResetPlugins() { + supportedSegmentPlugins = map[string]map[uint32]segment.Plugin{} +} + +func RegisterPlugin(plugin segment.Plugin, makeDefault bool) { + if _, ok := supportedSegmentPlugins[plugin.Type()]; !ok { + supportedSegmentPlugins[plugin.Type()] = map[uint32]segment.Plugin{} + } + supportedSegmentPlugins[plugin.Type()][plugin.Version()] = plugin + if makeDefault { + defaultSegmentPlugin = plugin + } +} + +func SupportedSegmentTypes() (rv []string) { + for k := range supportedSegmentPlugins { + rv = append(rv, k) + } + return +} + +func SupportedSegmentTypeVersions(typ string) (rv []uint32) { + for k := range 
supportedSegmentPlugins[typ] { + rv = append(rv, k) + } + return rv +} + +func (s *Scorch) loadSegmentPlugin(forcedSegmentType string, + forcedSegmentVersion uint32) error { + if versions, ok := supportedSegmentPlugins[forcedSegmentType]; ok { + if segPlugin, ok := versions[uint32(forcedSegmentVersion)]; ok { + s.segPlugin = segPlugin + return nil + } + return fmt.Errorf( + "unsupported version %d for segment type: %s, supported: %v", + forcedSegmentVersion, forcedSegmentType, + SupportedSegmentTypeVersions(forcedSegmentType)) + } + return fmt.Errorf("unsupported segment type: %s, supported: %v", + forcedSegmentType, SupportedSegmentTypes()) +} diff --git a/vendor/manifest b/vendor/manifest deleted file mode 100644 index 1c0b482ef..000000000 --- a/vendor/manifest +++ /dev/null @@ -1,145 +0,0 @@ -{ - "version": 0, - "dependencies": [ - { - "importpath": "github.com/blevesearch/go-porterstemmer", - "repository": "https://github.com/blevesearch/go-porterstemmer", - "vcs": "", - "revision": "23a2c8e5cf1f380f27722c6d2ae8896431dc7d0e", - "branch": "master", - "notests": true - }, - { - "importpath": "github.com/blevesearch/segment", - "repository": "https://github.com/blevesearch/segment", - "vcs": "", - "revision": "db70c57796cc8c310613541dfade3dce627d09c7", - "branch": "master", - "notests": true - }, - { - "importpath": "github.com/blevesearch/snowballstem", - "repository": "https://github.com/blevesearch/snowballstem", - "vcs": "", - "revision": "26b06a2c243d4f8ca5db3486f94409dd5b2a7467", - "branch": "master", - "notests": true - }, - { - "importpath": "go.etcd.io/bbolt", - "repository": "https://github.com/etcd-io/bbolt", - "vcs": "", - "revision": "68cc10a767ea1c6b9e8dcb9847317ff192d6d974", - "branch": "master", - "notests": true - }, - { - "importpath": "github.com/couchbase/moss", - "repository": "https://github.com/couchbase/moss", - "vcs": "git", - "revision": "013a19c55df3e689a66b632c7c8074e37162217d", - "branch": "master", - "notests": true - }, - { - 
"importpath": "github.com/golang/protobuf/proto", - "repository": "https://github.com/golang/protobuf", - "vcs": "", - "revision": "655cdfa588ea190e901bc5590e65d5621688847c", - "branch": "master", - "path": "/proto", - "notests": true - }, - { - "importpath": "github.com/golang/snappy", - "repository": "https://github.com/golang/snappy", - "vcs": "", - "revision": "cef980a12b316c5b7e5bb3a8e168eb43ae999a88", - "branch": "master", - "notests": true - }, - { - "importpath": "github.com/leemcloughlin/gofarmhash", - "repository": "https://github.com/leemcloughlin/gofarmhash", - "vcs": "git", - "revision": "b3cc1466b93e9c540eda444faaeba9876682c61b", - "branch": "master", - "notests": true - }, - { - "importpath": "github.com/rcrowley/go-metrics", - "repository": "https://github.com/rcrowley/go-metrics", - "vcs": "", - "revision": "dee209f2455f101a5e4e593dea94872d2c62d85d", - "branch": "master", - "notests": true - }, - { - "importpath": "github.com/RoaringBitmap/roaring", - "repository": "https://github.com/RoaringBitmap/roaring", - "vcs": "", - "revision": "4208ad825dda03a6a3d2197df8ec57948aebcc12", - "branch": "master", - "notests": true - }, - { - "importpath": "github.com/seiflotfy/cuckoofilter", - "repository": "https://github.com/seiflotfy/cuckoofilter", - "vcs": "git", - "revision": "d04838794ab86926d32b124345777e55e6f43974", - "branch": "master", - "notests": true - }, - { - "importpath": "github.com/steveyen/gtreap", - "repository": "https://github.com/steveyen/gtreap", - "vcs": "", - "revision": "0abe01ef9be25c4aedc174758ec2d917314d6d70", - "branch": "master", - "notests": true - }, - { - "importpath": "github.com/syndtr/goleveldb/leveldb", - "repository": "https://github.com/syndtr/goleveldb", - "vcs": "", - "revision": "93fc893f2dadb96ffde441c7546cc67ea290a3a8", - "branch": "master", - "path": "/leveldb", - "notests": true - }, - { - "importpath": "github.com/willf/bitset", - "repository": "https://github.com/willf/bitset", - "vcs": "", - "revision": 
"2e6e8094ef4745224150c88c16191c7dceaad16f", - "branch": "master", - "notests": true - }, - { - "importpath": "golang.org/x/text/transform", - "repository": "https://go.googlesource.com/text", - "vcs": "", - "revision": "5ee49cfe751141f8017047bab800d1f528ee3be1", - "branch": "master", - "path": "/transform", - "notests": true - }, - { - "importpath": "golang.org/x/text/unicode/norm", - "repository": "https://go.googlesource.com/text", - "vcs": "", - "revision": "5ee49cfe751141f8017047bab800d1f528ee3be1", - "branch": "master", - "path": "/unicode/norm", - "notests": true - }, - { - "importpath": "github.com/couchbase/vellum", - "repository": "https://github.com/couchbase/vellum", - "vcs": "git", - "revision": "dc222902e86f298bfae0b3dec6ba8b9d874ad5f8", - "branch": "master", - "notests": true - } - ] -} From e862319dd29284fadd8e41bae2c885e031c32ece Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Sun, 5 Apr 2020 17:16:55 -0400 Subject: [PATCH 657/728] remove bleve self reference from go.mod --- go.mod | 1 - 1 file changed, 1 deletion(-) diff --git a/go.mod b/go.mod index 7a1483824..b3a82c3d8 100644 --- a/go.mod +++ b/go.mod @@ -4,7 +4,6 @@ go 1.13 require ( github.com/RoaringBitmap/roaring v0.4.21 - github.com/blevesearch/bleve v1.0.0 github.com/blevesearch/blevex v0.0.0-20190916190636-152f0fe5c040 github.com/blevesearch/go-porterstemmer v1.0.3 github.com/blevesearch/segment v0.9.0 From 660a798904d25c02bf96ac118ed5f0bec39a8cf2 Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Sun, 5 Apr 2020 17:20:56 -0400 Subject: [PATCH 658/728] fix module name --- go.mod | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/go.mod b/go.mod index b3a82c3d8..11f5e5906 100644 --- a/go.mod +++ b/go.mod @@ -1,4 +1,4 @@ -module github.com/blugelabs/bleve +module github.com/blevesearch/bleve go 1.13 From a9dc7b417a7f440ff36f231879bdabfe08928ef4 Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Sun, 5 Apr 2020 17:28:02 -0400 Subject: [PATCH 659/728] bump zap versions used by 
bleve --- go.mod | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/go.mod b/go.mod index 11f5e5906..942224c79 100644 --- a/go.mod +++ b/go.mod @@ -8,8 +8,8 @@ require ( github.com/blevesearch/go-porterstemmer v1.0.3 github.com/blevesearch/segment v0.9.0 github.com/blevesearch/snowballstem v0.9.0 - github.com/blevesearch/zap/v11 v11.0.1 - github.com/blevesearch/zap/v12 v12.0.1 + github.com/blevesearch/zap/v11 v11.0.2 + github.com/blevesearch/zap/v12 v12.0.2 github.com/couchbase/ghistogram v0.1.0 // indirect github.com/couchbase/moss v0.0.0-20190322010551-a0cae174c498 github.com/couchbase/vellum v1.0.0 From c34fdc810a196c03fc2b8aee19f37f520fd5c575 Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Sun, 5 Apr 2020 17:41:15 -0400 Subject: [PATCH 660/728] bump zap versions again --- go.mod | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/go.mod b/go.mod index 942224c79..c9cffbc7a 100644 --- a/go.mod +++ b/go.mod @@ -8,8 +8,8 @@ require ( github.com/blevesearch/go-porterstemmer v1.0.3 github.com/blevesearch/segment v0.9.0 github.com/blevesearch/snowballstem v0.9.0 - github.com/blevesearch/zap/v11 v11.0.2 - github.com/blevesearch/zap/v12 v12.0.2 + github.com/blevesearch/zap/v11 v11.0.3 + github.com/blevesearch/zap/v12 v12.0.3 github.com/couchbase/ghistogram v0.1.0 // indirect github.com/couchbase/moss v0.0.0-20190322010551-a0cae174c498 github.com/couchbase/vellum v1.0.0 From 81b1327ecc52ddaec8e6b373c84297c6253feee6 Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Sun, 5 Apr 2020 17:47:38 -0400 Subject: [PATCH 661/728] bump again again again --- go.mod | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/go.mod b/go.mod index c9cffbc7a..8702417e7 100644 --- a/go.mod +++ b/go.mod @@ -8,8 +8,8 @@ require ( github.com/blevesearch/go-porterstemmer v1.0.3 github.com/blevesearch/segment v0.9.0 github.com/blevesearch/snowballstem v0.9.0 - github.com/blevesearch/zap/v11 v11.0.3 - github.com/blevesearch/zap/v12 v12.0.3 + 
github.com/blevesearch/zap/v11 v11.0.4 + github.com/blevesearch/zap/v12 v12.0.4 github.com/couchbase/ghistogram v0.1.0 // indirect github.com/couchbase/moss v0.0.0-20190322010551-a0cae174c498 github.com/couchbase/vellum v1.0.0 From 487ec05ccabc3abad32f9d32d4a1b31aad76beee Mon Sep 17 00:00:00 2001 From: Abhinav Dangeti Date: Wed, 15 Apr 2020 10:28:43 -0700 Subject: [PATCH 662/728] Fix slice out-of-bounds panic within highlighter Addresses https://github.com/blevesearch/bleve/issues/1370 --- search/highlight/fragmenter/simple/simple.go | 5 ++ search_test.go | 68 ++++++++++++++++++++ 2 files changed, 73 insertions(+) diff --git a/search/highlight/fragmenter/simple/simple.go b/search/highlight/fragmenter/simple/simple.go index 6f6ecedf5..9c63f7fb6 100644 --- a/search/highlight/fragmenter/simple/simple.go +++ b/search/highlight/fragmenter/simple/simple.go @@ -58,6 +58,11 @@ OUTER: // push back towards beginning // without cross maxbegin for start > 0 && used < s.fragmentSize { + if start > len(orig) { + // bail if out of bounds, possibly due to token replacement + // e.g with a regexp replacement + continue OUTER + } r, size := utf8.DecodeLastRune(orig[0:start]) if r == utf8.RuneError { continue OUTER // bail diff --git a/search_test.go b/search_test.go index 7cfd44737..50faaedd8 100644 --- a/search_test.go +++ b/search_test.go @@ -28,6 +28,7 @@ import ( "github.com/blevesearch/bleve/analysis/analyzer/custom" "github.com/blevesearch/bleve/analysis/analyzer/keyword" "github.com/blevesearch/bleve/analysis/analyzer/standard" + regexp_char_filter "github.com/blevesearch/bleve/analysis/char/regexp" "github.com/blevesearch/bleve/analysis/token/length" "github.com/blevesearch/bleve/analysis/token/lowercase" "github.com/blevesearch/bleve/analysis/token/shingle" @@ -38,6 +39,7 @@ import ( "github.com/blevesearch/bleve/index/upsidedown" "github.com/blevesearch/bleve/mapping" "github.com/blevesearch/bleve/search" + "github.com/blevesearch/bleve/search/highlight/highlighter/ansi" 
"github.com/blevesearch/bleve/search/highlight/highlighter/html" "github.com/blevesearch/bleve/search/query" ) @@ -1639,3 +1641,69 @@ func TestGeoDistanceIssue1301(t *testing.T) { t.Fatalf("Size expected: 3, actual %d\n", sr.Total) } } + +func TestSearchHighlightingWithRegexpReplacement(t *testing.T) { + idxMapping := NewIndexMapping() + if err := idxMapping.AddCustomCharFilter(regexp_char_filter.Name, map[string]interface{}{ + "regexp": `([a-z])\s+(\d)`, + "replace": "ooooo$1-$2", + "type": regexp_char_filter.Name, + }); err != nil { + t.Fatal(err) + } + if err := idxMapping.AddCustomAnalyzer("regexp_replace", map[string]interface{}{ + "type": custom.Name, + "tokenizer": "unicode", + "char_filters": []string{ + regexp_char_filter.Name, + }, + }); err != nil { + t.Fatal(err) + } + + idxMapping.DefaultAnalyzer = "regexp_replace" + idxMapping.StoreDynamic = true + idx, err := NewUsing("testidx", idxMapping, scorch.Name, Config.DefaultKVStore, nil) + if err != nil { + t.Fatal(err) + } + + defer func() { + err := idx.Close() + if err != nil { + t.Fatal(err) + } + + err = os.RemoveAll("testidx") + if err != nil { + t.Fatal(err) + } + }() + + doc := map[string]interface{}{ + "status": "fool 10", + } + + batch := idx.NewBatch() + if err = batch.Index("doc", doc); err != nil { + t.Fatal(err) + } + + if err = idx.Batch(batch); err != nil { + t.Fatal(err) + } + + query := NewMatchQuery("fool 10") + sreq := NewSearchRequest(query) + sreq.Fields = []string{"*"} + sreq.Highlight = NewHighlightWithStyle(ansi.Name) + + sres, err := idx.Search(sreq) + if err != nil { + t.Fatal(err) + } + + if sres.Total != 1 { + t.Fatalf("Expected 1 hit, got: %v", sres.Total) + } +} From 2fc361aa497e658d8a0f9bfe1c52c6faa00a68fd Mon Sep 17 00:00:00 2001 From: Abhinav Dangeti Date: Mon, 16 Mar 2020 15:51:02 -0700 Subject: [PATCH 663/728] MB-38312: regexp to use ReplaceAll which uses `Expand` + Expand essentially interprets $ signs, so for example $1 represents the text of the first submatch. 
+ For example .. - Consider the following regex: ([a-z])\s+(\d) - For the string "temp 1", the above regex matches: "p 1" - Let the replacement be "$1-$2", so the expectation is that "p 1" gets replaced by "p-1". - The code before the fix replaces "p 1" with: "$1-$2$1-$2$1-$2" --- analysis/char/regexp/regexp.go | 3 +-- analysis/char/regexp/regexp_test.go | 22 ++++++++++++++++++---- 2 files changed, 19 insertions(+), 6 deletions(-) diff --git a/analysis/char/regexp/regexp.go b/analysis/char/regexp/regexp.go index c290d39ad..fc344b312 100644 --- a/analysis/char/regexp/regexp.go +++ b/analysis/char/regexp/regexp.go @@ -15,7 +15,6 @@ package regexp import ( - "bytes" "fmt" "regexp" @@ -38,7 +37,7 @@ func New(r *regexp.Regexp, replacement []byte) *CharFilter { } func (s *CharFilter) Filter(input []byte) []byte { - return s.r.ReplaceAllFunc(input, func(in []byte) []byte { return bytes.Repeat(s.replacement, len(in)) }) + return s.r.ReplaceAll(input, s.replacement) } func CharFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.CharFilter, error) { diff --git a/analysis/char/regexp/regexp_test.go b/analysis/char/regexp/regexp_test.go index 3d25b3f92..86a375438 100644 --- a/analysis/char/regexp/regexp_test.go +++ b/analysis/char/regexp/regexp_test.go @@ -21,7 +21,6 @@ import ( ) func TestRegexpCharFilter(t *testing.T) { - htmlTagPattern := `\s]+))?)+\s*|\s*)/?>` htmlRegex := regexp.MustCompile(htmlTagPattern) @@ -31,7 +30,7 @@ func TestRegexpCharFilter(t *testing.T) { }{ { input: []byte(`test`), - output: []byte(` test `), + output: []byte(` test `), }, } @@ -45,7 +44,6 @@ func TestRegexpCharFilter(t *testing.T) { } func TestZeroWidthNonJoinerCharFilter(t *testing.T) { - zeroWidthNonJoinerPattern := `\x{200C}` zeroWidthNonJoinerRegex := regexp.MustCompile(zeroWidthNonJoinerPattern) @@ -55,7 +53,7 @@ func TestZeroWidthNonJoinerCharFilter(t *testing.T) { }{ { input: []byte("water\u200Cunder\u200Cthe\u200Cbridge"), - output: []byte("water under the 
bridge"), + output: []byte("water under the bridge"), }, } @@ -67,3 +65,19 @@ func TestZeroWidthNonJoinerCharFilter(t *testing.T) { } } } + +func TestRegexpCustomReplace(t *testing.T) { + regexStr := `([a-z])\s+(\d)` + replace := []byte(`$1-$2`) + + regex := regexp.MustCompile(regexStr) + filter := New(regex, replace) + + input := []byte("temp 1") + expectOutput := []byte("temp-1") + + output := filter.Filter(input) + if !reflect.DeepEqual(output, expectOutput) { + t.Errorf("Expected: %s, Got: %s\n", string(expectOutput), string(output)) + } +} From e94966ded5931686b1eaa9cf14b4c810e4c4b711 Mon Sep 17 00:00:00 2001 From: Abhinav Dangeti Date: Mon, 16 Mar 2020 17:53:47 -0700 Subject: [PATCH 664/728] MB-38312: More test cases for regexp-replace --- analysis/char/regexp/regexp_test.go | 54 +++++++++++++++++++++++------ 1 file changed, 44 insertions(+), 10 deletions(-) diff --git a/analysis/char/regexp/regexp_test.go b/analysis/char/regexp/regexp_test.go index 86a375438..a3430555e 100644 --- a/analysis/char/regexp/regexp_test.go +++ b/analysis/char/regexp/regexp_test.go @@ -67,17 +67,51 @@ func TestZeroWidthNonJoinerCharFilter(t *testing.T) { } func TestRegexpCustomReplace(t *testing.T) { - regexStr := `([a-z])\s+(\d)` - replace := []byte(`$1-$2`) - - regex := regexp.MustCompile(regexStr) - filter := New(regex, replace) + tests := []struct { + regexStr string + replace []byte + input []byte + output []byte + }{ + { + regexStr: `([a-z])\s+(\d)`, + replace: []byte(`$1-$2`), + input: []byte(`temp 1`), + output: []byte(`temp-1`), + }, + { + regexStr: `foo.?`, + replace: []byte(`X`), + input: []byte(`seafood, fool`), + output: []byte(`seaX, X`), + }, + { + regexStr: `def`, + replace: []byte(`_`), + input: []byte(`abcdefghi`), + output: []byte(`abc_ghi`), + }, + { + regexStr: `456`, + replace: []byte(`000000`), + input: []byte(`123456789`), + output: []byte(`123000000789`), + }, + { + regexStr: `“|”`, + replace: []byte(`"`), + input: []byte(`“hello”`), + output: 
[]byte(`"hello"`), + }, + } - input := []byte("temp 1") - expectOutput := []byte("temp-1") + for i := range tests { + regex := regexp.MustCompile(tests[i].regexStr) + filter := New(regex, tests[i].replace) - output := filter.Filter(input) - if !reflect.DeepEqual(output, expectOutput) { - t.Errorf("Expected: %s, Got: %s\n", string(expectOutput), string(output)) + output := filter.Filter(tests[i].input) + if !reflect.DeepEqual(tests[i].output, output) { + t.Errorf("[%d] Expected: `%s`, Got: `%s`\n", i, string(tests[i].output), string(output)) + } } } From dad40529ec1681dd3d1f772c5f5914a3661879b7 Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Fri, 17 Apr 2020 19:57:28 -0400 Subject: [PATCH 665/728] update to latest zap everyone moving to blevesearch/mmap-go --- go.mod | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/go.mod b/go.mod index 8702417e7..d38cf8f92 100644 --- a/go.mod +++ b/go.mod @@ -8,16 +8,16 @@ require ( github.com/blevesearch/go-porterstemmer v1.0.3 github.com/blevesearch/segment v0.9.0 github.com/blevesearch/snowballstem v0.9.0 - github.com/blevesearch/zap/v11 v11.0.4 - github.com/blevesearch/zap/v12 v12.0.4 + github.com/blevesearch/zap/v11 v11.0.7 + github.com/blevesearch/zap/v12 v12.0.7 github.com/couchbase/ghistogram v0.1.0 // indirect - github.com/couchbase/moss v0.0.0-20190322010551-a0cae174c498 - github.com/couchbase/vellum v1.0.0 + github.com/couchbase/moss v0.1.0 + github.com/couchbase/vellum v1.0.1 github.com/golang/protobuf v1.3.2 github.com/kljensen/snowball v0.6.0 github.com/rcrowley/go-metrics v0.0.0-20190826022208-cac0b30c2563 github.com/spf13/cobra v0.0.5 - github.com/steveyen/gtreap v0.0.0-20150807155958-0abe01ef9be2 + github.com/steveyen/gtreap v0.1.0 github.com/syndtr/goleveldb v1.0.0 github.com/willf/bitset v1.1.10 go.etcd.io/bbolt v1.3.4 From 4dc13100a7fd0120229e81450e6ed7ebcf8951fb Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Mon, 20 Apr 2020 13:17:13 -0400 Subject: [PATCH 666/728] add CI for 
unit tests (#1368) * add CI for unit tests * fix some tests that did not close their index * increase delays for tests running via github actions seems as though on the machines these tests run sometimes by the time the cancel fires all the results have been returned anyway in spite of their delays --- .github/workflows/tests.yml | 24 ++++++++++++++++++++++++ index_alias_impl_test.go | 4 ++-- search_test.go | 12 ++++++++++-- 3 files changed, 36 insertions(+), 4 deletions(-) create mode 100644 .github/workflows/tests.yml diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml new file mode 100644 index 000000000..bcafb812b --- /dev/null +++ b/.github/workflows/tests.yml @@ -0,0 +1,24 @@ +on: + push: + branches: + - master + pull_request: +name: Tests +jobs: + test: + strategy: + matrix: + go-version: [1.13.x, 1.14.x] + platform: [ubuntu-latest, macos-latest, windows-latest] + runs-on: ${{ matrix.platform }} + steps: + - name: Install Go + uses: actions/setup-go@v1 + with: + go-version: ${{ matrix.go-version }} + - name: Checkout code + uses: actions/checkout@v2 + - name: Test + run: | + go version + go test -race ./... 
diff --git a/index_alias_impl_test.go b/index_alias_impl_test.go index 6e25157de..3d05b1e54 100644 --- a/index_alias_impl_test.go +++ b/index_alias_impl_test.go @@ -1013,7 +1013,7 @@ func TestIndexAliasMultipleLayer(t *testing.T) { select { case <-ctx.Done(): return ctx.Err() - case <-time.After(50 * time.Millisecond): + case <-time.After(250 * time.Millisecond): return nil } }, @@ -1042,7 +1042,7 @@ func TestIndexAliasMultipleLayer(t *testing.T) { select { case <-ctx.Done(): return ctx.Err() - case <-time.After(50 * time.Millisecond): + case <-time.After(250 * time.Millisecond): return nil } }, diff --git a/search_test.go b/search_test.go index 50faaedd8..453657ee3 100644 --- a/search_test.go +++ b/search_test.go @@ -1558,7 +1558,11 @@ func TestSearchScoreNone(t *testing.T) { } defer func() { - err := os.RemoveAll("testidx") + err := idx.Close() + if err != nil { + t.Fatal(err) + } + err = os.RemoveAll("testidx") if err != nil { t.Fatal(err) } @@ -1609,7 +1613,11 @@ func TestGeoDistanceIssue1301(t *testing.T) { } defer func() { - err := os.RemoveAll("testidx") + err := idx.Close() + if err != nil { + t.Fatal(err) + } + err = os.RemoveAll("testidx") if err != nil { t.Fatal(err) } From b658b028ecf09c97c79e2d8d29401ba6e56b4a11 Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Mon, 20 Apr 2020 13:24:56 -0400 Subject: [PATCH 667/728] misc cleanup while reviewing chans/goroutines (#1375) - renamed snapshot_rollback.go and snapshot_rollback_test.go to rollback.go and rollback_test.go, the snapshot_ prefix should be limited to code that relates directly to either an index snapshot or segment snapshot. rollback can be seen as operating on the index as a whole. 
- renamed mainLoop to introducerLoop, consistent with persisterLoop and mergerLoop - remove snapshotReversion this is no longer used - remove type uint64Descending and related methods, no longer used --- index/scorch/introducer.go | 8 +------- index/scorch/persister.go | 6 ------ index/scorch/{snapshot_rollback.go => rollback.go} | 0 .../{snapshot_rollback_test.go => rollback_test.go} | 0 index/scorch/scorch.go | 2 +- 5 files changed, 2 insertions(+), 14 deletions(-) rename index/scorch/{snapshot_rollback.go => rollback.go} (100%) rename index/scorch/{snapshot_rollback_test.go => rollback_test.go} (100%) diff --git a/index/scorch/introducer.go b/index/scorch/introducer.go index e5f00f80e..64ca969bd 100644 --- a/index/scorch/introducer.go +++ b/index/scorch/introducer.go @@ -45,13 +45,7 @@ type epochWatcher struct { notifyCh notificationChan } -type snapshotReversion struct { - snapshot *IndexSnapshot - applied chan error - persisted chan error -} - -func (s *Scorch) mainLoop() { +func (s *Scorch) introducerLoop() { var epochWatchers []*epochWatcher OUTER: for { diff --git a/index/scorch/persister.go b/index/scorch/persister.go index 30e75df77..ffa656693 100644 --- a/index/scorch/persister.go +++ b/index/scorch/persister.go @@ -780,12 +780,6 @@ func (s *Scorch) loadSegment(segmentBucket *bolt.Bucket) (*SegmentSnapshot, erro return rv, nil } -type uint64Descending []uint64 - -func (p uint64Descending) Len() int { return len(p) } -func (p uint64Descending) Less(i, j int) bool { return p[i] > p[j] } -func (p uint64Descending) Swap(i, j int) { p[i], p[j] = p[j], p[i] } - func (s *Scorch) removeOldData() { removed, err := s.removeOldBoltSnapshots() if err != nil { diff --git a/index/scorch/snapshot_rollback.go b/index/scorch/rollback.go similarity index 100% rename from index/scorch/snapshot_rollback.go rename to index/scorch/rollback.go diff --git a/index/scorch/snapshot_rollback_test.go b/index/scorch/rollback_test.go similarity index 100% rename from 
index/scorch/snapshot_rollback_test.go rename to index/scorch/rollback_test.go diff --git a/index/scorch/scorch.go b/index/scorch/scorch.go index 80f9e3a79..698aaf16a 100644 --- a/index/scorch/scorch.go +++ b/index/scorch/scorch.go @@ -181,7 +181,7 @@ func (s *Scorch) Open() error { } s.asyncTasks.Add(1) - go s.mainLoop() + go s.introducerLoop() if !s.readOnly && s.path != "" { s.asyncTasks.Add(1) From fb361a72a60251e8103e7bf8c2f01bfdb171553e Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Mon, 20 Apr 2020 13:56:41 -0400 Subject: [PATCH 668/728] update README (#1376) move each badge to it's own line, as this seems to still format correctly, but is easier to maintain replaced build badge from travis with tests badge using the new github workflow --- README.md | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 7c1a7c7c4..eff0be97e 100644 --- a/README.md +++ b/README.md @@ -1,10 +1,13 @@ # ![bleve](docs/bleve.png) bleve -[![Build Status](https://travis-ci.org/blevesearch/bleve.svg?branch=master)](https://travis-ci.org/blevesearch/bleve) [![Coverage Status](https://coveralls.io/repos/github/blevesearch/bleve/badge.svg?branch=master)](https://coveralls.io/github/blevesearch/bleve?branch=master) [![GoDoc](https://godoc.org/github.com/blevesearch/bleve?status.svg)](https://godoc.org/github.com/blevesearch/bleve) +[![Tests](https://github.com/blevesearch/bleve/workflows/Tests/badge.svg?branch=master&event=push)](https://github.com/blevesearch/bleve/actions?query=workflow%3ATests+event%3Apush+branch%3Amaster) +[![Coverage Status](https://coveralls.io/repos/github/blevesearch/bleve/badge.svg?branch=master)](https://coveralls.io/github/blevesearch/bleve?branch=master) +[![GoDoc](https://godoc.org/github.com/blevesearch/bleve?status.svg)](https://godoc.org/github.com/blevesearch/bleve) [![Join the chat at 
https://gitter.im/blevesearch/bleve](https://badges.gitter.im/Join%20Chat.svg)](https://gitter.im/blevesearch/bleve?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge) [![codebeat](https://codebeat.co/badges/38a7cbc9-9cf5-41c0-a315-0746178230f4)](https://codebeat.co/projects/github-com-blevesearch-bleve) [![Go Report Card](https://goreportcard.com/badge/blevesearch/bleve)](https://goreportcard.com/report/blevesearch/bleve) -[![Sourcegraph](https://sourcegraph.com/github.com/blevesearch/bleve/-/badge.svg)](https://sourcegraph.com/github.com/blevesearch/bleve?badge) [![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0) +[![Sourcegraph](https://sourcegraph.com/github.com/blevesearch/bleve/-/badge.svg)](https://sourcegraph.com/github.com/blevesearch/bleve?badge) +[![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0) modern text indexing in go - [blevesearch.com](http://www.blevesearch.com/) From ba54e05e30587b38e10e3571ca1c2faeb846dcd8 Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Wed, 29 Apr 2020 16:39:18 -0400 Subject: [PATCH 669/728] improve reliability of test on mac observed this test failing frequently on mac environment through github actions simple change seems to still test the same behavior instead of arranging for one request to sleep longer than the context timeout, instead arrange for it to never return. that way no matter when the context timeout eventually happens, we correctly observe partial results. 
--- index_alias_impl_test.go | 2 -- 1 file changed, 2 deletions(-) diff --git a/index_alias_impl_test.go b/index_alias_impl_test.go index 3d05b1e54..0c07390a3 100644 --- a/index_alias_impl_test.go +++ b/index_alias_impl_test.go @@ -913,8 +913,6 @@ func TestMultiSearchTimeoutPartial(t *testing.T) { select { case <-ctx.Done(): return ctx.Err() - case <-time.After(50 * time.Millisecond): - return nil } }, err: nil, From 5353f4da6d3048f47db1da17bd65d0adcf497ed3 Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Thu, 30 Apr 2020 11:06:37 -0400 Subject: [PATCH 670/728] improve unit test execution time MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Before: $ go test -v -run=TestSearchQueryCallback === RUN TestSearchQueryCallback --- PASS: TestSearchQueryCallback (20.99s) PASS ok github.com/blevesearch/bleve 20.997s After: $ ✗ go test -v -run=TestSearchQueryCallback === RUN TestSearchQueryCallback --- PASS: TestSearchQueryCallback (1.50s) PASS ok github.com/blevesearch/bleve 1.509s --- index_test.go | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/index_test.go b/index_test.go index 65ddedaa8..57c6fc0fb 100644 --- a/index_test.go +++ b/index_test.go @@ -1892,13 +1892,18 @@ func TestSearchQueryCallback(t *testing.T) { }() elements := []string{"air", "water", "fire", "earth"} + b := index.NewBatch() for j := 0; j < 10000; j++ { - err = index.Index(fmt.Sprintf("%d", j), + err = b.Index(fmt.Sprintf("%d", j), map[string]interface{}{"name": elements[j%len(elements)]}) if err != nil { t.Fatal(err) } } + err = index.Batch(b) + if err != nil { + t.Fatal(err) + } query := NewTermQuery("water") req := NewSearchRequest(query) From 98b3d3b5b7fac7aab20172eb438ff3e04e0cef04 Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Thu, 30 Apr 2020 14:10:16 -0400 Subject: [PATCH 671/728] improve unit test perf using batch --- index_test.go | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/index_test.go 
b/index_test.go index 57c6fc0fb..73db16bf6 100644 --- a/index_test.go +++ b/index_test.go @@ -726,16 +726,21 @@ func TestSortMatchSearch(t *testing.T) { names := []string{"Noam", "Uri", "David", "Yosef", "Eitan", "Itay", "Ariel", "Daniel", "Omer", "Yogev", "Yehonatan", "Moshe", "Mohammed", "Yusuf", "Omar"} days := []string{"Sunday", "Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday"} numbers := []string{"One", "Two", "Three", "Four", "Five", "Six", "Seven", "Eight", "Nine", "Ten", "Eleven", "Twelve"} + b := index.NewBatch() for i := 0; i < 200; i++ { doc := make(map[string]interface{}) doc["Name"] = names[i%len(names)] doc["Day"] = days[i%len(days)] doc["Number"] = numbers[i%len(numbers)] - err = index.Index(fmt.Sprintf("%d", i), doc) + err = b.Index(fmt.Sprintf("%d", i), doc) if err != nil { t.Fatal(err) } } + err = index.Batch(b) + if err != nil { + t.Fatal(err) + } req := NewSearchRequest(NewMatchQuery("One")) req.SortBy([]string{"Day", "Name"}) From 94161c7d81e5ca622d1652877ec2b407d1d04966 Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Thu, 30 Apr 2020 16:16:45 -0400 Subject: [PATCH 672/728] improve performance of query callback test the behavior being tested did not depend on any data actually being indexed, so that was removed additional documentation was added to clarify what this test was testing, since it was not clear and we did not remember --- index_test.go | 22 ++++------------------ 1 file changed, 4 insertions(+), 18 deletions(-) diff --git a/index_test.go b/index_test.go index 73db16bf6..0ee1bf53f 100644 --- a/index_test.go +++ b/index_test.go @@ -1896,29 +1896,15 @@ func TestSearchQueryCallback(t *testing.T) { } }() - elements := []string{"air", "water", "fire", "earth"} - b := index.NewBatch() - for j := 0; j < 10000; j++ { - err = b.Index(fmt.Sprintf("%d", j), - map[string]interface{}{"name": elements[j%len(elements)]}) - if err != nil { - t.Fatal(err) - } - } - err = index.Batch(b) - if err != nil { - t.Fatal(err) - } - query 
:= NewTermQuery("water") req := NewSearchRequest(query) expErr := fmt.Errorf("MEM_LIMIT_EXCEEDED") f := func(size uint64) error { - if size > 1000 { - return expErr - } - return nil + // the intended usage of this callback is to see the estimated + // memory usage before executing, and possibly abort early + // in this test we simulate returning such an error + return expErr } ctx := context.WithValue(context.Background(), SearchQueryStartCallbackKey, From 4638928d99bdde25a5c75a36dfd63b4e5ce30ddd Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Thu, 30 Apr 2020 20:01:43 -0400 Subject: [PATCH 673/728] replace testidx with proper ioutil.TempDir (#1386) --- index_test.go | 355 +++++++++++++++----------------------- search_test.go | 176 +++++++++---------- test/versus_score_test.go | 17 +- 3 files changed, 227 insertions(+), 321 deletions(-) diff --git a/index_test.go b/index_test.go index 0ee1bf53f..1662c60e3 100644 --- a/index_test.go +++ b/index_test.go @@ -21,6 +21,7 @@ import ( "log" "math" "os" + "path/filepath" "reflect" "sort" "strconv" @@ -42,15 +43,30 @@ import ( "github.com/blevesearch/bleve/index/upsidedown" ) +type Fatalfable interface { + Fatalf(format string, args ...interface{}) +} + +func createTmpIndexPath(f Fatalfable) string { + tmpIndexPath, err := ioutil.TempDir("", "bleve-testidx") + if err != nil { + f.Fatalf("error creating temp dir: %v", err) + } + return tmpIndexPath +} + +func cleanupTmpIndexPath(f Fatalfable, path string) { + err := os.RemoveAll(path) + if err != nil { + f.Fatalf("error removing temp dir: %v", err) + } +} + func TestCrud(t *testing.T) { - defer func() { - err := os.RemoveAll("testidx") - if err != nil { - t.Fatal(err) - } - }() + tmpIndexPath := createTmpIndexPath(t) + defer cleanupTmpIndexPath(t, tmpIndexPath) - index, err := New("testidx", NewIndexMapping()) + index, err := New(tmpIndexPath, NewIndexMapping()) if err != nil { t.Fatal(err) } @@ -157,7 +173,7 @@ func TestCrud(t *testing.T) { t.Fatal(err) } - index, err = 
Open("testidx") + index, err = Open(tmpIndexPath) if err != nil { t.Fatal(err) } @@ -216,14 +232,10 @@ func TestCrud(t *testing.T) { } func TestIndexCreateNewOverExisting(t *testing.T) { - defer func() { - err := os.RemoveAll("testidx") - if err != nil { - t.Fatal(err) - } - }() + tmpIndexPath := createTmpIndexPath(t) + defer cleanupTmpIndexPath(t, tmpIndexPath) - index, err := New("testidx", NewIndexMapping()) + index, err := New(tmpIndexPath, NewIndexMapping()) if err != nil { t.Fatal(err) } @@ -231,7 +243,7 @@ func TestIndexCreateNewOverExisting(t *testing.T) { if err != nil { t.Fatal(err) } - index, err = New("testidx", NewIndexMapping()) + index, err = New(tmpIndexPath, NewIndexMapping()) if err != ErrorIndexPathExists { t.Fatalf("expected error index path exists, got %v", err) } @@ -245,14 +257,10 @@ func TestIndexOpenNonExisting(t *testing.T) { } func TestIndexOpenMetaMissingOrCorrupt(t *testing.T) { - defer func() { - err := os.RemoveAll("testidx") - if err != nil { - t.Fatal(err) - } - }() + tmpIndexPath := createTmpIndexPath(t) + defer cleanupTmpIndexPath(t, tmpIndexPath) - index, err := New("testidx", NewIndexMapping()) + index, err := New(tmpIndexPath, NewIndexMapping()) if err != nil { t.Fatal(err) } @@ -261,35 +269,37 @@ func TestIndexOpenMetaMissingOrCorrupt(t *testing.T) { t.Fatal(err) } + tmpIndexPathMeta := filepath.Join(tmpIndexPath, "index_meta.json") + // now intentionally change the storage type - err = ioutil.WriteFile("testidx/index_meta.json", []byte(`{"storage":"mystery"}`), 0666) + err = ioutil.WriteFile(tmpIndexPathMeta, []byte(`{"storage":"mystery"}`), 0666) if err != nil { t.Fatal(err) } - index, err = Open("testidx") + index, err = Open(tmpIndexPath) if err != ErrorUnknownStorageType { t.Fatalf("expected error unknown storage type, got %v", err) } // now intentionally corrupt the metadata - err = ioutil.WriteFile("testidx/index_meta.json", []byte("corrupted"), 0666) + err = ioutil.WriteFile(tmpIndexPathMeta, []byte("corrupted"), 0666) 
if err != nil { t.Fatal(err) } - index, err = Open("testidx") + index, err = Open(tmpIndexPath) if err != ErrorIndexMetaCorrupt { t.Fatalf("expected error index metadata corrupted, got %v", err) } // now intentionally remove the metadata - err = os.Remove("testidx/index_meta.json") + err = os.Remove(tmpIndexPathMeta) if err != nil { t.Fatal(err) } - index, err = Open("testidx") + index, err = Open(tmpIndexPath) if err != ErrorIndexMetaMissing { t.Fatalf("expected error index metadata missing, got %v", err) } @@ -365,12 +375,8 @@ func (s *slowQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, option } func TestSlowSearch(t *testing.T) { - defer func() { - err := os.RemoveAll("testidx") - if err != nil { - t.Fatal(err) - } - }() + tmpIndexPath := createTmpIndexPath(t) + defer cleanupTmpIndexPath(t, tmpIndexPath) defer func() { // reset logger back to normal @@ -380,7 +386,7 @@ func TestSlowSearch(t *testing.T) { var sdw sawDataWriter SetLog(log.New(&sdw, "bleve", log.LstdFlags)) - index, err := New("testidx", NewIndexMapping()) + index, err := New(tmpIndexPath, NewIndexMapping()) if err != nil { t.Fatal(err) } @@ -430,14 +436,10 @@ func (s *sawDataWriter) Write(p []byte) (n int, err error) { } func TestStoredFieldPreserved(t *testing.T) { - defer func() { - err := os.RemoveAll("testidx") - if err != nil { - t.Fatal(err) - } - }() + tmpIndexPath := createTmpIndexPath(t) + defer cleanupTmpIndexPath(t, tmpIndexPath) - index, err := New("testidx", NewIndexMapping()) + index, err := New(tmpIndexPath, NewIndexMapping()) if err != nil { t.Fatal(err) } @@ -485,14 +487,10 @@ func TestStoredFieldPreserved(t *testing.T) { } func TestDict(t *testing.T) { - defer func() { - err := os.RemoveAll("testidx") - if err != nil { - t.Fatal(err) - } - }() + tmpIndexPath := createTmpIndexPath(t) + defer cleanupTmpIndexPath(t, tmpIndexPath) - index, err := New("testidx", NewIndexMapping()) + index, err := New(tmpIndexPath, NewIndexMapping()) if err != nil { t.Fatal(err) } @@ 
-612,14 +610,10 @@ func TestDict(t *testing.T) { } func TestBatchString(t *testing.T) { - defer func() { - err := os.RemoveAll("testidx") - if err != nil { - t.Fatal(err) - } - }() + tmpIndexPath := createTmpIndexPath(t) + defer cleanupTmpIndexPath(t, tmpIndexPath) - index, err := New("testidx", NewIndexMapping()) + index, err := New(tmpIndexPath, NewIndexMapping()) if err != nil { t.Fatal(err) } @@ -659,14 +653,10 @@ func TestBatchString(t *testing.T) { } func TestIndexMetadataRaceBug198(t *testing.T) { - defer func() { - err := os.RemoveAll("testidx") - if err != nil { - t.Fatal(err) - } - }() + tmpIndexPath := createTmpIndexPath(t) + defer cleanupTmpIndexPath(t, tmpIndexPath) - index, err := New("testidx", NewIndexMapping()) + index, err := New(tmpIndexPath, NewIndexMapping()) if err != nil { t.Fatal(err) } @@ -711,14 +701,10 @@ func TestIndexMetadataRaceBug198(t *testing.T) { } func TestSortMatchSearch(t *testing.T) { - defer func() { - err := os.RemoveAll("testidx") - if err != nil { - t.Fatal(err) - } - }() + tmpIndexPath := createTmpIndexPath(t) + defer cleanupTmpIndexPath(t, tmpIndexPath) - index, err := New("testidx", NewIndexMapping()) + index, err := New(tmpIndexPath, NewIndexMapping()) if err != nil { t.Fatal(err) } @@ -764,14 +750,10 @@ func TestSortMatchSearch(t *testing.T) { } func TestIndexCountMatchSearch(t *testing.T) { - defer func() { - err := os.RemoveAll("testidx") - if err != nil { - t.Fatal(err) - } - }() + tmpIndexPath := createTmpIndexPath(t) + defer cleanupTmpIndexPath(t, tmpIndexPath) - index, err := New("testidx", NewIndexMapping()) + index, err := New(tmpIndexPath, NewIndexMapping()) if err != nil { t.Fatal(err) } @@ -831,14 +813,10 @@ func TestIndexCountMatchSearch(t *testing.T) { } func TestBatchReset(t *testing.T) { - defer func() { - err := os.RemoveAll("testidx") - if err != nil { - t.Fatal(err) - } - }() + tmpIndexPath := createTmpIndexPath(t) + defer cleanupTmpIndexPath(t, tmpIndexPath) - index, err := New("testidx", 
NewIndexMapping()) + index, err := New(tmpIndexPath, NewIndexMapping()) if err != nil { t.Fatal(err) } @@ -874,14 +852,10 @@ func TestBatchReset(t *testing.T) { } func TestDocumentFieldArrayPositions(t *testing.T) { - defer func() { - err := os.RemoveAll("testidx") - if err != nil { - t.Fatal(err) - } - }() + tmpIndexPath := createTmpIndexPath(t) + defer cleanupTmpIndexPath(t, tmpIndexPath) - index, err := New("testidx", NewIndexMapping()) + index, err := New(tmpIndexPath, NewIndexMapping()) if err != nil { t.Fatal(err) } @@ -983,12 +957,8 @@ func TestDocumentFieldArrayPositions(t *testing.T) { } func TestKeywordSearchBug207(t *testing.T) { - defer func() { - err := os.RemoveAll("testidx") - if err != nil { - t.Fatal(err) - } - }() + tmpIndexPath := createTmpIndexPath(t) + defer cleanupTmpIndexPath(t, tmpIndexPath) f := NewTextFieldMapping() f.Analyzer = keyword.Name @@ -997,7 +967,7 @@ func TestKeywordSearchBug207(t *testing.T) { m.DefaultMapping = NewDocumentMapping() m.DefaultMapping.AddFieldMappingsAt("Body", f) - index, err := New("testidx", m) + index, err := New(tmpIndexPath, m) if err != nil { t.Fatal(err) } @@ -1081,14 +1051,10 @@ func TestKeywordSearchBug207(t *testing.T) { } func TestTermVectorArrayPositions(t *testing.T) { - defer func() { - err := os.RemoveAll("testidx") - if err != nil { - t.Fatal(err) - } - }() + tmpIndexPath := createTmpIndexPath(t) + defer cleanupTmpIndexPath(t, tmpIndexPath) - index, err := New("testidx", NewIndexMapping()) + index, err := New(tmpIndexPath, NewIndexMapping()) if err != nil { t.Fatal(err) } @@ -1158,12 +1124,8 @@ func TestTermVectorArrayPositions(t *testing.T) { } func TestDocumentStaticMapping(t *testing.T) { - defer func() { - err := os.RemoveAll("testidx") - if err != nil { - t.Fatal(err) - } - }() + tmpIndexPath := createTmpIndexPath(t) + defer cleanupTmpIndexPath(t, tmpIndexPath) m := NewIndexMapping() m.DefaultMapping = NewDocumentStaticMapping() @@ -1171,7 +1133,7 @@ func TestDocumentStaticMapping(t 
*testing.T) { m.DefaultMapping.AddFieldMappingsAt("Date", NewDateTimeFieldMapping()) m.DefaultMapping.AddFieldMappingsAt("Numeric", NewNumericFieldMapping()) - index, err := New("testidx", m) + index, err := New(tmpIndexPath, m) if err != nil { t.Fatal(err) } @@ -1219,14 +1181,10 @@ func TestDocumentStaticMapping(t *testing.T) { } func TestIndexEmptyDocId(t *testing.T) { - defer func() { - err := os.RemoveAll("testidx") - if err != nil { - t.Fatal(err) - } - }() + tmpIndexPath := createTmpIndexPath(t) + defer cleanupTmpIndexPath(t, tmpIndexPath) - index, err := New("testidx", NewIndexMapping()) + index, err := New(tmpIndexPath, NewIndexMapping()) if err != nil { t.Fatal(err) } @@ -1264,12 +1222,8 @@ func TestIndexEmptyDocId(t *testing.T) { } func TestDateTimeFieldMappingIssue287(t *testing.T) { - defer func() { - err := os.RemoveAll("testidx") - if err != nil { - t.Fatal(err) - } - }() + tmpIndexPath := createTmpIndexPath(t) + defer cleanupTmpIndexPath(t, tmpIndexPath) f := NewDateTimeFieldMapping() @@ -1277,7 +1231,7 @@ func TestDateTimeFieldMappingIssue287(t *testing.T) { m.DefaultMapping = NewDocumentMapping() m.DefaultMapping.AddFieldMappingsAt("Date", f) - index, err := New("testidx", m) + index, err := New(tmpIndexPath, m) if err != nil { t.Fatal(err) } @@ -1347,14 +1301,10 @@ func TestDateTimeFieldMappingIssue287(t *testing.T) { } func TestDocumentFieldArrayPositionsBug295(t *testing.T) { - defer func() { - err := os.RemoveAll("testidx") - if err != nil { - t.Fatal(err) - } - }() + tmpIndexPath := createTmpIndexPath(t) + defer cleanupTmpIndexPath(t, tmpIndexPath) - index, err := New("testidx", NewIndexMapping()) + index, err := New(tmpIndexPath, NewIndexMapping()) if err != nil { t.Fatal(err) } @@ -1431,18 +1381,14 @@ func TestDocumentFieldArrayPositionsBug295(t *testing.T) { } func TestBooleanFieldMappingIssue109(t *testing.T) { - defer func() { - err := os.RemoveAll("testidx") - if err != nil { - t.Fatal(err) - } - }() + tmpIndexPath := 
createTmpIndexPath(t) + defer cleanupTmpIndexPath(t, tmpIndexPath) m := NewIndexMapping() m.DefaultMapping = NewDocumentMapping() m.DefaultMapping.AddFieldMappingsAt("Bool", NewBooleanFieldMapping()) - index, err := New("testidx", m) + index, err := New(tmpIndexPath, m) if err != nil { t.Fatal(err) } @@ -1497,14 +1443,10 @@ func TestBooleanFieldMappingIssue109(t *testing.T) { } func TestSearchTimeout(t *testing.T) { - defer func() { - err := os.RemoveAll("testidx") - if err != nil { - t.Fatal(err) - } - }() + tmpIndexPath := createTmpIndexPath(t) + defer cleanupTmpIndexPath(t, tmpIndexPath) - index, err := New("testidx", NewIndexMapping()) + index, err := New(tmpIndexPath, NewIndexMapping()) if err != nil { t.Fatal(err) } @@ -1565,13 +1507,9 @@ func TestConfigCache(t *testing.T) { } func TestBatchRaceBug260(t *testing.T) { - defer func() { - err := os.RemoveAll("testidx") - if err != nil { - t.Fatal(err) - } - }() - i, err := New("testidx", NewIndexMapping()) + tmpIndexPath := createTmpIndexPath(t) + defer cleanupTmpIndexPath(t, tmpIndexPath) + i, err := New(tmpIndexPath, NewIndexMapping()) if err != nil { t.Fatal(err) } @@ -1603,14 +1541,10 @@ func TestBatchRaceBug260(t *testing.T) { } func BenchmarkBatchOverhead(b *testing.B) { - defer func() { - err := os.RemoveAll("testidx") - if err != nil { - b.Fatal(err) - } - }() + tmpIndexPath := createTmpIndexPath(b) + defer cleanupTmpIndexPath(b, tmpIndexPath) m := NewIndexMapping() - i, err := NewUsing("testidx", m, Config.DefaultIndexType, null.Name, nil) + i, err := NewUsing(tmpIndexPath, m, Config.DefaultIndexType, null.Name, nil) if err != nil { b.Fatal(err) } @@ -1632,15 +1566,11 @@ func BenchmarkBatchOverhead(b *testing.B) { } func TestOpenReadonlyMultiple(t *testing.T) { - defer func() { - err := os.RemoveAll("testidx") - if err != nil { - t.Fatal(err) - } - }() + tmpIndexPath := createTmpIndexPath(t) + defer cleanupTmpIndexPath(t, tmpIndexPath) // build an index and close it - index, err := New("testidx", 
NewIndexMapping()) + index, err := New(tmpIndexPath, NewIndexMapping()) if err != nil { t.Fatal(err) } @@ -1660,7 +1590,7 @@ func TestOpenReadonlyMultiple(t *testing.T) { } // now open it read-only - index, err = OpenUsing("testidx", map[string]interface{}{ + index, err = OpenUsing(tmpIndexPath, map[string]interface{}{ "read_only": true, }) @@ -1669,7 +1599,7 @@ func TestOpenReadonlyMultiple(t *testing.T) { } // now open it again - index2, err := OpenUsing("testidx", map[string]interface{}{ + index2, err := OpenUsing(tmpIndexPath, map[string]interface{}{ "read_only": true, }) @@ -1753,14 +1683,10 @@ func TestBug408(t *testing.T) { } func TestIndexAdvancedCountMatchSearch(t *testing.T) { - defer func() { - err := os.RemoveAll("testidx") - if err != nil { - t.Fatal(err) - } - }() + tmpIndexPath := createTmpIndexPath(t) + defer cleanupTmpIndexPath(t, tmpIndexPath) - index, err := New("testidx", NewIndexMapping()) + index, err := New(tmpIndexPath, NewIndexMapping()) if err != nil { t.Fatal(err) } @@ -1826,14 +1752,10 @@ func TestIndexAdvancedCountMatchSearch(t *testing.T) { } func benchmarkSearchOverhead(indexType string, b *testing.B) { - defer func() { - err := os.RemoveAll("testidx") - if err != nil { - b.Fatal(err) - } - }() + tmpIndexPath := createTmpIndexPath(b) + defer cleanupTmpIndexPath(b, tmpIndexPath) - index, err := NewUsing("testidx", NewIndexMapping(), + index, err := NewUsing(tmpIndexPath, NewIndexMapping(), indexType, Config.DefaultKVStore, nil) if err != nil { b.Fatal(err) @@ -1878,14 +1800,10 @@ func BenchmarkScorchSearchOverhead(b *testing.B) { } func TestSearchQueryCallback(t *testing.T) { - defer func() { - err := os.RemoveAll("testidx") - if err != nil { - t.Fatal(err) - } - }() + tmpIndexPath := createTmpIndexPath(t) + defer cleanupTmpIndexPath(t, tmpIndexPath) - index, err := New("testidx", NewIndexMapping()) + index, err := New(tmpIndexPath, NewIndexMapping()) if err != nil { t.Fatal(err) } @@ -1916,14 +1834,10 @@ func TestSearchQueryCallback(t 
*testing.T) { } func TestBatchMerge(t *testing.T) { - defer func() { - err := os.RemoveAll("testidx") - if err != nil { - t.Fatal(err) - } - }() + tmpIndexPath := createTmpIndexPath(t) + defer cleanupTmpIndexPath(t, tmpIndexPath) - index, err := New("testidx", NewIndexMapping()) + index, err := New(tmpIndexPath, NewIndexMapping()) if err != nil { t.Fatal(err) } @@ -1991,7 +1905,7 @@ func TestBatchMerge(t *testing.T) { t.Fatal(err) } - index, err = Open("testidx") + index, err = Open(tmpIndexPath) if err != nil { t.Fatal(err) } @@ -2072,19 +1986,15 @@ func TestBatchMerge(t *testing.T) { } func TestBug1096(t *testing.T) { - defer func() { - err := os.RemoveAll("testidx") - if err != nil { - t.Fatal(err) - } - }() + tmpIndexPath := createTmpIndexPath(t) + defer cleanupTmpIndexPath(t, tmpIndexPath) // use default mapping mapping := NewIndexMapping() // create a scorch index with default SAFE batches var idx Index - idx, err = NewUsing("testidx", mapping, "scorch", "scorch", nil) + idx, err = NewUsing(tmpIndexPath, mapping, "scorch", "scorch", nil) if err != nil { log.Fatal(err) } @@ -2148,18 +2058,14 @@ func TestBug1096(t *testing.T) { } func TestDataRaceBug1092(t *testing.T) { - defer func() { - rerr := os.RemoveAll("testidx") - if rerr != nil { - t.Fatal(rerr) - } - }() + tmpIndexPath := createTmpIndexPath(t) + defer cleanupTmpIndexPath(t, tmpIndexPath) // use default mapping mapping := NewIndexMapping() var idx Index - idx, err = NewUsing("testidx", mapping, upsidedown.Name, boltdb.Name, nil) + idx, err = NewUsing(tmpIndexPath, mapping, upsidedown.Name, boltdb.Name, nil) if err != nil { log.Fatal(err) } @@ -2182,14 +2088,25 @@ func TestDataRaceBug1092(t *testing.T) { } func TestBatchRaceBug1149(t *testing.T) { + tmpIndexPath := createTmpIndexPath(t) + defer cleanupTmpIndexPath(t, tmpIndexPath) + i, err := New(tmpIndexPath, NewIndexMapping()) + if err != nil { + t.Fatal(err) + } defer func() { - err := os.RemoveAll("testidx") + err := i.Close() if err != nil { 
t.Fatal(err) } }() - i, err := New("testidx", NewIndexMapping()) - //i, err := NewUsing("testidx", NewIndexMapping(), "scorch", "scorch", nil) + testBatchRaceBug1149(t, i) +} + +func TestBatchRaceBug1149Scorch(t *testing.T) { + tmpIndexPath := createTmpIndexPath(t) + defer cleanupTmpIndexPath(t, tmpIndexPath) + i, err := NewUsing(tmpIndexPath, NewIndexMapping(), "scorch", "scorch", nil) if err != nil { t.Fatal(err) } @@ -2199,6 +2116,10 @@ func TestBatchRaceBug1149(t *testing.T) { t.Fatal(err) } }() + testBatchRaceBug1149(t, i) +} + +func testBatchRaceBug1149(t *testing.T, i Index) { b := i.NewBatch() b.Delete("1") err = i.Batch(b) diff --git a/search_test.go b/search_test.go index 453657ee3..755999db5 100644 --- a/search_test.go +++ b/search_test.go @@ -17,7 +17,6 @@ package bleve import ( "encoding/json" "fmt" - "os" "reflect" "strconv" "strings" @@ -221,7 +220,7 @@ func TestFacetNumericDateRangeRequests(t *testing.T) { Field: "Date_Range_Success_With_StartEnd", Size: 1, DateTimeRanges: []*dateTimeRange{ - &dateTimeRange{Name: "testName", Start: time.Unix(0, 0), End: time.Now()}, + {Name: "testName", Start: time.Unix(0, 0), End: time.Now()}, }, }, result: nil, @@ -231,7 +230,7 @@ func TestFacetNumericDateRangeRequests(t *testing.T) { Field: "Date_Range_Success_With_Start", Size: 1, DateTimeRanges: []*dateTimeRange{ - &dateTimeRange{Name: "testName", Start: time.Unix(0, 0)}, + {Name: "testName", Start: time.Unix(0, 0)}, }, }, result: nil, @@ -241,7 +240,7 @@ func TestFacetNumericDateRangeRequests(t *testing.T) { Field: "Date_Range_Success_With_End", Size: 1, DateTimeRanges: []*dateTimeRange{ - &dateTimeRange{Name: "testName", End: time.Now()}, + {Name: "testName", End: time.Now()}, }, }, result: nil, @@ -251,7 +250,7 @@ func TestFacetNumericDateRangeRequests(t *testing.T) { Field: "Numeric_Range_Success_With_MinMax", Size: 1, NumericRanges: []*numericRange{ - &numericRange{Name: "testName", Min: &value, Max: &value}, + {Name: "testName", Min: &value, Max: &value}, 
}, }, result: nil, @@ -261,7 +260,7 @@ func TestFacetNumericDateRangeRequests(t *testing.T) { Field: "Numeric_Range_Success_With_Min", Size: 1, NumericRanges: []*numericRange{ - &numericRange{Name: "testName", Min: &value}, + {Name: "testName", Min: &value}, }, }, result: nil, @@ -271,7 +270,7 @@ func TestFacetNumericDateRangeRequests(t *testing.T) { Field: "Numeric_Range_Success_With_Max", Size: 1, NumericRanges: []*numericRange{ - &numericRange{Name: "testName", Max: &value}, + {Name: "testName", Max: &value}, }, }, result: nil, @@ -281,9 +280,9 @@ func TestFacetNumericDateRangeRequests(t *testing.T) { Field: "Date_Range_Missing_Failure", Size: 1, DateTimeRanges: []*dateTimeRange{ - &dateTimeRange{Name: "testName2", Start: time.Unix(0, 0)}, - &dateTimeRange{Name: "testName1", End: time.Now()}, - &dateTimeRange{Name: "testName"}, + {Name: "testName2", Start: time.Unix(0, 0)}, + {Name: "testName1", End: time.Now()}, + {Name: "testName"}, }, }, result: drMissingErr, @@ -293,9 +292,9 @@ func TestFacetNumericDateRangeRequests(t *testing.T) { Field: "Numeric_Range_Missing_Failure", Size: 1, NumericRanges: []*numericRange{ - &numericRange{Name: "testName2", Min: &value}, - &numericRange{Name: "testName1", Max: &value}, - &numericRange{Name: "testName"}, + {Name: "testName2", Min: &value}, + {Name: "testName1", Max: &value}, + {Name: "testName"}, }, }, result: nrMissingErr, @@ -305,10 +304,10 @@ func TestFacetNumericDateRangeRequests(t *testing.T) { Field: "Numeric_And_DateRanges_Failure", Size: 1, NumericRanges: []*numericRange{ - &numericRange{Name: "testName", Max: &value}, + {Name: "testName", Max: &value}, }, DateTimeRanges: []*dateTimeRange{ - &dateTimeRange{Name: "testName", End: time.Now()}, + {Name: "testName", End: time.Now()}, }, }, result: drNrErr, @@ -318,8 +317,8 @@ func TestFacetNumericDateRangeRequests(t *testing.T) { Field: "Numeric_Range_Name_Repeat_Failure", Size: 1, NumericRanges: []*numericRange{ - &numericRange{Name: "testName", Min: &value}, - 
&numericRange{Name: "testName", Max: &value}, + {Name: "testName", Min: &value}, + {Name: "testName", Max: &value}, }, }, result: nrNameDupErr, @@ -329,8 +328,8 @@ func TestFacetNumericDateRangeRequests(t *testing.T) { Field: "Date_Range_Name_Repeat_Failure", Size: 1, DateTimeRanges: []*dateTimeRange{ - &dateTimeRange{Name: "testName", Start: time.Unix(0, 0)}, - &dateTimeRange{Name: "testName", End: time.Now()}, + {Name: "testName", Start: time.Unix(0, 0)}, + {Name: "testName", End: time.Now()}, }, }, result: drNameDupErr, @@ -447,7 +446,11 @@ func TestNestedBooleanSearchers(t *testing.T) { } idxMapping.DefaultAnalyzer = "3xbla" - idx, err := New("testidx", idxMapping) + + tmpIndexPath := createTmpIndexPath(t) + defer cleanupTmpIndexPath(t, tmpIndexPath) + + idx, err := New(tmpIndexPath, idxMapping) if err != nil { t.Fatal(err) } @@ -457,11 +460,6 @@ func TestNestedBooleanSearchers(t *testing.T) { if err != nil { t.Fatal(err) } - - err = os.RemoveAll("testidx") - if err != nil { - t.Fatal(err) - } }() // create and insert documents as a batch @@ -559,9 +557,12 @@ func TestNestedBooleanSearchers(t *testing.T) { } func TestNestedBooleanMustNotSearcherUpsidedown(t *testing.T) { + tmpIndexPath := createTmpIndexPath(t) + defer cleanupTmpIndexPath(t, tmpIndexPath) + // create an index with default settings idxMapping := NewIndexMapping() - idx, err := New("testidx", idxMapping) + idx, err := New(tmpIndexPath, idxMapping) if err != nil { t.Fatal(err) } @@ -571,11 +572,6 @@ func TestNestedBooleanMustNotSearcherUpsidedown(t *testing.T) { if err != nil { t.Fatal(err) } - - err = os.RemoveAll("testidx") - if err != nil { - t.Fatal(err) - } }() // create and insert documents as a batch @@ -706,7 +702,10 @@ func TestSearchScorchOverEmptyKeyword(t *testing.T) { imap.DefaultMapping = dmap imap.DefaultAnalyzer = standard.Name - idx, err := New("testidx", imap) + tmpIndexPath := createTmpIndexPath(t) + defer cleanupTmpIndexPath(t, tmpIndexPath) + + idx, err := New(tmpIndexPath, 
imap) if err != nil { t.Fatal(err) } @@ -717,10 +716,6 @@ func TestSearchScorchOverEmptyKeyword(t *testing.T) { t.Fatal(err) } - err = os.RemoveAll("testidx") - if err != nil { - t.Fatal(err) - } Config.DefaultIndexType = defaultIndexType }() @@ -754,9 +749,12 @@ func TestMultipleNestedBooleanMustNotSearchersOnScorch(t *testing.T) { defaultIndexType := Config.DefaultIndexType Config.DefaultIndexType = scorch.Name + tmpIndexPath := createTmpIndexPath(t) + defer cleanupTmpIndexPath(t, tmpIndexPath) + // create an index with default settings idxMapping := NewIndexMapping() - idx, err := New("testidx", idxMapping) + idx, err := New(tmpIndexPath, idxMapping) if err != nil { t.Fatal(err) } @@ -767,10 +765,6 @@ func TestMultipleNestedBooleanMustNotSearchersOnScorch(t *testing.T) { t.Fatal(err) } - err = os.RemoveAll("testidx") - if err != nil { - t.Fatal(err) - } Config.DefaultIndexType = defaultIndexType }() @@ -896,8 +890,11 @@ func TestMultipleNestedBooleanMustNotSearchersOnScorch(t *testing.T) { } func testBooleanMustNotSearcher(t *testing.T, indexName string) { + tmpIndexPath := createTmpIndexPath(t) + defer cleanupTmpIndexPath(t, tmpIndexPath) + im := NewIndexMapping() - idx, err := NewUsing("testidx", im, indexName, Config.DefaultKVStore, nil) + idx, err := NewUsing(tmpIndexPath, im, indexName, Config.DefaultKVStore, nil) if err != nil { t.Fatal(err) } @@ -907,11 +904,6 @@ func testBooleanMustNotSearcher(t *testing.T, indexName string) { if err != nil { t.Fatal(err) } - - err := os.RemoveAll("testidx") - if err != nil { - t.Fatal(err) - } }() docs := []struct { @@ -1021,9 +1013,12 @@ func TestQueryStringEmptyConjunctionSearcher(t *testing.T) { } func TestDisjunctionQueryIncorrectMin(t *testing.T) { + tmpIndexPath := createTmpIndexPath(t) + defer cleanupTmpIndexPath(t, tmpIndexPath) + // create an index with default settings idxMapping := NewIndexMapping() - idx, err := New("testidx", idxMapping) + idx, err := New(tmpIndexPath, idxMapping) if err != nil { 
t.Fatal(err) } @@ -1032,10 +1027,6 @@ func TestDisjunctionQueryIncorrectMin(t *testing.T) { if err != nil { t.Fatal(err) } - err = os.RemoveAll("testidx") - if err != nil { - t.Fatal(err) - } }() // create and insert documents as a batch @@ -1090,7 +1081,10 @@ func TestDisjunctionQueryIncorrectMin(t *testing.T) { } func TestBooleanShouldMinPropagation(t *testing.T) { - idx, err := New("testidx", NewIndexMapping()) + tmpIndexPath := createTmpIndexPath(t) + defer cleanupTmpIndexPath(t, tmpIndexPath) + + idx, err := New(tmpIndexPath, NewIndexMapping()) if err != nil { t.Fatal(err) } @@ -1100,11 +1094,6 @@ func TestBooleanShouldMinPropagation(t *testing.T) { if err != nil { t.Fatal(err) } - - err := os.RemoveAll("testidx") - if err != nil { - t.Fatal(err) - } }() doc1 := map[string]interface{}{ @@ -1160,7 +1149,10 @@ func TestBooleanShouldMinPropagation(t *testing.T) { } func TestDisjunctionMinPropagation(t *testing.T) { - idx, err := New("testidx", NewIndexMapping()) + tmpIndexPath := createTmpIndexPath(t) + defer cleanupTmpIndexPath(t, tmpIndexPath) + + idx, err := New(tmpIndexPath, NewIndexMapping()) if err != nil { t.Fatal(err) } @@ -1170,11 +1162,6 @@ func TestDisjunctionMinPropagation(t *testing.T) { if err != nil { t.Fatal(err) } - - err := os.RemoveAll("testidx") - if err != nil { - t.Fatal(err) - } }() doc1 := map[string]interface{}{ @@ -1287,7 +1274,11 @@ func TestBooleanMustSingleMatchNone(t *testing.T) { } idxMapping.DefaultAnalyzer = "custom1" - idx, err := New("testidx", idxMapping) + + tmpIndexPath := createTmpIndexPath(t) + defer cleanupTmpIndexPath(t, tmpIndexPath) + + idx, err := New(tmpIndexPath, idxMapping) if err != nil { t.Fatal(err) } @@ -1297,11 +1288,6 @@ func TestBooleanMustSingleMatchNone(t *testing.T) { if err != nil { t.Fatal(err) } - - err = os.RemoveAll("testidx") - if err != nil { - t.Fatal(err) - } }() doc := map[string]interface{}{ @@ -1360,7 +1346,11 @@ func TestBooleanMustNotSingleMatchNone(t *testing.T) { } 
idxMapping.DefaultAnalyzer = "custom1" - idx, err := New("testidx", idxMapping) + + tmpIndexPath := createTmpIndexPath(t) + defer cleanupTmpIndexPath(t, tmpIndexPath) + + idx, err := New(tmpIndexPath, idxMapping) if err != nil { t.Fatal(err) } @@ -1370,11 +1360,6 @@ func TestBooleanMustNotSingleMatchNone(t *testing.T) { if err != nil { t.Fatal(err) } - - err = os.RemoveAll("testidx") - if err != nil { - t.Fatal(err) - } }() doc := map[string]interface{}{ @@ -1419,12 +1404,8 @@ func TestBooleanMustNotSingleMatchNone(t *testing.T) { } func TestBooleanSearchBug1185(t *testing.T) { - defer func() { - err := os.RemoveAll("testidx") - if err != nil { - t.Fatal(err) - } - }() + tmpIndexPath := createTmpIndexPath(t) + defer cleanupTmpIndexPath(t, tmpIndexPath) of := NewTextFieldMapping() of.Analyzer = keyword.Name @@ -1436,7 +1417,7 @@ func TestBooleanSearchBug1185(t *testing.T) { m := NewIndexMapping() m.DefaultMapping = dm - idx, err := NewUsing("testidx", m, "scorch", "scorch", nil) + idx, err := NewUsing(tmpIndexPath, m, "scorch", "scorch", nil) if err != nil { t.Fatal(err) } @@ -1552,7 +1533,10 @@ func TestBooleanSearchBug1185(t *testing.T) { } func TestSearchScoreNone(t *testing.T) { - idx, err := NewUsing("testidx", NewIndexMapping(), scorch.Name, Config.DefaultKVStore, nil) + tmpIndexPath := createTmpIndexPath(t) + defer cleanupTmpIndexPath(t, tmpIndexPath) + + idx, err := NewUsing(tmpIndexPath, NewIndexMapping(), scorch.Name, Config.DefaultKVStore, nil) if err != nil { t.Fatal(err) } @@ -1562,10 +1546,6 @@ func TestSearchScoreNone(t *testing.T) { if err != nil { t.Fatal(err) } - err = os.RemoveAll("testidx") - if err != nil { - t.Fatal(err) - } }() doc := map[string]interface{}{ @@ -1607,7 +1587,10 @@ func TestGeoDistanceIssue1301(t *testing.T) { shopIndexMapping := NewIndexMapping() shopIndexMapping.DefaultMapping = shopMapping - idx, err := NewUsing("testidx", shopIndexMapping, scorch.Name, Config.DefaultKVStore, nil) + tmpIndexPath := createTmpIndexPath(t) + 
defer cleanupTmpIndexPath(t, tmpIndexPath) + + idx, err := NewUsing(tmpIndexPath, shopIndexMapping, scorch.Name, Config.DefaultKVStore, nil) if err != nil { t.Fatal(err) } @@ -1617,10 +1600,6 @@ func TestGeoDistanceIssue1301(t *testing.T) { if err != nil { t.Fatal(err) } - err = os.RemoveAll("testidx") - if err != nil { - t.Fatal(err) - } }() for i, g := range []string{"wecpkbeddsmf", "wecpk8tne453", "wecpkb80s09t"} { @@ -1671,7 +1650,11 @@ func TestSearchHighlightingWithRegexpReplacement(t *testing.T) { idxMapping.DefaultAnalyzer = "regexp_replace" idxMapping.StoreDynamic = true - idx, err := NewUsing("testidx", idxMapping, scorch.Name, Config.DefaultKVStore, nil) + + tmpIndexPath := createTmpIndexPath(t) + defer cleanupTmpIndexPath(t, tmpIndexPath) + + idx, err := NewUsing(tmpIndexPath, idxMapping, scorch.Name, Config.DefaultKVStore, nil) if err != nil { t.Fatal(err) } @@ -1681,11 +1664,6 @@ func TestSearchHighlightingWithRegexpReplacement(t *testing.T) { if err != nil { t.Fatal(err) } - - err = os.RemoveAll("testidx") - if err != nil { - t.Fatal(err) - } }() doc := map[string]interface{}{ diff --git a/test/versus_score_test.go b/test/versus_score_test.go index dcaf8d650..b0cb4afe1 100644 --- a/test/versus_score_test.go +++ b/test/versus_score_test.go @@ -16,6 +16,7 @@ package test import ( "fmt" + "io/ioutil" "os" "strconv" "testing" @@ -48,9 +49,19 @@ func TestDisjunctionSearchScoreIndexWithCompositeFields(t *testing.T) { func disjunctionQueryiOnIndexWithCompositeFields(indexName string, t *testing.T) []*search.DocumentMatch { + tmpIndexPath, err := ioutil.TempDir("", "bleve-testidx") + if err != nil { + t.Fatalf("error creating temp dir: %v", err) + } + defer func() { + err := os.RemoveAll(tmpIndexPath) + if err != nil { + t.Fatalf("error removing temp dir: %v", err) + } + }() // create an index idxMapping := mapping.NewIndexMapping() - idx, err := bleve.NewUsing("testidx", idxMapping, indexName, + idx, err := bleve.NewUsing(tmpIndexPath, idxMapping, 
indexName, bleve.Config.DefaultKVStore, nil) if err != nil { t.Error(err) @@ -61,10 +72,6 @@ func disjunctionQueryiOnIndexWithCompositeFields(indexName string, if err != nil { t.Error(err) } - err = os.RemoveAll("testidx") - if err != nil { - t.Error(err) - } }() // create and insert documents as a batch From 837a9e6e1b6a248d87e001202659ecbacc4b52bc Mon Sep 17 00:00:00 2001 From: Abhinav Dangeti Date: Mon, 4 May 2020 15:39:18 -0700 Subject: [PATCH 674/728] MB-38957: Document mapping's analyzer to be inherited correctly Check if the parent document mapping's default analyzer is available while determining the analyzer to use for a child field in the event that neither the child field has an analyzer set nor the intermediate parent property. Fixes: https://github.com/blevesearch/bleve/issues/1390 --- go.mod | 1 - mapping/document.go | 6 ++++++ search_test.go | 44 ++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 50 insertions(+), 1 deletion(-) diff --git a/go.mod b/go.mod index d38cf8f92..22133ea7a 100644 --- a/go.mod +++ b/go.mod @@ -10,7 +10,6 @@ require ( github.com/blevesearch/snowballstem v0.9.0 github.com/blevesearch/zap/v11 v11.0.7 github.com/blevesearch/zap/v12 v12.0.7 - github.com/couchbase/ghistogram v0.1.0 // indirect github.com/couchbase/moss v0.1.0 github.com/couchbase/vellum v1.0.1 github.com/golang/protobuf v1.3.2 diff --git a/mapping/document.go b/mapping/document.go index 15cb6b5fa..58ad06c79 100644 --- a/mapping/document.go +++ b/mapping/document.go @@ -316,10 +316,16 @@ func (dm *DocumentMapping) defaultAnalyzerName(path []string) string { if !ok { break } + if current.DefaultAnalyzer != "" { rv = current.DefaultAnalyzer } } + + if rv == "" { + rv = dm.DefaultAnalyzer + } + return rv } diff --git a/search_test.go b/search_test.go index 755999db5..3ee0827b4 100644 --- a/search_test.go +++ b/search_test.go @@ -1693,3 +1693,47 @@ func TestSearchHighlightingWithRegexpReplacement(t *testing.T) { t.Fatalf("Expected 1 hit, got: %v", 
sres.Total) } } + +func TestAnalyzerInheritance(t *testing.T) { + dMapping := mapping.NewDocumentStaticMapping() + dMapping.DefaultAnalyzer = keyword.Name + + fMapping := mapping.NewTextFieldMapping() + dMapping.AddFieldMappingsAt("city", fMapping) + + idxMapping := NewIndexMapping() + idxMapping.DefaultMapping = dMapping + + tmpIndexPath := createTmpIndexPath(t) + idx, err := New(tmpIndexPath, idxMapping) + if err != nil { + t.Fatal(err) + } + + defer func() { + err := idx.Close() + if err != nil { + t.Fatal(err) + } + }() + + doc := map[string]interface{}{ + "city": "San Francisco", + } + + if err = idx.Index("doc", doc); err != nil { + t.Fatal(err) + } + + q := NewTermQuery("San Francisco") + q.SetField("city") + + res, err := idx.Search(NewSearchRequest(q)) + if err != nil { + t.Fatal(err) + } + + if len(res.Hits) != 1 { + t.Fatalf("unexpected number of hits: %v", len(res.Hits)) + } +} From ead5428c6109e5c2abe03315f8942eb190fa2a0f Mon Sep 17 00:00:00 2001 From: Abhinav Dangeti Date: Tue, 5 May 2020 16:12:54 -0700 Subject: [PATCH 675/728] MB-38957: Adding more test cases + Note that the bug is only with inheriting the top level type mapping's analyzer. + The inheritance of any child mapping's analyzers to child fields works as expected. 
--- search_test.go | 125 +++++++++++++++++++++++++++++++++++++------------ 1 file changed, 95 insertions(+), 30 deletions(-) diff --git a/search_test.go b/search_test.go index 3ee0827b4..c31750965 100644 --- a/search_test.go +++ b/search_test.go @@ -1695,45 +1695,110 @@ func TestSearchHighlightingWithRegexpReplacement(t *testing.T) { } func TestAnalyzerInheritance(t *testing.T) { - dMapping := mapping.NewDocumentStaticMapping() - dMapping.DefaultAnalyzer = keyword.Name - - fMapping := mapping.NewTextFieldMapping() - dMapping.AddFieldMappingsAt("city", fMapping) - - idxMapping := NewIndexMapping() - idxMapping.DefaultMapping = dMapping - - tmpIndexPath := createTmpIndexPath(t) - idx, err := New(tmpIndexPath, idxMapping) - if err != nil { - t.Fatal(err) + tests := []struct { + mappingStr string + doc map[string]interface{} + queryField string + queryTerm string + }{ + { + /* + index_mapping: keyword + default_mapping: "" + -> child field (should inherit keyword) + */ + mappingStr: `{"default_mapping":{"enabled":true,"dynamic":false,"properties":` + + `{"city":{"enabled":true,"dynamic":false,"fields":[{"name":"city","type":"text",` + + `"store":false,"index":true}]}}},"default_analyzer":"keyword"}`, + doc: map[string]interface{}{"city": "San Francisco"}, + queryField: "city", + queryTerm: "San Francisco", + }, + { + /* + index_mapping: standard + default_mapping: keyword + -> child field (should inherit keyword) + */ + mappingStr: `{"default_mapping":{"enabled":true,"dynamic":false,"properties":` + + `{"city":{"enabled":true,"dynamic":false,"fields":[{"name":"city","type":"text",` + + `"index":true}]}},"default_analyzer":"keyword"},"default_analyzer":"standard"}`, + doc: map[string]interface{}{"city": "San Francisco"}, + queryField: "city", + queryTerm: "San Francisco", + }, + { + /* + index_mapping: standard + default_mapping: keyword + -> child mapping: "" + -> child field: (should inherit keyword) + */ + mappingStr: 
`{"default_mapping":{"enabled":true,"dynamic":false,"default_analyzer":` + + `"keyword","properties":{"address":{"enabled":true,"dynamic":false,"properties":` + + `{"city":{"enabled":true,"dynamic":false,"fields":[{"name":"city","type":"text",` + + `"index":true}]}}}}},"default_analyzer":"standard"}`, + doc: map[string]interface{}{ + "address": map[string]interface{}{"city": "San Francisco"}, + }, + queryField: "address.city", + queryTerm: "San Francisco", + }, + { + /* + index_mapping: standard + default_mapping: "" + -> child mapping: "keyword" + -> child mapping: "" + -> child field: (should inherit keyword) + */ + mappingStr: `{"default_mapping":{"enabled":true,"dynamic":false,"properties":` + + `{"address":{"enabled":true,"dynamic":false,"default_analyzer":"keyword",` + + `"properties":{"state":{"enabled":true,"dynamic":false,"properties":{"city":` + + `{"enabled":true,"dynamic":false,"fields":[{"name":"city","type":"text",` + + `"store":false,"index":true}]}}}}}}},"default_analyer":"standard"}`, + doc: map[string]interface{}{ + "address": map[string]interface{}{ + "state": map[string]interface{}{"city": "San Francisco"}, + }, + }, + queryField: "address.state.city", + queryTerm: "San Francisco", + }, } - defer func() { - err := idx.Close() + for i := range tests { + idxMapping := NewIndexMapping() + if err := idxMapping.UnmarshalJSON([]byte(tests[i].mappingStr)); err != nil { + t.Fatal(err) + } + + tmpIndexPath := createTmpIndexPath(t) + idx, err := New(tmpIndexPath, idxMapping) if err != nil { t.Fatal(err) } - }() - doc := map[string]interface{}{ - "city": "San Francisco", - } + defer func() { + if err := idx.Close(); err != nil { + t.Fatal(err) + } + }() - if err = idx.Index("doc", doc); err != nil { - t.Fatal(err) - } + if err = idx.Index("doc", tests[i].doc); err != nil { + t.Fatal(err) + } - q := NewTermQuery("San Francisco") - q.SetField("city") + q := NewTermQuery(tests[i].queryTerm) + q.SetField(tests[i].queryField) - res, err := 
idx.Search(NewSearchRequest(q)) - if err != nil { - t.Fatal(err) - } + res, err := idx.Search(NewSearchRequest(q)) + if err != nil { + t.Fatal(err) + } - if len(res.Hits) != 1 { - t.Fatalf("unexpected number of hits: %v", len(res.Hits)) + if len(res.Hits) != 1 { + t.Errorf("[%d] Unexpected number of hits: %v", i, len(res.Hits)) + } } } From e2db10a90111893a597bbbe261a4bdd1fbd516e6 Mon Sep 17 00:00:00 2001 From: Abhinav Dangeti Date: Wed, 6 May 2020 11:39:23 -0700 Subject: [PATCH 676/728] MB-38957: Initialize rv to dm.DefaultAnalyzer to start with + This is a better approach than to overwrite at the end after checking for an empty string. --- mapping/document.go | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/mapping/document.go b/mapping/document.go index 58ad06c79..4083d55d5 100644 --- a/mapping/document.go +++ b/mapping/document.go @@ -308,8 +308,8 @@ func (dm *DocumentMapping) UnmarshalJSON(data []byte) error { } func (dm *DocumentMapping) defaultAnalyzerName(path []string) string { - rv := "" current := dm + rv := current.DefaultAnalyzer for _, pathElement := range path { var ok bool current, ok = current.Properties[pathElement] @@ -322,10 +322,6 @@ func (dm *DocumentMapping) defaultAnalyzerName(path []string) string { } } - if rv == "" { - rv = dm.DefaultAnalyzer - } - return rv } From 124045f0fea96ef2f6dc2a5b1a0bd6e49691f42e Mon Sep 17 00:00:00 2001 From: Abhinav Dangeti Date: Wed, 6 May 2020 13:31:39 -0700 Subject: [PATCH 677/728] Remove unintended extra lines --- mapping/document.go | 3 --- 1 file changed, 3 deletions(-) diff --git a/mapping/document.go b/mapping/document.go index 4083d55d5..355a602e5 100644 --- a/mapping/document.go +++ b/mapping/document.go @@ -251,7 +251,6 @@ func (dm *DocumentMapping) AddFieldMapping(fm *FieldMapping) { // UnmarshalJSON offers custom unmarshaling with optional strict validation func (dm *DocumentMapping) UnmarshalJSON(data []byte) error { - var tmp map[string]json.RawMessage err := 
json.Unmarshal(data, &tmp) if err != nil { @@ -316,12 +315,10 @@ func (dm *DocumentMapping) defaultAnalyzerName(path []string) string { if !ok { break } - if current.DefaultAnalyzer != "" { rv = current.DefaultAnalyzer } } - return rv } From 3d8737411a6785793b0ac33a3c9d0af54e3ca48e Mon Sep 17 00:00:00 2001 From: Sreekanth Sivasankaran Date: Thu, 7 May 2020 10:38:08 +0530 Subject: [PATCH 678/728] Api to trigger a force merge operation on an index Introducing a new synchronous ForceMerge api to enable users conveniently invoke a manual merge operation on an online scorch index. This api would let advanced bleve users to compact their index all the way down to a single file segment based index. The api takes an optional mergePlanOptions argument which would be applied for the forceful merge operations, while the original mergePlanOptions property of the index remains intact after the forceful merge cycle exits. In the absense of a mergePlanOptions argument, default policy assumed would be an aggressive one to target a single file segment index with a maximum segment size of 1B approx. The api would trigger the merger's work loop only once during it's invocation and hence it is the caller's responsibility to actively monitor the expected file segments count at the root and trigger any subsequent forceMerge invocations if needed to meet the expected segment's count at root. The caller could also pass a cancelCh for cancelling an ongoing forceMerge operation in case of any runtime anomalies during the merge operation. The api also takes an optional parameter to override the numSnapshotsToKeep for reducing the space wastage out of segments with duplicate contents. The caller should also ensure that there is enough disk space before invoking this compaction/merge operation as it needs at least double the disk space whenever there is a full compaction. 
--- index/scorch/introducer.go | 6 +- index/scorch/merge.go | 128 ++++++++++++++- index/scorch/mergeplan/merge_plan.go | 11 ++ index/scorch/persister.go | 25 ++- index/scorch/scorch.go | 3 + index/scorch/scorch_test.go | 237 ++++++++++++++++++++++++++- index/scorch/stats.go | 3 + index_alias_impl.go | 12 ++ 8 files changed, 411 insertions(+), 14 deletions(-) diff --git a/index/scorch/introducer.go b/index/scorch/introducer.go index 64ca969bd..3325c9b17 100644 --- a/index/scorch/introducer.go +++ b/index/scorch/introducer.go @@ -313,11 +313,15 @@ func (s *Scorch) introduceMerge(nextMerge *segmentMerge) { defer func() { _ = root.DecRef() }() + if nextMerge.creator == "" { + nextMerge.creator = "introduceMerge" + } + newSnapshot := &IndexSnapshot{ parent: s, internal: root.internal, refs: 1, - creator: "introduceMerge", + creator: nextMerge.creator, } // iterate through current segments diff --git a/index/scorch/merge.go b/index/scorch/merge.go index 37dca529a..3be781f73 100644 --- a/index/scorch/merge.go +++ b/index/scorch/merge.go @@ -27,14 +27,18 @@ import ( "github.com/blevesearch/bleve/index/scorch/segment" ) +var numSnapShotsToKeepOverRuler = "introduceForceMerge" + func (s *Scorch) mergerLoop() { var lastEpochMergePlanned uint64 + var ctrlMsg *mergerCtrl mergePlannerOptions, err := s.parseMergePlannerOptions() if err != nil { s.fireAsyncError(fmt.Errorf("mergePlannerOption json parsing err: %v", err)) s.asyncTasks.Done() return } + ctrlMsgDflt := &mergerCtrl{options: mergePlannerOptions, doneCh: nil} OUTER: for { @@ -53,16 +57,30 @@ OUTER: atomic.StoreUint64(&s.iStats.mergeEpoch, ourSnapshot.epoch) s.rootLock.Unlock() - if ourSnapshot.epoch != lastEpochMergePlanned { + if ctrlMsg == nil && ourSnapshot.epoch != lastEpochMergePlanned { + ctrlMsg = ctrlMsgDflt + } + if ctrlMsg != nil { startTime := time.Now() // lets get started - err := s.planMergeAtSnapshot(ourSnapshot, mergePlannerOptions) + err := s.planMergeAtSnapshot(ourSnapshot, ctrlMsg.options, + 
ctrlMsg.creator, ctrlMsg.cancelCh) if err != nil { atomic.StoreUint64(&s.iStats.mergeEpoch, 0) if err == segment.ErrClosed { // index has been closed _ = ourSnapshot.DecRef() + + // continue the workloop on a user triggered cancel + if ctrlMsg.doneCh != nil { + close(ctrlMsg.doneCh) + ctrlMsg = nil + continue OUTER + } + + // exit the workloop on index closure + ctrlMsg = nil break OUTER } s.fireAsyncError(fmt.Errorf("merging err: %v", err)) @@ -70,6 +88,12 @@ OUTER: atomic.AddUint64(&s.stats.TotFileMergeLoopErr, 1) continue OUTER } + + if ctrlMsg.doneCh != nil { + close(ctrlMsg.doneCh) + } + ctrlMsg = nil + lastEpochMergePlanned = ourSnapshot.epoch atomic.StoreUint64(&s.stats.LastMergedEpoch, ourSnapshot.epoch) @@ -90,6 +114,8 @@ OUTER: case <-s.closeCh: break OUTER case s.persisterNotifier <- ew: + case ctrlMsg = <-s.mergerKickCh: + continue OUTER } // now wait for persister (but also detect close) @@ -97,6 +123,7 @@ OUTER: case <-s.closeCh: break OUTER case <-ew.notifyCh: + case ctrlMsg = <-s.mergerKickCh: } } @@ -106,6 +133,76 @@ OUTER: s.asyncTasks.Done() } +type mergerCtrl struct { + creator string + options *mergeplan.MergePlanOptions + doneCh chan struct{} + cancelCh chan struct{} +} + +// MergeRequest represents various control +// parameters for the ForceMerge API. +type MergeRequest struct { + // MergeOptions specify the merge policy applied during + // the forced merge cycles. This doesn't override the + // index's original merge policy. + MergeOptions *mergeplan.MergePlanOptions + + // OverrideNumSnapshotsToKeep specify whether to retain + // a number of older snapshots dictated by numSnapshotsToKeep + // during a forced merge cycle. Enabling this reduces the + // disk space requirements during a forced merge operation. + OverrideNumSnapshotsToKeep bool + + // CancelCh helps in cancelling an ongoing merge operation. + CancelCh chan struct{} +} + +// ForceMerge helps users trigger a merge operation on +// an online scorch index. 
+func (s *Scorch) ForceMerge(mr *MergeRequest) error { + // check whether force merge is already under processing + s.rootLock.Lock() + if s.stats.TotFileMergeForceOpsStarted > + s.stats.TotFileMergeForceOpsCompleted { + s.rootLock.Unlock() + return fmt.Errorf("force merge already in progress") + } + + s.stats.TotFileMergeForceOpsStarted++ + s.rootLock.Unlock() + + if mr.MergeOptions == nil { + // assume the default single segment merge policy + mr.MergeOptions = &mergeplan.SingleSegmentMergePlanOptions + } + var ssCreator string + if mr.OverrideNumSnapshotsToKeep { + ssCreator = numSnapShotsToKeepOverRuler + } + msg := &mergerCtrl{creator: ssCreator, + options: mr.MergeOptions, + doneCh: make(chan struct{}), + cancelCh: mr.CancelCh, + } + + // kick the merger workloop + select { + case s.mergerKickCh <- msg: + case <-s.closeCh: + return nil + } + + // wait for the force merge operation completion + select { + case <-msg.doneCh: + atomic.AddUint64(&s.stats.TotFileMergeForceOpsCompleted, 1) + case <-s.closeCh: + } + + return nil +} + func (s *Scorch) parseMergePlannerOptions() (*mergeplan.MergePlanOptions, error) { mergePlannerOptions := mergeplan.DefaultMergePlanOptions @@ -129,7 +226,8 @@ func (s *Scorch) parseMergePlannerOptions() (*mergeplan.MergePlanOptions, } func (s *Scorch) planMergeAtSnapshot(ourSnapshot *IndexSnapshot, - options *mergeplan.MergePlanOptions) error { + options *mergeplan.MergePlanOptions, creator string, + cancelCh chan struct{}) error { // build list of persisted segments in this snapshot var onlyPersistedSnapshots []mergeplan.Segment for _, segmentSnapshot := range ourSnapshot.segment { @@ -158,6 +256,26 @@ func (s *Scorch) planMergeAtSnapshot(ourSnapshot *IndexSnapshot, // process tasks in serial for now var filenames []string + closeCh := make(chan struct{}) + defer func() { + select { + case <-closeCh: + default: + close(closeCh) + } + }() + // cancel the merge operation on events like the index closure + // or upon a user cancel. 
+ go func() { + select { + case <-s.closeCh: + close(closeCh) + case <-cancelCh: + close(closeCh) + case <-closeCh: + } + }() + for _, task := range resultMergePlan.Tasks { if len(task.Segments) == 0 { atomic.AddUint64(&s.stats.TotFileMergePlanTasksSegmentsEmpty, 1) @@ -203,7 +321,7 @@ func (s *Scorch) planMergeAtSnapshot(ourSnapshot *IndexSnapshot, atomic.AddUint64(&s.stats.TotFileMergeZapBeg, 1) newDocNums, _, err := s.segPlugin.Merge(segmentsToMerge, docsToDrop, path, - s.closeCh, s) + closeCh, s) atomic.AddUint64(&s.stats.TotFileMergeZapEnd, 1) fileMergeZapTime := uint64(time.Since(fileMergeZapStartTime)) @@ -241,6 +359,7 @@ func (s *Scorch) planMergeAtSnapshot(ourSnapshot *IndexSnapshot, oldNewDocNums: oldNewDocNums, new: seg, notify: make(chan *IndexSnapshot), + creator: creator, } // give it to the introducer @@ -285,6 +404,7 @@ type segmentMerge struct { oldNewDocNums map[uint64][]uint64 new segment.Segment notify chan *IndexSnapshot + creator string } // perform a merging of the given SegmentBase instances into a new, diff --git a/index/scorch/mergeplan/merge_plan.go b/index/scorch/mergeplan/merge_plan.go index c2a0d3c64..e02923cc1 100644 --- a/index/scorch/mergeplan/merge_plan.go +++ b/index/scorch/mergeplan/merge_plan.go @@ -134,6 +134,17 @@ var DefaultMergePlanOptions = MergePlanOptions{ ReclaimDeletesWeight: 2.0, } +// SingleSegmentMergePlanOptions helps in creating a +// single segment index. 
+var SingleSegmentMergePlanOptions = MergePlanOptions{ + MaxSegmentsPerTier: 1, + MaxSegmentSize: 1 << 30, + TierGrowth: 1.0, + SegmentsPerMergeTask: 10, + FloorSegmentSize: 1 << 30, + ReclaimDeletesWeight: 2.0, +} + // ------------------------------------------- func plan(segmentsIn []Segment, o *MergePlanOptions) (*MergePlan, error) { diff --git a/index/scorch/persister.go b/index/scorch/persister.go index ffa656693..aec1c0fea 100644 --- a/index/scorch/persister.go +++ b/index/scorch/persister.go @@ -209,7 +209,7 @@ OUTER: case s.introducerNotifier <- w: } - s.removeOldData() // might as well cleanup while waiting + s.removeOldData(s.getNumSnapshotsToKeep(ourSnapshot)) // might as well cleanup while waiting atomic.AddUint64(&s.stats.TotPersistLoopWait, 1) @@ -276,7 +276,7 @@ func (s *Scorch) pausePersisterForMergerCatchUp(lastPersistedEpoch uint64, // 1. Too many older snapshots awaiting the clean up. // 2. The merger could be lagging behind on merging the disk files. if numFilesOnDisk > uint64(po.PersisterNapUnderNumFiles) { - s.removeOldData() + s.removeOldData(s.numSnapshotsToKeep) numFilesOnDisk, _, _ = s.diskFileStats(nil) } @@ -780,8 +780,8 @@ func (s *Scorch) loadSegment(segmentBucket *bolt.Bucket) (*SegmentSnapshot, erro return rv, nil } -func (s *Scorch) removeOldData() { - removed, err := s.removeOldBoltSnapshots() +func (s *Scorch) removeOldData(numSnapshotsToKeep int) { + removed, err := s.removeOldBoltSnapshots(numSnapshotsToKeep) if err != nil { s.fireAsyncError(fmt.Errorf("got err removing old bolt snapshots: %v", err)) } @@ -798,22 +798,31 @@ func (s *Scorch) removeOldData() { // rollback'ability. 
var NumSnapshotsToKeep = 1 +func (s *Scorch) getNumSnapshotsToKeep(ourSnapshot *IndexSnapshot) int { + if ourSnapshot != nil && + ourSnapshot.creator != numSnapShotsToKeepOverRuler { + return s.numSnapshotsToKeep + } + return 1 +} + // Removes enough snapshots from the rootBolt so that the // s.eligibleForRemoval stays under the NumSnapshotsToKeep policy. -func (s *Scorch) removeOldBoltSnapshots() (numRemoved int, err error) { +func (s *Scorch) removeOldBoltSnapshots(numSnapshotsToKeep int) ( + numRemoved int, err error) { persistedEpochs, err := s.RootBoltSnapshotEpochs() if err != nil { return 0, err } - if len(persistedEpochs) <= s.numSnapshotsToKeep { + if len(persistedEpochs) <= numSnapshotsToKeep { // we need to keep everything return 0, nil } // make a map of epochs to protect from deletion - protectedEpochs := make(map[uint64]struct{}, s.numSnapshotsToKeep) - for _, epoch := range persistedEpochs[0:s.numSnapshotsToKeep] { + protectedEpochs := make(map[uint64]struct{}, numSnapshotsToKeep) + for _, epoch := range persistedEpochs[0:numSnapshotsToKeep] { protectedEpochs[epoch] = struct{}{} } diff --git a/index/scorch/scorch.go b/index/scorch/scorch.go index 698aaf16a..affda93a3 100644 --- a/index/scorch/scorch.go +++ b/index/scorch/scorch.go @@ -77,6 +77,8 @@ type Scorch struct { pauseCount uint64 + mergerKickCh chan *mergerCtrl + segPlugin segment.Plugin } @@ -101,6 +103,7 @@ func NewScorch(storeName string, nextSnapshotEpoch: 1, closeCh: make(chan struct{}), ineligibleForRemoval: map[string]bool{}, + mergerKickCh: make(chan *mergerCtrl, 1), segPlugin: defaultSegmentPlugin, } diff --git a/index/scorch/scorch_test.go b/index/scorch/scorch_test.go index b168728ee..1e6f0ab7f 100644 --- a/index/scorch/scorch_test.go +++ b/index/scorch/scorch_test.go @@ -32,6 +32,7 @@ import ( regexpTokenizer "github.com/blevesearch/bleve/analysis/tokenizer/regexp" "github.com/blevesearch/bleve/document" "github.com/blevesearch/bleve/index" + 
"github.com/blevesearch/bleve/index/scorch/mergeplan" "github.com/blevesearch/bleve/mapping" ) @@ -2149,4 +2150,238 @@ func TestForceVersion(t *testing.T) { if err == nil { t.Fatalf("expected an error opening an unsupported vesion, got nil") } -} \ No newline at end of file +} + +func TestIndexForceMerge(t *testing.T) { + cfg := CreateConfig("TestIndexBatch") + err := InitTest(cfg) + tmp := struct { + MaxSegmentsPerTier int `json:"maxSegmentsPerTier"` + SegmentsPerMergeTask int `json:"segmentsPerMergeTask"` + FloorSegmentSize int64 `json:"floorSegmentSize"` + }{ + int(1), + int(1), + int64(2), + } + cfg["scorchMergePlanOptions"] = &tmp + + if err != nil { + t.Fatal(err) + } + defer func() { + err := DestroyTest(cfg) + if err != nil { + t.Log(err) + } + }() + + analysisQueue := index.NewAnalysisQueue(1) + idx, err := NewScorch(Name, cfg, analysisQueue) + if err != nil { + t.Fatal(err) + } + err = idx.Open() + if err != nil { + t.Fatalf("error opening index: %v", err) + } + defer func() { + err := idx.Close() + if err != nil { + t.Fatal(err) + } + }() + + var expectedCount uint64 + batch := index.NewBatch() + for i := 0; i < 10; i++ { + doc := document.NewDocument(fmt.Sprintf("doc1-%d", i)) + doc.AddField(document.NewTextField("name", []uint64{}, []byte(fmt.Sprintf("text1-%d", i)))) + batch.Update(doc) + doc = document.NewDocument(fmt.Sprintf("doc2-%d", i)) + doc.AddField(document.NewTextField("name", []uint64{}, []byte(fmt.Sprintf("text2-%d", i)))) + batch.Update(doc) + err = idx.Batch(batch) + if err != nil { + t.Error(err) + } + batch.Reset() + expectedCount += 2 + } + + // verify doc count + indexReader, err := idx.Reader() + if err != nil { + t.Error(err) + } + docCount, err := indexReader.DocCount() + if err != nil { + t.Fatal(err) + } + if docCount != expectedCount { + t.Errorf("Expected document count to be %d got %d", expectedCount, docCount) + } + err = indexReader.Close() + if err != nil { + t.Fatal(err) + } + if ns, ok := 
idx.StatsMap()["TotFileSegmentsAtRoot"].(uint64); ok && + ns != 10 { + t.Errorf("expected 10 root file segments, got: %d", ns) + } + + for { + if ns, ok := idx.StatsMap()["TotFileSegmentsAtRoot"].(uint64); ok && + ns == 1 { + break + } + + if si, ok := idx.(*Scorch); ok { + err := si.ForceMerge(&MergeRequest{ + MergeOptions: &mergeplan.MergePlanOptions{ + MaxSegmentsPerTier: 1, + MaxSegmentSize: 10000, + SegmentsPerMergeTask: 10, + FloorSegmentSize: 10000}, + OverrideNumSnapshotsToKeep: true}) + if err != nil { + t.Errorf("RequestMerge failed, err: %v", err) + } + } + } + + // verify the final root segment count + if ns, ok := idx.StatsMap()["TotFileSegmentsAtRoot"].(uint64); ok && + ns != 1 { + t.Errorf("expected a single root file segments, got: %d", ns) + } + +} + +func TestCancelIndexForceMerge(t *testing.T) { + cfg := CreateConfig("TestIndexBatch") + err := InitTest(cfg) + tmp := struct { + MaxSegmentsPerTier int `json:"maxSegmentsPerTier"` + SegmentsPerMergeTask int `json:"segmentsPerMergeTask"` + FloorSegmentSize int64 `json:"floorSegmentSize"` + }{ + int(1), + int(1), + int64(2), + } + cfg["scorchMergePlanOptions"] = &tmp + + if err != nil { + t.Fatal(err) + } + defer func() { + err := DestroyTest(cfg) + if err != nil { + t.Log(err) + } + }() + + analysisQueue := index.NewAnalysisQueue(1) + idx, err := NewScorch(Name, cfg, analysisQueue) + if err != nil { + t.Fatal(err) + } + err = idx.Open() + if err != nil { + t.Fatalf("error opening index: %v", err) + } + defer func() { + err := idx.Close() + if err != nil { + t.Fatal(err) + } + }() + + var expectedCount uint64 + batch := index.NewBatch() + for i := 0; i < 20; i++ { + doc := document.NewDocument(fmt.Sprintf("doc1-%d", i)) + doc.AddField(document.NewTextField("name", []uint64{}, []byte(fmt.Sprintf("text1-%d", i)))) + batch.Update(doc) + doc = document.NewDocument(fmt.Sprintf("doc2-%d", i)) + doc.AddField(document.NewTextField("name", []uint64{}, []byte(fmt.Sprintf("text2-%d", i)))) + batch.Update(doc) + 
err = idx.Batch(batch) + if err != nil { + t.Error(err) + } + batch.Reset() + expectedCount += 2 + } + + // verify doc count + indexReader, err := idx.Reader() + if err != nil { + t.Error(err) + } + docCount, err := indexReader.DocCount() + if err != nil { + t.Fatal(err) + } + if docCount != expectedCount { + t.Errorf("Expected document count to be %d got %d", expectedCount, docCount) + } + err = indexReader.Close() + if err != nil { + t.Fatal(err) + } + + // no merge operations are expected as per the original merge policy. + if ns, ok := idx.StatsMap()["TotFileSegmentsAtRoot"].(uint64); ok && + ns != 20 { + t.Errorf("expected 20 root file segments, got: %d", ns) + } + + fsar := uint64(0) + cancelCh := make(chan struct{}) + // cancel the force merge operation once the root has some new merge + // introductions. ie if the root has lesser file segments than earlier. + go func() { + for { + if nval, ok := idx.StatsMap()["TotFileSegmentsAtRoot"].(uint64); ok && + nval < fsar { + close(cancelCh) + return + } + time.Sleep(time.Millisecond * 5) + } + }() + +OUTER: + for { + select { + case <-cancelCh: + break OUTER + default: + } + // get the number of file segments at root right before + // the force merge operation. 
+ fsar, _ = idx.StatsMap()["TotFileSegmentsAtRoot"].(uint64) + + if si, ok := idx.(*Scorch); ok { + err := si.ForceMerge(&MergeRequest{ + MergeOptions: &mergeplan.MergePlanOptions{ + MaxSegmentsPerTier: 1, + MaxSegmentSize: 10000, + SegmentsPerMergeTask: 5, + FloorSegmentSize: 10000}, + OverrideNumSnapshotsToKeep: true, + CancelCh: cancelCh}) + if err != nil { + t.Errorf("RequestMerge failed, err: %v", err) + } + } + } + + // verify the final root file segment count or forceMerge completion + if ns, ok := idx.StatsMap()["TotFileSegmentsAtRoot"].(uint64); ok && + ns == 1 { + t.Errorf("expected many files at root, but got: %d segments", ns) + } +} diff --git a/index/scorch/stats.go b/index/scorch/stats.go index e638362a7..2900ac0df 100644 --- a/index/scorch/stats.go +++ b/index/scorch/stats.go @@ -82,6 +82,9 @@ type Stats struct { TotFileMergeLoopErr uint64 TotFileMergeLoopEnd uint64 + TotFileMergeForceOpsStarted uint64 + TotFileMergeForceOpsCompleted uint64 + TotFileMergePlan uint64 TotFileMergePlanErr uint64 TotFileMergePlanNone uint64 diff --git a/index_alias_impl.go b/index_alias_impl.go index 4366fc795..94eb50f4e 100644 --- a/index_alias_impl.go +++ b/index_alias_impl.go @@ -44,6 +44,18 @@ func NewIndexAlias(indexes ...Index) *indexAliasImpl { } } +// Indexes just returns the indexes included in the +// index alias at the moment in an unsafe way. +// Caller must be aware that the results will be +// inconsistent if there are concurrent Add/Remove +// operations on the alias. 
+func (i *indexAliasImpl) Indexes() []Index { + i.mutex.RLock() + rv := i.indexes + i.mutex.RUnlock() + return rv +} + func (i *indexAliasImpl) isAliasToSingleIndex() error { if len(i.indexes) < 1 { return ErrorAliasEmpty From 50532472b773525ac3606fb4529c812bf4578f5c Mon Sep 17 00:00:00 2001 From: Sreekanth Sivasankaran Date: Thu, 7 May 2020 11:49:12 +0530 Subject: [PATCH 679/728] fixing the raciness issue --- index/scorch/merge.go | 10 ++- index/scorch/scorch_test.go | 163 ++++++++++++++++-------------------- 2 files changed, 80 insertions(+), 93 deletions(-) diff --git a/index/scorch/merge.go b/index/scorch/merge.go index 3be781f73..c0464fba8 100644 --- a/index/scorch/merge.go +++ b/index/scorch/merge.go @@ -257,21 +257,23 @@ func (s *Scorch) planMergeAtSnapshot(ourSnapshot *IndexSnapshot, var filenames []string closeCh := make(chan struct{}) - defer func() { + cleanup := func() { select { case <-closeCh: default: close(closeCh) } - }() + } + defer cleanup() + // cancel the merge operation on events like the index closure // or upon a user cancel. 
go func() { select { case <-s.closeCh: - close(closeCh) + cleanup() case <-cancelCh: - close(closeCh) + cleanup() case <-closeCh: } }() diff --git a/index/scorch/scorch_test.go b/index/scorch/scorch_test.go index 1e6f0ab7f..2e26f6b24 100644 --- a/index/scorch/scorch_test.go +++ b/index/scorch/scorch_test.go @@ -23,6 +23,7 @@ import ( "regexp" "strconv" "sync" + "sync/atomic" "testing" "time" @@ -2153,8 +2154,14 @@ func TestForceVersion(t *testing.T) { } func TestIndexForceMerge(t *testing.T) { - cfg := CreateConfig("TestIndexBatch") + cfg := CreateConfig("TestIndexForceMerge") err := InitTest(cfg) + defer func() { + err := DestroyTest(cfg) + if err != nil { + t.Log(err) + } + }() tmp := struct { MaxSegmentsPerTier int `json:"maxSegmentsPerTier"` SegmentsPerMergeTask int `json:"segmentsPerMergeTask"` @@ -2166,16 +2173,6 @@ func TestIndexForceMerge(t *testing.T) { } cfg["scorchMergePlanOptions"] = &tmp - if err != nil { - t.Fatal(err) - } - defer func() { - err := DestroyTest(cfg) - if err != nil { - t.Log(err) - } - }() - analysisQueue := index.NewAnalysisQueue(1) idx, err := NewScorch(Name, cfg, analysisQueue) if err != nil { @@ -2185,13 +2182,6 @@ func TestIndexForceMerge(t *testing.T) { if err != nil { t.Fatalf("error opening index: %v", err) } - defer func() { - err := idx.Close() - if err != nil { - t.Fatal(err) - } - }() - var expectedCount uint64 batch := index.NewBatch() for i := 0; i < 10; i++ { @@ -2225,42 +2215,57 @@ func TestIndexForceMerge(t *testing.T) { if err != nil { t.Fatal(err) } - if ns, ok := idx.StatsMap()["TotFileSegmentsAtRoot"].(uint64); ok && - ns != 10 { - t.Errorf("expected 10 root file segments, got: %d", ns) + var si *Scorch + var ok bool + if si, ok = idx.(*Scorch); !ok { + t.Errorf("expects a scorch index") + } + + nfs := atomic.LoadUint64(&si.stats.TotFileSegmentsAtRoot) + if nfs != 10 { + t.Errorf("expected 10 root file segments, got: %d", nfs) } for { - if ns, ok := idx.StatsMap()["TotFileSegmentsAtRoot"].(uint64); ok && - ns == 1 
{ + if atomic.LoadUint64(&si.stats.TotFileSegmentsAtRoot) == 1 { break } - - if si, ok := idx.(*Scorch); ok { - err := si.ForceMerge(&MergeRequest{ - MergeOptions: &mergeplan.MergePlanOptions{ - MaxSegmentsPerTier: 1, - MaxSegmentSize: 10000, - SegmentsPerMergeTask: 10, - FloorSegmentSize: 10000}, - OverrideNumSnapshotsToKeep: true}) - if err != nil { - t.Errorf("RequestMerge failed, err: %v", err) - } + err := si.ForceMerge(&MergeRequest{ + MergeOptions: &mergeplan.MergePlanOptions{ + MaxSegmentsPerTier: 1, + MaxSegmentSize: 10000, + SegmentsPerMergeTask: 10, + FloorSegmentSize: 10000}, + OverrideNumSnapshotsToKeep: true}) + if err != nil { + t.Errorf("ForceMerge failed, err: %v", err) } } // verify the final root segment count - if ns, ok := idx.StatsMap()["TotFileSegmentsAtRoot"].(uint64); ok && - ns != 1 { - t.Errorf("expected a single root file segments, got: %d", ns) + if atomic.LoadUint64(&si.stats.TotFileSegmentsAtRoot) != 1 { + t.Errorf("expected a single root file segments, got: %d", + atomic.LoadUint64(&si.stats.TotFileSegmentsAtRoot)) + } + err = idx.Close() + if err != nil { + t.Fatal(err) } - } func TestCancelIndexForceMerge(t *testing.T) { - cfg := CreateConfig("TestIndexBatch") + cfg := CreateConfig("TestCancelIndexForceMerge") err := InitTest(cfg) + if err != nil { + t.Fatal(err) + } + defer func() { + err := DestroyTest(cfg) + if err != nil { + t.Log(err) + } + }() + tmp := struct { MaxSegmentsPerTier int `json:"maxSegmentsPerTier"` SegmentsPerMergeTask int `json:"segmentsPerMergeTask"` @@ -2272,16 +2277,6 @@ func TestCancelIndexForceMerge(t *testing.T) { } cfg["scorchMergePlanOptions"] = &tmp - if err != nil { - t.Fatal(err) - } - defer func() { - err := DestroyTest(cfg) - if err != nil { - t.Log(err) - } - }() - analysisQueue := index.NewAnalysisQueue(1) idx, err := NewScorch(Name, cfg, analysisQueue) if err != nil { @@ -2291,12 +2286,6 @@ func TestCancelIndexForceMerge(t *testing.T) { if err != nil { t.Fatalf("error opening index: %v", err) } - 
defer func() { - err := idx.Close() - if err != nil { - t.Fatal(err) - } - }() var expectedCount uint64 batch := index.NewBatch() @@ -2332,20 +2321,25 @@ func TestCancelIndexForceMerge(t *testing.T) { t.Fatal(err) } + var si *Scorch + var ok bool + if si, ok = idx.(*Scorch); !ok { + t.Fatal("expects a scorch index") + } + // no merge operations are expected as per the original merge policy. - if ns, ok := idx.StatsMap()["TotFileSegmentsAtRoot"].(uint64); ok && - ns != 20 { - t.Errorf("expected 20 root file segments, got: %d", ns) + nfsr := atomic.LoadUint64(&si.stats.TotFileSegmentsAtRoot) + if nfsr != 20 { + t.Errorf("expected 20 root file segments, got: %d", nfsr) } - fsar := uint64(0) cancelCh := make(chan struct{}) // cancel the force merge operation once the root has some new merge // introductions. ie if the root has lesser file segments than earlier. go func() { for { - if nval, ok := idx.StatsMap()["TotFileSegmentsAtRoot"].(uint64); ok && - nval < fsar { + nval := atomic.LoadUint64(&si.stats.TotFileSegmentsAtRoot) + if nval < nfsr { close(cancelCh) return } @@ -2353,35 +2347,26 @@ func TestCancelIndexForceMerge(t *testing.T) { } }() -OUTER: - for { - select { - case <-cancelCh: - break OUTER - default: - } - // get the number of file segments at root right before - // the force merge operation. 
- fsar, _ = idx.StatsMap()["TotFileSegmentsAtRoot"].(uint64) - - if si, ok := idx.(*Scorch); ok { - err := si.ForceMerge(&MergeRequest{ - MergeOptions: &mergeplan.MergePlanOptions{ - MaxSegmentsPerTier: 1, - MaxSegmentSize: 10000, - SegmentsPerMergeTask: 5, - FloorSegmentSize: 10000}, - OverrideNumSnapshotsToKeep: true, - CancelCh: cancelCh}) - if err != nil { - t.Errorf("RequestMerge failed, err: %v", err) - } - } + err = si.ForceMerge(&MergeRequest{ + MergeOptions: &mergeplan.MergePlanOptions{ + MaxSegmentsPerTier: 1, + MaxSegmentSize: 10000, + SegmentsPerMergeTask: 5, + FloorSegmentSize: 10000}, + OverrideNumSnapshotsToKeep: true, + CancelCh: cancelCh}) + if err != nil { + t.Errorf("ForceMerge failed, err: %v", err) } // verify the final root file segment count or forceMerge completion - if ns, ok := idx.StatsMap()["TotFileSegmentsAtRoot"].(uint64); ok && - ns == 1 { - t.Errorf("expected many files at root, but got: %d segments", ns) + if atomic.LoadUint64(&si.stats.TotFileSegmentsAtRoot) == 1 { + t.Errorf("expected many files at root, but got: %d segments", + atomic.LoadUint64(&si.stats.TotFileSegmentsAtRoot)) + } + + err = idx.Close() + if err != nil { + t.Fatal(err) } } From 878e5f1d5187732c346bb1da6f48b7a8591e86d1 Mon Sep 17 00:00:00 2001 From: Abhinav Dangeti Date: Thu, 7 May 2020 10:17:16 -0700 Subject: [PATCH 680/728] MB-38957: Add a unit test for mapping API: defaultAnalyzerName --- mapping/mapping_test.go | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/mapping/mapping_test.go b/mapping/mapping_test.go index b57283f3a..2b6870966 100644 --- a/mapping/mapping_test.go +++ b/mapping/mapping_test.go @@ -1111,7 +1111,6 @@ func TestClosestDocDynamicMapping(t *testing.T) { } func TestMappingPointerToTimeBug1152(t *testing.T) { - when, err := time.Parse(time.RFC3339, "2019-03-06T15:04:05Z") if err != nil { t.Fatal(err) @@ -1141,3 +1140,14 @@ func TestMappingPointerToTimeBug1152(t *testing.T) { t.Fatalf("expected field to be type 
*document.DateTimeField, got %T", doc.Fields[0]) } } + +func TestDefaultAnalyzerInheritance(t *testing.T) { + docMapping := NewDocumentMapping() + docMapping.DefaultAnalyzer = "xyz" + childMapping := NewTextFieldMapping() + docMapping.AddFieldMappingsAt("field", childMapping) + + if analyzer := docMapping.defaultAnalyzerName([]string{"field"}); analyzer != "xyz" { + t.Fatalf("Expected analyzer: xyz to be inherited by field, but got: '%v'", analyzer) + } +} From f194b0026293045dad84a48169035f3706ad415c Mon Sep 17 00:00:00 2001 From: Abhinav Dangeti Date: Thu, 7 May 2020 10:18:08 -0700 Subject: [PATCH 681/728] Undo go.mod edit + Adding back the unnecessary go.mod entry. --- go.mod | 1 + 1 file changed, 1 insertion(+) diff --git a/go.mod b/go.mod index 22133ea7a..d38cf8f92 100644 --- a/go.mod +++ b/go.mod @@ -10,6 +10,7 @@ require ( github.com/blevesearch/snowballstem v0.9.0 github.com/blevesearch/zap/v11 v11.0.7 github.com/blevesearch/zap/v12 v12.0.7 + github.com/couchbase/ghistogram v0.1.0 // indirect github.com/couchbase/moss v0.1.0 github.com/couchbase/vellum v1.0.1 github.com/golang/protobuf v1.3.2 From 1e93807deaf1fbdc77b85dde8487da86a7d7052f Mon Sep 17 00:00:00 2001 From: Abhinav Dangeti Date: Thu, 7 May 2020 11:58:30 -0700 Subject: [PATCH 682/728] Adding sub-test functionality for table-driven tests --- search_test.go | 57 ++++++++++++++++++++++++++++---------------------- 1 file changed, 32 insertions(+), 25 deletions(-) diff --git a/search_test.go b/search_test.go index c31750965..98623e1a2 100644 --- a/search_test.go +++ b/search_test.go @@ -1696,6 +1696,7 @@ func TestSearchHighlightingWithRegexpReplacement(t *testing.T) { func TestAnalyzerInheritance(t *testing.T) { tests := []struct { + name string mappingStr string doc map[string]interface{} queryField string @@ -1707,6 +1708,7 @@ func TestAnalyzerInheritance(t *testing.T) { default_mapping: "" -> child field (should inherit keyword) */ + name: "Child field to inherit index mapping's default analyzer", 
mappingStr: `{"default_mapping":{"enabled":true,"dynamic":false,"properties":` + `{"city":{"enabled":true,"dynamic":false,"fields":[{"name":"city","type":"text",` + `"store":false,"index":true}]}}},"default_analyzer":"keyword"}`, @@ -1720,6 +1722,7 @@ func TestAnalyzerInheritance(t *testing.T) { default_mapping: keyword -> child field (should inherit keyword) */ + name: "Child field to inherit default mapping's default analyzer", mappingStr: `{"default_mapping":{"enabled":true,"dynamic":false,"properties":` + `{"city":{"enabled":true,"dynamic":false,"fields":[{"name":"city","type":"text",` + `"index":true}]}},"default_analyzer":"keyword"},"default_analyzer":"standard"}`, @@ -1734,6 +1737,7 @@ func TestAnalyzerInheritance(t *testing.T) { -> child mapping: "" -> child field: (should inherit keyword) */ + name: "Nested child field to inherit default mapping's default analyzer", mappingStr: `{"default_mapping":{"enabled":true,"dynamic":false,"default_analyzer":` + `"keyword","properties":{"address":{"enabled":true,"dynamic":false,"properties":` + `{"city":{"enabled":true,"dynamic":false,"fields":[{"name":"city","type":"text",` + @@ -1752,6 +1756,7 @@ func TestAnalyzerInheritance(t *testing.T) { -> child mapping: "" -> child field: (should inherit keyword) */ + name: "Nested child field to inherit first child mapping's default analyzer", mappingStr: `{"default_mapping":{"enabled":true,"dynamic":false,"properties":` + `{"address":{"enabled":true,"dynamic":false,"default_analyzer":"keyword",` + `"properties":{"state":{"enabled":true,"dynamic":false,"properties":{"city":` + @@ -1768,37 +1773,39 @@ func TestAnalyzerInheritance(t *testing.T) { } for i := range tests { - idxMapping := NewIndexMapping() - if err := idxMapping.UnmarshalJSON([]byte(tests[i].mappingStr)); err != nil { - t.Fatal(err) - } - - tmpIndexPath := createTmpIndexPath(t) - idx, err := New(tmpIndexPath, idxMapping) - if err != nil { - t.Fatal(err) - } + t.Run(fmt.Sprintf("%s", tests[i].name), func(t 
*testing.T) { + idxMapping := NewIndexMapping() + if err := idxMapping.UnmarshalJSON([]byte(tests[i].mappingStr)); err != nil { + t.Fatal(err) + } - defer func() { - if err := idx.Close(); err != nil { + tmpIndexPath := createTmpIndexPath(t) + idx, err := New(tmpIndexPath, idxMapping) + if err != nil { t.Fatal(err) } - }() - if err = idx.Index("doc", tests[i].doc); err != nil { - t.Fatal(err) - } + defer func() { + if err := idx.Close(); err != nil { + t.Fatal(err) + } + }() - q := NewTermQuery(tests[i].queryTerm) - q.SetField(tests[i].queryField) + if err = idx.Index("doc", tests[i].doc); err != nil { + t.Fatal(err) + } - res, err := idx.Search(NewSearchRequest(q)) - if err != nil { - t.Fatal(err) - } + q := NewTermQuery(tests[i].queryTerm) + q.SetField(tests[i].queryField) - if len(res.Hits) != 1 { - t.Errorf("[%d] Unexpected number of hits: %v", i, len(res.Hits)) - } + res, err := idx.Search(NewSearchRequest(q)) + if err != nil { + t.Fatal(err) + } + + if len(res.Hits) != 1 { + t.Errorf("Unexpected number of hits: %v", len(res.Hits)) + } + }) } } From c5a10892e327a422f169b9a81270cfcdf3d61307 Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Tue, 12 May 2020 19:59:39 -0400 Subject: [PATCH 683/728] introduce new scorch index builder (#1282) The purpose of the index builder is to better support use cases where one would like to index a set of data, not search it during the build, ensure the resulting file is in an efficient optimized state, and then support opening this index read-only to serve queries. Useful implementation details: Only an Index() method is offered, meaning you do not control the batch size. The builder will group data into batches using a configurable 'batchSize' parameter (default 1000) All of these batches are persisted as segments into a temp location. You can control this using 'buildPathPrefix', or it will default to a system temp location. You must call Close() after indexing all data. 
This will flush the last batch, and begin merging segments. The builder will attempt to merge several segments at once. This is configurable with the 'mergeMax' setting, which defaults to 10. Merging continues until there is only 1 segment remaining. At this point, the final segment is moved to the final location (as specified by the original builder constructor). The root.bolt is created, pointing to the segment, and the remaining bleve index wrapping is completed. --- builder.go | 94 ++++++++++ builder_test.go | 89 +++++++++ index.go | 14 ++ index/index.go | 7 + index/scorch/builder.go | 333 +++++++++++++++++++++++++++++++++ index/scorch/builder_test.go | 160 ++++++++++++++++ index/scorch/persister.go | 75 ++++---- index/scorch/scorch.go | 34 +++- index/scorch/segment_plugin.go | 22 ++- 9 files changed, 780 insertions(+), 48 deletions(-) create mode 100644 builder.go create mode 100644 builder_test.go create mode 100644 index/scorch/builder.go create mode 100644 index/scorch/builder_test.go diff --git a/builder.go b/builder.go new file mode 100644 index 000000000..de00c97b6 --- /dev/null +++ b/builder.go @@ -0,0 +1,94 @@ +// Copyright (c) 2019 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package bleve + +import ( + "encoding/json" + "fmt" + + "github.com/blevesearch/bleve/document" + "github.com/blevesearch/bleve/index" + "github.com/blevesearch/bleve/index/scorch" + "github.com/blevesearch/bleve/mapping" +) + +type builderImpl struct { + b index.IndexBuilder + m mapping.IndexMapping +} + +func (b *builderImpl) Index(id string, data interface{}) error { + if id == "" { + return ErrorEmptyID + } + + doc := document.NewDocument(id) + err := b.m.MapDocument(doc, data) + if err != nil { + return err + } + err = b.b.Index(doc) + return err +} + +func (b *builderImpl) Close() error { + return b.b.Close() +} + +func newBuilder(path string, mapping mapping.IndexMapping, config map[string]interface{}) (Builder, error) { + if path == "" { + return nil, fmt.Errorf("builder requires path") + } + + err := mapping.Validate() + if err != nil { + return nil, err + } + + if config == nil { + config = map[string]interface{}{} + } + + // the builder does not have an API to interact with internal storage + // however we can pass k/v pairs through the config + mappingBytes, err := json.Marshal(mapping) + if err != nil { + return nil, err + } + config["internal"] = map[string][]byte{ + string(mappingInternalKey): mappingBytes, + } + + // do not use real config, as these are options for the builder, + // not the resulting index + meta := newIndexMeta(scorch.Name, scorch.Name, map[string]interface{}{}) + err = meta.Save(path) + if err != nil { + return nil, err + } + + config["path"] = indexStorePath(path) + + b, err := scorch.NewBuilder(config) + if err != nil { + return nil, err + } + rv := &builderImpl{ + b: b, + m: mapping, + } + + return rv, nil +} diff --git a/builder_test.go b/builder_test.go new file mode 100644 index 000000000..119f39f58 --- /dev/null +++ b/builder_test.go @@ -0,0 +1,89 @@ +// Copyright (c) 2019 Couchbase, Inc. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package bleve + +import ( + "fmt" + "io/ioutil" + "os" + "testing" +) + +func TestBuilder(t *testing.T) { + tmpDir, err := ioutil.TempDir("", "bleve-scorch-builder-test") + if err != nil { + t.Fatal(err) + } + defer func() { + err = os.RemoveAll(tmpDir) + if err != nil { + t.Fatalf("error cleaning up test index") + } + }() + + conf := map[string]interface{}{ + "batchSize": 2, + "mergeMax": 2, + } + b, err := NewBuilder(tmpDir, NewIndexMapping(), conf) + if err != nil { + t.Fatal(err) + } + + for i := 0; i < 10; i++ { + doc := map[string]interface{}{ + "name": "hello", + } + err = b.Index(fmt.Sprintf("%d", i), doc) + if err != nil { + t.Fatal(err) + } + } + + err = b.Close() + if err != nil { + t.Fatal(err) + } + + idx, err := Open(tmpDir) + if err != nil { + t.Fatalf("error opening index: %v", err) + } + defer func() { + err = idx.Close() + if err != nil { + t.Errorf("error closing index: %v", err) + } + }() + + docCount, err := idx.DocCount() + if err != nil { + t.Errorf("error checking doc count: %v", err) + } + if docCount != 10 { + t.Errorf("expected doc count to be 10, got %d", docCount) + } + + q := NewTermQuery("hello") + q.SetField("name") + req := NewSearchRequest(q) + res, err := idx.Search(req) + if err != nil { + t.Errorf("error searching index: %v", err) + } + if res.Total != 10 { + t.Errorf("expected 10 search hits, got %d", res.Total) + } +} diff --git a/index.go b/index.go index 
ef6ede934..974358b81 100644 --- a/index.go +++ b/index.go @@ -293,3 +293,17 @@ func Open(path string) (Index, error) { func OpenUsing(path string, runtimeConfig map[string]interface{}) (Index, error) { return openIndexUsing(path, runtimeConfig) } + +// Builder is a limited interface, used to build indexes in an offline mode. +// Items cannot be updated or deleted, and the caller MUST ensure a document is +// indexed only once. +type Builder interface { + Index(id string, data interface{}) error + Close() error +} + +// NewBuilder creates a builder, which will build an index at the specified path, +// using the specified mapping and options. +func NewBuilder(path string, mapping mapping.IndexMapping, config map[string]interface{}) (Builder, error) { + return newBuilder(path, mapping, config) +} diff --git a/index/index.go b/index/index.go index 3e866f3aa..551f8de84 100644 --- a/index/index.go +++ b/index/index.go @@ -367,3 +367,10 @@ type OptimizableContext interface { type DocValueReader interface { VisitDocValues(id IndexInternalID, visitor DocumentFieldTermVisitor) error } + +// IndexBuilder is an interface supported by some index schemes +// to allow direct write-only index building +type IndexBuilder interface { + Index(doc *document.Document) error + Close() error +} diff --git a/index/scorch/builder.go b/index/scorch/builder.go new file mode 100644 index 000000000..cd400bd1d --- /dev/null +++ b/index/scorch/builder.go @@ -0,0 +1,333 @@ +// Copyright (c) 2019 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +package scorch + +import ( + "fmt" + "io/ioutil" + "os" + "sync" + + "github.com/RoaringBitmap/roaring" + "github.com/blevesearch/bleve/document" + "github.com/blevesearch/bleve/index" + "github.com/blevesearch/bleve/index/scorch/segment" + bolt "go.etcd.io/bbolt" +) + +const DefaultBuilderBatchSize = 1000 +const DefaultBuilderMergeMax = 10 + +type Builder struct { + m sync.Mutex + segCount uint64 + path string + buildPath string + segPaths []string + batchSize int + mergeMax int + batch *index.Batch + internal map[string][]byte + segPlugin segment.Plugin +} + +func NewBuilder(config map[string]interface{}) (*Builder, error) { + path, ok := config["path"].(string) + if !ok { + return nil, fmt.Errorf("must specify path") + } + + buildPathPrefix, _ := config["buildPathPrefix"].(string) + buildPath, err := ioutil.TempDir(buildPathPrefix, "scorch-offline-build") + if err != nil { + return nil, err + } + + rv := &Builder{ + path: path, + buildPath: buildPath, + mergeMax: DefaultBuilderMergeMax, + batchSize: DefaultBuilderBatchSize, + batch: index.NewBatch(), + segPlugin: defaultSegmentPlugin, + } + + err = rv.parseConfig(config) + if err != nil { + return nil, fmt.Errorf("error parsing builder config: %v", err) + } + + return rv, nil +} + +func (o *Builder) parseConfig(config map[string]interface{}) (err error) { + if v, ok := config["mergeMax"]; ok { + var t int + if t, err = parseToInteger(v); err != nil { + return fmt.Errorf("mergeMax parse err: %v", err) + } + if t > 0 { + o.mergeMax = t + } + } + + if v, ok := config["batchSize"]; ok { + var t int + if t, err = parseToInteger(v); err != nil { + return fmt.Errorf("batchSize parse err: %v", err) + } + if t > 0 { + o.batchSize = t + } + } + + if v, ok := config["internal"]; ok { + if vinternal, ok := v.(map[string][]byte); ok { + o.internal = vinternal + } + } + + forcedSegmentType, forcedSegmentVersion, err 
:= configForceSegmentTypeVersion(config) + if err != nil { + return err + } + if forcedSegmentType != "" && forcedSegmentVersion != 0 { + segPlugin, err := chooseSegmentPlugin(forcedSegmentType, + uint32(forcedSegmentVersion)) + if err != nil { + o.segPlugin = segPlugin + } + } + + return nil +} + +// Index will place the document into the index. +// It is invalid to index the same document multiple times. +func (o *Builder) Index(doc *document.Document) error { + o.m.Lock() + defer o.m.Unlock() + + o.batch.Update(doc) + + return o.maybeFlushBatchLOCKED(o.batchSize) +} + +func (o *Builder) maybeFlushBatchLOCKED(moreThan int) error { + if len(o.batch.IndexOps) >= moreThan { + defer o.batch.Reset() + return o.executeBatchLOCKED(o.batch) + } + return nil +} + +func (o *Builder) executeBatchLOCKED(batch *index.Batch) (err error) { + analysisResults := make([]*index.AnalysisResult, 0, len(batch.IndexOps)) + for _, doc := range batch.IndexOps { + if doc != nil { + // insert _id field + doc.AddField(document.NewTextFieldCustom("_id", nil, []byte(doc.ID), document.IndexField|document.StoreField, nil)) + // perform analysis directly + analysisResult := analyze(doc) + analysisResults = append(analysisResults, analysisResult) + } + } + + seg, _, err := o.segPlugin.New(analysisResults) + if err != nil { + return fmt.Errorf("error building segment base: %v", err) + } + + filename := zapFileName(o.segCount) + o.segCount++ + path := o.buildPath + string(os.PathSeparator) + filename + + if segUnpersisted, ok := seg.(segment.UnpersistedSegment); ok { + err = segUnpersisted.Persist(path) + if err != nil { + return fmt.Errorf("error persisting segment base to %s: %v", path, err) + } + + o.segPaths = append(o.segPaths, path) + return nil + } + + return fmt.Errorf("new segment does not implement unpersisted: %T", seg) +} + +func (o *Builder) doMerge() error { + // as long as we have more than 1 segment, keep merging + for len(o.segPaths) > 1 { + + // merge the next number of segments 
into one new one + // or, if there are fewer than remaining, merge them all + mergeCount := o.mergeMax + if mergeCount > len(o.segPaths) { + mergeCount = len(o.segPaths) + } + + mergePaths := o.segPaths[0:mergeCount] + o.segPaths = o.segPaths[mergeCount:] + + // open each of the segments to be merged + mergeSegs := make([]segment.Segment, 0, mergeCount) + + // closeOpenedSegs attempts to close all opened + // segments even if an error occurs, in which case + // the first error is returned + closeOpenedSegs := func() error { + var err error + for _, seg := range mergeSegs { + clErr := seg.Close() + if clErr != nil && err == nil { + err = clErr + } + } + return err + } + + for _, mergePath := range mergePaths { + seg, err := o.segPlugin.Open(mergePath) + if err != nil { + _ = closeOpenedSegs() + return fmt.Errorf("error opening segment (%s) for merge: %v", mergePath, err) + } + mergeSegs = append(mergeSegs, seg) + } + + // do the merge + mergedSegPath := o.buildPath + string(os.PathSeparator) + zapFileName(o.segCount) + drops := make([]*roaring.Bitmap, mergeCount) + _, _, err := o.segPlugin.Merge(mergeSegs, drops, mergedSegPath, nil, nil) + if err != nil { + _ = closeOpenedSegs() + return fmt.Errorf("error merging segments (%v): %v", mergePaths, err) + } + o.segCount++ + o.segPaths = append(o.segPaths, mergedSegPath) + + // close segments opened for merge + err = closeOpenedSegs() + if err != nil { + return fmt.Errorf("error closing opened segments: %v", err) + } + + // remove merged segments + for _, mergePath := range mergePaths { + err = os.RemoveAll(mergePath) + if err != nil { + return fmt.Errorf("error removing segment %s after merge: %v", mergePath, err) + } + } + } + + return nil +} + +func (o *Builder) Close() error { + o.m.Lock() + defer o.m.Unlock() + + // see if there is a partial batch + err := o.maybeFlushBatchLOCKED(1) + if err != nil { + return fmt.Errorf("error flushing batch before close: %v", err) + } + + // perform all the merging + err = 
o.doMerge() + if err != nil { + return fmt.Errorf("error while merging: %v", err) + } + + // ensure the store path exists + err = os.MkdirAll(o.path, 0700) + if err != nil { + return err + } + + // move final segment into place + // segment id 2 is chosen to match the behavior of a scorch + // index which indexes a single batch of data + finalSegPath := o.path + string(os.PathSeparator) + zapFileName(2) + err = os.Rename(o.segPaths[0], finalSegPath) + if err != nil { + return fmt.Errorf("error moving final segment into place: %v", err) + } + + // remove the buildPath, as it is no longer needed + err = os.RemoveAll(o.buildPath) + if err != nil { + return fmt.Errorf("error removing build path: %v", err) + } + + // prepare wrapping + seg, err := o.segPlugin.Open(finalSegPath) + if err != nil { + return fmt.Errorf("error opening final segment") + } + + // create a segment snapshot for this segment + ss := &SegmentSnapshot{ + segment: seg, + } + is := &IndexSnapshot{ + epoch: 3, // chosen to match scorch behavior when indexing a single batch + segment: []*SegmentSnapshot{ss}, + creator: "scorch-builder", + internal: o.internal, + } + + // create the root bolt + rootBoltPath := o.path + string(os.PathSeparator) + "root.bolt" + rootBolt, err := bolt.Open(rootBoltPath, 0600, nil) + if err != nil { + return err + } + + // start a write transaction + tx, err := rootBolt.Begin(true) + if err != nil { + return err + } + + // fill the root bolt with this fake index snapshot + _, _, err = prepareBoltSnapshot(is, tx, o.path, o.segPlugin) + if err != nil { + _ = tx.Rollback() + _ = rootBolt.Close() + return fmt.Errorf("error preparing bolt snapshot in root.bolt: %v", err) + } + + // commit bolt data + err = tx.Commit() + if err != nil { + _ = rootBolt.Close() + return fmt.Errorf("error committing bolt tx in root.bolt: %v", err) + } + + // close bolt + err = rootBolt.Close() + if err != nil { + return fmt.Errorf("error closing root.bolt: %v", err) + } + + // close final segment + 
err = seg.Close() + if err != nil { + return fmt.Errorf("error closing final segment: %v", err) + } + return nil +} diff --git a/index/scorch/builder_test.go b/index/scorch/builder_test.go new file mode 100644 index 000000000..1d0ce7160 --- /dev/null +++ b/index/scorch/builder_test.go @@ -0,0 +1,160 @@ +// Copyright (c) 2019 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package scorch + +import ( + "fmt" + "io/ioutil" + "os" + "testing" + + "github.com/blevesearch/bleve/document" + "github.com/blevesearch/bleve/index" +) + +func TestBuilder(t *testing.T) { + tmpDir, err := ioutil.TempDir("", "scorch-builder-test") + if err != nil { + t.Fatal(err) + } + defer func() { + err = os.RemoveAll(tmpDir) + if err != nil { + t.Fatalf("error cleaning up test index: %v", err) + } + }() + options := map[string]interface{}{ + "path": tmpDir, + "batchSize": 2, + "mergeMax": 2, + } + b, err := NewBuilder(options) + if err != nil { + t.Fatal(err) + } + + for i := 0; i < 10; i++ { + doc := document.NewDocument(fmt.Sprintf("%d", i)) + doc.AddField(document.NewTextField("name", nil, []byte("hello"))) + err = b.Index(doc) + if err != nil { + t.Fatal(err) + } + } + + err = b.Close() + if err != nil { + t.Fatal(err) + } + + checkIndex(t, tmpDir, []byte("hello"), "name", 10) + +} + +func checkIndex(t *testing.T, path string, term []byte, field string, expectCount int) { + cfg := make(map[string]interface{}) + cfg["path"] = path + analysisQueue := 
index.NewAnalysisQueue(1) + idx, err := NewScorch(Name, cfg, analysisQueue) + if err != nil { + t.Fatal(err) + } + err = idx.Open() + if err != nil { + t.Fatalf("error opening index: %v", err) + } + defer func() { + err = idx.Close() + if err != nil { + t.Fatalf("error closing index: %v", err) + } + }() + + r, err := idx.Reader() + if err != nil { + t.Fatalf("error accessing index reader: %v", err) + } + defer func() { + err = r.Close() + if err != nil { + t.Fatalf("error closing reader: %v", err) + } + }() + + // check the count, expect 10 docs + count, err := r.DocCount() + if err != nil { + t.Errorf("error accessing index doc count: %v", err) + } else if count != uint64(expectCount) { + t.Errorf("expected %d docs, got %d", expectCount, count) + } + + // run a search for hello + tfr, err := r.TermFieldReader(term, field, false, false, false) + if err != nil { + t.Errorf("error accessing term field reader: %v", err) + } else { + var rows int + tfd, err := tfr.Next(nil) + for err == nil && tfd != nil { + rows++ + tfd, err = tfr.Next(nil) + } + if err != nil { + t.Errorf("error calling next on term field reader: %v", err) + } + if rows != expectCount { + t.Errorf("expected %d rows for term hello, field name, got %d", expectCount, rows) + } + } +} + +func TestBuilderFlushFinalBatch(t *testing.T) { + tmpDir, err := ioutil.TempDir("", "scorch-builder-test") + if err != nil { + t.Fatal(err) + } + defer func() { + err = os.RemoveAll(tmpDir) + if err != nil { + t.Fatalf("error cleaning up test index: %v", err) + } + }() + options := map[string]interface{}{ + "path": tmpDir, + "batchSize": 2, + "mergeMax": 2, + } + b, err := NewBuilder(options) + if err != nil { + t.Fatal(err) + } + + for i := 0; i < 9; i++ { + doc := document.NewDocument(fmt.Sprintf("%d", i)) + doc.AddField(document.NewTextField("name", nil, []byte("hello"))) + err = b.Index(doc) + if err != nil { + t.Fatal(err) + } + } + + err = b.Close() + if err != nil { + t.Fatal(err) + } + + checkIndex(t, tmpDir, 
[]byte("hello"), "name", 9) +} diff --git a/index/scorch/persister.go b/index/scorch/persister.go index ffa656693..ea3667bbf 100644 --- a/index/scorch/persister.go +++ b/index/scorch/persister.go @@ -428,55 +428,44 @@ func (s *Scorch) persistSnapshotMaybeMerge(snapshot *IndexSnapshot) ( return true, nil } -func (s *Scorch) persistSnapshotDirect(snapshot *IndexSnapshot) (err error) { - // start a write transaction - tx, err := s.rootBolt.Begin(true) - if err != nil { - return err - } - // defer rollback on error - defer func() { - if err != nil { - _ = tx.Rollback() - } - }() - +func prepareBoltSnapshot(snapshot *IndexSnapshot, tx *bolt.Tx, path string, + segPlugin segment.Plugin) ([]string, map[uint64]string, error) { snapshotsBucket, err := tx.CreateBucketIfNotExists(boltSnapshotsBucket) if err != nil { - return err + return nil, nil, err } newSnapshotKey := segment.EncodeUvarintAscending(nil, snapshot.epoch) snapshotBucket, err := snapshotsBucket.CreateBucketIfNotExists(newSnapshotKey) if err != nil { - return err + return nil, nil, err } // persist meta values metaBucket, err := snapshotBucket.CreateBucketIfNotExists(boltMetaDataKey) if err != nil { - return err + return nil, nil, err } - err = metaBucket.Put(boltMetaDataSegmentTypeKey, []byte(s.segPlugin.Type())) + err = metaBucket.Put(boltMetaDataSegmentTypeKey, []byte(segPlugin.Type())) if err != nil { - return err + return nil, nil, err } buf := make([]byte, binary.MaxVarintLen32) - binary.BigEndian.PutUint32(buf, s.segPlugin.Version()) + binary.BigEndian.PutUint32(buf, segPlugin.Version()) err = metaBucket.Put(boltMetaDataSegmentVersionKey, buf) if err != nil { - return err + return nil, nil, err } // persist internal values internalBucket, err := snapshotBucket.CreateBucketIfNotExists(boltInternalKey) if err != nil { - return err + return nil, nil, err } // TODO optimize writing these in order? 
for k, v := range snapshot.internal { err = internalBucket.Put([]byte(k), v) if err != nil { - return err + return nil, nil, err } } @@ -488,49 +477,69 @@ func (s *Scorch) persistSnapshotDirect(snapshot *IndexSnapshot) (err error) { snapshotSegmentKey := segment.EncodeUvarintAscending(nil, segmentSnapshot.id) snapshotSegmentBucket, err := snapshotBucket.CreateBucketIfNotExists(snapshotSegmentKey) if err != nil { - return err + return nil, nil, err } switch seg := segmentSnapshot.segment.(type) { case segment.PersistedSegment: - path := seg.Path() - filename := strings.TrimPrefix(path, s.path+string(os.PathSeparator)) + segPath := seg.Path() + filename := strings.TrimPrefix(segPath, path+string(os.PathSeparator)) err = snapshotSegmentBucket.Put(boltPathKey, []byte(filename)) if err != nil { - return err + return nil, nil, err } filenames = append(filenames, filename) case segment.UnpersistedSegment: // need to persist this to disk filename := zapFileName(segmentSnapshot.id) - path := s.path + string(os.PathSeparator) + filename + path := path + string(os.PathSeparator) + filename err = seg.Persist(path) if err != nil { - return fmt.Errorf("error persisting segment: %v", err) + return nil, nil, fmt.Errorf("error persisting segment: %v", err) } newSegmentPaths[segmentSnapshot.id] = path err = snapshotSegmentBucket.Put(boltPathKey, []byte(filename)) if err != nil { - return err + return nil, nil, err } filenames = append(filenames, filename) - default: - return fmt.Errorf("unknown segment type: %T", seg) + return nil, nil, fmt.Errorf("unknown segment type: %T", seg) } // store current deleted bits var roaringBuf bytes.Buffer if segmentSnapshot.deleted != nil { _, err = segmentSnapshot.deleted.WriteTo(&roaringBuf) if err != nil { - return fmt.Errorf("error persisting roaring bytes: %v", err) + return nil, nil, fmt.Errorf("error persisting roaring bytes: %v", err) } err = snapshotSegmentBucket.Put(boltDeletedKey, roaringBuf.Bytes()) if err != nil { - return err + return 
nil, nil, err } } } + return filenames, newSegmentPaths, nil +} + +func (s *Scorch) persistSnapshotDirect(snapshot *IndexSnapshot) (err error) { + // start a write transaction + tx, err := s.rootBolt.Begin(true) + if err != nil { + return err + } + // defer rollback on error + defer func() { + if err != nil { + _ = tx.Rollback() + } + }() + + filenames, newSegmentPaths, err := prepareBoltSnapshot(snapshot, tx, s.path, s.segPlugin) + if err != nil { + return err + } + // we need to swap in a new root only when we've persisted 1 or // more segments -- whereby the new root would have 1-for-1 // replacements of in-memory segments with file-based segments diff --git a/index/scorch/scorch.go b/index/scorch/scorch.go index 698aaf16a..d1e1c6af5 100644 --- a/index/scorch/scorch.go +++ b/index/scorch/scorch.go @@ -104,15 +104,11 @@ func NewScorch(storeName string, segPlugin: defaultSegmentPlugin, } - // check if the caller has requested a specific segment type/version - forcedSegmentVersion, ok := config["forceSegmentVersion"].(int) - if ok { - forcedSegmentType, ok2 := config["forceSegmentType"].(string) - if !ok2 { - return nil, fmt.Errorf( - "forceSegmentVersion set to %d, must also specify forceSegmentType", forcedSegmentVersion) - } - + forcedSegmentType, forcedSegmentVersion, err := configForceSegmentTypeVersion(config) + if err != nil { + return nil, err + } + if forcedSegmentType != "" && forcedSegmentVersion != 0 { err := rv.loadSegmentPlugin(forcedSegmentType, uint32(forcedSegmentVersion)) if err != nil { @@ -140,6 +136,22 @@ func NewScorch(storeName string, return rv, nil } +// configForceSegmentTypeVersion checks if the caller has requested a +// specific segment type/version +func configForceSegmentTypeVersion(config map[string]interface{}) (string, uint32, error) { + forcedSegmentVersion, ok := config["forceSegmentVersion"].(int) + if ok { + forcedSegmentType, ok2 := config["forceSegmentType"].(string) + if !ok2 { + return "", 0, fmt.Errorf( + 
"forceSegmentVersion set to %d, must also specify forceSegmentType", forcedSegmentVersion) + } + + return forcedSegmentType, uint32(forcedSegmentVersion), nil + } + return "", 0, nil +} + func (s *Scorch) paused() uint64 { s.pauseLock.Lock() pc := s.pauseCount @@ -567,6 +579,10 @@ func (s *Scorch) StatsMap() map[string]interface{} { } func (s *Scorch) Analyze(d *document.Document) *index.AnalysisResult { + return analyze(d) +} + +func analyze(d *document.Document) *index.AnalysisResult { rv := &index.AnalysisResult{ Document: d, Analyzed: make([]analysis.TokenFrequencies, len(d.Fields)+len(d.CompositeFields)), diff --git a/index/scorch/segment_plugin.go b/index/scorch/segment_plugin.go index 01eda7fbd..6dfa3b282 100644 --- a/index/scorch/segment_plugin.go +++ b/index/scorch/segment_plugin.go @@ -60,18 +60,28 @@ func SupportedSegmentTypeVersions(typ string) (rv []uint32) { return rv } -func (s *Scorch) loadSegmentPlugin(forcedSegmentType string, - forcedSegmentVersion uint32) error { +func chooseSegmentPlugin(forcedSegmentType string, + forcedSegmentVersion uint32) (segment.Plugin, error) { if versions, ok := supportedSegmentPlugins[forcedSegmentType]; ok { if segPlugin, ok := versions[uint32(forcedSegmentVersion)]; ok { - s.segPlugin = segPlugin - return nil + return segPlugin, nil } - return fmt.Errorf( + return nil, fmt.Errorf( "unsupported version %d for segment type: %s, supported: %v", forcedSegmentVersion, forcedSegmentType, SupportedSegmentTypeVersions(forcedSegmentType)) } - return fmt.Errorf("unsupported segment type: %s, supported: %v", + return nil, fmt.Errorf("unsupported segment type: %s, supported: %v", forcedSegmentType, SupportedSegmentTypes()) } + +func (s *Scorch) loadSegmentPlugin(forcedSegmentType string, + forcedSegmentVersion uint32) error { + segPlugin, err := chooseSegmentPlugin(forcedSegmentType, + forcedSegmentVersion) + if err != nil { + return err + } + s.segPlugin = segPlugin + return nil +} From 472e3e19a293a902de8cc83236419fd38881594a 
Mon Sep 17 00:00:00 2001 From: Sreekanth Sivasankaran Date: Wed, 13 May 2020 11:50:19 +0530 Subject: [PATCH 684/728] introducing context argument to the ForceMerge for cancellation of the api. --- index/scorch/introducer.go | 6 +- index/scorch/merge.go | 130 +++++++++++++++++------------------- index/scorch/persister.go | 25 +++---- index/scorch/scorch.go | 5 +- index/scorch/scorch_test.go | 44 +++++++----- index_alias_impl.go | 14 ++-- 6 files changed, 107 insertions(+), 117 deletions(-) diff --git a/index/scorch/introducer.go b/index/scorch/introducer.go index 3325c9b17..64ca969bd 100644 --- a/index/scorch/introducer.go +++ b/index/scorch/introducer.go @@ -313,15 +313,11 @@ func (s *Scorch) introduceMerge(nextMerge *segmentMerge) { defer func() { _ = root.DecRef() }() - if nextMerge.creator == "" { - nextMerge.creator = "introduceMerge" - } - newSnapshot := &IndexSnapshot{ parent: s, internal: root.internal, refs: 1, - creator: nextMerge.creator, + creator: "introduceMerge", } // iterate through current segments diff --git a/index/scorch/merge.go b/index/scorch/merge.go index c0464fba8..3eccee52e 100644 --- a/index/scorch/merge.go +++ b/index/scorch/merge.go @@ -15,6 +15,7 @@ package scorch import ( + "context" "encoding/json" "fmt" "os" @@ -27,8 +28,6 @@ import ( "github.com/blevesearch/bleve/index/scorch/segment" ) -var numSnapShotsToKeepOverRuler = "introduceForceMerge" - func (s *Scorch) mergerLoop() { var lastEpochMergePlanned uint64 var ctrlMsg *mergerCtrl @@ -38,7 +37,9 @@ func (s *Scorch) mergerLoop() { s.asyncTasks.Done() return } - ctrlMsgDflt := &mergerCtrl{options: mergePlannerOptions, doneCh: nil} + ctrlMsgDflt := &mergerCtrl{ctx: context.Background(), + options: mergePlannerOptions, + doneCh: nil} OUTER: for { @@ -64,8 +65,8 @@ OUTER: startTime := time.Now() // lets get started - err := s.planMergeAtSnapshot(ourSnapshot, ctrlMsg.options, - ctrlMsg.creator, ctrlMsg.cancelCh) + err := s.planMergeAtSnapshot(ctrlMsg.ctx, ctrlMsg.options, + ourSnapshot) 
if err != nil { atomic.StoreUint64(&s.iStats.mergeEpoch, 0) if err == segment.ErrClosed { @@ -114,7 +115,7 @@ OUTER: case <-s.closeCh: break OUTER case s.persisterNotifier <- ew: - case ctrlMsg = <-s.mergerKickCh: + case ctrlMsg = <-s.forceMergeRequestCh: continue OUTER } @@ -123,7 +124,7 @@ OUTER: case <-s.closeCh: break OUTER case <-ew.notifyCh: - case ctrlMsg = <-s.mergerKickCh: + case ctrlMsg = <-s.forceMergeRequestCh: } } @@ -134,33 +135,15 @@ OUTER: } type mergerCtrl struct { - creator string - options *mergeplan.MergePlanOptions - doneCh chan struct{} - cancelCh chan struct{} -} - -// MergeRequest represents various control -// parameters for the ForceMerge API. -type MergeRequest struct { - // MergeOptions specify the merge policy applied during - // the forced merge cycles. This doesn't override the - // index's original merge policy. - MergeOptions *mergeplan.MergePlanOptions - - // OverrideNumSnapshotsToKeep specify whether to retain - // a number of older snapshots dictated by numSnapshotsToKeep - // during a forced merge cycle. Enabling this reduces the - // disk space requirements during a forced merge operation. - OverrideNumSnapshotsToKeep bool - - // CancelCh helps in cancelling an ongoing merge operation. - CancelCh chan struct{} + ctx context.Context + options *mergeplan.MergePlanOptions + doneCh chan struct{} } // ForceMerge helps users trigger a merge operation on // an online scorch index. 
-func (s *Scorch) ForceMerge(mr *MergeRequest) error { +func (s *Scorch) ForceMerge(ctx context.Context, + mo *mergeplan.MergePlanOptions) error { // check whether force merge is already under processing s.rootLock.Lock() if s.stats.TotFileMergeForceOpsStarted > @@ -172,23 +155,23 @@ func (s *Scorch) ForceMerge(mr *MergeRequest) error { s.stats.TotFileMergeForceOpsStarted++ s.rootLock.Unlock() - if mr.MergeOptions == nil { + if mo != nil { + err := mergeplan.ValidateMergePlannerOptions(mo) + if err != nil { + return err + } + } else { // assume the default single segment merge policy - mr.MergeOptions = &mergeplan.SingleSegmentMergePlanOptions - } - var ssCreator string - if mr.OverrideNumSnapshotsToKeep { - ssCreator = numSnapShotsToKeepOverRuler + mo = &mergeplan.SingleSegmentMergePlanOptions } - msg := &mergerCtrl{creator: ssCreator, - options: mr.MergeOptions, - doneCh: make(chan struct{}), - cancelCh: mr.CancelCh, + msg := &mergerCtrl{options: mo, + doneCh: make(chan struct{}), + ctx: ctx, } - // kick the merger workloop + // request the merger perform a force merge select { - case s.mergerKickCh <- msg: + case s.forceMergeRequestCh <- msg: case <-s.closeCh: return nil } @@ -225,9 +208,39 @@ func (s *Scorch) parseMergePlannerOptions() (*mergeplan.MergePlanOptions, return &mergePlannerOptions, nil } -func (s *Scorch) planMergeAtSnapshot(ourSnapshot *IndexSnapshot, - options *mergeplan.MergePlanOptions, creator string, - cancelCh chan struct{}) error { +type closeChWrapper struct { + ch1 chan struct{} + ctx context.Context + closeCh chan struct{} +} + +func newCloseChWrapper(ch1 chan struct{}, + ctx context.Context) *closeChWrapper { + return &closeChWrapper{ch1: ch1, + ctx: ctx, + closeCh: make(chan struct{})} +} + +func (w *closeChWrapper) close() { + select { + case <-w.closeCh: + default: + close(w.closeCh) + } +} + +func (w *closeChWrapper) listen() { + select { + case <-w.ch1: + w.close() + case <-w.ctx.Done(): + w.close() + case <-w.closeCh: + } +} + 
+func (s *Scorch) planMergeAtSnapshot(ctx context.Context, + options *mergeplan.MergePlanOptions, ourSnapshot *IndexSnapshot) error { // build list of persisted segments in this snapshot var onlyPersistedSnapshots []mergeplan.Segment for _, segmentSnapshot := range ourSnapshot.segment { @@ -256,27 +269,10 @@ func (s *Scorch) planMergeAtSnapshot(ourSnapshot *IndexSnapshot, // process tasks in serial for now var filenames []string - closeCh := make(chan struct{}) - cleanup := func() { - select { - case <-closeCh: - default: - close(closeCh) - } - } - defer cleanup() + cw := newCloseChWrapper(s.closeCh, ctx) + defer cw.close() - // cancel the merge operation on events like the index closure - // or upon a user cancel. - go func() { - select { - case <-s.closeCh: - cleanup() - case <-cancelCh: - cleanup() - case <-closeCh: - } - }() + go cw.listen() for _, task := range resultMergePlan.Tasks { if len(task.Segments) == 0 { @@ -323,7 +319,7 @@ func (s *Scorch) planMergeAtSnapshot(ourSnapshot *IndexSnapshot, atomic.AddUint64(&s.stats.TotFileMergeZapBeg, 1) newDocNums, _, err := s.segPlugin.Merge(segmentsToMerge, docsToDrop, path, - closeCh, s) + cw.closeCh, s) atomic.AddUint64(&s.stats.TotFileMergeZapEnd, 1) fileMergeZapTime := uint64(time.Since(fileMergeZapStartTime)) @@ -361,7 +357,6 @@ func (s *Scorch) planMergeAtSnapshot(ourSnapshot *IndexSnapshot, oldNewDocNums: oldNewDocNums, new: seg, notify: make(chan *IndexSnapshot), - creator: creator, } // give it to the introducer @@ -406,7 +401,6 @@ type segmentMerge struct { oldNewDocNums map[uint64][]uint64 new segment.Segment notify chan *IndexSnapshot - creator string } // perform a merging of the given SegmentBase instances into a new, diff --git a/index/scorch/persister.go b/index/scorch/persister.go index aec1c0fea..ffa656693 100644 --- a/index/scorch/persister.go +++ b/index/scorch/persister.go @@ -209,7 +209,7 @@ OUTER: case s.introducerNotifier <- w: } - s.removeOldData(s.getNumSnapshotsToKeep(ourSnapshot)) // might 
as well cleanup while waiting + s.removeOldData() // might as well cleanup while waiting atomic.AddUint64(&s.stats.TotPersistLoopWait, 1) @@ -276,7 +276,7 @@ func (s *Scorch) pausePersisterForMergerCatchUp(lastPersistedEpoch uint64, // 1. Too many older snapshots awaiting the clean up. // 2. The merger could be lagging behind on merging the disk files. if numFilesOnDisk > uint64(po.PersisterNapUnderNumFiles) { - s.removeOldData(s.numSnapshotsToKeep) + s.removeOldData() numFilesOnDisk, _, _ = s.diskFileStats(nil) } @@ -780,8 +780,8 @@ func (s *Scorch) loadSegment(segmentBucket *bolt.Bucket) (*SegmentSnapshot, erro return rv, nil } -func (s *Scorch) removeOldData(numSnapshotsToKeep int) { - removed, err := s.removeOldBoltSnapshots(numSnapshotsToKeep) +func (s *Scorch) removeOldData() { + removed, err := s.removeOldBoltSnapshots() if err != nil { s.fireAsyncError(fmt.Errorf("got err removing old bolt snapshots: %v", err)) } @@ -798,31 +798,22 @@ func (s *Scorch) removeOldData(numSnapshotsToKeep int) { // rollback'ability. var NumSnapshotsToKeep = 1 -func (s *Scorch) getNumSnapshotsToKeep(ourSnapshot *IndexSnapshot) int { - if ourSnapshot != nil && - ourSnapshot.creator != numSnapShotsToKeepOverRuler { - return s.numSnapshotsToKeep - } - return 1 -} - // Removes enough snapshots from the rootBolt so that the // s.eligibleForRemoval stays under the NumSnapshotsToKeep policy. 
-func (s *Scorch) removeOldBoltSnapshots(numSnapshotsToKeep int) ( - numRemoved int, err error) { +func (s *Scorch) removeOldBoltSnapshots() (numRemoved int, err error) { persistedEpochs, err := s.RootBoltSnapshotEpochs() if err != nil { return 0, err } - if len(persistedEpochs) <= numSnapshotsToKeep { + if len(persistedEpochs) <= s.numSnapshotsToKeep { // we need to keep everything return 0, nil } // make a map of epochs to protect from deletion - protectedEpochs := make(map[uint64]struct{}, numSnapshotsToKeep) - for _, epoch := range persistedEpochs[0:numSnapshotsToKeep] { + protectedEpochs := make(map[uint64]struct{}, s.numSnapshotsToKeep) + for _, epoch := range persistedEpochs[0:s.numSnapshotsToKeep] { protectedEpochs[epoch] = struct{}{} } diff --git a/index/scorch/scorch.go b/index/scorch/scorch.go index affda93a3..06a5e6b12 100644 --- a/index/scorch/scorch.go +++ b/index/scorch/scorch.go @@ -77,7 +77,7 @@ type Scorch struct { pauseCount uint64 - mergerKickCh chan *mergerCtrl + forceMergeRequestCh chan *mergerCtrl segPlugin segment.Plugin } @@ -103,7 +103,7 @@ func NewScorch(storeName string, nextSnapshotEpoch: 1, closeCh: make(chan struct{}), ineligibleForRemoval: map[string]bool{}, - mergerKickCh: make(chan *mergerCtrl, 1), + forceMergeRequestCh: make(chan *mergerCtrl, 1), segPlugin: defaultSegmentPlugin, } @@ -244,6 +244,7 @@ func (s *Scorch) openBolt() error { s.introducerNotifier = make(chan *epochWatcher, 1) s.persisterNotifier = make(chan *epochWatcher, 1) s.closeCh = make(chan struct{}) + s.forceMergeRequestCh = make(chan *mergerCtrl, 1) if !s.readOnly && s.path != "" { err := s.removeOldZapFiles() // Before persister or merger create any new files. 
diff --git a/index/scorch/scorch_test.go b/index/scorch/scorch_test.go index 2e26f6b24..5a9ef40cd 100644 --- a/index/scorch/scorch_test.go +++ b/index/scorch/scorch_test.go @@ -15,6 +15,7 @@ package scorch import ( + "context" "fmt" "log" "math/rand" @@ -2226,17 +2227,16 @@ func TestIndexForceMerge(t *testing.T) { t.Errorf("expected 10 root file segments, got: %d", nfs) } + ctx := context.Background() for { if atomic.LoadUint64(&si.stats.TotFileSegmentsAtRoot) == 1 { break } - err := si.ForceMerge(&MergeRequest{ - MergeOptions: &mergeplan.MergePlanOptions{ - MaxSegmentsPerTier: 1, - MaxSegmentSize: 10000, - SegmentsPerMergeTask: 10, - FloorSegmentSize: 10000}, - OverrideNumSnapshotsToKeep: true}) + err := si.ForceMerge(ctx, &mergeplan.MergePlanOptions{ + MaxSegmentsPerTier: 1, + MaxSegmentSize: 10000, + SegmentsPerMergeTask: 10, + FloorSegmentSize: 10000}) if err != nil { t.Errorf("ForceMerge failed, err: %v", err) } @@ -2247,6 +2247,17 @@ func TestIndexForceMerge(t *testing.T) { t.Errorf("expected a single root file segments, got: %d", atomic.LoadUint64(&si.stats.TotFileSegmentsAtRoot)) } + + // verify with an invalid merge plan + err = si.ForceMerge(ctx, &mergeplan.MergePlanOptions{ + MaxSegmentsPerTier: 1, + MaxSegmentSize: 1 << 33, + SegmentsPerMergeTask: 10, + FloorSegmentSize: 10000}) + if err != mergeplan.ErrMaxSegmentSizeTooLarge { + t.Errorf("ForceMerge expected to fail with ErrMaxSegmentSizeTooLarge") + } + err = idx.Close() if err != nil { t.Fatal(err) @@ -2333,28 +2344,27 @@ func TestCancelIndexForceMerge(t *testing.T) { t.Errorf("expected 20 root file segments, got: %d", nfsr) } - cancelCh := make(chan struct{}) + ctx := context.Background() + ctx, cancel := context.WithCancel(ctx) + // cancel the force merge operation once the root has some new merge // introductions. ie if the root has lesser file segments than earlier. 
go func() { for { nval := atomic.LoadUint64(&si.stats.TotFileSegmentsAtRoot) if nval < nfsr { - close(cancelCh) + cancel() return } time.Sleep(time.Millisecond * 5) } }() - err = si.ForceMerge(&MergeRequest{ - MergeOptions: &mergeplan.MergePlanOptions{ - MaxSegmentsPerTier: 1, - MaxSegmentSize: 10000, - SegmentsPerMergeTask: 5, - FloorSegmentSize: 10000}, - OverrideNumSnapshotsToKeep: true, - CancelCh: cancelCh}) + err = si.ForceMerge(ctx, &mergeplan.MergePlanOptions{ + MaxSegmentsPerTier: 1, + MaxSegmentSize: 10000, + SegmentsPerMergeTask: 5, + FloorSegmentSize: 10000}) if err != nil { t.Errorf("ForceMerge failed, err: %v", err) } diff --git a/index_alias_impl.go b/index_alias_impl.go index 94eb50f4e..bd5fabc86 100644 --- a/index_alias_impl.go +++ b/index_alias_impl.go @@ -44,16 +44,14 @@ func NewIndexAlias(indexes ...Index) *indexAliasImpl { } } -// Indexes just returns the indexes included in the -// index alias at the moment in an unsafe way. -// Caller must be aware that the results will be -// inconsistent if there are concurrent Add/Remove -// operations on the alias. -func (i *indexAliasImpl) Indexes() []Index { +// VisitIndexes invokes the visit callback on every +// indexes included in the index alias. +func (i *indexAliasImpl) VisitIndexes(visit func(Index)) { i.mutex.RLock() - rv := i.indexes + for _, idx := range i.indexes { + visit(idx) + } i.mutex.RUnlock() - return rv } func (i *indexAliasImpl) isAliasToSingleIndex() error { From e65c4bb7d3d34c785f3fb110e451576d29d8b7dc Mon Sep 17 00:00:00 2001 From: Tyler Kovacs Date: Wed, 20 May 2020 09:21:56 -0700 Subject: [PATCH 685/728] use inline comparison operators instead of strings.Compare In the comments from strings.Compare: // Compare is included only for symmetry with package bytes. // It is usually clearer and always faster to use the built-in // string comparison operators ==, <, >, and so on. 
And another comment within the implementation: // NOTE(rsc): This function does NOT call the runtime cmpstring function, // because we do not want to provide any performance justification for // using strings.Compare. Basically no one should use strings.Compare. --- search/sort.go | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/search/sort.go b/search/sort.go index 6e4ed80fa..3dc118518 100644 --- a/search/sort.go +++ b/search/sort.go @@ -233,7 +233,11 @@ func (so SortOrder) Compare(cachedScoring, cachedDesc []bool, i, j *DocumentMatc } else { iVal := i.Sort[x] jVal := j.Sort[x] - c = strings.Compare(iVal, jVal) + if iVal < jVal { + c = -1 + } else if iVal > jVal { + c = 1 + } } if c == 0 { From 5b905c99f4350d29be5a5f9c5ba5becc9aecdcf0 Mon Sep 17 00:00:00 2001 From: Tyler Kovacs Date: Wed, 20 May 2020 13:11:11 -0700 Subject: [PATCH 686/728] Add SortImpl to SearchRequest This allows SearchRequests to specify the sort implementation (defaults to sort.Sort) to use when sorting hits. 
--- index_alias_impl.go | 6 +++--- index_impl.go | 3 +-- search.go | 23 +++++++++++++++++++++++ 3 files changed, 27 insertions(+), 5 deletions(-) diff --git a/index_alias_impl.go b/index_alias_impl.go index bd5fabc86..7b8bf09b5 100644 --- a/index_alias_impl.go +++ b/index_alias_impl.go @@ -16,7 +16,6 @@ package bleve import ( "context" - "sort" "sync" "time" @@ -521,10 +520,11 @@ func MultiSearch(ctx context.Context, req *SearchRequest, indexes ...Index) (*Se } } + sortImpl := req.GetSortImpl() // sort all hits with the requested order if len(req.Sort) > 0 { sorter := newSearchHitSorter(req.Sort, sr.Hits) - sort.Sort(sorter) + sortImpl(sorter) } // now skip over the correct From @@ -549,7 +549,7 @@ func MultiSearch(ctx context.Context, req *SearchRequest, indexes ...Index) (*Se req.Sort.Reverse() // resort using the original order mhs := newSearchHitSorter(req.Sort, sr.Hits) - sort.Sort(mhs) + sortImpl(mhs) // reset request req.SearchBefore = req.SearchAfter req.SearchAfter = nil diff --git a/index_impl.go b/index_impl.go index 6324d960e..69eb44fae 100644 --- a/index_impl.go +++ b/index_impl.go @@ -19,7 +19,6 @@ import ( "encoding/json" "fmt" "os" - "sort" "sync" "sync/atomic" "time" @@ -579,7 +578,7 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr req.Sort.Reverse() // resort using the original order mhs := newSearchHitSorter(req.Sort, hits) - sort.Sort(mhs) + req.GetSortImpl()(mhs) // reset request req.SearchBefore = req.SearchAfter req.SearchAfter = nil diff --git a/search.go b/search.go index b337edc9e..c2758848d 100644 --- a/search.go +++ b/search.go @@ -18,6 +18,7 @@ import ( "encoding/json" "fmt" "reflect" + "sort" "time" "github.com/blevesearch/bleve/analysis" @@ -264,6 +265,7 @@ func (h *HighlightRequest) AddField(field string) { // Score controls the kind of scoring performed // SearchAfter supports deep paging by providing a minimum sort key // SearchBefore supports deep paging by providing a maximum sort key +// 
SortImpl specifies the sort implementation to use for sorting results. // // A special field named "*" can be used to return all fields. type SearchRequest struct { @@ -279,6 +281,8 @@ type SearchRequest struct { Score string `json:"score,omitempty"` SearchAfter []string `json:"search_after"` SearchBefore []string `json:"search_before"` + + SortImpl func(sort.Interface) `json:"-"` } func (r *SearchRequest) Validate() error { @@ -606,3 +610,22 @@ func MemoryNeededForSearchResult(req *SearchRequest) uint64 { return uint64(estimate) } + +// SetSortFunc sets the sort implementation to use when sorting hits. +// +// SearchRequests can specify a custom sort implementation to meet +// their needs. For instance, by specifying a parallel sort +// that uses all available cores. +func (r *SearchRequest) SetSortImpl(s func(sort.Interface)) { + r.SortImpl = s +} + +// GetSortFunc returns the sort implementation to use when sorting hits. +// Defaults to sort.Sort. +func (r *SearchRequest) GetSortImpl() func(data sort.Interface) { + if r.SortImpl != nil { + return r.SortImpl + } + + return sort.Sort +} From f1983f7cd7b3b0f1591ff47f9bb4b48f78493f42 Mon Sep 17 00:00:00 2001 From: Tyler Kovacs Date: Wed, 20 May 2020 14:39:10 -0700 Subject: [PATCH 687/728] rename SortImpl to SortFunc and rename getter --- index_alias_impl.go | 6 +++--- index_impl.go | 2 +- search.go | 16 ++++++++-------- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/index_alias_impl.go b/index_alias_impl.go index 7b8bf09b5..5aa57d8ac 100644 --- a/index_alias_impl.go +++ b/index_alias_impl.go @@ -520,11 +520,11 @@ func MultiSearch(ctx context.Context, req *SearchRequest, indexes ...Index) (*Se } } - sortImpl := req.GetSortImpl() + sortFunc := req.SortFunc() // sort all hits with the requested order if len(req.Sort) > 0 { sorter := newSearchHitSorter(req.Sort, sr.Hits) - sortImpl(sorter) + sortFunc(sorter) } // now skip over the correct From @@ -549,7 +549,7 @@ func MultiSearch(ctx context.Context, 
req *SearchRequest, indexes ...Index) (*Se req.Sort.Reverse() // resort using the original order mhs := newSearchHitSorter(req.Sort, sr.Hits) - sortImpl(mhs) + sortFunc(mhs) // reset request req.SearchBefore = req.SearchAfter req.SearchAfter = nil diff --git a/index_impl.go b/index_impl.go index 69eb44fae..629cc9b2f 100644 --- a/index_impl.go +++ b/index_impl.go @@ -578,7 +578,7 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr req.Sort.Reverse() // resort using the original order mhs := newSearchHitSorter(req.Sort, hits) - req.GetSortImpl()(mhs) + req.SortFunc()(mhs) // reset request req.SearchBefore = req.SearchAfter req.SearchAfter = nil diff --git a/search.go b/search.go index c2758848d..f67450779 100644 --- a/search.go +++ b/search.go @@ -265,7 +265,7 @@ func (h *HighlightRequest) AddField(field string) { // Score controls the kind of scoring performed // SearchAfter supports deep paging by providing a minimum sort key // SearchBefore supports deep paging by providing a maximum sort key -// SortImpl specifies the sort implementation to use for sorting results. +// sortFunc specifies the sort implementation to use for sorting results. // // A special field named "*" can be used to return all fields. type SearchRequest struct { @@ -282,7 +282,7 @@ type SearchRequest struct { SearchAfter []string `json:"search_after"` SearchBefore []string `json:"search_before"` - SortImpl func(sort.Interface) `json:"-"` + sortFunc func(sort.Interface) } func (r *SearchRequest) Validate() error { @@ -616,15 +616,15 @@ func MemoryNeededForSearchResult(req *SearchRequest) uint64 { // SearchRequests can specify a custom sort implementation to meet // their needs. For instance, by specifying a parallel sort // that uses all available cores. 
-func (r *SearchRequest) SetSortImpl(s func(sort.Interface)) { - r.SortImpl = s +func (r *SearchRequest) SetSortFunc(s func(sort.Interface)) { + r.sortFunc = s } -// GetSortFunc returns the sort implementation to use when sorting hits. +// SortFunc returns the sort implementation to use when sorting hits. // Defaults to sort.Sort. -func (r *SearchRequest) GetSortImpl() func(data sort.Interface) { - if r.SortImpl != nil { - return r.SortImpl +func (r *SearchRequest) SortFunc() func(data sort.Interface) { + if r.sortFunc != nil { + return r.sortFunc } return sort.Sort From 9f5cdcf2c9fb65c7c7ca69087769c005fd2079c7 Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Fri, 22 May 2020 11:11:48 -0400 Subject: [PATCH 688/728] speed up versus test by using larger batch (#1387) --- test/versus_test.go | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/test/versus_test.go b/test/versus_test.go index 20a85d161..4257a9849 100644 --- a/test/versus_test.go +++ b/test/versus_test.go @@ -53,7 +53,7 @@ func TestScorchVersusUpsideDownBoltAll(t *testing.T) { NumDocs: 1000, MaxWordsPerDoc: 20, NumWords: 10, - BatchSize: 10, + BatchSize: 1000, NumAttemptsPerSearch: 100, }).run(scorch.Name, boltdb.Name, upsidedown.Name, boltdb.Name, nil, nil) } @@ -275,7 +275,6 @@ func testVersusSearches(vt *VersusTest, searchTemplates []string, idxA, idxB ble if err != nil { t.Fatalf("could not parse search template: %s, err: %v", searchTemplate, err) } - for j := 0; j < vt.NumAttemptsPerSearch; j++ { vt.CurAttempt = j From a62b0bca7cfc82f26d4eb0f4b86080b8abc3f8d3 Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Fri, 22 May 2020 11:12:36 -0400 Subject: [PATCH 689/728] update char filter table tests to use subtests (#1388) --- .../char/asciifolding/asciifolding_test.go | 11 ++- analysis/char/regexp/regexp_test.go | 75 ++++++------------- 2 files changed, 30 insertions(+), 56 deletions(-) diff --git a/analysis/char/asciifolding/asciifolding_test.go 
b/analysis/char/asciifolding/asciifolding_test.go index 216583d1e..d79542a8a 100644 --- a/analysis/char/asciifolding/asciifolding_test.go +++ b/analysis/char/asciifolding/asciifolding_test.go @@ -15,6 +15,7 @@ package asciifolding import ( + "fmt" "reflect" "testing" ) @@ -52,9 +53,11 @@ func TestAsciiFoldingFilter(t *testing.T) { for _, test := range tests { filter := New() - output := filter.Filter(test.input) - if !reflect.DeepEqual(output, test.output) { - t.Errorf("Expected:\n`%s`\ngot:\n`%s`\nfor:\n`%s`\n", string(test.output), string(output), string(test.input)) - } + t.Run(fmt.Sprintf("on %s", test.input), func(t *testing.T) { + output := filter.Filter(test.input) + if !reflect.DeepEqual(output, test.output) { + t.Errorf("\nExpected:\n`%s`\ngot:\n`%s`\n", string(test.output), string(output)) + } + }) } } diff --git a/analysis/char/regexp/regexp_test.go b/analysis/char/regexp/regexp_test.go index a3430555e..ff1b04b3b 100644 --- a/analysis/char/regexp/regexp_test.go +++ b/analysis/char/regexp/regexp_test.go @@ -15,64 +15,32 @@ package regexp import ( + "fmt" "reflect" "regexp" "testing" ) func TestRegexpCharFilter(t *testing.T) { - htmlTagPattern := `\s]+))?)+\s*|\s*)/?>` - htmlRegex := regexp.MustCompile(htmlTagPattern) - tests := []struct { - input []byte - output []byte - }{ - { - input: []byte(`test`), - output: []byte(` test `), - }, - } - - for _, test := range tests { - filter := New(htmlRegex, []byte{' '}) - output := filter.Filter(test.input) - if !reflect.DeepEqual(output, test.output) { - t.Errorf("Expected:\n`%s`\ngot:\n`%s`\nfor:\n`%s`\n", string(test.output), string(output), string(test.input)) - } - } -} - -func TestZeroWidthNonJoinerCharFilter(t *testing.T) { - zeroWidthNonJoinerPattern := `\x{200C}` - zeroWidthNonJoinerRegex := regexp.MustCompile(zeroWidthNonJoinerPattern) - - tests := []struct { - input []byte - output []byte - }{ - { - input: []byte("water\u200Cunder\u200Cthe\u200Cbridge"), - output: []byte("water under the bridge"), - }, - 
} - - for _, test := range tests { - filter := New(zeroWidthNonJoinerRegex, []byte{' '}) - output := filter.Filter(test.input) - if !reflect.DeepEqual(output, test.output) { - t.Errorf("Expected:\n`%s`\ngot:\n`%s`\nfor:\n`%s`\n", string(test.output), string(output), string(test.input)) - } - } -} - -func TestRegexpCustomReplace(t *testing.T) { tests := []struct { regexStr string replace []byte input []byte output []byte }{ + { + regexStr: `\s]+))?)+\s*|\s*)/?>`, + replace: []byte{' '}, + input: []byte(`test`), + output: []byte(` test `), + }, + { + regexStr: `\x{200C}`, + replace: []byte{' '}, + input: []byte("water\u200Cunder\u200Cthe\u200Cbridge"), + output: []byte("water under the bridge"), + }, { regexStr: `([a-z])\s+(\d)`, replace: []byte(`$1-$2`), @@ -105,13 +73,16 @@ func TestRegexpCustomReplace(t *testing.T) { }, } - for i := range tests { - regex := regexp.MustCompile(tests[i].regexStr) - filter := New(regex, tests[i].replace) + for _, test := range tests { + t.Run(fmt.Sprintf("match %s replace %s", test.regexStr, string(test.replace)), func(t *testing.T) { + regex := regexp.MustCompile(test.regexStr) + filter := New(regex, test.replace) + + output := filter.Filter(test.input) + if !reflect.DeepEqual(test.output, output) { + t.Errorf("Expected: `%s`, Got: `%s`\n", string(test.output), string(output)) + } + }) - output := filter.Filter(tests[i].input) - if !reflect.DeepEqual(tests[i].output, output) { - t.Errorf("[%d] Expected: `%s`, Got: `%s`\n", i, string(tests[i].output), string(output)) - } } } From c0444356976e14fb97636de3038ff86fa9351992 Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Fri, 22 May 2020 11:12:54 -0400 Subject: [PATCH 690/728] update datetime table test to use subtests (#1389) --- analysis/datetime/flexible/flexible_test.go | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/analysis/datetime/flexible/flexible_test.go b/analysis/datetime/flexible/flexible_test.go index e97423b78..644d91eb0 100644 --- 
a/analysis/datetime/flexible/flexible_test.go +++ b/analysis/datetime/flexible/flexible_test.go @@ -76,14 +76,14 @@ func TestFlexibleDateTimeParser(t *testing.T) { }) for _, test := range tests { - actualTime, actualErr := dateOptionalTimeParser.ParseDateTime(test.input) - if actualErr != test.expectedError { - t.Errorf("expected error %#v, got %#v", test.expectedError, actualErr) - continue - } - if !reflect.DeepEqual(actualTime, test.expectedTime) { - t.Errorf("expected time %#v, got %#v", test.expectedTime, actualTime) - t.Errorf("expected location %#v,\n got %#v", test.expectedTime.Location(), actualTime.Location()) - } + t.Run(test.input, func(t *testing.T) { + actualTime, actualErr := dateOptionalTimeParser.ParseDateTime(test.input) + if actualErr != test.expectedError { + t.Fatalf("expected error %#v, got %#v", test.expectedError, actualErr) + } + if !reflect.DeepEqual(actualTime, test.expectedTime) { + t.Errorf("expected time %v, got %v", test.expectedTime, actualTime) + } + }) } } From 5426dde14309ecb3545ac12f62760cab0f4de126 Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Fri, 22 May 2020 13:34:20 -0400 Subject: [PATCH 691/728] add support for zap v13 (#1403) --- go.mod | 5 +++-- index/scorch/segment_plugin.go | 2 ++ 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/go.mod b/go.mod index d38cf8f92..e9e27ba10 100644 --- a/go.mod +++ b/go.mod @@ -8,8 +8,9 @@ require ( github.com/blevesearch/go-porterstemmer v1.0.3 github.com/blevesearch/segment v0.9.0 github.com/blevesearch/snowballstem v0.9.0 - github.com/blevesearch/zap/v11 v11.0.7 - github.com/blevesearch/zap/v12 v12.0.7 + github.com/blevesearch/zap/v11 v11.0.8 + github.com/blevesearch/zap/v12 v12.0.8 + github.com/blevesearch/zap/v13 v13.0.0 github.com/couchbase/ghistogram v0.1.0 // indirect github.com/couchbase/moss v0.1.0 github.com/couchbase/vellum v1.0.1 diff --git a/index/scorch/segment_plugin.go b/index/scorch/segment_plugin.go index 6dfa3b282..f5c39e063 100644 --- 
a/index/scorch/segment_plugin.go +++ b/index/scorch/segment_plugin.go @@ -21,6 +21,7 @@ import ( zapv11 "github.com/blevesearch/zap/v11" zapv12 "github.com/blevesearch/zap/v12" + zapv13 "github.com/blevesearch/zap/v13" ) var supportedSegmentPlugins map[string]map[uint32]segment.Plugin @@ -28,6 +29,7 @@ var defaultSegmentPlugin segment.Plugin func init() { ResetPlugins() + RegisterPlugin(zapv13.Plugin(), false) RegisterPlugin(zapv12.Plugin(), false) RegisterPlugin(zapv11.Plugin(), true) } From f830931c0ffc2c80a20e02fdad4847c7d669423e Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Sat, 23 May 2020 17:37:05 -0400 Subject: [PATCH 692/728] allow integration tests to force scorch segment type/ver (#1402) --- test/integration_test.go | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/test/integration_test.go b/test/integration_test.go index ed59323ff..cdd50f03a 100644 --- a/test/integration_test.go +++ b/test/integration_test.go @@ -39,14 +39,20 @@ var keepIndex = flag.Bool("keepIndex", false, "keep the index after testing") var indexType = flag.String("indexType", bleve.Config.DefaultIndexType, "index type to build") var kvType = flag.String("kvType", bleve.Config.DefaultKVStore, "kv store type to build") +var segType = flag.String("segType", "", "force scorch segment type") +var segVer = flag.Int("segVer", 0, "force scorch segment version") func TestIntegration(t *testing.T) { flag.Parse() - bleve.Config.DefaultIndexType = *indexType - bleve.Config.DefaultKVStore = *kvType t.Logf("using index type %s and kv type %s", *indexType, *kvType) + if *segType != "" { + t.Logf("forcing segment type: %s", *segType) + } + if *segVer != 0 { + t.Logf("forcing segment version: %d", *segVer) + } var err error var datasetRegexp *regexp.Regexp @@ -194,7 +200,15 @@ func runTestDir(t *testing.T, dir, datasetName string) { func loadDataSet(t *testing.T, datasetName string, mapping mapping.IndexMappingImpl, path string) (bleve.Index, func(), error) { 
idxPath := fmt.Sprintf("test-%s.bleve", datasetName) - index, err := bleve.New(idxPath, &mapping) + cfg := map[string]interface{}{} + if *segType != "" { + cfg["forceSegmentType"] = *segType + } + if *segVer != 0 { + cfg["forceSegmentVersion"] = *segVer + } + + index, err := bleve.NewUsing(idxPath, &mapping, *indexType, *kvType, cfg) if err != nil { return nil, nil, fmt.Errorf("error creating new index: %v", err) } From 04c39ad184bb7dacaaf0dfc12357991897d14743 Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Tue, 26 May 2020 11:25:09 -0400 Subject: [PATCH 693/728] allow segment type/version override to work (#1407) logic was reversed, preventing correct configuration from actually switching the segment plugin used --- index/scorch/builder.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/index/scorch/builder.go b/index/scorch/builder.go index cd400bd1d..1f4b41d63 100644 --- a/index/scorch/builder.go +++ b/index/scorch/builder.go @@ -107,8 +107,9 @@ func (o *Builder) parseConfig(config map[string]interface{}) (err error) { segPlugin, err := chooseSegmentPlugin(forcedSegmentType, uint32(forcedSegmentVersion)) if err != nil { - o.segPlugin = segPlugin + return err } + o.segPlugin = segPlugin } return nil From 0c90a2fc4e8ccb06077958f766135b04610adcce Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Tue, 26 May 2020 11:36:20 -0400 Subject: [PATCH 694/728] improve scorch config parsing of forced zap version (#1401) previous version did not correctly handle integer values stored in a float64 due to JSON parsing --- index/scorch/scorch.go | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/index/scorch/scorch.go b/index/scorch/scorch.go index c33f760ea..6b5fd3923 100644 --- a/index/scorch/scorch.go +++ b/index/scorch/scorch.go @@ -142,17 +142,18 @@ func NewScorch(storeName string, // configForceSegmentTypeVersion checks if the caller has requested a // specific segment type/version func 
configForceSegmentTypeVersion(config map[string]interface{}) (string, uint32, error) { - forcedSegmentVersion, ok := config["forceSegmentVersion"].(int) - if ok { - forcedSegmentType, ok2 := config["forceSegmentType"].(string) - if !ok2 { - return "", 0, fmt.Errorf( - "forceSegmentVersion set to %d, must also specify forceSegmentType", forcedSegmentVersion) - } + forcedSegmentVersion, err := parseToInteger(config["forceSegmentVersion"]) + if err != nil { + return "", 0, nil + } - return forcedSegmentType, uint32(forcedSegmentVersion), nil + forcedSegmentType, ok := config["forceSegmentType"].(string) + if !ok { + return "", 0, fmt.Errorf( + "forceSegmentVersion set to %d, must also specify forceSegmentType", forcedSegmentVersion) } - return "", 0, nil + + return forcedSegmentType, uint32(forcedSegmentVersion), nil } func (s *Scorch) paused() uint64 { From 2b80a2aedf6bb1746054c6771d0625866a0dcabe Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Tue, 26 May 2020 12:57:16 -0400 Subject: [PATCH 695/728] add support for zap v14 (#1408) bump all the zap versions --- go.mod | 7 ++++--- index/scorch/segment_plugin.go | 2 ++ 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/go.mod b/go.mod index e9e27ba10..2b23a510b 100644 --- a/go.mod +++ b/go.mod @@ -8,9 +8,10 @@ require ( github.com/blevesearch/go-porterstemmer v1.0.3 github.com/blevesearch/segment v0.9.0 github.com/blevesearch/snowballstem v0.9.0 - github.com/blevesearch/zap/v11 v11.0.8 - github.com/blevesearch/zap/v12 v12.0.8 - github.com/blevesearch/zap/v13 v13.0.0 + github.com/blevesearch/zap/v11 v11.0.9 + github.com/blevesearch/zap/v12 v12.0.9 + github.com/blevesearch/zap/v13 v13.0.1 + github.com/blevesearch/zap/v14 v14.0.0 github.com/couchbase/ghistogram v0.1.0 // indirect github.com/couchbase/moss v0.1.0 github.com/couchbase/vellum v1.0.1 diff --git a/index/scorch/segment_plugin.go b/index/scorch/segment_plugin.go index f5c39e063..4729e6bd6 100644 --- a/index/scorch/segment_plugin.go +++ 
b/index/scorch/segment_plugin.go @@ -22,6 +22,7 @@ import ( zapv11 "github.com/blevesearch/zap/v11" zapv12 "github.com/blevesearch/zap/v12" zapv13 "github.com/blevesearch/zap/v13" + zapv14 "github.com/blevesearch/zap/v14" ) var supportedSegmentPlugins map[string]map[uint32]segment.Plugin @@ -29,6 +30,7 @@ var defaultSegmentPlugin segment.Plugin func init() { ResetPlugins() + RegisterPlugin(zapv14.Plugin(), false) RegisterPlugin(zapv13.Plugin(), false) RegisterPlugin(zapv12.Plugin(), false) RegisterPlugin(zapv11.Plugin(), true) From 25a604dd310c0d9fd57216282640237a52c5dd87 Mon Sep 17 00:00:00 2001 From: Abhinav Dangeti Date: Mon, 8 Jun 2020 09:15:46 -0700 Subject: [PATCH 696/728] Log field name where available on tooManyClauses error --- search/searcher/search_disjunction.go | 6 +++--- search/searcher/search_disjunction_heap.go | 2 +- search/searcher/search_disjunction_slice.go | 2 +- search/searcher/search_fuzzy.go | 4 ++-- search/searcher/search_multi_term.go | 4 ++-- search/searcher/search_numeric_range.go | 2 +- search/searcher/search_regexp.go | 2 +- search/searcher/search_term_prefix.go | 2 +- 8 files changed, 12 insertions(+), 12 deletions(-) diff --git a/search/searcher/search_disjunction.go b/search/searcher/search_disjunction.go index 6a296b68f..4ad33b6ff 100644 --- a/search/searcher/search_disjunction.go +++ b/search/searcher/search_disjunction.go @@ -103,7 +103,7 @@ func tooManyClauses(count int) bool { return false } -func tooManyClausesErr(count int) error { - return fmt.Errorf("TooManyClauses[%d > maxClauseCount, which is set to %d]", - count, DisjunctionMaxClauseCount) +func tooManyClausesErr(field string, count int) error { + return fmt.Errorf("TooManyClauses over field: `%s` [%d > maxClauseCount,"+ + " which is set to %d]", field, count, DisjunctionMaxClauseCount) } diff --git a/search/searcher/search_disjunction_heap.go b/search/searcher/search_disjunction_heap.go index ec133f1f8..00b09fcb9 100644 --- a/search/searcher/search_disjunction_heap.go 
+++ b/search/searcher/search_disjunction_heap.go @@ -62,7 +62,7 @@ func newDisjunctionHeapSearcher(indexReader index.IndexReader, limit bool) ( *DisjunctionHeapSearcher, error) { if limit && tooManyClauses(len(searchers)) { - return nil, tooManyClausesErr(len(searchers)) + return nil, tooManyClausesErr("", len(searchers)) } // build our searcher diff --git a/search/searcher/search_disjunction_slice.go b/search/searcher/search_disjunction_slice.go index e47f39ad0..464878bc6 100644 --- a/search/searcher/search_disjunction_slice.go +++ b/search/searcher/search_disjunction_slice.go @@ -50,7 +50,7 @@ func newDisjunctionSliceSearcher(indexReader index.IndexReader, limit bool) ( *DisjunctionSliceSearcher, error) { if limit && tooManyClauses(len(qsearchers)) { - return nil, tooManyClausesErr(len(qsearchers)) + return nil, tooManyClausesErr("", len(qsearchers)) } // build the downstream searchers searchers := make(OrderedSearcherList, len(qsearchers)) diff --git a/search/searcher/search_fuzzy.go b/search/searcher/search_fuzzy.go index 8176e59b5..aca8a7d9f 100644 --- a/search/searcher/search_fuzzy.go +++ b/search/searcher/search_fuzzy.go @@ -75,7 +75,7 @@ func findFuzzyCandidateTerms(indexReader index.IndexReader, term string, for err == nil && tfd != nil { rv = append(rv, tfd.Term) if tooManyClauses(len(rv)) { - return nil, tooManyClausesErr(len(rv)) + return nil, tooManyClausesErr(field, len(rv)) } tfd, err = fieldDict.Next() } @@ -107,7 +107,7 @@ func findFuzzyCandidateTerms(indexReader index.IndexReader, term string, if !exceeded && ld <= fuzziness { rv = append(rv, tfd.Term) if tooManyClauses(len(rv)) { - return nil, tooManyClausesErr(len(rv)) + return nil, tooManyClausesErr(field, len(rv)) } } tfd, err = fieldDict.Next() diff --git a/search/searcher/search_multi_term.go b/search/searcher/search_multi_term.go index c48366ee2..1c60d4a7a 100644 --- a/search/searcher/search_multi_term.go +++ b/search/searcher/search_multi_term.go @@ -23,7 +23,7 @@ func 
NewMultiTermSearcher(indexReader index.IndexReader, terms []string, field string, boost float64, options search.SearcherOptions, limit bool) ( search.Searcher, error) { if limit && tooManyClauses(len(terms)) { - return nil, tooManyClausesErr(len(terms)) + return nil, tooManyClausesErr(field, len(terms)) } qsearchers := make([]search.Searcher, len(terms)) @@ -51,7 +51,7 @@ func NewMultiTermSearcherBytes(indexReader index.IndexReader, terms [][]byte, field string, boost float64, options search.SearcherOptions, limit bool) ( search.Searcher, error) { if limit && tooManyClauses(len(terms)) { - return nil, tooManyClausesErr(len(terms)) + return nil, tooManyClausesErr(field, len(terms)) } qsearchers := make([]search.Searcher, len(terms)) diff --git a/search/searcher/search_numeric_range.go b/search/searcher/search_numeric_range.go index 83107f020..675f569d9 100644 --- a/search/searcher/search_numeric_range.go +++ b/search/searcher/search_numeric_range.go @@ -97,7 +97,7 @@ func NewNumericRangeSearcher(indexReader index.IndexReader, } if tooManyClauses(len(terms)) { - return nil, tooManyClausesErr(len(terms)) + return nil, tooManyClausesErr(field, len(terms)) } return NewMultiTermSearcherBytes(indexReader, terms, field, boost, options, diff --git a/search/searcher/search_regexp.go b/search/searcher/search_regexp.go index 4def832c4..11a44f159 100644 --- a/search/searcher/search_regexp.go +++ b/search/searcher/search_regexp.go @@ -110,7 +110,7 @@ func findRegexpCandidateTerms(indexReader index.IndexReader, if matchPos != nil && matchPos[0] == 0 && matchPos[1] == len(tfd.Term) { rv = append(rv, tfd.Term) if tooManyClauses(len(rv)) { - return rv, tooManyClausesErr(len(rv)) + return rv, tooManyClausesErr(field, len(rv)) } } tfd, err = fieldDict.Next() diff --git a/search/searcher/search_term_prefix.go b/search/searcher/search_term_prefix.go index b5af4631f..2a8f22cff 100644 --- a/search/searcher/search_term_prefix.go +++ b/search/searcher/search_term_prefix.go @@ -38,7 +38,7 @@ 
func NewTermPrefixSearcher(indexReader index.IndexReader, prefix string, for err == nil && tfd != nil { terms = append(terms, tfd.Term) if tooManyClauses(len(terms)) { - return nil, tooManyClausesErr(len(terms)) + return nil, tooManyClausesErr(field, len(terms)) } tfd, err = fieldDict.Next() } From 4aea3d9e665c34c08d063bf956d91ada3b441ad7 Mon Sep 17 00:00:00 2001 From: Sreekanth Sivasankaran Date: Thu, 18 Jun 2020 15:11:15 +0530 Subject: [PATCH 697/728] MB-40007 -Obsolete segment file leaks during merge introductions During merge segment introduction, if the segment contents becomes totally obsolete, then the introducer skips it's introduction to root. But it wasn't handling the clean up ceremonies for the segment like Closing and cleaning the entries from the ineligibleToRemove file list. --- index/scorch/introducer.go | 8 ++++++-- index/scorch/merge.go | 38 +++++++++++++++++++++++++++++--------- index/scorch/stats.go | 7 ++++--- 3 files changed, 39 insertions(+), 14 deletions(-) diff --git a/index/scorch/introducer.go b/index/scorch/introducer.go index 64ca969bd..bcb04f87a 100644 --- a/index/scorch/introducer.go +++ b/index/scorch/introducer.go @@ -383,6 +383,7 @@ func (s *Scorch) introduceMerge(nextMerge *segmentMerge) { } } } + var skipped bool // In case where all the docs in the newly merged segment getting // deleted by the time we reach here, can skip the introduction. 
if nextMerge.new != nil && @@ -405,6 +406,9 @@ func (s *Scorch) introduceMerge(nextMerge *segmentMerge) { docsToPersistCount += nextMerge.new.Count() - newSegmentDeleted.GetCardinality() memSegments++ } + } else { + skipped = true + atomic.AddUint64(&s.stats.TotFileMergeIntroductionsObsoleted, 1) } atomic.StoreUint64(&s.stats.TotItemsToPersist, docsToPersistCount) @@ -429,8 +433,8 @@ func (s *Scorch) introduceMerge(nextMerge *segmentMerge) { } // notify requester that we incorporated this - nextMerge.notify <- newSnapshot - close(nextMerge.notify) + nextMerge.notifyCh <- ¬ify{iss: newSnapshot, skipped: skipped} + close(nextMerge.notifyCh) } func isMemorySegment(s *SegmentSnapshot) bool { diff --git a/index/scorch/merge.go b/index/scorch/merge.go index 3eccee52e..e88023a3e 100644 --- a/index/scorch/merge.go +++ b/index/scorch/merge.go @@ -310,8 +310,9 @@ func (s *Scorch) planMergeAtSnapshot(ctx context.Context, var oldNewDocNums map[uint64][]uint64 var seg segment.Segment + var filename string if len(segmentsToMerge) > 0 { - filename := zapFileName(newSegmentID) + filename = zapFileName(newSegmentID) s.markIneligibleForRemoval(filename) path := s.path + string(os.PathSeparator) + filename @@ -356,7 +357,7 @@ func (s *Scorch) planMergeAtSnapshot(ctx context.Context, old: oldMap, oldNewDocNums: oldNewDocNums, new: seg, - notify: make(chan *IndexSnapshot), + notifyCh: make(chan *notify), } // give it to the introducer @@ -371,15 +372,20 @@ func (s *Scorch) planMergeAtSnapshot(ctx context.Context, introStartTime := time.Now() // it is safe to blockingly wait for the merge introduction // here as the introducer is bound to handle the notify channel. 
- newSnapshot := <-sm.notify + notify := <-sm.notifyCh introTime := uint64(time.Since(introStartTime)) atomic.AddUint64(&s.stats.TotFileMergeZapIntroductionTime, introTime) if atomic.LoadUint64(&s.stats.MaxFileMergeZapIntroductionTime) < introTime { atomic.StoreUint64(&s.stats.MaxFileMergeZapIntroductionTime, introTime) } atomic.AddUint64(&s.stats.TotFileMergeIntroductionsDone, 1) - if newSnapshot != nil { - _ = newSnapshot.DecRef() + if notify != nil && notify.iss != nil { + _ = notify.iss.DecRef() + if notify.skipped { + // decrement the ref counts on skipping introduction. + s.unmarkIneligibleForRemoval(filename) + _ = seg.DecRef() + } } atomic.AddUint64(&s.stats.TotFileMergePlanTasksDone, 1) @@ -395,12 +401,17 @@ func (s *Scorch) planMergeAtSnapshot(ctx context.Context, return nil } +type notify struct { + iss *IndexSnapshot + skipped bool +} + type segmentMerge struct { id uint64 old map[uint64]*SegmentSnapshot oldNewDocNums map[uint64][]uint64 new segment.Segment - notify chan *IndexSnapshot + notifyCh chan *notify } // perform a merging of the given SegmentBase instances into a new, @@ -450,7 +461,7 @@ func (s *Scorch) mergeSegmentBases(snapshot *IndexSnapshot, old: make(map[uint64]*SegmentSnapshot), oldNewDocNums: make(map[uint64][]uint64), new: seg, - notify: make(chan *IndexSnapshot), + notifyCh: make(chan *notify), } for i, idx := range sbsIndexes { @@ -467,11 +478,20 @@ func (s *Scorch) mergeSegmentBases(snapshot *IndexSnapshot, } // blockingly wait for the introduction to complete - newSnapshot := <-sm.notify - if newSnapshot != nil { + var newSnapshot *IndexSnapshot + notify := <-sm.notifyCh + if notify != nil && notify.iss != nil { + newSnapshot = notify.iss atomic.AddUint64(&s.stats.TotMemMergeSegments, uint64(len(sbs))) atomic.AddUint64(&s.stats.TotMemMergeDone, 1) + if notify.skipped { + // decrement the ref counts on skipping introduction. 
+ _ = newSnapshot.DecRef() + _ = seg.DecRef() + newSnapshot = nil + } } + return newSnapshot, newSegmentID, nil } diff --git a/index/scorch/stats.go b/index/scorch/stats.go index 2900ac0df..9d7fbc0e2 100644 --- a/index/scorch/stats.go +++ b/index/scorch/stats.go @@ -108,9 +108,10 @@ type Stats struct { TotFileMergeZapIntroductionTime uint64 MaxFileMergeZapIntroductionTime uint64 - TotFileMergeIntroductions uint64 - TotFileMergeIntroductionsDone uint64 - TotFileMergeIntroductionsSkipped uint64 + TotFileMergeIntroductions uint64 + TotFileMergeIntroductionsDone uint64 + TotFileMergeIntroductionsSkipped uint64 + TotFileMergeIntroductionsObsoleted uint64 CurFilesIneligibleForRemoval uint64 TotSnapshotsRemovedFromMetaStore uint64 From dcafaff05395de11df95fb74f7f6a75c645a1b49 Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Thu, 18 Jun 2020 10:49:33 -0400 Subject: [PATCH 698/728] fix stats handling of term field reader advancing backwards (#1416) The current implementation creates a new term field reader, which replaces the original one, this causes the stats tracking start/finished to get out of sync. This fixes the issue by recording the original term field reader as finished when replacing with the new one. 
fixes #1415 --- index/scorch/scorch_test.go | 85 ++++++++++++++++++++++++++++++ index/scorch/snapshot_index_tfr.go | 2 + 2 files changed, 87 insertions(+) diff --git a/index/scorch/scorch_test.go b/index/scorch/scorch_test.go index 5a9ef40cd..e22fafa27 100644 --- a/index/scorch/scorch_test.go +++ b/index/scorch/scorch_test.go @@ -2380,3 +2380,88 @@ func TestCancelIndexForceMerge(t *testing.T) { t.Fatal(err) } } + +func TestIndexSeekBackwardsStats(t *testing.T) { + cfg := CreateConfig("TestIndexOpenReopen") + err := InitTest(cfg) + if err != nil { + t.Fatal(err) + } + defer func() { + err := DestroyTest(cfg) + if err != nil { + t.Log(err) + } + }() + + analysisQueue := index.NewAnalysisQueue(1) + idx, err := NewScorch(Name, cfg, analysisQueue) + if err != nil { + t.Fatal(err) + } + err = idx.Open() + if err != nil { + t.Errorf("error opening index: %v", err) + } + defer func() { + err := idx.Close() + if err != nil { + t.Fatal(err) + } + }() + + // insert a doc + doc := document.NewDocument("1") + doc.AddField(document.NewTextField("name", []uint64{}, []byte("cat"))) + err = idx.Update(doc) + if err != nil { + t.Fatalf("error updating index: %v", err) + } + + // insert another doc + doc = document.NewDocument("2") + doc.AddField(document.NewTextField("name", []uint64{}, []byte("cat"))) + err = idx.Update(doc) + if err != nil { + t.Fatalf("error updating index: %v", err) + } + + reader, err := idx.Reader() + if err != nil { + t.Fatalf("error getting index reader: %v", err) + } + defer reader.Close() + + tfr, err := reader.TermFieldReader([]byte("cat"), "name", false, false, false) + if err != nil { + t.Fatalf("error getting term field readyer for name/cat: %v", err) + } + + tfdFirst, err := tfr.Next(nil) + if err != nil { + t.Fatalf("error getting first tfd: %v", err) + } + + _, err = tfr.Next(nil) + if err != nil { + t.Fatalf("error getting second tfd: %v", err) + } + + // seek backwards to the first + _, err = tfr.Advance(tfdFirst.ID, nil) + if err != nil { + 
t.Fatalf("error adancing backwards: %v", err) + } + + err = tfr.Close() + if err != nil { + t.Fatalf("error closing term field reader: %v", err) + } + + + if idx.(*Scorch).stats.TotTermSearchersStarted != idx.(*Scorch).stats.TotTermSearchersFinished { + t.Errorf("expected term searchers started %d to equal term searchers finished %d", + idx.(*Scorch).stats.TotTermSearchersStarted, + idx.(*Scorch).stats.TotTermSearchersFinished) + } +} diff --git a/index/scorch/snapshot_index_tfr.go b/index/scorch/snapshot_index_tfr.go index 5d56f1944..61537fc4f 100644 --- a/index/scorch/snapshot_index_tfr.go +++ b/index/scorch/snapshot_index_tfr.go @@ -133,6 +133,8 @@ func (i *IndexSnapshotTermFieldReader) Advance(ID index.IndexInternalID, preAllo if err != nil { return nil, err } + // close the current term field reader before replacing it with a new one + _ = i.Close() *i = *(i2.(*IndexSnapshotTermFieldReader)) } num, err := docInternalToNumber(ID) From 1314e3722897b3939bf97cbad4140a4c6dcaf0eb Mon Sep 17 00:00:00 2001 From: Sreekanth Sivasankaran Date: Fri, 19 Jun 2020 14:45:49 +0530 Subject: [PATCH 699/728] Switching to atomic ops instead of locks for events --- index/scorch/persister.go | 4 ++-- index/scorch/scorch.go | 29 ++++++----------------------- index/scorch/stats.go | 3 +++ 3 files changed, 11 insertions(+), 25 deletions(-) diff --git a/index/scorch/persister.go b/index/scorch/persister.go index ea3667bbf..498378a4f 100644 --- a/index/scorch/persister.go +++ b/index/scorch/persister.go @@ -256,7 +256,7 @@ func (s *Scorch) pausePersisterForMergerCatchUp(lastPersistedEpoch uint64, // for sufficient in-memory segments to pile up for the next // memory merge cum persist loop. 
if numFilesOnDisk < uint64(po.PersisterNapUnderNumFiles) && - po.PersisterNapTimeMSec > 0 && s.paused() == 0 { + po.PersisterNapTimeMSec > 0 && s.NumEventsBlocking() == 0 { select { case <-s.closeCh: case <-time.After(time.Millisecond * time.Duration(po.PersisterNapTimeMSec)): @@ -333,7 +333,7 @@ func (s *Scorch) persistSnapshot(snapshot *IndexSnapshot, // Perform in-memory segment merging only when the memory pressure is // below the configured threshold, else the persister performs the // direct persistence of segments. - if s.paused() < po.MemoryPressurePauseThreshold { + if s.NumEventsBlocking() < po.MemoryPressurePauseThreshold { persisted, err := s.persistSnapshotMaybeMerge(snapshot) if err != nil { return err diff --git a/index/scorch/scorch.go b/index/scorch/scorch.go index 6b5fd3923..ba98a460d 100644 --- a/index/scorch/scorch.go +++ b/index/scorch/scorch.go @@ -73,10 +73,6 @@ type Scorch struct { onEvent func(event Event) onAsyncError func(err error) - pauseLock sync.RWMutex - - pauseCount uint64 - forceMergeRequestCh chan *mergerCtrl segPlugin segment.Plugin @@ -156,30 +152,17 @@ func configForceSegmentTypeVersion(config map[string]interface{}) (string, uint3 return forcedSegmentType, uint32(forcedSegmentVersion), nil } -func (s *Scorch) paused() uint64 { - s.pauseLock.Lock() - pc := s.pauseCount - s.pauseLock.Unlock() - return pc -} - -func (s *Scorch) incrPause() { - s.pauseLock.Lock() - s.pauseCount++ - s.pauseLock.Unlock() -} - -func (s *Scorch) decrPause() { - s.pauseLock.Lock() - s.pauseCount-- - s.pauseLock.Unlock() +func (s *Scorch) NumEventsBlocking() uint64 { + eventsCompleted := atomic.LoadUint64(&s.stats.TotEventTriggerCompleted) + eventsStarted := atomic.LoadUint64(&s.stats.TotEventTriggerStarted) + return eventsStarted - eventsCompleted } func (s *Scorch) fireEvent(kind EventKind, dur time.Duration) { if s.onEvent != nil { - s.incrPause() + atomic.AddUint64(&s.stats.TotEventTriggerStarted, 1) s.onEvent(Event{Kind: kind, Scorch: s, Duration: 
dur}) - s.decrPause() + atomic.AddUint64(&s.stats.TotEventTriggerCompleted, 1) } } diff --git a/index/scorch/stats.go b/index/scorch/stats.go index 2900ac0df..5795870f2 100644 --- a/index/scorch/stats.go +++ b/index/scorch/stats.go @@ -47,6 +47,9 @@ type Stats struct { TotTermSearchersStarted uint64 TotTermSearchersFinished uint64 + TotEventTriggerStarted uint64 + TotEventTriggerCompleted uint64 + TotIntroduceLoop uint64 TotIntroduceSegmentBeg uint64 TotIntroduceSegmentEnd uint64 From 57aabdae99de69996720121be8cbd6b340a3e42a Mon Sep 17 00:00:00 2001 From: Sreekanth Sivasankaran Date: Fri, 19 Jun 2020 13:52:58 +0530 Subject: [PATCH 700/728] adding UT and minor refactoring --- index/scorch/event.go | 8 ++ index/scorch/introducer.go | 4 +- index/scorch/merge.go | 40 +++++----- index/scorch/merge_test.go | 154 +++++++++++++++++++++++++++++++++++++ 4 files changed, 187 insertions(+), 19 deletions(-) create mode 100644 index/scorch/merge_test.go diff --git a/index/scorch/event.go b/index/scorch/event.go index dd79d6d06..8f3fc1914 100644 --- a/index/scorch/event.go +++ b/index/scorch/event.go @@ -54,3 +54,11 @@ var EventKindBatchIntroductionStart = EventKind(5) // EventKindBatchIntroduction is fired when Batch() completes. var EventKindBatchIntroduction = EventKind(6) + +// EventKindMergeTaskIntroductionStart is fired when the merger is about to +// start the introduction of merged segment from a single merge task. +var EventKindMergeTaskIntroductionStart = EventKind(7) + +// EventKindMergeTaskIntroduction is fired when the merger has completed +// the introduction of merged segment from a single merge task. 
+var EventKindMergeTaskIntroduction = EventKind(8) diff --git a/index/scorch/introducer.go b/index/scorch/introducer.go index bcb04f87a..7770c41c5 100644 --- a/index/scorch/introducer.go +++ b/index/scorch/introducer.go @@ -433,7 +433,9 @@ func (s *Scorch) introduceMerge(nextMerge *segmentMerge) { } // notify requester that we incorporated this - nextMerge.notifyCh <- ¬ify{iss: newSnapshot, skipped: skipped} + nextMerge.notifyCh <- &mergeTaskIntroStatus{ + indexSnapshot: newSnapshot, + skipped: skipped} close(nextMerge.notifyCh) } diff --git a/index/scorch/merge.go b/index/scorch/merge.go index e88023a3e..56c0953f4 100644 --- a/index/scorch/merge.go +++ b/index/scorch/merge.go @@ -357,9 +357,11 @@ func (s *Scorch) planMergeAtSnapshot(ctx context.Context, old: oldMap, oldNewDocNums: oldNewDocNums, new: seg, - notifyCh: make(chan *notify), + notifyCh: make(chan *mergeTaskIntroStatus), } + s.fireEvent(EventKindMergeTaskIntroductionStart, 0) + // give it to the introducer select { case <-s.closeCh: @@ -372,23 +374,25 @@ func (s *Scorch) planMergeAtSnapshot(ctx context.Context, introStartTime := time.Now() // it is safe to blockingly wait for the merge introduction // here as the introducer is bound to handle the notify channel. - notify := <-sm.notifyCh + introStatus := <-sm.notifyCh introTime := uint64(time.Since(introStartTime)) atomic.AddUint64(&s.stats.TotFileMergeZapIntroductionTime, introTime) if atomic.LoadUint64(&s.stats.MaxFileMergeZapIntroductionTime) < introTime { atomic.StoreUint64(&s.stats.MaxFileMergeZapIntroductionTime, introTime) } atomic.AddUint64(&s.stats.TotFileMergeIntroductionsDone, 1) - if notify != nil && notify.iss != nil { - _ = notify.iss.DecRef() - if notify.skipped { - // decrement the ref counts on skipping introduction. + if introStatus != nil && introStatus.indexSnapshot != nil { + _ = introStatus.indexSnapshot.DecRef() + if introStatus.skipped { + // close the segment on skipping introduction. 
s.unmarkIneligibleForRemoval(filename) - _ = seg.DecRef() + _ = seg.Close() } } atomic.AddUint64(&s.stats.TotFileMergePlanTasksDone, 1) + + s.fireEvent(EventKindMergeTaskIntroduction, 0) } // once all the newly merged segment introductions are done, @@ -401,9 +405,9 @@ func (s *Scorch) planMergeAtSnapshot(ctx context.Context, return nil } -type notify struct { - iss *IndexSnapshot - skipped bool +type mergeTaskIntroStatus struct { + indexSnapshot *IndexSnapshot + skipped bool } type segmentMerge struct { @@ -411,7 +415,7 @@ type segmentMerge struct { old map[uint64]*SegmentSnapshot oldNewDocNums map[uint64][]uint64 new segment.Segment - notifyCh chan *notify + notifyCh chan *mergeTaskIntroStatus } // perform a merging of the given SegmentBase instances into a new, @@ -461,7 +465,7 @@ func (s *Scorch) mergeSegmentBases(snapshot *IndexSnapshot, old: make(map[uint64]*SegmentSnapshot), oldNewDocNums: make(map[uint64][]uint64), new: seg, - notifyCh: make(chan *notify), + notifyCh: make(chan *mergeTaskIntroStatus), } for i, idx := range sbsIndexes { @@ -479,15 +483,15 @@ func (s *Scorch) mergeSegmentBases(snapshot *IndexSnapshot, // blockingly wait for the introduction to complete var newSnapshot *IndexSnapshot - notify := <-sm.notifyCh - if notify != nil && notify.iss != nil { - newSnapshot = notify.iss + introStatus := <-sm.notifyCh + if introStatus != nil && introStatus.indexSnapshot != nil { + newSnapshot = introStatus.indexSnapshot atomic.AddUint64(&s.stats.TotMemMergeSegments, uint64(len(sbs))) atomic.AddUint64(&s.stats.TotMemMergeDone, 1) - if notify.skipped { - // decrement the ref counts on skipping introduction. + if introStatus.skipped { + // close the segment on skipping introduction. 
_ = newSnapshot.DecRef() - _ = seg.DecRef() + _ = seg.Close() newSnapshot = nil } } diff --git a/index/scorch/merge_test.go b/index/scorch/merge_test.go new file mode 100644 index 000000000..41f6892e7 --- /dev/null +++ b/index/scorch/merge_test.go @@ -0,0 +1,154 @@ +// Copyright (c) 2020 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package scorch + +import ( + "sync" + "sync/atomic" + "testing" + + "github.com/blevesearch/bleve/document" + "github.com/blevesearch/bleve/index" +) + +func TestObsoleteSegmentMergeIntroduction(t *testing.T) { + testConfig := CreateConfig("TestObsoleteSegmentMergeIntroduction") + err := InitTest(testConfig) + if err != nil { + t.Fatal(err) + } + defer func() { + err := DestroyTest(testConfig) + if err != nil { + t.Fatal(err) + } + }() + + var introComplete, mergeIntroStart, mergeIntroComplete sync.WaitGroup + introComplete.Add(1) + mergeIntroStart.Add(1) + mergeIntroComplete.Add(1) + var segIntroCompleted int + RegistryEventCallbacks["test"] = func(e Event) { + if e.Kind == EventKindBatchIntroduction { + segIntroCompleted++ + if segIntroCompleted == 3 { + // all 3 segments introduced + introComplete.Done() + } + } else if e.Kind == EventKindMergeTaskIntroductionStart { + // signal the start of merge task introduction so that + // we can introduce a new batch which obsoletes the + // merged segment's contents. 
+ mergeIntroStart.Done() + // hold the merge task introduction until the merged segment contents + // are obsoleted with the next batch/segment introduction. + introComplete.Wait() + } else if e.Kind == EventKindMergeTaskIntroduction { + // signal the completion of the merge task introduction. + mergeIntroComplete.Done() + + } + } + + ourConfig := make(map[string]interface{}, len(testConfig)) + for k, v := range testConfig { + ourConfig[k] = v + } + ourConfig["eventCallbackName"] = "test" + + analysisQueue := index.NewAnalysisQueue(1) + idx, err := NewScorch(Name, ourConfig, analysisQueue) + if err != nil { + t.Fatal(err) + } + + err = idx.Open() + if err != nil { + t.Fatalf("error opening index: %v", err) + } + defer func() { + err := idx.Close() + if err != nil { + t.Fatal(err) + } + }() + + // first introduce two documents over two batches. + batch := index.NewBatch() + doc := document.NewDocument("1") + doc.AddField(document.NewTextField("name", []uint64{}, []byte("test3"))) + batch.Update(doc) + err = idx.Batch(batch) + if err != nil { + t.Error(err) + } + + batch.Reset() + doc = document.NewDocument("2") + doc.AddField(document.NewTextField("name", []uint64{}, []byte("test2updated"))) + batch.Update(doc) + err = idx.Batch(batch) + if err != nil { + t.Error(err) + } + + // wait until the merger trying to introduce the new merged segment. + mergeIntroStart.Wait() + + // execute another batch which obsoletes the contents of the new merged + // segment awaiting introduction. + batch.Reset() + batch.Delete("1") + batch.Delete("2") + doc = document.NewDocument("3") + doc.AddField(document.NewTextField("name", []uint64{}, []byte("test3updated"))) + batch.Update(doc) + err = idx.Batch(batch) + if err != nil { + t.Error(err) + } + + // wait until the merge task introduction complete. 
+ mergeIntroComplete.Wait() + + idxr, err := idx.Reader() + if err != nil { + t.Error(err) + } + + numSegments := len(idxr.(*IndexSnapshot).segment) + if numSegments != 1 { + t.Errorf("expected one segment at the root, got: %d", numSegments) + } + + skipIntroCount := atomic.LoadUint64(&idxr.(*IndexSnapshot).parent.stats.TotFileMergeIntroductionsObsoleted) + if skipIntroCount != 1 { + t.Errorf("expected one obsolete merge segment skipping the introduction, got: %d", skipIntroCount) + } + + docCount, err := idxr.DocCount() + if err != nil { + t.Fatal(err) + } + if docCount != 1 { + t.Errorf("Expected document count to be %d got %d", 1, docCount) + } + + err = idxr.Close() + if err != nil { + t.Fatal(err) + } +} From 9da414702d919c6f24afc87ce8cbf8cf91abad8b Mon Sep 17 00:00:00 2001 From: ITmeze Date: Fri, 19 Jun 2020 21:16:42 +0200 Subject: [PATCH 701/728] Fix for import paths in function documentation Documentation for AddCustomAnalyzer is using non existing import paths --- mapping/index.go | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/mapping/index.go b/mapping/index.go index 602764cbb..21ca5cce3 100644 --- a/mapping/index.go +++ b/mapping/index.go @@ -101,26 +101,26 @@ func (im *IndexMappingImpl) AddCustomTokenFilter(name string, config map[string] // returned analyzer is registered in the IndexMapping. // // bleve comes with predefined analyzers, like -// github.com/blevesearch/bleve/analysis/analyzers/custom_analyzer. They are +// github.com/blevesearch/bleve/analysis/analyzer/custom. They are // available only if their package is imported by client code. 
To achieve this, // use their metadata to fill configuration entries: // // import ( -// "github.com/blevesearch/bleve/analysis/analyzers/custom_analyzer" -// "github.com/blevesearch/bleve/analysis/char_filters/html_char_filter" -// "github.com/blevesearch/bleve/analysis/token_filters/lower_case_filter" -// "github.com/blevesearch/bleve/analysis/tokenizers/unicode" +// "github.com/blevesearch/bleve/analysis/analyzer/custom" +// "github.com/blevesearch/bleve/analysis/char/html" +// "github.com/blevesearch/bleve/analysis/token/lowercase" +// "github.com/blevesearch/bleve/analysis/tokenizer/unicode" // ) // // m := bleve.NewIndexMapping() // err := m.AddCustomAnalyzer("html", map[string]interface{}{ -// "type": custom_analyzer.Name, +// "type": custom.Name, // "char_filters": []string{ -// html_char_filter.Name, +// html.Name, // }, // "tokenizer": unicode.Name, // "token_filters": []string{ -// lower_case_filter.Name, +// lowercase.Name, // ... // }, // }) From 1f15e1d641e8ec16df952423b14bd899a3b53b06 Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Thu, 13 Aug 2020 11:23:28 -0400 Subject: [PATCH 702/728] remove TooManyClauses limitation when optimizable (#1426) Observation: When a search does not perform scoring and does not need location information, disjunction queries are optimized by directly OR'ing the underlying bitset. This avoids all the usual multi-iterator disjunction logic. However, in it's traditional form, we still have a TooManyClauses limit, and this makes sense as all the underlying iterators are still in memory at one time. Observation: The MultiTerm search is unique in that we have a flat list of terms that are used to build the disjunction. This is significant because it means we can ensure that all the underlying searchers are optimizable. By combining these two observations we can introduce a new mode of operation for the MultiTerm search. 
When it does not perform scoring and does not need location information, we can do a new optimization where we create smaller batches of disjunctions which are immediately optimizable into a single term searcher. By repeating this process across all terms, we end up with the correct searcher, and we never had more than the batch size iterators built in memory at one time. UnadornedPostingsIteratorBitmap was refactored to also implement OptimizablePostingsIterator, this allows us to keep the in-progress final iterator in each batch, simplifying the logic. A new optimization mode "disjunction:unadorned-force" was introduced. It behaves exacdtly the same as "disjunction:unadorned" only it always performs the optimization without regard for the cardinality of the underlying iterators. --- index/scorch/optimize.go | 21 ++- index/scorch/segment/unadorned.go | 13 ++ search/searcher/search_disjunction.go | 8 +- search/searcher/search_multi_term.go | 180 ++++++++++++++++++---- search/searcher/search_term_range_test.go | 83 ++++++++++ 5 files changed, 269 insertions(+), 36 deletions(-) diff --git a/index/scorch/optimize.go b/index/scorch/optimize.go index b9cb9228a..f23fdfb8a 100644 --- a/index/scorch/optimize.go +++ b/index/scorch/optimize.go @@ -16,10 +16,10 @@ package scorch import ( "fmt" - "github.com/RoaringBitmap/roaring" "github.com/blevesearch/bleve/index" "github.com/blevesearch/bleve/index/scorch/segment" + "sync/atomic" ) var OptimizeConjunction = true @@ -37,7 +37,11 @@ func (s *IndexSnapshotTermFieldReader) Optimize(kind string, } if OptimizeDisjunctionUnadorned && kind == "disjunction:unadorned" { - return s.optimizeDisjunctionUnadorned(octx) + return s.optimizeDisjunctionUnadorned(octx, OptimizeDisjunctionUnadornedMinChildCardinality) + } + + if OptimizeDisjunctionUnadorned && kind == "disjunction:unadorned-force" { + return s.optimizeDisjunctionUnadorned(octx, 0) } return octx, nil @@ -265,6 +269,7 @@ OUTER: oTFR.iterators[i] = 
segment.NewUnadornedPostingsIteratorFromBitmap(bm) } + atomic.AddUint64(&o.snapshot.parent.stats.TotTermSearchersStarted, uint64(1)) return oTFR, nil } @@ -275,9 +280,12 @@ OUTER: // term-vectors are not required, and instead only the internal-id's // are needed. func (s *IndexSnapshotTermFieldReader) optimizeDisjunctionUnadorned( - octx index.OptimizableContext) (index.OptimizableContext, error) { + octx index.OptimizableContext, minChildCardinality uint64) (index.OptimizableContext, error) { if octx == nil { - octx = &OptimizeTFRDisjunctionUnadorned{snapshot: s.snapshot} + octx = &OptimizeTFRDisjunctionUnadorned{ + snapshot: s.snapshot, + minChildCardinality: minChildCardinality, + } } o, ok := octx.(*OptimizeTFRDisjunctionUnadorned) @@ -298,6 +306,8 @@ type OptimizeTFRDisjunctionUnadorned struct { snapshot *IndexSnapshot tfrs []*IndexSnapshotTermFieldReader + + minChildCardinality uint64 } var OptimizeTFRDisjunctionUnadornedTerm = []byte("") @@ -332,7 +342,7 @@ func (o *OptimizeTFRDisjunctionUnadorned) Finish() (rv index.Optimized, err erro // Heuristic to skip the optimization if all the constituent // bitmaps are too small, where the processing & resource // overhead to create the OR'ed bitmap outweighs the benefit. 
- if cMax < OptimizeDisjunctionUnadornedMinChildCardinality { + if cMax < o.minChildCardinality { return nil, nil } } @@ -392,5 +402,6 @@ func (o *OptimizeTFRDisjunctionUnadorned) Finish() (rv index.Optimized, err erro oTFR.iterators[i] = segment.NewUnadornedPostingsIteratorFromBitmap(bm) } + atomic.AddUint64(&o.snapshot.parent.stats.TotTermSearchersStarted, uint64(1)) return oTFR, nil } diff --git a/index/scorch/segment/unadorned.go b/index/scorch/segment/unadorned.go index 9a4d6c76c..56e752348 100644 --- a/index/scorch/segment/unadorned.go +++ b/index/scorch/segment/unadorned.go @@ -72,6 +72,19 @@ func (i *UnadornedPostingsIteratorBitmap) Size() int { return reflectStaticSizeUnadornedPostingsIteratorBitmap } +func (i *UnadornedPostingsIteratorBitmap) ActualBitmap() *roaring.Bitmap { + return i.actualBM +} + +func (i *UnadornedPostingsIteratorBitmap) DocNum1Hit() (uint64, bool) { + return 0, false +} + +func (i *UnadornedPostingsIteratorBitmap) ReplaceActual(actual *roaring.Bitmap) { + i.actualBM = actual + i.actual = actual.Iterator() +} + func NewUnadornedPostingsIteratorFromBitmap(bm *roaring.Bitmap) PostingsIterator { return &UnadornedPostingsIteratorBitmap{ actualBM: bm, diff --git a/search/searcher/search_disjunction.go b/search/searcher/search_disjunction.go index 4ad33b6ff..f47da27c4 100644 --- a/search/searcher/search_disjunction.go +++ b/search/searcher/search_disjunction.go @@ -16,7 +16,6 @@ package searcher import ( "fmt" - "github.com/blevesearch/bleve/index" "github.com/blevesearch/bleve/search" ) @@ -37,6 +36,11 @@ func NewDisjunctionSearcher(indexReader index.IndexReader, return newDisjunctionSearcher(indexReader, qsearchers, min, options, true) } +func optionsDisjunctionOptimizable(options search.SearcherOptions) bool { + rv := options.Score == "none" && !options.IncludeTermVectors + return rv +} + func newDisjunctionSearcher(indexReader index.IndexReader, qsearchers []search.Searcher, min float64, options search.SearcherOptions, limit bool) 
(search.Searcher, error) { @@ -44,7 +48,7 @@ func newDisjunctionSearcher(indexReader index.IndexReader, // do not need extra information like freq-norm's or term vectors // and the requested min is simple if len(qsearchers) > 1 && min <= 1 && - options.Score == "none" && !options.IncludeTermVectors { + optionsDisjunctionOptimizable(options) { rv, err := optimizeCompositeSearcher("disjunction:unadorned", indexReader, qsearchers, options) if err != nil || rv != nil { diff --git a/search/searcher/search_multi_term.go b/search/searcher/search_multi_term.go index 1c60d4a7a..85aacc176 100644 --- a/search/searcher/search_multi_term.go +++ b/search/searcher/search_multi_term.go @@ -15,6 +15,7 @@ package searcher import ( + "fmt" "github.com/blevesearch/bleve/index" "github.com/blevesearch/bleve/search" ) @@ -22,10 +23,113 @@ import ( func NewMultiTermSearcher(indexReader index.IndexReader, terms []string, field string, boost float64, options search.SearcherOptions, limit bool) ( search.Searcher, error) { - if limit && tooManyClauses(len(terms)) { - return nil, tooManyClausesErr(field, len(terms)) + + if tooManyClauses(len(terms)) { + if optionsDisjunctionOptimizable(options) { + return optimizeMultiTermSearcher(indexReader, terms, field, boost, options) + } + if limit { + return nil, tooManyClausesErr(field, len(terms)) + } + } + + qsearchers, err := makeBatchSearchers(indexReader, terms, field, boost, options) + if err != nil { + return nil, err } + // build disjunction searcher of these ranges + return newMultiTermSearcherInternal(indexReader, qsearchers, field, boost, + options, limit) +} + +func NewMultiTermSearcherBytes(indexReader index.IndexReader, terms [][]byte, + field string, boost float64, options search.SearcherOptions, limit bool) ( + search.Searcher, error) { + + if tooManyClauses(len(terms)) { + if optionsDisjunctionOptimizable(options) { + return optimizeMultiTermSearcherBytes(indexReader, terms, field, boost, options) + } + + if limit { + return nil, 
tooManyClausesErr(field, len(terms)) + } + } + + qsearchers, err := makeBatchSearchersBytes(indexReader, terms, field, boost, options) + if err != nil { + return nil, err + } + + // build disjunction searcher of these ranges + return newMultiTermSearcherInternal(indexReader, qsearchers, field, boost, + options, limit) +} + +func newMultiTermSearcherInternal(indexReader index.IndexReader, + searchers []search.Searcher, field string, boost float64, + options search.SearcherOptions, limit bool) ( + search.Searcher, error) { + + // build disjunction searcher of these ranges + searcher, err := newDisjunctionSearcher(indexReader, searchers, 0, options, + limit) + if err != nil { + for _, s := range searchers { + _ = s.Close() + } + return nil, err + } + + return searcher, nil +} + +func optimizeMultiTermSearcher(indexReader index.IndexReader, terms []string, + field string, boost float64, options search.SearcherOptions) ( + search.Searcher, error) { + var finalSearcher search.Searcher + for len(terms) > 0 { + var batchTerms []string + if len(terms) > DisjunctionMaxClauseCount { + batchTerms = terms[:DisjunctionMaxClauseCount] + terms = terms[DisjunctionMaxClauseCount:] + } else { + batchTerms = terms + terms = nil + } + batch, err := makeBatchSearchers(indexReader, batchTerms, field, boost, options) + if err != nil { + return nil, err + } + if finalSearcher != nil { + batch = append(batch, finalSearcher) + } + cleanup := func() { + for _, searcher := range batch { + if searcher != nil { + _ = searcher.Close() + } + } + } + finalSearcher, err = optimizeCompositeSearcher("disjunction:unadorned-force", + indexReader, batch, options) + // all searchers in batch should be closed, regardless of error or optimization failure + // either we're returning, or continuing and only finalSearcher is needed for next loop + cleanup() + if err != nil { + return nil, err + } + if finalSearcher == nil { + return nil, fmt.Errorf("unable to optimize") + } + } + return finalSearcher, nil +} + 
+func makeBatchSearchers(indexReader index.IndexReader, terms []string, field string, + boost float64, options search.SearcherOptions) ([]search.Searcher, error) { + qsearchers := make([]search.Searcher, len(terms)) qsearchersClose := func() { for _, searcher := range qsearchers { @@ -42,17 +146,54 @@ func NewMultiTermSearcher(indexReader index.IndexReader, terms []string, return nil, err } } - // build disjunction searcher of these ranges - return newMultiTermSearcherBytes(indexReader, qsearchers, field, boost, - options, limit) + return qsearchers, nil } -func NewMultiTermSearcherBytes(indexReader index.IndexReader, terms [][]byte, - field string, boost float64, options search.SearcherOptions, limit bool) ( +func optimizeMultiTermSearcherBytes(indexReader index.IndexReader, terms [][]byte, + field string, boost float64, options search.SearcherOptions) ( search.Searcher, error) { - if limit && tooManyClauses(len(terms)) { - return nil, tooManyClausesErr(field, len(terms)) + + var finalSearcher search.Searcher + for len(terms) > 0 { + var batchTerms [][]byte + if len(terms) > DisjunctionMaxClauseCount { + batchTerms = terms[:DisjunctionMaxClauseCount] + terms = terms[DisjunctionMaxClauseCount:] + } else { + batchTerms = terms + terms = nil + } + batch, err := makeBatchSearchersBytes(indexReader, batchTerms, field, boost, options) + if err != nil { + return nil, err + } + if finalSearcher != nil { + batch = append(batch, finalSearcher) + } + cleanup := func() { + for _, searcher := range batch { + if searcher != nil { + _ = searcher.Close() + } + } + } + finalSearcher, err = optimizeCompositeSearcher("disjunction:unadorned-force", + indexReader, batch, options) + // all searchers in batch should be closed, regardless of error or optimization failure + // either we're returning, or continuing and only finalSearcher is needed for next loop + cleanup() + if err != nil { + return nil, err + } + if finalSearcher == nil { + return nil, fmt.Errorf("unable to optimize") + } 
} + return finalSearcher, nil +} + +func makeBatchSearchersBytes(indexReader index.IndexReader, terms [][]byte, field string, + boost float64, options search.SearcherOptions) ([]search.Searcher, error) { qsearchers := make([]search.Searcher, len(terms)) qsearchersClose := func() { @@ -70,24 +211,5 @@ func NewMultiTermSearcherBytes(indexReader index.IndexReader, terms [][]byte, return nil, err } } - return newMultiTermSearcherBytes(indexReader, qsearchers, field, boost, - options, limit) -} - -func newMultiTermSearcherBytes(indexReader index.IndexReader, - searchers []search.Searcher, field string, boost float64, - options search.SearcherOptions, limit bool) ( - search.Searcher, error) { - - // build disjunction searcher of these ranges - searcher, err := newDisjunctionSearcher(indexReader, searchers, 0, options, - limit) - if err != nil { - for _, s := range searchers { - _ = s.Close() - } - return nil, err - } - - return searcher, nil + return qsearchers, nil } diff --git a/search/searcher/search_term_range_test.go b/search/searcher/search_term_range_test.go index cd4e89114..ec8935c3e 100644 --- a/search/searcher/search_term_range_test.go +++ b/search/searcher/search_term_range_test.go @@ -15,7 +15,11 @@ package searcher import ( + "github.com/blevesearch/bleve/index/scorch" + "io/ioutil" + "os" "reflect" + "sort" "testing" "github.com/blevesearch/bleve/search" @@ -199,3 +203,82 @@ func TestTermRangeSearch(t *testing.T) { } } + +func TestTermRangeSearchTooManyTerms(t *testing.T) { + dir, _ := ioutil.TempDir("", "scorchTwoDoc") + defer func() { + _ = os.RemoveAll(dir) + }() + + scorchIndex := initTwoDocScorch(dir) + + // use lower limit for this test + origLimit := DisjunctionMaxClauseCount + DisjunctionMaxClauseCount = 2 + defer func() { + DisjunctionMaxClauseCount = origLimit + }() + + scorchReader, err := scorchIndex.Reader() + if err != nil { + t.Error(err) + } + defer func() { + err := scorchReader.Close() + if err != nil { + t.Fatal(err) + } + }() + + var 
want = []string{"1", "3", "4", "5"} + var truth = true + searcher, err := NewTermRangeSearcher(scorchReader, []byte("bobert"), []byte("ravi"), + &truth, &truth, "name", 1.0, search.SearcherOptions{Score: "none", IncludeTermVectors: false}) + if err != nil { + t.Fatal(err) + } + + var got []string + ctx := &search.SearchContext{ + DocumentMatchPool: search.NewDocumentMatchPool( + searcher.DocumentMatchPoolSize(), 0), + } + next, err := searcher.Next(ctx) + i := 0 + for err == nil && next != nil { + extId, err := scorchReader.ExternalID(next.IndexInternalID) + if err != nil { + t.Fatal(err) + } + got = append(got, extId) + ctx.DocumentMatchPool.Put(next) + next, err = searcher.Next(ctx) + i++ + } + if err != nil { + t.Fatalf("error iterating searcher: %v", err) + } + err = searcher.Close() + if err != nil { + t.Fatal(err) + } + + // check that the expected number of term searchers were started + // 6 = 4 original terms, 1 optimized after first round, then final searcher + // from the last round + statsMap := scorchIndex.(*scorch.Scorch).StatsMap() + if statsMap["term_searchers_started"].(uint64) != 6 { + t.Errorf("expected 6 term searchers started, got %d", statsMap["term_searchers_started"]) + } + // check that all started searchers were closed + if statsMap["term_searchers_started"] != statsMap["term_searchers_finished"] { + t.Errorf("expected all term searchers closed, %d started %d closed", + statsMap["term_searchers_started"], statsMap["term_searchers_finished"]) + } + + sort.Strings(got) + if !reflect.DeepEqual(got, want) { + t.Errorf("expected: %#v, got %#v", want, got) + } + +} \ No newline at end of file From 079fce84252a18eb11cbcf5a2270ff8a38e40551 Mon Sep 17 00:00:00 2001 From: Sreekanth Sivasankaran Date: Mon, 17 Aug 2020 20:30:15 +0530 Subject: [PATCH 703/728] Fix the incorrect behaviour for Composite queries with Score:none (#1439) * Fix the incorrect behaviour for Composite queries with Score:none Currently when one of the constituent query is not 
optimizable, it just returns the incoming octx without any modifications.This causes the optimization to proceed incorrectly as it misses the non optimisable child searcher. With the fix, when a sub query is not optimizable it returns a nil context to stop the parent's optimizations. This would prevent the parent to stop the optimized path and take the slower path with score. * adding UTs --- index/scorch/optimize.go | 2 +- index_test.go | 98 +++++++++++++++++++++ search/searcher/search_disjunction_heap.go | 2 +- search/searcher/search_disjunction_slice.go | 2 +- search/searcher/search_term.go | 2 +- 5 files changed, 102 insertions(+), 4 deletions(-) diff --git a/index/scorch/optimize.go b/index/scorch/optimize.go index f23fdfb8a..6711603d5 100644 --- a/index/scorch/optimize.go +++ b/index/scorch/optimize.go @@ -44,7 +44,7 @@ func (s *IndexSnapshotTermFieldReader) Optimize(kind string, return s.optimizeDisjunctionUnadorned(octx, 0) } - return octx, nil + return nil, nil } var OptimizeDisjunctionUnadornedMinChildCardinality = uint64(256) diff --git a/index_test.go b/index_test.go index 1662c60e3..c535456b0 100644 --- a/index_test.go +++ b/index_test.go @@ -2133,3 +2133,101 @@ func testBatchRaceBug1149(t *testing.T, i Index) { } b.Reset() } + +func TestOptimisedConjunctionSearchHits(t *testing.T) { + defer func() { + err := os.RemoveAll("testidx") + if err != nil { + t.Fatal(err) + } + }() + + idx, err := NewUsing("testidx", NewIndexMapping(), "scorch", "scorch", nil) + if err != nil { + t.Fatal(err) + } + doca := map[string]interface{}{ + "country": "united", + "name": "Mercure Hotel", + "directions": "B560 and B56 Follow signs to the M56", + } + docb := map[string]interface{}{ + "country": "united", + "name": "Mercure Altrincham Bowdon Hotel", + "directions": "A570 and A57 Follow signs to the M56 Manchester Airport", + } + docc := map[string]interface{}{ + "country": "india united", + "name": "Sonoma Hotel", + "directions": "Northwest", + } + docd := 
map[string]interface{}{ + "country": "United Kingdom", + "name": "Cresta Court Hotel", + "directions": "junction of A560 and A56", + } + + b := idx.NewBatch() + err = b.Index("a", doca) + if err != nil { + t.Error(err) + } + err = b.Index("b", docb) + if err != nil { + t.Error(err) + } + err = b.Index("c", docc) + if err != nil { + t.Error(err) + } + err = b.Index("d", docd) + if err != nil { + t.Error(err) + } + // execute the batch + err = idx.Batch(b) + if err != nil { + log.Fatal(err) + } + + mq := NewMatchQuery("united") + mq.SetField("country") + + cq := NewConjunctionQuery(mq) + + mq1 := NewMatchQuery("hotel") + mq1.SetField("name") + cq.AddQuery(mq1) + + mq2 := NewMatchQuery("56") + mq2.SetField("directions") + mq2.SetFuzziness(1) + cq.AddQuery(mq2) + + req := NewSearchRequest(cq) + req.Score = "none" + + res, err := idx.Search(req) + if err != nil { + t.Fatal(err) + } + hitsWithOutScore := res.Total + + req = NewSearchRequest(cq) + req.Score = "" + + res, err = idx.Search(req) + if err != nil { + t.Fatal(err) + } + hitsWithScore := res.Total + + if hitsWithOutScore != hitsWithScore { + t.Errorf("expected %d hits without score, got %d", hitsWithScore, hitsWithOutScore) + } + + err = idx.Close() + if err != nil { + t.Fatal(err) + } +} diff --git a/search/searcher/search_disjunction_heap.go b/search/searcher/search_disjunction_heap.go index 00b09fcb9..7f0a5a00e 100644 --- a/search/searcher/search_disjunction_heap.go +++ b/search/searcher/search_disjunction_heap.go @@ -310,7 +310,7 @@ func (s *DisjunctionHeapSearcher) Optimize(kind string, octx index.OptimizableCo } } - return octx, nil + return nil, nil } // heap impl diff --git a/search/searcher/search_disjunction_slice.go b/search/searcher/search_disjunction_slice.go index 464878bc6..dc566ade5 100644 --- a/search/searcher/search_disjunction_slice.go +++ b/search/searcher/search_disjunction_slice.go @@ -294,5 +294,5 @@ func (s *DisjunctionSliceSearcher) Optimize(kind string, octx index.OptimizableC } } - 
return octx, nil + return nil, nil } diff --git a/search/searcher/search_term.go b/search/searcher/search_term.go index c1af74c76..e07d25333 100644 --- a/search/searcher/search_term.go +++ b/search/searcher/search_term.go @@ -137,5 +137,5 @@ func (s *TermSearcher) Optimize(kind string, octx index.OptimizableContext) ( return o.Optimize(kind, octx) } - return octx, nil + return nil, nil } From f5339fb83361137667c2e1df635d52b3f4eca917 Mon Sep 17 00:00:00 2001 From: Abhinav Dangeti Date: Mon, 17 Aug 2020 14:57:05 -0600 Subject: [PATCH 704/728] Fix mishandled error within IndexSnapshot's newDocIDReader + Fixes https://github.com/blevesearch/bleve/issues/1442 + Also removing unnecessary go.mod entry for ghistogram --- go.mod | 1 - index/scorch/snapshot_index.go | 9 ++++++--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/go.mod b/go.mod index 2b23a510b..09bf53735 100644 --- a/go.mod +++ b/go.mod @@ -12,7 +12,6 @@ require ( github.com/blevesearch/zap/v12 v12.0.9 github.com/blevesearch/zap/v13 v13.0.1 github.com/blevesearch/zap/v14 v14.0.0 - github.com/couchbase/ghistogram v0.1.0 // indirect github.com/couchbase/moss v0.1.0 github.com/couchbase/vellum v1.0.1 github.com/golang/protobuf v1.3.2 diff --git a/index/scorch/snapshot_index.go b/index/scorch/snapshot_index.go index 47cc809b2..53eb3ed35 100644 --- a/index/scorch/snapshot_index.go +++ b/index/scorch/snapshot_index.go @@ -303,9 +303,12 @@ func (i *IndexSnapshot) newDocIDReader(results chan *asynchSegmentResult) (index var err error for count := 0; count < len(i.segment); count++ { asr := <-results - if asr.err != nil && err != nil { - err = asr.err - } else { + if asr.err != nil { + if err == nil { + // returns the first error encountered + err = asr.err + } + } else if err == nil { rv.iterators[asr.index] = asr.docs.Iterator() } } From 3d6bf42ddd39e24e417d753d18697a5263313355 Mon Sep 17 00:00:00 2001 From: Abhinav Dangeti Date: Tue, 18 Aug 2020 15:16:03 -0600 Subject: [PATCH 705/728] MB-40916: Do not 
recycle unadorned term field readers for optimized conjunctions/disjunctions (#1438) MB-40916: Do not recycle optimized unadorned term field readers Do not recycle an optimized unadorned term field reader (used for ConjunctionUnadorned or DisjunctionUnadorned), during when a fresh roaring.Bitmap is built by AND-ing or OR-ing individual bitmaps - after which we'll need to release them for GC. --- index/scorch/optimize.go | 45 ++++++++++++----------- index/scorch/scorch_test.go | 1 - index/scorch/segment/unadorned.go | 15 ++++---- index/scorch/segment_plugin.go | 14 +++---- index/scorch/snapshot_index.go | 12 +++++- index/scorch/snapshot_index_tfr.go | 1 + search/searcher/search_multi_term.go | 8 ++-- search/searcher/search_term_range_test.go | 3 +- 8 files changed, 55 insertions(+), 44 deletions(-) diff --git a/index/scorch/optimize.go b/index/scorch/optimize.go index 6711603d5..b3d736f71 100644 --- a/index/scorch/optimize.go +++ b/index/scorch/optimize.go @@ -165,16 +165,8 @@ func (o *OptimizeTFRConjunctionUnadorned) Finish() (rv index.Optimized, err erro // We use an artificial term and field because the optimized // termFieldReader can represent multiple terms and fields. - oTFR := &IndexSnapshotTermFieldReader{ - term: OptimizeTFRConjunctionUnadornedTerm, - field: OptimizeTFRConjunctionUnadornedField, - snapshot: o.snapshot, - iterators: make([]segment.PostingsIterator, len(o.snapshot.segment)), - segmentOffset: 0, - includeFreq: false, - includeNorm: false, - includeTermVectors: false, - } + oTFR := o.snapshot.unadornedTermFieldReader( + OptimizeTFRConjunctionUnadornedTerm, OptimizeTFRConjunctionUnadornedField) var actualBMs []*roaring.Bitmap // Collected from regular posting lists. 
@@ -283,7 +275,7 @@ func (s *IndexSnapshotTermFieldReader) optimizeDisjunctionUnadorned( octx index.OptimizableContext, minChildCardinality uint64) (index.OptimizableContext, error) { if octx == nil { octx = &OptimizeTFRDisjunctionUnadorned{ - snapshot: s.snapshot, + snapshot: s.snapshot, minChildCardinality: minChildCardinality, } } @@ -349,16 +341,8 @@ func (o *OptimizeTFRDisjunctionUnadorned) Finish() (rv index.Optimized, err erro // We use an artificial term and field because the optimized // termFieldReader can represent multiple terms and fields. - oTFR := &IndexSnapshotTermFieldReader{ - term: OptimizeTFRDisjunctionUnadornedTerm, - field: OptimizeTFRDisjunctionUnadornedField, - snapshot: o.snapshot, - iterators: make([]segment.PostingsIterator, len(o.snapshot.segment)), - segmentOffset: 0, - includeFreq: false, - includeNorm: false, - includeTermVectors: false, - } + oTFR := o.snapshot.unadornedTermFieldReader( + OptimizeTFRDisjunctionUnadornedTerm, OptimizeTFRDisjunctionUnadornedField) var docNums []uint32 // Collected docNum's from 1-hit posting lists. var actualBMs []*roaring.Bitmap // Collected from regular posting lists. 
@@ -405,3 +389,22 @@ func (o *OptimizeTFRDisjunctionUnadorned) Finish() (rv index.Optimized, err erro atomic.AddUint64(&o.snapshot.parent.stats.TotTermSearchersStarted, uint64(1)) return oTFR, nil } + +// ---------------------------------------------------------------- + +func (i *IndexSnapshot) unadornedTermFieldReader( + term []byte, field string) *IndexSnapshotTermFieldReader { + // This IndexSnapshotTermFieldReader will not be recycled, more + // conversation here: https://github.com/blevesearch/bleve/pull/1438 + return &IndexSnapshotTermFieldReader{ + term: term, + field: field, + snapshot: i, + iterators: make([]segment.PostingsIterator, len(i.segment)), + segmentOffset: 0, + includeFreq: false, + includeNorm: false, + includeTermVectors: false, + recycle: false, + } +} diff --git a/index/scorch/scorch_test.go b/index/scorch/scorch_test.go index e22fafa27..5f0ec1b75 100644 --- a/index/scorch/scorch_test.go +++ b/index/scorch/scorch_test.go @@ -2458,7 +2458,6 @@ func TestIndexSeekBackwardsStats(t *testing.T) { t.Fatalf("error closing term field reader: %v", err) } - if idx.(*Scorch).stats.TotTermSearchersStarted != idx.(*Scorch).stats.TotTermSearchersFinished { t.Errorf("expected term searchers started %d to equal term searchers finished %d", idx.(*Scorch).stats.TotTermSearchersStarted, diff --git a/index/scorch/segment/unadorned.go b/index/scorch/segment/unadorned.go index 56e752348..db06562df 100644 --- a/index/scorch/segment/unadorned.go +++ b/index/scorch/segment/unadorned.go @@ -24,7 +24,6 @@ var reflectStaticSizeUnadornedPostingsIteratorBitmap int var reflectStaticSizeUnadornedPostingsIterator1Hit int var reflectStaticSizeUnadornedPosting int - func init() { var pib UnadornedPostingsIteratorBitmap reflectStaticSizeUnadornedPostingsIteratorBitmap = int(reflect.TypeOf(pib).Size()) @@ -34,7 +33,7 @@ func init() { reflectStaticSizeUnadornedPosting = int(reflect.TypeOf(up).Size()) } -type UnadornedPostingsIteratorBitmap struct{ +type 
UnadornedPostingsIteratorBitmap struct { actual roaring.IntPeekable actualBM *roaring.Bitmap } @@ -72,15 +71,15 @@ func (i *UnadornedPostingsIteratorBitmap) Size() int { return reflectStaticSizeUnadornedPostingsIteratorBitmap } -func (i *UnadornedPostingsIteratorBitmap) ActualBitmap() *roaring.Bitmap { +func (i *UnadornedPostingsIteratorBitmap) ActualBitmap() *roaring.Bitmap { return i.actualBM } -func (i *UnadornedPostingsIteratorBitmap) DocNum1Hit() (uint64, bool) { +func (i *UnadornedPostingsIteratorBitmap) DocNum1Hit() (uint64, bool) { return 0, false } -func (i *UnadornedPostingsIteratorBitmap) ReplaceActual(actual *roaring.Bitmap) { +func (i *UnadornedPostingsIteratorBitmap) ReplaceActual(actual *roaring.Bitmap) { i.actualBM = actual i.actual = actual.Iterator() } @@ -88,13 +87,13 @@ func (i *UnadornedPostingsIteratorBitmap) ReplaceActual(actual *roaring.Bitmap) func NewUnadornedPostingsIteratorFromBitmap(bm *roaring.Bitmap) PostingsIterator { return &UnadornedPostingsIteratorBitmap{ actualBM: bm, - actual: bm.Iterator(), + actual: bm.Iterator(), } } const docNum1HitFinished = math.MaxUint64 -type UnadornedPostingsIterator1Hit struct{ +type UnadornedPostingsIterator1Hit struct { docNum uint64 } @@ -158,4 +157,4 @@ func (p UnadornedPosting) Locations() []Location { func (p UnadornedPosting) Size() int { return reflectStaticSizeUnadornedPosting -} \ No newline at end of file +} diff --git a/index/scorch/segment_plugin.go b/index/scorch/segment_plugin.go index 4729e6bd6..b830b2c05 100644 --- a/index/scorch/segment_plugin.go +++ b/index/scorch/segment_plugin.go @@ -81,11 +81,11 @@ func chooseSegmentPlugin(forcedSegmentType string, func (s *Scorch) loadSegmentPlugin(forcedSegmentType string, forcedSegmentVersion uint32) error { - segPlugin, err := chooseSegmentPlugin(forcedSegmentType, - forcedSegmentVersion) - if err != nil { - return err - } - s.segPlugin = segPlugin - return nil + segPlugin, err := chooseSegmentPlugin(forcedSegmentType, + forcedSegmentVersion) 
+ if err != nil { + return err + } + s.segPlugin = segPlugin + return nil } diff --git a/index/scorch/snapshot_index.go b/index/scorch/snapshot_index.go index 53eb3ed35..9d17bcb2c 100644 --- a/index/scorch/snapshot_index.go +++ b/index/scorch/snapshot_index.go @@ -514,10 +514,20 @@ func (i *IndexSnapshot) allocTermFieldReaderDicts(field string) (tfr *IndexSnaps } } i.m2.Unlock() - return &IndexSnapshotTermFieldReader{} + return &IndexSnapshotTermFieldReader{ + recycle: true, + } } func (i *IndexSnapshot) recycleTermFieldReader(tfr *IndexSnapshotTermFieldReader) { + if !tfr.recycle { + // Do not recycle an optimized unadorned term field reader (used for + // ConjunctionUnadorned or DisjunctionUnadorned), during when a fresh + // roaring.Bitmap is built by AND-ing or OR-ing individual bitmaps, + // and we'll need to release them for GC. (See MB-40916) + return + } + i.parent.rootLock.RLock() obsolete := i.parent.root != i i.parent.rootLock.RUnlock() diff --git a/index/scorch/snapshot_index_tfr.go b/index/scorch/snapshot_index_tfr.go index 61537fc4f..239f68fbe 100644 --- a/index/scorch/snapshot_index_tfr.go +++ b/index/scorch/snapshot_index_tfr.go @@ -45,6 +45,7 @@ type IndexSnapshotTermFieldReader struct { includeTermVectors bool currPosting segment.Posting currID index.IndexInternalID + recycle bool } func (i *IndexSnapshotTermFieldReader) Size() int { diff --git a/search/searcher/search_multi_term.go b/search/searcher/search_multi_term.go index 85aacc176..06a265c7e 100644 --- a/search/searcher/search_multi_term.go +++ b/search/searcher/search_multi_term.go @@ -33,7 +33,7 @@ func NewMultiTermSearcher(indexReader index.IndexReader, terms []string, } } - qsearchers, err := makeBatchSearchers(indexReader, terms, field, boost, options) + qsearchers, err := makeBatchSearchers(indexReader, terms, field, boost, options) if err != nil { return nil, err } @@ -57,7 +57,7 @@ func NewMultiTermSearcherBytes(indexReader index.IndexReader, terms [][]byte, } } - qsearchers, err := 
makeBatchSearchersBytes(indexReader, terms, field, boost, options) + qsearchers, err := makeBatchSearchersBytes(indexReader, terms, field, boost, options) if err != nil { return nil, err } @@ -112,7 +112,7 @@ func optimizeMultiTermSearcher(indexReader index.IndexReader, terms []string, } } } - finalSearcher, err = optimizeCompositeSearcher("disjunction:unadorned-force", + finalSearcher, err = optimizeCompositeSearcher("disjunction:unadorned-force", indexReader, batch, options) // all searchers in batch should be closed, regardless of error or optimization failure // either we're returning, or continuing and only finalSearcher is needed for next loop @@ -177,7 +177,7 @@ func optimizeMultiTermSearcherBytes(indexReader index.IndexReader, terms [][]byt } } } - finalSearcher, err = optimizeCompositeSearcher("disjunction:unadorned-force", + finalSearcher, err = optimizeCompositeSearcher("disjunction:unadorned-force", indexReader, batch, options) // all searchers in batch should be closed, regardless of error or optimization failure // either we're returning, or continuing and only finalSearcher is needed for next loop diff --git a/search/searcher/search_term_range_test.go b/search/searcher/search_term_range_test.go index ec8935c3e..22cf065d2 100644 --- a/search/searcher/search_term_range_test.go +++ b/search/searcher/search_term_range_test.go @@ -280,5 +280,4 @@ func TestTermRangeSearchTooManyTerms(t *testing.T) { if !reflect.DeepEqual(got, want) { t.Errorf("expected: %#v, got %#v", want, got) } - -} \ No newline at end of file +} From 6c7808ca5341bf5fb1a17f14b3fb1bb5c3061727 Mon Sep 17 00:00:00 2001 From: Abhinav Dangeti Date: Wed, 19 Aug 2020 09:08:32 -0600 Subject: [PATCH 706/728] Fix mishandled error within NewNumericRangeSearcher (#1445) + Return the error upon failure to close the field dictionary obtained from the IndexReader. 
+ Fixes: https://github.com/blevesearch/bleve/issues/1444 --- search/searcher/search_numeric_range.go | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/search/searcher/search_numeric_range.go b/search/searcher/search_numeric_range.go index 675f569d9..48d6226e1 100644 --- a/search/searcher/search_numeric_range.go +++ b/search/searcher/search_numeric_range.go @@ -74,9 +74,8 @@ func NewNumericRangeSearcher(indexReader index.IndexReader, terms := termRanges.Enumerate(isIndexed) if fieldDict != nil { if fd, ok := fieldDict.(index.FieldDict); ok { - cerr := fd.Close() - if cerr != nil { - err = cerr + if err = fd.Close(); err != nil { + return nil, err } } } From ed463b9d02851c11c84603f30febb5a3efdc7318 Mon Sep 17 00:00:00 2001 From: Abhinav Dangeti Date: Wed, 19 Aug 2020 12:09:49 -0600 Subject: [PATCH 707/728] MB-40962: Dropping the heuristic to skip disjunction optimizations (#1446) + Getting rid of the minChildCardinality heuristic for optimized unadorned disjunctions where we previously used to skip the optimization in case of low-cardinality bitmaps. + Removing this heuristic showed tremendous improvement for certain types of queries which involved a conjunction of disjunctions of fuzzy queries. + However, here's something worth noting from the commit that introduced this heuristic .. https://github.com/blevesearch/bleve/commit/58e6641d13844e212687bd35823914c5e26b04dd ``` Regarding perf microbenchmarks (bleve-query) on a 200K en-wiki docs scorch index... for a high number of high-frequency terms... - wildcard search on "th*" (~31K hits)... before the change, with normal scoring - ~4.7 q/sec w/ NoScore:true - ~5.1 q/sec w/ NoScore:true & unadorned disj. optimization - ~11.6 q/sec for a low number of high-frequency terms... - query-string search on "http www com" (~25K hits)... before the change, with normal scoring - ~190 q/sec w/ NoScore:true - ~260 q/sec w/ NoScore:true & unadorned disj. 
optimization - ~415 q/sec for a low number of low-frequency terms... - query-string search on "marty shoch" (207 hits)... before the change, with normal scoring - ~15.0K q/sec w/ NoScore:true - ~21.3K q/sec w/ NoScore:true & unadorned disj. optimization - ~16.1K q/sec ``` --- index/scorch/optimize.go | 20 +++----------------- search/searcher/search_multi_term.go | 4 ++-- 2 files changed, 5 insertions(+), 19 deletions(-) diff --git a/index/scorch/optimize.go b/index/scorch/optimize.go index b3d736f71..658354cd7 100644 --- a/index/scorch/optimize.go +++ b/index/scorch/optimize.go @@ -37,11 +37,7 @@ func (s *IndexSnapshotTermFieldReader) Optimize(kind string, } if OptimizeDisjunctionUnadorned && kind == "disjunction:unadorned" { - return s.optimizeDisjunctionUnadorned(octx, OptimizeDisjunctionUnadornedMinChildCardinality) - } - - if OptimizeDisjunctionUnadorned && kind == "disjunction:unadorned-force" { - return s.optimizeDisjunctionUnadorned(octx, 0) + return s.optimizeDisjunctionUnadorned(octx) } return nil, nil @@ -272,11 +268,10 @@ OUTER: // term-vectors are not required, and instead only the internal-id's // are needed. 
func (s *IndexSnapshotTermFieldReader) optimizeDisjunctionUnadorned( - octx index.OptimizableContext, minChildCardinality uint64) (index.OptimizableContext, error) { + octx index.OptimizableContext) (index.OptimizableContext, error) { if octx == nil { octx = &OptimizeTFRDisjunctionUnadorned{ - snapshot: s.snapshot, - minChildCardinality: minChildCardinality, + snapshot: s.snapshot, } } @@ -298,8 +293,6 @@ type OptimizeTFRDisjunctionUnadorned struct { snapshot *IndexSnapshot tfrs []*IndexSnapshotTermFieldReader - - minChildCardinality uint64 } var OptimizeTFRDisjunctionUnadornedTerm = []byte("") @@ -330,13 +323,6 @@ func (o *OptimizeTFRDisjunctionUnadorned) Finish() (rv index.Optimized, err erro } } } - - // Heuristic to skip the optimization if all the constituent - // bitmaps are too small, where the processing & resource - // overhead to create the OR'ed bitmap outweighs the benefit. - if cMax < o.minChildCardinality { - return nil, nil - } } // We use an artificial term and field because the optimized diff --git a/search/searcher/search_multi_term.go b/search/searcher/search_multi_term.go index 06a265c7e..70a2fa38c 100644 --- a/search/searcher/search_multi_term.go +++ b/search/searcher/search_multi_term.go @@ -112,7 +112,7 @@ func optimizeMultiTermSearcher(indexReader index.IndexReader, terms []string, } } } - finalSearcher, err = optimizeCompositeSearcher("disjunction:unadorned-force", + finalSearcher, err = optimizeCompositeSearcher("disjunction:unadorned", indexReader, batch, options) // all searchers in batch should be closed, regardless of error or optimization failure // either we're returning, or continuing and only finalSearcher is needed for next loop @@ -177,7 +177,7 @@ func optimizeMultiTermSearcherBytes(indexReader index.IndexReader, terms [][]byt } } } - finalSearcher, err = optimizeCompositeSearcher("disjunction:unadorned-force", + finalSearcher, err = optimizeCompositeSearcher("disjunction:unadorned", indexReader, batch, options) // all searchers 
in batch should be closed, regardless of error or optimization failure // either we're returning, or continuing and only finalSearcher is needed for next loop From 9a99dfbb79795c6bd5f0dad12242d572f6dc20f4 Mon Sep 17 00:00:00 2001 From: Sreekanth Sivasankaran Date: Thu, 20 Aug 2020 16:46:06 +0530 Subject: [PATCH 708/728] MB-40730 - Sort by field returns incorrect results Fixing the filterTermsByType method by resetting the terms only when valid prefix coded, zero shifted values are found to guard against any inadvertent overrides of the original field values. --- search/sort.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/search/sort.go b/search/sort.go index 3dc118518..dca422ebd 100644 --- a/search/sort.go +++ b/search/sort.go @@ -427,7 +427,8 @@ func (s *SortField) filterTermsByType(terms [][]byte) [][]byte { allTermsPrefixCoded = false } } - if allTermsPrefixCoded { + // reset the terms only when valid zero shift terms are found. + if allTermsPrefixCoded && len(termsWithShiftZero) > 0 { terms = termsWithShiftZero s.tmp = termsWithShiftZero[:0] } From 5ef9653c4e13f3ff8f48368c816752b404ac8706 Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Mon, 24 Aug 2020 10:44:16 -0400 Subject: [PATCH 709/728] fix ineffectual assignment (#1450) Intention was for calcBudget local var to either use the CalcBudget provided in the Options, or fall back to the default mergeplan.CalcBudget method. However, after assigning the correct impl to the calcBudget local var, the code always invoked mergeplan.CalcBudget anwyay. This change allows the CalcBudget option to work as intended. 
--- index/scorch/mergeplan/merge_plan.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/index/scorch/mergeplan/merge_plan.go b/index/scorch/mergeplan/merge_plan.go index e02923cc1..752350662 100644 --- a/index/scorch/mergeplan/merge_plan.go +++ b/index/scorch/mergeplan/merge_plan.go @@ -184,7 +184,7 @@ func plan(segmentsIn []Segment, o *MergePlanOptions) (*MergePlan, error) { calcBudget = CalcBudget } - budgetNumSegments := CalcBudget(eligiblesLiveSize, minLiveSize, o) + budgetNumSegments := calcBudget(eligiblesLiveSize, minLiveSize, o) scoreSegments := o.ScoreSegments if scoreSegments == nil { From abc1d043a690fcf392cf4b107fcb80915a58f401 Mon Sep 17 00:00:00 2001 From: Abhinav Dangeti Date: Mon, 24 Aug 2020 08:44:32 -0600 Subject: [PATCH 710/728] Update vellum version to v1.0.2 (#1451) Diff since v1.0.1: * 8f70ead Marty Schoch | fix bug in iterator next optimization * 915a44a Marty Schoch | update README with badge showing github CI * 2dd513f Marty Schoch | add github workflow to run tests --- go.mod | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/go.mod b/go.mod index 09bf53735..de5fe8425 100644 --- a/go.mod +++ b/go.mod @@ -13,7 +13,7 @@ require ( github.com/blevesearch/zap/v13 v13.0.1 github.com/blevesearch/zap/v14 v14.0.0 github.com/couchbase/moss v0.1.0 - github.com/couchbase/vellum v1.0.1 + github.com/couchbase/vellum v1.0.2 github.com/golang/protobuf v1.3.2 github.com/kljensen/snowball v0.6.0 github.com/rcrowley/go-metrics v0.0.0-20190826022208-cac0b30c2563 From d553d2c9f1613896f411d642a2cfc4b19e42ec9c Mon Sep 17 00:00:00 2001 From: Abhinav Dangeti Date: Mon, 24 Aug 2020 08:46:18 -0600 Subject: [PATCH 711/728] Upgrading go.mod tags for etcd-io/bbolt, RoaringBitmap/roaring (#1422) * Upgrading etcd-io/bbolt's tag to v1.3.5 This fixes issues related to bad usage of the unsafe library. * Update RoaringBitmap/roaring's tag to v0.4.23 This fixes issues related to bad usage of the unsafe library. 
https://github.com/RoaringBitmap/roaring/commit/9f3bcbda4913333a8445291f5055bb5b2619feb4 --- go.mod | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/go.mod b/go.mod index de5fe8425..758d17d51 100644 --- a/go.mod +++ b/go.mod @@ -3,7 +3,7 @@ module github.com/blevesearch/bleve go 1.13 require ( - github.com/RoaringBitmap/roaring v0.4.21 + github.com/RoaringBitmap/roaring v0.4.23 github.com/blevesearch/blevex v0.0.0-20190916190636-152f0fe5c040 github.com/blevesearch/go-porterstemmer v1.0.3 github.com/blevesearch/segment v0.9.0 @@ -21,6 +21,6 @@ require ( github.com/steveyen/gtreap v0.1.0 github.com/syndtr/goleveldb v1.0.0 github.com/willf/bitset v1.1.10 - go.etcd.io/bbolt v1.3.4 + go.etcd.io/bbolt v1.3.5 golang.org/x/text v0.3.0 ) From b7678e38dcb6c73c16183c9a37ed9fb14b426c45 Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Mon, 24 Aug 2020 10:48:06 -0400 Subject: [PATCH 712/728] simplify compute geo range (#1447) * simplify compute geo range attempt to go back to a simpler compute geo range but not give up the perf improvements * refactor again to consolidate arguments I went back to the other point Steve had made about how many arguments we passed around, and observed that many of them were never altered. So, I converted that into a struct, and made the functions methods on that struct. This helped performance get back to match or beat Steve's version. 
* avoid an extra bit shift op review feedback from Abhinav --- search/searcher/search_geoboundingbox.go | 184 +++++++++++++---------- 1 file changed, 101 insertions(+), 83 deletions(-) diff --git a/search/searcher/search_geoboundingbox.go b/search/searcher/search_geoboundingbox.go index c4b8af927..76157f01a 100644 --- a/search/searcher/search_geoboundingbox.go +++ b/search/searcher/search_geoboundingbox.go @@ -24,7 +24,7 @@ import ( type filterFunc func(key []byte) bool -var GeoBitsShift1 = (geo.GeoBits << 1) +var GeoBitsShift1 = geo.GeoBits << 1 var GeoBitsShift1Minus1 = GeoBitsShift1 - 1 func NewGeoBoundingBoxSearcher(indexReader index.IndexReader, minLon, minLat, @@ -100,30 +100,42 @@ func NewGeoBoundingBoxSearcher(indexReader index.IndexReader, minLon, minLat, var geoMaxShift = document.GeoPrecisionStep * 4 var geoDetailLevel = ((geo.GeoBits << 1) - geoMaxShift) / 2 +type closeFunc func() error func ComputeGeoRange(term uint64, shift uint, sminLon, sminLat, smaxLon, smaxLat float64, checkBoundaries bool, indexReader index.IndexReader, field string) ( onBoundary [][]byte, notOnBoundary [][]byte, err error) { - preallocBytesLen := 32 - preallocBytes := make([]byte, preallocBytesLen) - makePrefixCoded := func(in int64, shift uint) (rv numeric.PrefixCoded) { - if len(preallocBytes) <= 0 { - preallocBytesLen = preallocBytesLen * 2 - preallocBytes = make([]byte, preallocBytesLen) - } - - rv, preallocBytes, err = - numeric.NewPrefixCodedInt64Prealloc(in, shift, preallocBytes) + isIndexed, closeF, err := buildIsIndexedFunc(indexReader, field) + if closeF != nil { + defer func() { + cerr := closeF() + if cerr != nil { + err = cerr + } + }() + } - return rv + grc := &geoRangeCompute{ + preallocBytesLen: 32, + preallocBytes: make([]byte, 32), + sminLon: sminLon, + sminLat: sminLat, + smaxLon: smaxLon, + smaxLat: smaxLat, + checkBoundaries: checkBoundaries, + isIndexed: isIndexed, } - var fieldDict index.FieldDictContains - var isIndexed filterFunc + 
grc.computeGeoRange(term, shift) + + return grc.onBoundary, grc.notOnBoundary, nil +} + +func buildIsIndexedFunc(indexReader index.IndexReader, field string) (isIndexed filterFunc, closeF closeFunc, err error) { if irr, ok := indexReader.(index.IndexReaderContains); ok { - fieldDict, err = irr.FieldDictContains(field) + fieldDict, err := irr.FieldDictContains(field) if err != nil { return nil, nil, err } @@ -132,22 +144,18 @@ func ComputeGeoRange(term uint64, shift uint, found, err := fieldDict.Contains(term) return err == nil && found } - } - defer func() { - if fieldDict != nil { + closeF = func() error { if fd, ok := fieldDict.(index.FieldDict); ok { - cerr := fd.Close() - if cerr != nil { - err = cerr + err := fd.Close() + if err != nil { + return err } } + return nil } - }() - - if isIndexed == nil { + } else if indexReader != nil { isIndexed = func(term []byte) bool { - if indexReader != nil { reader, err := indexReader.TermFieldReader(term, field, false, false, false) if err != nil || reader == nil { return false @@ -157,68 +165,15 @@ func ComputeGeoRange(term uint64, shift uint, return false } _ = reader.Close() - } - return true + return true } - } - var computeGeoRange func(term uint64, shift uint) // declare for recursion - - relateAndRecurse := func(start, end uint64, res, level uint) { - minLon := geo.MortonUnhashLon(start) - minLat := geo.MortonUnhashLat(start) - maxLon := geo.MortonUnhashLon(end) - maxLat := geo.MortonUnhashLat(end) - - within := res%document.GeoPrecisionStep == 0 && - geo.RectWithin(minLon, minLat, maxLon, maxLat, - sminLon, sminLat, smaxLon, smaxLat) - if within || (level == geoDetailLevel && - geo.RectIntersects(minLon, minLat, maxLon, maxLat, - sminLon, sminLat, smaxLon, smaxLat)) { - codedTerm := makePrefixCoded(int64(start), res) - if isIndexed(codedTerm) { - if !within && checkBoundaries { - onBoundary = append(onBoundary, codedTerm) - } else { - notOnBoundary = append(notOnBoundary, codedTerm) - } - } - } else if level < 
geoDetailLevel && - geo.RectIntersects(minLon, minLat, maxLon, maxLat, - sminLon, sminLat, smaxLon, smaxLat) { - computeGeoRange(start, res-1) + } else { + isIndexed = func([]byte) bool { + return true } } - - computeGeoRange = func(term uint64, shift uint) { - if err != nil { - return - } - - split := term | uint64(0x1)<> 1 - - relateAndRecurse(term, lowerMax, shift, level) - relateAndRecurse(split, upperMax, shift, level) - } - - computeGeoRange(term, shift) - - if err != nil { - return nil, nil, err - } - - return onBoundary, notOnBoundary, err + return isIndexed, closeF, err } func buildRectFilter(dvReader index.DocValueReader, field string, @@ -252,3 +207,66 @@ func buildRectFilter(dvReader index.DocValueReader, field string, return false } } + +type geoRangeCompute struct { + preallocBytesLen int + preallocBytes []byte + sminLon, sminLat, smaxLon, smaxLat float64 + checkBoundaries bool + onBoundary, notOnBoundary [][]byte + isIndexed func(term []byte) bool +} + +func (grc *geoRangeCompute) makePrefixCoded(in int64, shift uint) (rv numeric.PrefixCoded) { + if len(grc.preallocBytes) <= 0 { + grc.preallocBytesLen = grc.preallocBytesLen * 2 + grc.preallocBytes = make([]byte, grc.preallocBytesLen) + } + + rv, grc.preallocBytes, _ = + numeric.NewPrefixCodedInt64Prealloc(in, shift, grc.preallocBytes) + + return rv +} + +func (grc *geoRangeCompute) computeGeoRange(term uint64, shift uint) { + split := term | uint64(0x1)<> 1 + + within := res%document.GeoPrecisionStep == 0 && + geo.RectWithin(minLon, minLat, maxLon, maxLat, + grc.sminLon, grc.sminLat, grc.smaxLon, grc.smaxLat) + if within || (level == geoDetailLevel && + geo.RectIntersects(minLon, minLat, maxLon, maxLat, + grc.sminLon, grc.sminLat, grc.smaxLon, grc.smaxLat)) { + codedTerm := grc.makePrefixCoded(int64(start), res) + if grc.isIndexed(codedTerm) { + if !within && grc.checkBoundaries { + grc.onBoundary = append(grc.onBoundary, codedTerm) + } else { + grc.notOnBoundary = append(grc.notOnBoundary, 
codedTerm) + } + } + } else if level < geoDetailLevel && + geo.RectIntersects(minLon, minLat, maxLon, maxLat, + grc.sminLon, grc.sminLat, grc.smaxLon, grc.smaxLat) { + grc.computeGeoRange(start, res-1) + } +} \ No newline at end of file From a929f94722d22f3a80da56a7519c4e5de1c58fa4 Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Mon, 24 Aug 2020 14:06:09 -0400 Subject: [PATCH 713/728] bump bleve versions for next bleve release (#1454) zap v11 v11.0.10 zap v12 v12.0.10 zap v13 v13.0.2 zap v14 v14.0.1 --- go.mod | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/go.mod b/go.mod index 758d17d51..9a12454fd 100644 --- a/go.mod +++ b/go.mod @@ -8,10 +8,10 @@ require ( github.com/blevesearch/go-porterstemmer v1.0.3 github.com/blevesearch/segment v0.9.0 github.com/blevesearch/snowballstem v0.9.0 - github.com/blevesearch/zap/v11 v11.0.9 - github.com/blevesearch/zap/v12 v12.0.9 - github.com/blevesearch/zap/v13 v13.0.1 - github.com/blevesearch/zap/v14 v14.0.0 + github.com/blevesearch/zap/v11 v11.0.10 + github.com/blevesearch/zap/v12 v12.0.10 + github.com/blevesearch/zap/v13 v13.0.2 + github.com/blevesearch/zap/v14 v14.0.1 github.com/couchbase/moss v0.1.0 github.com/couchbase/vellum v1.0.2 github.com/golang/protobuf v1.3.2 From c997b4a57648e06d8c588d93e279562279f74959 Mon Sep 17 00:00:00 2001 From: Tuomas Salo Date: Tue, 22 Sep 2020 15:23:34 +0300 Subject: [PATCH 714/728] fix tests on go 1.15 (#1466) --- mapping/mapping_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mapping/mapping_test.go b/mapping/mapping_test.go index 2b6870966..1c9ced58a 100644 --- a/mapping/mapping_test.go +++ b/mapping/mapping_test.go @@ -959,7 +959,7 @@ func TestMappingForGeo(t *testing.T) { expect = append(expect, []float64{-71.34, 41.12}) for i, geopoint := range geopoints { - doc := document.NewDocument(string(i)) + doc := document.NewDocument(fmt.Sprint(i)) err := mapping.MapDocument(doc, geopoint) if err != nil { t.Fatal(err) From 
99e9e901c29699c3f99bc0b2c5480b74d3e27535 Mon Sep 17 00:00:00 2001 From: Sreekanth Sivasankaran Date: Mon, 28 Sep 2020 19:53:47 +0530 Subject: [PATCH 715/728] New stats for reclaimable disk space from scorch index (#1470) * New stats for reclaimable disk space from scorch index Adding a new stats `num_bytes_used_disk_by_root_reclaimable` which helps to approximate the amount of reclaimable disk space from a scorch index. This could turn out to be a useful insight into knowing the amount of disk space wasted due to tombstoned/obsoleted contents across the segments. * refactoring to reduce the lock time for capturing the disk stats. --- index/scorch/scorch.go | 22 +++++++++++----------- index/scorch/snapshot_index.go | 27 +++++++++++++++++++++++++++ 2 files changed, 38 insertions(+), 11 deletions(-) diff --git a/index/scorch/scorch.go b/index/scorch/scorch.go index ba98a460d..fccff67ab 100644 --- a/index/scorch/scorch.go +++ b/index/scorch/scorch.go @@ -515,21 +515,17 @@ func (s *Scorch) diskFileStats(rootSegmentPaths map[string]struct{}) (uint64, return numFilesOnDisk, numBytesUsedDisk, numBytesOnDiskByRoot } -func (s *Scorch) rootDiskSegmentsPaths() map[string]struct{} { - rv := make(map[string]struct{}, len(s.root.segment)) - for _, segmentSnapshot := range s.root.segment { - if seg, ok := segmentSnapshot.segment.(segment.PersistedSegment); ok { - rv[seg.Path()] = struct{}{} - } - } - return rv -} - func (s *Scorch) StatsMap() map[string]interface{} { m := s.stats.ToMap() + indexSnapshot := s.currentSnapshot() + defer func() { + _ = indexSnapshot.Close() + }() + + rootSegPaths := indexSnapshot.diskSegmentsPaths() + s.rootLock.RLock() - rootSegPaths := s.rootDiskSegmentsPaths() m["CurFilesIneligibleForRemoval"] = uint64(len(s.ineligibleForRemoval)) s.rootLock.RUnlock() @@ -556,6 +552,10 @@ func (s *Scorch) StatsMap() map[string]interface{} { m["num_bytes_used_disk"] = numBytesUsedDisk // total disk bytes by the latest root index, exclusive of older snapshots 
m["num_bytes_used_disk_by_root"] = numBytesOnDiskByRoot + // num_bytes_used_disk_by_root_reclaimable is an approximation about the + // reclaimable disk space in an index. (eg: from a full compaction) + m["num_bytes_used_disk_by_root_reclaimable"] = uint64(float64(numBytesOnDiskByRoot) * + indexSnapshot.reClaimableDocsRatio()) m["num_files_on_disk"] = numFilesOnDisk m["num_root_memorysegments"] = m["TotMemorySegmentsAtRoot"] m["num_root_filesegments"] = m["TotFileSegmentsAtRoot"] diff --git a/index/scorch/snapshot_index.go b/index/scorch/snapshot_index.go index 9d17bcb2c..61204ebbc 100644 --- a/index/scorch/snapshot_index.go +++ b/index/scorch/snapshot_index.go @@ -708,6 +708,33 @@ func (i *IndexSnapshot) DumpFields() chan interface{} { return rv } +func (i *IndexSnapshot) diskSegmentsPaths() map[string]struct{} { + rv := make(map[string]struct{}, len(i.segment)) + for _, segmentSnapshot := range i.segment { + if seg, ok := segmentSnapshot.segment.(segment.PersistedSegment); ok { + rv[seg.Path()] = struct{}{} + } + } + return rv +} + +// reClaimableDocsRatio gives a ratio about the obsoleted or +// reclaimable documents present in a given index snapshot. +func (i *IndexSnapshot) reClaimableDocsRatio() float64 { + var totalCount, liveCount uint64 + for _, segmentSnapshot := range i.segment { + if _, ok := segmentSnapshot.segment.(segment.PersistedSegment); ok { + totalCount += uint64(segmentSnapshot.FullSize()) + liveCount += uint64(segmentSnapshot.Count()) + } + } + + if totalCount > 0 { + return float64(totalCount-liveCount) / float64(totalCount) + } + return 0 +} + // subtractStrings returns set a minus elements of set b. 
func subtractStrings(a, b []string) []string { if len(b) == 0 { From 0197e8ddfac6af208d0cf6b58d3b7b5f863b08a7 Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Wed, 30 Sep 2020 10:00:03 -0400 Subject: [PATCH 716/728] an optimization fix test was no longer helping (#1459) it was determined that this test was no longer testing what it was designed to test, due to other changes. this fix now correctly verifies the fix again by disabling an optimization that interferes with the test --- index_test.go | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/index_test.go b/index_test.go index c535456b0..50684420d 100644 --- a/index_test.go +++ b/index_test.go @@ -2135,6 +2135,11 @@ func testBatchRaceBug1149(t *testing.T, i Index) { } func TestOptimisedConjunctionSearchHits(t *testing.T) { + scorch.OptimizeDisjunctionUnadorned = false + defer func() { + scorch.OptimizeDisjunctionUnadorned = true + }() + defer func() { err := os.RemoveAll("testidx") if err != nil { From 7a377b75a2295f20568129a81dadd53efd013762 Mon Sep 17 00:00:00 2001 From: pavelbazika Date: Tue, 6 Oct 2020 17:22:43 +0200 Subject: [PATCH 717/728] Index is always left closed when opening process failed (#1479) Co-authored-by: Pavel Bazika --- index_impl.go | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/index_impl.go b/index_impl.go index 629cc9b2f..0520fe43b 100644 --- a/index_impl.go +++ b/index_impl.go @@ -112,6 +112,11 @@ func newIndexUsing(path string, mapping mapping.IndexMapping, indexType string, } return nil, err } + defer func(rv *indexImpl) { + if !rv.open { + rv.i.Close() + } + }(&rv) // now persist the mapping mappingBytes, err := json.Marshal(mapping) @@ -177,6 +182,11 @@ func openIndexUsing(path string, runtimeConfig map[string]interface{}) (rv *inde } return nil, err } + defer func(rv *indexImpl) { + if !rv.open { + rv.i.Close() + } + }(rv) // now load the mapping indexReader, err := rv.i.Reader() From bffe7ea0d6b1d48bfac69661b69a60cd1fda9da4 Mon Sep 17 00:00:00 2001 From: Marty 
Schoch Date: Tue, 6 Oct 2020 11:23:28 -0400 Subject: [PATCH 718/728] add go 1.15.x to the testing matrix (#1468) --- .github/workflows/tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index bcafb812b..f57aa35ec 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -8,7 +8,7 @@ jobs: test: strategy: matrix: - go-version: [1.13.x, 1.14.x] + go-version: [1.13.x, 1.14.x, 1.15.x] platform: [ubuntu-latest, macos-latest, windows-latest] runs-on: ${{ matrix.platform }} steps: From 3bb3c7b4883f9cc7d46616a01a31c145105126ad Mon Sep 17 00:00:00 2001 From: vdimir Date: Tue, 6 Oct 2020 20:51:08 +0300 Subject: [PATCH 719/728] Shutdown analysis queue (#1414) --- config.go | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/config.go b/config.go index 482efb408..99f2e081f 100644 --- a/config.go +++ b/config.go @@ -43,9 +43,16 @@ type configuration struct { } func (c *configuration) SetAnalysisQueueSize(n int) { + if c.analysisQueue != nil { + c.analysisQueue.Close() + } c.analysisQueue = index.NewAnalysisQueue(n) } +func (c *configuration) Shutdown() { + c.SetAnalysisQueueSize(0) +} + func newConfiguration() *configuration { return &configuration{ Cache: registry.NewCache(), From cc29456f6e8b72c06776eb63e7ac3b66c98e505e Mon Sep 17 00:00:00 2001 From: pavelbazika Date: Tue, 6 Oct 2020 20:40:04 +0200 Subject: [PATCH 720/728] Fixed ASCII folding of \u24A2 (#1434) * Fixed ASCII folding of \u24A2 * add unit test * fix comment Co-authored-by: Pavel Bazika Co-authored-by: Marty Schoch --- analysis/char/asciifolding/asciifolding.go | 2 +- analysis/char/asciifolding/asciifolding_test.go | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/analysis/char/asciifolding/asciifolding.go b/analysis/char/asciifolding/asciifolding.go index 469102e2c..facce07e1 100644 --- a/analysis/char/asciifolding/asciifolding.go +++ b/analysis/char/asciifolding/asciifolding.go @@ 
-874,7 +874,7 @@ func foldToASCII(input []rune, inputPos int, output []rune, outputPos int, lengt outputPos++ case '\u24A2': // ⒢ [PARENTHESIZED LATIN SMALL LETTER G] - output = output[:(len(output) + 1)] + output = output[:(len(output) + 2)] output[outputPos] = '(' outputPos++ output[outputPos] = 'g' diff --git a/analysis/char/asciifolding/asciifolding_test.go b/analysis/char/asciifolding/asciifolding_test.go index d79542a8a..3eafd8cdd 100644 --- a/analysis/char/asciifolding/asciifolding_test.go +++ b/analysis/char/asciifolding/asciifolding_test.go @@ -48,6 +48,10 @@ func TestAsciiFoldingFilter(t *testing.T) { // apples from https://issues.couchbase.com/browse/MB-33486 input: []byte(`Ápple Àpple Äpple Âpple Ãpple Åpple`), output: []byte(`Apple Apple Apple Apple Apple Apple`), + }, { + // Fix ASCII folding of \u24A2 + input: []byte(`⒢`), + output: []byte(`(g)`), }, } From b9b775998330bf9efdc8748021a80e38fc1e250c Mon Sep 17 00:00:00 2001 From: Tuomas Salo Date: Tue, 6 Oct 2020 21:43:59 +0300 Subject: [PATCH 721/728] escape the input text in HTML highlighter when adding tags, fixes #1464 (#1465) --- search/highlight/format/html/html.go | 10 ++++++---- test/tests/basic/data/b.json | 2 +- test/tests/basic/searches.json | 8 ++++---- 3 files changed, 11 insertions(+), 9 deletions(-) diff --git a/search/highlight/format/html/html.go b/search/highlight/format/html/html.go index 8154e790b..259a03795 100644 --- a/search/highlight/format/html/html.go +++ b/search/highlight/format/html/html.go @@ -15,6 +15,8 @@ package html import ( + "html" + "github.com/blevesearch/bleve/registry" "github.com/blevesearch/bleve/search/highlight" ) @@ -54,18 +56,18 @@ func (a *FragmentFormatter) Format(f *highlight.Fragment, orderedTermLocations h break } // add the stuff before this location - rv += string(f.Orig[curr:termLocation.Start]) - // add the color + rv += html.EscapeString(string(f.Orig[curr:termLocation.Start])) + // start the tag rv += a.before // add the term itself rv += 
string(f.Orig[termLocation.Start:termLocation.End]) - // reset the color + // end the tag rv += a.after // update current curr = termLocation.End } // add any remaining text after the last token - rv += string(f.Orig[curr:f.End]) + rv += html.EscapeString(string(f.Orig[curr:f.End])) return rv } diff --git a/test/tests/basic/data/b.json b/test/tests/basic/data/b.json index 58118a55c..069727276 100644 --- a/test/tests/basic/data/b.json +++ b/test/tests/basic/data/b.json @@ -1,6 +1,6 @@ { "id": "b", - "name": "steve has a long name", + "name": "steve has long & complicated name", "age": 27, "birthday": "2001-09-09T01:46:40Z", "title": "missess" diff --git a/test/tests/basic/searches.json b/test/tests/basic/searches.json index 8b6206c32..7ddfce375 100644 --- a/test/tests/basic/searches.json +++ b/test/tests/basic/searches.json @@ -42,7 +42,7 @@ "size": 10, "sort": ["-_score", "_id"], "query": { - "match_phrase": "long name" + "match_phrase": "steve has" } }, "result": { @@ -385,7 +385,7 @@ { "id": "b", "fragments": { - "name": ["steve has a long name"] + "name": ["steve has <a> long & complicated name"] } } ] @@ -409,7 +409,7 @@ { "id": "b", "fragments": { - "name": ["steve has a long name"] + "name": ["steve has <a> long & complicated name"] } } ] @@ -485,7 +485,7 @@ { "id": "b", "fragments": { - "name": ["steve has a long name"], + "name": ["steve has <a> long & complicated name"], "title": ["missess"] } } From 7efdbbb66a9e0cf40e7fe8ec4d9e194ec54d94dd Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Tue, 6 Oct 2020 15:34:03 -0400 Subject: [PATCH 722/728] add support for zap v15 and update zap version (#1482) * add support for zap v15 and update zap version --- go.mod | 9 +++++---- index/scorch/segment_plugin.go | 2 ++ 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/go.mod b/go.mod index 9a12454fd..615430132 100644 --- a/go.mod +++ b/go.mod @@ -8,10 +8,11 @@ require ( github.com/blevesearch/go-porterstemmer v1.0.3 github.com/blevesearch/segment v0.9.0 
github.com/blevesearch/snowballstem v0.9.0 - github.com/blevesearch/zap/v11 v11.0.10 - github.com/blevesearch/zap/v12 v12.0.10 - github.com/blevesearch/zap/v13 v13.0.2 - github.com/blevesearch/zap/v14 v14.0.1 + github.com/blevesearch/zap/v11 v11.0.11 + github.com/blevesearch/zap/v12 v12.0.11 + github.com/blevesearch/zap/v13 v13.0.3 + github.com/blevesearch/zap/v14 v14.0.2 + github.com/blevesearch/zap/v15 v15.0.0 github.com/couchbase/moss v0.1.0 github.com/couchbase/vellum v1.0.2 github.com/golang/protobuf v1.3.2 diff --git a/index/scorch/segment_plugin.go b/index/scorch/segment_plugin.go index b830b2c05..2f7db48b3 100644 --- a/index/scorch/segment_plugin.go +++ b/index/scorch/segment_plugin.go @@ -23,6 +23,7 @@ import ( zapv12 "github.com/blevesearch/zap/v12" zapv13 "github.com/blevesearch/zap/v13" zapv14 "github.com/blevesearch/zap/v14" + zapv15 "github.com/blevesearch/zap/v15" ) var supportedSegmentPlugins map[string]map[uint32]segment.Plugin @@ -30,6 +31,7 @@ var defaultSegmentPlugin segment.Plugin func init() { ResetPlugins() + RegisterPlugin(zapv15.Plugin(), false) RegisterPlugin(zapv14.Plugin(), false) RegisterPlugin(zapv13.Plugin(), false) RegisterPlugin(zapv12.Plugin(), false) From 7e8fd30c725926ae0a70df5e26ee9dac5c6176b0 Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Tue, 6 Oct 2020 16:31:53 -0400 Subject: [PATCH 723/728] prepare bleve v1.0.12 (#1483) --- go.mod | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/go.mod b/go.mod index 615430132..fe5d25a35 100644 --- a/go.mod +++ b/go.mod @@ -8,11 +8,11 @@ require ( github.com/blevesearch/go-porterstemmer v1.0.3 github.com/blevesearch/segment v0.9.0 github.com/blevesearch/snowballstem v0.9.0 - github.com/blevesearch/zap/v11 v11.0.11 - github.com/blevesearch/zap/v12 v12.0.11 - github.com/blevesearch/zap/v13 v13.0.3 - github.com/blevesearch/zap/v14 v14.0.2 - github.com/blevesearch/zap/v15 v15.0.0 + github.com/blevesearch/zap/v11 v11.0.12 + github.com/blevesearch/zap/v12 v12.0.12 + 
github.com/blevesearch/zap/v13 v13.0.4 + github.com/blevesearch/zap/v14 v14.0.3 + github.com/blevesearch/zap/v15 v15.0.1 github.com/couchbase/moss v0.1.0 github.com/couchbase/vellum v1.0.2 github.com/golang/protobuf v1.3.2 From c67bc71f5a3f50c9365be68eb07cbdc3ee43f507 Mon Sep 17 00:00:00 2001 From: Abhinav Dangeti Date: Fri, 9 Oct 2020 09:18:38 -0600 Subject: [PATCH 724/728] Fix DocValuesDynamic's json-ability within IndexMappingImpl (#1485) + DocValuesDynamic defaults to "true" if unset. + Now, because of the "omitempty" json setting for the attribute, we incorrectly flip a "false" setting for the attribute (which is omitted as it's the zero value for boolean) to "true" (it's default). + Dropping the omitempty setting for this attribute corrects the behavior to this: - when unset, DocValuesDynamic will default to true (like before) - when set, DocValuesDynamic will take that value and will retain the value until an index definition update for the setting. + Fixes https://github.com/blevesearch/bleve/issues/1484 (where the scenario is highlighted better) --- index_test.go | 37 +++++++++++++++++++++++++++++++++++++ mapping/index.go | 2 +- 2 files changed, 38 insertions(+), 1 deletion(-) diff --git a/index_test.go b/index_test.go index 50684420d..394e73083 100644 --- a/index_test.go +++ b/index_test.go @@ -16,6 +16,7 @@ package bleve import ( "context" + "encoding/json" "fmt" "io/ioutil" "log" @@ -2236,3 +2237,39 @@ func TestOptimisedConjunctionSearchHits(t *testing.T) { t.Fatal(err) } } + +func TestIndexMappingDocValuesDynamic(t *testing.T) { + im := NewIndexMapping() + // DocValuesDynamic's default is true + // Now explicitly set it to false + im.DocValuesDynamic = false + + // Next, retrieve the JSON dump of the index mapping + var data []byte + data, err = json.Marshal(im) + if err != nil { + t.Fatal(err) + } + + // Now, edit an unrelated setting in the index mapping + var m map[string]interface{} + err = json.Unmarshal(data, &m) + if err != nil { + t.Fatal(err) 
+ } + m["index_dynamic"] = false + data, err = json.Marshal(m) + if err != nil { + t.Fatal(err) + } + + // Unmarshal back the changes into the index mapping struct + if err = im.UnmarshalJSON(data); err != nil { + t.Fatal(err) + } + + // Expect DocValuesDynamic to remain false! + if im.DocValuesDynamic { + t.Fatalf("Expected DocValuesDynamic to remain false after the index mapping edit") + } +} diff --git a/mapping/index.go b/mapping/index.go index 21ca5cce3..319ba949c 100644 --- a/mapping/index.go +++ b/mapping/index.go @@ -50,7 +50,7 @@ type IndexMappingImpl struct { DefaultField string `json:"default_field"` StoreDynamic bool `json:"store_dynamic"` IndexDynamic bool `json:"index_dynamic"` - DocValuesDynamic bool `json:"docvalues_dynamic,omitempty"` + DocValuesDynamic bool `json:"docvalues_dynamic"` CustomAnalysis *customAnalysis `json:"analysis,omitempty"` cache *registry.Cache } From e24b3afc4804a779eff58954221cd52f70215f09 Mon Sep 17 00:00:00 2001 From: Eric Lindsey Date: Wed, 14 Oct 2020 11:13:14 -0400 Subject: [PATCH 725/728] Remove default from text description (#1489) Default is added to the output automatically by the flags package, so having it in the text as well is redundant. 
--- cmd/bleve/cmd/bulk.go | 4 ++-- cmd/bleve/cmd/check.go | 4 ++-- cmd/bleve/cmd/index.go | 6 +++--- cmd/bleve/cmd/query.go | 16 ++++++++-------- 4 files changed, 15 insertions(+), 15 deletions(-) diff --git a/cmd/bleve/cmd/bulk.go b/cmd/bleve/cmd/bulk.go index c3c90e73a..3f374e769 100644 --- a/cmd/bleve/cmd/bulk.go +++ b/cmd/bleve/cmd/bulk.go @@ -112,6 +112,6 @@ func randomString(n int) string { func init() { RootCmd.AddCommand(bulkCmd) - bulkCmd.Flags().IntVarP(&batchSize, "batch", "b", 1000, "Batch size for loading, default 1000.") - bulkCmd.Flags().BoolVarP(&parseJSON, "json", "j", true, "Parse the contents as JSON, defaults true.") + bulkCmd.Flags().IntVarP(&batchSize, "batch", "b", 1000, "Batch size for loading.") + bulkCmd.Flags().BoolVarP(&parseJSON, "json", "j", true, "Parse the contents as JSON.") } diff --git a/cmd/bleve/cmd/check.go b/cmd/bleve/cmd/check.go index a71def801..370ece983 100644 --- a/cmd/bleve/cmd/check.go +++ b/cmd/bleve/cmd/check.go @@ -126,6 +126,6 @@ func getDictionary(index bleve.Index, field string) (map[string]uint64, error) { func init() { RootCmd.AddCommand(checkCmd) - checkCmd.Flags().StringVarP(&checkFieldName, "field", "f", "", "Restrict check to the specified field name, by default check all fields.") - checkCmd.Flags().IntVarP(&checkCount, "count", "c", 100, "Check this many terms, default 100.") + checkCmd.Flags().StringVarP(&checkFieldName, "field", "f", "", "Restrict check to the specified field name.") + checkCmd.Flags().IntVarP(&checkCount, "count", "c", 100, "Check this many terms.") } diff --git a/cmd/bleve/cmd/index.go b/cmd/bleve/cmd/index.go index c8925b49b..96b6067a9 100644 --- a/cmd/bleve/cmd/index.go +++ b/cmd/bleve/cmd/index.go @@ -111,7 +111,7 @@ func getAllFiles(args []string, rv chan file) { func init() { RootCmd.AddCommand(indexCmd) - indexCmd.Flags().BoolVarP(&keepDir, "keepDir", "d", false, "Keep the directory in the document id, defaults false.") - indexCmd.Flags().BoolVarP(&keepExt, "keepExt", "x", false, 
"Keep the extension in the document id, defaults false.") - indexCmd.Flags().BoolVarP(&parseJSON, "json", "j", true, "Parse the contents as JSON, defaults true.") + indexCmd.Flags().BoolVarP(&keepDir, "keepDir", "d", false, "Keep the directory in the document id.") + indexCmd.Flags().BoolVarP(&keepExt, "keepExt", "x", false, "Keep the extension in the document id.") + indexCmd.Flags().BoolVarP(&parseJSON, "json", "j", true, "Parse the contents as JSON.") } diff --git a/cmd/bleve/cmd/query.go b/cmd/bleve/cmd/query.go index e0babc88f..6e1bd0943 100644 --- a/cmd/bleve/cmd/query.go +++ b/cmd/bleve/cmd/query.go @@ -89,13 +89,13 @@ func buildQuery(args []string) query.Query { func init() { RootCmd.AddCommand(queryCmd) - queryCmd.Flags().IntVarP(&repeat, "repeat", "r", 1, "Repeat the query this many times, default 1.") - queryCmd.Flags().IntVarP(&limit, "limit", "l", 10, "Limit number of results returned, default 10.") - queryCmd.Flags().IntVarP(&skip, "skip", "s", 0, "Skip the first N results, default 0.") - queryCmd.Flags().BoolVarP(&explain, "explain", "x", false, "Explain the result scoring, default false.") - queryCmd.Flags().BoolVar(&highlight, "highlight", true, "Highlight matching text in results, default true.") - queryCmd.Flags().BoolVar(&fields, "fields", false, "Load stored fields, default false.") - queryCmd.Flags().StringVarP(&qtype, "type", "t", "query_string", "Type of query to run, defaults to 'query_string'") - queryCmd.Flags().StringVarP(&qfield, "field", "f", "", "Restrict query to field, by default no restriction, not applicable to query_string queries.") + queryCmd.Flags().IntVarP(&repeat, "repeat", "r", 1, "Repeat the query this many times.") + queryCmd.Flags().IntVarP(&limit, "limit", "l", 10, "Limit number of results returned.") + queryCmd.Flags().IntVarP(&skip, "skip", "s", 0, "Skip the first N results.") + queryCmd.Flags().BoolVarP(&explain, "explain", "x", false, "Explain the result scoring.") + queryCmd.Flags().BoolVar(&highlight, "highlight", 
true, "Highlight matching text in results.") + queryCmd.Flags().BoolVar(&fields, "fields", false, "Load stored fields.") + queryCmd.Flags().StringVarP(&qtype, "type", "t", "query_string", "Type of query to run.") + queryCmd.Flags().StringVarP(&qfield, "field", "f", "", "Restrict query to field, not applicable to query_string queries.") queryCmd.Flags().StringVarP(&sortby, "sort-by", "b", "", "Sort by field.") } From 4b6bddf7bce80d69649c2bdfb4180906a9df395a Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Wed, 11 Nov 2020 13:49:17 -0500 Subject: [PATCH 726/728] fix analyzer lookup when field name contains dot (#1496) At query time, we sometimes attempt to lookup the correct analyzer for a field. When doing so, the "." character can be interpretted as a path separater. Doing this can result in lookup failure in cases where the user specified a field name which also contains a dot. This fix addresses the case where the path contains a dot, but the next element does not match a mapping at this level. Previously this ended all looukp in this mapping, now we attempt to match the remaining path as a whole, without splitting on the dot. This fix is intended to fix most common cases where a user has given a field a name with a dot. However, ambigutities between custom field names containing dots, and actual mapping paths can still happen, and must be manually avoided. 
--- mapping/document.go | 41 ++++++++++++++++++++++------------------- mapping/mapping_test.go | 20 ++++++++++++++++++++ 2 files changed, 42 insertions(+), 19 deletions(-) diff --git a/mapping/document.go b/mapping/document.go index 355a602e5..dd42fab96 100644 --- a/mapping/document.go +++ b/mapping/document.go @@ -106,28 +106,31 @@ func (dm *DocumentMapping) fieldDescribedByPath(path string) *FieldMapping { return subDocMapping.fieldDescribedByPath(encodePath(pathElements[1:])) } } - } else { - // just 1 path elememnt - // first look for property name with empty field - for propName, subDocMapping := range dm.Properties { - if propName == pathElements[0] { - // found property name match, now look at its fields - for _, field := range subDocMapping.Fields { - if field.Name == "" || field.Name == pathElements[0] { - // match - return field - } + } + + // either the path just had one element + // or it had multiple, but no match for the first element at this level + // look for match with full path + + // first look for property name with empty field + for propName, subDocMapping := range dm.Properties { + if propName == path { + // found property name match, now look at its fields + for _, field := range subDocMapping.Fields { + if field.Name == "" || field.Name == path { + // match + return field } } } - // next, walk the properties again, looking for field overriding the name - for propName, subDocMapping := range dm.Properties { - if propName != pathElements[0] { - // property name isn't a match, but field name could override it - for _, field := range subDocMapping.Fields { - if field.Name == pathElements[0] { - return field - } + } + // next, walk the properties again, looking for field overriding the name + for propName, subDocMapping := range dm.Properties { + if propName != path { + // property name isn't a match, but field name could override it + for _, field := range subDocMapping.Fields { + if field.Name == path { + return field } } } diff --git 
a/mapping/mapping_test.go b/mapping/mapping_test.go index 1c9ced58a..564fbb212 100644 --- a/mapping/mapping_test.go +++ b/mapping/mapping_test.go @@ -1151,3 +1151,23 @@ func TestDefaultAnalyzerInheritance(t *testing.T) { t.Fatalf("Expected analyzer: xyz to be inherited by field, but got: '%v'", analyzer) } } + +func TestWrongAnalyzerSearchableAs(t *testing.T) { + fieldMapping := NewTextFieldMapping() + fieldMapping.Name = "geo.accuracy" + fieldMapping.Analyzer = "xyz" + + nestedMapping := NewDocumentMapping() + nestedMapping.AddFieldMappingsAt("accuracy", fieldMapping) + + docMapping := NewDocumentMapping() + docMapping.AddSubDocumentMapping("geo", nestedMapping) + + indexMapping := NewIndexMapping() + indexMapping.AddDocumentMapping("brewery", docMapping) + + analyzerName := indexMapping.AnalyzerNameForPath("geo.geo.accuracy") + if analyzerName != "xyz" { + t.Errorf("expected analyzer name `xyz`, got `%s`", analyzerName) + } +} From c3457bb70309d45b3c5daef87d06f6a0b3d0813c Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Fri, 20 Nov 2020 15:56:01 -0500 Subject: [PATCH 727/728] prepare to support zap v15.0.2 (#1499) --- go.mod | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/go.mod b/go.mod index fe5d25a35..10f4c5711 100644 --- a/go.mod +++ b/go.mod @@ -8,11 +8,11 @@ require ( github.com/blevesearch/go-porterstemmer v1.0.3 github.com/blevesearch/segment v0.9.0 github.com/blevesearch/snowballstem v0.9.0 - github.com/blevesearch/zap/v11 v11.0.12 - github.com/blevesearch/zap/v12 v12.0.12 - github.com/blevesearch/zap/v13 v13.0.4 - github.com/blevesearch/zap/v14 v14.0.3 - github.com/blevesearch/zap/v15 v15.0.1 + github.com/blevesearch/zap/v11 v11.0.13 + github.com/blevesearch/zap/v12 v12.0.13 + github.com/blevesearch/zap/v13 v13.0.5 + github.com/blevesearch/zap/v14 v14.0.4 + github.com/blevesearch/zap/v15 v15.0.2 github.com/couchbase/moss v0.1.0 github.com/couchbase/vellum v1.0.2 github.com/golang/protobuf v1.3.2 From 
d6a3fe28350d693afd14d58fe9f03e70ec693eca Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Tue, 8 Dec 2020 12:04:30 -0500 Subject: [PATCH 728/728] prepare for blevex go.mod (#1519) --- go.mod | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/go.mod b/go.mod index 10f4c5711..6300aea17 100644 --- a/go.mod +++ b/go.mod @@ -4,15 +4,15 @@ go 1.13 require ( github.com/RoaringBitmap/roaring v0.4.23 - github.com/blevesearch/blevex v0.0.0-20190916190636-152f0fe5c040 + github.com/blevesearch/blevex v1.0.0 github.com/blevesearch/go-porterstemmer v1.0.3 github.com/blevesearch/segment v0.9.0 github.com/blevesearch/snowballstem v0.9.0 - github.com/blevesearch/zap/v11 v11.0.13 - github.com/blevesearch/zap/v12 v12.0.13 - github.com/blevesearch/zap/v13 v13.0.5 - github.com/blevesearch/zap/v14 v14.0.4 - github.com/blevesearch/zap/v15 v15.0.2 + github.com/blevesearch/zap/v11 v11.0.14 + github.com/blevesearch/zap/v12 v12.0.14 + github.com/blevesearch/zap/v13 v13.0.6 + github.com/blevesearch/zap/v14 v14.0.5 + github.com/blevesearch/zap/v15 v15.0.3 github.com/couchbase/moss v0.1.0 github.com/couchbase/vellum v1.0.2 github.com/golang/protobuf v1.3.2