From b7f95f1b79179c46088af2389625238231e83dff Mon Sep 17 00:00:00 2001 From: NamanJain8 Date: Thu, 17 Dec 2020 22:53:07 +0530 Subject: [PATCH 1/3] check properly if the key is present in a table (cherry picked from commit 6d6d8cc4809f1a4b7e8841df763b00efa75bfe20) DropPrefix assumes that key passed to it contains ts in last 8 bytes (cherry picked from commit 19216ac028188c891c1f738976edd091267bdc02) add test (cherry picked from commit efad954b6377e8989f5da10203a3a579ed5347e2) --- levels.go | 29 +++++++++++++++++++++++++---- levels_test.go | 48 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 73 insertions(+), 4 deletions(-) diff --git a/levels.go b/levels.go index aeabec232..4f544136b 100644 --- a/levels.go +++ b/levels.go @@ -309,7 +309,7 @@ func (s *levelsController) dropPrefixes(prefixes [][]byte) error { } for _, table := range l.tables { - if containsAnyPrefixes(table.Smallest(), table.Biggest(), prefixes) { + if containsAnyPrefixes(table, prefixes) { tableGroup = append(tableGroup, table) } else { finishGroup() @@ -940,24 +940,45 @@ func hasAnyPrefixes(s []byte, listOfPrefixes [][]byte) bool { return false } -func containsPrefix(smallValue, largeValue, prefix []byte) bool { +func containsPrefix(table *table.Table, prefix []byte) bool { + smallValue := table.Smallest() + largeValue := table.Biggest() if bytes.HasPrefix(smallValue, prefix) { return true } if bytes.HasPrefix(largeValue, prefix) { return true } + isPresent := func() bool { + ti := table.NewIterator(false) + defer ti.Close() + // In table iterator's Seek, we assume that key has timestamp in last 8 bytes. We set ts=0, + // so that we don't skip the key prefixed with prefix. + prefixWithTs := make([]byte, len(prefix)+8) + copy(prefixWithTs, prefix) + ti.Seek(prefixWithTs) + if bytes.HasPrefix(ti.Key(), prefix) { + return true + } + return false + } + if bytes.Compare(prefix, smallValue) > 0 && bytes.Compare(prefix, largeValue) < 0 { + // There may be a case when table contains [0x0000,...., 0xffff]. If we are searching for + // k=0x0011, we should not directly infer that k is present. It may not be present. + if !isPresent() { + return false + } return true } return false } -func containsAnyPrefixes(smallValue, largeValue []byte, listOfPrefixes [][]byte) bool { +func containsAnyPrefixes(table *table.Table, listOfPrefixes [][]byte) bool { for _, prefix := range listOfPrefixes { - if containsPrefix(smallValue, largeValue, prefix) { + if containsPrefix(table, prefix) { return true } } diff --git a/levels_test.go b/levels_test.go index f928df28b..94b0ef6b0 100644 --- a/levels_test.go +++ b/levels_test.go @@ -19,6 +19,9 @@ package badger import ( "fmt" "math" + "math/rand" + "os" + "sort" "testing" "time" @@ -1049,3 +1052,48 @@ func TestKeyVersions(t *testing.T) { }) }) } + +func TestTableContainsPrefix(t *testing.T) { + opts := table.Options{ + LoadingMode: options.LoadToRAM, + BlockSize: 4 * 1024, + BloomFalsePositive: 0.001, + } + + buildTable := func(keys []string) *os.File { + filename := fmt.Sprintf("%s%s%d.sst", os.TempDir(), string(os.PathSeparator), rand.Uint32()) + f, err := y.CreateSyncedFile(filename, true) + require.NoError(t, err) + b := table.NewTableBuilder(opts) + defer b.Close() + + v := []byte("value") + sort.Slice(keys, func(i, j int) bool { + return keys[i] < keys[j] + }) + for _, k := range keys { + b.Add(y.KeyWithTs([]byte(k), 1), y.ValueStruct{Value: v}, 0) + b.Add(y.KeyWithTs([]byte(k), 2), y.ValueStruct{Value: v}, 0) + } + _, err = f.Write(b.Finish()) + require.NoError(t, err) + f.Close() + f, _ = y.OpenSyncedFile(filename, true) + return f + } + + f := buildTable([]string{"key1", "key3", "key31", "key32", "key4"}) + tbl, err := table.OpenTable(f, opts) + require.NoError(t, err) + defer tbl.DecrRef() + + require.True(t, containsPrefix(tbl, []byte("key"))) + require.True(t, containsPrefix(tbl, []byte("key1"))) + require.True(t, containsPrefix(tbl, []byte("key3"))) + require.True(t, containsPrefix(tbl, []byte("key32"))) + require.True(t, containsPrefix(tbl, []byte("key4"))) + + require.False(t, containsPrefix(tbl, []byte("key0"))) + require.False(t, containsPrefix(tbl, []byte("key2"))) + require.False(t, containsPrefix(tbl, []byte("key5"))) +} From 3f28d6c48df8acbc1bd9a962ef9403ae04082971 Mon Sep 17 00:00:00 2001 From: NamanJain8 Date: Mon, 21 Dec 2020 13:29:49 +0530 Subject: [PATCH 2/3] fixes for the new badger changes --- levels.go | 10 ++++------ levels_test.go | 17 +++++------------ 2 files changed, 9 insertions(+), 18 deletions(-) diff --git a/levels.go b/levels.go index 4f544136b..84fd29070 100644 --- a/levels.go +++ b/levels.go @@ -950,13 +950,11 @@ func containsPrefix(table *table.Table, prefix []byte) bool { return true } isPresent := func() bool { - ti := table.NewIterator(false) + ti := table.NewIterator(0) defer ti.Close() - // In table iterator's Seek, we assume that key has timestamp in last 8 bytes. We set ts=0, - // so that we don't skip the key prefixed with prefix. - prefixWithTs := make([]byte, len(prefix)+8) - copy(prefixWithTs, prefix) - ti.Seek(prefixWithTs) + // In table iterator's Seek, we assume that key has version in last 8 bytes. We set + // version=0 (ts=math.MaxUint64), so that we don't skip the key prefixed with prefix. + ti.Seek(y.KeyWithTs(prefix, math.MaxUint64)) if bytes.HasPrefix(ti.Key(), prefix) { return true } diff --git a/levels_test.go b/levels_test.go index 94b0ef6b0..501383b96 100644 --- a/levels_test.go +++ b/levels_test.go @@ -1055,15 +1055,12 @@ func TestKeyVersions(t *testing.T) { func TestTableContainsPrefix(t *testing.T) { opts := table.Options{ - LoadingMode: options.LoadToRAM, BlockSize: 4 * 1024, - BloomFalsePositive: 0.001, + BloomFalsePositive: 0.01, } - buildTable := func(keys []string) *os.File { + buildTable := func(keys []string) *table.Table { filename := fmt.Sprintf("%s%s%d.sst", os.TempDir(), string(os.PathSeparator), rand.Uint32()) - f, err := y.CreateSyncedFile(filename, true) - require.NoError(t, err) b := table.NewTableBuilder(opts) defer b.Close() @@ -1075,16 +1072,12 @@ func TestTableContainsPrefix(t *testing.T) { b.Add(y.KeyWithTs([]byte(k), 1), y.ValueStruct{Value: v}, 0) b.Add(y.KeyWithTs([]byte(k), 2), y.ValueStruct{Value: v}, 0) } - _, err = f.Write(b.Finish()) + tbl, err := table.CreateTable(filename, b) require.NoError(t, err) - f.Close() - f, _ = y.OpenSyncedFile(filename, true) - return f + return tbl } - f := buildTable([]string{"key1", "key3", "key31", "key32", "key4"}) - tbl, err := table.OpenTable(f, opts) - require.NoError(t, err) + tbl := buildTable([]string{"key1", "key3", "key31", "key32", "key4"}) defer tbl.DecrRef() require.True(t, containsPrefix(tbl, []byte("key"))) From fcff24af5b05a260bf9e7dd7a764a41d4eefd4fa Mon Sep 17 00:00:00 2001 From: NamanJain8 Date: Mon, 21 Dec 2020 17:27:17 +0530 Subject: [PATCH 3/3] address review comment --- levels_test.go | 1 + 1 file changed, 1 insertion(+) diff --git a/levels_test.go b/levels_test.go index 501383b96..76746ff05 100644 --- a/levels_test.go +++ b/levels_test.go @@ -1088,5 +1088,6 @@ func TestTableContainsPrefix(t *testing.T) { require.False(t, containsPrefix(tbl, []byte("key0"))) require.False(t, containsPrefix(tbl, []byte("key2"))) + require.False(t, containsPrefix(tbl, []byte("key323"))) require.False(t, containsPrefix(tbl, []byte("key5"))) }