Skip to content

Commit

Permalink
Fix SeekLowerBound where tree contains keys that are prefixes of each…
Browse files Browse the repository at this point in the history
… other (#39)

* Fix SeekLowerBound where tree contains keys that are prefixes of each other.

This seems like a trivial case to miss; however, it stemmed from some confusion (on my part) about whether iradix was ever intended to support keys that were prefixes of each other. It's a long story, and I don't even recall exactly why I thought this was the case, given that every other operation on iradix supports this and most even include this case in their test examples. We even had it in the README example!

But some combination of:
 - I was working at the time on a related radix tree that did have the assumption of null-terminated keys
 - hashicorp/go-memdb always null terminates even for simpler string indexes

Anyway the fix is simple and the examples now pass.

In addition the fuzz test (which explicitly used the null-termination trick to never trigger this assuming it was necessary in general) now passes without null terminating keys every time I've run it.

* Clean up other tests that copied the same assumptions and verify they work too.

* Prevent panics in SeekLowerBound and SeekReverseLowerBound;

Also adds support for SeekReverseLowerBound to work when keys are prefixes of each other.

* Clarify some comments and remove unreachable code

* Improve fuzz test coverage

This also uncovered a couple of bugs:
 - Fuzz tests would break if empty strings are involved (which are valid) due to erroneously "de-duplicating" them
 - A panic in SeekLowerBound that only occurred in specific tree configurations (fixed)

* Fix reverse fuzz test and an example it caught that was a bug

* Remove pointless assignments to i.node

* Comment typos and test fixture refactor per PR comments

* Clean up comment and ineffectual assignment
  • Loading branch information
banks authored Jun 28, 2021
1 parent f63f49c commit 49d1d02
Show file tree
Hide file tree
Showing 4 changed files with 457 additions and 177 deletions.
125 changes: 95 additions & 30 deletions iradix_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1533,7 +1533,9 @@ func TestLenTxn(t *testing.T) {
}

func TestIterateLowerBound(t *testing.T) {
fixedLenKeys := []string{

// these should be defined in order
var fixedLenKeys = []string{
"00000",
"00001",
"00004",
Expand All @@ -1542,10 +1544,12 @@ func TestIterateLowerBound(t *testing.T) {
"20020",
}

mixedLenKeys := []string{
// these should be defined in order
var mixedLenKeys = []string{
"a1",
"abc",
"barbazboo",
"f",
"foo",
"found",
"zap",
Expand All @@ -1562,7 +1566,8 @@ func TestIterateLowerBound(t *testing.T) {
fixedLenKeys,
"00000",
fixedLenKeys,
}, {
},
{
fixedLenKeys,
"00003",
[]string{
Expand All @@ -1571,72 +1576,86 @@ func TestIterateLowerBound(t *testing.T) {
"00020",
"20020",
},
}, {
},
{
fixedLenKeys,
"00010",
[]string{
"00010",
"00020",
"20020",
},
}, {
},
{
fixedLenKeys,
"20000",
[]string{
"20020",
},
}, {
},
{
fixedLenKeys,
"20020",
[]string{
"20020",
},
}, {
},
{
fixedLenKeys,
"20022",
[]string{},
}, {
},
{
mixedLenKeys,
"A", // before all lower case letters
mixedLenKeys,
}, {
},
{
mixedLenKeys,
"a1",
mixedLenKeys,
}, {
},
{
mixedLenKeys,
"b",
[]string{
"barbazboo",
"f",
"foo",
"found",
"zap",
"zip",
},
}, {
},
{
mixedLenKeys,
"bar",
[]string{
"barbazboo",
"f",
"foo",
"found",
"zap",
"zip",
},
}, {
},
{
mixedLenKeys,
"barbazboo0",
[]string{
"f",
"foo",
"found",
"zap",
"zip",
},
}, {
},
{
mixedLenKeys,
"zippy",
[]string{},
}, {
},
{
mixedLenKeys,
"zi",
[]string{
Expand Down Expand Up @@ -1665,6 +1684,47 @@ func TestIterateLowerBound(t *testing.T) {
"cbacb",
[]string{"cbbaa", "cbbab", "cbbbc", "cbcbb", "cbcbc", "cbcca", "ccaaa", "ccabc", "ccaca", "ccacc", "ccbac", "cccaa", "cccac", "cccca"},
},

// Panic case found by TestIterateLowerBoundFuzz.
{
[]string{"gcgc"},
"",
[]string{"gcgc"},
},

// We SHOULD support keys that are prefixes of each other despite some
// confusion in the original implementation.
{
[]string{"f", "fo", "foo", "food", "bug"},
"foo",
[]string{"foo", "food"},
},

// We also support the empty key (which is a prefix of every other key) as a
// valid key to insert and search for.
{
[]string{"f", "fo", "foo", "food", "bug", ""},
"foo",
[]string{"foo", "food"},
},
{
[]string{"f", "bug", ""},
"",
[]string{"", "bug", "f"},
},
{
[]string{"f", "bug", "xylophone"},
"",
[]string{"bug", "f", "xylophone"},
},

// This is a case we realized we were not covering while fixing
// SeekReverseLowerBound and could panic before.
{
[]string{"bar", "foo00", "foo11"},
"foo",
[]string{"foo00", "foo11"},
},
}

for idx, test := range cases {
Expand All @@ -1685,9 +1745,7 @@ func TestIterateLowerBound(t *testing.T) {
// Get and seek iterator
root := r.Root()
iter := root.Iterator()
if test.search != "" {
iter.SeekLowerBound([]byte(test.search))
}
iter.SeekLowerBound([]byte(test.search))

// Consume all the keys
out := []string{}
Expand All @@ -1710,8 +1768,12 @@ type readableString string

func (s readableString) Generate(rand *rand.Rand, size int) reflect.Value {
// Pick a random string from a limited alphabet that makes it easy to read the
// failure cases. Also never includes a null byte as we don't support that.
const letters = "abcdefghijklmnopqrstuvwxyz/-_0123456789"
// failure cases.
const letters = "abcdefg"

// Ignore size and make them all shortish to provoke bigger chance of hitting
// prefixes and more interesting tree shapes.
size = rand.Intn(8)

b := make([]byte, size)
for i := range b {
Expand All @@ -1725,17 +1787,16 @@ func TestIterateLowerBoundFuzz(t *testing.T) {
set := []string{}

// This specifies a property where each call adds a new random key to the radix
// tree (with a null byte appended since our tree doesn't support one key
// being a prefix of another and treats null bytes specially).
// tree.
//
// It also maintains a plain sorted list of the same set of keys and asserts
// that iterating from some random key to the end using LowerBound produces
// the same list as filtering all sorted keys that are lower.

radixAddAndScan := func(newKey, searchKey readableString) []string {
// Append a null byte
key := []byte(newKey + "\x00")
r, _, _ = r.Insert(key, nil)
r, _, _ = r.Insert([]byte(newKey), nil)

t.Logf("NewKey: %q, SearchKey: %q", newKey, searchKey)

// Now iterate the tree from searchKey to the end
it := r.Root().Iterator()
Expand All @@ -1746,8 +1807,7 @@ func TestIterateLowerBoundFuzz(t *testing.T) {
if !ok {
break
}
// Strip the null byte and append to result set
result = append(result, string(key[0:len(key)-1]))
result = append(result, string(key))
}
return result
}
Expand All @@ -1758,14 +1818,19 @@ func TestIterateLowerBoundFuzz(t *testing.T) {
sort.Strings(set)

t.Logf("Current Set: %#v", set)
t.Logf("Search Key: %#v %v", searchKey, "" >= string(searchKey))

result := []string{}
var prev string
for _, k := range set {
if k >= string(searchKey) && k != prev {
for i, k := range set {
// Check this is not a duplicate of the previous value. Note we don't just
// store the last string to compare because empty string is a valid value
// in the set and makes comparing on the first iteration awkward.
if i > 0 && set[i-1] == k {
continue
}
if k >= string(searchKey) {
result = append(result, k)
}
prev = k
}
return result
}
Expand Down
59 changes: 38 additions & 21 deletions iter.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ func (i *Iterator) SeekPrefixWatch(prefix []byte) (watch <-chan struct{}) {
watch = n.mutateCh
search := prefix
for {
// Check for key exhaution
// Check for key exhaustion
if len(search) == 0 {
i.node = n
return
Expand Down Expand Up @@ -60,10 +60,13 @@ func (i *Iterator) recurseMin(n *Node) *Node {
if n.leaf != nil {
return n
}
if len(n.edges) > 0 {
nEdges := len(n.edges)
if nEdges > 1 {
// Add all the other edges to the stack (the min node will be added as
// we recurse)
i.stack = append(i.stack, n.edges[1:])
}
if nEdges > 0 {
return i.recurseMin(n.edges[0].node)
}
// Shouldn't be possible
Expand All @@ -77,16 +80,32 @@ func (i *Iterator) recurseMin(n *Node) *Node {
func (i *Iterator) SeekLowerBound(key []byte) {
// Wipe the stack. Unlike Prefix iteration, we need to build the stack as we
// go because we need only a subset of edges of many nodes in the path to the
// leaf with the lower bound.
// leaf with the lower bound. Note that the iterator will still recurse into
// children that we don't traverse on the way to the reverse lower bound as it
// walks the stack.
i.stack = []edges{}
// i.node starts off in the common case as pointing to the root node of the
// tree. By the time we return we have either found a lower bound and setup
// the stack to traverse all larger keys, or we have not and the stack and
// node should both be nil to prevent the iterator from assuming it is just
// iterating the whole tree from the root node. Either way this needs to end
// up as nil so just set it here.
n := i.node
i.node = nil
search := key

found := func(n *Node) {
i.node = n
i.stack = append(i.stack, edges{edge{node: n}})
}

findMin := func(n *Node) {
n = i.recurseMin(n)
if n != nil {
found(n)
return
}
}

for {
// Compare current prefix with the search key's same-length prefix.
var prefixCmp int
Expand All @@ -100,10 +119,7 @@ func (i *Iterator) SeekLowerBound(key []byte) {
// Prefix is larger, that means the lower bound is greater than the search
// and from now on we need to follow the minimum path to the smallest
// leaf under this subtree.
n = i.recurseMin(n)
if n != nil {
found(n)
}
findMin(n)
return
}

Expand All @@ -115,27 +131,29 @@ func (i *Iterator) SeekLowerBound(key []byte) {
}

// Prefix is equal, we are still heading for an exact match. If this is a
// leaf we're done.
if n.leaf != nil {
if bytes.Compare(n.leaf.key, key) < 0 {
i.node = nil
return
}
// leaf and an exact match we're done.
if n.leaf != nil && bytes.Equal(n.leaf.key, key) {
found(n)
return
}

// Consume the search prefix
if len(n.prefix) > len(search) {
search = []byte{}
} else {
search = search[len(n.prefix):]
// Consume the search prefix if the current node has one. Note that this is
// safe because if n.prefix is longer than the search slice prefixCmp would
// have been > 0 above and the method would have already returned.
search = search[len(n.prefix):]

if len(search) == 0 {
// We've exhausted the search key, but the current node is not an exact
// match or not a leaf. That means that the leaf value if it exists, and
// all child nodes must be strictly greater, the smallest key in this
// subtree must be the lower bound.
findMin(n)
return
}

// Otherwise, take the lower bound next edge.
idx, lbNode := n.getLowerBoundEdge(search[0])
if lbNode == nil {
i.node = nil
return
}

Expand All @@ -144,7 +162,6 @@ func (i *Iterator) SeekLowerBound(key []byte) {
i.stack = append(i.stack, n.edges[idx+1:])
}

i.node = lbNode
// Recurse
n = lbNode
}
Expand Down
Loading

0 comments on commit 49d1d02

Please sign in to comment.