Skip to content

Commit

Permalink
Fix SeekLowerBound where tree contains keys that are prefixes of each…
Browse files Browse the repository at this point in the history
… other (#39)

* Fix SeekLowerBound where tree contains keys that are prefixes of each other.

This seems like a trivial case to miss; however, it stemmed from some confusion (on my part) about whether iradix was ever intended to support keys that were prefixes of each other. It's a long story, and I don't even recall exactly why I thought this was the case, given that every other operation on iradix supports this and most even include this case in their test examples. We even had it in the README example!

But some combination of:
 - I was working at the time on a related radix tree that did have the assumption of null-terminated keys
 - hashicorp/go-memdb always null terminates even for simpler string indexes

Anyway the fix is simple and the examples now pass.

In addition the fuzz test (which explicitly used the null-termination trick to never trigger this assuming it was necessary in general) now passes without null terminating keys every time I've run it.

* Clean up other tests that copied the same assumptions and verify they work too.

* Prevent panics in SeekLowerBound and SeekReverseLowerBound;

Also adds support for SeekReverseLowerBound to work when keys are prefixes of each other.

* Clarify some comments and remove unreachable code

* Improve fuzz test coverage

This also uncovered a couple of bugs:
 - Fuzz tests would break if empty strings are involved (which are valid) due to erroneously "de-duplicating" them
 - A panic in SeekLowerBound that only occurred in specific tree configurations (fixed)

* Fix reverse fuzz test and an example it caught that was a bug

* Remove pointless assignments to i.node

* Comment typos and test fixture refactor per PR comments

* Clean up comment and ineffectual assignment
  • Loading branch information
banks authored Jun 28, 2021
1 parent f63f49c commit 49d1d02
Show file tree
Hide file tree
Showing 4 changed files with 457 additions and 177 deletions.
125 changes: 95 additions & 30 deletions iradix_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1533,7 +1533,9 @@ func TestLenTxn(t *testing.T) {
}

func TestIterateLowerBound(t *testing.T) {
fixedLenKeys := []string{

// these should be defined in order
var fixedLenKeys = []string{
"00000",
"00001",
"00004",
Expand All @@ -1542,10 +1544,12 @@ func TestIterateLowerBound(t *testing.T) {
"20020",
}

mixedLenKeys := []string{
// these should be defined in order
var mixedLenKeys = []string{
"a1",
"abc",
"barbazboo",
"f",
"foo",
"found",
"zap",
Expand All @@ -1562,7 +1566,8 @@ func TestIterateLowerBound(t *testing.T) {
fixedLenKeys,
"00000",
fixedLenKeys,
}, {
},
{
fixedLenKeys,
"00003",
[]string{
Expand All @@ -1571,72 +1576,86 @@ func TestIterateLowerBound(t *testing.T) {
"00020",
"20020",
},
}, {
},
{
fixedLenKeys,
"00010",
[]string{
"00010",
"00020",
"20020",
},
}, {
},
{
fixedLenKeys,
"20000",
[]string{
"20020",
},
}, {
},
{
fixedLenKeys,
"20020",
[]string{
"20020",
},
}, {
},
{
fixedLenKeys,
"20022",
[]string{},
}, {
},
{
mixedLenKeys,
"A", // before all lower case letters
mixedLenKeys,
}, {
},
{
mixedLenKeys,
"a1",
mixedLenKeys,
}, {
},
{
mixedLenKeys,
"b",
[]string{
"barbazboo",
"f",
"foo",
"found",
"zap",
"zip",
},
}, {
},
{
mixedLenKeys,
"bar",
[]string{
"barbazboo",
"f",
"foo",
"found",
"zap",
"zip",
},
}, {
},
{
mixedLenKeys,
"barbazboo0",
[]string{
"f",
"foo",
"found",
"zap",
"zip",
},
}, {
},
{
mixedLenKeys,
"zippy",
[]string{},
}, {
},
{
mixedLenKeys,
"zi",
[]string{
Expand Down Expand Up @@ -1665,6 +1684,47 @@ func TestIterateLowerBound(t *testing.T) {
"cbacb",
[]string{"cbbaa", "cbbab", "cbbbc", "cbcbb", "cbcbc", "cbcca", "ccaaa", "ccabc", "ccaca", "ccacc", "ccbac", "cccaa", "cccac", "cccca"},
},

// Panic case found by TestIterateLowerBoundFuzz.
{
[]string{"gcgc"},
"",
[]string{"gcgc"},
},

// We SHOULD support keys that are prefixes of each other despite some
// confusion in the original implementation.
{
[]string{"f", "fo", "foo", "food", "bug"},
"foo",
[]string{"foo", "food"},
},

// We also support the empty key (which is a prefix of every other key) as a
// valid key to insert and search for.
{
[]string{"f", "fo", "foo", "food", "bug", ""},
"foo",
[]string{"foo", "food"},
},
{
[]string{"f", "bug", ""},
"",
[]string{"", "bug", "f"},
},
{
[]string{"f", "bug", "xylophone"},
"",
[]string{"bug", "f", "xylophone"},
},

// This is a case we realized we were not covering while fixing
// SeekReverseLowerBound and could panic before.
{
[]string{"bar", "foo00", "foo11"},
"foo",
[]string{"foo00", "foo11"},
},
}

for idx, test := range cases {
Expand All @@ -1685,9 +1745,7 @@ func TestIterateLowerBound(t *testing.T) {
// Get and seek iterator
root := r.Root()
iter := root.Iterator()
if test.search != "" {
iter.SeekLowerBound([]byte(test.search))
}
iter.SeekLowerBound([]byte(test.search))

// Consume all the keys
out := []string{}
Expand All @@ -1710,8 +1768,12 @@ type readableString string

func (s readableString) Generate(rand *rand.Rand, size int) reflect.Value {
// Pick a random string from a limited alphabet that makes it easy to read the
// failure cases. Also never includes a null byte as we don't support that.
const letters = "abcdefghijklmnopqrstuvwxyz/-_0123456789"
// failure cases.
const letters = "abcdefg"

// Ignore size and make them all shortish to provoke bigger chance of hitting
// prefixes and more interesting tree shapes.
size = rand.Intn(8)

b := make([]byte, size)
for i := range b {
Expand All @@ -1725,17 +1787,16 @@ func TestIterateLowerBoundFuzz(t *testing.T) {
set := []string{}

// This specifies a property where each call adds a new random key to the radix
// tree (with a null byte appended since our tree doesn't support one key
// being a prefix of another and treats null bytes specially).
// tree.
//
// It also maintains a plain sorted list of the same set of keys and asserts
// that iterating from some random key to the end using LowerBound produces
// the same list as filtering all sorted keys that are lower.

radixAddAndScan := func(newKey, searchKey readableString) []string {
// Append a null byte
key := []byte(newKey + "\x00")
r, _, _ = r.Insert(key, nil)
r, _, _ = r.Insert([]byte(newKey), nil)

t.Logf("NewKey: %q, SearchKey: %q", newKey, searchKey)

// Now iterate the tree from searchKey to the end
it := r.Root().Iterator()
Expand All @@ -1746,8 +1807,7 @@ func TestIterateLowerBoundFuzz(t *testing.T) {
if !ok {
break
}
// Strip the null byte and append to result set
result = append(result, string(key[0:len(key)-1]))
result = append(result, string(key))
}
return result
}
Expand All @@ -1758,14 +1818,19 @@ func TestIterateLowerBoundFuzz(t *testing.T) {
sort.Strings(set)

t.Logf("Current Set: %#v", set)
t.Logf("Search Key: %#v %v", searchKey, "" >= string(searchKey))

result := []string{}
var prev string
for _, k := range set {
if k >= string(searchKey) && k != prev {
for i, k := range set {
// Check this is not a duplicate of the previous value. Note we don't just
// store the last string to compare because empty string is a valid value
// in the set and makes comparing on the first iteration awkward.
if i > 0 && set[i-1] == k {
continue
}
if k >= string(searchKey) {
result = append(result, k)
}
prev = k
}
return result
}
Expand Down
59 changes: 38 additions & 21 deletions iter.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ func (i *Iterator) SeekPrefixWatch(prefix []byte) (watch <-chan struct{}) {
watch = n.mutateCh
search := prefix
for {
// Check for key exhaution
// Check for key exhaustion
if len(search) == 0 {
i.node = n
return
Expand Down Expand Up @@ -60,10 +60,13 @@ func (i *Iterator) recurseMin(n *Node) *Node {
if n.leaf != nil {
return n
}
if len(n.edges) > 0 {
nEdges := len(n.edges)
if nEdges > 1 {
// Add all the other edges to the stack (the min node will be added as
// we recurse)
i.stack = append(i.stack, n.edges[1:])
}
if nEdges > 0 {
return i.recurseMin(n.edges[0].node)
}
// Shouldn't be possible
Expand All @@ -77,16 +80,32 @@ func (i *Iterator) recurseMin(n *Node) *Node {
func (i *Iterator) SeekLowerBound(key []byte) {
// Wipe the stack. Unlike Prefix iteration, we need to build the stack as we
// go because we need only a subset of edges of many nodes in the path to the
// leaf with the lower bound.
// leaf with the lower bound. Note that the iterator will still recurse into
// children that we don't traverse on the way to the reverse lower bound as it
// walks the stack.
i.stack = []edges{}
// i.node starts off in the common case as pointing to the root node of the
// tree. By the time we return we have either found a lower bound and setup
// the stack to traverse all larger keys, or we have not and the stack and
// node should both be nil to prevent the iterator from assuming it is just
// iterating the whole tree from the root node. Either way this needs to end
// up as nil so just set it here.
n := i.node
i.node = nil
search := key

found := func(n *Node) {
i.node = n
i.stack = append(i.stack, edges{edge{node: n}})
}

findMin := func(n *Node) {
n = i.recurseMin(n)
if n != nil {
found(n)
return
}
}

for {
// Compare current prefix with the search key's same-length prefix.
var prefixCmp int
Expand All @@ -100,10 +119,7 @@ func (i *Iterator) SeekLowerBound(key []byte) {
// Prefix is larger, that means the lower bound is greater than the search
// and from now on we need to follow the minimum path to the smallest
// leaf under this subtree.
n = i.recurseMin(n)
if n != nil {
found(n)
}
findMin(n)
return
}

Expand All @@ -115,27 +131,29 @@ func (i *Iterator) SeekLowerBound(key []byte) {
}

// Prefix is equal, we are still heading for an exact match. If this is a
// leaf we're done.
if n.leaf != nil {
if bytes.Compare(n.leaf.key, key) < 0 {
i.node = nil
return
}
// leaf and an exact match we're done.
if n.leaf != nil && bytes.Equal(n.leaf.key, key) {
found(n)
return
}

// Consume the search prefix
if len(n.prefix) > len(search) {
search = []byte{}
} else {
search = search[len(n.prefix):]
// Consume the search prefix if the current node has one. Note that this is
// safe because if n.prefix is longer than the search slice prefixCmp would
// have been > 0 above and the method would have already returned.
search = search[len(n.prefix):]

if len(search) == 0 {
// We've exhausted the search key, but the current node is not an exact
// match or not a leaf. That means that the leaf value if it exists, and
// all child nodes must be strictly greater, the smallest key in this
// subtree must be the lower bound.
findMin(n)
return
}

// Otherwise, take the lower bound next edge.
idx, lbNode := n.getLowerBoundEdge(search[0])
if lbNode == nil {
i.node = nil
return
}

Expand All @@ -144,7 +162,6 @@ func (i *Iterator) SeekLowerBound(key []byte) {
i.stack = append(i.stack, n.edges[idx+1:])
}

i.node = lbNode
// Recurse
n = lbNode
}
Expand Down
Loading

0 comments on commit 49d1d02

Please sign in to comment.