Skip to content

Commit

Permalink
allow duplicate query paths (#25)
Browse files Browse the repository at this point in the history
  • Loading branch information
woutslakhorst authored Nov 11, 2022
1 parent 4030fb4 commit 0496850
Show file tree
Hide file tree
Showing 3 changed files with 88 additions and 64 deletions.
64 changes: 29 additions & 35 deletions index.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,9 +44,6 @@ type Index interface {
Iterate(bucket *bbolt.Bucket, query Query, fn iteratorFn) error
// BucketName returns the bucket path for this index
BucketName() []byte
// Sort the query so its parts align with the index parts.
// If includeMissing is true, the sort appends the queryParts not matched by the index at the end.
Sort(query Query, includeMissing bool) []QueryPart
// QueryPartsOutsideIndex selects the queryParts that are not covered by the index.
QueryPartsOutsideIndex(query Query) []QueryPart
// Depth returns the number of indexed fields
Expand Down Expand Up @@ -185,7 +182,7 @@ func removeRefFromBucket(bucket *bbolt.Bucket, key Key, ref Reference) error {
func (i *index) IsMatch(query Query) float64 {
hitcount := 0

parts := i.Sort(query, false)
parts := i.matchingParts(query)

outer:
for thc, ip := range i.indexParts {
Expand All @@ -203,15 +200,18 @@ outer:
return float64(hitcount) / float64(len(i.indexParts))
}

func (i *index) Sort(query Query, includeMissing bool) []QueryPart {
// matchingParts returns the queryParts that match the index.
// It also sorts them in the order of the index parts. If multiple query parts match the same index position, the first one is returned.
func (i *index) matchingParts(query Query) []QueryPart {
var sorted = make([]QueryPart, len(i.indexParts))

outer:
for _, qp := range query.parts {
for j, ip := range i.indexParts {
if ip.Equals(qp) {
sorted[j] = qp
continue outer
if sorted[j] == nil {
sorted[j] = qp
continue outer
}
}
}
}
Expand All @@ -223,40 +223,34 @@ outer:
break
}
}

if includeMissing {
// now include all params not in the sorted list
outerMissing:
for _, qp := range query.parts {
for _, sp := range sorted {
if sp.Equals(qp) {
continue outerMissing
}
}
// missing so append
sorted = append(sorted, qp)
}
}

return sorted
}

func (i *index) QueryPartsOutsideIndex(query Query) []QueryPart {
hits := 0
parts := i.Sort(query, true)
matchingParts := i.matchingParts(query)
resultingParts := make([]QueryPart, 0)
visitedParts := make([]QueryPart, 0)

for j, qp := range parts {
if j >= len(i.indexParts) || !qp.Equals(i.indexParts[j]) {
break
outer:
for _, qp := range query.parts {
for _, mp := range matchingParts {
if mp.Equals(qp) {
for _, hp := range visitedParts {
if hp.Equals(qp) { // already excluded once
resultingParts = append(resultingParts, qp)
continue outer
}
}
// exclude and continue
visitedParts = append(visitedParts, mp)
continue outer
}
}
hits++
}

if hits == len(parts) {
return []QueryPart{}
// no hit in index parts
resultingParts = append(resultingParts, qp)
}

return parts[hits:]
return resultingParts
}

func (i *index) Iterate(bucket *bbolt.Bucket, query Query, fn iteratorFn) error {
Expand All @@ -268,7 +262,7 @@ func (i *index) Iterate(bucket *bbolt.Bucket, query Query, fn iteratorFn) error
}

// Sort the parts of the Query to conform to the index key building order
sortedQueryParts := i.Sort(query, false)
sortedQueryParts := i.matchingParts(query)

if len(sortedQueryParts) == 0 {
return errors.New("unable to iterate over index without matching keys")
Expand Down
17 changes: 17 additions & 0 deletions index_integration_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,23 @@ func TestIndex_CursorDynamics(t *testing.T) {
assert.Equal(t, 2, found)
})

t.Run("2 docs found on single prefix key using duplicate key", func(t *testing.T) {
q := New(Prefix(key1, MustParseScalar("1"))).And(
Prefix(key1, MustParseScalar("1")))
found := 0

err := db.View(func(tx *bbolt.Tx) error {
b := testBucket(t, tx)
return i.Iterate(b, q, func(key Reference, value []byte) error {
found++
return nil
})
})

assert.NoError(t, err)
assert.Equal(t, 2, found)
})

t.Run("2 docs found on prefix key and notNil", func(t *testing.T) {
q := New(Prefix(key1, MustParseScalar("1"))).And(NotNil(key2))
found := 0
Expand Down
71 changes: 42 additions & 29 deletions index_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ import (
)

var valueAsScalar = MustParseScalar("value")
var valueAsScalar2 = MustParseScalar("value2")

func TestNewIndex(t *testing.T) {
_, c := testCollection(t)
Expand Down Expand Up @@ -549,20 +550,19 @@ func TestIndex_addRefToBucket(t *testing.T) {
})
}

func TestIndex_Sort(t *testing.T) {
func TestIndex_matchingParts(t *testing.T) {
_, c := testCollection(t)
key := NewJSONPath("path.part")
key2 := NewJSONPath("path.more.#.parts")
key3 := NewJSONPath("key3")
i := c.NewIndex(t.Name(),
NewFieldIndexer(key),
NewFieldIndexer(key2),
)
).(*index)

t.Run("returns correct order when given in reverse", func(t *testing.T) {
sorted := i.Sort(
sorted := i.matchingParts(
New(Eq(key2, valueAsScalar)).
And(Eq(key, valueAsScalar)), false)
And(Eq(key, valueAsScalar)))

if !assert.Len(t, sorted, 2) {
return
Expand All @@ -572,9 +572,9 @@ func TestIndex_Sort(t *testing.T) {
})

t.Run("returns correct order when given in correct order", func(t *testing.T) {
sorted := i.Sort(
sorted := i.matchingParts(
New(Eq(key, valueAsScalar)).
And(Eq(key2, valueAsScalar)), false)
And(Eq(key2, valueAsScalar)))

if !assert.Len(t, sorted, 2) {
return
Expand All @@ -584,34 +584,21 @@ func TestIndex_Sort(t *testing.T) {
})

t.Run("does not include any keys when primary key is missing", func(t *testing.T) {
sorted := i.Sort(
New(Eq(key2, valueAsScalar)), false)
sorted := i.matchingParts(
New(Eq(key2, valueAsScalar)))

assert.Len(t, sorted, 0)
})

t.Run("includes all keys when includeMissing option is given", func(t *testing.T) {
sorted := i.Sort(
New(Eq(key3, valueAsScalar)).
And(Eq(key2, valueAsScalar)), true)

if !assert.Len(t, sorted, 2) {
return
}
assert.Equal(t, key3, sorted[0].QueryPath())
assert.Equal(t, key2, sorted[1].QueryPath())
})

t.Run("includes additional keys when includeMissing option is given", func(t *testing.T) {
sorted := i.Sort(
New(Eq(key3, valueAsScalar)).
And(Eq(key, valueAsScalar)), true)
t.Run("returns first key when duplicate keys are given for index", func(t *testing.T) {
sorted := i.matchingParts(
New(Eq(key, valueAsScalar)).
And(Eq(key, valueAsScalar2)))

if !assert.Len(t, sorted, 2) {
if !assert.Len(t, sorted, 1) {
return
}
assert.Equal(t, key, sorted[0].QueryPath())
assert.Equal(t, key3, sorted[1].QueryPath())
assert.True(t, sorted[0].Condition(Key("value"), nil))
})
}

Expand All @@ -624,7 +611,7 @@ func TestIndex_QueryPartsOutsideIndex(t *testing.T) {
i := c.NewIndex(t.Name(),
NewFieldIndexer(key),
NewFieldIndexer(key2),
)
).(*index)

t.Run("returns empty list when all parts in index", func(t *testing.T) {
additional := i.QueryPartsOutsideIndex(
Expand All @@ -651,6 +638,32 @@ func TestIndex_QueryPartsOutsideIndex(t *testing.T) {
}
assert.Equal(t, key3, additional[0].QueryPath())
})

t.Run("returns param if duplicate and is index hit", func(t *testing.T) {
additional := i.QueryPartsOutsideIndex(
New(Eq(key, valueAsScalar)).
And(Eq(key, valueAsScalar)))

if !assert.Len(t, additional, 1) {
return
}
assert.Equal(t, key, additional[0].QueryPath())
})

t.Run("returns all duplicates", func(t *testing.T) {
additional := i.QueryPartsOutsideIndex(
New(Eq(key, valueAsScalar)).
And(Eq(key, valueAsScalar)).
And(Eq(key3, valueAsScalar)).
And(Eq(key3, valueAsScalar)))

if !assert.Len(t, additional, 3) {
return
}
assert.Equal(t, key, additional[0].QueryPath())
assert.Equal(t, key3, additional[1].QueryPath())
assert.Equal(t, key3, additional[2].QueryPath())
})
}

func TestIndex_Keys(t *testing.T) {
Expand Down

0 comments on commit 0496850

Please sign in to comment.