Skip to content

Commit

Permalink
pushdown filter, enum column (#2)
Browse files Browse the repository at this point in the history
  • Loading branch information
kelindar authored Jun 19, 2021
1 parent 7c6791a commit d617816
Show file tree
Hide file tree
Showing 16 changed files with 1,183 additions and 1,155 deletions.
19 changes: 9 additions & 10 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -427,16 +427,15 @@ The benchmarks below were run on a collection of *500 items* containing a dozen

```
cpu: Intel(R) Core(TM) i7-9700K CPU @ 3.60GHz
BenchmarkCollection/insert-8 27589314 43.05 ns/op 1 B/op 0 allocs/op
BenchmarkCollection/fetch-8 21041593 56.84 ns/op 0 B/op 0 allocs/op
BenchmarkCollection/count-slow-8 109107 11001 ns/op 0 B/op 0 allocs/op
BenchmarkCollection/count-8 9300270 128.6 ns/op 0 B/op 0 allocs/op
BenchmarkCollection/range-8 1871557 641.0 ns/op 0 B/op 0 allocs/op
BenchmarkCollection/select-8 1214799 975.8 ns/op 0 B/op 0 allocs/op
BenchmarkCollection/update-at-8 28573945 41.99 ns/op 0 B/op 0 allocs/op
BenchmarkCollection/update-all-8 184694 6481 ns/op 0 B/op 0 allocs/op
BenchmarkCollection/delete-at-8 2613982 459.1 ns/op 0 B/op 0 allocs/op
BenchmarkCollection/delete-all-8 296901 3762 ns/op 0 B/op 0 allocs/op
BenchmarkCollection/insert-8 5013795 239.9 ns/op 27 B/op 0 allocs/op
BenchmarkCollection/fetch-8 23730796 50.63 ns/op 0 B/op 0 allocs/op
BenchmarkCollection/scan-8 234990 4743 ns/op 0 B/op 0 allocs/op
BenchmarkCollection/count-8 7965873 152.7 ns/op 0 B/op 0 allocs/op
BenchmarkCollection/range-8 1512513 799.9 ns/op 0 B/op 0 allocs/op
BenchmarkCollection/update-at-8 5409420 224.7 ns/op 0 B/op 0 allocs/op
BenchmarkCollection/update-all-8 196626 6099 ns/op 0 B/op 0 allocs/op
BenchmarkCollection/delete-at-8 2006052 594.9 ns/op 0 B/op 0 allocs/op
BenchmarkCollection/delete-all-8 1889685 643.2 ns/op 0 B/op 0 allocs/op
```

## Contributing
Expand Down
35 changes: 24 additions & 11 deletions collection_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,15 +19,17 @@ import (
"github.com/stretchr/testify/assert"
)

// BenchmarkCollection/insert-8 5717271 210.1 ns/op 3 B/op 0 allocs/op
// BenchmarkCollection/fetch-8 23014076 52.73 ns/op 0 B/op 0 allocs/op
// BenchmarkCollection/unindexed-8 144264 7534 ns/op 0 B/op 0 allocs/op
// BenchmarkCollection/count-8 8954762 132.2 ns/op 0 B/op 0 allocs/op
// BenchmarkCollection/range-8 1760739 682.5 ns/op 0 B/op 0 allocs/op
// BenchmarkCollection/update-at-8 9917469 122.9 ns/op 0 B/op 0 allocs/op
// BenchmarkCollection/update-all-8 200008 6014 ns/op 0 B/op 0 allocs/op
// BenchmarkCollection/delete-at-8 2208020 544.8 ns/op 0 B/op 0 allocs/op
// BenchmarkCollection/delete-all-8 2013384 599.2 ns/op 0 B/op 0 allocs/op
/*
BenchmarkCollection/insert-8 5013795 239.9 ns/op 27 B/op 0 allocs/op
BenchmarkCollection/fetch-8 23730796 50.63 ns/op 0 B/op 0 allocs/op
BenchmarkCollection/scan-8 234990 4743 ns/op 0 B/op 0 allocs/op
BenchmarkCollection/count-8 7965873 152.7 ns/op 0 B/op 0 allocs/op
BenchmarkCollection/range-8 1512513 799.9 ns/op 0 B/op 0 allocs/op
BenchmarkCollection/update-at-8 5409420 224.7 ns/op 0 B/op 0 allocs/op
BenchmarkCollection/update-all-8 196626 6099 ns/op 0 B/op 0 allocs/op
BenchmarkCollection/delete-at-8 2006052 594.9 ns/op 0 B/op 0 allocs/op
BenchmarkCollection/delete-all-8 1889685 643.2 ns/op 0 B/op 0 allocs/op
*/
func BenchmarkCollection(b *testing.B) {
players := loadPlayers()
obj := Object{
Expand Down Expand Up @@ -62,7 +64,7 @@ func BenchmarkCollection(b *testing.B) {
assert.NotEmpty(b, name)
})

b.Run("unindexed", func(b *testing.B) {
b.Run("scan", func(b *testing.B) {
b.ReportAllocs()
b.ResetTimer()
for n := 0; n < b.N; n++ {
Expand Down Expand Up @@ -392,7 +394,18 @@ func loadPlayers() *Collection {

// Load the items into the collection
players := loadFixture("players.json")
out.CreateColumnsOf(players[0])
out.CreateColumn("serial", ForAny())
out.CreateColumn("name", ForAny())
out.CreateColumn("active", ForBool())
out.CreateColumn("class", ForEnum())
out.CreateColumn("race", ForEnum())
out.CreateColumn("age", ForFloat64())
out.CreateColumn("hp", ForFloat64())
out.CreateColumn("mp", ForFloat64())
out.CreateColumn("balance", ForFloat64())
out.CreateColumn("gender", ForEnum())
out.CreateColumn("guild", ForEnum())
out.CreateColumn("location", ForAny())
out.Query(func(txn *Txn) error {
for _, p := range players {
txn.Insert(p)
Expand Down
205 changes: 117 additions & 88 deletions columns.go → column.go
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
// Copyright (c) Roman Atachiants and contributors. All rights reserved.
// Licensed under the MIT license. See LICENSE file in the project root for details.

//go:generate genny -pkg=column -in=generic.go -out=z_numbers.go gen "number=float32,float64,int,int16,int32,int64,uint,uint16,uint32,uint64"
//go:generate genny -pkg=column -in=generic_test.go -out=z_numbers_test.go gen "number=float32,float64,int,int16,int32,int64,uint,uint16,uint32,uint64"
//go:generate genny -pkg=column -in=column_generate.go -out=column_numbers.go gen "number=float32,float64,int,int16,int32,int64,uint,uint16,uint32,uint64"

package column

Expand All @@ -14,6 +13,28 @@ import (
"github.com/kelindar/column/commit"
)

// columnType is a set of flags describing which optional contracts a column
// implementation supports.
type columnType uint8

// Flags for columnType. The non-zero flags occupy distinct bits so that they
// can be combined with a bitwise OR.
const (
	typeGeneric columnType = 0      // Generic column, every column should support this
	typeNumeric columnType = 1 << 0 // Numeric column supporting float64, int64 or uint64
	typeTextual columnType = 1 << 1 // Textual column supporting strings
)

// typeOf reports which optional contracts the given column implements. The
// result starts out as typeGeneric (zero) and gets one flag OR-ed in for each
// of the Numeric and Textual interfaces the column satisfies.
func typeOf(column Column) (typ columnType) {
	_, isNumeric := column.(Numeric)
	_, isTextual := column.(Textual)

	if isNumeric {
		typ |= typeNumeric
	}
	if isTextual {
		typ |= typeTextual
	}
	return typ
}

// --------------------------- Contracts ----------------------------

// Column represents a column implementation
type Column interface {
Grow(idx uint32)
Expand All @@ -24,11 +45,22 @@ type Column interface {
Index() *bitmap.Bitmap
}

// Numerical represents a numerical column implementation
type numerical interface {
Float64(uint32) (float64, bool)
Uint64(uint32) (uint64, bool)
Int64(uint32) (int64, bool)
// Numeric represents a column that stores numbers. On top of the base Column
// contract it exposes typed loads and predicate-based filters for float64,
// uint64 and int64.
type Numeric interface {
Column
// Load* retrieve the value at the given index converted to the named type.
// NOTE(review): the boolean presumably reports whether a value was present — confirm against implementations.
LoadFloat64(uint32) (float64, bool)
LoadUint64(uint32) (uint64, bool)
LoadInt64(uint32) (int64, bool)
// Filter* take a bitmap and a predicate over the named type.
// NOTE(review): presumably they narrow the bitmap to the entries for which the predicate holds — confirm against implementations.
FilterFloat64(*bitmap.Bitmap, func(v float64) bool)
FilterUint64(*bitmap.Bitmap, func(v uint64) bool)
FilterInt64(*bitmap.Bitmap, func(v int64) bool)
}

// Textual represents a column that stores strings. On top of the base Column
// contract it exposes a string load and a predicate-based string filter.
type Textual interface {
Column
// LoadString retrieves the string at the given index; the boolean is false
// when no string is available there.
LoadString(uint32) (string, bool)
// FilterString filters down the given bitmap based on the specified predicate.
FilterString(*bitmap.Bitmap, func(v string) bool)
}

// --------------------------- Constructors ----------------------------
Expand All @@ -48,6 +80,7 @@ var (
ForUint32 = makeUint32s
ForUint64 = makeUint64s
ForBool = makeBools
ForEnum = makeEnum
)

// ForKind creates a new column instance for a specified reflect.Kind
Expand Down Expand Up @@ -85,18 +118,30 @@ func ForKind(kind reflect.Kind) Column {
// column represents a column wrapper that synchronizes operations
type column struct {
sync.RWMutex
name string
Column
kind columnType // The type of the colum
name string // The name of the column
}

// columnFor wraps a column implementation into a synchronized column,
// recording the column's name together with the set of contracts (as computed
// by typeOf) that the implementation supports.
func columnFor(name string, v Column) *column {
	wrapped := new(column)
	wrapped.Column = v
	wrapped.name = name
	wrapped.kind = typeOf(v)
	return wrapped
}

// IsNumeric reports whether the column supports numerical operations, i.e.
// whether the typeNumeric flag is set in the column's kind.
func (c *column) IsNumeric() bool {
	return c.kind&typeNumeric == typeNumeric
}

// IsTextual reports whether the column supports string operations, i.e.
// whether the typeTextual flag is set in the column's kind.
func (c *column) IsTextual() bool {
	return c.kind&typeTextual == typeTextual
}

// Intersect performs a logical and operation and updates the destination bitmap.
func (c *column) Intersect(dst *bitmap.Bitmap) {
c.RLock()
Expand Down Expand Up @@ -147,6 +192,14 @@ func (c *column) Value(idx uint32) (v interface{}, ok bool) {
return
}

// String retrieves a string value at a specified index. The read lock is held
// for the duration of the lookup, which is delegated to loadString.
func (c *column) String(idx uint32) (v string, ok bool) {
	c.RLock()
	v, ok = c.loadString(idx)
	c.RUnlock()
	return
}

// Float64 retrieves a float64 value at a specified index
func (c *column) Float64(idx uint32) (v float64, ok bool) {
c.RLock()
Expand Down Expand Up @@ -177,26 +230,34 @@ func (c *column) loadValue(idx uint32) (v interface{}, ok bool) {
return
}

// loadString (unlocked) retrieves a string value at a specified index. It
// reports ok=false when the underlying column does not implement Textual, or
// when that column's LoadString reports no string at idx. No locking is done
// here; String acquires the read lock before calling it.
func (c *column) loadString(idx uint32) (v string, ok bool) {
	// The type-assertion result must not be called "ok": that would shadow the
	// named result, the assignment below would target the inner variable, and
	// the function would always return ok=false.
	if txt, contains := c.Column.(Textual); contains {
		v, ok = txt.LoadString(idx)
	}
	return
}

// loadFloat64 (unlocked) retrieves a float64 value at a specified index
func (c *column) loadFloat64(idx uint32) (v float64, ok bool) {
if n, contains := c.Column.(numerical); contains {
v, ok = n.Float64(idx)
if n, contains := c.Column.(Numeric); contains {
v, ok = n.LoadFloat64(idx)
}
return
}

// loadInt64 (unlocked) retrieves an int64 value at a specified index
func (c *column) loadInt64(idx uint32) (v int64, ok bool) {
if n, contains := c.Column.(numerical); contains {
v, ok = n.Int64(idx)
if n, contains := c.Column.(Numeric); contains {
v, ok = n.LoadInt64(idx)
}
return
}

// loadUint64 (unlocked) retrieves an uint64 value at a specified index
func (c *column) loadUint64(idx uint32) (v uint64, ok bool) {
if n, contains := c.Column.(numerical); contains {
v, ok = n.Uint64(idx)
if n, contains := c.Column.(Numeric); contains {
v, ok = n.LoadUint64(idx)
}
return
}
Expand All @@ -219,11 +280,18 @@ func makeAny() Column {

// Grow grows the size of the column until we have enough to store
func (c *columnAny) Grow(idx uint32) {
// TODO: also grow the bitmap
size := uint32(len(c.data))
for i := size; i <= idx; i++ {
c.data = append(c.data, nil)
if idx < uint32(len(c.data)) {
return
}

if idx < uint32(cap(c.data)) {
c.data = c.data[:idx+1]
return
}

clone := make([]interface{}, idx+1, capacityFor(idx+1))
copy(clone, c.data)
c.data = clone
}

// Update performs a series of updates at once
Expand Down Expand Up @@ -261,6 +329,26 @@ func (c *columnAny) Index() *bitmap.Bitmap {
return &c.fill
}

// LoadString retrieves the value at a specified index as a string. The boolean
// is true only when Value reports a value at idx and that value is a string.
func (c *columnAny) LoadString(idx uint32) (string, bool) {
	raw, present := c.Value(idx)
	text, isString := raw.(string)
	if !present || !isString {
		return text, false
	}
	return text, true
}

// FilterString filters down the given bitmap using the specified predicate.
// An index is rejected outright when it lies beyond the stored data, is not
// marked in the fill list, or does not hold a string; for the remaining
// indices the predicate decides.
func (c *columnAny) FilterString(index *bitmap.Bitmap, predicate func(v string) bool) {
	index.Filter(func(idx uint32) (match bool) {
		if idx >= uint32(len(c.data)) || !c.fill.Contains(idx) {
			return false
		}

		s, ok := c.LoadString(idx)
		return ok && predicate(s)
	})
}

// --------------------------- booleans ----------------------------

// columnBool represents a boolean column
Expand Down Expand Up @@ -315,75 +403,16 @@ func (c *columnBool) Index() *bitmap.Bitmap {
return &c.data
}

// --------------------------- computed index ----------------------------
// --------------------------- funcs ----------------------------

// computed represents a computed column.
type computed interface {
// Column returns a column name.
// NOTE(review): presumably the name of the source column the computed column is derived from — confirm.
Column() string
}

// index represents the index implementation: a bitmap of the positions whose
// value satisfied the rule.
type index struct {
fill bitmap.Bitmap // Bits set for positions where rule returned true
prop string // Name of the target column, returned by Column()
rule func(v interface{}) bool // Predicate applied to each value written by a put update
}

// newIndex builds an index over columnName governed by rule and wraps it into
// a synchronized column registered under indexName. The fill bitmap starts
// empty with a small pre-allocated capacity.
func newIndex(indexName, columnName string, rule func(v interface{}) bool) *column {
	idx := &index{
		fill: make(bitmap.Bitmap, 0, 4),
		prop: columnName,
		rule: rule,
	}
	return columnFor(indexName, idx)
}

// Grow is meant to grow the column until it can store the given index. It is
// currently a no-op for the index (see TODO below).
func (c *index) Grow(idx uint32) {
// TODO
}

// Column returns the name of the target column on which this index should
// apply, i.e. the columnName the index was created with.
func (c *index) Column() string {
return c.prop
}

// Update applies a batch of updates to the index. Only put operations are
// considered, since the index is derived from the final stored value: for each
// put, the bit at the update's index is set when the rule accepts the value
// and removed otherwise. All other update types are ignored.
func (c *index) Update(updates []commit.Update) {
	for _, u := range updates {
		if u.Type != commit.Put {
			continue
		}

		if c.rule(u.Value) {
			c.fill.Set(u.Index)
			continue
		}
		c.fill.Remove(u.Index)
	}
}

// Delete deletes a set of items from the column by applying an and-not of the
// given bitmap to the fill list.
func (c *index) Delete(items *bitmap.Bitmap) {
c.fill.AndNot(*items)
}

// Value retrieves the indexed flag at a specified index. When idx lies within
// the range covered by the fill bitmap (its length shifted left by 6) the
// result is the boolean returned by Contains together with ok=true; otherwise
// it is (nil, false).
func (c *index) Value(idx uint32) (v interface{}, ok bool) {
	if limit := uint32(len(c.fill)) << 6; idx >= limit {
		return
	}
	return c.fill.Contains(idx), true
}

// Contains checks whether the column has a value at a specified index, i.e.
// whether the fill bitmap contains idx.
func (c *index) Contains(idx uint32) bool {
return c.fill.Contains(idx)
}

// Index returns the fill list for the column
func (c *index) Index() *bitmap.Bitmap {
return &c.fill
// capacityFor returns the smallest power of two that is greater than or equal
// to v, and 0 when v is 0.
//
// The bit-smearing is done on a 64-bit value: with 32-bit arithmetic any
// v above 1<<31 would wrap around to 0 on the final increment, yielding a
// capacity smaller than the requested length. With a 64-bit int the result for
// such inputs is 1<<32.
func capacityFor(v uint32) int {
	if v == 0 {
		return 0
	}

	// Propagate the highest set bit of v-1 into every lower position, then
	// add one to reach the next power of two.
	n := uint64(v) - 1
	n |= n >> 1
	n |= n >> 2
	n |= n >> 4
	n |= n >> 8
	n |= n >> 16
	n++
	return int(n)
}
Loading

0 comments on commit d617816

Please sign in to comment.