Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Simplify enum strings column #32

Merged
merged 1 commit into from
Nov 6, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
92 changes: 23 additions & 69 deletions column_strings.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,37 +4,35 @@
package column

import (
"encoding/binary"
"hash/crc32"
"math"
"reflect"
"sync"
"unsafe"

"github.com/kelindar/bitmap"
"github.com/kelindar/column/commit"
"github.com/kelindar/intmap"
)

// --------------------------- Enum ----------------------------

var _ Textual = new(columnEnum)

// columnEnum represents an enumerable string column
// columnEnum represents a string column
type columnEnum struct {
lock sync.RWMutex
fill bitmap.Bitmap // The fill-list
locs []uint32 // The list of locations
data []byte // The actual values
cache map[string]uint32 // Cache for string locations (no need to persist)
lock sync.RWMutex
fill bitmap.Bitmap // The fill-list
locs []uint32 // The list of locations
seek *intmap.Map // The hash->location table
data []string // The string data
}

// makeEnum creates a new column
func makeEnum() Column {
return &columnEnum{
fill: make(bitmap.Bitmap, 0, 4),
locs: make([]uint32, 0, 64),
data: make([]byte, 0, 16*32),
cache: make(map[string]uint32, 16),
fill: make(bitmap.Bitmap, 0, 4),
locs: make([]uint32, 0, 64),
seek: intmap.New(64, .95),
data: make([]string, 0, 64),
}
}

Expand All @@ -61,24 +59,9 @@ func (c *columnEnum) Apply(r *commit.Reader) {
for r.Next() {
switch r.Type {
case commit.Put:
// Attempt to find if we already have the location of this value from the
// cache, and if we don't, find it and set the offset for faster lookup.
value := r.String()

c.lock.RLock()
offset, cached := c.cache[value]
c.lock.RUnlock()

if !cached {
c.lock.Lock()
offset = c.findOrAdd(value)
c.cache[value] = offset
c.lock.Unlock()
}

// Set the value at the index
c.fill[r.Offset>>6] |= 1 << (r.Offset & 0x3f)
c.locs[r.Offset] = offset
c.locs[r.Offset] = c.findOrAdd(r.Bytes())

case commit.Delete:
c.fill.Remove(r.Index())
Expand All @@ -89,34 +72,24 @@ func (c *columnEnum) Apply(r *commit.Reader) {
}

// findOrAdd searches for the string, adds it if it is not present, and returns its offset
func (c *columnEnum) findOrAdd(v string) uint32 {
value := toBytes(v)
target := crc32.ChecksumIEEE(value)
for i := 0; i < len(c.data); {
hash := binary.BigEndian.Uint32(c.data[i : i+4])
size := int(c.data[i+4])
if hash == target {
return uint32(i + 4)
}
func (c *columnEnum) findOrAdd(v []byte) uint32 {
target := crc32.ChecksumIEEE(v)

i += 5 + size
c.lock.Lock()
defer c.lock.Unlock()
if at, ok := c.seek.Load(target); ok {
return at
}

// Not found, add
var head [5]byte
binary.BigEndian.PutUint32(head[0:4], target)
head[4] = byte(len(value)) // Max 255 chars
addedAt := len(c.data)
c.data = append(c.data, head[:]...)
c.data = append(c.data, value...)
return uint32(addedAt + 4)
offset := uint32(len(c.data))
c.data = append(c.data, string(v))
c.seek.Store(target, offset)
return offset
}

// readAt reads a string at a location
func (c *columnEnum) readAt(at uint32) string {
size := uint32(c.data[at])
data := c.data[at+1 : at+1+size]
return toString(&data)
return c.data[at]
}

// Value retrieves a value at a specified index
Expand Down Expand Up @@ -255,22 +228,3 @@ func (c *columnString) FilterString(offset uint32, index bitmap.Bitmap, predicat
return idx < uint32(len(c.data)) && predicate(c.data[idx])
})
}

// --------------------------- Convert ----------------------------

// toBytes reinterprets the string v as a byte slice without copying or
// allocating. The returned slice points at the same memory as v, with both
// its length and capacity set to len(v). Since Go strings are immutable,
// callers must treat the result as read-only.
func toBytes(v string) (b []byte) {
	src := (*reflect.StringHeader)(unsafe.Pointer(&v))
	dst := (*reflect.SliceHeader)(unsafe.Pointer(&b))

	// Point the slice header at the string's data; the capacity is pinned to
	// the length so the slice never exposes memory beyond the string.
	dst.Data, dst.Len, dst.Cap = src.Data, len(v), len(v)
	return b
}

// toString reinterprets the byte slice pointed to by b as a string without
// copying or allocating. The returned string shares memory with the slice, so
// the slice must not be modified while the string is still in use.
func toString(b *[]byte) string {
	// The leading two words of a slice header (data pointer, length) match
	// the layout of a string header, so the pointer can be reinterpreted.
	header := unsafe.Pointer(b)
	return *(*string)(header)
}
107 changes: 54 additions & 53 deletions examples/bench/README.md
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
# Concurrency Benchmark

This is an example benchmark with various workloads (90% read / 10% write, etc) on a collection of 1 million elements with different goroutine pools. In this example we're combining two types of transactions:
* Read transactions that read a random element (point-read).
* Write transactions that update a random element (point-write).

- Read transactions that read a random element (point-read).
- Write transactions that update a random element (point-write).

Note that the goal of this benchmark is to validate concurrency, not throughput. This represents the current "best" case scenario, where the updates are random and are less likely to incur contention. Reads, however, would quite often hit the same chunks, as only the index itself is randomized.

Expand All @@ -12,54 +13,54 @@ Below are some results from running on my 8-core machine (Intel(R) Core(TM) i7-9

```
WORK PROCS READ RATE WRITE RATE
100%-0% 1 8,149,482 txn/s 0 txn/s
100%-0% 2 12,622,747 txn/s 0 txn/s
100%-0% 4 14,378,647 txn/s 0 txn/s
100%-0% 8 16,298,860 txn/s 0 txn/s
100%-0% 16 16,276,835 txn/s 0 txn/s
100%-0% 32 16,297,247 txn/s 0 txn/s
100%-0% 64 16,214,731 txn/s 0 txn/s
100%-0% 128 16,185,721 txn/s 0 txn/s
100%-0% 256 16,171,638 txn/s 0 txn/s
100%-0% 512 16,237,574 txn/s 0 txn/s
90%-10% 1 2,248,513 txn/s 239,309 txn/s
90%-10% 2 2,297,998 txn/s 226,016 txn/s
90%-10% 4 1,432,691 txn/s 184,189 txn/s
90%-10% 8 1,112,076 txn/s 153,934 txn/s
90%-10% 16 1,432,723 txn/s 147,244 txn/s
90%-10% 32 1,375,383 txn/s 161,755 txn/s
90%-10% 64 1,441,755 txn/s 144,570 txn/s
90%-10% 128 1,272,174 txn/s 140,107 txn/s
90%-10% 256 925,191 txn/s 105,999 txn/s
90%-10% 512 858,555 txn/s 89,202 txn/s
50%-50% 1 305,245 txn/s 320,159 txn/s
50%-50% 2 262,496 txn/s 250,654 txn/s
50%-50% 4 255,906 txn/s 262,823 txn/s
50%-50% 8 238,096 txn/s 225,565 txn/s
50%-50% 16 236,144 txn/s 240,810 txn/s
50%-50% 32 250,954 txn/s 237,928 txn/s
50%-50% 64 214,474 txn/s 220,495 txn/s
50%-50% 128 156,660 txn/s 162,219 txn/s
50%-50% 256 125,956 txn/s 120,344 txn/s
50%-50% 512 103,619 txn/s 98,510 txn/s
10%-90% 1 40,723 txn/s 339,694 txn/s
10%-90% 2 24,746 txn/s 298,934 txn/s
10%-90% 4 35,483 txn/s 290,769 txn/s
10%-90% 8 34,265 txn/s 279,838 txn/s
10%-90% 16 28,678 txn/s 274,759 txn/s
10%-90% 32 23,662 txn/s 227,651 txn/s
10%-90% 64 36,056 txn/s 208,993 txn/s
10%-90% 128 17,463 txn/s 149,558 txn/s
10%-90% 256 14,125 txn/s 113,701 txn/s
10%-90% 512 11,435 txn/s 96,999 txn/s
0%-100% 1 0 txn/s 345,335 txn/s
0%-100% 2 0 txn/s 297,386 txn/s
0%-100% 4 0 txn/s 300,023 txn/s
0%-100% 8 0 txn/s 276,361 txn/s
0%-100% 16 0 txn/s 243,448 txn/s
0%-100% 32 0 txn/s 208,523 txn/s
0%-100% 64 0 txn/s 195,732 txn/s
0%-100% 128 0 txn/s 145,990 txn/s
0%-100% 256 0 txn/s 110,786 txn/s
0%-100% 512 0 txn/s 94,313 txn/s
```
100%-0% 1 8,877,887 txn/s 0 txn/s
100%-0% 2 15,898,759 txn/s 0 txn/s
100%-0% 4 30,186,227 txn/s 0 txn/s
100%-0% 8 60,411,415 txn/s 0 txn/s
100%-0% 16 60,562,479 txn/s 0 txn/s
100%-0% 32 61,969,664 txn/s 0 txn/s
100%-0% 64 61,116,153 txn/s 0 txn/s
100%-0% 128 61,273,966 txn/s 0 txn/s
100%-0% 256 62,303,786 txn/s 0 txn/s
100%-0% 512 62,162,812 txn/s 0 txn/s
90%-10% 1 2,007,549 txn/s 223,615 txn/s
90%-10% 2 2,405,165 txn/s 252,705 txn/s
90%-10% 4 2,375,443 txn/s 255,679 txn/s
90%-10% 8 2,332,451 txn/s 234,237 txn/s
90%-10% 16 2,002,032 txn/s 218,043 txn/s
90%-10% 32 2,264,347 txn/s 201,639 txn/s
90%-10% 64 1,491,475 txn/s 181,956 txn/s
90%-10% 128 1,537,664 txn/s 180,435 txn/s
90%-10% 256 1,565,039 txn/s 157,420 txn/s
90%-10% 512 1,241,398 txn/s 124,654 txn/s
50%-50% 1 285,995 txn/s 298,950 txn/s
50%-50% 2 279,422 txn/s 287,377 txn/s
50%-50% 4 298,716 txn/s 265,197 txn/s
50%-50% 8 258,017 txn/s 250,169 txn/s
50%-50% 16 267,412 txn/s 238,427 txn/s
50%-50% 32 217,380 txn/s 201,791 txn/s
50%-50% 64 161,592 txn/s 178,441 txn/s
50%-50% 128 156,302 txn/s 147,838 txn/s
50%-50% 256 98,375 txn/s 114,311 txn/s
50%-50% 512 104,266 txn/s 96,785 txn/s
10%-90% 1 36,726 txn/s 315,646 txn/s
10%-90% 2 25,663 txn/s 244,789 txn/s
10%-90% 4 31,266 txn/s 234,497 txn/s
10%-90% 8 24,672 txn/s 221,105 txn/s
10%-90% 16 22,289 txn/s 205,061 txn/s
10%-90% 32 16,630 txn/s 188,473 txn/s
10%-90% 64 21,779 txn/s 216,389 txn/s
10%-90% 128 19,997 txn/s 164,261 txn/s
10%-90% 256 12,962 txn/s 109,386 txn/s
10%-90% 512 10,434 txn/s 93,333 txn/s
0%-100% 1 0 txn/s 313,133 txn/s
0%-100% 2 0 txn/s 239,831 txn/s
0%-100% 4 0 txn/s 231,702 txn/s
0%-100% 8 0 txn/s 218,349 txn/s
0%-100% 16 0 txn/s 204,190 txn/s
0%-100% 32 0 txn/s 192,038 txn/s
0%-100% 64 0 txn/s 173,347 txn/s
0%-100% 128 0 txn/s 138,415 txn/s
0%-100% 256 0 txn/s 105,254 txn/s
0%-100% 512 0 txn/s 93,103 txn/s
```
2 changes: 1 addition & 1 deletion examples/cache/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -53,4 +53,4 @@ running insert of 50000 rows...
running query of user_11255...
Hi, User 11255 true
-> query took 1.271µs
```
```
17 changes: 9 additions & 8 deletions examples/million/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
This example adds one million rows to a collection, then runs and measures a few different queries and transactions against it.

## Example output

```
running insert of 1000000 rows...
-> inserted 0 rows
Expand All @@ -15,29 +16,29 @@ running insert of 1000000 rows...
-> inserted 700000 rows
-> inserted 800000 rows
-> inserted 900000 rows
-> insert took 1.2334132s
-> insert took 1.0899436s

running full scan of age >= 30...
-> result = 510000
-> full scan took 2.715572ms
-> full scan took 2.834398ms

running full scan of class == "rogue"...
-> result = 358000
-> full scan took 4.481034ms
-> full scan took 4.381428ms

running indexed query of human mages...
-> result = 68000
-> indexed query took 15.954µs
-> indexed query took 17.813µs

running indexed query of human female mages...
-> result = 32000
-> indexed query took 19.908µs
-> indexed query took 22.354µs

running update of balance of everyone...
-> updated 1000000 rows
-> update took 11.146144ms
-> update took 11.882838ms

running update of age of mages...
-> updated 302000 rows
-> update took 4.25921ms
```
-> update took 4.353562ms
```
7 changes: 4 additions & 3 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ go 1.17

require (
github.com/kelindar/bitmap v1.1.3
github.com/kelindar/intmap v1.0.1
github.com/kelindar/smutex v1.0.0
github.com/stretchr/testify v1.7.0
)
Expand All @@ -13,8 +14,8 @@ require (
github.com/dustin/go-humanize v1.0.0
github.com/kelindar/async v1.0.0
github.com/kelindar/xxrand v1.0.1
github.com/klauspost/cpuid/v2 v2.0.6 // indirect
github.com/klauspost/cpuid/v2 v2.0.9 // indirect
github.com/pmezard/go-difflib v1.0.0 // indirect
golang.org/x/time v0.0.0-20210220033141-f8bda1e9f3ba // indirect
gopkg.in/yaml.v3 v3.0.0-20200605160147-a5ece683394c // indirect
golang.org/x/time v0.0.0-20210723032227-1f47c861a9ac // indirect
gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b // indirect
)
12 changes: 8 additions & 4 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -7,21 +7,25 @@ github.com/kelindar/async v1.0.0 h1:oJiFAt3fVB/b5zVZKPBU+pP9lR3JVyeox9pYlpdnIK8=
github.com/kelindar/async v1.0.0/go.mod h1:bJRlwaRiqdHi+4dpVDNHdwgyRyk6TxpA21fByLf7hIY=
github.com/kelindar/bitmap v1.1.3 h1:rLtS9wZEb3xk/3AY13JPfQ+09UtEOWJN7JkbWUp6EnI=
github.com/kelindar/bitmap v1.1.3/go.mod h1:shAFyS8BOif+pvJ05GqxnCM0SdohHQjKvDetqI/9z6M=
github.com/kelindar/intmap v1.0.1 h1:8tidWd72+NvFvHhGsMvtmMDZmDXRY0DhXbhS8FgW88E=
github.com/kelindar/intmap v1.0.1/go.mod h1:tDanawPWq1B0HC+X3W8Z6IKNrJqxjruy6CdyTlf6Nic=
github.com/kelindar/smutex v1.0.0 h1:+LIZYwPz+v3IWPOse764fNaVQGMVxKV6mbD6OWjQV3o=
github.com/kelindar/smutex v1.0.0/go.mod h1:nMbCZeAHWCsY9Kt4JqX7ETd+NJeR6Swy9im+Th+qUZQ=
github.com/kelindar/xxrand v1.0.1 h1:TG9Ix5h3ulBXVWwRUF8ePXl65FjIj48CzsgZw0nHvfY=
github.com/kelindar/xxrand v1.0.1/go.mod h1:tb7XX0TvlKSIsCqkVUs7GAWdkeab3Ln2vWWxHEADDuA=
github.com/klauspost/cpuid/v2 v2.0.6 h1:dQ5ueTiftKxp0gyjKSx5+8BtPWkyQbd95m8Gys/RarI=
github.com/klauspost/cpuid/v2 v2.0.6/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg=
github.com/klauspost/cpuid/v2 v2.0.9 h1:lgaqFMSdTdQYdZ04uHyN2d/eKdOMyi2YLSvlQIBFYa4=
github.com/klauspost/cpuid/v2 v2.0.9/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.7.0 h1:nwc3DEeHmmLAfoZucVR881uASk0Mfjw8xYJ99tb5CcY=
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
golang.org/x/time v0.0.0-20210220033141-f8bda1e9f3ba h1:O8mE0/t419eoIwhTFpKVkHiTs/Igowgfkj25AcZrtiE=
golang.org/x/time v0.0.0-20210220033141-f8bda1e9f3ba/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
golang.org/x/time v0.0.0-20210723032227-1f47c861a9ac h1:7zkz7BUtwNFFqcowJ+RIgu2MaV/MapERkDIy+mwPyjs=
golang.org/x/time v0.0.0-20210723032227-1f47c861a9ac/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gopkg.in/yaml.v3 v3.0.0-20200605160147-a5ece683394c h1:grhR+C34yXImVGp7EzNk+DTIk+323eIUWOmEevy6bDo=
gopkg.in/yaml.v3 v3.0.0-20200605160147-a5ece683394c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b h1:h8qDotaEPuJATrMmW04NCwg7v22aHH28wwpauUhK9Oo=
gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=