From ad954c7d77f0a5d029d921c8d6d932466c91720c Mon Sep 17 00:00:00 2001 From: Zejun Li Date: Mon, 19 Aug 2019 11:12:36 +0800 Subject: [PATCH 01/10] kv: replace memdb with a more memory efficient version --- kv/memdb/arena.go | 119 ++++++++++++ kv/memdb/iterator.go | 102 +++++++++++ kv/memdb/memdb.go | 398 +++++++++++++++++++++++++++++++++++++++++ kv/memdb/memdb.s | 12 ++ kv/memdb/memdb_test.go | 256 ++++++++++++++++++++++++++ kv/memdb_buffer.go | 58 +++--- 6 files changed, 921 insertions(+), 24 deletions(-) create mode 100644 kv/memdb/arena.go create mode 100644 kv/memdb/iterator.go create mode 100644 kv/memdb/memdb.go create mode 100644 kv/memdb/memdb.s create mode 100644 kv/memdb/memdb_test.go diff --git a/kv/memdb/arena.go b/kv/memdb/arena.go new file mode 100644 index 0000000000000..deedc36610777 --- /dev/null +++ b/kv/memdb/arena.go @@ -0,0 +1,119 @@ +// Copyright 2019 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. + +package memdb + +import "math" + +type arenaAddr uint64 + +const ( + alignMask = 1<<32 - 8 // 29 bit 1 and 3 bit 0. + nullBlockOffset = math.MaxUint32 + maxBlockSize = 128 << 20 + nullArenaAddr arenaAddr = 0 +) + +func (addr arenaAddr) blockIdx() int { + return int(addr>>32 - 1) +} + +func (addr arenaAddr) blockOffset() uint32 { + return uint32(addr) +} + +func newArenaAddr(blockIdx int, blockOffset uint32) arenaAddr { + return arenaAddr(uint64(blockIdx+1)<<32 | uint64(blockOffset)) +} + +type arena struct { + blockSize int + availIdx int + blocks []*arenaBlock +} + +func newArenaLocator(initBlockSize int) *arena { + return &arena{ + blockSize: initBlockSize, + blocks: []*arenaBlock{newArenaBlock(initBlockSize)}, + } +} + +func (a *arena) getFrom(addr arenaAddr) []byte { + return a.blocks[addr.blockIdx()].getFrom(addr.blockOffset()) +} + +func (a *arena) alloc(size int) (arenaAddr, []byte) { + if size > a.blockSize { + // Use a separate block to store entry which size larger than specified block size. + blk := newArenaBlock(size) + addr := newArenaAddr(len(a.blocks), 0) + a.blocks = append(a.blocks, blk) + return addr, blk.buf + } + + for { + block := a.blocks[a.availIdx] + blockOffset := block.alloc(size) + if blockOffset != nullBlockOffset { + return newArenaAddr(a.availIdx, blockOffset), block.buf[blockOffset : int(blockOffset)+size] + } + + blockSize := a.blockSize << 1 + if blockSize <= maxBlockSize { + a.blockSize = blockSize + } + a.blocks = append(a.blocks, newArenaBlock(a.blockSize)) + a.availIdx = len(a.blocks) - 1 + } +} + +func (a *arena) reset() { + a.availIdx = 0 + a.blockSize = len(a.blocks[0].buf) + a.blocks = a.blocks[:1] + a.blocks[0].reset() +} + +type arenaBlock struct { + buf []byte + ref uint64 + length int +} + +func newArenaBlock(blockSize int) *arenaBlock { + return &arenaBlock{ + buf: make([]byte, blockSize), + } +} + +func (a *arenaBlock) getFrom(offset uint32) []byte { + return a.buf[offset:] +} + +func (a *arenaBlock) alloc(size int) uint32 { + // The returned addr should be aligned in 8 bytes. + offset := (a.length + 7) & alignMask + a.length = offset + size + if a.length > len(a.buf) { + return nullBlockOffset + } + a.ref++ + return uint32(offset) +} + +func (a *arenaBlock) reset() { + a.buf = a.buf[:0] + a.ref = 0 + a.length = 0 +} diff --git a/kv/memdb/iterator.go b/kv/memdb/iterator.go new file mode 100644 index 0000000000000..3da99255a2b97 --- /dev/null +++ b/kv/memdb/iterator.go @@ -0,0 +1,102 @@ +// Copyright 2019 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. + +package memdb + +import "unsafe" + +// Iterator iterates the entries in the DB. +type Iterator struct { + db *DB + curr *node + key []byte + val []byte +} + +// NewIterator returns a new Iterator for the lock store. +func (db *DB) NewIterator() Iterator { + return Iterator{ + db: db, + } +} + +// Valid returns true iff the iterator is positioned at a valid node. +func (it *Iterator) Valid() bool { return it.curr != nil } + +// Key returns the key at the current position. +func (it *Iterator) Key() []byte { + return it.key +} + +// Value returns value. +func (it *Iterator) Value() []byte { + return it.val +} + +// Next moves the iterator to the next entry. +func (it *Iterator) Next() { + it.changeToAddr(it.curr.getNextAddr(0)) +} + +// Prev moves the iterator to the previous entry. +func (it *Iterator) Prev() { + it.changeToAddr(it.curr.getPrevAddr()) +} + +// Seek locates the iterator to the first entry with a key >= seekKey. +func (it *Iterator) Seek(seekKey []byte) { + node, nodeData, _ := it.db.findGreater(seekKey, true) // find >=. + it.updateState(node, nodeData) +} + +// SeekForPrev locates the iterator to the last entry with key <= target. +func (it *Iterator) SeekForPrev(target []byte) { + node, nodeData, _ := it.db.findLess(target, true) // find <=. + it.updateState(node, nodeData) +} + +// SeekForExclusivePrev locates the iterator to the last entry with key < target. +func (it *Iterator) SeekForExclusivePrev(target []byte) { + node, nodeData, _ := it.db.findLess(target, false) + it.updateState(node, nodeData) +} + +// SeekToFirst locates the iterator to the first entry. +func (it *Iterator) SeekToFirst() { + node, nodeData := it.db.getNext(it.db.head.node, 0) + it.updateState(node, nodeData) +} + +// SeekToLast locates the iterator to the last entry. +func (it *Iterator) SeekToLast() { + node, nodeData := it.db.findLast() + it.updateState(node, nodeData) +} + +func (it *Iterator) updateState(node *node, nodeData []byte) { + it.curr = node + if node != nil { + it.key = node.getKey(nodeData) + it.val = node.getValue(nodeData) + } +} + +func (it *Iterator) changeToAddr(addr arenaAddr) { + var data []byte + var n *node + if addr != nullArenaAddr { + data = it.db.getArena().getFrom(addr) + n = (*node)(unsafe.Pointer(&data[0])) + } + it.updateState(n, data) +} diff --git a/kv/memdb/memdb.go b/kv/memdb/memdb.go new file mode 100644 index 0000000000000..35e5e2540133d --- /dev/null +++ b/kv/memdb/memdb.go @@ -0,0 +1,398 @@ +// Copyright 2019 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. + +package memdb + +import ( + "bytes" + "math" + "unsafe" +) + +const ( + maxHeight = 16 + nodeHeaderSize = int(unsafe.Sizeof(nodeHeader{})) +) + +// DB is an in-memory key/value database. +type DB struct { + height int + head nodeWithAddr + arena *arena + + length int + size int +} + +// New creates a new initialized in-memory key/value DB. +// The initBlockSize is the size of first block. +// This DB is append-only, deleting an entry would remove entry node but not +// reclaim KV buffer. +func New(initBlockSize int) *DB { + return &DB{ + height: 1, + head: nodeWithAddr{node: new(node)}, + arena: newArenaLocator(initBlockSize), + } +} + +// Reset resets the DB to initial empty state. +// Release all blocks except the init one. +func (db *DB) Reset() { + db.height = 1 + db.head.node = new(node) + db.length = 0 + db.size = 0 + db.arena.reset() +} + +// Get gets the value for the given key. It returns nil if the +// DB does not contain the key. +func (db *DB) Get(key []byte) []byte { + node, data, match := db.findGreater(key, true) + if !match { + return nil + } + return node.getValue(data) +} + +// Put sets the value for the given key. +// It overwrites any previous value for that key. +func (db *DB) Put(key []byte, v []byte) bool { + arena := db.getArena() + lsHeight := db.getHeight() + var prev [maxHeight + 1]nodeWithAddr + var next [maxHeight + 1]nodeWithAddr + prev[lsHeight] = db.head + + var exists bool + for i := lsHeight - 1; i >= 0; i-- { + // Use higher level to speed up for current level. + prev[i], next[i], exists = db.findSpliceForLevel(db.getArena(), key, prev[i+1], i) + } + + var height int + if !exists { + height = db.randomHeight() + } else { + height = db.prepareOverwrite(next[:]) + } + + x, addr := db.newNode(arena, key, v, height) + if height > lsHeight { + db.setHeight(height) + } + + // We always insert from the base level and up. After you add a node in base level, we cannot + // create a node in the level above because it would have discovered the node in the base level. + for i := 0; i < height; i++ { + x.nexts[i] = uint64(next[i].addr) + if prev[i].node == nil { + prev[i] = db.head + } + prev[i].setNextAddr(i, addr) + } + + x.prev = uint64(prev[0].addr) + if next[0].node != nil { + next[0].prev = uint64(addr) + } + + db.length++ + db.size += len(key) + len(v) + return true +} + +func (db *DB) prepareOverwrite(next []nodeWithAddr) int { + old := next[0] + height := int(old.height) + db.size -= int(old.valLen) + int(old.keyLen) + for i := 0; i < height; i++ { + if next[i].addr == old.addr { + next[i].addr = old.getNextAddr(i) + if next[i].addr != nullArenaAddr { + data := db.getArena().getFrom(next[i].addr) + next[i].node = (*node)(unsafe.Pointer(&data[0])) + } + } + } + return height +} + +// Delete deletes the value for the given key. +// It returns false if the DB does not contain the key. +func (db *DB) Delete(key []byte) bool { + listHeight := db.getHeight() + var prev [maxHeight + 1]nodeWithAddr + prev[listHeight] = db.head + + var keyNode nodeWithAddr + var match bool + for i := listHeight - 1; i >= 0; i-- { + prev[i], keyNode, match = db.findSpliceForLevel(db.getArena(), key, prev[i+1], i) + } + if !match { + return false + } + + for i := int(keyNode.height) - 1; i >= 0; i-- { + prev[i].setNextAddr(i, keyNode.getNextAddr(i)) + } + nextAddr := keyNode.getNextAddr(0) + if nextAddr != nullArenaAddr { + nextData := db.getArena().getFrom(nextAddr) + next := (*node)(unsafe.Pointer(&nextData[0])) + next.prev = uint64(prev[0].addr) + } + + db.length-- + db.size -= int(keyNode.keyLen) + int(keyNode.valLen) + return true +} + +// Len returns the number of entries in the DB. +func (db *DB) Len() int { + return db.length +} + +// Size returns sum of keys and values length. Note that deleted +// key/value will not be accounted for, but it will still consume +// the buffer, since the buffer is append only. +func (db *DB) Size() int { + return db.size +} + +type nodeHeader struct { + height uint16 + keyLen uint16 + valLen uint32 +} + +type node struct { + nodeHeader + + // Addr of previous node at base level. + prev uint64 + // Height of the nexts. + nexts [maxHeight]uint64 +} + +type nodeWithAddr struct { + *node + addr arenaAddr +} + +func (n *node) getPrevAddr() arenaAddr { + return arenaAddr(n.prev) +} + +func (n *node) getNextAddr(level int) arenaAddr { + return arenaAddr(n.nexts[level]) +} + +func (n *node) setNextAddr(level int, addr arenaAddr) { + n.nexts[level] = uint64(addr) +} + +func (n *node) entryLen() int { + return n.nodeLen() + int(n.keyLen) + int(n.valLen) +} + +func (n *node) nodeLen() int { + return int(n.height)*8 + 8 + nodeHeaderSize +} + +func (n *node) getKey(buf []byte) []byte { + nodeLen := n.nodeLen() + return buf[nodeLen : nodeLen+int(n.keyLen)] +} + +func (n *node) getValue(buf []byte) []byte { + nodeLenKeyLen := n.nodeLen() + int(n.keyLen) + return buf[nodeLenKeyLen : nodeLenKeyLen+int(n.valLen)] +} + +func (db *DB) getHeight() int { + return db.height +} + +func (db *DB) setHeight(height int) { + db.height = height +} + +func (db *DB) getNext(n *node, level int) (*node, []byte) { + addr := n.getNextAddr(level) + if addr == nullArenaAddr { + return nil, nil + } + arena := db.getArena() + data := arena.getFrom(addr) + node := (*node)(unsafe.Pointer(&data[0])) + return node, data +} + +// findSpliceForLevel returns (outBefore, outAfter) with outBefore.key < key <= outAfter.key. +// The input "before" tells us where to start looking. +// If we found a node with the same key, then we return true. +func (db *DB) findSpliceForLevel(arena *arena, key []byte, before nodeWithAddr, level int) (nodeWithAddr, nodeWithAddr, bool) { + for { + // Assume before.key < key. + nextAddr := before.getNextAddr(level) + if nextAddr == nullArenaAddr { + return before, nodeWithAddr{}, false + } + data := arena.getFrom(nextAddr) + next := nodeWithAddr{(*node)(unsafe.Pointer(&data[0])), nextAddr} + nextKey := next.getKey(data) + cmp := bytes.Compare(nextKey, key) + if cmp >= 0 { + // before.key < key < next.key. We are done for this level. + return before, next, cmp == 0 + } + before = next // Keep moving right on this level. + } +} + +func (db *DB) findGreater(key []byte, allowEqual bool) (*node, []byte, bool) { + var prev *node + prev = db.head.node + level := db.getHeight() - 1 + + for { + var nextData []byte + var next *node + addr := prev.getNextAddr(level) + if addr != nullArenaAddr { + arena := db.getArena() + nextData = arena.getFrom(addr) + next = (*node)(unsafe.Pointer(&nextData[0])) + + nextKey := next.getKey(nextData) + cmp := bytes.Compare(nextKey, key) + if cmp < 0 { + // next key is still smaller, keep moving. + prev = next + continue + } + if cmp == 0 { + // prev.key < key == next.key. + if allowEqual { + return next, nextData, true + } + level = 0 + prev = next + continue + } + } + // next is greater than key or next is nil. go to the lower level. + if level > 0 { + level-- + continue + } + return next, nextData, false + } +} + +func (db *DB) findLess(key []byte, allowEqual bool) (*node, []byte, bool) { + var prev *node + var prevData []byte + prev = db.head.node + level := db.getHeight() - 1 + + for { + next, nextData := db.getNext(prev, level) + if next != nil { + cmp := bytes.Compare(key, next.getKey(nextData)) + if cmp > 0 { + // prev.key < next.key < key. We can continue to move right. + prev = next + prevData = nextData + continue + } + if cmp == 0 && allowEqual { + // prev.key < key == next.key. + return next, nextData, true + } + } + // get closer to the key in the lower level. + if level > 0 { + level-- + continue + } + break + } + + // We are not going to return head. + if prev == db.head.node { + return nil, nil, false + } + return prev, prevData, false +} + +// findLast returns the last element. If head (empty db), we return nil. All the find functions +// will NEVER return the head nodes. +func (db *DB) findLast() (*node, []byte) { + var node *node + var nodeData []byte + node = db.head.node + level := db.getHeight() - 1 + + for { + next, nextData := db.getNext(node, level) + if next != nil { + node = next + nodeData = nextData + continue + } + if level == 0 { + if node == db.head.node { + return nil, nil + } + return node, nodeData + } + level-- + } +} + +func (db *DB) newNode(arena *arena, key []byte, v []byte, height int) (*node, arenaAddr) { + // The base level is already allocated in the node struct. + nodeSize := nodeHeaderSize + height*8 + 8 + len(key) + len(v) + addr, data := arena.alloc(nodeSize) + node := (*node)(unsafe.Pointer(&data[0])) + node.keyLen = uint16(len(key)) + node.height = uint16(height) + node.valLen = uint32(len(v)) + copy(data[node.nodeLen():], key) + copy(data[node.nodeLen()+int(node.keyLen):], v) + return node, addr +} + +func (db *DB) getArena() *arena { + return db.arena +} + +func (db *DB) setArena(al *arena) { + db.arena = al +} + +func (db *DB) randomHeight() int { + h := 1 + for h < maxHeight && fastRand() < uint32(math.MaxUint32)/4 { + h++ + } + return h +} + +// fastRand is a fast thread local random function. +//go:linkname fastRand runtime.fastrand +func fastRand() uint32 diff --git a/kv/memdb/memdb.s b/kv/memdb/memdb.s new file mode 100644 index 0000000000000..d57f14436cecb --- /dev/null +++ b/kv/memdb/memdb.s @@ -0,0 +1,12 @@ +// Copyright 2019 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. diff --git a/kv/memdb/memdb_test.go b/kv/memdb/memdb_test.go new file mode 100644 index 0000000000000..4eed9198fd5e6 --- /dev/null +++ b/kv/memdb/memdb_test.go @@ -0,0 +1,256 @@ +// Copyright 2019 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. + +package memdb + +import ( + "encoding/binary" + "math/rand" + "testing" + + . "github.com/pingcap/check" +) + +const ( + keySize = 16 + valueSize = 128 +) + +func TestT(t *testing.T) { + TestingT(t) +} + +type testMemDBSuite struct{} + +var _ = Suite(testMemDBSuite{}) + +func (s testMemDBSuite) TestGetSet(c *C) { + const cnt = 10000 + p := s.fillDB(cnt) + + var buf [4]byte + for i := 0; i < cnt; i++ { + binary.BigEndian.PutUint32(buf[:], uint32(i)) + v := p.Get(buf[:]) + c.Check(v, BytesEquals, buf[:]) + } +} + +func (s testMemDBSuite) TestIterator(c *C) { + const cnt = 10000 + p := s.fillDB(cnt) + + var buf [4]byte + var i int + it := p.NewIterator() + + for it.SeekToFirst(); it.Valid(); it.Next() { + binary.BigEndian.PutUint32(buf[:], uint32(i)) + c.Check(it.Key(), BytesEquals, buf[:]) + c.Check(it.Value(), BytesEquals, buf[:]) + i++ + } + c.Check(i, Equals, cnt) + + i-- + for it.SeekToLast(); it.Valid(); it.Prev() { + binary.BigEndian.PutUint32(buf[:], uint32(i)) + c.Check(it.Key(), BytesEquals, buf[:]) + c.Check(it.Value(), BytesEquals, buf[:]) + i-- + } + c.Check(i, Equals, -1) +} + +func (s testMemDBSuite) TestOverwrite(c *C) { + const cnt = 10000 + p := s.fillDB(cnt) + var buf [4]byte + + for i := 0; i < cnt; i += 3 { + var newBuf [4]byte + binary.BigEndian.PutUint32(buf[:], uint32(i)) + binary.BigEndian.PutUint32(newBuf[:], uint32(i*10)) + p.Put(buf[:], newBuf[:]) + } + + for i := 0; i < cnt; i++ { + binary.BigEndian.PutUint32(buf[:], uint32(i)) + v := binary.BigEndian.Uint32(p.Get(buf[:])) + if i%3 == 0 { + c.Check(v, Equals, uint32(i*10)) + } else { + c.Check(v, Equals, uint32(i)) + } + } + + it := p.NewIterator() + var i int + + for it.SeekToFirst(); it.Valid(); it.Next() { + binary.BigEndian.PutUint32(buf[:], uint32(i)) + c.Check(it.Key(), BytesEquals, buf[:]) + v := binary.BigEndian.Uint32(it.Value()) + if i%3 == 0 { + c.Check(v, Equals, uint32(i*10)) + } else { + c.Check(v, Equals, uint32(i)) + } + i++ + } + c.Check(i, Equals, cnt) + + i-- + for it.SeekToLast(); it.Valid(); it.Prev() { + binary.BigEndian.PutUint32(buf[:], uint32(i)) + c.Check(it.Key(), BytesEquals, buf[:]) + v := binary.BigEndian.Uint32(it.Value()) + if i%3 == 0 { + c.Check(v, Equals, uint32(i*10)) + } else { + c.Check(v, Equals, uint32(i)) + } + i-- + } + c.Check(i, Equals, -1) +} + +func (s testMemDBSuite) TestDelete(c *C) { + const cnt = 10000 + p := s.fillDB(cnt) + var buf [4]byte + + for i := 0; i < cnt; i += 3 { + binary.BigEndian.PutUint32(buf[:], uint32(i)) + p.Delete(buf[:]) + } + + for i := 0; i < cnt; i++ { + binary.BigEndian.PutUint32(buf[:], uint32(i)) + v := p.Get(buf[:]) + if i%3 == 0 { + c.Check(v, IsNil) + } else { + c.Check(v, BytesEquals, buf[:]) + } + } + + it := p.NewIterator() + var i int + + for it.SeekToFirst(); it.Valid(); it.Next() { + if i%3 == 0 { + i++ + } + binary.BigEndian.PutUint32(buf[:], uint32(i)) + c.Check(it.Key(), BytesEquals, buf[:]) + c.Check(it.Value(), BytesEquals, buf[:]) + i++ + } + + i-- + for it.SeekToLast(); it.Valid(); it.Prev() { + if i%3 == 0 { + i-- + } + binary.BigEndian.PutUint32(buf[:], uint32(i)) + c.Check(it.Key(), BytesEquals, buf[:]) + c.Check(it.Value(), BytesEquals, buf[:]) + i-- + } +} + +func (s testMemDBSuite) fillDB(cnt int) *DB { + p := New(4 * 1024 * 1024) + var buf [4]byte + for i := 0; i < cnt; i++ { + binary.BigEndian.PutUint32(buf[:], uint32(i)) + p.Put(buf[:], buf[:]) + } + return p +} + +func BenchmarkLargeIndex(b *testing.B) { + buf := make([][valueSize]byte, 10000000) + for i := range buf { + binary.LittleEndian.PutUint32(buf[i][:], uint32(i)) + } + p := New(4 * 1024 * 1024) + b.ResetTimer() + + for i := range buf { + p.Put(buf[i][:keySize], buf[i][:]) + } +} + +func BenchmarkPut(b *testing.B) { + buf := make([][valueSize]byte, b.N) + for i := range buf { + binary.LittleEndian.PutUint32(buf[i][:], uint32(i)) + } + + p := New(4 * 1024 * 1024) + b.ResetTimer() + + for i := range buf { + p.Put(buf[i][:keySize], buf[i][:]) + } +} + +func BenchmarkPutRandom(b *testing.B) { + buf := make([][valueSize]byte, b.N) + for i := range buf { + binary.LittleEndian.PutUint32(buf[i][:], uint32(rand.Int())) + } + + p := New(4 * 1024 * 1024) + b.ResetTimer() + + for i := range buf { + p.Put(buf[i][:keySize], buf[i][:]) + } +} + +func BenchmarkGet(b *testing.B) { + buf := make([][valueSize]byte, b.N) + for i := range buf { + binary.LittleEndian.PutUint32(buf[i][:], uint32(i)) + } + + p := New(4 * 1024 * 1024) + for i := range buf { + p.Put(buf[i][:keySize], buf[i][:]) + } + + b.ResetTimer() + for i := range buf { + p.Get(buf[i][:]) + } +} + +func BenchmarkGetRandom(b *testing.B) { + buf := make([][valueSize]byte, b.N) + for i := range buf { + binary.LittleEndian.PutUint32(buf[i][:], uint32(i)) + } + + p := New(4 * 1024 * 1024) + for i := range buf { + p.Put(buf[i][:keySize], buf[i][:]) + } + + b.ResetTimer() + for i := 0; i < b.N; i++ { + p.Get(buf[rand.Int()%b.N][:]) + } +} diff --git a/kv/memdb_buffer.go b/kv/memdb_buffer.go index 953b7933c9847..b3cabb0598de4 100644 --- a/kv/memdb_buffer.go +++ b/kv/memdb_buffer.go @@ -16,16 +16,12 @@ package kv import ( + "bytes" "context" "sync/atomic" "github.com/pingcap/errors" - "github.com/pingcap/goleveldb/leveldb" - "github.com/pingcap/goleveldb/leveldb/comparer" - "github.com/pingcap/goleveldb/leveldb/iterator" - "github.com/pingcap/goleveldb/leveldb/memdb" - "github.com/pingcap/goleveldb/leveldb/util" - "github.com/pingcap/parser/terror" + "github.com/pingcap/tidb/kv/memdb" ) // memDbBuffer implements the MemBuffer interface. @@ -37,14 +33,16 @@ type memDbBuffer struct { } type memDbIter struct { - iter iterator.Iterator + iter memdb.Iterator + start []byte + end []byte reverse bool } // NewMemDbBuffer creates a new memDbBuffer. -func NewMemDbBuffer(cap int) MemBuffer { +func NewMemDbBuffer(initBlockSize int) MemBuffer { return &memDbBuffer{ - db: memdb.New(comparer.DefaultComparer, cap), + db: memdb.New(initBlockSize), entrySizeLimit: TxnEntrySizeLimit, bufferSizeLimit: atomic.LoadUint64(&TxnTotalSizeLimit), } @@ -52,11 +50,17 @@ func NewMemDbBuffer(cap int) MemBuffer { // Iter creates an Iterator. func (m *memDbBuffer) Iter(k Key, upperBound Key) (Iterator, error) { - i := &memDbIter{iter: m.db.NewIterator(&util.Range{Start: []byte(k), Limit: []byte(upperBound)}), reverse: false} + i := &memDbIter{ + iter: m.db.NewIterator(), + start: k, + end: upperBound, + reverse: false, + } - err := i.Next() - if err != nil { - return nil, err + if k == nil { + i.iter.SeekToFirst() + } else { + i.iter.Seek(k) } return i, nil } @@ -66,20 +70,23 @@ func (m *memDbBuffer) SetCap(cap int) { } func (m *memDbBuffer) IterReverse(k Key) (Iterator, error) { - var i *memDbIter + i := &memDbIter{ + iter: m.db.NewIterator(), + end: k, + reverse: true, + } if k == nil { - i = &memDbIter{iter: m.db.NewIterator(&util.Range{}), reverse: true} + i.iter.SeekToLast() } else { - i = &memDbIter{iter: m.db.NewIterator(&util.Range{Limit: []byte(k)}), reverse: true} + i.iter.SeekForExclusivePrev(k) } - i.iter.Last() return i, nil } // Get returns the value associated with key. func (m *memDbBuffer) Get(ctx context.Context, k Key) ([]byte, error) { - v, err := m.db.Get(k) - if terror.ErrorEqual(err, leveldb.ErrNotFound) { + v := m.db.Get(k) + if v == nil { return nil, ErrNotExist } return v, nil @@ -94,17 +101,17 @@ func (m *memDbBuffer) Set(k Key, v []byte) error { return ErrEntryTooLarge.GenWithStackByArgs(m.entrySizeLimit, len(k)+len(v)) } - err := m.db.Put(k, v) + m.db.Put(k, v) if m.Size() > int(m.bufferSizeLimit) { return ErrTxnTooLarge.GenWithStack("transaction too large, size:%d", m.Size()) } - return errors.Trace(err) + return nil } // Delete removes the entry from buffer with provided key. func (m *memDbBuffer) Delete(k Key) error { - err := m.db.Put(k, nil) - return errors.Trace(err) + m.db.Put(k, nil) + return nil } // Size returns sum of keys and values length. @@ -134,6 +141,9 @@ func (i *memDbIter) Next() error { // Valid implements the Iterator Valid. func (i *memDbIter) Valid() bool { + if !i.reverse { + return i.iter.Valid() && (i.end == nil || bytes.Compare(i.Key(), i.end) < 0) + } return i.iter.Valid() } @@ -149,7 +159,7 @@ func (i *memDbIter) Value() []byte { // Close Implements the Iterator Close. func (i *memDbIter) Close() { - i.iter.Release() + } // WalkMemBuffer iterates all buffered kv pairs in memBuf From 70298996ff1b80e22e0a0320c31997bac84c5d37 Mon Sep 17 00:00:00 2001 From: Zejun Li Date: Wed, 21 Aug 2019 15:46:44 +0800 Subject: [PATCH 02/10] code style --- kv/memdb/memdb.go | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/kv/memdb/memdb.go b/kv/memdb/memdb.go index 35e5e2540133d..a21091fe33fda 100644 --- a/kv/memdb/memdb.go +++ b/kv/memdb/memdb.go @@ -264,8 +264,7 @@ func (db *DB) findSpliceForLevel(arena *arena, key []byte, before nodeWithAddr, } func (db *DB) findGreater(key []byte, allowEqual bool) (*node, []byte, bool) { - var prev *node - prev = db.head.node + prev := db.head.node level := db.getHeight() - 1 for { @@ -304,9 +303,8 @@ func (db *DB) findGreater(key []byte, allowEqual bool) (*node, []byte, bool) { } func (db *DB) findLess(key []byte, allowEqual bool) (*node, []byte, bool) { - var prev *node var prevData []byte - prev = db.head.node + prev := db.head.node level := db.getHeight() - 1 for { @@ -342,9 +340,8 @@ func (db *DB) findLess(key []byte, allowEqual bool) (*node, []byte, bool) { // findLast returns the last element. If head (empty db), we return nil. All the find functions // will NEVER return the head nodes. func (db *DB) findLast() (*node, []byte) { - var node *node var nodeData []byte - node = db.head.node + node := db.head.node level := db.getHeight() - 1 for { From 53bfa00ab6c01b62956e84ad5417d11e603d3c38 Mon Sep 17 00:00:00 2001 From: Zejun Li Date: Wed, 21 Aug 2019 19:25:50 +0800 Subject: [PATCH 03/10] address comments --- kv/memdb/arena.go | 52 +++++++++++++++++++++++++----------------- kv/memdb/iterator.go | 6 ++--- kv/memdb/memdb.go | 54 +++++++++++++++++++++----------------------- 3 files changed, 60 insertions(+), 52 deletions(-) diff --git a/kv/memdb/arena.go b/kv/memdb/arena.go index deedc36610777..cd92ff57d513e 100644 --- a/kv/memdb/arena.go +++ b/kv/memdb/arena.go @@ -15,27 +15,38 @@ package memdb import "math" -type arenaAddr uint64 +type arenaAddr struct { + blockIdx uint32 + blockOffset uint32 +} -const ( - alignMask = 1<<32 - 8 // 29 bit 1 and 3 bit 0. - nullBlockOffset = math.MaxUint32 - maxBlockSize = 128 << 20 - nullArenaAddr arenaAddr = 0 -) +func (addr arenaAddr) isNull() bool { + return addr.blockIdx == 0 && addr.blockOffset == 0 +} -func (addr arenaAddr) blockIdx() int { - return int(addr>>32 - 1) +func (addr arenaAddr) encode() uint64 { + return uint64(addr.blockIdx)<<32 | uint64(addr.blockOffset) } -func (addr arenaAddr) blockOffset() uint32 { - return uint32(addr) +func newArenaAddr(idx int, offset uint32) arenaAddr { + return arenaAddr{ + blockIdx: uint32(idx) + 1, + blockOffset: offset, + } } -func newArenaAddr(blockIdx int, blockOffset uint32) arenaAddr { - return arenaAddr(uint64(blockIdx+1)<<32 | uint64(blockOffset)) +func decodeArenaAddr(encoded uint64) arenaAddr { + return arenaAddr{ + blockIdx: uint32(encoded >> 32), + blockOffset: uint32(encoded), + } } +const ( + nullBlockOffset = math.MaxUint32 + maxBlockSize = 128 << 20 +) + type arena struct { blockSize int availIdx int @@ -50,7 +61,7 @@ func newArenaLocator(initBlockSize int) *arena { } func (a *arena) getFrom(addr arenaAddr) []byte { - return a.blocks[addr.blockIdx()].getFrom(addr.blockOffset()) + return a.blocks[addr.blockIdx-1].getFrom(addr.blockOffset) } func (a *arena) alloc(size int) (arenaAddr, []byte) { @@ -66,7 +77,9 @@ func (a *arena) alloc(size int) (arenaAddr, []byte) { block := a.blocks[a.availIdx] blockOffset := block.alloc(size) if blockOffset != nullBlockOffset { - return newArenaAddr(a.availIdx, blockOffset), block.buf[blockOffset : int(blockOffset)+size] + addr := newArenaAddr(a.availIdx, blockOffset) + data := block.buf[blockOffset : int(blockOffset)+size] + return addr, data } blockSize := a.blockSize << 1 @@ -74,20 +87,19 @@ func (a *arena) alloc(size int) (arenaAddr, []byte) { a.blockSize = blockSize } a.blocks = append(a.blocks, newArenaBlock(a.blockSize)) - a.availIdx = len(a.blocks) - 1 + a.availIdx = int(uint32(len(a.blocks) - 1)) } } func (a *arena) reset() { a.availIdx = 0 a.blockSize = len(a.blocks[0].buf) - a.blocks = a.blocks[:1] + a.blocks = []*arenaBlock{a.blocks[0]} a.blocks[0].reset() } type arenaBlock struct { buf []byte - ref uint64 length int } @@ -103,17 +115,15 @@ func (a *arenaBlock) getFrom(offset uint32) []byte { func (a *arenaBlock) alloc(size int) uint32 { // The returned addr should be aligned in 8 bytes. - offset := (a.length + 7) & alignMask + offset := a.length a.length = offset + size if a.length > len(a.buf) { return nullBlockOffset } - a.ref++ return uint32(offset) } func (a *arenaBlock) reset() { a.buf = a.buf[:0] - a.ref = 0 a.length = 0 } diff --git a/kv/memdb/iterator.go b/kv/memdb/iterator.go index 3da99255a2b97..280518c0672dc 100644 --- a/kv/memdb/iterator.go +++ b/kv/memdb/iterator.go @@ -30,7 +30,7 @@ func (db *DB) NewIterator() Iterator { } } -// Valid returns true iff the iterator is positioned at a valid node. +// Valid returns true if the iterator is positioned at a valid node. func (it *Iterator) Valid() bool { return it.curr != nil } // Key returns the key at the current position. @@ -94,8 +94,8 @@ func (it *Iterator) updateState(node *node, nodeData []byte) { func (it *Iterator) changeToAddr(addr arenaAddr) { var data []byte var n *node - if addr != nullArenaAddr { - data = it.db.getArena().getFrom(addr) + if !addr.isNull() { + data = it.db.arena.getFrom(addr) n = (*node)(unsafe.Pointer(&data[0])) } it.updateState(n, data) diff --git a/kv/memdb/memdb.go b/kv/memdb/memdb.go index a21091fe33fda..4bb7e4cc2dff6 100644 --- a/kv/memdb/memdb.go +++ b/kv/memdb/memdb.go @@ -69,7 +69,7 @@ func (db *DB) Get(key []byte) []byte { // Put sets the value for the given key. // It overwrites any previous value for that key. func (db *DB) Put(key []byte, v []byte) bool { - arena := db.getArena() + arena := db.arena lsHeight := db.getHeight() var prev [maxHeight + 1]nodeWithAddr var next [maxHeight + 1]nodeWithAddr @@ -78,7 +78,7 @@ func (db *DB) Put(key []byte, v []byte) bool { var exists bool for i := lsHeight - 1; i >= 0; i-- { // Use higher level to speed up for current level. - prev[i], next[i], exists = db.findSpliceForLevel(db.getArena(), key, prev[i+1], i) + prev[i], next[i], exists = db.findSpliceForLevel(db.arena, key, prev[i+1], i) } var height int @@ -96,16 +96,16 @@ func (db *DB) Put(key []byte, v []byte) bool { // We always insert from the base level and up. After you add a node in base level, we cannot // create a node in the level above because it would have discovered the node in the base level. for i := 0; i < height; i++ { - x.nexts[i] = uint64(next[i].addr) + x.nexts[i] = next[i].addr.encode() if prev[i].node == nil { prev[i] = db.head } prev[i].setNextAddr(i, addr) } - x.prev = uint64(prev[0].addr) + x.prev = prev[0].addr.encode() if next[0].node != nil { - next[0].prev = uint64(addr) + next[0].prev = addr.encode() } db.length++ @@ -113,15 +113,21 @@ func (db *DB) Put(key []byte, v []byte) bool { return true } +// The pointers in findSpliceForLevel may point to the node which going to be overwrite, +// prepareOverwrite update them to point to the next node, so we can link new node with the list correctly. func (db *DB) prepareOverwrite(next []nodeWithAddr) int { old := next[0] - height := int(old.height) + + // Update necessary states. db.size -= int(old.valLen) + int(old.keyLen) + db.length-- + + height := int(old.height) for i := 0; i < height; i++ { if next[i].addr == old.addr { next[i].addr = old.getNextAddr(i) - if next[i].addr != nullArenaAddr { - data := db.getArena().getFrom(next[i].addr) + if !next[i].addr.isNull() { + data := db.arena.getFrom(next[i].addr) next[i].node = (*node)(unsafe.Pointer(&data[0])) } } @@ -139,7 +145,7 @@ func (db *DB) Delete(key []byte) bool { var keyNode nodeWithAddr var match bool for i := listHeight - 1; i >= 0; i-- { - prev[i], keyNode, match = db.findSpliceForLevel(db.getArena(), key, prev[i+1], i) + prev[i], keyNode, match = db.findSpliceForLevel(db.arena, key, prev[i+1], i) } if !match { return false @@ -149,10 +155,10 @@ func (db *DB) Delete(key []byte) bool { prev[i].setNextAddr(i, keyNode.getNextAddr(i)) } nextAddr := keyNode.getNextAddr(0) - if nextAddr != nullArenaAddr { - nextData := db.getArena().getFrom(nextAddr) + if !nextAddr.isNull() { + nextData := db.arena.getFrom(nextAddr) next := (*node)(unsafe.Pointer(&nextData[0])) - next.prev = uint64(prev[0].addr) + next.prev = prev[0].addr.encode() } db.length-- @@ -193,15 +199,15 @@ type nodeWithAddr struct { } func (n *node) getPrevAddr() arenaAddr { - return arenaAddr(n.prev) + return decodeArenaAddr(n.prev) } func (n *node) getNextAddr(level int) arenaAddr { - return arenaAddr(n.nexts[level]) + return decodeArenaAddr(n.nexts[level]) } func (n *node) setNextAddr(level int, addr arenaAddr) { - n.nexts[level] = uint64(addr) + n.nexts[level] = addr.encode() } func (n *node) entryLen() int { @@ -232,10 +238,10 @@ func (db *DB) setHeight(height int) { func (db *DB) getNext(n *node, level int) (*node, []byte) { addr := n.getNextAddr(level) - if addr == nullArenaAddr { + if addr.isNull() { return nil, nil } - arena := db.getArena() + arena := db.arena data := arena.getFrom(addr) node := (*node)(unsafe.Pointer(&data[0])) return node, data @@ -248,7 +254,7 @@ func (db *DB) findSpliceForLevel(arena *arena, key []byte, before nodeWithAddr, for { // Assume before.key < key. nextAddr := before.getNextAddr(level) - if nextAddr == nullArenaAddr { + if nextAddr.isNull() { return before, nodeWithAddr{}, false } data := arena.getFrom(nextAddr) @@ -271,8 +277,8 @@ func (db *DB) findGreater(key []byte, allowEqual bool) (*node, []byte, bool) { var nextData []byte var next *node addr := prev.getNextAddr(level) - if addr != nullArenaAddr { - arena := db.getArena() + if !addr.isNull() { + arena := db.arena nextData = arena.getFrom(addr) next = (*node)(unsafe.Pointer(&nextData[0])) @@ -374,14 +380,6 @@ func (db *DB) newNode(arena *arena, key []byte, v []byte, height int) (*node, ar return node, addr } -func (db *DB) getArena() *arena { - return db.arena -} - -func (db *DB) setArena(al *arena) { - db.arena = al -} - func (db *DB) randomHeight() int { h := 1 for h < maxHeight && fastRand() < uint32(math.MaxUint32)/4 { From 1b43806b88ea9cf02800dde7e2578c8caad45633 Mon Sep 17 00:00:00 2001 From: Zejun Li Date: Wed, 21 Aug 2019 19:53:16 +0800 Subject: [PATCH 04/10] remove encode and decode of arenaAddr --- kv/memdb/arena.go | 11 ----------- kv/memdb/memdb.go | 18 +++++++++--------- 2 files changed, 9 insertions(+), 20 deletions(-) diff --git a/kv/memdb/arena.go b/kv/memdb/arena.go index cd92ff57d513e..976c5430639de 100644 --- a/kv/memdb/arena.go +++ b/kv/memdb/arena.go @@ -24,10 +24,6 @@ func (addr arenaAddr) isNull() bool { return addr.blockIdx == 0 && addr.blockOffset == 0 } -func (addr arenaAddr) encode() uint64 { - return uint64(addr.blockIdx)<<32 | uint64(addr.blockOffset) -} - func newArenaAddr(idx int, offset uint32) arenaAddr { return arenaAddr{ blockIdx: uint32(idx) + 1, @@ -35,13 +31,6 @@ func newArenaAddr(idx int, offset uint32) arenaAddr { } } -func decodeArenaAddr(encoded uint64) arenaAddr { - return arenaAddr{ - blockIdx: uint32(encoded >> 32), - blockOffset: uint32(encoded), - } -} - const ( nullBlockOffset = math.MaxUint32 maxBlockSize = 128 << 20 diff --git a/kv/memdb/memdb.go b/kv/memdb/memdb.go index 4bb7e4cc2dff6..26642e07120c9 100644 --- a/kv/memdb/memdb.go +++ b/kv/memdb/memdb.go @@ -96,16 +96,16 @@ func (db *DB) Put(key []byte, v []byte) bool { // We always insert from the base level and up. After you add a node in base level, we cannot // create a node in the level above because it would have discovered the node in the base level. for i := 0; i < height; i++ { - x.nexts[i] = next[i].addr.encode() + x.nexts[i] = next[i].addr if prev[i].node == nil { prev[i] = db.head } prev[i].setNextAddr(i, addr) } - x.prev = prev[0].addr.encode() + x.prev = prev[0].addr if next[0].node != nil { - next[0].prev = addr.encode() + next[0].prev = addr } db.length++ @@ -158,7 +158,7 @@ func (db *DB) Delete(key []byte) bool { if !nextAddr.isNull() { nextData := db.arena.getFrom(nextAddr) next := (*node)(unsafe.Pointer(&nextData[0])) - next.prev = prev[0].addr.encode() + next.prev = prev[0].addr } db.length-- @@ -188,9 +188,9 @@ type node struct { nodeHeader // Addr of previous node at base level. - prev uint64 + prev arenaAddr // Height of the nexts. - nexts [maxHeight]uint64 + nexts [maxHeight]arenaAddr } type nodeWithAddr struct { @@ -199,15 +199,15 @@ type nodeWithAddr struct { } func (n *node) getPrevAddr() arenaAddr { - return decodeArenaAddr(n.prev) + return n.prev } func (n *node) getNextAddr(level int) arenaAddr { - return decodeArenaAddr(n.nexts[level]) + return n.nexts[level] } func (n *node) setNextAddr(level int, addr arenaAddr) { - n.nexts[level] = addr.encode() + n.nexts[level] = addr } func (n *node) entryLen() int { From 605bcbea94536d518bfb55022c18c7d2797fdf87 Mon Sep 17 00:00:00 2001 From: Zejun Li Date: Thu, 22 Aug 2019 11:50:33 +0800 Subject: [PATCH 05/10] address comments --- kv/memdb/iterator.go | 4 +- kv/memdb/memdb.go | 148 +++++++++++++++++++------------------------ 2 files changed, 66 insertions(+), 86 deletions(-) diff --git a/kv/memdb/iterator.go b/kv/memdb/iterator.go index 280518c0672dc..9a2dbecc9dd70 100644 --- a/kv/memdb/iterator.go +++ b/kv/memdb/iterator.go @@ -45,12 +45,12 @@ func (it *Iterator) Value() []byte { // Next moves the iterator to the next entry. func (it *Iterator) Next() { - it.changeToAddr(it.curr.getNextAddr(0)) + it.changeToAddr(it.curr.nexts[0]) } // Prev moves the iterator to the previous entry. func (it *Iterator) Prev() { - it.changeToAddr(it.curr.getPrevAddr()) + it.changeToAddr(it.curr.prev) } // Seek locates the iterator to the first entry with a key >= seekKey. diff --git a/kv/memdb/memdb.go b/kv/memdb/memdb.go index 26642e07120c9..2ee7e5875cf0d 100644 --- a/kv/memdb/memdb.go +++ b/kv/memdb/memdb.go @@ -70,7 +70,7 @@ func (db *DB) Get(key []byte) []byte { // It overwrites any previous value for that key. func (db *DB) Put(key []byte, v []byte) bool { arena := db.arena - lsHeight := db.getHeight() + lsHeight := db.height var prev [maxHeight + 1]nodeWithAddr var next [maxHeight + 1]nodeWithAddr prev[lsHeight] = db.head @@ -90,7 +90,7 @@ func (db *DB) Put(key []byte, v []byte) bool { x, addr := db.newNode(arena, key, v, height) if height > lsHeight { - db.setHeight(height) + db.height = height } // We always insert from the base level and up. After you add a node in base level, we cannot @@ -100,7 +100,7 @@ func (db *DB) Put(key []byte, v []byte) bool { if prev[i].node == nil { prev[i] = db.head } - prev[i].setNextAddr(i, addr) + prev[i].nexts[i] = addr } x.prev = prev[0].addr @@ -125,7 +125,7 @@ func (db *DB) prepareOverwrite(next []nodeWithAddr) int { height := int(old.height) for i := 0; i < height; i++ { if next[i].addr == old.addr { - next[i].addr = old.getNextAddr(i) + next[i].addr = old.nexts[i] if !next[i].addr.isNull() { data := db.arena.getFrom(next[i].addr) next[i].node = (*node)(unsafe.Pointer(&data[0])) @@ -138,7 +138,7 @@ func (db *DB) prepareOverwrite(next []nodeWithAddr) int { // Delete deletes the value for the given key. // It returns false if the DB does not contain the key. func (db *DB) Delete(key []byte) bool { - listHeight := db.getHeight() + listHeight := db.height var prev [maxHeight + 1]nodeWithAddr prev[listHeight] = db.head @@ -152,9 +152,9 @@ func (db *DB) Delete(key []byte) bool { } for i := int(keyNode.height) - 1; i >= 0; i-- { - prev[i].setNextAddr(i, keyNode.getNextAddr(i)) + prev[i].nexts[i] = keyNode.nexts[i] } - nextAddr := keyNode.getNextAddr(0) + nextAddr := keyNode.nexts[0] if !nextAddr.isNull() { nextData := db.arena.getFrom(nextAddr) next := (*node)(unsafe.Pointer(&nextData[0])) @@ -178,82 +178,13 @@ func (db *DB) Size() int { return db.size } -type nodeHeader struct { - height uint16 - keyLen uint16 - valLen uint32 -} - -type node struct { - nodeHeader - - // Addr of previous node at base level. - prev arenaAddr - // Height of the nexts. - nexts [maxHeight]arenaAddr -} - -type nodeWithAddr struct { - *node - addr arenaAddr -} - -func (n *node) getPrevAddr() arenaAddr { - return n.prev -} - -func (n *node) getNextAddr(level int) arenaAddr { - return n.nexts[level] -} - -func (n *node) setNextAddr(level int, addr arenaAddr) { - n.nexts[level] = addr -} - -func (n *node) entryLen() int { - return n.nodeLen() + int(n.keyLen) + int(n.valLen) -} - -func (n *node) nodeLen() int { - return int(n.height)*8 + 8 + nodeHeaderSize -} - -func (n *node) getKey(buf []byte) []byte { - nodeLen := n.nodeLen() - return buf[nodeLen : nodeLen+int(n.keyLen)] -} - -func (n *node) getValue(buf []byte) []byte { - nodeLenKeyLen := n.nodeLen() + int(n.keyLen) - return buf[nodeLenKeyLen : nodeLenKeyLen+int(n.valLen)] -} - -func (db *DB) getHeight() int { - return db.height -} - -func (db *DB) setHeight(height int) { - db.height = height -} - -func (db *DB) getNext(n *node, level int) (*node, []byte) { - addr := n.getNextAddr(level) - if addr.isNull() { - return nil, nil - } - arena := db.arena - data := arena.getFrom(addr) - node := (*node)(unsafe.Pointer(&data[0])) - return node, data -} - // findSpliceForLevel returns (outBefore, outAfter) with outBefore.key < key <= outAfter.key. // The input "before" tells us where to start looking. // If we found a node with the same key, then we return true. func (db *DB) findSpliceForLevel(arena *arena, key []byte, before nodeWithAddr, level int) (nodeWithAddr, nodeWithAddr, bool) { for { // Assume before.key < key. - nextAddr := before.getNextAddr(level) + nextAddr := before.nexts[level] if nextAddr.isNull() { return before, nodeWithAddr{}, false } @@ -271,12 +202,12 @@ func (db *DB) findSpliceForLevel(arena *arena, key []byte, before nodeWithAddr, func (db *DB) findGreater(key []byte, allowEqual bool) (*node, []byte, bool) { prev := db.head.node - level := db.getHeight() - 1 + level := db.height - 1 for { var nextData []byte var next *node - addr := prev.getNextAddr(level) + addr := prev.nexts[level] if !addr.isNull() { arena := db.arena nextData = arena.getFrom(addr) @@ -311,7 +242,7 @@ func (db *DB) findGreater(key []byte, allowEqual bool) (*node, []byte, bool) { func (db *DB) findLess(key []byte, allowEqual bool) (*node, []byte, bool) { var prevData []byte prev := db.head.node - level := db.getHeight() - 1 + level := db.height - 1 for { next, nextData := db.getNext(prev, level) @@ -348,7 +279,7 @@ func (db *DB) findLess(key []byte, allowEqual bool) (*node, []byte, bool) { func (db *DB) findLast() (*node, []byte) { var nodeData []byte node := db.head.node - level := db.getHeight() - 1 + level := db.height - 1 for { next, nextData := db.getNext(node, level) @@ -380,6 +311,10 @@ func (db *DB) newNode(arena *arena, key []byte, v []byte, height int) (*node, ar return node, addr } +// fastRand is a fast thread local random function. +//go:linkname fastRand runtime.fastrand +func fastRand() uint32 + func (db *DB) randomHeight() int { h := 1 for h < maxHeight && fastRand() < uint32(math.MaxUint32)/4 { @@ -388,6 +323,51 @@ func (db *DB) randomHeight() int { return h } -// fastRand is a fast thread local random function. -//go:linkname fastRand runtime.fastrand -func fastRand() uint32 +type nodeHeader struct { + height uint16 + keyLen uint16 + valLen uint32 +} + +type node struct { + nodeHeader + + // Addr of previous node at base level. + prev arenaAddr + // Height of the nexts. + nexts [maxHeight]arenaAddr +} + +type nodeWithAddr struct { + *node + addr arenaAddr +} + +func (n *node) entryLen() int { + return n.nodeLen() + int(n.keyLen) + int(n.valLen) +} + +func (n *node) nodeLen() int { + return int(n.height)*8 + 8 + nodeHeaderSize +} + +func (n *node) getKey(buf []byte) []byte { + nodeLen := n.nodeLen() + return buf[nodeLen : nodeLen+int(n.keyLen)] +} + +func (n *node) getValue(buf []byte) []byte { + nodeLenKeyLen := n.nodeLen() + int(n.keyLen) + return buf[nodeLenKeyLen : nodeLenKeyLen+int(n.valLen)] +} + +func (db *DB) getNext(n *node, level int) (*node, []byte) { + addr := n.nexts[level] + if addr.isNull() { + return nil, nil + } + arena := db.arena + data := arena.getFrom(addr) + node := (*node)(unsafe.Pointer(&data[0])) + return node, data +} From c7cb9f03769a91b50424c33c910c60fa7ae91d31 Mon Sep 17 00:00:00 2001 From: Zejun Li Date: Thu, 22 Aug 2019 20:05:29 +0800 Subject: [PATCH 06/10] remove unused code and add more unit test --- kv/memdb/arena.go | 1 - kv/memdb/iterator.go | 2 +- kv/memdb/memdb.go | 15 +---- kv/memdb/memdb_test.go | 129 ++++++++++++++++++++++++++++++++++++++++- 4 files changed, 132 insertions(+), 15 deletions(-) diff --git a/kv/memdb/arena.go b/kv/memdb/arena.go index 976c5430639de..41096c4a1e083 100644 --- a/kv/memdb/arena.go +++ b/kv/memdb/arena.go @@ -103,7 +103,6 @@ func (a *arenaBlock) getFrom(offset uint32) []byte { } func (a *arenaBlock) alloc(size int) uint32 { - // The returned addr should be aligned in 8 bytes. offset := a.length a.length = offset + size if a.length > len(a.buf) { diff --git a/kv/memdb/iterator.go b/kv/memdb/iterator.go index 9a2dbecc9dd70..ebcb501b8d64f 100644 --- a/kv/memdb/iterator.go +++ b/kv/memdb/iterator.go @@ -55,7 +55,7 @@ func (it *Iterator) Prev() { // Seek locates the iterator to the first entry with a key >= seekKey. func (it *Iterator) Seek(seekKey []byte) { - node, nodeData, _ := it.db.findGreater(seekKey, true) // find >=. + node, nodeData, _ := it.db.findGreater(seekKey) // find >=. it.updateState(node, nodeData) } diff --git a/kv/memdb/memdb.go b/kv/memdb/memdb.go index 2ee7e5875cf0d..7b7df99332a5b 100644 --- a/kv/memdb/memdb.go +++ b/kv/memdb/memdb.go @@ -59,7 +59,7 @@ func (db *DB) Reset() { // Get gets the value for the given key. It returns nil if the // DB does not contain the key. func (db *DB) Get(key []byte) []byte { - node, data, match := db.findGreater(key, true) + node, data, match := db.findGreater(key) if !match { return nil } @@ -200,7 +200,7 @@ func (db *DB) findSpliceForLevel(arena *arena, key []byte, before nodeWithAddr, } } -func (db *DB) findGreater(key []byte, allowEqual bool) (*node, []byte, bool) { +func (db *DB) findGreater(key []byte) (*node, []byte, bool) { prev := db.head.node level := db.height - 1 @@ -222,12 +222,7 @@ func (db *DB) findGreater(key []byte, allowEqual bool) (*node, []byte, bool) { } if cmp == 0 { // prev.key < key == next.key. - if allowEqual { - return next, nextData, true - } - level = 0 - prev = next - continue + return next, nextData, true } } // next is greater than key or next is nil. go to the lower level. @@ -343,10 +338,6 @@ type nodeWithAddr struct { addr arenaAddr } -func (n *node) entryLen() int { - return n.nodeLen() + int(n.keyLen) + int(n.valLen) -} - func (n *node) nodeLen() int { return int(n.height)*8 + 8 + nodeHeaderSize } diff --git a/kv/memdb/memdb_test.go b/kv/memdb/memdb_test.go index 4eed9198fd5e6..24fb78ddc4dbc 100644 --- a/kv/memdb/memdb_test.go +++ b/kv/memdb/memdb_test.go @@ -19,6 +19,8 @@ import ( "testing" . "github.com/pingcap/check" + "github.com/pingcap/goleveldb/leveldb/comparer" + "github.com/pingcap/goleveldb/leveldb/memdb" ) const ( @@ -77,12 +79,15 @@ func (s testMemDBSuite) TestOverwrite(c *C) { p := s.fillDB(cnt) var buf [4]byte + sz := p.Size() for i := 0; i < cnt; i += 3 { var newBuf [4]byte binary.BigEndian.PutUint32(buf[:], uint32(i)) binary.BigEndian.PutUint32(newBuf[:], uint32(i*10)) p.Put(buf[:], newBuf[:]) } + c.Check(p.Len(), Equals, cnt) + c.Check(p.Size(), Equals, sz) for i := 0; i < cnt; i++ { binary.BigEndian.PutUint32(buf[:], uint32(i)) @@ -170,8 +175,130 @@ func (s testMemDBSuite) TestDelete(c *C) { } } +func (s testMemDBSuite) TestKVLargeThanBlock(c *C) { + p := New(4 * 1024) + p.Put([]byte{1}, make([]byte, 1)) + p.Put([]byte{2}, make([]byte, 4096)) + c.Check(len(p.arena.blocks), Equals, 2) + p.Put([]byte{3}, make([]byte, 3000)) + c.Check(len(p.arena.blocks), Equals, 2) + c.Check(len(p.Get([]byte{3})), Equals, 3000) +} + +func (s testMemDBSuite) TestEmptyDB(c *C) { + p := New(4 * 1024) + c.Check(p.Get([]byte{0}), IsNil) + c.Check(p.Delete([]byte{0}), IsFalse) + it := p.NewIterator() + it.SeekToFirst() + c.Check(it.Valid(), IsFalse) + it.SeekToLast() + c.Check(it.Valid(), IsFalse) + it.SeekForPrev([]byte{0}) + c.Check(it.Valid(), IsFalse) + it.SeekForExclusivePrev([]byte{0}) + c.Check(it.Valid(), IsFalse) + it.Seek([]byte{0xff}) + c.Check(it.Valid(), IsFalse) +} + +func (s testMemDBSuite) TestRest(c *C) { + p := s.fillDB(10000) + p.Reset() + c.Check(p.Get([]byte{0}), IsNil) + c.Check(p.Delete([]byte{0}), IsFalse) + c.Check(p.Size(), Equals, 0) + c.Check(p.Len(), Equals, 0) + + key := []byte{0} + p.Put(key, key) + c.Check(p.Get(key), BytesEquals, key) + + it := p.NewIterator() + it.SeekToFirst() + c.Check(it.Key(), BytesEquals, key) + c.Check(it.Value(), BytesEquals, key) + it.Next() + c.Check(it.Valid(), IsFalse) + + it.SeekToLast() + c.Check(it.Key(), BytesEquals, key) + c.Check(it.Value(), BytesEquals, key) + it.Prev() + c.Check(it.Valid(), IsFalse) +} + +func (s testMemDBSuite) TestRandom(c *C) { + const cnt = 500000 + p1 := New(4 * 1024) + p2 := memdb.New(comparer.DefaultComparer, 4*1024) + var buf [4]byte + for i := 0; i < cnt; i++ { + binary.BigEndian.PutUint32(buf[:], uint32(i)) + p1.Put(buf[:], buf[:]) + _ = p2.Put(buf[:], buf[:]) + } + + c.Check(p1.Len(), Equals, p2.Len()) + c.Check(p1.Size(), Equals, p2.Size()) + + for _, k := range rand.Perm(cnt) { + binary.BigEndian.PutUint32(buf[:], uint32(k)) + switch rand.Intn(4) { + case 0, 1: + var vbuf [4]byte + binary.BigEndian.PutUint32(vbuf[:], uint32(k+rand.Intn(10000))) + p1.Put(buf[:], vbuf[:]) + _ = p2.Put(buf[:], vbuf[:]) + case 2: + p1.Delete(buf[:]) + _ = p2.Delete(buf[:]) + } + } + + c.Check(p1.Len(), Equals, p2.Len()) + c.Check(p1.Size(), Equals, p2.Size()) + + it1 := p1.NewIterator() + it1.SeekToFirst() + + it2 := p2.NewIterator(nil) + + var prevKey, prevVal []byte + for it2.First(); it2.Valid(); it2.Next() { + c.Check(it1.Key(), BytesEquals, it2.Key()) + c.Check(it1.Value(), BytesEquals, it2.Value()) + + it := p1.NewIterator() + it.Seek(it2.Key()) + c.Check(it.Key(), BytesEquals, it2.Key()) + c.Check(it.Value(), BytesEquals, it2.Value()) + + it.SeekForPrev(it2.Key()) + c.Check(it.Key(), BytesEquals, it2.Key()) + c.Check(it.Value(), BytesEquals, it2.Value()) + + if prevKey != nil { + it.SeekForExclusivePrev(it2.Key()) + c.Check(it.Key(), BytesEquals, prevKey) + c.Check(it.Value(), BytesEquals, prevVal) + } + + it1.Next() + prevKey = it2.Key() + prevVal = it2.Value() + } + + it1.SeekToLast() + for it2.Last(); it2.Valid(); it2.Prev() { + c.Check(it1.Key(), BytesEquals, it2.Key()) + c.Check(it1.Value(), BytesEquals, it2.Value()) + it1.Prev() + } +} + func (s testMemDBSuite) fillDB(cnt int) *DB { - p := New(4 * 1024 * 1024) + p := New(4 * 1024) var buf [4]byte for i := 0; i < cnt; i++ { binary.BigEndian.PutUint32(buf[:], uint32(i)) From 85f28dc872f1d2b3de485759c63c866037d1e098 Mon Sep 17 00:00:00 2001 From: Zejun Li Date: Thu, 22 Aug 2019 20:23:24 +0800 Subject: [PATCH 07/10] address comments --- kv/memdb/iterator.go | 2 +- kv/memdb/memdb.go | 4 ++-- kv/memdb/memdb_test.go | 31 ++++++++++++++++++------------- 3 files changed, 21 insertions(+), 16 deletions(-) diff --git a/kv/memdb/iterator.go b/kv/memdb/iterator.go index ebcb501b8d64f..ec50995b02064 100644 --- a/kv/memdb/iterator.go +++ b/kv/memdb/iterator.go @@ -55,7 +55,7 @@ func (it *Iterator) Prev() { // Seek locates the iterator to the first entry with a key >= seekKey. func (it *Iterator) Seek(seekKey []byte) { - node, nodeData, _ := it.db.findGreater(seekKey) // find >=. + node, nodeData, _ := it.db.findGreaterEqual(seekKey) // find >=. it.updateState(node, nodeData) } diff --git a/kv/memdb/memdb.go b/kv/memdb/memdb.go index 7b7df99332a5b..e71495de93c31 100644 --- a/kv/memdb/memdb.go +++ b/kv/memdb/memdb.go @@ -59,7 +59,7 @@ func (db *DB) Reset() { // Get gets the value for the given key. It returns nil if the // DB does not contain the key. func (db *DB) Get(key []byte) []byte { - node, data, match := db.findGreater(key) + node, data, match := db.findGreaterEqual(key) if !match { return nil } @@ -200,7 +200,7 @@ func (db *DB) findSpliceForLevel(arena *arena, key []byte, before nodeWithAddr, } } -func (db *DB) findGreater(key []byte) (*node, []byte, bool) { +func (db *DB) findGreaterEqual(key []byte) (*node, []byte, bool) { prev := db.head.node level := db.height - 1 diff --git a/kv/memdb/memdb_test.go b/kv/memdb/memdb_test.go index 24fb78ddc4dbc..35da836a808b7 100644 --- a/kv/memdb/memdb_test.go +++ b/kv/memdb/memdb_test.go @@ -230,29 +230,34 @@ func (s testMemDBSuite) TestRest(c *C) { func (s testMemDBSuite) TestRandom(c *C) { const cnt = 500000 + keys := make([][]byte, cnt) + for i := range keys { + keys[i] = make([]byte, rand.Intn(19)+1) + rand.Read(keys[i]) + } + p1 := New(4 * 1024) p2 := memdb.New(comparer.DefaultComparer, 4*1024) - var buf [4]byte - for i := 0; i < cnt; i++ { - binary.BigEndian.PutUint32(buf[:], uint32(i)) - p1.Put(buf[:], buf[:]) - _ = p2.Put(buf[:], buf[:]) + for _, k := range keys { + p1.Put(k, k) + _ = p2.Put(k, k) } c.Check(p1.Len(), Equals, p2.Len()) c.Check(p1.Size(), Equals, p2.Size()) - for _, k := range rand.Perm(cnt) { - binary.BigEndian.PutUint32(buf[:], uint32(k)) + rand.Shuffle(cnt, func(i, j int) { keys[i], keys[j] = keys[j], keys[i] }) + + for _, k := range keys { switch rand.Intn(4) { case 0, 1: - var vbuf [4]byte - binary.BigEndian.PutUint32(vbuf[:], uint32(k+rand.Intn(10000))) - p1.Put(buf[:], vbuf[:]) - _ = p2.Put(buf[:], vbuf[:]) + newValue := make([]byte, rand.Intn(19)+1) + rand.Read(newValue) + p1.Put(k, newValue) + _ = p2.Put(k, newValue) case 2: - p1.Delete(buf[:]) - _ = p2.Delete(buf[:]) + p1.Delete(k) + _ = p2.Delete(k) } } From a2117e8cd1d4c658770f9d5e1ad9a6e979da9cb7 Mon Sep 17 00:00:00 2001 From: Zejun Li Date: Mon, 26 Aug 2019 12:31:42 +0800 Subject: [PATCH 08/10] tiny optimize --- kv/memdb/arena.go | 18 +++++++++--------- kv/memdb/memdb_test.go | 3 ++- 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/kv/memdb/arena.go b/kv/memdb/arena.go index 41096c4a1e083..2ba307dc5dd2e 100644 --- a/kv/memdb/arena.go +++ b/kv/memdb/arena.go @@ -39,13 +39,13 @@ const ( type arena struct { blockSize int availIdx int - blocks []*arenaBlock + blocks []arenaBlock } func newArenaLocator(initBlockSize int) *arena { return &arena{ blockSize: initBlockSize, - blocks: []*arenaBlock{newArenaBlock(initBlockSize)}, + blocks: []arenaBlock{newArenaBlock(initBlockSize)}, } } @@ -63,7 +63,7 @@ func (a *arena) alloc(size int) (arenaAddr, []byte) { } for { - block := a.blocks[a.availIdx] + block := &a.blocks[a.availIdx] blockOffset := block.alloc(size) if blockOffset != nullBlockOffset { addr := newArenaAddr(a.availIdx, blockOffset) @@ -83,7 +83,7 @@ func (a *arena) alloc(size int) (arenaAddr, []byte) { func (a *arena) reset() { a.availIdx = 0 a.blockSize = len(a.blocks[0].buf) - a.blocks = []*arenaBlock{a.blocks[0]} + a.blocks = []arenaBlock{a.blocks[0]} a.blocks[0].reset() } @@ -92,8 +92,8 @@ type arenaBlock struct { length int } -func newArenaBlock(blockSize int) *arenaBlock { - return &arenaBlock{ +func newArenaBlock(blockSize int) arenaBlock { + return arenaBlock{ buf: make([]byte, blockSize), } } @@ -104,14 +104,14 @@ func (a *arenaBlock) getFrom(offset uint32) []byte { func (a *arenaBlock) alloc(size int) uint32 { offset := a.length - a.length = offset + size - if a.length > len(a.buf) { + newLen := offset + size + if newLen > len(a.buf) { return nullBlockOffset } + a.length = newLen return uint32(offset) } func (a *arenaBlock) reset() { - a.buf = a.buf[:0] a.length = 0 } diff --git a/kv/memdb/memdb_test.go b/kv/memdb/memdb_test.go index 35da836a808b7..597674a2e096c 100644 --- a/kv/memdb/memdb_test.go +++ b/kv/memdb/memdb_test.go @@ -202,7 +202,7 @@ func (s testMemDBSuite) TestEmptyDB(c *C) { c.Check(it.Valid(), IsFalse) } -func (s testMemDBSuite) TestRest(c *C) { +func (s testMemDBSuite) TestReset(c *C) { p := s.fillDB(10000) p.Reset() c.Check(p.Get([]byte{0}), IsNil) @@ -213,6 +213,7 @@ func (s testMemDBSuite) TestRest(c *C) { key := []byte{0} p.Put(key, key) c.Check(p.Get(key), BytesEquals, key) + c.Check(p.arena.availIdx, Equals, 0) it := p.NewIterator() it.SeekToFirst() From 6eff72d923949bfc42b8d4fffe9d392aceba2746 Mon Sep 17 00:00:00 2001 From: Zejun Li Date: Mon, 26 Aug 2019 14:03:14 +0800 Subject: [PATCH 09/10] let reset reuse all blocks --- kv/memdb/arena.go | 55 +++++++++++++++++++++++++++--------------- kv/memdb/memdb.go | 1 - kv/memdb/memdb_test.go | 19 +++++++++++++++ 3 files changed, 55 insertions(+), 20 deletions(-) diff --git a/kv/memdb/arena.go b/kv/memdb/arena.go index 2ba307dc5dd2e..edcb8ab8ebc7f 100644 --- a/kv/memdb/arena.go +++ b/kv/memdb/arena.go @@ -54,37 +54,54 @@ func (a *arena) getFrom(addr arenaAddr) []byte { } func (a *arena) alloc(size int) (arenaAddr, []byte) { - if size > a.blockSize { + if size >= maxBlockSize { // Use a separate block to store entry which size larger than specified block size. blk := newArenaBlock(size) - addr := newArenaAddr(len(a.blocks), 0) + blk.length = size a.blocks = append(a.blocks, blk) + + addr := newArenaAddr(len(a.blocks)-1, 0) return addr, blk.buf } - for { - block := &a.blocks[a.availIdx] - blockOffset := block.alloc(size) - if blockOffset != nullBlockOffset { - addr := newArenaAddr(a.availIdx, blockOffset) - data := block.buf[blockOffset : int(blockOffset)+size] + for i := a.availIdx; i < len(a.blocks); i++ { + addr, data := a.allocInBlock(i, size) + if !addr.isNull() { return addr, data } + } - blockSize := a.blockSize << 1 - if blockSize <= maxBlockSize { - a.blockSize = blockSize - } - a.blocks = append(a.blocks, newArenaBlock(a.blockSize)) - a.availIdx = int(uint32(len(a.blocks) - 1)) + a.enlarge(size) + return a.allocInBlock(a.availIdx, size) +} + +func (a *arena) enlarge(size int) { + a.blockSize <<= 1 + for a.blockSize <= size { + a.blockSize <<= 1 } + // Size always less than maxBlockSize. + if a.blockSize > maxBlockSize { + a.blockSize = maxBlockSize + } + a.blocks = append(a.blocks, newArenaBlock(a.blockSize)) + a.availIdx = int(uint32(len(a.blocks) - 1)) +} + +func (a *arena) allocInBlock(idx, size int) (arenaAddr, []byte) { + offset, data := a.blocks[idx].alloc(size) + if offset == nullBlockOffset { + return arenaAddr{}, nil + } + return newArenaAddr(idx, offset), data } func (a *arena) reset() { a.availIdx = 0 a.blockSize = len(a.blocks[0].buf) - a.blocks = []arenaBlock{a.blocks[0]} - a.blocks[0].reset() + for i := 0; i < len(a.blocks); i++ { + a.blocks[i].reset() + } } type arenaBlock struct { @@ -102,14 +119,14 @@ func (a *arenaBlock) getFrom(offset uint32) []byte { return a.buf[offset:] } -func (a *arenaBlock) alloc(size int) uint32 { +func (a *arenaBlock) alloc(size int) (uint32, []byte) { offset := a.length newLen := offset + size if newLen > len(a.buf) { - return nullBlockOffset + return nullBlockOffset, nil } a.length = newLen - return uint32(offset) + return uint32(offset), a.buf[offset : offset+size] } func (a *arenaBlock) reset() { diff --git a/kv/memdb/memdb.go b/kv/memdb/memdb.go index e71495de93c31..702b91d238d77 100644 --- a/kv/memdb/memdb.go +++ b/kv/memdb/memdb.go @@ -47,7 +47,6 @@ func New(initBlockSize int) *DB { } // Reset resets the DB to initial empty state. -// Release all blocks except the init one. func (db *DB) Reset() { db.height = 1 db.head.node = new(node) diff --git a/kv/memdb/memdb_test.go b/kv/memdb/memdb_test.go index 597674a2e096c..faffc64857aff 100644 --- a/kv/memdb/memdb_test.go +++ b/kv/memdb/memdb_test.go @@ -229,6 +229,25 @@ func (s testMemDBSuite) TestReset(c *C) { c.Check(it.Valid(), IsFalse) } +func (s testMemDBSuite) TestMemReuse(c *C) { + p := New(256) + p.Put(make([]byte, 257), nil) + p.Reset() + + p.Put(make([]byte, 257), nil) + c.Check(len(p.arena.blocks), Equals, 2) + c.Check(p.arena.availIdx, Equals, 0) + p.Put(make([]byte, 12), nil) + c.Check(len(p.arena.blocks), Equals, 2) + c.Check(p.arena.blocks[0].length, Greater, 0) + p.Put(make([]byte, 128<<20), nil) + c.Check(len(p.arena.blocks), Equals, 3) + c.Check(p.arena.blocks[0].length, Greater, 0) + p.Put(make([]byte, 2048), nil) + c.Check(len(p.arena.blocks), Equals, 4) + c.Check(p.arena.availIdx, Equals, 3) +} + func (s testMemDBSuite) TestRandom(c *C) { const cnt = 500000 keys := make([][]byte, cnt) From f38d38619f092d5f7ba7a49b704a516647cad5a3 Mon Sep 17 00:00:00 2001 From: Zejun Li Date: Tue, 27 Aug 2019 10:58:18 +0800 Subject: [PATCH 10/10] just reuse the first block --- kv/memdb/arena.go | 13 +++++-------- kv/memdb/memdb.go | 1 + kv/memdb/memdb_test.go | 19 ------------------- 3 files changed, 6 insertions(+), 27 deletions(-) diff --git a/kv/memdb/arena.go b/kv/memdb/arena.go index edcb8ab8ebc7f..4494a8ca7e1c6 100644 --- a/kv/memdb/arena.go +++ b/kv/memdb/arena.go @@ -64,11 +64,9 @@ func (a *arena) alloc(size int) (arenaAddr, []byte) { return addr, blk.buf } - for i := a.availIdx; i < len(a.blocks); i++ { - addr, data := a.allocInBlock(i, size) - if !addr.isNull() { - return addr, data - } + addr, data := a.allocInBlock(a.availIdx, size) + if !addr.isNull() { + return addr, data } a.enlarge(size) @@ -99,9 +97,8 @@ func (a *arena) allocInBlock(idx, size int) (arenaAddr, []byte) { func (a *arena) reset() { a.availIdx = 0 a.blockSize = len(a.blocks[0].buf) - for i := 0; i < len(a.blocks); i++ { - a.blocks[i].reset() - } + a.blocks = []arenaBlock{a.blocks[0]} + a.blocks[0].reset() } type arenaBlock struct { diff --git a/kv/memdb/memdb.go b/kv/memdb/memdb.go index 702b91d238d77..57c05c9c2fdc3 100644 --- a/kv/memdb/memdb.go +++ b/kv/memdb/memdb.go @@ -47,6 +47,7 @@ func New(initBlockSize int) *DB { } // Reset resets the DB to initial empty state. +// Release all blocks except the initial one. func (db *DB) Reset() { db.height = 1 db.head.node = new(node) diff --git a/kv/memdb/memdb_test.go b/kv/memdb/memdb_test.go index faffc64857aff..597674a2e096c 100644 --- a/kv/memdb/memdb_test.go +++ b/kv/memdb/memdb_test.go @@ -229,25 +229,6 @@ func (s testMemDBSuite) TestReset(c *C) { c.Check(it.Valid(), IsFalse) } -func (s testMemDBSuite) TestMemReuse(c *C) { - p := New(256) - p.Put(make([]byte, 257), nil) - p.Reset() - - p.Put(make([]byte, 257), nil) - c.Check(len(p.arena.blocks), Equals, 2) - c.Check(p.arena.availIdx, Equals, 0) - p.Put(make([]byte, 12), nil) - c.Check(len(p.arena.blocks), Equals, 2) - c.Check(p.arena.blocks[0].length, Greater, 0) - p.Put(make([]byte, 128<<20), nil) - c.Check(len(p.arena.blocks), Equals, 3) - c.Check(p.arena.blocks[0].length, Greater, 0) - p.Put(make([]byte, 2048), nil) - c.Check(len(p.arena.blocks), Equals, 4) - c.Check(p.arena.availIdx, Equals, 3) -} - func (s testMemDBSuite) TestRandom(c *C) { const cnt = 500000 keys := make([][]byte, cnt)