Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ethdb/memorydb, trie: reduced allocations #28473

Merged
merged 4 commits into from
Nov 15, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 7 additions & 7 deletions ethdb/memorydb/memorydb.go
Original file line number Diff line number Diff line change
Expand Up @@ -207,7 +207,7 @@ func (db *Database) Len() int {
// keyvalue is a key-value tuple tagged with a deletion field to allow creating
// memory-database write batches.
type keyvalue struct {
key []byte
key string
value []byte
delete bool
}
Expand All @@ -222,14 +222,14 @@ type batch struct {

// Put inserts the given value into the batch for later committing.
func (b *batch) Put(key, value []byte) error {
b.writes = append(b.writes, keyvalue{common.CopyBytes(key), common.CopyBytes(value), false})
b.writes = append(b.writes, keyvalue{string(key), common.CopyBytes(value), false})
b.size += len(key) + len(value)
return nil
}

// Delete inserts the a key removal into the batch for later committing.
func (b *batch) Delete(key []byte) error {
b.writes = append(b.writes, keyvalue{common.CopyBytes(key), nil, true})
b.writes = append(b.writes, keyvalue{string(key), nil, true})
b.size += len(key)
return nil
}
Expand All @@ -249,10 +249,10 @@ func (b *batch) Write() error {
}
for _, keyvalue := range b.writes {
if keyvalue.delete {
delete(b.db.db, string(keyvalue.key))
delete(b.db.db, keyvalue.key)
continue
}
b.db.db[string(keyvalue.key)] = keyvalue.value
b.db.db[keyvalue.key] = keyvalue.value
}
return nil
}
Expand All @@ -267,12 +267,12 @@ func (b *batch) Reset() {
func (b *batch) Replay(w ethdb.KeyValueWriter) error {
for _, keyvalue := range b.writes {
if keyvalue.delete {
if err := w.Delete(keyvalue.key); err != nil {
if err := w.Delete([]byte(keyvalue.key)); err != nil {
return err
}
continue
}
if err := w.Put(keyvalue.key, keyvalue.value); err != nil {
if err := w.Put([]byte(keyvalue.key), keyvalue.value); err != nil {
return err
}
}
Expand Down
18 changes: 18 additions & 0 deletions ethdb/memorydb/memorydb_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
package memorydb

import (
"encoding/binary"
"testing"

"github.com/ethereum/go-ethereum/ethdb"
Expand All @@ -30,3 +31,20 @@ func TestMemoryDB(t *testing.T) {
})
})
}

// BenchmarkBatchAllocs measures the time/allocs for storing 120 kB of data
func BenchmarkBatchAllocs(b *testing.B) {
b.ReportAllocs()
var key = make([]byte, 20)
var val = make([]byte, 100)
// 120 * 1_000 -> 120_000 == 120kB
for i := 0; i < b.N; i++ {
batch := New().NewBatch()
for j := uint64(0); j < 1000; j++ {
binary.BigEndian.PutUint64(key, j)
binary.BigEndian.PutUint64(val, j)
batch.Put(key, val)
}
batch.Write()
}
}
71 changes: 45 additions & 26 deletions trie/iterator.go
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,8 @@ type nodeIterator struct {
path []byte // Path to the current node
err error // Failure set in case of an internal error in the iterator

resolver NodeResolver // optional node resolver for avoiding disk hits
resolver NodeResolver // optional node resolver for avoiding disk hits
pool []*nodeIteratorState // local pool for iteratorstates
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Any particular reason to put the pool here? Usually we always define a global pool.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Well, it's rather that I didn't think a global nodeIteratorPool was "generically useful". We only use node iterators here and there. So I figured that we can take the overhead of 'ramping up' a fresh pool in those instances, rather than having yet another global pool.

Aside from that, a local non-synched pool is slightly faster since it doesn't need syncing.

}

// errIteratorEnd is stored in nodeIterator.err when iteration is done.
Expand Down Expand Up @@ -172,6 +173,24 @@ func newNodeIterator(trie *Trie, start []byte) NodeIterator {
return it
}

func (it *nodeIterator) putInPool(item *nodeIteratorState) {
if len(it.pool) < 40 {
Copy link
Contributor

@Halimao Halimao Nov 16, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hi @holiman , I'm confused why pool size should be 40? And not sure concurrency safe should be considered in putInPool and getFromPool

item.node = nil
it.pool = append(it.pool, item)
}
}

func (it *nodeIterator) getFromPool() *nodeIteratorState {
idx := len(it.pool) - 1
if idx < 0 {
return new(nodeIteratorState)
}
el := it.pool[idx]
it.pool[idx] = nil
it.pool = it.pool[:idx]
return el
}

func (it *nodeIterator) AddResolver(resolver NodeResolver) {
it.resolver = resolver
}
Expand Down Expand Up @@ -423,8 +442,9 @@ func (st *nodeIteratorState) resolve(it *nodeIterator, path []byte) error {
return nil
}

func findChild(n *fullNode, index int, path []byte, ancestor common.Hash) (node, *nodeIteratorState, []byte, int) {
func (it *nodeIterator) findChild(n *fullNode, index int, ancestor common.Hash) (node, *nodeIteratorState, []byte, int) {
var (
path = it.path
child node
state *nodeIteratorState
childPath []byte
Expand All @@ -433,13 +453,12 @@ func findChild(n *fullNode, index int, path []byte, ancestor common.Hash) (node,
if n.Children[index] != nil {
child = n.Children[index]
hash, _ := child.cache()
state = &nodeIteratorState{
hash: common.BytesToHash(hash),
node: child,
parent: ancestor,
index: -1,
pathlen: len(path),
}
state = it.getFromPool()
state.hash = common.BytesToHash(hash)
state.node = child
state.parent = ancestor
state.index = -1
state.pathlen = len(path)
childPath = append(childPath, path...)
childPath = append(childPath, byte(index))
return child, state, childPath, index
Expand All @@ -452,21 +471,20 @@ func (it *nodeIterator) nextChild(parent *nodeIteratorState, ancestor common.Has
switch node := parent.node.(type) {
case *fullNode:
// Full node, move to the first non-nil child.
if child, state, path, index := findChild(node, parent.index+1, it.path, ancestor); child != nil {
if child, state, path, index := it.findChild(node, parent.index+1, ancestor); child != nil {
parent.index = index - 1
return state, path, true
}
case *shortNode:
// Short node, return the pointer singleton child
if parent.index < 0 {
hash, _ := node.Val.cache()
state := &nodeIteratorState{
hash: common.BytesToHash(hash),
node: node.Val,
parent: ancestor,
index: -1,
pathlen: len(it.path),
}
state := it.getFromPool()
state.hash = common.BytesToHash(hash)
state.node = node.Val
state.parent = ancestor
state.index = -1
state.pathlen = len(it.path)
path := append(it.path, node.Key...)
return state, path, true
}
Expand All @@ -480,7 +498,7 @@ func (it *nodeIterator) nextChildAt(parent *nodeIteratorState, ancestor common.H
switch n := parent.node.(type) {
case *fullNode:
// Full node, move to the first non-nil child before the desired key position
child, state, path, index := findChild(n, parent.index+1, it.path, ancestor)
child, state, path, index := it.findChild(n, parent.index+1, ancestor)
if child == nil {
// No more children in this fullnode
return parent, it.path, false
Expand All @@ -492,7 +510,7 @@ func (it *nodeIterator) nextChildAt(parent *nodeIteratorState, ancestor common.H
}
// The child is before the seek position. Try advancing
for {
nextChild, nextState, nextPath, nextIndex := findChild(n, index+1, it.path, ancestor)
nextChild, nextState, nextPath, nextIndex := it.findChild(n, index+1, ancestor)
// If we run out of children, or skipped past the target, return the
// previous one
if nextChild == nil || bytes.Compare(nextPath, key) >= 0 {
Expand All @@ -506,13 +524,12 @@ func (it *nodeIterator) nextChildAt(parent *nodeIteratorState, ancestor common.H
// Short node, return the pointer singleton child
if parent.index < 0 {
hash, _ := n.Val.cache()
state := &nodeIteratorState{
hash: common.BytesToHash(hash),
node: n.Val,
parent: ancestor,
index: -1,
pathlen: len(it.path),
}
state := it.getFromPool()
state.hash = common.BytesToHash(hash)
state.node = n.Val
state.parent = ancestor
state.index = -1
state.pathlen = len(it.path)
path := append(it.path, n.Key...)
return state, path, true
}
Expand All @@ -533,6 +550,8 @@ func (it *nodeIterator) pop() {
it.path = it.path[:last.pathlen]
it.stack[len(it.stack)-1] = nil
it.stack = it.stack[:len(it.stack)-1]
// last is now unused
it.putInPool(last)
}

func compareNodes(a, b NodeIterator) int {
Expand Down
12 changes: 12 additions & 0 deletions trie/iterator_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -616,3 +616,15 @@ func isTrieNode(scheme string, key, val []byte) (bool, []byte, common.Hash) {
}
return true, path, hash
}

func BenchmarkIterator(b *testing.B) {
diskDb, srcDb, tr, _ := makeTestTrie(rawdb.HashScheme)
root := tr.Hash()
b.ReportAllocs()
b.ResetTimer()
for i := 0; i < b.N; i++ {
if err := checkTrieConsistency(diskDb, srcDb.Scheme(), root, false); err != nil {
b.Fatal(err)
}
}
}
2 changes: 1 addition & 1 deletion trie/sync_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -571,7 +571,7 @@ func testIncompleteSync(t *testing.T, scheme string) {
hash := crypto.Keccak256Hash(result.Data)
if hash != root {
addedKeys = append(addedKeys, result.Path)
addedHashes = append(addedHashes, crypto.Keccak256Hash(result.Data))
addedHashes = append(addedHashes, hash)
}
}
// Fetch the next batch to retrieve
Expand Down