Skip to content

Commit

Permalink
trie: remove internal nodes between shortNode and child in path mode (e…
Browse files Browse the repository at this point in the history
…thereum#28163)

* trie: remove internal nodes between shortNode and child in path mode

* trie: address comments

* core/rawdb, trie: address comments

* core/rawdb: delete unused func

* trie: change comments

* trie: add missing tests

* trie: fix lint
  • Loading branch information
rjl493456442 authored and joeylichang committed Oct 16, 2023
1 parent c3199ab commit 5a3109b
Show file tree
Hide file tree
Showing 3 changed files with 238 additions and 40 deletions.
20 changes: 20 additions & 0 deletions core/rawdb/accessors_trie.go
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,16 @@ func HasAccountTrieNode(db ethdb.KeyValueReader, path []byte, hash common.Hash)
return h.hash(data) == hash
}

// ExistsAccountTrieNode checks the presence of the account trie node with the
// specified node path, regardless of the node hash.
func ExistsAccountTrieNode(db ethdb.KeyValueReader, path []byte) bool {
has, err := db.Has(accountTrieNodeKey(path))
if err != nil {
return false
}
return has
}

// WriteAccountTrieNode writes the provided account trie node into database.
func WriteAccountTrieNode(db ethdb.KeyValueWriter, path []byte, node []byte) {
if err := db.Put(accountTrieNodeKey(path), node); err != nil {
Expand Down Expand Up @@ -127,6 +137,16 @@ func HasStorageTrieNode(db ethdb.KeyValueReader, accountHash common.Hash, path [
return h.hash(data) == hash
}

// ExistsStorageTrieNode checks the presence of the storage trie node with the
// specified account hash and node path, regardless of the node hash.
func ExistsStorageTrieNode(db ethdb.KeyValueReader, accountHash common.Hash, path []byte) bool {
has, err := db.Has(storageTrieNodeKey(accountHash, path))
if err != nil {
return false
}
return has
}

// WriteStorageTrieNode writes the provided storage trie node into database.
func WriteStorageTrieNode(db ethdb.KeyValueWriter, accountHash common.Hash, path []byte, node []byte) {
if err := db.Put(storageTrieNodeKey(accountHash, path), node); err != nil {
Expand Down
95 changes: 79 additions & 16 deletions trie/sync.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ import (
"github.com/ethereum/go-ethereum/core/types"
"github.com/ethereum/go-ethereum/ethdb"
"github.com/ethereum/go-ethereum/log"
"github.com/ethereum/go-ethereum/metrics"
)

// ErrNotRequested is returned by the trie sync when it's requested to process a
Expand All @@ -42,6 +43,16 @@ var ErrAlreadyProcessed = errors.New("already processed")
// memory if the node was configured with a significant number of peers.
const maxFetchesPerDepth = 16384

var (
// deletionGauge is the metric to track how many trie node deletions
// are performed in total during the sync process.
deletionGauge = metrics.NewRegisteredGauge("trie/sync/delete", nil)

// lookupGauge is the metric to track how many trie node lookups are
// performed to determine if node needs to be deleted.
lookupGauge = metrics.NewRegisteredGauge("trie/sync/lookup", nil)
)

// SyncPath is a path tuple identifying a particular trie node either in a single
// trie (account) or a layered trie (account -> storage).
//
Expand Down Expand Up @@ -93,9 +104,10 @@ type LeafCallback func(keys [][]byte, path []byte, leaf []byte, parent common.Ha

// nodeRequest represents a scheduled or already in-flight trie node retrieval request.
type nodeRequest struct {
hash common.Hash // Hash of the trie node to retrieve
path []byte // Merkle path leading to this node for prioritization
data []byte // Data content of the node, cached until all subtrees complete
hash common.Hash // Hash of the trie node to retrieve
path []byte // Merkle path leading to this node for prioritization
data []byte // Data content of the node, cached until all subtrees complete
deletes [][]byte // List of internal path segments for trie nodes to delete

parent *nodeRequest // Parent state node referencing this entry
deps int // Number of dependencies before allowed to commit this node
Expand Down Expand Up @@ -125,18 +137,20 @@ type CodeSyncResult struct {
// syncMemBatch is an in-memory buffer of successfully downloaded but not yet
// persisted data items.
type syncMemBatch struct {
nodes map[string][]byte // In-memory membatch of recently completed nodes
hashes map[string]common.Hash // Hashes of recently completed nodes
codes map[common.Hash][]byte // In-memory membatch of recently completed codes
size uint64 // Estimated batch-size of in-memory data.
nodes map[string][]byte // In-memory membatch of recently completed nodes
hashes map[string]common.Hash // Hashes of recently completed nodes
deletes map[string]struct{} // List of paths for trie node to delete
codes map[common.Hash][]byte // In-memory membatch of recently completed codes
size uint64 // Estimated batch-size of in-memory data.
}

// newSyncMemBatch allocates a new memory-buffer for not-yet persisted trie nodes.
func newSyncMemBatch() *syncMemBatch {
return &syncMemBatch{
nodes: make(map[string][]byte),
hashes: make(map[string]common.Hash),
codes: make(map[common.Hash][]byte),
nodes: make(map[string][]byte),
hashes: make(map[string]common.Hash),
deletes: make(map[string]struct{}),
codes: make(map[common.Hash][]byte),
}
}

Expand Down Expand Up @@ -347,16 +361,23 @@ func (s *Sync) ProcessNode(result NodeSyncResult) error {
// Commit flushes the data stored in the internal membatch out to persistent
// storage, returning any occurred error.
func (s *Sync) Commit(dbw ethdb.Batch) error {
// Dump the membatch into a database dbw
// Flush the pending node writes into database batch.
for path, value := range s.membatch.nodes {
owner, inner := ResolvePath([]byte(path))
rawdb.WriteTrieNode(dbw, owner, inner, s.membatch.hashes[path], value, s.scheme)
}
// Flush the pending node deletes into the database batch.
// Please note that each written and deleted node has a
// unique path, ensuring no duplication occurs.
for path := range s.membatch.deletes {
owner, inner := ResolvePath([]byte(path))
rawdb.DeleteTrieNode(dbw, owner, inner, common.Hash{} /* unused */, s.scheme)
}
// Flush the pending code writes into database batch.
for hash, value := range s.membatch.codes {
rawdb.WriteCode(dbw, hash, value)
}
// Drop the membatch data and return
s.membatch = newSyncMemBatch()
s.membatch = newSyncMemBatch() // reset the batch
return nil
}

Expand Down Expand Up @@ -425,6 +446,39 @@ func (s *Sync) children(req *nodeRequest, object node) ([]*nodeRequest, error) {
node: node.Val,
path: append(append([]byte(nil), req.path...), key...),
}}
// Mark all internal nodes between shortNode and its **in disk**
// child as invalid. This is essential in the case of path mode
// scheme; otherwise, state healing might overwrite existing child
// nodes silently while leaving a dangling parent node within the
// range of this internal path on disk. This would break the
// guarantee for state healing.
//
// While it's possible for this shortNode to overwrite a previously
// existing full node, the other branches of the fullNode can be
// retained as they remain untouched and complete.
//
// This step is only necessary for path mode, as there is no deletion
// in hash mode at all.
if _, ok := node.Val.(hashNode); ok && s.scheme == rawdb.PathScheme {
owner, inner := ResolvePath(req.path)
for i := 1; i < len(key); i++ {
// While checking for a non-existent item in Pebble can be less efficient
// without a bloom filter, the relatively low frequency of lookups makes
// the performance impact negligible.
var exists bool
if owner == (common.Hash{}) {
exists = rawdb.ExistsAccountTrieNode(s.database, append(inner, key[:i]...))
} else {
exists = rawdb.ExistsStorageTrieNode(s.database, owner, append(inner, key[:i]...))
}
if exists {
req.deletes = append(req.deletes, key[:i])
deletionGauge.Inc(1)
log.Debug("Detected dangling node", "owner", owner, "path", append(inner, key[:i]...))
}
}
lookupGauge.Inc(int64(len(key) - 1))
}
case *fullNode:
for i := 0; i < 17; i++ {
if node.Children[i] != nil {
Expand Down Expand Up @@ -509,10 +563,19 @@ func (s *Sync) commitNodeRequest(req *nodeRequest) error {
// Write the node content to the membatch
s.membatch.nodes[string(req.path)] = req.data
s.membatch.hashes[string(req.path)] = req.hash

// The size tracking refers to the db-batch, not the in-memory data.
// Therefore, we ignore the req.path, and account only for the hash+data
// which eventually is written to db.
s.membatch.size += common.HashLength + uint64(len(req.data))
if s.scheme == rawdb.PathScheme {
s.membatch.size += uint64(len(req.path) + len(req.data))
} else {
s.membatch.size += common.HashLength + uint64(len(req.data))
}
// Delete the internal nodes which are marked as invalid
for _, segment := range req.deletes {
path := append(req.path, segment...)
s.membatch.deletes[string(path)] = struct{}{}
s.membatch.size += uint64(len(path))
}
delete(s.nodeReqs, string(req.path))
s.fetches[len(req.path)]--

Expand Down
Loading

0 comments on commit 5a3109b

Please sign in to comment.