Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

trie: remove internal nodes between shortNode and child in path mode #28163

Merged
merged 7 commits into from
Sep 22, 2023
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 20 additions & 0 deletions core/rawdb/accessors_trie.go
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,16 @@ func HasAccountTrieNode(db ethdb.KeyValueReader, path []byte, hash common.Hash)
return h.hash(data) == hash
}

// ExistsAccountTrieNode checks the presence of the account trie node with the
// specified node path, regardless of the node hash.
func ExistsAccountTrieNode(db ethdb.KeyValueReader, path []byte) bool {
has, err := db.Has(accountTrieNodeKey(path))
if err != nil {
return false
}
return has
}

// WriteAccountTrieNode writes the provided account trie node into database.
func WriteAccountTrieNode(db ethdb.KeyValueWriter, path []byte, node []byte) {
if err := db.Put(accountTrieNodeKey(path), node); err != nil {
Expand Down Expand Up @@ -127,6 +137,16 @@ func HasStorageTrieNode(db ethdb.KeyValueReader, accountHash common.Hash, path [
return h.hash(data) == hash
}

// ExistsStorageTrieNode checks the presence of the storage trie node with the
// specified account hash and node path, regardless of the node hash.
func ExistsStorageTrieNode(db ethdb.KeyValueReader, accountHash common.Hash, path []byte) bool {
has, err := db.Has(storageTrieNodeKey(accountHash, path))
if err != nil {
return false
}
return has
}

// WriteStorageTrieNode writes the provided storage trie node into database.
func WriteStorageTrieNode(db ethdb.KeyValueWriter, accountHash common.Hash, path []byte, node []byte) {
if err := db.Put(storageTrieNodeKey(accountHash, path), node); err != nil {
Expand Down
95 changes: 79 additions & 16 deletions trie/sync.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ import (
"github.com/ethereum/go-ethereum/core/types"
"github.com/ethereum/go-ethereum/ethdb"
"github.com/ethereum/go-ethereum/log"
"github.com/ethereum/go-ethereum/metrics"
)

// ErrNotRequested is returned by the trie sync when it's requested to process a
Expand All @@ -42,6 +43,16 @@ var ErrAlreadyProcessed = errors.New("already processed")
// memory if the node was configured with a significant number of peers.
const maxFetchesPerDepth = 16384

var (
// deletionGauge is the metric to track how many trie node deletions
// are performed in total during the sync process.
deletionGauge = metrics.NewRegisteredGauge("trie/sync/delete", nil)

// lookupGauge is the metric to track how many trie node lookups are
// performed to determine if node needs to be deleted.
lookupGauge = metrics.NewRegisteredGauge("trie/sync/lookup", nil)
)

// SyncPath is a path tuple identifying a particular trie node either in a single
// trie (account) or a layered trie (account -> storage).
//
Expand Down Expand Up @@ -93,9 +104,10 @@ type LeafCallback func(keys [][]byte, path []byte, leaf []byte, parent common.Ha

// nodeRequest represents a scheduled or already in-flight trie node retrieval request.
type nodeRequest struct {
hash common.Hash // Hash of the trie node to retrieve
path []byte // Merkle path leading to this node for prioritization
data []byte // Data content of the node, cached until all subtrees complete
hash common.Hash // Hash of the trie node to retrieve
path []byte // Merkle path leading to this node for prioritization
data []byte // Data content of the node, cached until all subtrees complete
deletes [][]byte // List of internal path segments for trie nodes to delete

parent *nodeRequest // Parent state node referencing this entry
deps int // Number of dependencies before allowed to commit this node
Expand Down Expand Up @@ -125,18 +137,20 @@ type CodeSyncResult struct {
// syncMemBatch is an in-memory buffer of successfully downloaded but not yet
// persisted data items.
type syncMemBatch struct {
nodes map[string][]byte // In-memory membatch of recently completed nodes
hashes map[string]common.Hash // Hashes of recently completed nodes
codes map[common.Hash][]byte // In-memory membatch of recently completed codes
size uint64 // Estimated batch-size of in-memory data.
nodes map[string][]byte // In-memory membatch of recently completed nodes
hashes map[string]common.Hash // Hashes of recently completed nodes
deletes map[string]struct{} // List of paths for trie node to delete
codes map[common.Hash][]byte // In-memory membatch of recently completed codes
size uint64 // Estimated batch-size of in-memory data.
}

// newSyncMemBatch allocates a new memory-buffer for not-yet persisted trie nodes.
func newSyncMemBatch() *syncMemBatch {
return &syncMemBatch{
nodes: make(map[string][]byte),
hashes: make(map[string]common.Hash),
codes: make(map[common.Hash][]byte),
nodes: make(map[string][]byte),
hashes: make(map[string]common.Hash),
deletes: make(map[string]struct{}),
codes: make(map[common.Hash][]byte),
}
}

Expand Down Expand Up @@ -347,16 +361,23 @@ func (s *Sync) ProcessNode(result NodeSyncResult) error {
// Commit flushes the data stored in the internal membatch out to persistent
// storage, returning any occurred error.
func (s *Sync) Commit(dbw ethdb.Batch) error {
// Dump the membatch into a database dbw
// Flush the pending node writes into database batch.
for path, value := range s.membatch.nodes {
owner, inner := ResolvePath([]byte(path))
rawdb.WriteTrieNode(dbw, owner, inner, s.membatch.hashes[path], value, s.scheme)
}
// Flush the pending node deletes into the database batch.
// Please note that each written and deleted node has a
// unique path, ensuring no duplication occurs.
for path := range s.membatch.deletes {
owner, inner := ResolvePath([]byte(path))
rawdb.DeleteTrieNode(dbw, owner, inner, common.Hash{} /* unused */, s.scheme)
}
Comment on lines +372 to +375
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Shouldn't you do the deletions before the Writes? In case we're about to overwrite some parts of the path, the deletions might contain all parts, and we don't want to delete the things we just wrote. Right?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I was also thinking about it. IMO the deletions should only contain the short-key-internal parts, so it should not duplicate any existing new node we've just written.

Copy link
Member Author

@rjl493456442 rjl493456442 Sep 20, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, for each written node and deleted node, the path should be unique. I will add a comment to clarify it.

// Flush the pending code writes into database batch.
for hash, value := range s.membatch.codes {
rawdb.WriteCode(dbw, hash, value)
}
// Drop the membatch data and return
s.membatch = newSyncMemBatch()
s.membatch = newSyncMemBatch() // reset the batch
return nil
}

Expand Down Expand Up @@ -425,6 +446,39 @@ func (s *Sync) children(req *nodeRequest, object node) ([]*nodeRequest, error) {
node: node.Val,
path: append(append([]byte(nil), req.path...), key...),
}}
// Mark all internal nodes between shortNode and its **in disk**
// child as invalid. This is essential in the case of path mode
// scheme; otherwise, state healing might overwrite existing child
// nodes silently while leaving a dangling parent node within the
// range of this internal path on disk. This would break the
// guarantee for state healing.
//
// While it's possible for this shortNode to overwrite a previously
// existing full node, the other branches of the fullNode can be
// retained as they remain untouched and complete.
//
// This step is only necessary for path mode, as there is no deletion
// in hash mode at all.
if _, ok := node.Val.(hashNode); ok && s.scheme == rawdb.PathScheme {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why do you check against a hashNode? Wouldn't the same issue happen if the child was a valueNode?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There are a few possibilities there:

a) the child node is a shortNode, it means the child node is a complete node stored in disk. In this case, we need to cleanup internal nodes to reclaim the ownership of this path.

b) the child node is a valueNode, it is always embedded in the parent. In this case, it means the path is terminated at this shortNode and have no more nodes stored in disk. We don't need to clean up the disk nodes after this shortNode because the path after it is not occupied by us.

c) the child node is an embedded full node(smaller than 32b). It's identical with case b).

owner, inner := ResolvePath(req.path)
for i := 1; i < len(key); i++ {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ugh, we loop over all possible keys and request them from pebble... this is gonna hurt...

Copy link
Member Author

@rjl493456442 rjl493456442 Sep 20, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In practice, it won't be too much. The target shortNode is the one between two fullNodes in a path, just in case a few elements share a path prefix. And in mainnet, the shared prefix won't be too long(possibly a few nibbles). And for the shortNode contains a value, then the key can be quite long, but it's not our target.

// While checking for a non-existent item in Pebble can be less efficient
// without a bloom filter, the relatively low frequency of lookups makes
// the performance impact negligible.
var exists bool
if owner == (common.Hash{}) {
exists = rawdb.ExistsAccountTrieNode(s.database, append(inner, key[:i]...))
} else {
exists = rawdb.ExistsStorageTrieNode(s.database, owner, append(inner, key[:i]...))
}
if exists {
req.deletes = append(req.deletes, key[:i])
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This tripped me up. In req.deletes, you store key[:i], which is the key which this shortnode (extension-node) is holding. However, the actual full key is larger, and you use inner to construct the full one:

rawdb.ExistsAccountTrieNode(s.database, append(inner, key[:i]...))

So, a bit later in this code, when going from req.deletes -> membatch.deletes, you convert the "partial paths" into full paths by prefixing:

for _, segment := range req.deletes {
		path := append(req.path, segment...)
		s.membatch.deletes[string(path)] = struct{}{}
		s.membatch.size += uint64(len(path))
	}

I missed the req.deletes -> membatch.deletes conversion the first time around, and couldn't figure out how on earth the snippet which does the deletion could work, using non-full paths:

	for path := range s.membatch.deletes {
		owner, inner := ResolvePath([]byte(path))
		rawdb.DeleteTrieNode(dbw, owner, inner, common.Hash{} /* unused */, s.scheme)
	}

I guess you save a bit of memory by not storing the expanded path here, and recalculating it later. So I can't really object, just noting that it makes it a bit more complex.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's the intention, yes.

deletionGauge.Inc(1)
log.Debug("Detected dangling node", "owner", owner, "path", append(inner, key[:i]...))
}
}
lookupGauge.Inc(int64(len(key) - 1))
}
case *fullNode:
for i := 0; i < 17; i++ {
if node.Children[i] != nil {
Expand Down Expand Up @@ -509,10 +563,19 @@ func (s *Sync) commitNodeRequest(req *nodeRequest) error {
// Write the node content to the membatch
s.membatch.nodes[string(req.path)] = req.data
s.membatch.hashes[string(req.path)] = req.hash

// The size tracking refers to the db-batch, not the in-memory data.
// Therefore, we ignore the req.path, and account only for the hash+data
// which eventually is written to db.
s.membatch.size += common.HashLength + uint64(len(req.data))
if s.scheme == rawdb.PathScheme {
s.membatch.size += uint64(len(req.path) + len(req.data))
} else {
s.membatch.size += common.HashLength + uint64(len(req.data))
}
// Delete the internal nodes which are marked as invalid
for _, segment := range req.deletes {
path := append(req.path, segment...)
s.membatch.deletes[string(path)] = struct{}{}
s.membatch.size += uint64(len(path))
}
delete(s.nodeReqs, string(req.path))
s.fetches[len(req.path)]--

Expand Down
Loading