trie: remove internal nodes between shortNode and child in path mode #28163
Changes from 6 commits: a9be8e6, 3d5732b, 1522206, 12bbaae, 8274567, 150b5b1, bc82940
@@ -27,6 +27,7 @@ import (
     "github.com/ethereum/go-ethereum/core/types"
     "github.com/ethereum/go-ethereum/ethdb"
     "github.com/ethereum/go-ethereum/log"
+    "github.com/ethereum/go-ethereum/metrics"
 )

 // ErrNotRequested is returned by the trie sync when it's requested to process a
@@ -42,6 +43,16 @@ var ErrAlreadyProcessed = errors.New("already processed")
 // memory if the node was configured with a significant number of peers.
 const maxFetchesPerDepth = 16384

+var (
+    // deletionGauge is the metric to track how many trie node deletions
+    // are performed in total during the sync process.
+    deletionGauge = metrics.NewRegisteredGauge("trie/sync/delete", nil)
+
+    // lookupGauge is the metric to track how many trie node lookups are
+    // performed to determine if node needs to be deleted.
+    lookupGauge = metrics.NewRegisteredGauge("trie/sync/lookup", nil)
+)
+
 // SyncPath is a path tuple identifying a particular trie node either in a single
 // trie (account) or a layered trie (account -> storage).
 //
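As an aside for readers unfamiliar with go-ethereum's metrics package: a registered gauge only needs a name and an optional registry, and the sync code simply bumps it with Inc. The sketch below is illustrative only; the metric name in it is made up and is not part of this PR.

```go
package main

import "github.com/ethereum/go-ethereum/metrics"

// exampleGauge is registered in the default registry (nil registry argument);
// the metric name here is hypothetical, chosen just for this illustration.
var exampleGauge = metrics.NewRegisteredGauge("trie/sync/example", nil)

func main() {
	// Count one event, mirroring deletionGauge.Inc(1) in the diff above.
	// Exporting the value is handled by whatever metrics backend the node
	// is configured with.
	exampleGauge.Inc(1)
}
```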
@@ -93,9 +104,10 @@ type LeafCallback func(keys [][]byte, path []byte, leaf []byte, parent common.Ha

 // nodeRequest represents a scheduled or already in-flight trie node retrieval request.
 type nodeRequest struct {
-    hash common.Hash // Hash of the trie node to retrieve
-    path []byte      // Merkle path leading to this node for prioritization
-    data []byte      // Data content of the node, cached until all subtrees complete
+    hash    common.Hash // Hash of the trie node to retrieve
+    path    []byte      // Merkle path leading to this node for prioritization
+    data    []byte      // Data content of the node, cached until all subtrees complete
+    deletes [][]byte    // List of internal path segments for trie nodes to delete

     parent *nodeRequest // Parent state node referencing this entry
     deps   int          // Number of dependencies before allowed to commit this node
@@ -125,18 +137,20 @@ type CodeSyncResult struct {
 // syncMemBatch is an in-memory buffer of successfully downloaded but not yet
 // persisted data items.
 type syncMemBatch struct {
-    nodes  map[string][]byte      // In-memory membatch of recently completed nodes
-    hashes map[string]common.Hash // Hashes of recently completed nodes
-    codes  map[common.Hash][]byte // In-memory membatch of recently completed codes
-    size   uint64                 // Estimated batch-size of in-memory data.
+    nodes   map[string][]byte      // In-memory membatch of recently completed nodes
+    hashes  map[string]common.Hash // Hashes of recently completed nodes
+    deletes map[string]struct{}    // List of paths for trie node to delete
+    codes   map[common.Hash][]byte // In-memory membatch of recently completed codes
+    size    uint64                 // Estimated batch-size of in-memory data.
 }

 // newSyncMemBatch allocates a new memory-buffer for not-yet persisted trie nodes.
 func newSyncMemBatch() *syncMemBatch {
     return &syncMemBatch{
-        nodes:  make(map[string][]byte),
-        hashes: make(map[string]common.Hash),
-        codes:  make(map[common.Hash][]byte),
+        nodes:   make(map[string][]byte),
+        hashes:  make(map[string]common.Hash),
+        deletes: make(map[string]struct{}),
+        codes:   make(map[common.Hash][]byte),
     }
 }
@@ -347,16 +361,23 @@ func (s *Sync) ProcessNode(result NodeSyncResult) error {
 // Commit flushes the data stored in the internal membatch out to persistent
 // storage, returning any occurred error.
 func (s *Sync) Commit(dbw ethdb.Batch) error {
-    // Dump the membatch into a database dbw
+    // Flush the pending node writes into database batch.
     for path, value := range s.membatch.nodes {
         owner, inner := ResolvePath([]byte(path))
         rawdb.WriteTrieNode(dbw, owner, inner, s.membatch.hashes[path], value, s.scheme)
     }
+    // Flush the pending node deletes into the database batch.
+    // Please note that each written and deleted node has a
+    // unique path, ensuring no duplication occurs.
+    for path := range s.membatch.deletes {
+        owner, inner := ResolvePath([]byte(path))
+        rawdb.DeleteTrieNode(dbw, owner, inner, common.Hash{} /* unused */, s.scheme)
+    }
+    // Flush the pending code writes into database batch.
     for hash, value := range s.membatch.codes {
         rawdb.WriteCode(dbw, hash, value)
     }
-    // Drop the membatch data and return
-    s.membatch = newSyncMemBatch()
+    s.membatch = newSyncMemBatch() // reset the batch
     return nil
 }
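Both flush loops above split a membatch path with ResolvePath into the owning account hash and the path inside that trie. The following is a simplified, self-contained sketch of that idea; it mirrors the intent of the helper rather than its exact code, and the 64-nibble threshold corresponds to 2 * common.HashLength.

```go
package main

import "fmt"

// resolvePath is a simplified stand-in for the helper used above: a membatch
// path either addresses the account trie directly, or starts with the
// 64-nibble owner (account hash) of a storage trie followed by the path
// inside that storage trie.
func resolvePath(path []byte) (owner [32]byte, inner []byte) {
	if len(path) >= 64 {
		// Fold the leading 64 nibbles back into the 32-byte owner hash.
		for i := 0; i < 32; i++ {
			owner[i] = path[2*i]<<4 | path[2*i+1]
		}
		return owner, path[64:]
	}
	return owner, path
}

func main() {
	// A short path (< 64 nibbles) belongs to the account trie: zero owner.
	owner, inner := resolvePath([]byte{0x0, 0x1, 0x0, 0x2})
	fmt.Printf("owner=%x inner=%x\n", owner, inner)
}
```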
@@ -425,6 +446,39 @@ func (s *Sync) children(req *nodeRequest, object node) ([]*nodeRequest, error) {
             node: node.Val,
             path: append(append([]byte(nil), req.path...), key...),
         }}
+        // Mark all internal nodes between shortNode and its **in disk**
+        // child as invalid. This is essential in the case of path mode
+        // scheme; otherwise, state healing might overwrite existing child
+        // nodes silently while leaving a dangling parent node within the
+        // range of this internal path on disk. This would break the
+        // guarantee for state healing.
+        //
+        // While it's possible for this shortNode to overwrite a previously
+        // existing full node, the other branches of the fullNode can be
+        // retained as they remain untouched and complete.
+        //
+        // This step is only necessary for path mode, as there is no deletion
+        // in hash mode at all.
+        if _, ok := node.Val.(hashNode); ok && s.scheme == rawdb.PathScheme {
Review comment: Why do you check against a hashNode? Wouldn't the same issue happen if the child was a valueNode?

Reply: There are a few possibilities there: (a) the child is a hashNode, meaning the child is a complete node stored on disk. In this case, we need to clean up the internal nodes to reclaim ownership of this path. (b) the child is a valueNode, which is always embedded in the parent. In this case, the path terminates at this shortNode and no further nodes are stored on disk, so there is nothing to clean up after this shortNode because the path beyond it is not occupied by us. (c) the child is an embedded fullNode (smaller than 32 bytes), which is identical to case (b).
+            owner, inner := ResolvePath(req.path)
+            for i := 1; i < len(key); i++ {
Review comment: Ugh, we loop over all possible keys and request them from Pebble... this is going to hurt...

Reply: In practice, it won't be too much. The target shortNode is one sitting between two fullNodes in a path, which only happens when a few elements share a path prefix. On mainnet, the shared prefix won't be too long (possibly a few nibbles). For a shortNode that contains a value, the key can be quite long, but that case is not our target.
+                // While checking for a non-existent item in Pebble can be less efficient
+                // without a bloom filter, the relatively low frequency of lookups makes
+                // the performance impact negligible.
+                var exists bool
+                if owner == (common.Hash{}) {
+                    exists = rawdb.ExistsAccountTrieNode(s.database, append(inner, key[:i]...))
+                } else {
+                    exists = rawdb.ExistsStorageTrieNode(s.database, owner, append(inner, key[:i]...))
+                }
+                if exists {
+                    req.deletes = append(req.deletes, key[:i])
Review comment: This tripped me up. In [...] So, a bit later in this code, when going from [...] I missed the [...] I guess you save a bit of memory by not storing the expanded path here, and recalculating it later. So I can't really object, just noting that it makes it a bit more complex.

Reply: It's the intention, yes.
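To illustrate the trade-off raised in this thread, here is a small self-contained sketch (illustrative types only, not the real nodeRequest): only the relative key segments are stored when deletions are scheduled, and the absolute paths are recomputed at commit time.

```go
package main

import "fmt"

// request mimics the two fields of nodeRequest that matter here; it is an
// illustration, not the real type.
type request struct {
	path    []byte   // absolute path of the shortNode being synced
	deletes [][]byte // relative key segments, i.e. key[:i]
}

func main() {
	req := request{path: []byte{0x0, 0x1}}
	key := []byte{0x0, 0x2, 0x0, 0x3}

	// Scheduling: only the short relative segment is stored.
	for i := 1; i < len(key); i++ {
		req.deletes = append(req.deletes, key[:i])
	}
	// Committing: the absolute path is recomputed as req.path + segment.
	for _, segment := range req.deletes {
		full := append(append([]byte{}, req.path...), segment...)
		fmt.Printf("delete %x\n", full)
	}
}
```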
+                    deletionGauge.Inc(1)
+                    log.Debug("Detected dangling node", "owner", owner, "path", append(inner, key[:i]...))
+                }
+            }
+            lookupGauge.Inc(int64(len(key) - 1))
+        }
     case *fullNode:
         for i := 0; i < 17; i++ {
             if node.Children[i] != nil {
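To make the rationale in the new comment block concrete, the sketch below (made-up nibble paths, not go-ethereum code) probes every strict prefix of a shortNode key and flags the ones still present on disk as dangling nodes, mirroring the detection loop added in this hunk.

```go
package main

import "fmt"

func main() {
	// Nibble paths, mimicking the trie package's internal hex encoding.
	parent := []byte{0x0, 0x1}        // path of the new shortNode
	key := []byte{0x0, 0x2, 0x0, 0x3} // its compressed key; the child sits at parent+key

	// Pretend the previous trie layout left an internal node on disk inside
	// the range now covered by the shortNode key.
	onDisk := map[string]bool{
		string([]byte{0x0, 0x1, 0x0, 0x2}): true,
	}

	// Mirror of the detection loop: probe every strict prefix of the key.
	for i := 1; i < len(key); i++ {
		path := append(append([]byte{}, parent...), key[:i]...)
		if onDisk[string(path)] {
			fmt.Printf("dangling node at %x, scheduling deletion\n", path)
		}
	}
	// The child path (parent+key) itself is never probed: it is about to be
	// re-downloaded and rewritten by the healer anyway.
}
```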
@@ -509,10 +563,19 @@ func (s *Sync) commitNodeRequest(req *nodeRequest) error {
     // Write the node content to the membatch
     s.membatch.nodes[string(req.path)] = req.data
     s.membatch.hashes[string(req.path)] = req.hash

     // The size tracking refers to the db-batch, not the in-memory data.
     // Therefore, we ignore the req.path, and account only for the hash+data
     // which eventually is written to db.
-    s.membatch.size += common.HashLength + uint64(len(req.data))
+    if s.scheme == rawdb.PathScheme {
+        s.membatch.size += uint64(len(req.path) + len(req.data))
+    } else {
+        s.membatch.size += common.HashLength + uint64(len(req.data))
+    }
+    // Delete the internal nodes which are marked as invalid
+    for _, segment := range req.deletes {
+        path := append(req.path, segment...)
+        s.membatch.deletes[string(path)] = struct{}{}
+        s.membatch.size += uint64(len(path))
+    }
     delete(s.nodeReqs, string(req.path))
     s.fetches[len(req.path)]--
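A quick worked example of the size accounting above, assuming common.HashLength is 32: in path mode the estimate charges the path plus the node data, while in hash mode it charges a fixed 32-byte key plus the data.

```go
package main

import "fmt"

func main() {
	const hashLength = 32       // stand-in for common.HashLength
	pathLen, dataLen := 5, 100  // hypothetical node: 5-byte path, 100 bytes of data

	pathSchemeDelta := pathLen + dataLen    // path scheme: keyed by the path itself
	hashSchemeDelta := hashLength + dataLen // hash scheme: keyed by the 32-byte node hash

	fmt.Println(pathSchemeDelta, hashSchemeDelta) // 105 132
}
```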
Review comment: Shouldn't you do the deletions before the writes? In case we're about to overwrite some parts of the path, the deletions might contain all parts, and we don't want to delete the things we just wrote. Right?

Reply: I was also thinking about it. IMO the deletions should only contain the short-key-internal parts, so they should not duplicate any node we've just written.

Reply: Yes, for each written node and deleted node, the path should be unique. I will add a comment to clarify it.
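A small sketch of the uniqueness argument (plain nibble slices instead of real trie nodes): every scheduled deletion path is strictly longer than the shortNode's own path and strictly shorter than its child's path, so it can never collide with either write.

```go
package main

import (
	"bytes"
	"fmt"
)

func main() {
	parent := []byte{0x0, 0x1}                           // path written for the shortNode itself
	key := []byte{0x0, 0x2, 0x0, 0x3}                    // shortNode key
	child := append(append([]byte{}, parent...), key...) // path written for the child node

	for i := 1; i < len(key); i++ {
		del := append(append([]byte{}, parent...), key[:i]...)
		// Each deletion path sits strictly between parent and child in
		// length, so it equals neither written path.
		collides := bytes.Equal(del, parent) || bytes.Equal(del, child)
		fmt.Printf("delete %x collides with writes: %v\n", del, collides)
	}
}
```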