|
| 1 | +// Copyright 2024 The go-ethereum Authors |
| 2 | +// This file is part of the go-ethereum library. |
| 3 | +// |
| 4 | +// The go-ethereum library is free software: you can redistribute it and/or modify |
| 5 | +// it under the terms of the GNU Lesser General Public License as published by |
| 6 | +// the Free Software Foundation, either version 3 of the License, or |
| 7 | +// (at your option) any later version. |
| 8 | +// |
| 9 | +// The go-ethereum library is distributed in the hope that it will be useful, |
| 10 | +// but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 11 | +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 12 | +// GNU Lesser General Public License for more details. |
| 13 | +// |
| 14 | +// You should have received a copy of the GNU Lesser General Public License |
| 15 | +// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>. |
| 16 | + |
| 17 | +package snap |
| 18 | + |
| 19 | +import ( |
| 20 | + "bytes" |
| 21 | + |
| 22 | + "github.com/ethereum/go-ethereum/common" |
| 23 | + "github.com/ethereum/go-ethereum/core/rawdb" |
| 24 | + "github.com/ethereum/go-ethereum/ethdb" |
| 25 | + "github.com/ethereum/go-ethereum/trie" |
| 26 | +) |
| 27 | + |
// genTrie interface is used by the snap syncer to generate merkle tree nodes
// based on a received batch of states.
type genTrie interface {
	// update inserts the state item (key, value) into the generator trie and
	// reports any error raised by the underlying trie.
	update(key, value []byte) error

	// commit flushes the right boundary nodes if the complete flag is true.
	// This function must be called before flushing the associated database
	// batch.
	//
	// The returned hash is only meaningful for a complete commit; the
	// implementations in this file return the empty hash when complete is
	// false.
	commit(complete bool) common.Hash
}
| 38 | + |
// pathTrie is a wrapper over the stackTrie, incorporating numerous additional
// logics to handle the semi-completed trie and potential leftover dangling
// nodes in the database. It is utilized for constructing the merkle tree nodes
// in path mode during the snap sync process.
type pathTrie struct {
	owner common.Hash     // identifier of trie owner, empty for account trie
	tr    *trie.StackTrie // underlying raw stack trie
	first []byte          // path of the first node committed by stackTrie; only recorded when skipLeftBoundary is set
	last  []byte          // path of the most recent node written to the batch

	// This flag indicates whether nodes on the left boundary are skipped for
	// committing. If set, the left boundary nodes are considered incomplete
	// due to potentially missing left children.
	skipLeftBoundary bool

	// db is consulted to check whether a (potentially dangling) node exists
	// at a given path before a deletion is scheduled for it.
	db ethdb.KeyValueReader

	// batch receives all node writes and deletions produced by this trie.
	batch ethdb.Batch
}
| 56 | + |
| 57 | +// newPathTrie initializes the path trie. |
| 58 | +func newPathTrie(owner common.Hash, skipLeftBoundary bool, db ethdb.KeyValueReader, batch ethdb.Batch) *pathTrie { |
| 59 | + tr := &pathTrie{ |
| 60 | + owner: owner, |
| 61 | + skipLeftBoundary: skipLeftBoundary, |
| 62 | + db: db, |
| 63 | + batch: batch, |
| 64 | + } |
| 65 | + tr.tr = trie.NewStackTrie(tr.onTrieNode) |
| 66 | + return tr |
| 67 | +} |
| 68 | + |
| 69 | +// onTrieNode is invoked whenever a new node is committed by the stackTrie. |
| 70 | +// |
| 71 | +// As the committed nodes might be incomplete if they are on the boundaries |
| 72 | +// (left or right), this function has the ability to detect the incomplete |
| 73 | +// ones and filter them out for committing. |
| 74 | +// |
| 75 | +// Additionally, the assumption is made that there may exist leftover dangling |
| 76 | +// nodes in the database. This function has the ability to detect the dangling |
| 77 | +// nodes that fall within the path space of committed nodes (specifically on |
| 78 | +// the path covered by internal extension nodes) and remove them from the |
| 79 | +// database. This property ensures that the entire path space is uniquely |
| 80 | +// occupied by committed nodes. |
| 81 | +// |
| 82 | +// Furthermore, all leftover dangling nodes along the path from committed nodes |
| 83 | +// to the trie root (left and right boundaries) should be removed as well; |
| 84 | +// otherwise, they might potentially disrupt the state healing process. |
| 85 | +func (t *pathTrie) onTrieNode(path []byte, hash common.Hash, blob []byte) { |
| 86 | + // Filter out the nodes on the left boundary if skipLeftBoundary is |
| 87 | + // configured. Nodes are considered to be on the left boundary if |
| 88 | + // it's the first one to be committed, or the parent/ancestor of the |
| 89 | + // first committed node. |
| 90 | + if t.skipLeftBoundary && (t.first == nil || bytes.HasPrefix(t.first, path)) { |
| 91 | + if t.first == nil { |
| 92 | + // Memorize the path of first committed node, which is regarded |
| 93 | + // as left boundary. Deep-copy is necessary as the path given |
| 94 | + // is volatile. |
| 95 | + t.first = append([]byte{}, path...) |
| 96 | + |
| 97 | + // The left boundary can be uniquely determined by the first committed node |
| 98 | + // from stackTrie (e.g., N_1), as the shared path prefix between the first |
| 99 | + // two inserted state items is deterministic (the path of N_3). The path |
| 100 | + // from trie root towards the first committed node is considered the left |
| 101 | + // boundary. The potential leftover dangling nodes on left boundary should |
| 102 | + // be cleaned out. |
| 103 | + // |
| 104 | + // +-----+ |
| 105 | + // | N_3 | shared path prefix of state_1 and state_2 |
| 106 | + // +-----+ |
| 107 | + // /- -\ |
| 108 | + // +-----+ +-----+ |
| 109 | + // First committed node | N_1 | | N_2 | latest inserted node (contain state_2) |
| 110 | + // +-----+ +-----+ |
| 111 | + // |
| 112 | + // The node with the path of the first committed one (e.g, N_1) is not |
| 113 | + // removed because it's a sibling of the nodes we want to commit, not |
| 114 | + // the parent or ancestor. |
| 115 | + for i := 0; i < len(path); i++ { |
| 116 | + t.delete(path[:i], false) |
| 117 | + } |
| 118 | + } |
| 119 | + return |
| 120 | + } |
| 121 | + // If boundary filtering is not configured, or the node is not on the left |
| 122 | + // boundary, commit it to database. |
| 123 | + // |
| 124 | + // Note: If the current committed node is an extension node, then the nodes |
| 125 | + // falling within the path between itself and its standalone (not embedded |
| 126 | + // in parent) child should be cleaned out for exclusively occupy the inner |
| 127 | + // path. |
| 128 | + // |
| 129 | + // This is essential in snap sync to avoid leaving dangling nodes within |
| 130 | + // this range covered by extension node which could potentially break the |
| 131 | + // state healing. |
| 132 | + // |
| 133 | + // The extension node is detected if its path is the prefix of last committed |
| 134 | + // one and path gap is larger than one. If the path gap is only one byte, |
| 135 | + // the current node could either be a full node, or a extension with single |
| 136 | + // byte key. In either case, no gaps will be left in the path. |
| 137 | + if t.last != nil && bytes.HasPrefix(t.last, path) && len(t.last)-len(path) > 1 { |
| 138 | + for i := len(path) + 1; i < len(t.last); i++ { |
| 139 | + t.delete(t.last[:i], true) |
| 140 | + } |
| 141 | + } |
| 142 | + t.write(path, blob) |
| 143 | + |
| 144 | + // Update the last flag. Deep-copy is necessary as the provided path is volatile. |
| 145 | + if t.last == nil { |
| 146 | + t.last = append([]byte{}, path...) |
| 147 | + } else { |
| 148 | + t.last = append(t.last[:0], path...) |
| 149 | + } |
| 150 | +} |
| 151 | + |
| 152 | +// write commits the node write to provided database batch in path mode. |
| 153 | +func (t *pathTrie) write(path []byte, blob []byte) { |
| 154 | + if t.owner == (common.Hash{}) { |
| 155 | + rawdb.WriteAccountTrieNode(t.batch, path, blob) |
| 156 | + } else { |
| 157 | + rawdb.WriteStorageTrieNode(t.batch, t.owner, path, blob) |
| 158 | + } |
| 159 | +} |
| 160 | + |
| 161 | +func (t *pathTrie) deleteAccountNode(path []byte, inner bool) { |
| 162 | + if inner { |
| 163 | + accountInnerLookupGauge.Inc(1) |
| 164 | + } else { |
| 165 | + accountOuterLookupGauge.Inc(1) |
| 166 | + } |
| 167 | + if !rawdb.ExistsAccountTrieNode(t.db, path) { |
| 168 | + return |
| 169 | + } |
| 170 | + if inner { |
| 171 | + accountInnerDeleteGauge.Inc(1) |
| 172 | + } else { |
| 173 | + accountOuterDeleteGauge.Inc(1) |
| 174 | + } |
| 175 | + rawdb.DeleteAccountTrieNode(t.batch, path) |
| 176 | +} |
| 177 | + |
| 178 | +func (t *pathTrie) deleteStorageNode(path []byte, inner bool) { |
| 179 | + if inner { |
| 180 | + storageInnerLookupGauge.Inc(1) |
| 181 | + } else { |
| 182 | + storageOuterLookupGauge.Inc(1) |
| 183 | + } |
| 184 | + if !rawdb.ExistsStorageTrieNode(t.db, t.owner, path) { |
| 185 | + return |
| 186 | + } |
| 187 | + if inner { |
| 188 | + storageInnerDeleteGauge.Inc(1) |
| 189 | + } else { |
| 190 | + storageOuterDeleteGauge.Inc(1) |
| 191 | + } |
| 192 | + rawdb.DeleteStorageTrieNode(t.batch, t.owner, path) |
| 193 | +} |
| 194 | + |
| 195 | +// delete commits the node deletion to provided database batch in path mode. |
| 196 | +func (t *pathTrie) delete(path []byte, inner bool) { |
| 197 | + if t.owner == (common.Hash{}) { |
| 198 | + t.deleteAccountNode(path, inner) |
| 199 | + } else { |
| 200 | + t.deleteStorageNode(path, inner) |
| 201 | + } |
| 202 | +} |
| 203 | + |
| 204 | +// update implements genTrie interface, inserting a (key, value) pair into the |
| 205 | +// stack trie. |
| 206 | +func (t *pathTrie) update(key, value []byte) error { |
| 207 | + return t.tr.Update(key, value) |
| 208 | +} |
| 209 | + |
| 210 | +// commit implements genTrie interface, flushing the right boundary if it's |
| 211 | +// considered as complete. Otherwise, the nodes on the right boundary are |
| 212 | +// discarded and cleaned up. |
| 213 | +// |
| 214 | +// Note, this function must be called before flushing database batch, otherwise, |
| 215 | +// dangling nodes might be left in database. |
| 216 | +func (t *pathTrie) commit(complete bool) common.Hash { |
| 217 | + // If the right boundary is claimed as complete, flush them out. |
| 218 | + // The nodes on both left and right boundary will still be filtered |
| 219 | + // out if left boundary filtering is configured. |
| 220 | + if complete { |
| 221 | + // Commit all inserted but not yet committed nodes(on the right |
| 222 | + // boundary) in the stackTrie. |
| 223 | + hash := t.tr.Hash() |
| 224 | + if t.skipLeftBoundary { |
| 225 | + return common.Hash{} // hash is meaningless if left side is incomplete |
| 226 | + } |
| 227 | + return hash |
| 228 | + } |
| 229 | + // Discard nodes on the right boundary as it's claimed as incomplete. These |
| 230 | + // nodes might be incomplete due to missing children on the right side. |
| 231 | + // Furthermore, the potential leftover nodes on right boundary should also |
| 232 | + // be cleaned out. |
| 233 | + // |
| 234 | + // The right boundary can be uniquely determined by the last committed node |
| 235 | + // from stackTrie (e.g., N_1), as the shared path prefix between the last |
| 236 | + // two inserted state items is deterministic (the path of N_3). The path |
| 237 | + // from trie root towards the last committed node is considered the right |
| 238 | + // boundary (root to N_3). |
| 239 | + // |
| 240 | + // +-----+ |
| 241 | + // | N_3 | shared path prefix of last two states |
| 242 | + // +-----+ |
| 243 | + // /- -\ |
| 244 | + // +-----+ +-----+ |
| 245 | + // Last committed node | N_1 | | N_2 | latest inserted node (contain last state) |
| 246 | + // +-----+ +-----+ |
| 247 | + // |
| 248 | + // Another interesting scenario occurs when the trie is committed due to |
| 249 | + // too many items being accumulated in the batch. To flush them out to |
| 250 | + // the database, the path of the last inserted node (N_2) is temporarily |
| 251 | + // treated as an incomplete right boundary, and nodes on this path are |
| 252 | + // removed (e.g. from root to N_3). |
| 253 | + // However, this path will be reclaimed as an internal path by inserting |
| 254 | + // more items after the batch flush. New nodes on this path can be committed |
| 255 | + // with no issues as they are actually complete. Also, from a database |
| 256 | + // perspective, first deleting and then rewriting is a valid data update. |
| 257 | + for i := 0; i < len(t.last); i++ { |
| 258 | + t.delete(t.last[:i], false) |
| 259 | + } |
| 260 | + return common.Hash{} // the hash is meaningless for incomplete commit |
| 261 | +} |
| 262 | + |
// hashTrie is a wrapper over the stackTrie for implementing genTrie interface.
// Unlike pathTrie it performs no boundary filtering or dangling node cleanup.
type hashTrie struct {
	tr *trie.StackTrie // underlying raw stack trie
}
| 267 | + |
| 268 | +// newHashTrie initializes the hash trie. |
| 269 | +func newHashTrie(batch ethdb.Batch) *hashTrie { |
| 270 | + return &hashTrie{tr: trie.NewStackTrie(func(path []byte, hash common.Hash, blob []byte) { |
| 271 | + rawdb.WriteLegacyTrieNode(batch, hash, blob) |
| 272 | + })} |
| 273 | +} |
| 274 | + |
| 275 | +// update implements genTrie interface, inserting a (key, value) pair into |
| 276 | +// the stack trie. |
| 277 | +func (t *hashTrie) update(key, value []byte) error { |
| 278 | + return t.tr.Update(key, value) |
| 279 | +} |
| 280 | + |
| 281 | +// commit implements genTrie interface, committing the nodes on right boundary. |
| 282 | +func (t *hashTrie) commit(complete bool) common.Hash { |
| 283 | + if !complete { |
| 284 | + return common.Hash{} // the hash is meaningless for incomplete commit |
| 285 | + } |
| 286 | + return t.tr.Hash() // return hash only if it's claimed as complete |
| 287 | +} |
0 commit comments