Skip to content

Commit

Permalink
all: prepare for path-based trie storage (ethereum#26603)
Browse files Browse the repository at this point in the history
This PR moves some trie-related db accessor methods to a different file, and also removes the schema type. Instead of the schema type, a string is used to distinguish between hashbased/pathbased db accessors.
This also moves some code from trie package to rawdb package.

This PR is intended to be a no-functionality-change prep PR for ethereum#25963 .

---------

Co-authored-by: Gary Rong <garyrong0905@gmail.com>
  • Loading branch information
2 people authored and shekhirin committed Jun 6, 2023
1 parent 8207940 commit 7da2539
Show file tree
Hide file tree
Showing 20 changed files with 336 additions and 184 deletions.
4 changes: 2 additions & 2 deletions cmd/geth/snapshot.go
Original file line number Diff line number Diff line change
Expand Up @@ -399,7 +399,7 @@ func traverseRawState(ctx *cli.Context) error {
// Check the present for non-empty hash node(embedded node doesn't
// have their own hash).
if node != (common.Hash{}) {
blob := rawdb.ReadTrieNode(chaindb, node)
blob := rawdb.ReadLegacyTrieNode(chaindb, node)
if len(blob) == 0 {
log.Error("Missing trie node(account)", "hash", node)
return errors.New("missing account")
Expand Down Expand Up @@ -436,7 +436,7 @@ func traverseRawState(ctx *cli.Context) error {
// Check the present for non-empty hash node(embedded node doesn't
// have their own hash).
if node != (common.Hash{}) {
blob := rawdb.ReadTrieNode(chaindb, node)
blob := rawdb.ReadLegacyTrieNode(chaindb, node)
if len(blob) == 0 {
log.Error("Missing trie node(storage)", "hash", node)
return errors.New("missing storage")
Expand Down
48 changes: 11 additions & 37 deletions core/rawdb/accessors_state.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,17 @@ func ReadPreimage(db ethdb.KeyValueReader, hash common.Hash) []byte {
return data
}

// WritePreimages writes the provided set of preimages to the database.
func WritePreimages(db ethdb.KeyValueWriter, preimages map[common.Hash][]byte) {
for hash, preimage := range preimages {
if err := db.Put(preimageKey(hash), preimage); err != nil {
log.Crit("Failed to store trie preimage", "err", err)
}
}
preimageCounter.Inc(int64(len(preimages)))
preimageHitCounter.Inc(int64(len(preimages)))
}

// ReadCode retrieves the contract code of the provided code hash.
func ReadCode(db ethdb.KeyValueReader, hash common.Hash) []byte {
// Try with the prefixed code scheme first, if not then try with legacy
Expand All @@ -48,12 +59,6 @@ func ReadCodeWithPrefix(db ethdb.KeyValueReader, hash common.Hash) []byte {
return data
}

// ReadTrieNode retrieves the trie node of the provided hash.
func ReadTrieNode(db ethdb.KeyValueReader, hash common.Hash) []byte {
data, _ := db.Get(hash.Bytes())
return data
}

// HasCode checks if the contract code corresponding to the
// provided code hash is present in the db.
func HasCode(db ethdb.KeyValueReader, hash common.Hash) bool {
Expand All @@ -74,47 +79,16 @@ func HasCodeWithPrefix(db ethdb.KeyValueReader, hash common.Hash) bool {
return ok
}

// HasTrieNode checks if the trie node with the provided hash is present in db.
func HasTrieNode(db ethdb.KeyValueReader, hash common.Hash) bool {
ok, _ := db.Has(hash.Bytes())
return ok
}

// WritePreimages writes the provided set of preimages to the database.
func WritePreimages(db ethdb.KeyValueWriter, preimages map[common.Hash][]byte) {
for hash, preimage := range preimages {
if err := db.Put(preimageKey(hash), preimage); err != nil {
log.Crit("Failed to store trie preimage", "err", err)
}
}
preimageCounter.Inc(int64(len(preimages)))
preimageHitCounter.Inc(int64(len(preimages)))
}

// WriteCode writes the provided contract code database.
func WriteCode(db ethdb.KeyValueWriter, hash common.Hash, code []byte) {
if err := db.Put(codeKey(hash), code); err != nil {
log.Crit("Failed to store contract code", "err", err)
}
}

// WriteTrieNode writes the provided trie node database.
func WriteTrieNode(db ethdb.KeyValueWriter, hash common.Hash, node []byte) {
if err := db.Put(hash.Bytes(), node); err != nil {
log.Crit("Failed to store trie node", "err", err)
}
}

// DeleteCode deletes the specified contract code from the database.
func DeleteCode(db ethdb.KeyValueWriter, hash common.Hash) {
if err := db.Delete(codeKey(hash)); err != nil {
log.Crit("Failed to delete contract code", "err", err)
}
}

// DeleteTrieNode deletes the specified trie node from the database.
func DeleteTrieNode(db ethdb.KeyValueWriter, hash common.Hash) {
if err := db.Delete(hash.Bytes()); err != nil {
log.Crit("Failed to delete trie node", "err", err)
}
}
263 changes: 263 additions & 0 deletions core/rawdb/accessors_trie.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,263 @@
// Copyright 2022 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>

package rawdb

import (
"fmt"
"sync"

"github.com/ethereum/go-ethereum/common"
"github.com/ethereum/go-ethereum/crypto"
"github.com/ethereum/go-ethereum/ethdb"
"github.com/ethereum/go-ethereum/log"
"golang.org/x/crypto/sha3"
)

// HashScheme is the legacy hash-based state scheme with which trie nodes are
// stored in the disk with node hash as the database key. The advantage of this
// scheme is that different versions of trie nodes can be stored in disk, which
// is very beneficial for constructing archive nodes. The drawback is it will
// store different trie nodes on the same path to different locations on the disk
// with no data locality, and it's unfriendly for designing state pruning.
//
// Now this scheme is still kept for backward compatibility, and it will be used
// for archive node and some other tries(e.g. light trie).
const HashScheme = "hashScheme"

// PathScheme is the new path-based state scheme with which trie nodes are stored
// in the disk with node path as the database key. This scheme will only store one
// version of state data in the disk, which means that the state pruning operation
// is native. At the same time, this scheme will put adjacent trie nodes in the same
// area of the disk with good data locality property. But this scheme needs to rely
// on extra state diffs to survive deep reorg.
const PathScheme = "pathScheme"

// nodeHasher used to derive the hash of trie node.
type nodeHasher struct{ sha crypto.KeccakState }

var hasherPool = sync.Pool{
New: func() interface{} { return &nodeHasher{sha: sha3.NewLegacyKeccak256().(crypto.KeccakState)} },
}

func newNodeHasher() *nodeHasher { return hasherPool.Get().(*nodeHasher) }
func returnHasherToPool(h *nodeHasher) { hasherPool.Put(h) }

func (h *nodeHasher) hashData(data []byte) (n common.Hash) {
h.sha.Reset()
h.sha.Write(data)
h.sha.Read(n[:])
return n
}

// ReadAccountTrieNode retrieves the account trie node and the associated node
// hash with the specified node path.
func ReadAccountTrieNode(db ethdb.KeyValueReader, path []byte) ([]byte, common.Hash) {
data, err := db.Get(accountTrieNodeKey(path))
if err != nil {
return nil, common.Hash{}
}
hasher := newNodeHasher()
defer returnHasherToPool(hasher)
return data, hasher.hashData(data)
}

// HasAccountTrieNode checks the account trie node presence with the specified
// node path and the associated node hash.
func HasAccountTrieNode(db ethdb.KeyValueReader, path []byte, hash common.Hash) bool {
data, err := db.Get(accountTrieNodeKey(path))
if err != nil {
return false
}
hasher := newNodeHasher()
defer returnHasherToPool(hasher)
return hasher.hashData(data) == hash
}

// WriteAccountTrieNode writes the provided account trie node into database.
func WriteAccountTrieNode(db ethdb.KeyValueWriter, path []byte, node []byte) {
if err := db.Put(accountTrieNodeKey(path), node); err != nil {
log.Crit("Failed to store account trie node", "err", err)
}
}

// DeleteAccountTrieNode deletes the specified account trie node from the database.
func DeleteAccountTrieNode(db ethdb.KeyValueWriter, path []byte) {
if err := db.Delete(accountTrieNodeKey(path)); err != nil {
log.Crit("Failed to delete account trie node", "err", err)
}
}

// ReadStorageTrieNode retrieves the storage trie node and the associated node
// hash with the specified node path.
func ReadStorageTrieNode(db ethdb.KeyValueReader, accountHash common.Hash, path []byte) ([]byte, common.Hash) {
data, err := db.Get(storageTrieNodeKey(accountHash, path))
if err != nil {
return nil, common.Hash{}
}
hasher := newNodeHasher()
defer returnHasherToPool(hasher)
return data, hasher.hashData(data)
}

// HasStorageTrieNode checks the storage trie node presence with the provided
// node path and the associated node hash.
func HasStorageTrieNode(db ethdb.KeyValueReader, accountHash common.Hash, path []byte, hash common.Hash) bool {
data, err := db.Get(storageTrieNodeKey(accountHash, path))
if err != nil {
return false
}
hasher := newNodeHasher()
defer returnHasherToPool(hasher)
return hasher.hashData(data) == hash
}

// WriteStorageTrieNode writes the provided storage trie node into database.
func WriteStorageTrieNode(db ethdb.KeyValueWriter, accountHash common.Hash, path []byte, node []byte) {
if err := db.Put(storageTrieNodeKey(accountHash, path), node); err != nil {
log.Crit("Failed to store storage trie node", "err", err)
}
}

// DeleteStorageTrieNode deletes the specified storage trie node from the database.
func DeleteStorageTrieNode(db ethdb.KeyValueWriter, accountHash common.Hash, path []byte) {
if err := db.Delete(storageTrieNodeKey(accountHash, path)); err != nil {
log.Crit("Failed to delete storage trie node", "err", err)
}
}

// ReadLegacyTrieNode retrieves the legacy trie node with the given
// associated node hash.
func ReadLegacyTrieNode(db ethdb.KeyValueReader, hash common.Hash) []byte {
data, err := db.Get(hash.Bytes())
if err != nil {
return nil
}
return data
}

// HasLegacyTrieNode checks if the trie node with the provided hash is present in db.
func HasLegacyTrieNode(db ethdb.KeyValueReader, hash common.Hash) bool {
ok, _ := db.Has(hash.Bytes())
return ok
}

// WriteLegacyTrieNode writes the provided legacy trie node to database.
func WriteLegacyTrieNode(db ethdb.KeyValueWriter, hash common.Hash, node []byte) {
if err := db.Put(hash.Bytes(), node); err != nil {
log.Crit("Failed to store legacy trie node", "err", err)
}
}

// DeleteLegacyTrieNode deletes the specified legacy trie node from database.
func DeleteLegacyTrieNode(db ethdb.KeyValueWriter, hash common.Hash) {
if err := db.Delete(hash.Bytes()); err != nil {
log.Crit("Failed to delete legacy trie node", "err", err)
}
}

// HasTrieNode checks the trie node presence with the provided node info and
// the associated node hash.
func HasTrieNode(db ethdb.KeyValueReader, owner common.Hash, path []byte, hash common.Hash, scheme string) bool {
switch scheme {
case HashScheme:
return HasLegacyTrieNode(db, hash)
case PathScheme:
if owner == (common.Hash{}) {
return HasAccountTrieNode(db, path, hash)
}
return HasStorageTrieNode(db, owner, path, hash)
default:
panic(fmt.Sprintf("Unknown scheme %v", scheme))
}
}

// ReadTrieNode retrieves the trie node from database with the provided node info
// and associated node hash.
// hashScheme-based lookup requires the following:
// - hash
//
// pathScheme-based lookup requires the following:
// - owner
// - path
func ReadTrieNode(db ethdb.KeyValueReader, owner common.Hash, path []byte, hash common.Hash, scheme string) []byte {
switch scheme {
case HashScheme:
return ReadLegacyTrieNode(db, hash)
case PathScheme:
var (
blob []byte
nHash common.Hash
)
if owner == (common.Hash{}) {
blob, nHash = ReadAccountTrieNode(db, path)
} else {
blob, nHash = ReadStorageTrieNode(db, owner, path)
}
if nHash != hash {
return nil
}
return blob
default:
panic(fmt.Sprintf("Unknown scheme %v", scheme))
}
}

// WriteTrieNode writes the trie node into database with the provided node info
// and associated node hash.
// hashScheme-based lookup requires the following:
// - hash
//
// pathScheme-based lookup requires the following:
// - owner
// - path
func WriteTrieNode(db ethdb.KeyValueWriter, owner common.Hash, path []byte, hash common.Hash, node []byte, scheme string) {
switch scheme {
case HashScheme:
WriteLegacyTrieNode(db, hash, node)
case PathScheme:
if owner == (common.Hash{}) {
WriteAccountTrieNode(db, path, node)
} else {
WriteStorageTrieNode(db, owner, path, node)
}
default:
panic(fmt.Sprintf("Unknown scheme %v", scheme))
}
}

// DeleteTrieNode deletes the trie node from database with the provided node info
// and associated node hash.
// hashScheme-based lookup requires the following:
// - hash
//
// pathScheme-based lookup requires the following:
// - owner
// - path
func DeleteTrieNode(db ethdb.KeyValueWriter, owner common.Hash, path []byte, hash common.Hash, scheme string) {
switch scheme {
case HashScheme:
DeleteLegacyTrieNode(db, hash)
case PathScheme:
if owner == (common.Hash{}) {
DeleteAccountTrieNode(db, path)
} else {
DeleteStorageTrieNode(db, owner, path)
}
default:
panic(fmt.Sprintf("Unknown scheme %v", scheme))
}
}
16 changes: 14 additions & 2 deletions core/rawdb/schema.go
Original file line number Diff line number Diff line change
Expand Up @@ -100,12 +100,14 @@ var (
CodePrefix = []byte("c") // CodePrefix + code hash -> account code
skeletonHeaderPrefix = []byte("S") // skeletonHeaderPrefix + num (uint64 big endian) -> header

// Path-based trie node scheme.
trieNodeAccountPrefix = []byte("A") // trieNodeAccountPrefix + hexPath -> trie node
trieNodeStoragePrefix = []byte("O") // trieNodeStoragePrefix + accountHash + hexPath -> trie node

PreimagePrefix = []byte("secure-key-") // PreimagePrefix + hash -> preimage
configPrefix = []byte("ethereum-config-") // config prefix for the db
genesisPrefix = []byte("ethereum-genesis-") // genesis state prefix for the db

// Chain index prefixes (use `i` + single byte to avoid mixing data types).

// BloomBitsIndexPrefix is the data table of a chain indexer to track its progress
BloomBitsIndexPrefix = []byte("iB")

Expand Down Expand Up @@ -236,3 +238,13 @@ func configKey(hash common.Hash) []byte {
func genesisStateSpecKey(hash common.Hash) []byte {
return append(genesisPrefix, hash.Bytes()...)
}

// accountTrieNodeKey = trieNodeAccountPrefix + nodePath.
func accountTrieNodeKey(path []byte) []byte {
return append(trieNodeAccountPrefix, path...)
}

// storageTrieNodeKey = trieNodeStoragePrefix + accountHash + nodePath.
func storageTrieNodeKey(accountHash common.Hash, path []byte) []byte {
return append(append(trieNodeStoragePrefix, accountHash.Bytes()...), path...)
}
Loading

0 comments on commit 7da2539

Please sign in to comment.