From bd0fbc2cc93390226333db49657ae7264a9df19b Mon Sep 17 00:00:00 2001 From: Guillaume Ballet Date: Thu, 25 Jun 2020 16:32:37 +0200 Subject: [PATCH 1/6] trie: Add the initial stacktrie.go --- trie/stacktrie.go | 528 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 528 insertions(+) create mode 100644 trie/stacktrie.go diff --git a/trie/stacktrie.go b/trie/stacktrie.go new file mode 100644 index 000000000000..33a5768c5061 --- /dev/null +++ b/trie/stacktrie.go @@ -0,0 +1,528 @@ +// Copyright 2020 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . + +package trie + +import ( + "io" + + "github.com/ethereum/go-ethereum/common" + "golang.org/x/crypto/sha3" +) + +// ReStackTrie is a reimplementation of the Stacktrie, that fixes +// bugs in the previous implementation, and which also implements +// its own hashing mechanism which is more specific and hopefully +// more efficient that the default hasher. 
+type ReStackTrie struct { + nodeType uint8 // node type (as in branch, ext, leaf) + val []byte // value contained by this node if it's a leaf + key []byte // key chunk covered by this (full|ext) node + keyOffset int // offset of the key chunk inside a full key + children [16]*ReStackTrie // list of children (for fullnodes and exts) +} + +// NewReStackTrie allocates and initializes an empty trie. +func NewReStackTrie() *ReStackTrie { + return &ReStackTrie{ + nodeType: emptyNode, + } +} + +// List all values that ReStackTrie#nodeType can hold +const ( + emptyNode = iota + branchNode + extNode + leafNode + hashedNode +) + +func (st *ReStackTrie) TryUpdate(key, value []byte) error { + k := keybytesToHex(key) + if len(value) == 0 { + panic("deletion not supported") + } + st.insert(k[:len(k)-1], value) + return nil +} + +// Helper function that, given a full key, determines the index +// at which the chunk pointed by st.keyOffset is different from +// the same chunk in the full key. +func (st *ReStackTrie) getDiffIndex(key []byte) int { + diffindex := 0 + for ; diffindex < len(st.key) && st.key[diffindex] == key[st.keyOffset+diffindex]; diffindex++ { + } + return diffindex +} + +// Helper function to that inserts a (key, value) pair into +// the trie. +func (st *ReStackTrie) insert(key, value []byte) { + switch st.nodeType { + case branchNode: /* Branch */ + idx := int(key[st.keyOffset]) + if st.children[idx] == nil { + st.children[idx] = NewReStackTrie() + st.children[idx].keyOffset = st.keyOffset + 1 + } + for i := idx - 1; i >= 0; i-- { + if st.children[i] != nil { + if st.children[i].nodeType != hashedNode { + st.children[i].val = st.children[i].hash() + st.children[i].key = nil + st.children[i].nodeType = hashedNode + } + + break + } + + } + st.children[idx].insert(key, value) + case extNode: /* Ext */ + // Compare both key chunks and see where they differ + diffidx := st.getDiffIndex(key) + + // Check if chunks are identical. 
If so, recurse into + // the child node. Otherwise, the key has to be split + // into 1) an optional common prefix, 2) the fullnode + // representing the two differing path, and 3) a leaf + // for each of the differentiated subtrees. + if diffidx == len(st.key) { + // Ext key and key segment are identical, recurse into + // the child node. + st.children[0].insert(key, value) + return + } + // Save the original part. Depending if the break is + // at the extension's last byte or not, create an + // intermediate extension or use the extension's child + // node directly. + var n *ReStackTrie + if diffidx < len(st.key)-1 { + n = NewReStackTrie() + n.key = st.key[diffidx+1:] + n.children[0] = st.children[0] + n.nodeType = extNode + } else { + // Break on the last byte, no need to insert + // an extension node: reuse the current node + n = st.children[0] + } + n.keyOffset = st.keyOffset + diffidx + 1 + + var p *ReStackTrie + if diffidx == 0 { + // the break is on the first byte, so + // the current node is converted into + // a branch node. + st.children[0] = nil + p = st + st.nodeType = branchNode + } else { + // the common prefix is at least one byte + // long, insert a new intermediate branch + // node. 
+ st.children[0] = NewReStackTrie() + st.children[0].nodeType = branchNode + st.children[0].keyOffset = st.keyOffset + diffidx + p = st.children[0] + } + + n.val = n.hash() + n.nodeType = hashedNode + n.key = nil + + // Create a leaf for the inserted part + o := NewReStackTrie() + o.keyOffset = st.keyOffset + diffidx + 1 + o.key = key[o.keyOffset:] + o.val = value + o.nodeType = leafNode + + // Insert both child leaves where they belong: + origIdx := st.key[diffidx] + newIdx := key[diffidx+st.keyOffset] + p.children[origIdx] = n + p.children[newIdx] = o + st.key = st.key[:diffidx] + + case leafNode: /* Leaf */ + // Compare both key chunks and see where they differ + diffidx := st.getDiffIndex(key) + + // Overwriting a key isn't supported, which means that + // the current leaf is expected to be split into 1) an + // optional extension for the common prefix of these 2 + // keys, 2) a fullnode selecting the path on which the + // keys differ, and 3) one leaf for the differentiated + // component of each key. + if diffidx >= len(st.key) { + panic("Trying to insert into existing key") + } + + // Check if the split occurs at the first nibble of the + // chunk. In that case, no prefix extnode is necessary. + // Otherwise, create that + var p *ReStackTrie + if diffidx == 0 { + // Convert current leaf into a branch + st.nodeType = branchNode + p = st + st.children[0] = nil + } else { + // Convert current node into an ext, + // and insert a child branch node. + st.nodeType = extNode + st.children[0] = NewReStackTrie() + st.children[0].nodeType = branchNode + st.children[0].keyOffset = st.keyOffset + diffidx + p = st.children[0] + } + + // Create the two child leaves: the one containing the + // original value and the one containing the new value + // The child leave will be hashed directly in order to + // free up some memory. 
+ origIdx := st.key[diffidx] + p.children[origIdx] = NewReStackTrie() + p.children[origIdx].nodeType = leafNode + p.children[origIdx].key = st.key[diffidx+1:] + p.children[origIdx].val = st.val + p.children[origIdx].keyOffset = p.keyOffset + 1 + + p.children[origIdx].val = p.children[origIdx].hash() + p.children[origIdx].nodeType = hashedNode + p.children[origIdx].key = nil + + newIdx := key[diffidx+st.keyOffset] + p.children[newIdx] = NewReStackTrie() + p.children[newIdx].nodeType = leafNode + p.children[newIdx].key = key[p.keyOffset+1:] + p.children[newIdx].val = value + p.children[newIdx].keyOffset = p.keyOffset + 1 + + st.key = st.key[:diffidx] + case emptyNode: /* Empty */ + st.nodeType = leafNode + st.key = key[st.keyOffset:] + st.val = value + case hashedNode: + panic("trying to insert into hash") + default: + panic("invalid type") + } +} + +// rawExtHPRLP is called when the length of the RLP of +// an extension is less than 32. It will return the +// un-hashed payload. +func rawExtHPRLP(key, val []byte) []byte { + rlp := [32]byte{} + nkeybytes := len(key) / 2 + oddkeylength := len(key) % 2 + + // This is the position at which RLP data is written. + // The first byte is initially skipped because its final + // value will be fully known by the end of the process. + pos := 1 + + // Write key size if it should be present + if nkeybytes > 0 || key[0] > 128 { + rlp[pos] = byte(128 + 1 + nkeybytes) + pos++ + } + + // Copy key data, including hex prefix. If the key length + // is odd, write the oddness marker, and otherwise skip the + // HP byte altogether since the leaf marker isn't set (i.e. + // this is an ext) and no odd-nibble needs to be stored. + rlp[pos] = byte(16 * oddkeylength) + pos += 1 - oddkeylength + + for i := 0; i < len(key); i++ { + rlp[pos+(i+oddkeylength)/2] |= key[i] << uint(4*((i+1+len(key))%2)) + } + // `+oddkeylength` adds the accounting for the HP byte, since + // in that case `pos` wasn't incremented. 
+ pos += (len(key) + oddkeylength) / 2 + + // Copy the value, no need for a header because the child is + // already RLP and directly embedded. + copy(rlp[pos:], val) + pos += len(val) + + // RLP header + rlp[0] = byte(192 + pos - 1) + + return rlp[:pos] +} + +// rawLeafHPRLP is called when the length of the RLP of a leaf is +// less than 32. It will return the un-hashed payload. +func rawLeafHPRLP(key, val []byte, leaf bool) []byte { + // payload size - none of the components are larger + // than 56 since the whole size is smaller than 32 + rlp := [32]byte{} + oddkeylength := len(key) % 2 + + // This is the position at which RLP data is written. + // The first byte is initially skipped because its final + // value will be fully known by the end of the process. + pos := 1 + + // Add key, if present + if len(key) > 0 { + // add length prefix if needed. If len(key) == 1, + // then no size prefix is needed as 1 < 128. + if len(key) > 1 { + rlp[1] = 128 + byte(1+len(key)/2) + pos++ + } + + // hex prefix + rlp[pos] = byte(16 * (len(key) % 2)) + if leaf { + rlp[pos] |= 32 + } + // Advance to next byte iff the key has an even nibble length + pos += 1 - oddkeylength + + // copy key data + for i, nibble := range key { + offset := 1 - uint((len(key)+i)%2) + rlp[pos] |= byte(int(nibble) << (4 * offset)) + if offset == 0 { + pos++ + } + } + } + + // copy value data. If the payload isn't a single byte + // lower than 128, also add the 0x80+len string header. + if len(val) > 1 || val[0] >= 128 { + rlp[pos] = byte(len(val)) + if len(val) > 1 || val[0] >= 128 { + rlp[pos] += 128 + } + pos += 1 + + } + copy(rlp[pos:], val) + pos += len(val) + + // In case the payload is only one byte, + // no header is needed. + if pos == 2 { + return rlp[1:pos] + } + rlp[0] = 192 + byte(pos) - 1 + + // If the payload reaches exactly 32 bytes, then + // it needs to be hashed. 
+ if pos == 32 { + d := sha3.NewLegacyKeccak256() + d.Write(rlp[:pos]) + return d.Sum(nil) + } + + return rlp[:pos] +} + +// writeHPRLP writes a key with its hex prefix into a writer (presumably, the +// input of a hasher) and then writes the value. The value can be a maximum of +// 256 bytes, as it is only concerned with writing account leaves and optimizes +// for this use case. +func writeHPRLP(writer io.Writer, key, val []byte, leaf bool) { + // DEBUG don't remove yet + //var writer bytes.Buffer + + // Determine the _t_ part of the hex prefix + hp := byte(0) + if leaf { + hp = 32 + } + + const maxHeaderSize = 1 /* key byte list header */ + + 1 /* list header for key + value */ + + 1 /* potential size byte if total size > 56 */ + + 1 /* hex prefix if key is even-length*/ + header := [maxHeaderSize]byte{} + keyOffset := 0 + headerPos := maxHeaderSize - 1 + + // Add the hex prefix to its own byte if the key length is even, and + // as the most significant nibble of the key if it's odd. + // In the latter case, the first nibble of the key will be part of + // the header and it will be skipped later when it's added to the + // hasher sponge. + if len(key)%2 == 0 { + header[headerPos] = hp + } else { + header[headerPos] = hp | key[0] | 16 + keyOffset = 1 + } + headerPos-- + + // Add the key byte header, the key is 32 bytes max so it's always + // under 56 bytes - no extra byte needed. + keyByteSize := byte(len(key) / 2) + if len(key) > 1 || header[len(header)-1] > 128 { + header[headerPos] = 0x80 + keyByteSize + 1 /* HP */ + headerPos-- + } + + // If this is a leaf being inserted, the header length for the + // value part will be two bytes as the leaf is more than 56 bytes + // long. 
+ valHeaderLen := 1 + if len(val) == 1 && val[0] < 128 { + // Don't reserve space for the header if this + // is an integer < 128 + valHeaderLen = 0 + } + if len(val) > 56 { + valHeaderLen = 2 + } + + // Add the global header, with optional length, and specify at + // which byte the header is starting. + payloadSize := int(keyByteSize) + (len(header) - headerPos - 1) + + valHeaderLen + len(val) /* value + rlp header */ + var start int + if payloadSize >= 56 { + header[headerPos] = byte(payloadSize) + headerPos-- + header[headerPos] = 0xf8 + start = headerPos + } else { + header[headerPos] = 0xc0 + byte(payloadSize) + start = headerPos + } + + // Write the header into the sponge + writer.Write(header[start:]) + + // Write the key into the sponge + var m byte + for i, nibble := range key { + // Skip the first byte if the key has an odd-length, since + // it has already been written with the header. + if i >= keyOffset { + if (i-keyOffset)%2 == 0 { + m = nibble + } else { + writer.Write([]byte{m*16 + nibble}) + } + } + } + + // Write the RLP prefix to the value if needed + if len(val) > 56 { + writer.Write([]byte{0xb8, byte(len(val))}) + } else if len(val) > 1 || val[0] >= 128 { + writer.Write([]byte{0x80 + byte(len(val))}) + } + writer.Write(val) + + // DEBUG don't remove yet + //if leaf { + //fmt.Println("leaf rlp ", writer) + //} else { + //fmt.Println("ext rlp ", writer) + //} + //io.Copy(w, &writer) +} + +func (st *ReStackTrie) hash() []byte { + /* Shortcut if node is already hashed */ + if st.nodeType == hashedNode { + return st.val + } + + d := sha3.NewLegacyKeccak256() + switch st.nodeType { + case branchNode: + payload := [544]byte{} + pos := 3 // maximum header length given what we know + for i, v := range st.children { + if v != nil { + // Write a 32 byte list to the sponge + childhash := v.hash() + if len(childhash) == 32 { + payload[pos] = 128 + byte(len(childhash)) + pos++ + } + copy(payload[pos:pos+len(childhash)], childhash) + pos += len(childhash) + 
st.children[i] = nil // Reclaim mem from subtree + } else { + // Write an empty list to the sponge + payload[pos] = 0x80 + pos++ + } + } + // Add an empty 17th value + payload[pos] = 0x80 + pos++ + + // Compute the header, length size is either 0, 1 or 2 bytes since + // there are at least 17 empty list headers, and at most 16 hashes + // plus an empty header for the value. + var start int + if pos-3 < 56 { + payload[2] = 0xc0 + byte(pos-3) + start = 2 + } else if pos-3 < 256 { + payload[2] = byte(pos - 3) + payload[1] = 0xf8 + start = 1 + } else { + payload[2] = byte(pos - 3) + payload[1] = byte((pos - 3) >> 8) + payload[0] = 0xf9 + start = 0 + } + + // Do not hash if the payload length is less than 32 bytes + if pos-start < 32 { + return payload[start:pos] + } + d.Write(payload[start:pos]) + case extNode: + ch := st.children[0].hash() + if (len(st.key)/2)+1+len(ch) < 29 { + return rawExtHPRLP(st.key, st.val) + } + writeHPRLP(d, st.key, ch, false) + st.children[0] = nil // Reclaim mem from subtree + case leafNode: + if (len(st.key)/2)+1+len(st.val) < 30 { + return rawLeafHPRLP(st.key, st.val, true) + } + writeHPRLP(d, st.key, st.val, true) + case emptyNode: + return emptyRoot[:] + default: + panic("Invalid node type") + } + return d.Sum(nil) +} + +func (st *ReStackTrie) Hash() (h common.Hash) { + return common.BytesToHash(st.hash()) +} From 6aae512a178662468779409e0eec0c23fd883c88 Mon Sep 17 00:00:00 2001 From: Guillaume Ballet Date: Tue, 30 Jun 2020 12:24:55 +0200 Subject: [PATCH 2/6] rename ReStackTrie to StackTrie --- trie/stacktrie.go | 57 +++++++++++++++++++++++------------------------ 1 file changed, 28 insertions(+), 29 deletions(-) diff --git a/trie/stacktrie.go b/trie/stacktrie.go index 33a5768c5061..3cb2b2d7efca 100644 --- a/trie/stacktrie.go +++ b/trie/stacktrie.go @@ -23,26 +23,25 @@ import ( "golang.org/x/crypto/sha3" ) -// ReStackTrie is a reimplementation of the Stacktrie, that fixes -// bugs in the previous implementation, and which also implements 
-// its own hashing mechanism which is more specific and hopefully -// more efficient that the default hasher. -type ReStackTrie struct { - nodeType uint8 // node type (as in branch, ext, leaf) - val []byte // value contained by this node if it's a leaf - key []byte // key chunk covered by this (full|ext) node - keyOffset int // offset of the key chunk inside a full key - children [16]*ReStackTrie // list of children (for fullnodes and exts) +// StackTrie is a trie implementation that expects keys to be inserted +// in order. Once it determines that a subtree will no longer be inserted +// into, it will hash it and free up the memory it uses. +type StackTrie struct { + nodeType uint8 // node type (as in branch, ext, leaf) + val []byte // value contained by this node if it's a leaf + key []byte // key chunk covered by this (full|ext) node + keyOffset int // offset of the key chunk inside a full key + children [16]*StackTrie // list of children (for fullnodes and exts) } -// NewReStackTrie allocates and initializes an empty trie. -func NewReStackTrie() *ReStackTrie { - return &ReStackTrie{ +// NewStackTrie allocates and initializes an empty trie. +func NewStackTrie() *StackTrie { + return &StackTrie{ nodeType: emptyNode, } } -// List all values that ReStackTrie#nodeType can hold +// List all values that StackTrie#nodeType can hold const ( emptyNode = iota branchNode @@ -51,7 +50,7 @@ const ( hashedNode ) -func (st *ReStackTrie) TryUpdate(key, value []byte) error { +func (st *StackTrie) TryUpdate(key, value []byte) error { k := keybytesToHex(key) if len(value) == 0 { panic("deletion not supported") @@ -63,7 +62,7 @@ func (st *ReStackTrie) TryUpdate(key, value []byte) error { // Helper function that, given a full key, determines the index // at which the chunk pointed by st.keyOffset is different from // the same chunk in the full key. 
-func (st *ReStackTrie) getDiffIndex(key []byte) int { +func (st *StackTrie) getDiffIndex(key []byte) int { diffindex := 0 for ; diffindex < len(st.key) && st.key[diffindex] == key[st.keyOffset+diffindex]; diffindex++ { } @@ -72,12 +71,12 @@ func (st *ReStackTrie) getDiffIndex(key []byte) int { // Helper function to that inserts a (key, value) pair into // the trie. -func (st *ReStackTrie) insert(key, value []byte) { +func (st *StackTrie) insert(key, value []byte) { switch st.nodeType { case branchNode: /* Branch */ idx := int(key[st.keyOffset]) if st.children[idx] == nil { - st.children[idx] = NewReStackTrie() + st.children[idx] = NewStackTrie() st.children[idx].keyOffset = st.keyOffset + 1 } for i := idx - 1; i >= 0; i-- { @@ -112,9 +111,9 @@ func (st *ReStackTrie) insert(key, value []byte) { // at the extension's last byte or not, create an // intermediate extension or use the extension's child // node directly. - var n *ReStackTrie + var n *StackTrie if diffidx < len(st.key)-1 { - n = NewReStackTrie() + n = NewStackTrie() n.key = st.key[diffidx+1:] n.children[0] = st.children[0] n.nodeType = extNode @@ -125,7 +124,7 @@ func (st *ReStackTrie) insert(key, value []byte) { } n.keyOffset = st.keyOffset + diffidx + 1 - var p *ReStackTrie + var p *StackTrie if diffidx == 0 { // the break is on the first byte, so // the current node is converted into @@ -137,7 +136,7 @@ func (st *ReStackTrie) insert(key, value []byte) { // the common prefix is at least one byte // long, insert a new intermediate branch // node. 
- st.children[0] = NewReStackTrie() + st.children[0] = NewStackTrie() st.children[0].nodeType = branchNode st.children[0].keyOffset = st.keyOffset + diffidx p = st.children[0] @@ -148,7 +147,7 @@ func (st *ReStackTrie) insert(key, value []byte) { n.key = nil // Create a leaf for the inserted part - o := NewReStackTrie() + o := NewStackTrie() o.keyOffset = st.keyOffset + diffidx + 1 o.key = key[o.keyOffset:] o.val = value @@ -178,7 +177,7 @@ func (st *ReStackTrie) insert(key, value []byte) { // Check if the split occurs at the first nibble of the // chunk. In that case, no prefix extnode is necessary. // Otherwise, create that - var p *ReStackTrie + var p *StackTrie if diffidx == 0 { // Convert current leaf into a branch st.nodeType = branchNode @@ -188,7 +187,7 @@ func (st *ReStackTrie) insert(key, value []byte) { // Convert current node into an ext, // and insert a child branch node. st.nodeType = extNode - st.children[0] = NewReStackTrie() + st.children[0] = NewStackTrie() st.children[0].nodeType = branchNode st.children[0].keyOffset = st.keyOffset + diffidx p = st.children[0] @@ -199,7 +198,7 @@ func (st *ReStackTrie) insert(key, value []byte) { // The child leave will be hashed directly in order to // free up some memory. 
origIdx := st.key[diffidx] - p.children[origIdx] = NewReStackTrie() + p.children[origIdx] = NewStackTrie() p.children[origIdx].nodeType = leafNode p.children[origIdx].key = st.key[diffidx+1:] p.children[origIdx].val = st.val @@ -210,7 +209,7 @@ func (st *ReStackTrie) insert(key, value []byte) { p.children[origIdx].key = nil newIdx := key[diffidx+st.keyOffset] - p.children[newIdx] = NewReStackTrie() + p.children[newIdx] = NewStackTrie() p.children[newIdx].nodeType = leafNode p.children[newIdx].key = key[p.keyOffset+1:] p.children[newIdx].val = value @@ -448,7 +447,7 @@ func writeHPRLP(writer io.Writer, key, val []byte, leaf bool) { //io.Copy(w, &writer) } -func (st *ReStackTrie) hash() []byte { +func (st *StackTrie) hash() []byte { /* Shortcut if node is already hashed */ if st.nodeType == hashedNode { return st.val @@ -523,6 +522,6 @@ func (st *ReStackTrie) hash() []byte { return d.Sum(nil) } -func (st *ReStackTrie) Hash() (h common.Hash) { +func (st *StackTrie) Hash() (h common.Hash) { return common.BytesToHash(st.hash()) } From d2fec167ca2dd287ee4f12679b66fc18646b04ce Mon Sep 17 00:00:00 2001 From: Guillaume Ballet Date: Tue, 30 Jun 2020 12:51:13 +0200 Subject: [PATCH 3/6] rework stacktrie tests to work on master --- trie/stacktrie_test.go | 135 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 135 insertions(+) create mode 100644 trie/stacktrie_test.go diff --git a/trie/stacktrie_test.go b/trie/stacktrie_test.go new file mode 100644 index 000000000000..baeb9d98c472 --- /dev/null +++ b/trie/stacktrie_test.go @@ -0,0 +1,135 @@ +// Copyright 2020 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. 
+// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>. + +package trie + +import ( + "bytes" + "testing" + + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/ethdb/memorydb" +) + +func TestRawHPRLP(t *testing.T) { + got := rawLeafHPRLP([]byte{0x00, 0x01}, []byte{0x02, 0x03}, true) + exp := []byte{198, 130, 32, 1, 130, 2, 3} + + if !bytes.Equal(exp, got) { + t.Fatalf("invalid RLP generated for leaf with even length key: got %v, expected %v", common.ToHex(got), common.ToHex(exp)) + } + + got = rawLeafHPRLP([]byte{0x01}, []byte{0x02, 0x03}, true) + exp = []byte{196, 49, 130, 2, 3} + + if !bytes.Equal(exp, got) { + t.Fatalf("invalid RLP generated for leaf with odd length key: got %v, expected %v", common.ToHex(got), common.ToHex(exp)) + } + + got = rawLeafHPRLP([]byte{0x00, 0x01}, []byte{0x02, 0x03}, false) + exp = []byte{198, 130, 0, 1, 130, 2, 3} + + if !bytes.Equal(exp, got) { + t.Fatalf("invalid RLP generated for ext with even length key: got %v, expected %v", common.ToHex(got), common.ToHex(exp)) + } + + got = rawLeafHPRLP([]byte{0x01}, []byte{0x02, 0x03}, false) + exp = []byte{196, 17, 130, 2, 3} + + if !bytes.Equal(exp, got) { + t.Fatalf("invalid RLP generated for ext with odd length key: got %v, expected %v", common.ToHex(got), common.ToHex(exp)) + } +} + +// smallRLPTrie is a list of hex-encoded key, value pairs inserted into a trie by a test case +type smallRLPTrie []struct { + Key, Value string +} + +var smallRLPTests = []smallRLPTrie{ + // One leaf will have a size > 32, the other not. 
+ smallRLPTrie{ + { + "2ba639a09a19480b3290299aa982d38c688871e70b0734ac8aa69b9d59492fb3", + "8181", + }, + { + "2ba639a09acf0edbf01831ef3366124dece00d7e4c498f46126d214a8bca7436", + "a03330333335343331333033613332333333613330333732653330333033303561", + }, + }, + // Both leaves have sizes smaller than 32. + smallRLPTrie{ + { + "2ba639a09a19480b3290299aa982d38c688871e70b0734ac8aa69b9d59492fb3", + "8181", + }, + { + "2ba639a09acf0edbf01831ef3366124dece00d7e4c498f46126d214a8bca7436", + "a033", + }, + }, + // Only one leaf + smallRLPTrie{ + { + "2ba639a09a19480b3290299aa982d38c688871e70b0734ac8aa69b9d59492fb3", + "8181", + }, + }, + // Leaf with an odd-length value and a size < 32 + smallRLPTrie{ + { + "2ba639a09a19480b3290299aa982d38c688871e70b0734ac8aa69b9d59492fb3", + "81", + }, + }, + // Two leaves whose size is < 32 and one of them has a one-byte value + // bigger than 128. + smallRLPTrie{ + { + "2ba639a09a19480b3290299aa982d38c688871e70b0734ac8aa69b9d59492fb3", + "8181", + }, + { + "2ba639a09acf0edbf01831ef3366124dece00d7e4c498f46126d214a8bca7436", + "a0", + }, + }, +} + +func TestHashWithSmallRLP(t *testing.T) { + for _, test := range smallRLPTests { + trie := NewStackTrie() + for _, kv := range test { + trie.TryUpdate(common.FromHex(kv.Key), common.FromHex(kv.Value)) + } + + stdtrie, err := New(emptyRoot, NewDatabase(memorydb.New())) + if err != nil { + t.Fatalf("error initializing std trie: %v", err) + } + for _, kv := range test { + stdtrie.TryUpdate(common.FromHex(kv.Key), common.FromHex(kv.Value)) + } + + got := trie.Hash() + exp := stdtrie.Hash() + + if !bytes.Equal(got[:], exp[:]) { + t.Fatalf("error calculating hash for embedded RLP: %v != %v", common.ToHex(exp[:]), common.ToHex(got[:])) + } + } +} From 81c434826b60ace04b6f3dc9dfd5dd05ae4f252b Mon Sep 17 00:00:00 2001 From: Guillaume Ballet Date: Tue, 30 Jun 2020 13:11:08 +0200 Subject: [PATCH 4/6] Move node creation code into helper functions --- trie/stacktrie.go | 61 
++++++++++++++++++++++++++++------------------- 1 file changed, 37 insertions(+), 24 deletions(-) diff --git a/trie/stacktrie.go b/trie/stacktrie.go index 3cb2b2d7efca..a42fcecaad6e 100644 --- a/trie/stacktrie.go +++ b/trie/stacktrie.go @@ -41,6 +41,32 @@ func NewStackTrie() *StackTrie { } } +func newLeaf(ko int, key, val []byte) *StackTrie { + return &StackTrie{ + nodeType: leafNode, + keyOffset: ko, + key: key[ko:], + val: val, + } +} + +func (st *StackTrie) convertToHash(ko int) { + st.keyOffset = ko + st.val = st.hash() + st.nodeType = hashedNode + st.key = nil +} + +func newExt(ko int, key []byte, child *StackTrie) *StackTrie { + st := &StackTrie{ + nodeType: extNode, + keyOffset: ko, + key: key[ko:], + } + st.children[0] = child + return st +} + // List all values that StackTrie#nodeType can hold const ( emptyNode = iota @@ -113,16 +139,16 @@ func (st *StackTrie) insert(key, value []byte) { // node directly. var n *StackTrie if diffidx < len(st.key)-1 { - n = NewStackTrie() - n.key = st.key[diffidx+1:] - n.children[0] = st.children[0] - n.nodeType = extNode + n = newExt(diffidx+1, st.key, st.children[0]) } else { // Break on the last byte, no need to insert // an extension node: reuse the current node n = st.children[0] + // DEBUG this line shouldn't be necessary since + // st.children[0] should already have keyOffset + // set correctly. 
Keeping it until confirmation + //n.keyOffset = st.keyOffset + diffidx + 1 } - n.keyOffset = st.keyOffset + diffidx + 1 var p *StackTrie if diffidx == 0 { @@ -147,11 +173,7 @@ func (st *StackTrie) insert(key, value []byte) { n.key = nil // Create a leaf for the inserted part - o := NewStackTrie() - o.keyOffset = st.keyOffset + diffidx + 1 - o.key = key[o.keyOffset:] - o.val = value - o.nodeType = leafNode + o := newLeaf(st.keyOffset+diffidx+1, key, value) // Insert both child leaves where they belong: origIdx := st.key[diffidx] @@ -198,23 +220,14 @@ func (st *StackTrie) insert(key, value []byte) { // The child leave will be hashed directly in order to // free up some memory. origIdx := st.key[diffidx] - p.children[origIdx] = NewStackTrie() - p.children[origIdx].nodeType = leafNode - p.children[origIdx].key = st.key[diffidx+1:] - p.children[origIdx].val = st.val - p.children[origIdx].keyOffset = p.keyOffset + 1 - - p.children[origIdx].val = p.children[origIdx].hash() - p.children[origIdx].nodeType = hashedNode - p.children[origIdx].key = nil + p.children[origIdx] = newLeaf(diffidx+1, st.key, st.val) + p.children[origIdx].convertToHash(p.keyOffset + 1) newIdx := key[diffidx+st.keyOffset] - p.children[newIdx] = NewStackTrie() - p.children[newIdx].nodeType = leafNode - p.children[newIdx].key = key[p.keyOffset+1:] - p.children[newIdx].val = value - p.children[newIdx].keyOffset = p.keyOffset + 1 + p.children[newIdx] = newLeaf(p.keyOffset+1, key, value) + // Finally, cut off the key part that has been passed + // over to the children. 
st.key = st.key[:diffidx] case emptyNode: /* Empty */ st.nodeType = leafNode From e742f7d9eee80a3ec80ba6153eb61494f4f79bbc Mon Sep 17 00:00:00 2001 From: Guillaume Ballet Date: Wed, 1 Jul 2020 11:35:18 +0200 Subject: [PATCH 5/6] stacktrie: implement commit --- trie/stacktrie.go | 39 ++++++++++++++++++++++++++++++++++----- 1 file changed, 34 insertions(+), 5 deletions(-) diff --git a/trie/stacktrie.go b/trie/stacktrie.go index a42fcecaad6e..ee8acc4562be 100644 --- a/trie/stacktrie.go +++ b/trie/stacktrie.go @@ -17,9 +17,11 @@ package trie import ( + "bytes" "io" "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/crypto" "golang.org/x/crypto/sha3" ) @@ -32,6 +34,8 @@ type StackTrie struct { key []byte // key chunk covered by this (full|ext) node keyOffset int // offset of the key chunk inside a full key children [16]*StackTrie // list of children (for fullnodes and exts) + + db *Database // Pointer to the commit db, can be nil } // NewStackTrie allocates and initializes an empty trie. @@ -466,7 +470,10 @@ func (st *StackTrie) hash() []byte { return st.val } - d := sha3.NewLegacyKeccak256() + var ret [32]byte + d := hasherPool.Get().(crypto.KeccakState) + var preimage bytes.Buffer + switch st.nodeType { case branchNode: payload := [544]byte{} @@ -512,29 +519,51 @@ func (st *StackTrie) hash() []byte { // Do not hash if the payload length is less than 32 bytes if pos-start < 32 { + // rlp len < 32, will be embedded + // into its parent. return payload[start:pos] } - d.Write(payload[start:pos]) + preimage.Write(payload[start:pos]) case extNode: ch := st.children[0].hash() if (len(st.key)/2)+1+len(ch) < 29 { + // rlp len < 32, will be embedded + // into its parent. return rawExtHPRLP(st.key, st.val) } - writeHPRLP(d, st.key, ch, false) + writeHPRLP(&preimage, st.key, ch, false) st.children[0] = nil // Reclaim mem from subtree case leafNode: if (len(st.key)/2)+1+len(st.val) < 30 { + // rlp len < 32, will be embedded + // into its parent. 
return rawLeafHPRLP(st.key, st.val, true) } - writeHPRLP(d, st.key, st.val, true) + writeHPRLP(&preimage, st.key, st.val, true) case emptyNode: return emptyRoot[:] default: panic("Invalid node type") } - return d.Sum(nil) + d.Write(preimage.Bytes()) + d.Read(ret[:]) + + if st.db != nil { + st.db.insertPreimage(common.BytesToHash(ret[:]), preimage.Bytes()) + } + + return ret[:] } func (st *StackTrie) Hash() (h common.Hash) { return common.BytesToHash(st.hash()) } + +func (st *StackTrie) Commit(db *Database) common.Hash { + oldDB := st.db + st.db = db + defer func() { + st.db = oldDB + }() + return common.BytesToHash(st.hash()) +} From fde6d59047d6c32730f9cffdf02eb6f39a102933 Mon Sep 17 00:00:00 2001 From: Guillaume Ballet Date: Wed, 1 Jul 2020 14:26:48 +0200 Subject: [PATCH 6/6] trie: fix hasher pool runtime error --- trie/stacktrie.go | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/trie/stacktrie.go b/trie/stacktrie.go index ee8acc4562be..edaab77147f0 100644 --- a/trie/stacktrie.go +++ b/trie/stacktrie.go @@ -19,6 +19,7 @@ package trie import ( "bytes" "io" + "sync" "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/crypto" @@ -464,6 +465,13 @@ func writeHPRLP(writer io.Writer, key, val []byte, leaf bool) { //io.Copy(w, &writer) } +// hasherPoolNoCache holds LegacyKeccak hashers. +var hasherPoolNoCache = sync.Pool{ + New: func() interface{} { + return sha3.NewLegacyKeccak256() + }, +} + func (st *StackTrie) hash() []byte { /* Shortcut if node is already hashed */ if st.nodeType == hashedNode { @@ -471,7 +479,9 @@ func (st *StackTrie) hash() []byte { } var ret [32]byte - d := hasherPool.Get().(crypto.KeccakState) + d := hasherPoolNoCache.Get().(crypto.KeccakState) + d.Reset() + defer hasherPoolNoCache.Put(d) var preimage bytes.Buffer switch st.nodeType {