Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor encode funcs #1399

Merged
merged 11 commits into from
Jan 9, 2024
2 changes: 2 additions & 0 deletions evm/src/cpu/kernel/asm/main.asm
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ global main:

// Initialise the shift table
%shift_table_init
// Encode constant nodes
%initialize_rlp_segment

// Initialize the state, transaction and receipt trie root pointers.
PROVER_INPUT(trie_ptr::state)
Expand Down
94 changes: 28 additions & 66 deletions evm/src/cpu/kernel/asm/mpt/hash/hash.asm
Original file line number Diff line number Diff line change
Expand Up @@ -49,10 +49,9 @@ mpt_hash_hash_rlp_after_unpacking:
// The result is given as a (value, length) pair, where the length is given
// in bytes.
//
// Pre stack: node_ptr, encode_value, retdest
// Post stack: result, result_len
// Pre stack: node_ptr, encode_value, cur_len, retdest
// Post stack: result, result_len, cur_len
global encode_or_hash_node:
// stack: node_ptr, encode_value, cur_len, retdest
DUP1 %mload_trie_data

// Check if we're dealing with a concrete node, i.e. not a hash node.
Expand Down Expand Up @@ -94,9 +93,9 @@ maybe_hash_node:
JUMP
pack_small_rlp:
// stack: result_ptr, result_len, cur_len, retdest
%stack (result_ptr, result_len, cur_len)
%stack (result_ptr, result_len)
-> (0, @SEGMENT_RLP_RAW, result_ptr, result_len,
after_packed_small_rlp, result_len, cur_len)
after_packed_small_rlp, result_len)
%jump(mload_packing)
after_packed_small_rlp:
%stack (result, result_len, cur_len, retdest) -> (retdest, result, result_len, cur_len)
Expand Down Expand Up @@ -124,19 +123,8 @@ encode_node:

global encode_node_empty:
// stack: node_type, node_payload_ptr, encode_value, cur_len, retdest
// The length of `TrieData` is unchanged here.
%pop3
// stack: cur_len, retdest
// An empty node is encoded as a single byte, 0x80, which is the RLP encoding of the empty string.
// TODO: Write this byte just once to RLP memory, then we can always return (0, 1).
%alloc_rlp_block
// stack: rlp_pos, cur_len, retdest
PUSH 0x80
// stack: 0x80, rlp_pos, cur_len, retdest
DUP2
// stack: rlp_pos, 0x80, rlp_pos, cur_len, retdest
%mstore_rlp
%stack (rlp_pos, cur_len, retdest) -> (retdest, rlp_pos, 1, cur_len)
%stack (cur_len, retdest) -> (retdest, @ENCODED_EMPTY_NODE_POS, 1, cur_len)
JUMP

global encode_node_branch:
Expand All @@ -147,33 +135,19 @@ global encode_node_branch:
SWAP2 %add_const(18) SWAP2
// stack: node_payload_ptr, encode_value, cur_len, retdest

// Get the next unused offset within the encoded child buffers.
// Then immediately increment the next unused offset by 16, so any
// recursive calls will use nonoverlapping offsets.
// TODO: Allocate a block of RLP memory instead?
%mload_global_metadata(@GLOBAL_METADATA_TRIE_ENCODED_CHILD_SIZE)
DUP1 %add_const(16)
%mstore_global_metadata(@GLOBAL_METADATA_TRIE_ENCODED_CHILD_SIZE)
// stack: base_offset, node_payload_ptr, encode_value, cur_len, retdest
// We will call encode_or_hash_node on each child. For the i'th child, we
// will store the result in SEGMENT_TRIE_ENCODED_CHILD[base + i], and its length in
// SEGMENT_TRIE_ENCODED_CHILD_LEN[base + i].
// Allocate a block of RLP memory
%alloc_rlp_block DUP1
// stack: rlp_pos, rlp_start, node_payload_ptr, encode_value, cur_len, retdest

// Call encode_or_hash_node on each child
%encode_child(0) %encode_child(1) %encode_child(2) %encode_child(3)
%encode_child(4) %encode_child(5) %encode_child(6) %encode_child(7)
%encode_child(8) %encode_child(9) %encode_child(10) %encode_child(11)
%encode_child(12) %encode_child(13) %encode_child(14) %encode_child(15)
// stack: base_offset, node_payload_ptr, encode_value, cur_len, retdest

// Now, append each child to our RLP tape.
%alloc_rlp_block DUP1
// stack: rlp_pos, rlp_start, base_offset, node_payload_ptr, encode_value, cur_len, retdest
%append_child(0) %append_child(1) %append_child(2) %append_child(3)
%append_child(4) %append_child(5) %append_child(6) %append_child(7)
%append_child(8) %append_child(9) %append_child(10) %append_child(11)
%append_child(12) %append_child(13) %append_child(14) %append_child(15)
// stack: rlp_pos', rlp_start, base_offset, node_payload_ptr, encode_value, cur_len, retdest
// stack: rlp_pos', rlp_start, node_payload_ptr, encode_value, cur_len, retdest

%stack (rlp_pos, rlp_start, base_offset, node_payload_ptr)
%stack (rlp_pos, rlp_start, node_payload_ptr)
-> (node_payload_ptr, rlp_pos, rlp_start)
%add_const(16)
// stack: value_ptr_ptr, rlp_pos', rlp_start, encode_value, cur_len, retdest
Expand Down Expand Up @@ -205,48 +179,36 @@ encode_node_branch_prepend_prefix:
-> (retdest, rlp_prefix_start, rlp_len, cur_len)
JUMP


// Part of the encode_node_branch function. Encodes the i'th child.
// Stores the result in SEGMENT_TRIE_ENCODED_CHILD[base + i], and its length in
// SEGMENT_TRIE_ENCODED_CHILD_LEN[base + i].
%macro encode_child(i)
// stack: base_offset, node_payload_ptr, encode_value, cur_len, retdest
// stack: rlp_pos, rlp_start, node_payload_ptr, encode_value, cur_len, retdest
PUSH %%after_encode
DUP4 DUP4
// stack: node_payload_ptr, encode_value, %%after_encode, base_offset, node_payload_ptr, encode_value, cur_len, retdest
DUP6 DUP6 DUP6
// stack: node_payload_ptr, encode_value, cur_len, %%after_encode, rlp_pos, rlp_start, node_payload_ptr, encode_value, cur_len, retdest
%add_const($i) %mload_trie_data
// stack: child_i_ptr, encode_value, %%after_encode, base_offset, node_payload_ptr, encode_value, cur_len, retdest
%stack(child_i_ptr, encode_value, after_encode, base_offset, node_payload_ptr, encode_value, cur_len) -> (child_i_ptr, encode_value, cur_len, after_encode, base_offset, node_payload_ptr, encode_value)
// stack: child_i_ptr, encode_value, cur_len, %%after_encode, rlp_pos, rlp_start, node_payload_ptr, encode_value, cur_len, retdest
%stack
(child_i_ptr, encode_value, cur_len, after_encode, rlp_pos, rlp_start, node_payload_ptr, encode_value, cur_len, retdest) ->
(child_i_ptr, encode_value, cur_len, after_encode, rlp_pos, rlp_start, node_payload_ptr, encode_value, retdest)
%jump(encode_or_hash_node)
%%after_encode:
// stack: result, result_len, cur_len, base_offset, node_payload_ptr, encode_value, retdest
%stack(result, result_len, cur_len, base_offset, node_payload_ptr, encode_value) -> (result, result_len, base_offset, node_payload_ptr, encode_value, cur_len)
DUP3 %add_const($i) %mstore_kernel(@SEGMENT_TRIE_ENCODED_CHILD)
// stack: result_len, base_offset, node_payload_ptr, encode_value, cur_len, retdest
DUP2 %add_const($i) %mstore_kernel(@SEGMENT_TRIE_ENCODED_CHILD_LEN)
// stack: base_offset, node_payload_ptr, encode_value, cur_len, retdest
%endmacro

// Part of the encode_node_branch function. Appends the i'th child's RLP.
%macro append_child(i)
// stack: rlp_pos, rlp_start, base_offset, node_payload_ptr, encode_value, cur_len, retdest
DUP3 %add_const($i) %mload_kernel(@SEGMENT_TRIE_ENCODED_CHILD) // load result
DUP4 %add_const($i) %mload_kernel(@SEGMENT_TRIE_ENCODED_CHILD_LEN) // load result_len
// stack: result_len, result, rlp_pos, rlp_start, base_offset, node_payload_ptr, encode_value, cur_len, retdest
// stack: result, result_len, cur_len, rlp_pos, rlp_start, node_payload_ptr, encode_value, retdest
// If result_len != 32, result is raw RLP, with an appropriate RLP prefix already.
DUP1 %sub_const(32) %jumpi(%%unpack)
SWAP1 DUP1 %sub_const(32) %jumpi(%%unpack)
// Otherwise, result is a hash, and we need to add the prefix 0x80 + 32 = 160.
// stack: result_len, result, rlp_pos, rlp_start, base_offset, node_payload_ptr, encode_value, cur_len, retdest
// stack: result_len, result, cur_len, rlp_pos, rlp_start, node_payload_ptr, encode_value, retdest
PUSH 160
DUP4 // rlp_pos
DUP5 // rlp_pos
%mstore_rlp
SWAP2 %increment SWAP2 // rlp_pos += 1
SWAP3 %increment SWAP3 // rlp_pos += 1
%%unpack:
%stack (result_len, result, rlp_pos, rlp_start, base_offset, node_payload_ptr, encode_value, cur_len, retdest)
%stack (result_len, result, cur_len, rlp_pos, rlp_start, node_payload_ptr, encode_value, retdest)
-> (rlp_pos, result, result_len, %%after_unpacking,
rlp_start, base_offset, node_payload_ptr, encode_value, cur_len, retdest)
rlp_start, node_payload_ptr, encode_value, cur_len, retdest)
%jump(mstore_unpacking_rlp)
%%after_unpacking:
// stack: rlp_pos', rlp_start, base_offset, node_payload_ptr, encode_value, cur_len, retdest
// stack: rlp_pos', rlp_start, node_payload_ptr, encode_value, cur_len, retdest
%endmacro

global encode_node_extension:
Expand Down
4 changes: 4 additions & 0 deletions evm/src/cpu/kernel/asm/mpt/hash/hash_trie_specific.asm
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,10 @@ global encode_account:
DUP3 %add_const(2) %mload_trie_data // storage_root_ptr = value[2]
// stack: storage_root_ptr, cur_len, rlp_pos_5, value_ptr, cur_len, retdest


PUSH debug_after_hash_storage_trie
POP

// Hash storage trie.
%mpt_hash_storage_trie
// stack: storage_root_digest, new_len, rlp_pos_5, value_ptr, cur_len, retdest
Expand Down
8 changes: 7 additions & 1 deletion evm/src/cpu/kernel/asm/mpt/util.asm
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,20 @@
// stack: (empty)
%endmacro

// Writes the single byte 0x80 at position @ENCODED_EMPTY_NODE_POS of the RLP
// segment. 0x80 is the RLP encoding of the empty string, which is how an empty
// node is encoded, so the encoding can be referenced at this fixed position
// instead of being written again each time an empty node is encoded.
%macro initialize_rlp_segment
// stack: (empty)
PUSH 0x80
// stack: 0x80
PUSH @ENCODED_EMPTY_NODE_POS
// stack: pos, 0x80
%mstore_rlp
// stack: (empty)
%endmacro

%macro alloc_rlp_block
// stack: (empty)
%mload_global_metadata(@GLOBAL_METADATA_RLP_DATA_SIZE)
// stack: block_start
// In our model it's fine to use memory in a sparse way, as long as the gaps aren't larger than
// 2^16 or so. So instead of the caller specifying the size of the block they need, we'll just
// allocate 0x10000 = 2^16 bytes, much larger than any RLP blob the EVM could possibly create.
DUP1 %add_const(0x10000)
DUP1 %add_const(@MAX_RLP_BLOB_SIZE)
// stack: block_end, block_start
%mstore_global_metadata(@GLOBAL_METADATA_RLP_DATA_SIZE)
// stack: block_start
Expand Down
76 changes: 35 additions & 41 deletions evm/src/cpu/kernel/constants/global_metadata.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,68 +30,64 @@ pub(crate) enum GlobalMetadata {
TransactionTrieRootDigestAfter = 11,
ReceiptTrieRootDigestAfter = 12,

/// The sizes of the `TrieEncodedChild` and `TrieEncodedChildLen` buffers. In other words, the
/// next available offset in these buffers.
TrieEncodedChildSize = 13,

// Block metadata.
BlockBeneficiary = 14,
BlockTimestamp = 15,
BlockNumber = 16,
BlockDifficulty = 17,
BlockRandom = 18,
BlockGasLimit = 19,
BlockChainId = 20,
BlockBaseFee = 21,
BlockGasUsed = 22,
BlockBeneficiary = 13,
BlockTimestamp = 14,
4l0n50 marked this conversation as resolved.
Show resolved Hide resolved
BlockNumber = 15,
BlockDifficulty = 16,
BlockRandom = 17,
BlockGasLimit = 18,
BlockChainId = 19,
BlockBaseFee = 20,
BlockGasUsed = 21,
/// Before current transactions block values.
BlockGasUsedBefore = 23,
BlockGasUsedBefore = 22,
/// After current transactions block values.
BlockGasUsedAfter = 24,
BlockGasUsedAfter = 23,
/// Current block header hash
BlockCurrentHash = 25,
BlockCurrentHash = 24,

/// Gas to refund at the end of the transaction.
RefundCounter = 26,
RefundCounter = 25,
/// Length of the addresses access list.
AccessedAddressesLen = 27,
AccessedAddressesLen = 26,
/// Length of the storage keys access list.
AccessedStorageKeysLen = 28,
AccessedStorageKeysLen = 27,
/// Length of the self-destruct list.
SelfDestructListLen = 29,
SelfDestructListLen = 28,
/// Length of the bloom entry buffer.
BloomEntryLen = 30,
BloomEntryLen = 29,

/// Length of the journal.
JournalLen = 31,
JournalLen = 30,
/// Length of the `JournalData` segment.
JournalDataLen = 32,
JournalDataLen = 31,
/// Current checkpoint.
CurrentCheckpoint = 33,
TouchedAddressesLen = 34,
CurrentCheckpoint = 32,
TouchedAddressesLen = 33,
// Gas cost for the access list in type-1 txns. See EIP-2930.
AccessListDataCost = 35,
AccessListDataCost = 34,
// Start of the access list in the RLP for type-1 txns.
AccessListRlpStart = 36,
AccessListRlpStart = 35,
// Length of the access list in the RLP for type-1 txns.
AccessListRlpLen = 37,
AccessListRlpLen = 36,
// Boolean flag indicating if the txn is a contract creation txn.
ContractCreation = 38,
IsPrecompileFromEoa = 39,
CallStackDepth = 40,
ContractCreation = 37,
IsPrecompileFromEoa = 38,
CallStackDepth = 39,
/// Transaction logs list length
LogsLen = 41,
LogsDataLen = 42,
LogsPayloadLen = 43,
TxnNumberBefore = 44,
TxnNumberAfter = 45,
LogsLen = 40,
LogsDataLen = 41,
LogsPayloadLen = 42,
TxnNumberBefore = 43,
TxnNumberAfter = 44,

KernelHash = 46,
KernelLen = 47,
KernelHash = 45,
KernelLen = 46,
}

impl GlobalMetadata {
pub(crate) const COUNT: usize = 48;
pub(crate) const COUNT: usize = 47;
4l0n50 marked this conversation as resolved.
Show resolved Hide resolved

pub(crate) const fn all() -> [Self; Self::COUNT] {
[
Expand All @@ -108,7 +104,6 @@ impl GlobalMetadata {
Self::StateTrieRootDigestAfter,
Self::TransactionTrieRootDigestAfter,
Self::ReceiptTrieRootDigestAfter,
Self::TrieEncodedChildSize,
Self::BlockBeneficiary,
Self::BlockTimestamp,
Self::BlockNumber,
Expand Down Expand Up @@ -162,7 +157,6 @@ impl GlobalMetadata {
Self::StateTrieRootDigestAfter => "GLOBAL_METADATA_STATE_TRIE_DIGEST_AFTER",
Self::TransactionTrieRootDigestAfter => "GLOBAL_METADATA_TXN_TRIE_DIGEST_AFTER",
Self::ReceiptTrieRootDigestAfter => "GLOBAL_METADATA_RECEIPT_TRIE_DIGEST_AFTER",
Self::TrieEncodedChildSize => "GLOBAL_METADATA_TRIE_ENCODED_CHILD_SIZE",
Self::BlockBeneficiary => "GLOBAL_METADATA_BLOCK_BENEFICIARY",
Self::BlockTimestamp => "GLOBAL_METADATA_BLOCK_TIMESTAMP",
Self::BlockNumber => "GLOBAL_METADATA_BLOCK_NUMBER",
Expand Down
14 changes: 13 additions & 1 deletion evm/src/cpu/kernel/constants/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ use std::collections::HashMap;

use ethereum_types::U256;
use hex_literal::hex;
use static_assertions::const_assert;

use crate::cpu::kernel::constants::context_metadata::ContextMetadata;
use crate::cpu::kernel::constants::global_metadata::GlobalMetadata;
Expand Down Expand Up @@ -86,12 +87,23 @@ pub(crate) fn evm_constants() -> HashMap<String, U256> {
c
}

const MISC_CONSTANTS: [(&str, [u8; 32]); 1] = [
const MISC_CONSTANTS: [(&str, [u8; 32]); 3] = [
// Base for limbs used in bignum arithmetic.
(
"BIGNUM_LIMB_BASE",
hex!("0000000000000000000000000000000100000000000000000000000000000000"),
),
// Position in SEGMENT_RLP_RAW where the empty node encoding is stored. It is
// equal to u32::MAX so that every other RLP pointer is much smaller than it.
4l0n50 marked this conversation as resolved.
Show resolved Hide resolved
(
"ENCODED_EMPTY_NODE_POS",
hex!("00000000000000000000000000000000000000000000000000000000FFFFFFFF"),
),
// 0x10000 = 2^16 bytes, much larger than any RLP blob the EVM could possibly create.
(
"MAX_RLP_BLOB_SIZE",
hex!("0000000000000000000000000000000000000000000000000000000000010000"),
),
];

const HASH_CONSTANTS: [(&str, [u8; 32]); 2] = [
Expand Down
18 changes: 16 additions & 2 deletions evm/src/cpu/kernel/interpreter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -140,12 +140,14 @@ enum InterpreterMemOpKind {

impl<'a> Interpreter<'a> {
pub(crate) fn new_with_kernel(initial_offset: usize, initial_stack: Vec<U256>) -> Self {
Self::new(
let mut result = Self::new(
&KERNEL.code,
initial_offset,
initial_stack,
&KERNEL.prover_inputs,
)
);
result.initialize_rlp_segment();
result
}

pub(crate) fn new(
Expand Down Expand Up @@ -1171,6 +1173,18 @@ impl<'a> Interpreter<'a> {
}
self.generation_state.registers.context = context;
}

/// Writes 0x80, the RLP encoding of the empty string (used for empty nodes),
/// to position `@ENCODED_EMPTY_NODE_POS` of the raw RLP segment.
4l0n50 marked this conversation as resolved.
Show resolved Hide resolved
pub(crate) fn initialize_rlp_segment(&mut self) {
    // Fixed slot in the raw RLP segment reserved for the empty-node encoding.
    // Its offset is u32::MAX, the value of the `ENCODED_EMPTY_NODE_POS` kernel constant.
    let empty_node_addr = MemoryAddress {
        context: 0,
        segment: Segment::RlpRaw as usize,
        virt: 0xFFFF_FFFF,
    };
    // 0x80 is the one-byte RLP encoding of the empty string, i.e. of an empty node.
    self.generation_state
        .memory
        .set(empty_node_addr, 0x80.into());
}
}

// Computes the two's complement of the given integer.
Expand Down
Loading