Always record storage when stateless validation is enabled #10859

Merged
merged 6 commits on Mar 26, 2024
31 changes: 22 additions & 9 deletions chain/chain/src/chain.rs
@@ -1738,9 +1738,18 @@ impl Chain {
block_preprocess_info: BlockPreprocessInfo,
apply_results: Vec<(ShardId, Result<ShardUpdateResult, Error>)>,
) -> Result<Option<Tip>, Error> {
// Save state transition data to the database only if it might later be needed
// for generating a state witness. Storage space optimization.
let should_save_state_transition_data =
self.should_produce_state_witness_for_this_or_next_epoch(me, block.header())?;
let mut chain_update = self.chain_update();
let new_head =
chain_update.postprocess_block(me, &block, block_preprocess_info, apply_results)?;
let new_head = chain_update.postprocess_block(
me,
&block,
block_preprocess_info,
apply_results,
should_save_state_transition_data,
)?;
chain_update.commit()?;
Ok(new_head)
}
@@ -2954,9 +2963,17 @@ impl Chain {
results: Vec<Result<ShardUpdateResult, Error>>,
) -> Result<(), Error> {
let block = self.chain_store.get_block(block_hash)?;
// Save state transition data to the database only if it might later be needed
// for generating a state witness. Storage space optimization.
let should_save_state_transition_data =
self.should_produce_state_witness_for_this_or_next_epoch(me, block.header())?;
let mut chain_update = self.chain_update();
let results = results.into_iter().collect::<Result<Vec<_>, Error>>()?;
chain_update.apply_chunk_postprocessing(&block, results)?;
chain_update.apply_chunk_postprocessing(
&block,
results,
should_save_state_transition_data,
)?;
chain_update.commit()?;

let epoch_id = block.header().epoch_id();
@@ -3323,12 +3340,8 @@ impl Chain {
// only for a single shard. This so far has been enough.
let state_patch = state_patch.take();

let storage_context = StorageContext {
storage_data_source: StorageDataSource::Db,
state_patch,
record_storage: self
.should_produce_state_witness_for_this_or_next_epoch(me, block.header())?,
Contributor:

should_produce_state_witness_for_this_or_next_epoch is used in one other place in prepare_transactions. We should remove it from there as well and delete the function.

};
let storage_context =
StorageContext { storage_data_source: StorageDataSource::Db, state_patch };
let stateful_job = self.get_update_shard_job(
me,
block,
35 changes: 21 additions & 14 deletions chain/chain/src/chain_update.rs
@@ -145,10 +145,11 @@ impl<'a> ChainUpdate<'a> {
&mut self,
block: &Block,
apply_results: Vec<ShardUpdateResult>,
should_save_state_transition_data: bool,
) -> Result<(), Error> {
let _span = tracing::debug_span!(target: "chain", "apply_chunk_postprocessing").entered();
for result in apply_results {
self.process_apply_chunk_result(block, result)?;
self.process_apply_chunk_result(block, result, should_save_state_transition_data)?;
}
Ok(())
}
@@ -299,6 +300,7 @@ impl<'a> ChainUpdate<'a> {
&mut self,
block: &Block,
result: ShardUpdateResult,
should_save_state_transition_data: bool,
) -> Result<(), Error> {
let block_hash = block.hash();
let prev_hash = block.header().prev_hash();
@@ -351,12 +353,14 @@ impl<'a> ChainUpdate<'a> {
apply_result.outcomes,
outcome_paths,
);
self.chain_store_update.save_state_transition_data(
*block_hash,
shard_id,
apply_result.proof,
apply_result.applied_receipts_hash,
);
if should_save_state_transition_data {
self.chain_store_update.save_state_transition_data(
*block_hash,
shard_id,
apply_result.proof,
apply_result.applied_receipts_hash,
);
}
if let Some(resharding_results) = resharding_results {
self.process_resharding_results(block, &shard_uid, resharding_results)?;
}
@@ -383,12 +387,14 @@

self.chain_store_update.save_chunk_extra(block_hash, &shard_uid, new_extra);
self.chain_store_update.save_trie_changes(apply_result.trie_changes);
self.chain_store_update.save_state_transition_data(
*block_hash,
shard_uid.shard_id(),
apply_result.proof,
apply_result.applied_receipts_hash,
);
if should_save_state_transition_data {
self.chain_store_update.save_state_transition_data(
*block_hash,
shard_uid.shard_id(),
apply_result.proof,
apply_result.applied_receipts_hash,
);
}

if let Some(resharding_config) = resharding_results {
self.process_resharding_results(block, &shard_uid, resharding_config)?;
@@ -413,6 +419,7 @@
block: &Block,
block_preprocess_info: BlockPreprocessInfo,
apply_chunks_results: Vec<(ShardId, Result<ShardUpdateResult, Error>)>,
should_save_state_transition_data: bool,
Member:

Can be computed here, because postprocess_block has me.

Contributor Author:

That's true, but Chain also calls ChainUpdate::process_apply_chunk_result, which doesn't have a me argument. This means that should_produce_state_witness_for_this_or_next_epoch would have to be available from both Chain and ChainUpdate; I can't just do everything inside ChainUpdate.

I guess it would be possible to move should_produce_state_witness_for_this_or_next_epoch to EpochManager and use it from both Chain and ChainUpdate, but I'm not convinced there's any real benefit gained in exchange for the additional work. At this point I'm tempted to just merge it and unblock the statelessnet release.

Member (@Longarithm, Mar 26, 2024):

Uhh right. Okay, makes sense, feel free to merge
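As a side note, a minimal sketch of the alternative mentioned above: exposing the check from the epoch manager so that both Chain and ChainUpdate could call it. The trait name and signature below are illustrative assumptions, not code from this PR.

```rust
use near_primitives::errors::EpochError;
use near_primitives::hash::CryptoHash;
use near_primitives::types::AccountId;

/// Hypothetical extension trait: the "should this node produce a state witness
/// for this or the next epoch?" check, moved out of Chain so that ChainUpdate
/// could call it directly instead of receiving a precomputed boolean.
pub trait StateWitnessProducerCheck {
    fn should_produce_state_witness_for_this_or_next_epoch(
        &self,
        me: &Option<AccountId>,
        prev_block_hash: &CryptoHash,
    ) -> Result<bool, EpochError>;
}
```

Threading the precomputed boolean through ChainUpdate, as this PR does, avoids adding such an interface and keeps the epoch-manager API unchanged.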

) -> Result<Option<Tip>, Error> {
let shard_ids = self.epoch_manager.shard_ids(block.header().epoch_id())?;
let prev_hash = block.header().prev_hash();
@@ -422,7 +429,7 @@
}
x
}).collect::<Result<Vec<_>, Error>>()?;
self.apply_chunk_postprocessing(block, results)?;
self.apply_chunk_postprocessing(block, results, should_save_state_transition_data)?;

let BlockPreprocessInfo {
is_caught_up,
11 changes: 8 additions & 3 deletions chain/chain/src/runtime/mod.rs
@@ -15,6 +15,7 @@ use near_epoch_manager::{EpochManagerAdapter, EpochManagerHandle};
use near_parameters::{ActionCosts, ExtCosts, RuntimeConfigStore};
use near_pool::types::TransactionGroupIterator;
use near_primitives::account::{AccessKey, Account};
use near_primitives::checked_feature;
use near_primitives::errors::{InvalidTxError, RuntimeError, StorageError};
use near_primitives::hash::{hash, CryptoHash};
use near_primitives::receipt::{DelayedReceiptIndices, Receipt};
@@ -30,7 +31,7 @@ use near_primitives::types::{
AccountId, Balance, BlockHeight, EpochHeight, EpochId, EpochInfoProvider, Gas, MerkleHash,
ShardId, StateChangeCause, StateChangesForResharding, StateRoot, StateRootNode,
};
use near_primitives::version::ProtocolVersion;
use near_primitives::version::{ProtocolVersion, PROTOCOL_VERSION};
use near_primitives::views::{
AccessKeyInfoView, CallResult, ContractCodeView, QueryRequest, QueryResponse,
QueryResponseKind, ViewApplyState, ViewStateResult,
@@ -704,7 +705,9 @@ impl RuntimeAdapter for NightshadeRuntime {
storage_config.use_flat_storage,
),
};
if storage_config.record_storage {
if checked_feature!("stable", StatelessValidationV0, PROTOCOL_VERSION)
|| cfg!(feature = "shadow_chunk_validation")
{
Comment on lines +708 to +710

Member:

Same as below. If we want to be super precise, we can use the current epoch's protocol version, because that recorded storage is not saved anywhere; it is used immediately on chunk production.

trie = trie.recording_reads();
}
let mut state_update = TrieUpdate::new(trie);
@@ -865,7 +868,9 @@ impl RuntimeAdapter for NightshadeRuntime {
storage_config.use_flat_storage,
),
};
if storage_config.record_storage {
if checked_feature!("stable", StatelessValidationV0, PROTOCOL_VERSION)
|| cfg!(feature = "shadow_chunk_validation")
{
Comment on lines +871 to +873

Member:

PROTOCOL_VERSION is not enough; we need a check against the protocol version of the next epoch here. If we stabilize this version and some validator picks it up before others, its performance will suffer because it will have to read all the trie nodes while the others do not. And if the protocol version switch is delayed for some reason, that validator may miss blocks/chunks because of it.
We need the next epoch, not the current epoch, because state transitions must be recorded one epoch before the protocol upgrade. EpochManager has a method for that.

Contributor Author:

Hmm, I imagined that before switching to stateless validation we would enable this feature (or make it the default), and then all nodes would start recording the data needed for stateless validation. At some point the protocol version would change and the nodes would start using this recorded data, but it's okay to start recording much earlier; without stateless validation the data would just be discarded.
Basically, we don't need to start recording at the exact moment it's needed; we can do it as soon as the updated node starts.

Is it really that much of a performance hit? We have to do it in the epoch before switching to stateless validation, so it has to be performant enough for normal traffic. Anton's shadow_chunk_validation records data and it's able to keep up with the mainnet traffic.

Member:

> Anton's shadow_chunk_validation records data and it's able to keep up with the mainnet traffic.

Oh, that's nice.
I also realised that memtrie should be enabled no later than stateless validation, which will give the desired performance. Then looks good!
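For illustration only, here is a rough sketch of the next-epoch check described in this thread. The EpochManagerAdapter methods used below (get_next_epoch_id_from_prev_block, get_epoch_protocol_version) and the wrapper function are assumptions for the sketch, not part of this diff.

```rust
use near_epoch_manager::EpochManagerAdapter;
use near_primitives::checked_feature;
use near_primitives::errors::EpochError;
use near_primitives::hash::CryptoHash;

/// Sketch: record state transitions when the *next* epoch's protocol version
/// already has stateless validation, so recording starts one epoch before the
/// protocol switch (or when shadow chunk validation is compiled in).
fn should_record_storage(
    epoch_manager: &dyn EpochManagerAdapter,
    prev_block_hash: &CryptoHash,
) -> Result<bool, EpochError> {
    let next_epoch_id = epoch_manager.get_next_epoch_id_from_prev_block(prev_block_hash)?;
    let next_version = epoch_manager.get_epoch_protocol_version(&next_epoch_id)?;
    Ok(checked_feature!("stable", StatelessValidationV0, next_version)
        || cfg!(feature = "shadow_chunk_validation"))
}
```

Such a result could gate trie.recording_reads() per epoch instead of the compile-time PROTOCOL_VERSION check used in this change.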

trie = trie.recording_reads();
}

16 changes: 12 additions & 4 deletions chain/chain/src/runtime/tests.rs
@@ -9,8 +9,10 @@ use near_epoch_manager::{EpochManager, RngSeed};
use near_pool::{
InsertTransactionResult, PoolIteratorWrapper, TransactionGroupIteratorWrapper, TransactionPool,
};
use near_primitives::checked_feature;
use near_primitives::test_utils::create_test_signer;
use near_primitives::types::validator_stake::{ValidatorStake, ValidatorStakeIter};
use near_primitives::version::PROTOCOL_VERSION;
use near_store::flat::{FlatStateChanges, FlatStateDelta, FlatStateDeltaMetadata};
use near_store::genesis::initialize_genesis_state;
use num_rational::Ratio;
@@ -1601,6 +1603,11 @@ fn prepare_transactions(
/// Check that transactions validation works the same when using recorded storage proof instead of db.
#[test]
fn test_prepare_transactions_storage_proof() {
if !checked_feature!("stable", StatelessValidationV0, PROTOCOL_VERSION) {
println!("Test not applicable without StatelessValidation enabled");
Comment on lines +1606 to +1607

Contributor:

Does this mean that this test won't kick in before hitting testnet (with a stable build)? Isn't that too late?

Contributor Author (@jancionear, Mar 22, 2024):

This means that the test will only execute in builds where the default protocol version has stateless validation enabled.
In practice this means that the test will be enabled for builds with --features statelessnet_protocol and --features nightly. Both of those options are present in the CI, so the test will be run on every change, and it will be enabled on statelessnet builds.

Tbh I don't know what "stable" in checked_feature is supposed to mean; the macro just checks the protocol version:

```rust
#[macro_export]
macro_rules! checked_feature {
    ("stable", $feature:ident, $current_protocol_version:expr) => {{
        $crate::version::ProtocolFeature::$feature.protocol_version() <= $current_protocol_version
    }};
}
```

The code is here if anyone wants to take a look:

```rust
pub const PROTOCOL_VERSION: ProtocolVersion = if cfg!(feature = "statelessnet_protocol") {
    // Current StatelessNet protocol version.
    83
} else if cfg!(feature = "nightly_protocol") {
    // On nightly, pick big enough version to support all features.
    140
} else {
    // Enable all stable features.
    STABLE_PROTOCOL_VERSION
};

/// Both, outgoing and incoming tcp connections to peers, will be rejected if `peer's`
/// protocol version is lower than this.
pub const PEER_MIN_ALLOWED_PROTOCOL_VERSION: ProtocolVersion = STABLE_PROTOCOL_VERSION - 2;

#[macro_export]
macro_rules! checked_feature {
    ("stable", $feature:ident, $current_protocol_version:expr) => {{
        $crate::version::ProtocolFeature::$feature.protocol_version() <= $current_protocol_version
    }};
    ($feature_name:tt, $feature:ident, $current_protocol_version:expr) => {{
        #[cfg(feature = $feature_name)]
        let is_feature_enabled = $crate::version::ProtocolFeature::$feature.protocol_version()
            <= $current_protocol_version;
        #[cfg(not(feature = $feature_name))]
        let is_feature_enabled = {
            // Workaround unused variable warning
            let _ = $current_protocol_version;
            false
        };
        is_feature_enabled
    }};
}
```

Contributor Author (@jancionear, Mar 22, 2024):

We already have this kind of check in many other tests that don't work without stateless validation, e.g:

```rust
if !checked_feature!("stable", StatelessValidationV0, PROTOCOL_VERSION) {
    return;
}
```

let (env, chain, mut transaction_pool) = get_test_env_with_chain_and_pool();
let transactions_count = transaction_pool.len();

@@ -1609,7 +1616,6 @@ fn test_prepare_transactions_storage_proof() {
use_flat_storage: true,
source: StorageDataSource::Db,
state_patch: Default::default(),
record_storage: true,
};

let proposed_transactions = prepare_transactions(
Expand All @@ -1630,7 +1636,6 @@ fn test_prepare_transactions_storage_proof() {
nodes: proposed_transactions.storage_proof.unwrap(),
}),
state_patch: Default::default(),
record_storage: false,
};

let validated_transactions = prepare_transactions(
@@ -1647,6 +1652,11 @@
/// Check that transactions validation fails if provided empty storage proof.
#[test]
fn test_prepare_transactions_empty_storage_proof() {
if !checked_feature!("stable", StatelessValidationV0, PROTOCOL_VERSION) {
println!("Test not applicable without StatelessValidation enabled");
return;
}

let (env, chain, mut transaction_pool) = get_test_env_with_chain_and_pool();
let transactions_count = transaction_pool.len();

@@ -1655,7 +1665,6 @@
use_flat_storage: true,
source: StorageDataSource::Db,
state_patch: Default::default(),
record_storage: true,
};

let proposed_transactions = prepare_transactions(
@@ -1676,7 +1685,6 @@
nodes: PartialState::default(), // We use empty storage proof here.
}),
state_patch: Default::default(),
record_storage: false,
};

let validation_result = prepare_transactions(
27 changes: 18 additions & 9 deletions chain/chain/src/test_utils/kv_runtime.rs
@@ -24,7 +24,6 @@ use near_primitives::epoch_manager::ValidatorSelectionConfig;
use near_primitives::errors::{EpochError, InvalidTxError};
use near_primitives::hash::{hash, CryptoHash};
use near_primitives::receipt::{ActionReceipt, Receipt, ReceiptEnum};
use near_primitives::shard_layout;
use near_primitives::shard_layout::{ShardLayout, ShardUId};
use near_primitives::sharding::{ChunkHash, ShardChunkHeader};
use near_primitives::state_part::PartId;
@@ -45,6 +44,7 @@ use near_primitives::views::{
AccessKeyInfoView, AccessKeyList, CallResult, ContractCodeView, EpochValidatorInfo,
QueryRequest, QueryResponse, QueryResponseKind, ViewStateResult,
};
use near_primitives::{checked_feature, shard_layout};
use near_store::test_utils::TestTriesBuilder;
use near_store::{
set_genesis_hash, set_genesis_state_roots, DBCol, ShardTries, StorageError, Store, StoreUpdate,
@@ -1083,7 +1083,7 @@ impl RuntimeAdapter for KeyValueRuntime {

fn prepare_transactions(
&self,
storage: RuntimeStorageConfig,
_storage: RuntimeStorageConfig,
_chunk: PrepareTransactionsChunkContext,
_prev_block: PrepareTransactionsBlockContext,
transaction_groups: &mut dyn TransactionGroupIterator,
@@ -1094,11 +1094,14 @@
while let Some(iter) = transaction_groups.next() {
res.push(iter.next().unwrap());
}
Ok(PreparedTransactions {
transactions: res,
limited_by: None,
storage_proof: if storage.record_storage { Some(Default::default()) } else { None },
})
let storage_proof = if checked_feature!("stable", StatelessValidationV0, PROTOCOL_VERSION)
|| cfg!(feature = "shadow_chunk_validation")
{
Some(Default::default())
} else {
None
};
Ok(PreparedTransactions { transactions: res, limited_by: None, storage_proof })
}

fn apply_chunk(
@@ -1242,7 +1245,13 @@
let state_root = hash(&data);
self.state.write().unwrap().insert(state_root, state);
self.state_size.write().unwrap().insert(state_root, state_size);

let storage_proof = if checked_feature!("stable", StatelessValidationV0, PROTOCOL_VERSION)
|| cfg!(feature = "shadow_chunk_validation")
{
Some(Default::default())
} else {
None
};
Ok(ApplyChunkResult {
trie_changes: WrappedTrieChanges::new(
self.get_tries(),
Expand All @@ -1258,7 +1267,7 @@ impl RuntimeAdapter for KeyValueRuntime {
validator_proposals: vec![],
total_gas_burnt: 0,
total_balance_burnt: 0,
proof: if storage_config.record_storage { Some(Default::default()) } else { None },
proof: storage_proof,
processed_delayed_receipts: vec![],
applied_receipts_hash: hash(&borsh::to_vec(receipts).unwrap()),
})
2 changes: 0 additions & 2 deletions chain/chain/src/types.rs
@@ -263,7 +263,6 @@ pub struct RuntimeStorageConfig {
pub use_flat_storage: bool,
pub source: StorageDataSource,
pub state_patch: SandboxStatePatch,
pub record_storage: bool,
}

impl RuntimeStorageConfig {
@@ -273,7 +272,6 @@ impl RuntimeStorageConfig {
use_flat_storage,
source: StorageDataSource::Db,
state_patch: Default::default(),
record_storage: false,
}
}
}
3 changes: 0 additions & 3 deletions chain/chain/src/update_shard.rs
@@ -115,7 +115,6 @@ pub struct StorageContext {
/// Data source used for processing shard update.
pub storage_data_source: StorageDataSource,
pub state_patch: SandboxStatePatch,
pub record_storage: bool,
}

/// Processes shard update with given block and shard.
@@ -185,7 +184,6 @@ pub fn apply_new_chunk(
use_flat_storage: true,
source: storage_context.storage_data_source,
state_patch: storage_context.state_patch,
record_storage: storage_context.record_storage,
};
match runtime.apply_chunk(
storage_config,
@@ -247,7 +245,6 @@ pub fn apply_old_chunk(
use_flat_storage: true,
source: storage_context.storage_data_source,
state_patch: storage_context.state_patch,
record_storage: storage_context.record_storage,
};
match runtime.apply_chunk(
storage_config,
7 changes: 0 additions & 7 deletions chain/client/src/client.rs
@@ -999,18 +999,11 @@ impl Client {
let prepared_transactions = if let Some(mut iter) =
sharded_tx_pool.get_pool_iterator(shard_uid)
{
let me = self
.validator_signer
.as_ref()
.map(|validator_signer| validator_signer.validator_id().clone());
let record_storage = chain
.should_produce_state_witness_for_this_or_next_epoch(&me, &prev_block_header)?;
let storage_config = RuntimeStorageConfig {
state_root: *chunk_extra.state_root(),
use_flat_storage: true,
source: StorageDataSource::Db,
state_patch: Default::default(),
record_storage,
};
runtime.prepare_transactions(
storage_config,
@@ -262,7 +262,6 @@ pub(crate) fn pre_validate_chunk_state_witness(
nodes: state_witness.new_transactions_validation_state.clone(),
}),
state_patch: Default::default(),
record_storage: false,
};

match validate_prepared_transactions(
@@ -314,7 +313,6 @@
nodes: state_witness.main_state_transition.base_state.clone(),
}),
state_patch: Default::default(),
record_storage: false,
},
})
};
@@ -529,7 +527,6 @@ pub(crate) fn validate_chunk_state_witness(
nodes: transition.base_state,
}),
state_patch: Default::default(),
record_storage: false,
},
};
let OldChunkResult { apply_result, .. } = apply_old_chunk(