From c6e8702d0c76345031f3c44a90ea89d4d7d11b67 Mon Sep 17 00:00:00 2001 From: wacban Date: Tue, 31 Oct 2023 11:07:55 +0000 Subject: [PATCH] feat: resharding tests for missing chunks --- chain/client/src/client.rs | 5 +- chain/client/src/test_utils/test_env.rs | 7 +- .../client/src/test_utils/test_env_builder.rs | 15 ++- chain/epoch-manager/src/lib.rs | 46 ++++++- core/primitives/src/epoch_manager.rs | 51 ++++++- core/store/src/trie/state_snapshot.rs | 5 +- .../src/tests/client/resharding.rs | 124 +++++++++++------- 7 files changed, 195 insertions(+), 58 deletions(-) diff --git a/chain/client/src/client.rs b/chain/client/src/client.rs index e2a415ec796..7664938663d 100644 --- a/chain/client/src/client.rs +++ b/chain/client/src/client.rs @@ -13,6 +13,7 @@ use crate::SyncAdapter; use crate::SyncMessage; use crate::{metrics, SyncStatus}; use actix_rt::ArbiterHandle; +use itertools::Itertools; use lru::LruCache; use near_async::messaging::{CanSend, Sender}; use near_chain::chain::VerifyBlockHashAndSignatureResult; @@ -626,8 +627,8 @@ impl Client { } let new_chunks = self.get_chunk_headers_ready_for_inclusion(&epoch_id, &prev_hash); - debug!(target: "client", "{:?} Producing block at height {}, parent {} @ {}, {} new chunks", validator_signer.validator_id(), - next_height, prev.height(), format_hash(head.last_block_hash), new_chunks.len()); + debug!(target: "client", "{:?} Producing block at height {}, parent {} @ {}, {} new chunks: {:?}", validator_signer.validator_id(), + next_height, prev.height(), format_hash(head.last_block_hash), new_chunks.len(), new_chunks.keys().sorted()); // If we are producing empty blocks and there are no transactions. if !self.config.produce_empty_blocks && new_chunks.is_empty() { diff --git a/chain/client/src/test_utils/test_env.rs b/chain/client/src/test_utils/test_env.rs index ebb35a7d83d..2b2d4f5856d 100644 --- a/chain/client/src/test_utils/test_env.rs +++ b/chain/client/src/test_utils/test_env.rs @@ -123,7 +123,12 @@ impl TestEnv { let mut keep_going = true; while keep_going { - for network_adapter in network_adapters.iter() { + // for network_adapter in network_adapters.iter() { + for i in 0..network_adapters.len() { + let network_adapter = network_adapters.get(i).unwrap(); + let _span = + tracing::debug_span!(target: "test", "process_partial_encoded_chunks", client=i).entered(); + keep_going = false; // process partial encoded chunks while let Some(request) = network_adapter.pop() { diff --git a/chain/client/src/test_utils/test_env_builder.rs b/chain/client/src/test_utils/test_env_builder.rs index 6643ccb13c4..b122a6bd49c 100644 --- a/chain/client/src/test_utils/test_env_builder.rs +++ b/chain/client/src/test_utils/test_env_builder.rs @@ -16,7 +16,7 @@ use near_chunks::test_utils::MockClientAdapterForShardsManager; use near_epoch_manager::shard_tracker::{ShardTracker, TrackedConfig}; use near_epoch_manager::{EpochManager, EpochManagerAdapter, EpochManagerHandle}; use near_network::test_utils::MockPeerManagerAdapter; -use near_primitives::epoch_manager::RngSeed; +use near_primitives::epoch_manager::{AllEpochConfigTestOverrides, RngSeed}; use near_primitives::types::{AccountId, NumShards}; use near_store::test_utils::create_test_store; use near_store::{NodeStorage, ShardUId, Store, StoreConfig}; @@ -238,8 +238,16 @@ impl TestEnvBuilder { self } - /// Constructs real EpochManager implementations for each instance. pub fn real_epoch_managers(self, genesis_config: &GenesisConfig) -> Self { + self.real_epoch_managers_with_test_overrides(genesis_config, None) + } + + /// Constructs real EpochManager implementations for each instance. + pub fn real_epoch_managers_with_test_overrides( + self, + genesis_config: &GenesisConfig, + test_overrides: Option, + ) -> Self { assert!( self.num_shards.is_none(), "Cannot set both num_shards and epoch_managers at the same time" @@ -247,9 +255,10 @@ impl TestEnvBuilder { let ret = self.ensure_stores(); let epoch_managers = (0..ret.clients.len()) .map(|i| { - EpochManager::new_arc_handle( + EpochManager::new_arc_handle_with_test_overrides( ret.stores.as_ref().unwrap()[i].clone(), genesis_config, + test_overrides.clone(), ) }) .collect(); diff --git a/chain/epoch-manager/src/lib.rs b/chain/epoch-manager/src/lib.rs index 710933d84b5..e9b6b5e1566 100644 --- a/chain/epoch-manager/src/lib.rs +++ b/chain/epoch-manager/src/lib.rs @@ -6,7 +6,8 @@ use near_primitives::checked_feature; use near_primitives::epoch_manager::block_info::BlockInfo; use near_primitives::epoch_manager::epoch_info::{EpochInfo, EpochSummary}; use near_primitives::epoch_manager::{ - AllEpochConfig, EpochConfig, ShardConfig, SlashState, AGGREGATOR_KEY, + AllEpochConfig, AllEpochConfigTestOverrides, EpochConfig, ShardConfig, SlashState, + AGGREGATOR_KEY, }; use near_primitives::errors::EpochError; use near_primitives::hash::CryptoHash; @@ -151,9 +152,18 @@ impl EpochManager { pub fn new_from_genesis_config( store: Store, genesis_config: &GenesisConfig, + ) -> Result { + Self::new_from_genesis_config_with_test_overrides(store, genesis_config, None) + } + + pub fn new_from_genesis_config_with_test_overrides( + store: Store, + genesis_config: &GenesisConfig, + test_overrides: Option, ) -> Result { let reward_calculator = RewardCalculator::new(genesis_config); - let all_epoch_config = AllEpochConfig::from(genesis_config); + let all_epoch_config = + Self::new_all_epoch_config_with_test_overrides(genesis_config, test_overrides); Self::new( store, all_epoch_config, @@ -164,7 +174,37 @@ impl EpochManager { } pub fn new_arc_handle(store: Store, genesis_config: &GenesisConfig) -> Arc { - Arc::new(Self::new_from_genesis_config(store, genesis_config).unwrap().into_handle()) + Self::new_arc_handle_with_test_overrides(store, genesis_config, None) + } + + pub fn new_arc_handle_with_test_overrides( + store: Store, + genesis_config: &GenesisConfig, + test_overrides: Option, + ) -> Arc { + Arc::new( + Self::new_from_genesis_config_with_test_overrides( + store, + genesis_config, + test_overrides, + ) + .unwrap() + .into_handle(), + ) + } + + fn new_all_epoch_config_with_test_overrides( + genesis_config: &GenesisConfig, + test_overrides: Option, + ) -> AllEpochConfig { + let initial_epoch_config = EpochConfig::from(genesis_config); + let epoch_config = AllEpochConfig::new_with_test_overrides( + genesis_config.use_production_config(), + initial_epoch_config, + &genesis_config.chain_id, + test_overrides, + ); + epoch_config } pub fn new( diff --git a/core/primitives/src/epoch_manager.rs b/core/primitives/src/epoch_manager.rs index 9744e39a1d8..b55b30dbbad 100644 --- a/core/primitives/src/epoch_manager.rs +++ b/core/primitives/src/epoch_manager.rs @@ -73,6 +73,14 @@ impl ShardConfig { } } +/// Testing overrides to apply to the EpochConfig returned by the `for_protocol_version`. +/// All fields should be optional and the default should be a no-op. +#[derive(Clone, Default)] +pub struct AllEpochConfigTestOverrides { + pub block_producer_kickout_threshold: Option, + pub chunk_producer_kickout_threshold: Option, +} + /// AllEpochConfig manages protocol configs that might be changing throughout epochs (hence EpochConfig). /// The main function in AllEpochConfig is ::for_protocol_version which takes a protocol version /// and returns the EpochConfig that should be used for this protocol version. @@ -85,6 +93,9 @@ pub struct AllEpochConfig { genesis_epoch_config: EpochConfig, /// Chain Id. Some parameters are specific to certain chains. chain_id: String, + + /// Testing overrides to apply to the EpochConfig returned by the `for_protocol_version`. + test_overrides: AllEpochConfigTestOverrides, } impl AllEpochConfig { @@ -93,7 +104,26 @@ impl AllEpochConfig { genesis_epoch_config: EpochConfig, chain_id: &str, ) -> Self { - Self { use_production_config, genesis_epoch_config, chain_id: chain_id.to_string() } + Self { + use_production_config, + genesis_epoch_config, + chain_id: chain_id.to_string(), + test_overrides: AllEpochConfigTestOverrides::default(), + } + } + + pub fn new_with_test_overrides( + use_production_config: bool, + genesis_epoch_config: EpochConfig, + chain_id: &str, + test_overrides: Option, + ) -> Self { + Self { + use_production_config, + genesis_epoch_config, + chain_id: chain_id.to_string(), + test_overrides: test_overrides.unwrap_or_default(), + } } pub fn for_protocol_version(&self, protocol_version: ProtocolVersion) -> EpochConfig { @@ -108,6 +138,8 @@ impl AllEpochConfig { Self::config_max_kickout_stake(&mut config, protocol_version); + Self::config_test_overrides(&mut config, &self.test_overrides); + config } @@ -170,6 +202,23 @@ impl AllEpochConfig { config.validator_max_kickout_stake_perc = 30; } } + + fn config_test_overrides( + config: &mut EpochConfig, + test_overrides: &AllEpochConfigTestOverrides, + ) { + if let Some(block_producer_kickout_threshold) = + test_overrides.block_producer_kickout_threshold + { + config.block_producer_kickout_threshold = block_producer_kickout_threshold; + } + + if let Some(chunk_producer_kickout_threshold) = + test_overrides.chunk_producer_kickout_threshold + { + config.chunk_producer_kickout_threshold = chunk_producer_kickout_threshold; + } + } } /// Additional configuration parameters for the new validator selection diff --git a/core/store/src/trie/state_snapshot.rs b/core/store/src/trie/state_snapshot.rs index 5821e1064f5..c9de4343dce 100644 --- a/core/store/src/trie/state_snapshot.rs +++ b/core/store/src/trie/state_snapshot.rs @@ -297,7 +297,10 @@ impl ShardTries { let _span = tracing::info_span!(target: "state_snapshot", "delete_all_state_snapshots").entered(); let path = home_dir.join(hot_store_path).join(state_snapshot_subdir); - std::fs::remove_dir_all(&path) + if path.exists() { + std::fs::remove_dir_all(&path)? + } + Ok(()) } pub fn get_state_snapshot_base_dir( diff --git a/integration-tests/src/tests/client/resharding.rs b/integration-tests/src/tests/client/resharding.rs index d470193844f..a98ff75affe 100644 --- a/integration-tests/src/tests/client/resharding.rs +++ b/integration-tests/src/tests/client/resharding.rs @@ -10,7 +10,7 @@ use near_crypto::{InMemorySigner, KeyType, Signer}; use near_o11y::testonly::init_test_logger; use near_primitives::account::id::AccountId; use near_primitives::block::{Block, Tip}; -use near_primitives::epoch_manager::{AllEpochConfig, EpochConfig}; +use near_primitives::epoch_manager::{AllEpochConfig, AllEpochConfigTestOverrides, EpochConfig}; use near_primitives::hash::CryptoHash; use near_primitives::serialize::to_base64; use near_primitives::shard_layout::{account_id_to_shard_id, account_id_to_shard_uid}; @@ -197,11 +197,22 @@ impl TestReshardingEnv { } else { TestEnv::builder(chain_genesis) }; + // Set the kickout thresholds to zero. In some tests we have chunk + // producers missing chunks but we don't want any of the clients to get + // kicked. + // One of the reasons for not kicking is that the current test infra + // doesn't support requesting chunks and non-validators wouldn't be able + // to obtain the chunks at all. + // Same needs to be set in the genesis. + let epoch_config_test_overrides = Some(AllEpochConfigTestOverrides { + block_producer_kickout_threshold: Some(0), + chunk_producer_kickout_threshold: Some(0), + }); let env = builder .clients_count(num_clients) .validator_seats(num_validators) .real_stores() - .real_epoch_managers(&genesis.config) + .real_epoch_managers_with_test_overrides(&genesis.config, epoch_config_test_overrides) .nightshade_runtimes(&genesis) .track_all_shards() .build(); @@ -229,18 +240,6 @@ impl TestReshardingEnv { self.txs_by_height.insert(height, txs); } - /// produces and processes the next block - /// also checks that all accounts in initial_accounts are intact - /// - /// please also see the step_impl for changing the protocol version - fn step(&mut self, drop_chunk_condition: &DropChunkCondition) { - self.step_impl( - &drop_chunk_condition, - SIMPLE_NIGHTSHADE_PROTOCOL_VERSION, - &ReshardingType::V1, - ); - } - /// produces and processes the next block also checks that all accounts in /// initial_accounts are intact /// @@ -400,7 +399,8 @@ impl TestReshardingEnv { /// - all blocks after the block at `height` in the current canonical chain do not have /// new chunks for the corresponding shards fn check_next_block_with_new_chunk(&mut self, height: BlockHeight) { - let block = self.env.clients[0].chain.get_block_by_height(height).unwrap(); + let client = &self.env.clients[0]; + let block = client.chain.get_block_by_height(height).unwrap(); let block_hash = block.hash(); let num_shards = block.chunks().len(); for shard_id in 0..num_shards { @@ -408,14 +408,14 @@ impl TestReshardingEnv { // if `get_next_block_hash_with_new_chunk` returns None, that would be the lastest block // on chain, otherwise, that would be the block before the `block_hash` that the function // call returns - let mut last_block_hash_with_empty_chunk = match self.env.clients[0] + let next_block_hash_with_new_chunk = client .chain .get_next_block_hash_with_new_chunk(block_hash, shard_id as ShardId) - .unwrap() - { + .unwrap(); + + let mut last_block_hash_with_empty_chunk = match next_block_hash_with_new_chunk { Some((new_block_hash, target_shard_id)) => { - let new_block = - self.env.clients[0].chain.get_block(&new_block_hash).unwrap().clone(); + let new_block = client.chain.get_block(&new_block_hash).unwrap().clone(); let chunks = new_block.chunks(); // check that the target chunk in the new block is new assert_eq!( @@ -427,26 +427,35 @@ impl TestReshardingEnv { } *new_block.header().prev_hash() } - None => self.env.clients[0].chain.head().unwrap().last_block_hash, + None => client.chain.head().unwrap().last_block_hash, }; - // check that the target chunks in all prev blocks are not new + + // Check that the target chunks are not new for all blocks between + // last_block_hash_with_empty_chunk (inclusive) and + // block_hash (exclusive). while &last_block_hash_with_empty_chunk != block_hash { - let last_block = self.env.clients[0] - .chain - .get_block(&last_block_hash_with_empty_chunk) - .unwrap() - .clone(); + let last_block_hash = last_block_hash_with_empty_chunk; + let last_block = client.chain.get_block(&last_block_hash).unwrap().clone(); let chunks = last_block.chunks(); - if chunks.len() == num_shards { - assert_ne!( - chunks.get(shard_id).unwrap().height_included(), - last_block.header().height() - ); + + let target_shard_ids = if chunks.len() == num_shards { + // same shard layout between block and last_block + vec![shard_id as ShardId] } else { - for chunk in chunks.iter() { - assert_ne!(chunk.height_included(), last_block.header().height()); - } + // different shard layout between block and last_block + let shard_layout = client + .epoch_manager + .get_shard_layout_from_prev_block(&last_block_hash) + .unwrap(); + + shard_layout.get_split_shard_ids(shard_id as ShardId).unwrap() + }; + + for target_shard_id in target_shard_ids { + let chunk = chunks.get(target_shard_id as usize).unwrap(); + assert_ne!(chunk.height_included(), last_block.header().height()); } + last_block_hash_with_empty_chunk = *last_block.header().prev_hash(); } } @@ -780,7 +789,9 @@ fn setup_genesis( genesis_protocol_version: ProtocolVersion, ) -> Genesis { let mut genesis = Genesis::test(initial_accounts, num_validators); - // No kickout, since we are going to test missing chunks + // No kickout, since we are going to test missing chunks. + // Same needs to be set in the AllEpochConfigTestOverrides. + genesis.config.block_producer_kickout_threshold = 0; genesis.config.chunk_producer_kickout_threshold = 0; genesis.config.epoch_length = epoch_length; genesis.config.protocol_version = genesis_protocol_version; @@ -1312,11 +1323,15 @@ fn test_shard_layout_upgrade_incoming_receipts_impl_v2_seed_44() { // Test cross contract calls // This test case tests when there are missing chunks in the produced blocks // This is to test that all the chunk management logic in sharding split is correct -fn test_shard_layout_upgrade_missing_chunks(p_missing: f64, rng_seed: u64) { +fn test_shard_layout_upgrade_missing_chunks( + resharding_type: ReshardingType, + p_missing: f64, + rng_seed: u64, +) { init_test_logger(); - let resharding_type = ReshardingType::V1; let genesis_protocol_version = get_genesis_protocol_version(&resharding_type); + let target_protocol_version = get_target_protocol_version(&resharding_type); let epoch_length = 10; let mut test_env = @@ -1329,12 +1344,12 @@ fn test_shard_layout_upgrade_missing_chunks(p_missing: f64, rng_seed: u64) { // make sure initial txs (deploy smart contracts) are processed succesfully let drop_chunk_condition = DropChunkCondition::new(); for _ in 1..3 { - test_env.step(&drop_chunk_condition); + test_env.step_impl(&drop_chunk_condition, target_protocol_version, &resharding_type); } let drop_chunk_condition = DropChunkCondition::with_probability(p_missing); for _ in 3..3 * epoch_length { - test_env.step(&drop_chunk_condition); + test_env.step_impl(&drop_chunk_condition, target_protocol_version, &resharding_type); let last_height = test_env.env.clients[0].chain.head().unwrap().height; for height in last_height - 3..=last_height { test_env.check_next_block_with_new_chunk(height); @@ -1345,7 +1360,7 @@ fn test_shard_layout_upgrade_missing_chunks(p_missing: f64, rng_seed: u64) { // make sure all included transactions finished processing let drop_chunk_condition = DropChunkCondition::new(); for _ in 3 * epoch_length..5 * epoch_length { - test_env.step(&drop_chunk_condition); + test_env.step_impl(&drop_chunk_condition, target_protocol_version, &resharding_type); let last_height = test_env.env.clients[0].chain.head().unwrap().height; for height in last_height - 3..=last_height { test_env.check_next_block_with_new_chunk(height); @@ -1362,18 +1377,33 @@ fn test_shard_layout_upgrade_missing_chunks(p_missing: f64, rng_seed: u64) { } #[test] -fn test_shard_layout_upgrade_missing_chunks_low_missing_prob() { - test_shard_layout_upgrade_missing_chunks(0.1, 42); +fn test_shard_layout_upgrade_missing_chunks_low_missing_prob_v1() { + test_shard_layout_upgrade_missing_chunks(ReshardingType::V1, 0.1, 42); +} + +#[test] +fn test_shard_layout_upgrade_missing_chunks_mid_missing_prob_v1() { + test_shard_layout_upgrade_missing_chunks(ReshardingType::V1, 0.5, 42); +} + +#[test] +fn test_shard_layout_upgrade_missing_chunks_high_missing_prob_v1() { + test_shard_layout_upgrade_missing_chunks(ReshardingType::V1, 0.9, 42); +} + +#[test] +fn test_shard_layout_upgrade_missing_chunks_low_missing_prob_v2() { + test_shard_layout_upgrade_missing_chunks(ReshardingType::V2, 0.1, 42); } #[test] -fn test_shard_layout_upgrade_missing_chunks_mid_missing_prob() { - test_shard_layout_upgrade_missing_chunks(0.5, 42); +fn test_shard_layout_upgrade_missing_chunks_mid_missing_prob_v2() { + test_shard_layout_upgrade_missing_chunks(ReshardingType::V2, 0.5, 42); } #[test] -fn test_shard_layout_upgrade_missing_chunks_high_missing_prob() { - test_shard_layout_upgrade_missing_chunks(0.9, 42); +fn test_shard_layout_upgrade_missing_chunks_high_missing_prob_v2() { + test_shard_layout_upgrade_missing_chunks(ReshardingType::V2, 0.9, 42); } // TODO(resharding) add a test with missing blocks