Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Validator key hot swap #11536

Merged
merged 62 commits into from
Jun 24, 2024
Merged
Show file tree
Hide file tree
Changes from 52 commits
Commits
Show all changes
62 commits
Select commit Hold shift + click to select a range
4c23cfa
Refactor Signer, ValidatorSigner
staffik May 21, 2024
47d9d26
Refactor continued
staffik May 24, 2024
5ca2fd3
refactor
staffik May 26, 2024
b60dde1
formatting
staffik May 26, 2024
edc3486
Pytest
staffik May 15, 2024
3a8b7fd
sighup
May 15, 2024
8d887a5
Dynamic updateable validator key
May 16, 2024
d82dc0b
hotswap
staffik May 26, 2024
6657b3f
Dynamic validator key everywhere
staffik Jun 3, 2024
a335707
Dynamic validator_id for shards_manager_actor
staffik Jun 7, 2024
760f8b3
Advertise Tier1 proxies
staffik Jun 10, 2024
78fe1a8
Adjust ViewClientActorInner
staffik Jun 10, 2024
07ec14a
Adjust StateSyncDumper
staffik Jun 10, 2024
718942a
formatting
staffik Jun 10, 2024
ca88247
clippy
staffik Jun 10, 2024
96bef88
Merge remote-tracking branch 'origin/master' into refactor-signer
staffik Jun 10, 2024
960ca21
clippy
staffik Jun 10, 2024
bc47972
Merge branch 'refactor-signer' into validator-key-hot-swap
staffik Jun 10, 2024
a1ee361
update tests
staffik Jun 10, 2024
f0e444d
add comments
staffik Jun 10, 2024
0b77278
Merge branch 'refactor-signer' into validator-key-hot-swap
staffik Jun 10, 2024
22b1af1
compiler errors
staffik Jun 10, 2024
ff4d55d
style fix
staffik Jun 10, 2024
1866a90
fix
staffik Jun 10, 2024
c3ce95d
nit
staffik Jun 10, 2024
637d384
Merge remote-tracking branch 'origin/master' into validator-key-hot-swap
staffik Jun 10, 2024
a153c73
Turn back into noop
staffik Jun 10, 2024
7a79205
Revert "Turn back into noop"
staffik Jun 10, 2024
25e8ed2
add comments
staffik Jun 10, 2024
e85771e
style fix
staffik Jun 10, 2024
9ed6095
Fix partial witness actor
staffik Jun 11, 2024
d31ff34
Refactor shards_manager_actor
staffik Jun 11, 2024
485da1c
fix
staffik Jun 11, 2024
054c742
fix
staffik Jun 11, 2024
73cf46f
Merge branch 'validator-key-hot-swap' into actual-validator-key-hot-swap
staffik Jun 11, 2024
22390b4
Bring serialize/deserialize back
staffik Jun 11, 2024
aa67b0a
Update pytest
staffik Jun 11, 2024
70cc0ed
Merge remote-tracking branch 'origin/master' into validator-key-hot-swap
staffik Jun 11, 2024
59db162
Merge branch 'validator-key-hot-swap' into actual-validator-key-hot-swap
staffik Jun 11, 2024
a9cd895
Update pytest
staffik Jun 11, 2024
044e772
Refactor client validator_signer
staffik Jun 19, 2024
10c1699
Merge remote-tracking branch 'origin/master' into validator-key-hot-swap
staffik Jun 19, 2024
dc264e8
cargo fmt
staffik Jun 19, 2024
909e03d
FrozenValidatorConfig
staffik Jun 20, 2024
f629898
Merge branch 'validator-key-hot-swap' into actual-validator-key-hot-swap
staffik Jun 20, 2024
d450cce
Update pytest
staffik Jun 20, 2024
b2646ba
comments
staffik Jun 20, 2024
b053500
Reduce me.as_ref() and prefer &Arc<ValidatorSigner>
staffik Jun 20, 2024
2b04477
Enrich NotAValidator error
staffik Jun 20, 2024
a465905
comment
staffik Jun 20, 2024
ca74874
Merge branch 'validator-key-hot-swap' into actual-validator-key-hot-swap
staffik Jun 20, 2024
914c1ee
update pytest
staffik Jun 20, 2024
0597b4e
ConfigUpdaterResult
staffik Jun 21, 2024
4c70bd0
Merge remote-tracking branch 'origin/master' into actual-validator-ke…
staffik Jun 21, 2024
f5d3249
Alias for mutable validator signer
staffik Jun 21, 2024
7dabdeb
Remove serialization from ValidatorSigner
staffik Jun 21, 2024
43af232
Comment advertiseTier1Proxies
staffik Jun 21, 2024
f54f8cf
Remove redundant (de)serialize derives
staffik Jun 21, 2024
9f8804a
nits
staffik Jun 24, 2024
0bb8207
Merge remote-tracking branch 'origin/master' into actual-validator-ke…
staffik Jun 24, 2024
8bc3890
python style fix
staffik Jun 24, 2024
436b11d
Merge branch 'master' into actual-validator-key-hot-swap
staffik Jun 24, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

12 changes: 7 additions & 5 deletions chain/chain-primitives/src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -198,8 +198,8 @@ pub enum Error {
#[error("Invalid Split Shard Ids when resharding. shard_id: {0}, parent_shard_id: {1}")]
InvalidSplitShardsIds(u64, u64),
/// Someone is not a validator. Usually happens in signature verification
#[error("Not A Validator")]
NotAValidator,
#[error("Not A Validator: {0}")]
NotAValidator(String),
/// Someone is not a chunk validator. Happens if we're asked to validate a chunk we're not
/// supposed to validate, or to verify a chunk approval signed by a validator that isn't
/// supposed to validate the chunk.
Expand Down Expand Up @@ -308,7 +308,7 @@ impl Error {
| Error::InvalidRandomnessBeaconOutput
| Error::InvalidBlockMerkleRoot
| Error::InvalidProtocolVersion
| Error::NotAValidator
| Error::NotAValidator(_)
| Error::NotAChunkValidator
| Error::InvalidChallengeRoot => true,
}
Expand Down Expand Up @@ -384,7 +384,7 @@ impl Error {
Error::InvalidRandomnessBeaconOutput => "invalid_randomness_beacon_output",
Error::InvalidBlockMerkleRoot => "invalid_block_merkele_root",
Error::InvalidProtocolVersion => "invalid_protocol_version",
Error::NotAValidator => "not_a_validator",
Error::NotAValidator(_) => "not_a_validator",
Error::NotAChunkValidator => "not_a_chunk_validator",
Error::InvalidChallengeRoot => "invalid_challenge_root",
}
Expand All @@ -396,7 +396,9 @@ impl From<EpochError> for Error {
match error {
EpochError::EpochOutOfBounds(epoch_id) => Error::EpochOutOfBounds(epoch_id),
EpochError::MissingBlock(h) => Error::DBNotFoundErr(format!("epoch block: {h}")),
EpochError::NotAValidator(_account_id, _epoch_id) => Error::NotAValidator,
EpochError::NotAValidator(account_id, _epoch_id) => {
Error::NotAValidator(format!("{account_id}"))
}
err => Error::ValidatorError(err.to_string()),
}
}
Expand Down
5 changes: 3 additions & 2 deletions chain/chain/src/chain.rs
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@ use near_primitives::unwrap_or_return;
#[cfg(feature = "new_epoch_sync")]
use near_primitives::utils::index_to_bytes;
use near_primitives::utils::MaybeValidated;
use near_primitives::validator_signer::ValidatorSigner;
use near_primitives::version::{ProtocolFeature, ProtocolVersion, PROTOCOL_VERSION};
use near_primitives::views::{
BlockStatusView, DroppedReason, ExecutionOutcomeWithIdView, ExecutionStatusView,
Expand Down Expand Up @@ -400,7 +401,7 @@ impl Chain {
chain_config: ChainConfig,
snapshot_callbacks: Option<SnapshotCallbacks>,
apply_chunks_spawner: Arc<dyn AsyncComputationSpawner>,
validator_account_id: Option<&AccountId>,
validator: MutableConfigValue<Option<Arc<ValidatorSigner>>>,
) -> Result<Chain, Error> {
// Get runtime initial state and create genesis block out of it.
let state_roots = get_genesis_state_roots(runtime_adapter.store())?
Expand Down Expand Up @@ -539,7 +540,7 @@ impl Chain {
.iter()
.filter(|shard_uid| {
shard_tracker.care_about_shard(
validator_account_id,
validator.get().map(|v| v.validator_id().clone()).as_ref(),
&tip.prev_block_hash,
shard_uid.shard_id(),
true,
Expand Down
59 changes: 29 additions & 30 deletions chain/chain/src/doomslug.rs
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,6 @@ pub struct Doomslug {
endorsement_pending: bool,
/// Information to track the timer (see `start_timer` routine in the paper)
timer: DoomslugTimer,
signer: Option<Arc<ValidatorSigner>>,
/// How many approvals to have before producing a block. In production should be always `HalfStake`,
/// but for many tests we use `NoApprovals` to invoke more forkfulness
threshold_mode: DoomslugThresholdMode,
Expand Down Expand Up @@ -362,7 +361,6 @@ impl Doomslug {
min_delay: Duration,
delay_step: Duration,
max_delay: Duration,
signer: Option<Arc<ValidatorSigner>>,
threshold_mode: DoomslugThresholdMode,
) -> Self {
Doomslug {
Expand Down Expand Up @@ -392,7 +390,6 @@ impl Doomslug {
delay_step,
max_delay,
},
signer,
threshold_mode,
history: VecDeque::new(),
}
Expand Down Expand Up @@ -465,7 +462,7 @@ impl Doomslug {
/// A vector of approvals that need to be sent to other block producers as a result of processing
/// the timers
#[must_use]
pub fn process_timer(&mut self) -> Vec<Approval> {
pub fn process_timer(&mut self, signer: &Option<Arc<ValidatorSigner>>) -> Vec<Approval> {
let now = self.clock.now();
let mut ret = vec![];
for _ in 0..MAX_TIMER_ITERS {
Expand All @@ -486,7 +483,7 @@ impl Doomslug {
if tip_height >= self.largest_target_height.get() {
self.largest_target_height.set(tip_height + 1);

if let Some(approval) = self.create_approval(tip_height + 1) {
if let Some(approval) = self.create_approval(tip_height + 1, signer) {
ret.push(approval);
}
self.update_history(ApprovalHistoryEntry {
Expand Down Expand Up @@ -514,7 +511,7 @@ impl Doomslug {
self.largest_target_height
.set(std::cmp::max(self.timer.height + 1, self.largest_target_height.get()));

if let Some(approval) = self.create_approval(self.timer.height + 1) {
if let Some(approval) = self.create_approval(self.timer.height + 1, signer) {
ret.push(approval);
}
self.update_history(ApprovalHistoryEntry {
Expand All @@ -541,9 +538,13 @@ impl Doomslug {
ret
}

fn create_approval(&self, target_height: BlockHeight) -> Option<Approval> {
self.signer.as_ref().map(|signer| {
Approval::new(self.tip.block_hash, self.tip.height, target_height, &**signer)
fn create_approval(
&self,
target_height: BlockHeight,
signer: &Option<Arc<ValidatorSigner>>,
) -> Option<Approval> {
signer.as_ref().map(|signer| {
Approval::new(self.tip.block_hash, self.tip.height, target_height, &*signer)
})
}

Expand Down Expand Up @@ -787,36 +788,36 @@ mod tests {
#[test]
fn test_endorsements_and_skips_basic() {
let clock = FakeClock::new(Utc::UNIX_EPOCH);
let signer = Some(Arc::new(create_test_signer("test").into()));
let mut ds = Doomslug::new(
clock.clock(),
0,
Duration::milliseconds(400),
Duration::milliseconds(1000),
Duration::milliseconds(100),
Duration::milliseconds(3000),
Some(Arc::new(create_test_signer("test"))),
DoomslugThresholdMode::TwoThirds,
);

// Set a new tip, must produce an endorsement
ds.set_tip(hash(&[1]), 1, 1);
clock.advance(Duration::milliseconds(399));
assert_eq!(ds.process_timer().len(), 0);
assert_eq!(ds.process_timer(&signer).len(), 0);
clock.advance(Duration::milliseconds(1));
let approval = ds.process_timer().into_iter().nth(0).unwrap();
let approval = ds.process_timer(&signer).into_iter().nth(0).unwrap();
assert_eq!(approval.inner, ApprovalInner::Endorsement(hash(&[1])));
assert_eq!(approval.target_height, 2);

// Same tip => no approval
assert_eq!(ds.process_timer(), vec![]);
assert_eq!(ds.process_timer(&signer), vec![]);

// The block was `ds_final` and therefore started the timer. Try checking before one second expires
clock.advance(Duration::milliseconds(599));
assert_eq!(ds.process_timer(), vec![]);
assert_eq!(ds.process_timer(&signer), vec![]);

// But one second should trigger the skip
clock.advance(Duration::milliseconds(1));
match ds.process_timer() {
match ds.process_timer(&signer) {
approvals if approvals.is_empty() => assert!(false),
approvals => {
assert_eq!(approvals[0].inner, ApprovalInner::Skip(1));
Expand All @@ -827,26 +828,26 @@ mod tests {
// Not processing a block at height 2 should not produce an appoval
ds.set_tip(hash(&[2]), 2, 0);
clock.advance(Duration::milliseconds(400));
assert_eq!(ds.process_timer(), vec![]);
assert_eq!(ds.process_timer(&signer), vec![]);

// Go forward more so we have 1 second
clock.advance(Duration::milliseconds(600));

// But at height 3 should (also neither block has finality set, keep last final at 0 for now)
ds.set_tip(hash(&[3]), 3, 0);
clock.advance(Duration::milliseconds(400));
let approval = ds.process_timer().into_iter().nth(0).unwrap();
let approval = ds.process_timer(&signer).into_iter().nth(0).unwrap();
assert_eq!(approval.inner, ApprovalInner::Endorsement(hash(&[3])));
assert_eq!(approval.target_height, 4);

// Go forward more so we have another second
clock.advance(Duration::milliseconds(600));

clock.advance(Duration::milliseconds(199));
assert_eq!(ds.process_timer(), vec![]);
assert_eq!(ds.process_timer(&signer), vec![]);

clock.advance(Duration::milliseconds(1));
match ds.process_timer() {
match ds.process_timer(&signer) {
approvals if approvals.is_empty() => assert!(false),
approvals if approvals.len() == 1 => {
assert_eq!(approvals[0].inner, ApprovalInner::Skip(3));
Expand All @@ -860,10 +861,10 @@ mod tests {

// Now skip 5 (the extra delay is 200+300 = 500)
clock.advance(Duration::milliseconds(499));
assert_eq!(ds.process_timer(), vec![]);
assert_eq!(ds.process_timer(&signer), vec![]);

clock.advance(Duration::milliseconds(1));
match ds.process_timer() {
match ds.process_timer(&signer) {
approvals if approvals.is_empty() => assert!(false),
approvals => {
assert_eq!(approvals[0].inner, ApprovalInner::Skip(3));
Expand All @@ -876,10 +877,10 @@ mod tests {

// Skip 6 (the extra delay is 0+200+300+400 = 900)
clock.advance(Duration::milliseconds(899));
assert_eq!(ds.process_timer(), vec![]);
assert_eq!(ds.process_timer(&signer), vec![]);

clock.advance(Duration::milliseconds(1));
match ds.process_timer() {
match ds.process_timer(&signer) {
approvals if approvals.is_empty() => assert!(false),
approvals => {
assert_eq!(approvals[0].inner, ApprovalInner::Skip(3));
Expand All @@ -893,11 +894,11 @@ mod tests {
// Accept block at 5 with finality on the prev block, expect it to not produce an approval
ds.set_tip(hash(&[5]), 5, 4);
clock.advance(Duration::milliseconds(400));
assert_eq!(ds.process_timer(), vec![]);
assert_eq!(ds.process_timer(&signer), vec![]);

// Skip a whole bunch of heights by moving 100 seconds ahead
clock.advance(Duration::seconds(100));
assert!(ds.process_timer().len() > 10);
assert!(ds.process_timer(&signer).len() > 10);

// Add some random small number of milliseconds to test that when the next block is added, the
// timer is reset
Expand All @@ -906,15 +907,15 @@ mod tests {
// No approval, since we skipped 6
ds.set_tip(hash(&[6]), 6, 4);
clock.advance(Duration::milliseconds(400));
assert_eq!(ds.process_timer(), vec![]);
assert_eq!(ds.process_timer(&signer), vec![]);

// The block height was less than the timer height, and thus the timer was reset.
// The wait time for height 7 with last ds final block at 5 is 1100
clock.advance(Duration::milliseconds(699));
assert_eq!(ds.process_timer(), vec![]);
assert_eq!(ds.process_timer(&signer), vec![]);

clock.advance(Duration::milliseconds(1));
match ds.process_timer() {
match ds.process_timer(&signer) {
approvals if approvals.is_empty() => assert!(false),
approvals => {
assert_eq!(approvals[0].inner, ApprovalInner::Skip(6));
Expand Down Expand Up @@ -942,7 +943,6 @@ mod tests {
.map(|(account_id, _, _)| create_test_signer(account_id))
.collect::<Vec<_>>();

let signer = Arc::new(create_test_signer("test"));
let clock = FakeClock::new(Utc::UNIX_EPOCH);
let mut ds = Doomslug::new(
clock.clock(),
Expand All @@ -951,7 +951,6 @@ mod tests {
Duration::milliseconds(1000),
Duration::milliseconds(100),
Duration::milliseconds(3000),
Some(signer),
DoomslugThresholdMode::TwoThirds,
);

Expand Down
6 changes: 3 additions & 3 deletions chain/chain/src/runtime/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@ use num_rational::Ratio;
use rand::{rngs::StdRng, seq::SliceRandom, SeedableRng};

use near_chain_configs::{
default_produce_chunk_add_transactions_time_limit, Genesis, DEFAULT_GC_NUM_EPOCHS_TO_KEEP,
NEAR_BASE,
default_produce_chunk_add_transactions_time_limit, Genesis, MutableConfigValue,
DEFAULT_GC_NUM_EPOCHS_TO_KEEP, NEAR_BASE,
};
use near_crypto::{InMemorySigner, KeyType, Signer};
use near_o11y::testonly::init_test_logger;
Expand Down Expand Up @@ -1619,7 +1619,7 @@ fn get_test_env_with_chain_and_pool() -> (TestEnv, Chain, TransactionPool) {
ChainConfig::test(),
None,
Arc::new(RayonAsyncComputationSpawner),
None,
MutableConfigValue::new(None, "validator_signer"),
)
.unwrap();

Expand Down
4 changes: 2 additions & 2 deletions chain/chain/src/store_validator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -382,7 +382,7 @@ impl StoreValidator {
#[cfg(test)]
mod tests {
use near_async::time::Clock;
use near_chain_configs::Genesis;
use near_chain_configs::{Genesis, MutableConfigValue};
use near_epoch_manager::EpochManager;
use near_store::genesis::initialize_genesis_state;
use near_store::test_utils::create_test_store;
Expand Down Expand Up @@ -418,7 +418,7 @@ mod tests {
ChainConfig::test(),
None,
Arc::new(RayonAsyncComputationSpawner),
None,
MutableConfigValue::new(None, "validator_signer"),
)
.unwrap();
(
Expand Down
6 changes: 3 additions & 3 deletions chain/chain/src/test_utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ use crate::types::{AcceptedBlock, ChainConfig, ChainGenesis};
use crate::DoomslugThresholdMode;
use crate::{BlockProcessingArtifact, Provenance};
use near_async::time::Clock;
use near_chain_configs::Genesis;
use near_chain_configs::{Genesis, MutableConfigValue};
use near_chain_primitives::Error;
use near_epoch_manager::shard_tracker::ShardTracker;
use near_epoch_manager::{EpochManager, EpochManagerHandle};
Expand Down Expand Up @@ -75,7 +75,7 @@ pub fn get_chain_with_epoch_length_and_num_shards(
ChainConfig::test(),
None,
Arc::new(RayonAsyncComputationSpawner),
None,
MutableConfigValue::new(None, "validator_signer"),
)
.unwrap()
}
Expand Down Expand Up @@ -159,7 +159,7 @@ pub fn setup_with_tx_validity_period(
ChainConfig::test(),
None,
Arc::new(RayonAsyncComputationSpawner),
None,
MutableConfigValue::new(None, "validator_signer"),
)
.unwrap();

Expand Down
Loading
Loading