Skip to content
This repository has been archived by the owner on Nov 15, 2023. It is now read-only.

Commit

Permalink
Don't change rep on timeout in collator protocol. (#4642)
Browse files Browse the repository at this point in the history
* Don't change rep on timeout in collator protocol.

* Fix tests.

* Fixes.
  • Loading branch information
eskimor authored Jan 4, 2022
1 parent 67321a2 commit 3ae71a5
Show file tree
Hide file tree
Showing 3 changed files with 37 additions and 70 deletions.
38 changes: 23 additions & 15 deletions node/network/collator-protocol/src/validator_side/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,6 @@ const COST_UNEXPECTED_MESSAGE: Rep = Rep::CostMinor("An unexpected message");
const COST_CORRUPTED_MESSAGE: Rep = Rep::CostMinor("Message was corrupt");
/// Network errors that originated at the remote host should have same cost as timeout.
const COST_NETWORK_ERROR: Rep = Rep::CostMinor("Some network error");
const COST_REQUEST_TIMED_OUT: Rep = Rep::CostMinor("A collation request has timed out");
const COST_INVALID_SIGNATURE: Rep = Rep::Malicious("Invalid network message signature");
const COST_REPORT_BAD: Rep = Rep::Malicious("A collator was reported by another subsystem");
const COST_WRONG_PARA: Rep = Rep::Malicious("A collator provided a collation for the wrong para");
Expand Down Expand Up @@ -1212,7 +1211,7 @@ async fn poll_requests(
if !result.is_ready() {
retained_requested.insert(pending_collation.clone());
}
if let CollationFetchResult::Error(rep) = result {
if let CollationFetchResult::Error(Some(rep)) = result {
reputation_changes.push((pending_collation.peer_id.clone(), rep));
}
}
Expand Down Expand Up @@ -1333,8 +1332,8 @@ enum CollationFetchResult {
/// The collation was fetched successfully.
Success,
/// An error occurred when fetching a collation or it was invalid.
/// A reputation change should be applied to the peer.
Error(Rep),
/// A given reputation change should be applied to the peer.
Error(Option<Rep>),
}

impl CollationFetchResult {
Expand Down Expand Up @@ -1380,7 +1379,19 @@ async fn poll_collation_response(
err = ?err,
"Collator provided response that could not be decoded"
);
CollationFetchResult::Error(COST_CORRUPTED_MESSAGE)
CollationFetchResult::Error(Some(COST_CORRUPTED_MESSAGE))
},
Err(err) if err.is_timed_out() => {
tracing::debug!(
target: LOG_TARGET,
hash = ?pending_collation.relay_parent,
para_id = ?pending_collation.para_id,
peer_id = ?pending_collation.peer_id,
"Request timed out"
);
// For now we don't want to change reputation on timeout, to mitigate issues like
// this: https://github.com/paritytech/polkadot/issues/4617
CollationFetchResult::Error(None)
},
Err(RequestError::NetworkError(err)) => {
tracing::debug!(
Expand All @@ -1392,24 +1403,21 @@ async fn poll_collation_response(
"Fetching collation failed due to network error"
);
// A minor decrease in reputation for any network failure seems
// sensible. In theory this could be exploited, by Dosing this node,
// sensible. In theory this could be exploited, by DoSing this node,
// which would result in reduced reputation for proper nodes, but the
// same can happen for penalties on timeouts, which we also have.
CollationFetchResult::Error(COST_NETWORK_ERROR)
CollationFetchResult::Error(Some(COST_NETWORK_ERROR))
},
Err(RequestError::Canceled(_)) => {
Err(RequestError::Canceled(err)) => {
tracing::debug!(
target: LOG_TARGET,
hash = ?pending_collation.relay_parent,
para_id = ?pending_collation.para_id,
peer_id = ?pending_collation.peer_id,
"Request timed out"
err = ?err,
"Canceled should be handled by `is_timed_out` above - this is a bug!"
);
// A minor decrease in reputation for any network failure seems
// sensible. In theory this could be exploited, by Dosing this node,
// which would result in reduced reputation for proper nodes, but the
// same can happen for penalties on timeouts, which we also have.
CollationFetchResult::Error(COST_REQUEST_TIMED_OUT)
CollationFetchResult::Error(None)
},
Ok(CollationFetchingResponse::Collation(receipt, _))
if receipt.descriptor().para_id != pending_collation.para_id =>
Expand All @@ -1422,7 +1430,7 @@ async fn poll_collation_response(
"Got wrong para ID for requested collation."
);

CollationFetchResult::Error(COST_WRONG_PARA)
CollationFetchResult::Error(Some(COST_WRONG_PARA))
},
Ok(CollationFetchingResponse::Collation(receipt, pov)) => {
tracing::debug!(
Expand Down
55 changes: 0 additions & 55 deletions node/network/collator-protocol/src/validator_side/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -624,17 +624,6 @@ fn fetch_collations_works() {
assert_fetch_collation_request(&mut virtual_overseer, second, test_state.chain_ids[0])
.await;

assert_matches!(
overseer_recv(&mut virtual_overseer).await,
AllMessages::NetworkBridge(NetworkBridgeMessage::ReportPeer(
peer,
rep,
)) => {
assert_eq!(peer, peer_b);
assert_eq!(rep, COST_REQUEST_TIMED_OUT);
}
);

let response_channel_non_exclusive =
assert_fetch_collation_request(&mut virtual_overseer, second, test_state.chain_ids[0])
.await;
Expand Down Expand Up @@ -861,17 +850,6 @@ fn inactive_disconnected() {

Delay::new(ACTIVITY_TIMEOUT * 3).await;

assert_matches!(
overseer_recv(&mut virtual_overseer).await,
AllMessages::NetworkBridge(NetworkBridgeMessage::ReportPeer(
peer,
rep,
)) => {
assert_eq!(peer, peer_b);
assert_eq!(rep, COST_REQUEST_TIMED_OUT);
}
);

assert_collator_disconnect(&mut virtual_overseer, peer_b.clone()).await;
virtual_overseer
});
Expand Down Expand Up @@ -924,51 +902,18 @@ fn activity_extends_life() {

advertise_collation(&mut virtual_overseer, peer_b.clone(), hash_b).await;

assert_matches!(
overseer_recv(&mut virtual_overseer).await,
AllMessages::NetworkBridge(NetworkBridgeMessage::ReportPeer(
peer,
rep,
)) => {
assert_eq!(peer, peer_b);
assert_eq!(rep, COST_REQUEST_TIMED_OUT);
}
);

assert_fetch_collation_request(&mut virtual_overseer, hash_b, test_state.chain_ids[0])
.await;

Delay::new(ACTIVITY_TIMEOUT * 2 / 3).await;

advertise_collation(&mut virtual_overseer, peer_b.clone(), hash_c).await;

assert_matches!(
overseer_recv(&mut virtual_overseer).await,
AllMessages::NetworkBridge(NetworkBridgeMessage::ReportPeer(
peer,
rep,
)) => {
assert_eq!(peer, peer_b);
assert_eq!(rep, COST_REQUEST_TIMED_OUT);
}
);

assert_fetch_collation_request(&mut virtual_overseer, hash_c, test_state.chain_ids[0])
.await;

Delay::new(ACTIVITY_TIMEOUT * 3 / 2).await;

assert_matches!(
overseer_recv(&mut virtual_overseer).await,
AllMessages::NetworkBridge(NetworkBridgeMessage::ReportPeer(
peer,
rep,
)) => {
assert_eq!(peer, peer_b);
assert_eq!(rep, COST_REQUEST_TIMED_OUT);
}
);

assert_collator_disconnect(&mut virtual_overseer, peer_b.clone()).await;

virtual_overseer
Expand Down
14 changes: 14 additions & 0 deletions node/network/protocol/src/request_response/outgoing.rs
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,20 @@ pub enum RequestError {
Canceled(#[source] oneshot::Canceled),
}

impl RequestError {
/// Whether the error represents some kind of timeout condition.
pub fn is_timed_out(&self) -> bool {
match self {
Self::Canceled(_) |
Self::NetworkError(network::RequestFailure::Obsolete) |
Self::NetworkError(network::RequestFailure::Network(
network::OutboundFailure::Timeout,
)) => true,
_ => false,
}
}
}

/// A request to be sent to the network bridge, including a sender for sending responses/failures.
///
/// The network implementation will make use of that sender for informing the requesting subsystem
Expand Down

0 comments on commit 3ae71a5

Please sign in to comment.