Skip to content
This repository has been archived by the owner on Nov 15, 2023. It is now read-only.

Don't change rep on timeout in collator protocol. #4642

Merged
merged 3 commits into from
Jan 4, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 23 additions & 15 deletions node/network/collator-protocol/src/validator_side/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,6 @@ const COST_UNEXPECTED_MESSAGE: Rep = Rep::CostMinor("An unexpected message");
const COST_CORRUPTED_MESSAGE: Rep = Rep::CostMinor("Message was corrupt");
/// Network errors that originated at the remote host should have same cost as timeout.
const COST_NETWORK_ERROR: Rep = Rep::CostMinor("Some network error");
const COST_REQUEST_TIMED_OUT: Rep = Rep::CostMinor("A collation request has timed out");
const COST_INVALID_SIGNATURE: Rep = Rep::Malicious("Invalid network message signature");
const COST_REPORT_BAD: Rep = Rep::Malicious("A collator was reported by another subsystem");
const COST_WRONG_PARA: Rep = Rep::Malicious("A collator provided a collation for the wrong para");
Expand Down Expand Up @@ -1212,7 +1211,7 @@ async fn poll_requests(
if !result.is_ready() {
retained_requested.insert(pending_collation.clone());
}
if let CollationFetchResult::Error(rep) = result {
if let CollationFetchResult::Error(Some(rep)) = result {
reputation_changes.push((pending_collation.peer_id.clone(), rep));
}
}
Expand Down Expand Up @@ -1333,8 +1332,8 @@ enum CollationFetchResult {
/// The collation was fetched successfully.
Success,
/// An error occurred when fetching a collation or it was invalid.
/// A reputation change should be applied to the peer.
Error(Rep),
/// A given reputation change should be applied to the peer.
Error(Option<Rep>),
}

impl CollationFetchResult {
Expand Down Expand Up @@ -1380,7 +1379,19 @@ async fn poll_collation_response(
err = ?err,
"Collator provided response that could not be decoded"
);
CollationFetchResult::Error(COST_CORRUPTED_MESSAGE)
CollationFetchResult::Error(Some(COST_CORRUPTED_MESSAGE))
},
Err(err) if err.is_timed_out() => {
tracing::debug!(
target: LOG_TARGET,
hash = ?pending_collation.relay_parent,
para_id = ?pending_collation.para_id,
peer_id = ?pending_collation.peer_id,
"Request timed out"
);
// For now we don't want to change reputation on timeout, to mitigate issues like
// this: https://github.com/paritytech/polkadot/issues/4617
CollationFetchResult::Error(None)
},
Err(RequestError::NetworkError(err)) => {
tracing::debug!(
Expand All @@ -1392,24 +1403,21 @@ async fn poll_collation_response(
"Fetching collation failed due to network error"
);
// A minor decrease in reputation for any network failure seems
// sensible. In theory this could be exploited, by Dosing this node,
// sensible. In theory this could be exploited, by DoSing this node,
// which would result in reduced reputation for proper nodes, but the
// same can happen for penalties on timeouts, which we also have.
CollationFetchResult::Error(COST_NETWORK_ERROR)
CollationFetchResult::Error(Some(COST_NETWORK_ERROR))
},
Err(RequestError::Canceled(_)) => {
Err(RequestError::Canceled(err)) => {
tracing::debug!(
target: LOG_TARGET,
hash = ?pending_collation.relay_parent,
para_id = ?pending_collation.para_id,
peer_id = ?pending_collation.peer_id,
"Request timed out"
err = ?err,
"Canceled should be handled by `is_timed_out` above - this is a bug!"
);
// A minor decrease in reputation for any network failure seems
// sensible. In theory this could be exploited, by Dosing this node,
// which would result in reduced reputation for proper nodes, but the
// same can happen for penalties on timeouts, which we also have.
CollationFetchResult::Error(COST_REQUEST_TIMED_OUT)
CollationFetchResult::Error(None)
},
Ok(CollationFetchingResponse::Collation(receipt, _))
if receipt.descriptor().para_id != pending_collation.para_id =>
Expand All @@ -1422,7 +1430,7 @@ async fn poll_collation_response(
"Got wrong para ID for requested collation."
);

CollationFetchResult::Error(COST_WRONG_PARA)
CollationFetchResult::Error(Some(COST_WRONG_PARA))
},
Ok(CollationFetchingResponse::Collation(receipt, pov)) => {
tracing::debug!(
Expand Down
55 changes: 0 additions & 55 deletions node/network/collator-protocol/src/validator_side/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -624,17 +624,6 @@ fn fetch_collations_works() {
assert_fetch_collation_request(&mut virtual_overseer, second, test_state.chain_ids[0])
.await;

assert_matches!(
overseer_recv(&mut virtual_overseer).await,
AllMessages::NetworkBridge(NetworkBridgeMessage::ReportPeer(
peer,
rep,
)) => {
assert_eq!(peer, peer_b);
assert_eq!(rep, COST_REQUEST_TIMED_OUT);
}
);

let response_channel_non_exclusive =
assert_fetch_collation_request(&mut virtual_overseer, second, test_state.chain_ids[0])
.await;
Expand Down Expand Up @@ -861,17 +850,6 @@ fn inactive_disconnected() {

Delay::new(ACTIVITY_TIMEOUT * 3).await;

assert_matches!(
overseer_recv(&mut virtual_overseer).await,
AllMessages::NetworkBridge(NetworkBridgeMessage::ReportPeer(
peer,
rep,
)) => {
assert_eq!(peer, peer_b);
assert_eq!(rep, COST_REQUEST_TIMED_OUT);
}
);

assert_collator_disconnect(&mut virtual_overseer, peer_b.clone()).await;
virtual_overseer
});
Expand Down Expand Up @@ -924,51 +902,18 @@ fn activity_extends_life() {

advertise_collation(&mut virtual_overseer, peer_b.clone(), hash_b).await;

assert_matches!(
overseer_recv(&mut virtual_overseer).await,
AllMessages::NetworkBridge(NetworkBridgeMessage::ReportPeer(
peer,
rep,
)) => {
assert_eq!(peer, peer_b);
assert_eq!(rep, COST_REQUEST_TIMED_OUT);
}
);

assert_fetch_collation_request(&mut virtual_overseer, hash_b, test_state.chain_ids[0])
.await;

Delay::new(ACTIVITY_TIMEOUT * 2 / 3).await;

advertise_collation(&mut virtual_overseer, peer_b.clone(), hash_c).await;

assert_matches!(
overseer_recv(&mut virtual_overseer).await,
AllMessages::NetworkBridge(NetworkBridgeMessage::ReportPeer(
peer,
rep,
)) => {
assert_eq!(peer, peer_b);
assert_eq!(rep, COST_REQUEST_TIMED_OUT);
}
);

assert_fetch_collation_request(&mut virtual_overseer, hash_c, test_state.chain_ids[0])
.await;

Delay::new(ACTIVITY_TIMEOUT * 3 / 2).await;

assert_matches!(
overseer_recv(&mut virtual_overseer).await,
AllMessages::NetworkBridge(NetworkBridgeMessage::ReportPeer(
peer,
rep,
)) => {
assert_eq!(peer, peer_b);
assert_eq!(rep, COST_REQUEST_TIMED_OUT);
}
);

assert_collator_disconnect(&mut virtual_overseer, peer_b.clone()).await;

virtual_overseer
Expand Down
14 changes: 14 additions & 0 deletions node/network/protocol/src/request_response/outgoing.rs
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,20 @@ pub enum RequestError {
Canceled(#[source] oneshot::Canceled),
}

impl RequestError {
/// Whether the error represents some kind of timeout condition.
pub fn is_timed_out(&self) -> bool {
match self {
Self::Canceled(_) |
Self::NetworkError(network::RequestFailure::Obsolete) |
Self::NetworkError(network::RequestFailure::Network(
network::OutboundFailure::Timeout,
)) => true,
_ => false,
}
}
}

/// A request to be sent to the network bridge, including a sender for sending responses/failures.
///
/// The network implementation will make use of that sender for informing the requesting subsystem
Expand Down