This repository has been archived by the owner on Nov 15, 2023. It is now read-only.

Additional tracing in provisioner, vote_selection and dispute-coordinator #6775

Merged · 5 commits · Mar 6, 2023
node/core/dispute-coordinator/src/initialized.rs (11 changes: 10 additions & 1 deletion)
@@ -272,6 +272,7 @@ impl Initialized {
update: ActiveLeavesUpdate,
now: u64,
) -> Result<()> {
gum::trace!(target: LOG_TARGET, timestamp = now, "Processing ActiveLeavesUpdate");
let scraped_updates =
self.scraper.process_active_leaves_update(ctx.sender(), &update).await?;
log_error(
@@ -314,8 +315,15 @@ impl Initialized {
Ok(SessionWindowUpdate::Unchanged) => {},
};

gum::trace!(
target: LOG_TARGET,
timestamp = now,
"Will process {} onchain votes",
scraped_updates.on_chain_votes.len()
);

// The `runtime-api` subsystem has an internal queue which serializes the execution,
// so there is no point in running these in parallel.
// so there is no point in running these in parallel
for votes in scraped_updates.on_chain_votes {
let _ = self.process_on_chain_votes(ctx, overlay_db, votes, now).await.map_err(
|error| {
@@ -329,6 +337,7 @@
}
}

gum::trace!(target: LOG_TARGET, timestamp = now, "Done processing ActiveLeavesUpdate");
Ok(())
}

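For context on the pattern: `gum` is Polkadot's wrapper around the `tracing` crate, and the lines added above are plain structured trace events. A minimal standalone sketch of the same pattern using `tracing` directly, where the `LOG_TARGET` value and the function shape are illustrative assumptions:

```rust
// Minimal sketch of the structured tracing pattern used above. The
// `gum` macros wrap the `tracing` crate; `LOG_TARGET`, the field
// names, and the function shape here are illustrative assumptions.
use tracing::trace;

const LOG_TARGET: &str = "parachain::dispute-coordinator";

fn process_active_leaves_update(now: u64, on_chain_votes_count: usize) {
    trace!(target: LOG_TARGET, timestamp = now, "Processing ActiveLeavesUpdate");

    trace!(
        target: LOG_TARGET,
        timestamp = now,
        "Will process {} onchain votes",
        on_chain_votes_count
    );

    // ... per-vote processing elided ...

    trace!(target: LOG_TARGET, timestamp = now, "Done processing ActiveLeavesUpdate");
}
```

Under a subscriber these show up as `TRACE` lines keyed by target, which is what makes the one-line fix in the next file useful.
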
node/core/dispute-coordinator/src/scraping/mod.rs (2 changes: 1 addition & 1 deletion)
@@ -241,7 +241,7 @@ impl ChainScraper {

let mut scraped_updates = ScrapedUpdates::new();
for (block_number, block_hash) in block_numbers.zip(block_hashes) {
gum::trace!(?block_number, ?block_hash, "In ancestor processing.");
gum::trace!(target: LOG_TARGET, ?block_number, ?block_hash, "In ancestor processing.");

let receipts_for_block =
self.process_candidate_events(sender, block_number, block_hash).await?;
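The change above only adds the missing `target:`, so the event can be attributed and filtered per subsystem. A sketch of enabling just this target at trace level with `tracing-subscriber` (Polkadot normally wires this through its `--log` CLI flag; the direct `EnvFilter` setup below is an assumption for illustration):

```rust
// Sketch: enable one target at trace level while keeping everything
// else at warn. Requires tracing-subscriber's "env-filter" feature.
use tracing_subscriber::{fmt, EnvFilter};

fn init_logging() {
    let filter = EnvFilter::new("warn,parachain::dispute-coordinator=trace");
    fmt().with_env_filter(filter).init();
}
```
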
node/core/provisioner/src/disputes/prioritized_selection/mod.rs (32 changes: 30 additions & 2 deletions)
@@ -100,7 +100,15 @@ where

// Fetch the onchain disputes. We'll do a prioritization based on them.
let onchain = match get_onchain_disputes(sender, leaf.hash).await {
Ok(r) => r,
Ok(r) => {
gum::trace!(
target: LOG_TARGET,
?leaf,
"Successfully fetched {} onchain disputes",
r.len()
);
r
},
Err(GetOnchainDisputesError::NotSupported(runtime_api_err, relay_parent)) => {
// Runtime version is checked before calling this method, so the error below should never happen!
gum::error!(
@@ -129,7 +137,9 @@ where
HashMap::new()
},
};
metrics.on_fetched_onchain_disputes(onchain.keys().len() as u64);

gum::trace!(target: LOG_TARGET, ?leaf, "Fetching recent disputes");
let recent_disputes = request_disputes(sender).await;
gum::trace!(
target: LOG_TARGET,
@@ -139,26 +149,32 @@ where
onchain.len(),
);

gum::trace!(target: LOG_TARGET, ?leaf, "Filtering recent disputes");

// Filter out unconfirmed disputes. However if the dispute is already onchain - don't skip it.
// In this case we'd better push as much fresh votes as possible to bring it to conclusion faster.
let recent_disputes = recent_disputes
.into_iter()
.filter(|d| d.2.is_confirmed_concluded() || onchain.contains_key(&(d.0, d.1)))
.collect::<Vec<_>>();

gum::trace!(target: LOG_TARGET, ?leaf, "Partitioning recent disputes");
let partitioned = partition_recent_disputes(recent_disputes, &onchain);
metrics.on_partition_recent_disputes(&partitioned);

if partitioned.inactive_unknown_onchain.len() > 0 {
gum::warn!(
target: LOG_TARGET,
?leaf,
"Got {} inactive unknown onchain disputes. This should not happen!",
"Got {} inactive unknown onchain disputes. This should not happen in normal conditions!",
partitioned.inactive_unknown_onchain.len()
);
}

gum::trace!(target: LOG_TARGET, ?leaf, "Vote selection for recent disputes");
let result = vote_selection(sender, partitioned, &onchain).await;

gum::trace!(target: LOG_TARGET, ?leaf, "Convert to multi dispute statement set");
make_multi_dispute_statement_set(metrics, result)
}

@@ -179,11 +195,13 @@ where
let mut result = BTreeMap::new();
let mut request_votes_counter = 0;
while !disputes.is_empty() {
gum::trace!(target: LOG_TARGET, "has to process {} disputes left", disputes.len());
let batch_size = std::cmp::min(VOTES_SELECTION_BATCH_SIZE, disputes.len());
let batch = Vec::from_iter(disputes.drain(0..batch_size));

// Filter votes which are already onchain
request_votes_counter += 1;
gum::trace!(target: LOG_TARGET, "requesting onchain votes",);
let votes = super::request_votes(sender, batch)
.await
.into_iter()
@@ -213,6 +231,7 @@ where
(session_index, candidate_hash, votes)
})
.collect::<Vec<_>>();
gum::trace!(target: LOG_TARGET, "got {} onchain votes after processing", votes.len());

// Check if votes are within the limit
for (session_index, candidate_hash, selected_votes) in votes {
@@ -221,8 +240,16 @@ where
// we are done - no more votes can be added. Importantly, we don't add any votes for a dispute here
// if we can't fit them all. This gives us an important invariant, that backing votes for
// disputes make it into the provisioned vote set.
gum::trace!(
target: LOG_TARGET,
?request_votes_counter,
?total_votes_len,
"vote_selection DisputeCoordinatorMessage::QueryCandidateVotes counter",
);

return result
}

result.insert((session_index, candidate_hash), selected_votes);
total_votes_len += votes_len
}
@@ -231,6 +258,7 @@ where
gum::trace!(
target: LOG_TARGET,
?request_votes_counter,
?total_votes_len,
"vote_selection DisputeCoordinatorMessage::QueryCandidateVotes counter",
);

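Taken together, the traces added above instrument a batched selection loop: disputes are drained in `VOTES_SELECTION_BATCH_SIZE` chunks, votes are requested per batch, and a running total enforces the all-or-nothing invariant described in the comment. A condensed, self-contained sketch of that control flow, with types, constants, and the `votes_for` callback as simplified stand-ins for the real subsystem calls:

```rust
// Condensed sketch of the batched vote-selection loop instrumented above.
// All types, constants, and `votes_for` are simplified stand-ins.
use std::collections::BTreeMap;

const VOTES_SELECTION_BATCH_SIZE: usize = 11;
const MAX_VOTES_FORWARDED: usize = 200_000;

type Key = (u32, u64); // stand-in for (SessionIndex, CandidateHash)

fn vote_selection(
    mut disputes: Vec<Key>,
    votes_for: impl Fn(&Key) -> Vec<u8>, // stand-in for the batched QueryCandidateVotes request
) -> BTreeMap<Key, Vec<u8>> {
    let mut result = BTreeMap::new();
    let mut total_votes_len = 0usize;
    while !disputes.is_empty() {
        let batch_size = std::cmp::min(VOTES_SELECTION_BATCH_SIZE, disputes.len());
        let batch = Vec::from_iter(disputes.drain(0..batch_size));

        for key in batch {
            let selected = votes_for(&key);
            let votes_len = selected.len();
            if total_votes_len + votes_len > MAX_VOTES_FORWARDED {
                // All-or-nothing per dispute: never include a partial vote
                // set, so each provisioned dispute carries its full votes.
                return result
            }
            result.insert(key, selected);
            total_votes_len += votes_len;
        }
    }
    result
}
```
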
node/core/provisioner/src/lib.rs (8 changes: 8 additions & 0 deletions)
@@ -179,11 +179,13 @@ fn handle_active_leaves_update(
per_relay_parent: &mut HashMap<Hash, PerRelayParent>,
inherent_delays: &mut InherentDelays,
) {
gum::trace!(target: LOG_TARGET, "Handle ActiveLeavesUpdate");
for deactivated in &update.deactivated {
per_relay_parent.remove(deactivated);
}

if let Some(leaf) = update.activated {
gum::trace!(target: LOG_TARGET, leaf_hash=?leaf.hash, "Adding delay");
let delay_fut = Delay::new(PRE_PROPOSE_TIMEOUT).map(move |_| leaf.hash).boxed();
per_relay_parent.insert(leaf.hash, PerRelayParent::new(leaf));
inherent_delays.push(delay_fut);
@@ -590,6 +592,10 @@ async fn select_candidates(
}
}

gum::trace!(
target: LOG_TARGET,
leaf_hash = ?relay_parent,
"before GetBackedCandidates"
);

// now get the backed candidates corresponding to these candidate receipts
let (tx, rx) = oneshot::channel();
sender.send_unbounded_message(CandidateBackingMessage::GetBackedCandidates(
@@ -598,6 +604,8 @@
tx,
));
let mut candidates = rx.await.map_err(|err| Error::CanceledBackedCandidates(err))?;
gum::trace!(
target: LOG_TARGET,
leaf_hash = ?relay_parent,
"Got {} backed candidates",
candidates.len()
);

// `selected_candidates` is generated in ascending order by core index, and `GetBackedCandidates`
// _should_ preserve that property, but let's just make sure.
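The two trace points added above bracket a standard subsystem round trip: send a message that carries a `oneshot` sender, then await the receiver. A self-contained sketch of that pattern with `futures::channel::oneshot`, where `BackedCandidate` and the `send_message` closure stand in for the real message types:

```rust
// Self-contained sketch of the request/response round trip the new
// trace lines bracket. `BackedCandidate` and `send_message` are
// stand-ins for the real subsystem message types.
use futures::channel::oneshot;

#[derive(Debug)]
struct BackedCandidate;

async fn get_backed_candidates(
    send_message: impl FnOnce(oneshot::Sender<Vec<BackedCandidate>>),
) -> Result<Vec<BackedCandidate>, oneshot::Canceled> {
    // The "before GetBackedCandidates" trace would fire here.
    let (tx, rx) = oneshot::channel();
    send_message(tx);
    let candidates = rx.await?;
    // The "Got {} backed candidates" trace would fire here.
    Ok(candidates)
}

fn main() {
    let got = futures::executor::block_on(get_backed_candidates(|tx| {
        // A real subsystem would answer asynchronously; reply inline here.
        let _ = tx.send(vec![BackedCandidate, BackedCandidate]);
    }));
    assert_eq!(got.expect("sender not dropped").len(), 2);
}
```
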
node/core/provisioner/src/metrics.rs (14 changes: 14 additions & 0 deletions)
@@ -36,6 +36,9 @@ struct MetricsInner {

/// The disputes received from `disputes-coordinator` by partition
partitioned_disputes: prometheus::CounterVec<prometheus::U64>,

/// The disputes fetched from the runtime.
fetched_onchain_disputes: prometheus::Counter<prometheus::U64>,
[Review comment] Contributor (author): The metric didn't help much during the debugging. Should we keep it?

[Review comment] Contributor: I would remove it if it is not useful; we already have a ton of metrics, most of them we never use :)

}

/// Provisioner metrics.
@@ -143,6 +146,12 @@ impl Metrics {
.inc_by(inactive_concluded_known_onchain.len().try_into().unwrap_or(0));
}
}

pub(crate) fn on_fetched_onchain_disputes(&self, onchain_count: u64) {
if let Some(metrics) = &self.0 {
metrics.fetched_onchain_disputes.inc_by(onchain_count);
}
}
}

impl metrics::Metrics for Metrics {
@@ -208,6 +217,11 @@ impl metrics::Metrics for Metrics {
)?,
&registry,
)?,
fetched_onchain_disputes: prometheus::register(
prometheus::Counter::new(
"polkadot_parachain_fetched_onchain_disputes",
"Number of disputes fetched from the runtime",
)?,
&registry,
)?,
};
Ok(Metrics(Some(metrics)))
}
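For completeness, the new metric follows the usual Prometheus counter lifecycle: construct, register, increment. A minimal standalone sketch with the plain `prometheus` crate (Polkadot routes registration through Substrate's metrics plumbing, so the direct `Registry` use below is an illustrative assumption):

```rust
// Minimal sketch of the counter lifecycle behind the new metric,
// using the plain `prometheus` crate directly; Polkadot's own
// registration goes through its `metrics::Metrics` trait instead.
use prometheus::{IntCounter, Registry};

fn main() -> Result<(), prometheus::Error> {
    let registry = Registry::new();
    let fetched_onchain_disputes = IntCounter::new(
        "polkadot_parachain_fetched_onchain_disputes",
        "Number of disputes fetched from the runtime",
    )?;
    registry.register(Box::new(fetched_onchain_disputes.clone()))?;

    // Equivalent of `metrics.on_fetched_onchain_disputes(onchain.keys().len() as u64)`:
    fetched_onchain_disputes.inc_by(3);
    Ok(())
}
```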