From 759f3cd55cd3f5635e953105d260bc1facc133bb Mon Sep 17 00:00:00 2001 From: ivan-aksamentov Date: Wed, 15 Jan 2025 06:13:06 +0100 Subject: [PATCH] perf: run map_variations in reconsensus concurrently This uses rayon's parallel iterator to run `map_variations()` (basically Nextclade) for individual sequences concurrently in the `reconsensus()` step. We already run it concurrently in the `solve_promise()` step, [for individual blocks](https://github.com/neherlab/pangraph/blob/2d2e7b046cbbbc24d1c364f5a0afe2176b662a99/packages/pangraph/src/pangraph/graph_merging.rs#L145-L150) and then concurrently [for alignment within each block](https://github.com/neherlab/pangraph/blob/2d2e7b046cbbbc24d1c364f5a0afe2176b662a99/packages/pangraph/src/pangraph/reweave.rs#L38-L49), so I thought we could repeat this success in the `reconsensus()` step as well. This change results in a 64.4% speedup in my measurements on ecoli. Command: ```bash /usr/bin/time -qf 'Cmd : %C\nTime: %E\nMem : %M KB' pangraph build -b 20 -l 500 --circular data/ecoli.fa.gz -o tmp/ecoli.fa.gz.json -v ``` | Branch | Time | |-----------------------------------------|---------| | rust (commit 2d2e7b0) | 5m 51s | | perf/parallel-nextclade-reconsensus | 3m 33s | Things to watch out for: * increased memory usage due to increased concurrency * potential reordering of the results --- packages/pangraph/src/reconsensus/reconsensus.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/packages/pangraph/src/reconsensus/reconsensus.rs b/packages/pangraph/src/reconsensus/reconsensus.rs index 196de950..c32e33ac 100644 --- a/packages/pangraph/src/reconsensus/reconsensus.rs +++ b/packages/pangraph/src/reconsensus/reconsensus.rs @@ -10,6 +10,7 @@ use crate::utils::collections::insert_at_inplace; use eyre::Report; use itertools::Itertools; use maplit::btreemap; +use rayon::prelude::*; use std::collections::BTreeMap; /// Applies the reconsensus operation to each updated block in the graph: @@ -205,9 +206,11 @@ fn
update_block_consensus(block: &mut PangraphBlock, consensus: impl Into>()?;