From 7733605a2b7bc73f0aabe5bc3594a089395e1bd1 Mon Sep 17 00:00:00 2001 From: Katherine Eaton Date: Tue, 2 Jan 2024 11:17:55 -0700 Subject: [PATCH 1/9] docs: update conda install --- README.md | 5 ++--- docs/install.md | 2 -- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index bef7d8c..20f857e 100644 --- a/README.md +++ b/README.md @@ -30,11 +30,10 @@ ## Install -`rebar` is a standalone binary file that you can simply download and run: +`rebar` is a standalone binary file, we recommend [conda](https://anaconda.org/bioconda/rebar) or [direct download](https://github.com/phac-nml/rebar/releases/latest/download/rebar-x86_64-unknown-linux-musl). ```bash -wget -O rebar https://github.com/phac-nml/rebar/releases/latest/download/rebar-x86_64-unknown-linux-musl -./rebar --help +conda install -c bioconda rebar ``` - Please see the [install](docs/install.md) docs for Windows, macOS, Docker, Singularity, and Conda. diff --git a/docs/install.md b/docs/install.md index 5bc4bb5..dbac958 100644 --- a/docs/install.md +++ b/docs/install.md @@ -25,8 +25,6 @@ wget "https://github.com/phac-nml/rebar/releases/latest/download/rebar-x86_64-pc ## Conda -> **Coming Soon!** The conda install option will be available when this page is live: https://anaconda.org/bioconda/rebar - ```bash conda create -c bioconda -n rebar rebar conda activate rebar From 1fc42a62898996ad5a1e3fcaf3a45d5ea0c5327d Mon Sep 17 00:00:00 2001 From: Katherine Eaton Date: Tue, 2 Jan 2024 13:34:58 -0700 Subject: [PATCH 2/9] plot: fix legend overflow for #27 --- src/plot/mod.rs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/plot/mod.rs b/src/plot/mod.rs index 77f39b6..fca3ee7 100644 --- a/src/plot/mod.rs +++ b/src/plot/mod.rs @@ -268,8 +268,13 @@ pub fn create( .ok_or_else(|| eyre!("Failed to calculated the maximum coord length"))?; // longest legend label (in pixels) + // longest legend label (in pixels) + let default_labels = + vec!["Reference", 
"Private"].into_iter().map(String::from).collect_vec(); + let longest_legend_label = parents .iter() + .chain(default_labels.iter()) .map(|id| { text::to_image( &format!("{id} Reference"), From 065a3bfa63aab72417b9eb78cb4990a395a98a52 Mon Sep 17 00:00:00 2001 From: Katherine Eaton Date: Tue, 2 Jan 2024 13:35:07 -0700 Subject: [PATCH 3/9] docs: update v0.2.1 notes --- docs/notes/Notes_v0.2.1.md | 9 +++++++++ 1 file changed, 9 insertions(+) create mode 100644 docs/notes/Notes_v0.2.1.md diff --git a/docs/notes/Notes_v0.2.1.md b/docs/notes/Notes_v0.2.1.md new file mode 100644 index 0000000..26ab914 --- /dev/null +++ b/docs/notes/Notes_v0.2.1.md @@ -0,0 +1,9 @@ +# v0.2.1 + +## New + +- Issue #10, PR #34 | Add documentation for Conda installation. + +## Fixes + +- Issue #27, PR TBD | Fix legend overflow in plot. From 2d1fb68207acab395653791233e4f6c818290af6 Mon Sep 17 00:00:00 2001 From: ktmeaton Date: Tue, 2 Jan 2024 21:09:23 +0000 Subject: [PATCH 4/9] plot: add Private Mutation to default labels --- src/plot/mod.rs | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/plot/mod.rs b/src/plot/mod.rs index fca3ee7..619da0e 100644 --- a/src/plot/mod.rs +++ b/src/plot/mod.rs @@ -267,17 +267,20 @@ pub fn create( .max() .ok_or_else(|| eyre!("Failed to calculated the maximum coord length"))?; - // longest legend label (in pixels) // longest legend label (in pixels) let default_labels = - vec!["Reference", "Private"].into_iter().map(String::from).collect_vec(); + vec!["Reference", "Private Mutation"].into_iter().map(String::from).collect_vec(); let longest_legend_label = parents .iter() - .chain(default_labels.iter()) + .chain(default_labels.clone().iter()) .map(|id| { + let label = match default_labels.contains(id) { + true => id.to_string(), + false => format!("{id} Reference"), + }; text::to_image( - &format!("{id} Reference"), + &label, constants::FONT_REGULAR, constants::FONT_SIZE, &constants::TEXT_COLOR, From 
7ebd85d9ae9e7bb7428dd19996c75fe18948868f Mon Sep 17 00:00:00 2001 From: ktmeaton Date: Tue, 2 Jan 2024 21:25:45 +0000 Subject: [PATCH 5/9] parsimony: change from_sequence coord type from vec to slice --- src/dataset/mod.rs | 2 +- src/sequence/parsimony.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/dataset/mod.rs b/src/dataset/mod.rs index d05894b..0e4efc0 100644 --- a/src/dataset/mod.rs +++ b/src/dataset/mod.rs @@ -141,7 +141,7 @@ impl Dataset { &self, sequence: &Sequence, populations: Option<&Vec<&String>>, - coordinates: Option<&Vec>, + coordinates: Option<&[usize]>, ) -> Result { // initialize an empty result, this will be the final product of this function let mut result = SearchResult::new(sequence); diff --git a/src/sequence/parsimony.rs b/src/sequence/parsimony.rs index 0803eea..a325c05 100644 --- a/src/sequence/parsimony.rs +++ b/src/sequence/parsimony.rs @@ -30,7 +30,7 @@ impl Summary { pub fn from_sequence( sequence: &Sequence, query: &Sequence, - coordinates: Option<&Vec>, + coordinates: Option<&[usize]>, ) -> Result { let mut parsimony_summary = Summary::new(); From c1025481a0bb3f740c6bf9fbb091a5a21d8a421b Mon Sep 17 00:00:00 2001 From: ktmeaton Date: Tue, 2 Jan 2024 21:18:05 +0000 Subject: [PATCH 6/9] ci: nightly only update linelist for #29 --- .github/workflows/nightly.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/nightly.yaml b/.github/workflows/nightly.yaml index 7229cb3..79039e6 100644 --- a/.github/workflows/nightly.yaml +++ b/.github/workflows/nightly.yaml @@ -39,7 +39,7 @@ jobs: binary: ${{ matrix.binary }} # --------------------------------------------------------------------------- - latest: + nightly: needs: compile @@ -88,7 +88,7 @@ jobs: uses: actions/upload-artifact@v3 if: always() with: - name: validate-${{ matrix.arch }} - path: output/sars-cov-2/nightly/validate + name: validate-linelist_${{ matrix.arch }} + path: 
output/sars-cov-2/nightly/validate/linelist.tsv if-no-files-found: error retention-days: 7 From 8a5055636c6e13f6ad0ca2443e62889432e38ac7 Mon Sep 17 00:00:00 2001 From: ktmeaton Date: Tue, 2 Jan 2024 21:47:27 +0000 Subject: [PATCH 7/9] ci: test restrict artifact upload --- .github/workflows/test.yaml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 289a632..70881a2 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -151,6 +151,10 @@ jobs: if: always() with: name: output-${{ matrix.arch }} - path: output + path: | + output/alignment + output/toy1 + output/populations/linelist.tsv + if-no-files-found: error retention-days: 7 From e17f8c63eeb37dfe9195e1be144cddc26be494be Mon Sep 17 00:00:00 2001 From: ktmeaton Date: Tue, 2 Jan 2024 22:25:12 +0000 Subject: [PATCH 8/9] phylogeny: get_common_ancestors performance improvement for #28 --- src/phylogeny/mod.rs | 104 +++++++++++++++++++++---------------------- 1 file changed, 50 insertions(+), 54 deletions(-) diff --git a/src/phylogeny/mod.rs b/src/phylogeny/mod.rs index 459ac27..5fd0256 100644 --- a/src/phylogeny/mod.rs +++ b/src/phylogeny/mod.rs @@ -1,5 +1,5 @@ use crate::utils; -use color_eyre::eyre::{eyre, Report, Result, WrapErr}; +use color_eyre::eyre::{eyre, ContextCompat, Report, Result, WrapErr}; use color_eyre::Help; use itertools::Itertools; use log::debug; @@ -9,7 +9,6 @@ use petgraph::visit::{Dfs, IntoNodeReferences}; use petgraph::Direction; use serde::{Deserialize, Serialize}; use serde_json; -use std::collections::HashMap; use std::fs::File; use std::io::Write; use std::path::Path; @@ -369,65 +368,62 @@ impl Phylogeny { } /// Identify the most recent common ancestor shared between all node names. 
- pub fn get_common_ancestor(&self, names: &Vec) -> Result { + pub fn get_common_ancestor(&self, names: &[String]) -> Result { // if only one node name was provided, just return it if names.len() == 1 { let common_ancestor = names[0].clone(); return Ok(common_ancestor); } - // Phase 1: Count up the ancestors shared between all named populations - let mut ancestor_counts: HashMap> = HashMap::new(); - let mut ancestor_depths: HashMap = HashMap::new(); - - for name in names { - // directly use the get_paths method over get_ancestors, because - // get_ancestors removes the self node name from the list, - // but some datasets have named internal nodes, so a listed - // node could be a common ancestor! - let ancestor_paths = self.get_paths("root", name, petgraph::Outgoing)?; - - for ancestor_path in ancestor_paths { - for (depth, ancestor) in ancestor_path.iter().enumerate() { - let depth = depth as isize; - // add ancestor if first time encountered - ancestor_depths.entry(ancestor.clone()).or_insert(depth); - - // recombinants can appear multiple times in ancestors, update - // depth map to use deepest one - if depth > ancestor_depths[ancestor] { - ancestor_depths.insert(ancestor.clone(), depth); - } - ancestor_counts - .entry(ancestor.clone()) - .and_modify(|p| { - p.push(name.clone()); - p.dedup(); - }) - .or_insert(vec![name.clone()]); - } - } - } + // mass pile of all ancestors of all named nodes + let ancestors: Vec<_> = names + .iter() + .map(|pop| { + let paths = self.get_paths(pop, "root", Direction::Incoming)?; + let ancestors = paths.into_iter().flatten().unique().collect_vec(); + debug!("{pop}: {ancestors:?}"); + Ok(ancestors) + }) + .collect::, Report>>()? 
+ .into_iter() + .flatten() + .collect::>(); + + // get ancestors shared by all sequences + let common_ancestors: Vec<_> = ancestors + .iter() + .unique() + .filter(|anc| { + let count = ancestors.iter().filter(|pop| pop == anc).count(); + count == names.len() + }) + .collect(); + + debug!("common_ancestors: {common_ancestors:?}"); + + // get the depths (distance to root) of the common ancestors + let depths = common_ancestors + .into_iter() + .map(|pop| { + let paths = self.get_paths(pop, "root", Direction::Incoming)?; + let longest_path = paths + .into_iter() + .max_by(|a, b| a.len().cmp(&b.len())) + .unwrap_or_default(); + let depth = longest_path.len(); + debug!("{pop}: {depth}"); + Ok((pop, depth)) + }) + .collect::, Report>>()?; - // Phase 2: Find the highest depth ancestor shared between all - let mut common_ancestor = "root".to_string(); - let mut max_depth = 0; - - for (ancestor, populations) in ancestor_counts { - // Which ancestors were found in all populations? - if populations.len() == names.len() { - // Which ancestor has the max depth? - - let depth = ancestor_depths - .get(&ancestor) - .cloned() - .expect("Ancestor {ancestor} was not found in ancestor depths."); - if depth > max_depth { - max_depth = depth; - common_ancestor = ancestor; - } - } - } + // get the deepest (ie. 
most recent common ancestor) + let deepest_ancestor = depths + .into_iter() + .max_by(|a, b| a.1.cmp(&b.1)) + .context("Failed to get common ancestor.")?; + + // tuple (population name, depth) + let common_ancestor = deepest_ancestor.0.to_string(); Ok(common_ancestor) } From 3fc99abe328c846bf2ff5d5c014b242287537688 Mon Sep 17 00:00:00 2001 From: ktmeaton Date: Tue, 2 Jan 2024 22:47:09 +0000 Subject: [PATCH 9/9] docs: add release notes for v0.2.1 --- docs/notes/Notes_v0.2.1.md | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/docs/notes/Notes_v0.2.1.md b/docs/notes/Notes_v0.2.1.md index 26ab914..6680bed 100644 --- a/docs/notes/Notes_v0.2.1.md +++ b/docs/notes/Notes_v0.2.1.md @@ -1,9 +1,17 @@ # v0.2.1 +This patch release documents the new conda installation method, resolves a plotting artifact and improves performance of the `phylogeny` function `get_common_ancestor`. + ## New - Issue #10, PR #34 | Add documentation for Conda installation. ## Fixes -- Issue #27, PR TBD | Fix legend overflow in plot. +- Issue #27, PR #37 | Fix legend overflow in plot. + +## Changes + +- Issue #23, PR #39 | Change `parsimony::from_sequence` param `coordinates` from Vector to Slice. +- Issue #28, PR #41 | Improve performance of `phylogeny::get_common_ancestor`. +- Issue #29, PR #38 | Reduce large artifact uploads in CI.