diff --git a/change_log.txt b/change_log.txt index a4b6f0c..ddfb31d 100644 --- a/change_log.txt +++ b/change_log.txt @@ -1,5 +1,8 @@ Major changes to PhyKIT are summarized here. +1.16.0 + - Users can now specify an outlier branch threshold when calculating gene coevolution + 1.15.0 - Added function to recode alignments based on 8 different recoding schemes (7 for amino acids; 1 for nucleotides). See function recode diff --git a/docs/change_log/index.rst b/docs/change_log/index.rst index 8638435..82b1cd8 100644 --- a/docs/change_log/index.rst +++ b/docs/change_log/index.rst @@ -8,6 +8,9 @@ Change log Major changes to PhyKIT are summarized here. +**1.16.0**: +Users can now specify an outlier branch threshold when calculating gene coevolution. + **1.15.0**: Added function to recode alignments based on 8 different recoding schemes (7 for amino acids; 1 for nucleotides). See function recode. diff --git a/docs/usage/index.rst b/docs/usage/index.rst index 0afe4d2..d3e74d6 100644 --- a/docs/usage/index.rst +++ b/docs/usage/index.rst @@ -676,26 +676,28 @@ trees to have the same tips. To transform branch lengths into relative rates, PhyKIT uses the putative species tree's branch lengths, which is inputted by the user. As recommended by the original method developers, outlier branche lengths are removed. Outlier branches have a relative -evolutionary rate greater than five. +evolutionary rate greater than five by default. Users can specify a custom threshold +using the -ot/--outlier_threshold argument. PhyKIT reports two tab delimited values: col1: correlation coefficient col2: p-value -Method is empirically evaluated by Clark et al., Genome Research -(2012), doi: 10.1101/gr.132647.111. Normalization method using a -species tree follows Sato et al., Bioinformatics (2005), doi: -10.1093/bioinformatics/bti564. +Method is described in Steenwyk et al., Science Advances (2022), +doi: 10.1126/sciadv.abn0105. 
Method is empirically evaluated by +Clark et al., Genome Research (2012), doi: 10.1101/gr.132647.111. +Normalization method using a species tree follows Sato et al., +Bioinformatics (2005), doi: 10.1093/bioinformatics/bti564. .. code-block:: shell - phykit covarying_evolutionary_rates -r/--reference [-v/--verbose] + phykit covarying_evolutionary_rates -r/--reference [-ot/--outlier_threshold -v/--verbose] Options: |br| **: first argument after function name should be an alignment file |br| **: first argument after function name should be an alignment file |br| -*-r/\\-\\-reference*: a tree to correct branch lengths by in the two input trees. Typically, -this is a putative species tree. |br| +*-r/\\-\\-reference*: a tree to correct branch lengths by in the two input trees. Typically, this is a putative species tree. |br| +*-ot/\\-\\-outlier_threshold*: threshold to define outlier corrected branch lengths; specify None to disable outlier removal (Default: 5) |br| *-v/\\-\\-verbose*: print out corrected branch lengths shared between tree 0 and tree 1 | diff --git a/phykit/phykit.py b/phykit/phykit.py index 2abc2aa..b7b3986 100644 --- a/phykit/phykit.py +++ b/phykit/phykit.py @@ -1190,7 +1190,8 @@ def covarying_evolutionary_rates(argv): Determine if two genes have a signature of covariation with one another. Genes that have covarying evolutionary histories tend to have - similar functions and expression levels. + similar functions, expression levels, and/or be part of the same + multi-meric complexes. Input two phylogenies and calculate the correlation among relative evolutionary rates between the two phylogenies. The two input trees @@ -1198,19 +1199,21 @@ def covarying_evolutionary_rates(argv): trees to have the same tips. To transform branch lengths into relative rates, PhyKIT uses the putative species tree's branch lengths, which is inputted by the user. As recommended by the original method developers, - outlier branche lengths are removed. 
Outlier branches have a relative - evolutionary rate greater than five. + outlier branch lengths are removed. Outlier branches have a relative + evolutionary rate greater than five. Users can specify a custom threshold + using the -ot/--outlier_threshold argument. PhyKIT reports two tab delimited values: col1: correlation coefficient col2: p-value - Method is empirically evaluated by Clark et al., Genome Research - (2012), doi: 10.1101/gr.132647.111. Normalization method using a - species tree follows Sato et al., Bioinformatics (2005), doi: - 10.1093/bioinformatics/bti564. - + Method is described in Steenwyk et al., Science Advances (2022), + doi: 10.1126/sciadv.abn0105. Method is empirically evaluated by + Clark et al., Genome Research (2012), doi: 10.1101/gr.132647.111. + Normalization method using a species tree follows Sato et al., + Bioinformatics (2005), doi: 10.1093/bioinformatics/bti564. + Aliases: covarying_evolutionary_rates, cover Command line interfaces: @@ -1238,6 +1241,10 @@ def covarying_evolutionary_rates(argv): -v/--verbose print out corrected branch lengths shared between tree 0 and tree 1 + + -ot/--outlier_threshold threshold to define outlier + corrected branch lengths + (Default: 5) """ ), ) @@ -1249,6 +1256,9 @@ def covarying_evolutionary_rates(argv): parser.add_argument( "-v", "--verbose", action="store_true", required=False, help=SUPPRESS ) + parser.add_argument( + "-ot", "--outlier_threshold", type=str, required=False, help=SUPPRESS + ) args = parser.parse_args(argv) CovaryingEvolutionaryRates(args).run() diff --git a/phykit/services/tree/base.py b/phykit/services/tree/base.py index 91e55e5..3c2e037 100644 --- a/phykit/services/tree/base.py +++ b/phykit/services/tree/base.py @@ -27,6 +27,7 @@ def __init__( tip_2=None, clade=None, keep=None, + outlier_threshold=None, ): self.tree_file_path = tree_file_path self.tree1_file_path = tree1_file_path @@ -47,6 +48,7 @@ def __init__( self.tip_2 = tip_2 self.clade = clade self.keep = keep + 
self.outlier_threshold = outlier_threshold def read_tree_file(self): try: diff --git a/phykit/services/tree/covarying_evolutionary_rates.py b/phykit/services/tree/covarying_evolutionary_rates.py index 91685d9..c2e18b9 100644 --- a/phykit/services/tree/covarying_evolutionary_rates.py +++ b/phykit/services/tree/covarying_evolutionary_rates.py @@ -10,6 +10,13 @@ def __init__(self, args) -> None: super().__init__(**self.process_args(args)) def run(self): + if self.outlier_threshold is None: + outlier_threshold = 5 + elif self.outlier_threshold == "None": + outlier_threshold = "No threshold" + else: + outlier_threshold = float(self.outlier_threshold) + tree_zero = self.read_tree_file() tree_one = self.read_tree1_file() tree_ref = self.read_reference_tree_file() @@ -45,22 +52,27 @@ def run(self): tip_names, ) = self.correct_branch_lengths(tree_zero, tree_one, tree_ref) - # remove corrected BLs greater than 5 - outlier_indices = [] - outlier_indices = self.get_indices_of_outlier_branch_lengths( - tree_zero_corr_branch_lengths, outlier_indices - ) - outlier_indices = self.get_indices_of_outlier_branch_lengths( - tree_one_corr_branch_lengths, outlier_indices - ) + if outlier_threshold != "No threshold": + # remove corrected BLs whose absolute value exceeds the outlier threshold + outlier_indices = [] + outlier_indices = self.get_indices_of_outlier_branch_lengths( + tree_zero_corr_branch_lengths, outlier_indices, outlier_threshold + ) - tree_zero_corr_branch_lengths = self.remove_outliers_based_on_indices( - tree_zero_corr_branch_lengths, outlier_indices - ) - tree_one_corr_branch_lengths = self.remove_outliers_based_on_indices( - tree_one_corr_branch_lengths, outlier_indices - ) - tip_names = self.remove_outliers_based_on_indices(tip_names, outlier_indices) + outlier_indices = self.get_indices_of_outlier_branch_lengths( + tree_one_corr_branch_lengths, outlier_indices, outlier_threshold + ) + + tree_zero_corr_branch_lengths = self.remove_outliers_based_on_indices( + tree_zero_corr_branch_lengths, 
outlier_indices + ) + tree_one_corr_branch_lengths = self.remove_outliers_based_on_indices( + tree_one_corr_branch_lengths, outlier_indices + ) + + tip_names = self.remove_outliers_based_on_indices( + tip_names, outlier_indices + ) # standardize values for final correction tree_zero_corr_branch_lengths = zscore(tree_zero_corr_branch_lengths) @@ -89,6 +101,7 @@ def run(self): def process_args(self, args): return dict( + outlier_threshold=args.outlier_threshold, tree_file_path=args.tree_zero, tree1_file_path=args.tree_one, reference=args.reference, @@ -96,7 +109,10 @@ def process_args(self, args): ) def get_indices_of_outlier_branch_lengths( - self, corr_branch_lengths, outlier_indices + self, + corr_branch_lengths, + outlier_indices, + outlier_threshold: float ): """ create index for branch lengths that @@ -104,7 +120,7 @@ def get_indices_of_outlier_branch_lengths( """ for idx in range(0, len(corr_branch_lengths)): try: - if corr_branch_lengths[idx] > 5 or corr_branch_lengths[idx] < -5: + if corr_branch_lengths[idx] > outlier_threshold or corr_branch_lengths[idx] < -outlier_threshold: if idx not in outlier_indices: outlier_indices.append(idx) except TypeError: diff --git a/phykit/version.py b/phykit/version.py index 6b0872c..638c121 100644 --- a/phykit/version.py +++ b/phykit/version.py @@ -1 +1 @@ -__version__ = "1.15.0" +__version__ = "1.16.0" diff --git a/tests/integration/tree/test_covarying_evolutionary_rates_integration.py b/tests/integration/tree/test_covarying_evolutionary_rates_integration.py index 24cde43..37c02f3 100644 --- a/tests/integration/tree/test_covarying_evolutionary_rates_integration.py +++ b/tests/integration/tree/test_covarying_evolutionary_rates_integration.py @@ -181,3 +181,54 @@ def test_covarying_evolutionary_rates_outlier(self, mocked_print): with patch.object(sys, "argv", testargs): Phykit() assert mocked_print.mock_calls == [call(expected_result)] + + @patch("builtins.print") + def 
test_covarying_evolutionary_rates_outlier_threshold0(self, mocked_print): + expected_result = "0.0793\t0.796757" + testargs = [ + "phykit", + "covarying_evolutionary_rates", + f"{here.parent.parent.parent}/sample_files/tree_simple.tre", + f"{here.parent.parent.parent}/sample_files/tree_simple_outlier_branch.tre", + "-r", + f"{here.parent.parent.parent}/sample_files/tree_simple_2.tre", + "-ot", + "None" + ] + with patch.object(sys, "argv", testargs): + Phykit() + assert mocked_print.mock_calls == [call(expected_result)] + + @patch("builtins.print") + def test_covarying_evolutionary_rates_outlier_threshold1(self, mocked_print): + expected_result = "0.0793\t0.796757" + testargs = [ + "phykit", + "covarying_evolutionary_rates", + f"{here.parent.parent.parent}/sample_files/tree_simple.tre", + f"{here.parent.parent.parent}/sample_files/tree_simple_outlier_branch.tre", + "-r", + f"{here.parent.parent.parent}/sample_files/tree_simple_2.tre", + "--outlier_threshold", + "None" + ] + with patch.object(sys, "argv", testargs): + Phykit() + assert mocked_print.mock_calls == [call(expected_result)] + + @patch("builtins.print") + def test_covarying_evolutionary_rates_outlier_threshold_custom(self, mocked_print): + expected_result = "0.5404\t0.16678" + testargs = [ + "phykit", + "covarying_evolutionary_rates", + f"{here.parent.parent.parent}/sample_files/tree_simple.tre", + f"{here.parent.parent.parent}/sample_files/tree_simple_outlier_branch.tre", + "-r", + f"{here.parent.parent.parent}/sample_files/tree_simple_2.tre", + "--outlier_threshold", + "1" + ] + with patch.object(sys, "argv", testargs): + Phykit() + assert mocked_print.mock_calls == [call(expected_result)]