Skip to content

Commit

Permalink
Merge pull request #373 from padix-key/msa
Browse files Browse the repository at this point in the history
Add support for Muscle 5
  • Loading branch information
padix-key authored Feb 11, 2022
2 parents 5db367e + 744513c commit 49802cf
Show file tree
Hide file tree
Showing 10 changed files with 402 additions and 40 deletions.
29 changes: 29 additions & 0 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -85,3 +85,32 @@ jobs:
run: pip install .//dist//*.whl
- name: Testing code
run: pytest --ignore=tests//application//test_blast.py --ignore=tests//application//test_sra.py --ignore=tests//database//test_entrez.py


# Dedicated job for the Muscle 5 interface: it installs "muscle=5" and
# runs only the MSA application tests.
test-muscle5:
  name: Testing Biotite's interface for Muscle 5

  runs-on: ubuntu-latest
  defaults:
    run:
      # Login shell, so that the activated conda environment is picked up
      shell: bash -l {0}

  steps:
    - uses: actions/checkout@v2
    - uses: conda-incubator/setup-miniconda@v2
      with:
        activate-environment: biotite-dev
        auto-update-conda: true
        python-version: 3.9
        mamba-version: "*"
        channels: conda-forge,defaults
    # This job defines no build matrix, hence no matrix-derived
    # environment variable is set for this step:
    # the NumPy version is pinned directly in the install command
    - name: Installing dependencies
      run: conda install -c conda-forge -c bioconda "muscle=5" "cython>=0.29" "numpy=1.19" "requests>=2.12" "msgpack-python>=0.5.6" "networkx>=2.0" "pytest>=3.2"
    - name: Building distribution
      run: python setup.py bdist_wheel
    - name: Installing distribution
      run: pip install .//dist//*.whl
    - name: Testing code
      run: pytest tests//application//test_msa.py
2 changes: 1 addition & 1 deletion environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ dependencies:
- dssp
- clustalo
- mafft
- muscle
- muscle =3
- sra-tools
- autodock-vina
- viennarna
Expand Down
11 changes: 9 additions & 2 deletions src/biotite/application/application.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@

__name__ = "biotite.application"
__author__ = "Patrick Kunzmann"
__all__ = ["Application", "AppStateError", "TimeoutError", "AppState",
"requires_state"]
__all__ = ["Application", "AppStateError", "TimeoutError", "VersionError",
"AppState", "requires_state"]

import abc
import time
Expand Down Expand Up @@ -251,3 +251,10 @@ class TimeoutError(Exception):
Indicate that the application's timeout expired.
"""
pass


class VersionError(Exception):
    """
    Raised when the version of an application is not the one
    required by its interface.
    """
3 changes: 2 additions & 1 deletion src/biotite/application/muscle/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,4 +9,5 @@
__name__ = "biotite.application.muscle"
__author__ = "Patrick Kunzmann"

from .app import *
from .app3 import *
from .app5 import *
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,14 @@
__author__ = "Patrick Kunzmann"
__all__ = ["MuscleApp"]

import re
import numbers
import warnings
import subprocess
from tempfile import NamedTemporaryFile
from ..localapp import cleanup_tempfile
from ..msaapp import MSAApp
from ..application import AppState, requires_state
from ..application import AppState, VersionError, requires_state
from ...sequence.sequence import Sequence
from ...sequence.seqtypes import NucleotideSequence, ProteinSequence
from ...sequence.align.matrix import SubstitutionMatrix
Expand All @@ -21,7 +23,7 @@

class MuscleApp(MSAApp):
"""
Perform a multiple sequence alignment using MUSCLE.
Perform a multiple sequence alignment using MUSCLE version 3.
Parameters
----------
Expand All @@ -32,6 +34,10 @@ class MuscleApp(MSAApp):
matrix : SubstitutionMatrix, optional
A custom substitution matrix.
See also
---------
Muscle5App
Examples
--------
Expand All @@ -51,6 +57,12 @@ class MuscleApp(MSAApp):
"""

def __init__(self, sequences, bin_path="muscle", matrix=None):
major_version = get_version(bin_path)[0]
if major_version != 3:
raise VersionError(
f"Muscle 3 is required, got version {major_version}"
)

super().__init__(sequences, bin_path, matrix)
self._gap_open = None
self._gap_ext = None
Expand All @@ -67,8 +79,8 @@ def __init__(self, sequences, bin_path="muscle", matrix=None):
def run(self):
args = [
"-quiet",
"-in", self.get_input_file_path(),
"-out", self.get_output_file_path(),
"-in", self.get_input_file_path(),
"-out", self.get_output_file_path(),
"-tree1", self._out_tree1_file.name,
"-tree2", self._out_tree2_file.name,
]
Expand Down Expand Up @@ -191,7 +203,7 @@ def align(cls, sequences, bin_path=None, matrix=None,
"""
Perform a multiple sequence alignment.
This is a convenience function, that wraps the :class:`MSAApp`
This is a convenience function, that wraps the :class:`MuscleApp`
execution.
Parameters
Expand Down Expand Up @@ -225,3 +237,16 @@ def align(cls, sequences, bin_path=None, matrix=None,
app.start()
app.join()
return app.get_alignment()


def get_version(bin_path="muscle"):
    """
    Get the version of the MUSCLE binary at the given path.

    The binary is called with the ``-version`` option and the first
    ``<major>.<minor>`` pattern in its standard output is parsed.

    Parameters
    ----------
    bin_path : str, optional
        Path of the MUSCLE binary.

    Returns
    -------
    major, minor : int
        The major and minor version number.

    Raises
    ------
    subprocess.SubprocessError
        If the standard output of the binary contains no version
        string.
    """
    output = subprocess.run(
        [bin_path, "-version"], capture_output=True, text=True
    )
    # Find match for version string containing major and minor version.
    # The pattern must be a raw string: '\d' and '\.' are not valid
    # escape sequences in a regular string literal
    match = re.search(r"(\d+)\.(\d+)", output.stdout)
    if match is None:
        raise subprocess.SubprocessError("Application did not print version")
    major, minor = match.groups()
    return int(major), int(minor)
171 changes: 171 additions & 0 deletions src/biotite/application/muscle/app5.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,171 @@
# This source code is part of the Biotite package and is distributed
# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
# information.

__name__ = "biotite.application.muscle"
__author__ = "Patrick Kunzmann"
__all__ = ["Muscle5App"]

import numbers
import warnings
from tempfile import NamedTemporaryFile
from ..localapp import cleanup_tempfile
from ..msaapp import MSAApp
from ..application import AppState, VersionError, requires_state
from ...sequence.sequence import Sequence
from ...sequence.seqtypes import NucleotideSequence, ProteinSequence
from ...sequence.align.matrix import SubstitutionMatrix
from ...sequence.align.alignment import Alignment
from ...sequence.phylo.tree import Tree
from .app3 import get_version


class Muscle5App(MSAApp):
    """
    Perform a multiple sequence alignment using MUSCLE version 5.

    Parameters
    ----------
    sequences : list of Sequence
        The sequences to be aligned.
    bin_path : str, optional
        Path of the MUSCLE binary.

    Raises
    ------
    VersionError
        If the major version of the binary at `bin_path` is lower
        than 5.

    See Also
    --------
    MuscleApp

    Notes
    -----
    Alignment ensemble generation is not supported, yet.

    Examples
    --------

    >>> seq1 = ProteinSequence("BIQTITE")
    >>> seq2 = ProteinSequence("TITANITE")
    >>> seq3 = ProteinSequence("BISMITE")
    >>> seq4 = ProteinSequence("IQLITE")
    >>> app = Muscle5App([seq1, seq2, seq3, seq4])
    >>> app.start()
    >>> app.join()
    >>> alignment = app.get_alignment()
    >>> print(alignment)
    BI-QTITE
    TITANITE
    BI-SMITE
    -I-QLITE
    """

    def __init__(self, sequences, bin_path="muscle"):
        # Check the version before anything else is set up,
        # as the command line options used in 'run()' are those of
        # Muscle 5
        major_version = get_version(bin_path)[0]
        if major_version < 5:
            raise VersionError(
                f"At least Muscle 5 is required, got version {major_version}"
            )

        super().__init__(sequences, bin_path)
        self._mode = "align"
        # 'None' means that the respective option is not passed to
        # MUSCLE
        self._consiters = None
        self._refineiters = None
        self._n_threads = None

    @requires_state(AppState.CREATED)
    def set_iterations(self, consistency=None, refinement=None):
        """
        Set the number of iterations for the alignment algorithm.

        Parameters
        ----------
        consistency : int, optional
            The number of consistency iterations.
            If not given, the current setting is kept.
        refinement : int, optional
            The number of refinement iterations.
            If not given, the current setting is kept.
        """
        if consistency is not None:
            self._consiters = consistency
        if refinement is not None:
            self._refineiters = refinement

    @requires_state(AppState.CREATED)
    def set_thread_number(self, number):
        """
        Set the number of threads for the alignment run.

        Parameters
        ----------
        number : int
            The number of threads.
        """
        self._n_threads = number

    @requires_state(AppState.CREATED)
    def use_super5(self):
        """
        Use the *Super5* algorithm for the alignment run.
        """
        self._mode = "super5"

    def run(self):
        # The mode ('-align' or '-super5') is directly followed by the
        # input file path
        args = [
            f"-{self._mode}",
            self.get_input_file_path(),
            "-output", self.get_output_file_path(),
        ]
        if self.get_seqtype() == "protein":
            args += ["-amino"]
        else:
            args += ["-nt"]
        # Optional parameters are only given, if they were set by the user
        if self._n_threads is not None:
            args += ["-threads", str(self._n_threads)]
        if self._consiters is not None:
            args += ["-consiters", str(self._consiters)]
        if self._refineiters is not None:
            args += ["-refineiters", str(self._refineiters)]
        self.set_arguments(args)
        super().run()

    def clean_up(self):
        # This class creates no additional files by itself,
        # hence the clean up is completely delegated to the superclass
        super().clean_up()

    @staticmethod
    def supports_nucleotide():
        return True

    @staticmethod
    def supports_protein():
        return True

    @staticmethod
    def supports_custom_nucleotide_matrix():
        return False

    @staticmethod
    def supports_custom_protein_matrix():
        return False

    @classmethod
    def align(cls, sequences, bin_path=None):
        """
        Perform a multiple sequence alignment.

        This is a convenience function that wraps the
        :class:`Muscle5App` execution.

        Parameters
        ----------
        sequences : iterable object of Sequence
            The sequences to be aligned
        bin_path : str, optional
            Path of the MSA software binary. By default, the default
            path will be used.

        Returns
        -------
        alignment : Alignment
            The global multiple sequence alignment.
        """
        if bin_path is None:
            # Do not forward 'None' to the constructor:
            # it would be used as path of the binary in the version
            # check, instead of the constructor's default path
            app = cls(sequences)
        else:
            app = cls(sequences, bin_path)
        app.start()
        app.join()
        return app.get_alignment()
Loading

0 comments on commit 49802cf

Please sign in to comment.