Skip to content

Commit

Permalink
test and doc updates for align.sequence_alignment
Browse files Browse the repository at this point in the history
- added unit test
- turned **kwargs into kwargs with defaults
- updated docs
  • Loading branch information
orbeckst committed Jul 11, 2016
1 parent e801d35 commit c0e5f5f
Show file tree
Hide file tree
Showing 2 changed files with 59 additions and 37 deletions.
51 changes: 30 additions & 21 deletions package/MDAnalysis/analysis/align.py
Original file line number Diff line number Diff line change
Expand Up @@ -773,10 +773,12 @@ def rms_fit_trj(
return filename


def sequence_alignment(mobile, reference, **kwargs):
"""Generate a global sequence alignment between residues in *reference* and *mobile*.
def sequence_alignment(mobile, reference, match_score=2, mismatch_penalty=-1,
gap_penalty=-2, gapextension_penalty=-0.1):
"""Generate a global sequence alignment between two residue groups.
The global alignment uses the Needleman-Wunsch algorith as
The residues in `reference` and `mobile` will be globally aligned.
The global alignment uses the Needleman-Wunsch algorithm as
implemented in :mod:`Bio.pairwise2`. The parameters of the dynamic
programming algorithm can be tuned with the keywords. The defaults
should be suitable for two similar sequences. For sequences with
Expand All @@ -789,39 +791,40 @@ def sequence_alignment(mobile, reference, **kwargs):
Atom group to be aligned
reference : AtomGroup
Atom group to be aligned against
** kwargs
*match_score*
score for matching residues, default :2
*mismatch_penalty*
match_score : float, optional, default 2
score for matching residues, default 2
mismatch_penalty : float, optional, default -1
penalty for residues that do not match , default : -1
*gap_penalty*
gap_penalty : float, optional, default -2
penalty for opening a gap; the high default value creates compact
alignments for highly identical sequences but might not be suitable
for sequences with low identity, default : -2
*gapextension_penalty*
gapextension_penalty : float, optional, default -0.1
penalty for extending a gap, default: -0.1
Returns
-------
aln[0]
Tuple of top sequence matching output ('Sequence A', 'Sequence B', score,
begin, end ,prev_pos, nex_pos)
Tuple of top sequence matching output `('Sequence A', 'Sequence B', score,
begin, end)`
BioPython documentation:
http://biopython.org/DIST/docs/api/Bio.pairwise2-module.html
See Also
--------
BioPython documentation for `pairwise2`_. Alternatively, use
:func:`fasta2select` with :program:`clustalw2` and the option
``is_aligned=False``.
.. _`pairwise2`: http://biopython.org/DIST/docs/api/Bio.pairwise2-module.html
.. versionadded:: 0.10.0
"""
import Bio.pairwise2
kwargs.setdefault('match_score', 2)
kwargs.setdefault('mismatch_penalty', -1)
kwargs.setdefault('gap_penalty', -2)
kwargs.setdefault('gapextension_penalty', -0.1)

aln = Bio.pairwise2.align.globalms(
reference.sequence(format="string"), mobile.sequence(format="string"),
kwargs['match_score'], kwargs['mismatch_penalty'],
kwargs['gap_penalty'], kwargs['gapextension_penalty'])
reference.residues.sequence(format="string"),
mobile.residues.sequence(format="string"),
match_score, mismatch_penalty, gap_penalty, gapextension_penalty)
# choose top alignment
return aln[0]

Expand Down Expand Up @@ -892,10 +895,16 @@ def fasta2select(fastafilename, is_aligned=False,
Returns
-------
select_dict
select_dict : dict
dictionary with 'reference' and 'mobile' selection string
that can be used immediately in :func:`rms_fit_trj` as
``select=select_dict``.
See Also
--------
:func:`sequence_alignment`, which does not require external
programs.
"""
import Bio.SeqIO
import Bio.AlignIO
Expand Down
45 changes: 29 additions & 16 deletions testsuite/MDAnalysisTests/analysis/test_align.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,15 +13,16 @@
# MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations.
# J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787
#
from __future__ import print_function
from __future__ import absolute_import, print_function

import MDAnalysis
import MDAnalysis.analysis.align as align
import MDAnalysis.analysis.rms as rms
from MDAnalysis import SelectionError

from numpy.testing import (TestCase, dec,
assert_almost_equal, assert_raises, assert_equal)
assert_almost_equal, assert_raises, assert_equal,
assert_array_equal, assert_)
import numpy as np
from nose.plugins.attrib import attr

Expand Down Expand Up @@ -152,7 +153,7 @@ def test_AlignTraj(self):
del self.outfile
#test weighted
self.outfile = path.join(self.tempdir.name, 'AlignTraj_weighted_test.dcd')
x = align.AlignTraj(self.universe,self.reference,filename=self.outfile,
x = align.AlignTraj(self.universe,self.reference,filename=self.outfile,
mass_weighted=True)
x.run()

Expand All @@ -177,14 +178,14 @@ def test_alignto_checks_selections(self):
def different_size():
a = u.atoms[10:100]
b = u.atoms[10:101]
return MDAnalysis.analysis.align.alignto(a, b)
return align.alignto(a, b)

assert_raises(SelectionError, different_size)

def different_atoms():
a = u.atoms[10:20]
b = u.atoms[10:17] + u.atoms[18:21]
return MDAnalysis.analysis.align.alignto(a, b)
return align.alignto(a, b)

assert_raises(SelectionError, different_atoms)

Expand All @@ -203,9 +204,7 @@ def tearDown(self):
@attr('issue')
def test_fasta2select_aligned(self):
"""test align.fasta2select() on aligned FASTA (Issue 112)"""
from MDAnalysis.analysis.align import fasta2select

sel = fasta2select(self.seq, is_aligned=True)
sel = align.fasta2select(self.seq, is_aligned=True)
# length of the output strings, not residues or anything real...
assert_equal(len(sel['reference']), 30623,
err_msg="selection string has unexpected length")
Expand All @@ -216,18 +215,32 @@ def test_fasta2select_aligned(self):
@dec.skipif(executable_not_found("clustalw2"),
msg="Test skipped because clustalw2 executable not found")
def test_fasta2select_ClustalW(self):
"""MDAnalysis.analysis.align: test fasta2select() with calling
ClustalW (Issue 113)"""
# note: will not be run if clustalw is not installed
from MDAnalysis.analysis.align import fasta2select

sel = fasta2select(self.seq, is_aligned=False,
alnfilename=self.alnfile,
treefilename=self.treefile)
"""MDAnalysis.analysis.align: test fasta2select() with ClustalW (Issue 113)"""
sel = align.fasta2select(self.seq, is_aligned=False,
alnfilename=self.alnfile,
treefilename=self.treefile)
# numbers computed from alignment with clustalw 2.1 on Mac OS X
# [orbeckst] length of the output strings, not residues or anything
# real...
assert_equal(len(sel['reference']), 23080,
err_msg="selection string has unexpected length")
assert_equal(len(sel['mobile']), 23090,
err_msg="selection string has unexpected length")

def test_sequence_alignment():
u = MDAnalysis.Universe(PSF)
reference = u.atoms
mobile = u.select_atoms("resid 122-159")
aln = align.sequence_alignment(mobile, reference)

assert_equal(len(aln), 5, err_msg="return value has wrong tuple size")

seqA, seqB, score, begin, end = aln
assert_equal(seqA, reference.residues.sequence(format="string"),
err_msg="reference sequence mismatch")
assert_(mobile.residues.sequence(format="string") in seqB,
"mobile sequence mismatch")
assert_almost_equal(score, 54.6)
assert_array_equal([begin, end], [0, reference.n_residues])


0 comments on commit c0e5f5f

Please sign in to comment.