Skip to content

Commit

Permalink
Merge pull request #899 from MDAnalysis/tests-align-sequence_alignment
Browse files Browse the repository at this point in the history
test and doc updates for align.sequence_alignment
  • Loading branch information
orbeckst authored Jul 11, 2016
2 parents 4dcf071 + 8d82688 commit 98d5c10
Show file tree
Hide file tree
Showing 2 changed files with 56 additions and 35 deletions.
51 changes: 30 additions & 21 deletions package/MDAnalysis/analysis/align.py
Original file line number Diff line number Diff line change
Expand Up @@ -773,10 +773,12 @@ def rms_fit_trj(
return filename


def sequence_alignment(mobile, reference, **kwargs):
"""Generate a global sequence alignment between residues in *reference* and *mobile*.
def sequence_alignment(mobile, reference, match_score=2, mismatch_penalty=-1,
gap_penalty=-2, gapextension_penalty=-0.1):
"""Generate a global sequence alignment between two residue groups.
The global alignment uses the Needleman-Wunsch algorith as
The residues in `reference` and `mobile` will be globally aligned.
The global alignment uses the Needleman-Wunsch algorithm as
implemented in :mod:`Bio.pairwise2`. The parameters of the dynamic
programming algorithm can be tuned with the keywords. The defaults
should be suitable for two similar sequences. For sequences with
Expand All @@ -789,39 +791,40 @@ def sequence_alignment(mobile, reference, **kwargs):
Atom group to be aligned
reference : AtomGroup
Atom group to be aligned against
** kwargs
*match_score*
score for matching residues, default :2
*mismatch_penalty*
match_score : float, optional, default 2
score for matching residues, default 2
mismatch_penalty : float, optional, default -1
penalty for residues that do not match , default : -1
*gap_penalty*
gap_penalty : float, optional, default -2
penalty for opening a gap; the high default value creates compact
alignments for highly identical sequences but might not be suitable
for sequences with low identity, default : -2
*gapextension_penalty*
gapextension_penalty : float, optional, default -0.1
penalty for extending a gap, default: -0.1
Returns
-------
aln[0]
Tuple of top sequence matching output ('Sequence A', 'Sequence B', score,
begin, end ,prev_pos, nex_pos)
Tuple of top sequence matching output `('Sequence A', 'Sequence B', score,
begin, end)`
BioPython documentation:
http://biopython.org/DIST/docs/api/Bio.pairwise2-module.html
See Also
--------
BioPython documentation for `pairwise2`_. Alternatively, use
:func:`fasta2select` with :program:`clustalw2` and the option
``is_aligned=False``.
.. _`pairwise2`: http://biopython.org/DIST/docs/api/Bio.pairwise2-module.html
.. versionadded:: 0.10.0
"""
import Bio.pairwise2
kwargs.setdefault('match_score', 2)
kwargs.setdefault('mismatch_penalty', -1)
kwargs.setdefault('gap_penalty', -2)
kwargs.setdefault('gapextension_penalty', -0.1)

aln = Bio.pairwise2.align.globalms(
reference.sequence(format="string"), mobile.sequence(format="string"),
kwargs['match_score'], kwargs['mismatch_penalty'],
kwargs['gap_penalty'], kwargs['gapextension_penalty'])
reference.residues.sequence(format="string"),
mobile.residues.sequence(format="string"),
match_score, mismatch_penalty, gap_penalty, gapextension_penalty)
# choose top alignment
return aln[0]

Expand Down Expand Up @@ -892,10 +895,16 @@ def fasta2select(fastafilename, is_aligned=False,
Returns
-------
select_dict
select_dict : dict
dictionary with 'reference' and 'mobile' selection string
that can be used immediately in :func:`rms_fit_trj` as
``select=select_dict``.
See Also
--------
:func:`sequence_alignment`, which does not require external
programs.
"""
import Bio.SeqIO
import Bio.AlignIO
Expand Down
40 changes: 26 additions & 14 deletions testsuite/MDAnalysisTests/analysis/test_align.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@

from numpy.testing import (TestCase, dec,
assert_almost_equal, assert_raises, assert_equal,
assert_)
assert_array_equal, assert_)
import numpy as np
from nose.plugins.attrib import attr

Expand Down Expand Up @@ -190,14 +190,14 @@ def test_alignto_checks_selections(self):
def different_size():
a = u.atoms[10:100]
b = u.atoms[10:101]
return MDAnalysis.analysis.align.alignto(a, b)
return align.alignto(a, b)

assert_raises(SelectionError, different_size)

def different_atoms():
a = u.atoms[10:20]
b = u.atoms[10:17] + u.atoms[18:21]
return MDAnalysis.analysis.align.alignto(a, b)
return align.alignto(a, b)

assert_raises(SelectionError, different_atoms)

Expand All @@ -216,9 +216,7 @@ def tearDown(self):
@attr('issue')
def test_fasta2select_aligned(self):
"""test align.fasta2select() on aligned FASTA (Issue 112)"""
from MDAnalysis.analysis.align import fasta2select

sel = fasta2select(self.seq, is_aligned=True)
sel = align.fasta2select(self.seq, is_aligned=True)
# length of the output strings, not residues or anything real...
assert_equal(len(sel['reference']), 30623,
err_msg="selection string has unexpected length")
Expand All @@ -229,18 +227,32 @@ def test_fasta2select_aligned(self):
@dec.skipif(executable_not_found("clustalw2"),
msg="Test skipped because clustalw2 executable not found")
def test_fasta2select_ClustalW(self):
"""MDAnalysis.analysis.align: test fasta2select() with calling
ClustalW (Issue 113)"""
# note: will not be run if clustalw is not installed
from MDAnalysis.analysis.align import fasta2select

sel = fasta2select(self.seq, is_aligned=False,
alnfilename=self.alnfile,
treefilename=self.treefile)
"""MDAnalysis.analysis.align: test fasta2select() with ClustalW (Issue 113)"""
sel = align.fasta2select(self.seq, is_aligned=False,
alnfilename=self.alnfile,
treefilename=self.treefile)
# numbers computed from alignment with clustalw 2.1 on Mac OS X
# [orbeckst] length of the output strings, not residues or anything
# real...
assert_equal(len(sel['reference']), 23080,
err_msg="selection string has unexpected length")
assert_equal(len(sel['mobile']), 23090,
err_msg="selection string has unexpected length")

def test_sequence_alignment():
u = MDAnalysis.Universe(PSF)
reference = u.atoms
mobile = u.select_atoms("resid 122-159")
aln = align.sequence_alignment(mobile, reference)

assert_equal(len(aln), 5, err_msg="return value has wrong tuple size")

seqA, seqB, score, begin, end = aln
assert_equal(seqA, reference.residues.sequence(format="string"),
err_msg="reference sequence mismatch")
assert_(mobile.residues.sequence(format="string") in seqB,
"mobile sequence mismatch")
assert_almost_equal(score, 54.6)
assert_array_equal([begin, end], [0, reference.n_residues])


0 comments on commit 98d5c10

Please sign in to comment.