From 8d82688413d7a844aaac0da16c6b0b0b3d5fd9d4 Mon Sep 17 00:00:00 2001 From: Oliver Beckstein Date: Mon, 11 Jul 2016 01:18:17 -0700 Subject: [PATCH] test and doc updates for align.sequence_alignment - added unit test - turned **kwargs into kwargs with defaults - updated docs --- package/MDAnalysis/analysis/align.py | 51 +++++++++++-------- .../MDAnalysisTests/analysis/test_align.py | 40 ++++++++++----- 2 files changed, 56 insertions(+), 35 deletions(-) diff --git a/package/MDAnalysis/analysis/align.py b/package/MDAnalysis/analysis/align.py index 4b1c2c755a0..8cd58a4ae55 100644 --- a/package/MDAnalysis/analysis/align.py +++ b/package/MDAnalysis/analysis/align.py @@ -773,10 +773,12 @@ def rms_fit_trj( return filename -def sequence_alignment(mobile, reference, **kwargs): - """Generate a global sequence alignment between residues in *reference* and *mobile*. +def sequence_alignment(mobile, reference, match_score=2, mismatch_penalty=-1, + gap_penalty=-2, gapextension_penalty=-0.1): + """Generate a global sequence alignment between two residue groups. - The global alignment uses the Needleman-Wunsch algorith as + The residues in `reference` and `mobile` will be globally aligned. + The global alignment uses the Needleman-Wunsch algorithm as implemented in :mod:`Bio.pairwise2`. The parameters of the dynamic programming algorithm can be tuned with the keywords. The defaults should be suitable for two similar sequences. For sequences with @@ -789,39 +791,40 @@ def sequence_alignment(mobile, reference, **kwargs): Atom group to be aligned reference : AtomGroup Atom group to be aligned against - ** kwargs - *match_score* - score for matching residues, default :2 - *mismatch_penalty* + match_score : float, optional, default 2 + score for matching residues, default 2 + mismatch_penalty : float, optional, default -1 penalty for residues that do not match , default : -1 - *gap_penalty* + gap_penalty : float, optional, default -2 penalty for opening a gap; the high default value creates compact alignments for highly identical sequences but might not be suitable for sequences with low identity, default : -2 - *gapextension_penalty* + gapextension_penalty : float, optional, default -0.1 penalty for extending a gap, default: -0.1 Returns ------- aln[0] - Tuple of top sequence matching output ('Sequence A', 'Sequence B', score, - begin, end ,prev_pos, nex_pos) + Tuple of top sequence matching output `('Sequence A', 'Sequence B', score, + begin, end)` - BioPython documentation: - http://biopython.org/DIST/docs/api/Bio.pairwise2-module.html + See Also + -------- + BioPython documentation for `pairwise2`_. Alternatively, use + :func:`fasta2select` with :program:`clustalw2` and the option + ``is_aligned=False``. + + .. _`pairwise2`: http://biopython.org/DIST/docs/api/Bio.pairwise2-module.html .. versionadded:: 0.10.0 + """ import Bio.pairwise2 - kwargs.setdefault('match_score', 2) - kwargs.setdefault('mismatch_penalty', -1) - kwargs.setdefault('gap_penalty', -2) - kwargs.setdefault('gapextension_penalty', -0.1) aln = Bio.pairwise2.align.globalms( - reference.sequence(format="string"), mobile.sequence(format="string"), - kwargs['match_score'], kwargs['mismatch_penalty'], - kwargs['gap_penalty'], kwargs['gapextension_penalty']) + reference.residues.sequence(format="string"), + mobile.residues.sequence(format="string"), + match_score, mismatch_penalty, gap_penalty, gapextension_penalty) # choose top alignment return aln[0] @@ -892,10 +895,16 @@ def fasta2select(fastafilename, is_aligned=False, Returns ------- - select_dict + select_dict : dict dictionary with 'reference' and 'mobile' selection string that can be used immediately in :func:`rms_fit_trj` as ``select=select_dict``. + + See Also + -------- + :func:`sequence_alignment`, which does not require external + programs. + """ import Bio.SeqIO import Bio.AlignIO diff --git a/testsuite/MDAnalysisTests/analysis/test_align.py b/testsuite/MDAnalysisTests/analysis/test_align.py index dddccae33ab..303f1c86404 100644 --- a/testsuite/MDAnalysisTests/analysis/test_align.py +++ b/testsuite/MDAnalysisTests/analysis/test_align.py @@ -22,7 +22,7 @@ from numpy.testing import (TestCase, dec, assert_almost_equal, assert_raises, assert_equal, - assert_) + assert_array_equal, assert_) import numpy as np from nose.plugins.attrib import attr @@ -190,14 +190,14 @@ def test_alignto_checks_selections(self): def different_size(): a = u.atoms[10:100] b = u.atoms[10:101] - return MDAnalysis.analysis.align.alignto(a, b) + return align.alignto(a, b) assert_raises(SelectionError, different_size) def different_atoms(): a = u.atoms[10:20] b = u.atoms[10:17] + u.atoms[18:21] - return MDAnalysis.analysis.align.alignto(a, b) + return align.alignto(a, b) assert_raises(SelectionError, different_atoms) @@ -216,9 +216,7 @@ def tearDown(self): @attr('issue') def test_fasta2select_aligned(self): """test align.fasta2select() on aligned FASTA (Issue 112)""" - from MDAnalysis.analysis.align import fasta2select - - sel = fasta2select(self.seq, is_aligned=True) + sel = align.fasta2select(self.seq, is_aligned=True) # length of the output strings, not residues or anything real... assert_equal(len(sel['reference']), 30623, err_msg="selection string has unexpected length") @@ -229,14 +227,10 @@ def test_fasta2select_aligned(self): @dec.skipif(executable_not_found("clustalw2"), msg="Test skipped because clustalw2 executable not found") def test_fasta2select_ClustalW(self): - """MDAnalysis.analysis.align: test fasta2select() with calling - ClustalW (Issue 113)""" - # note: will not be run if clustalw is not installed - from MDAnalysis.analysis.align import fasta2select - - sel = fasta2select(self.seq, is_aligned=False, - alnfilename=self.alnfile, - treefilename=self.treefile) + """MDAnalysis.analysis.align: test fasta2select() with ClustalW (Issue 113)""" + sel = align.fasta2select(self.seq, is_aligned=False, + alnfilename=self.alnfile, + treefilename=self.treefile) # numbers computed from alignment with clustalw 2.1 on Mac OS X # [orbeckst] length of the output strings, not residues or anything # real... @@ -244,3 +238,21 @@ def test_fasta2select_ClustalW(self): err_msg="selection string has unexpected length") assert_equal(len(sel['mobile']), 23090, err_msg="selection string has unexpected length") + +def test_sequence_alignment(): + u = MDAnalysis.Universe(PSF) + reference = u.atoms + mobile = u.select_atoms("resid 122-159") + aln = align.sequence_alignment(mobile, reference) + + assert_equal(len(aln), 5, err_msg="return value has wrong tuple size") + + seqA, seqB, score, begin, end = aln + assert_equal(seqA, reference.residues.sequence(format="string"), + err_msg="reference sequence mismatch") + assert_(mobile.residues.sequence(format="string") in seqB, + "mobile sequence mismatch") + assert_almost_equal(score, 54.6) + assert_array_equal([begin, end], [0, reference.n_residues]) + +