Rewrote GMSParser to new format

Issue #572
MDAnalysis · Dec 9, 2015 · 440d8e1 · 440d8e1
1 parent f0b5598
commit 440d8e1
Showing 1 changed file with 103 additions and 43 deletions.
diff --git a/package/MDAnalysis/topology/DMSParser.py b/package/MDAnalysis/topology/DMSParser.py
@@ -34,37 +34,65 @@
 """
 from __future__ import absolute_import
 
+import numpy as np
 import sqlite3
 import os
 
-from ..core.AtomGroup import Atom
-from .core import guess_atom_type
-from .base import TopologyReader
+from .base import TopologyReader, squash_by
+from ..core.topology import Topology
+from ..core.topologyattrs import (
+    Atomids,
+    Atomnames,
+    Bonds,
+    Charges,
+    Masses,
+    Resids,
+    Resnames,
+    Segids,
+    AtomAttr,  # for custom Attributes
+)
+
+
+class ChainIDs(AtomAttr):
+    """ChainID for each Atom"""
+    attrname = 'chainids'
+    singular = 'chainid'
+
+
+class Atomnums(AtomAttr):
+    """The number for each Atom"""
+    attrname = 'atomnums'
+    singular = 'atomnum'
 
 
 class DMSParser(TopologyReader):
     """Read a topology from a DESRES_ Molecular Structure file.
 
     Format (DMS_) coordinate files (as used by the Desmond_ MD package).
 
+    Reads the following attributes:
+      Atom:
+        - atomid
+        - atomnum
+        - atomname
+        - mass
+        - charge
+        - chain
+      Residue:
+        - resname
+        - resid
+      Segment:
+        - segid
+
     .. _DESRES: http://www.deshawresearch.com
     .. _Desmond: http://www.deshawresearch.com/resources_desmond.html
     .. _DMS: http://www.deshawresearch.com/Desmond_Users_Guide-0.7.pdf
     """
 
     def parse(self):
-        """Parse DMS file *filename* and return the dict `structure`.
-
-        Only reads the list of atoms.
-
-        :Returns: MDAnalysis internal *structure* dict, which contains
-                  Atom and Bond objects
-
-        .. SeeAlso:: The *structure* dict is defined in
-                     :mod:`MDAnalysis.topology`.
-
-        """
-        # Fix by SB: Needed because sqlite3.connect does not raise anything if file is not there
+        """Parse DMS file *filename* and return the Topology object"""
+        # Fix by SB: Needed because sqlite3.connect does not raise anything
+        # if file is not there
         if not os.path.isfile(self.filename):
             raise IOError("No such file: {0}".format(self.filename))
 
@@ -77,28 +105,34 @@ def dict_factory(cursor, row):
                 d[col[0]] = row[idx]
             return d
 
+        attrs = {}
         with sqlite3.connect(self.filename) as con:
-            try:
-                # This will return dictionaries instead of tuples,
-                # when calling cur.fetch() or fetchall()
-                con.row_factory = dict_factory
-                cur = con.cursor()
-                cur.execute('SELECT * FROM particle')
-                particles = cur.fetchall()
-            except sqlite3.DatabaseError:
-                raise IOError("Failed reading the atoms from DMS Database")
-            else:
-                # p["anum"] contains the atomic number
+            # Selecting single column, so just strip tuple
+            for attrname, dt in [
+                    ('id', np.int32),
+                    ('anum', np.int32),
+                    ('mass', np.float32),
+                    ('charge', np.float32),
+                    ('name', object),
+                    ('resname', object),
+                    ('resid', np.int32),
+                    ('chain', object),
+                    ('segid', object),
+            ]:
                 try:
-                    atoms = [Atom(p["id"], p["name"].strip(),
-                                  guess_atom_type(p["name"].strip()),
-                                  p["resname"].strip(), p["resid"],
-                                  p["segid"].strip(), p["mass"], p["charge"],
-                                  universe=self._u)
-                             for p in particles]
-                except KeyError:
-                    raise ValueError("Failed reading atom information")
+                    cur = con.cursor()
+                    cur.row_factory = lambda c, r: r[0]
+                    cur.execute('SELECT {} FROM particle'
+                                ''.format(attrname))
+                    vals = cur.fetchall()
+                except sqlite3.DatabaseError:
+                    raise IOError(
+                        "Failed reading the atoms from DMS Database")
+                else:
+                    attrs[attrname] = np.array(vals, dtype=dt)
+
             try:
+                cur.row_factory = dict_factory
                 cur.execute('SELECT * FROM bond')
                 bonds = cur.fetchall()
             except sqlite3.DatabaseError:
@@ -110,12 +144,38 @@ def dict_factory(cursor, row):
                     desc = tuple(sorted([b['p0'], b['p1']]))
                     bondlist.append(desc)
                     bondorder[desc] = b['order']
-
-        # All the records below besides donors and acceptors can be contained in a DMS file.
-        # In addition to the coordinates and bonds, DMS may contain the entire force-field
-        # information (terms+parameters),
-        structure = {"atoms": atoms,
-                     "bonds": tuple(bondlist),
-                     "bondorder": bondorder}
-
-        return structure
+                attrs['bond'] = bondlist
+                attrs['bondorder'] = bondorder
+
+        topattrs = []
+        # Bundle in Atom level objects
+        for attr, cls in [
+                ('id', Atomids),
+                ('anum', Atomnums),
+                ('mass', Masses),
+                ('charge', Charges),
+                ('name', Atomnames),
+                ('chain', ChainIDs),
+        ]:
+            topattrs.append(cls(attrs[attr]))
+
+        # Residues
+        atom_residx, res_resids, (res_resnames, res_segids) = squash_by(
+            attrs['resid'], attrs['resname'], attrs['segid'])
+        topattrs.append(Resids(res_resids))
+        topattrs.append(Resnames(res_resnames))
+
+        # Segments
+        res_segidx, seg_segids = squash_by(
+            res_segids)[:2]
+        topattrs.append(Segids(seg_segids))
+
+        # Bonds
+        topattrs.append(Bonds(attrs['bond']))
+
+        top = Topology(len(attrs['id']), len(res_resids), len(seg_segids),
+                       attrs=topattrs,
+                       atom_resindex=atom_residx,
+                       residue_segindex=res_segidx)
+
+        return top