Skip to content

Commit

Permalink
Rewrote GMSParser to new format
Browse files Browse the repository at this point in the history
Issue #572
  • Loading branch information
richardjgowers committed Dec 9, 2015
1 parent f0b5598 commit 440d8e1
Showing 1 changed file with 103 additions and 43 deletions.
146 changes: 103 additions & 43 deletions package/MDAnalysis/topology/DMSParser.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,37 +34,65 @@
"""
from __future__ import absolute_import

import numpy as np
import sqlite3
import os

from ..core.AtomGroup import Atom
from .core import guess_atom_type
from .base import TopologyReader
from .base import TopologyReader, squash_by
from ..core.topology import Topology
from ..core.topologyattrs import (
Atomids,
Atomnames,
Bonds,
Charges,
Masses,
Resids,
Resnames,
Segids,
AtomAttr, # for custom Attributes
)


class ChainIDs(AtomAttr):
"""ChainID for each Atom"""
attrname = 'chainids'
singular = 'chainid'


class Atomnums(AtomAttr):
"""The number for each Atom"""
attrname = 'atomnums'
singular = 'atomnum'


class DMSParser(TopologyReader):
"""Read a topology from a DESRES_ Molecular Structure file.
Format (DMS_) coordinate files (as used by the Desmond_ MD package).
Reads the following attributes:
Atom:
- atomid
- atomnum
- atomname
- mass
- charge
- chain
Residue:
- resname
- resid
Segment:
- segid
.. _DESRES: http://www.deshawresearch.com
.. _Desmond: http://www.deshawresearch.com/resources_desmond.html
.. _DMS: http://www.deshawresearch.com/Desmond_Users_Guide-0.7.pdf
"""

def parse(self):
"""Parse DMS file *filename* and return the dict `structure`.
Only reads the list of atoms.
:Returns: MDAnalysis internal *structure* dict, which contains
Atom and Bond objects
.. SeeAlso:: The *structure* dict is defined in
:mod:`MDAnalysis.topology`.
"""
# Fix by SB: Needed because sqlite3.connect does not raise anything if file is not there
"""Parse DMS file *filename* and return the Topology object"""
# Fix by SB: Needed because sqlite3.connect does not raise anything
# if file is not there
if not os.path.isfile(self.filename):
raise IOError("No such file: {0}".format(self.filename))

Expand All @@ -77,28 +105,34 @@ def dict_factory(cursor, row):
d[col[0]] = row[idx]
return d

attrs = {}
with sqlite3.connect(self.filename) as con:
try:
# This will return dictionaries instead of tuples,
# when calling cur.fetch() or fetchall()
con.row_factory = dict_factory
cur = con.cursor()
cur.execute('SELECT * FROM particle')
particles = cur.fetchall()
except sqlite3.DatabaseError:
raise IOError("Failed reading the atoms from DMS Database")
else:
# p["anum"] contains the atomic number
# Selecting single column, so just strip tuple
for attrname, dt in [
('id', np.int32),
('anum', np.int32),
('mass', np.float32),
('charge', np.float32),
('name', object),
('resname', object),
('resid', np.int32),
('chain', object),
('segid', object),
]:
try:
atoms = [Atom(p["id"], p["name"].strip(),
guess_atom_type(p["name"].strip()),
p["resname"].strip(), p["resid"],
p["segid"].strip(), p["mass"], p["charge"],
universe=self._u)
for p in particles]
except KeyError:
raise ValueError("Failed reading atom information")
cur = con.cursor()
cur.row_factory = lambda c, r: r[0]
cur.execute('SELECT {} FROM particle'
''.format(attrname))
vals = cur.fetchall()
except sqlite3.DatabaseError:
raise IOError(
"Failed reading the atoms from DMS Database")
else:
attrs[attrname] = np.array(vals, dtype=dt)

try:
cur.row_factory = dict_factory
cur.execute('SELECT * FROM bond')
bonds = cur.fetchall()
except sqlite3.DatabaseError:
Expand All @@ -110,12 +144,38 @@ def dict_factory(cursor, row):
desc = tuple(sorted([b['p0'], b['p1']]))
bondlist.append(desc)
bondorder[desc] = b['order']

# All the records below besides donors and acceptors can be contained in a DMS file.
# In addition to the coordinates and bonds, DMS may contain the entire force-field
# information (terms+parameters),
structure = {"atoms": atoms,
"bonds": tuple(bondlist),
"bondorder": bondorder}

return structure
attrs['bond'] = bondlist
attrs['bondorder'] = bondorder

topattrs = []
# Bundle in Atom level objects
for attr, cls in [
('id', Atomids),
('anum', Atomnums),
('mass', Masses),
('charge', Charges),
('name', Atomnames),
('chain', ChainIDs),
]:
topattrs.append(cls(attrs[attr]))

# Residues
atom_residx, res_resids, (res_resnames, res_segids) = squash_by(
attrs['resid'], attrs['resname'], attrs['segid'])
topattrs.append(Resids(res_resids))
topattrs.append(Resnames(res_resnames))

# Segments
res_segidx, seg_segids = squash_by(
res_segids)[:2]
topattrs.append(Segids(seg_segids))

# Bonds
topattrs.append(Bonds(attrs['bond']))

top = Topology(len(attrs['id']), len(res_resids), len(seg_segids),
attrs=topattrs,
atom_resindex=atom_residx,
residue_segindex=res_segidx)

return top

0 comments on commit 440d8e1

Please sign in to comment.