Skip to content

Commit

Permalink
Merge pull request #48 from trje3733/issue-14-expanded-ensemble-parser
Browse files Browse the repository at this point in the history
- add Gromacs expanded ensemble parser #14 
- closes #14
  • Loading branch information
orbeckst authored Dec 5, 2017
2 parents 0c6f45d + 0065e05 commit 1c8844e
Show file tree
Hide file tree
Showing 4 changed files with 197 additions and 12 deletions.
84 changes: 73 additions & 11 deletions src/alchemlyb/parsing/gmx.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,11 +65,29 @@ def extract_u_nk(xvg, T):
index=pd.Float64Index(times.values, name='time'))

# create columns for each lambda, indicating state each row sampled from
for i, l in enumerate(lambdas):
try:
u_k[l] = statevec[i]
except TypeError:
u_k[l] = statevec
# if state is None run as expanded ensemble data or REX
if state is None:
# if thermodynamic state is specified map thermodynamic
# state data to lambda values, else (for REX)
# define state based on the legend
if 'Thermodynamic state' in df:
ts_index = df.columns.get_loc('Thermodynamic state')
thermo_state = df[df.columns[ts_index]]
for i, l in enumerate(lambdas):
v = []
for t in thermo_state:
v.append(statevec[int(t)][i])
u_k[l] = v
else:
state_legend = _extract_legend(xvg)
for i, l in enumerate(state_legend):
u_k[l] = state_legend[l]
else:
for i, l in enumerate(lambdas):
try:
u_k[l] = statevec[i]
except TypeError:
u_k[l] = statevec

# set up new multi-index
newind = ['time'] + lambdas
Expand Down Expand Up @@ -121,11 +139,29 @@ def extract_dHdl(xvg, T):
index=pd.Float64Index(times.values, name='time'))

# create columns for each lambda, indicating state each row sampled from
for i, l in enumerate(lambdas):
try:
dHdl[l] = statevec[i]
except TypeError:
dHdl[l] = statevec
# if state is None run as expanded ensemble data or REX
if state is None:
# if thermodynamic state is specified map thermodynamic
# state data to lambda values, else (for REX)
# define state based on the legend
if 'Thermodynamic state' in df:
ts_index = df.columns.get_loc('Thermodynamic state')
thermo_state = df[df.columns[ts_index]]
for i, l in enumerate(lambdas):
v = []
for t in thermo_state:
v.append(statevec[int(t)][i])
dHdl[l] = v
else:
state_legend = _extract_legend(xvg)
for i, l in enumerate(state_legend):
dHdl[l] = state_legend[l]
else:
for i, l in enumerate(lambdas):
try:
dHdl[l] = statevec[i]
except TypeError:
dHdl[l] = statevec

# set up new multi-index
newind = ['time'] + lambdas
Expand All @@ -140,6 +176,7 @@ def _extract_state(xvg):
"""Extract information on state sampled, names of lambdas.
"""
state = None
with anyopen(xvg, 'r') as f:
for line in f:
if ('subtitle' in line) and ('state' in line):
Expand All @@ -148,9 +185,34 @@ def _extract_state(xvg):
statevec = eval(line.strip().split(' = ')[-1].strip('"'))
break

# if expanded ensemble data is used the state variable will never be assigned
# parsing expanded ensemble data
if state is None:
lambdas = []
statevec = []
with anyopen(xvg, 'r') as f:
for line in f:
if ('legend' in line) and ('lambda' in line):
lambdas.append([word.strip(')(,') for word in line.split() if 'lambda' in word][0])
if ('legend' in line) and (' to ' in line):
statevec.append(([float(i) for i in line.strip().split(' to ')[-1].strip('"()').split(',')]))

return state, lambdas, statevec


def _extract_legend(xvg):
"""Extract information on state sampled for REX simulations.
"""
state_legend = {}
with anyopen(xvg, 'r') as f:
for line in f:
if ('legend' in line) and ('lambda' in line):
state_legend[line.split()[4]] = float(line.split()[6].strip('"'))

return state_legend


def _extract_dataframe(xvg):
"""Extract a DataFrame from XVG data.
Expand All @@ -172,7 +234,7 @@ def _extract_dataframe(xvg):

# should catch non-numeric lines so we don't proceed in parsing
# here
if line.startswith(('#', '@')) :
if line.startswith(('#', '@')):
continue

if line.startswith('&'): #pragma: no cover
Expand Down
69 changes: 69 additions & 0 deletions src/alchemlyb/tests/parsing/test_gmx.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

from alchemlyb.parsing.gmx import extract_dHdl, extract_u_nk
from alchemtest.gmx import load_benzene
from alchemtest.gmx import load_expanded_ensemble_case_1, load_expanded_ensemble_case_2, load_expanded_ensemble_case_3


def test_dHdl():
Expand Down Expand Up @@ -34,3 +35,71 @@ def test_u_nk():
assert u_nk.shape == (4001, 5)
elif leg == 'VDW':
u_nk.shape == (4001, 16)

def test_u_nk_case1():
"""Test that u_nk has the correct form when extracted from expanded ensemble files (case 1).
"""
dataset = load_expanded_ensemble_case_1()

for leg in dataset['data']:
for filename in dataset['data'][leg]:
u_nk = extract_u_nk(filename, T=300)

assert u_nk.index.names == ['time', 'fep-lambda', 'coul-lambda', 'vdw-lambda', 'restraint-lambda']

assert u_nk.shape == (50001, 28)

def test_dHdl_case1():
"""Test that dHdl has the correct form when extracted from expanded ensemble files (case 1).
"""
dataset = load_expanded_ensemble_case_1()

for leg in dataset['data']:
for filename in dataset['data'][leg]:
dHdl = extract_dHdl(filename, T=300)

assert dHdl.index.names == ['time', 'fep-lambda', 'coul-lambda', 'vdw-lambda', 'restraint-lambda']
assert dHdl.shape == (50001, 4)

def test_u_nk_case2():
"""Test that u_nk has the correct form when extracted from expanded ensemble files (case 2).
"""
dataset = load_expanded_ensemble_case_2()

for leg in dataset['data']:
for filename in dataset['data'][leg]:
u_nk = extract_u_nk(filename, T=300)

assert u_nk.index.names == ['time', 'fep-lambda', 'coul-lambda', 'vdw-lambda', 'restraint-lambda']

assert u_nk.shape == (25001, 28)

def test_u_nk_case3():
"""Test that u_nk has the correct form when extracted from REX files (case 3).
"""
dataset = load_expanded_ensemble_case_3()

for leg in dataset['data']:
for filename in dataset['data'][leg]:
u_nk = extract_u_nk(filename, T=300)

assert u_nk.index.names == ['time', 'fep-lambda', 'coul-lambda', 'vdw-lambda', 'restraint-lambda']

assert u_nk.shape == (2500, 28)

def test_dHdl_case3():
"""Test that dHdl has the correct form when extracted from REX files (case 3).
"""
dataset = load_expanded_ensemble_case_3()

for leg in dataset['data']:
for filename in dataset['data'][leg]:
dHdl = extract_dHdl(filename, T=300)

assert dHdl.index.names == ['time', 'fep-lambda', 'coul-lambda', 'vdw-lambda', 'restraint-lambda']
assert dHdl.shape == (2500, 4)
29 changes: 28 additions & 1 deletion src/alchemlyb/tests/test_fep_estimators.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,14 +26,41 @@ def gmx_benzene_vdw_u_nk():

return u_nk

def gmx_expanded_ensemble_case_1():
dataset = alchemtest.gmx.load_expanded_ensemble_case_1()

u_nk = pd.concat([gmx.extract_u_nk(filename, T=300)
for filename in dataset['data']['AllStates']])

return u_nk

def gmx_expanded_ensemble_case_2():
dataset = alchemtest.gmx.load_expanded_ensemble_case_2()

u_nk = pd.concat([gmx.extract_u_nk(filename, T=300)
for filename in dataset['data']['AllStates']])

return u_nk

def gmx_expanded_ensemble_case_3():
dataset = alchemtest.gmx.load_expanded_ensemble_case_3()

u_nk = pd.concat([gmx.extract_u_nk(filename, T=300)
for filename in dataset['data']['AllStates']])

return u_nk


class FEPestimatorMixin:
"""Mixin for all FEP Estimator test classes.
"""

@pytest.mark.parametrize('X_delta_f', ((gmx_benzene_coul_u_nk(), 3.041),
(gmx_benzene_vdw_u_nk(), -3.007)))
(gmx_benzene_vdw_u_nk(), -3.007),
(gmx_expanded_ensemble_case_1(), 75.923),
(gmx_expanded_ensemble_case_2(), 75.915),
(gmx_expanded_ensemble_case_3(), 76.173)))
def test_get_delta_f(self, X_delta_f):
est = self.cls().fit(X_delta_f[0])
delta_f = est.delta_f_.iloc[0, -1]
Expand Down
27 changes: 27 additions & 0 deletions src/alchemlyb/tests/test_ti_estimators.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,30 @@ def gmx_benzene_vdw_dHdl():

return dHdl

def gmx_expanded_ensemble_case_1_dHdl():
dataset = alchemtest.gmx.load_expanded_ensemble_case_1()

dHdl = pd.concat([gmx.extract_dHdl(filename, T=300)
for filename in dataset['data']['AllStates']])

return dHdl

def gmx_expanded_ensemble_case_2_dHdl():
dataset = alchemtest.gmx.load_expanded_ensemble_case_2()

dHdl = pd.concat([gmx.extract_dHdl(filename, T=300)
for filename in dataset['data']['AllStates']])

return dHdl

def gmx_expanded_ensemble_case_3_dHdl():
dataset = alchemtest.gmx.load_expanded_ensemble_case_3()

dHdl = pd.concat([gmx.extract_dHdl(filename, T=300)
for filename in dataset['data']['AllStates']])

return dHdl

def amber_simplesolvated_charge_dHdl():
dataset = alchemtest.amber.load_simplesolvated()

Expand All @@ -49,6 +73,9 @@ class TIestimatorMixin:

@pytest.mark.parametrize('X_delta_f', ((gmx_benzene_coul_dHdl(), 3.089),
(gmx_benzene_vdw_dHdl(), -3.056),
(gmx_expanded_ensemble_case_1_dHdl(), 76.220),
(gmx_expanded_ensemble_case_2_dHdl(), 76.247),
(gmx_expanded_ensemble_case_3_dHdl(), 76.387),
(amber_simplesolvated_charge_dHdl(), -60.114),
(amber_simplesolvated_vdw_dHdl(), 3.824)))
def test_get_delta_f(self, X_delta_f):
Expand Down

0 comments on commit 1c8844e

Please sign in to comment.