Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

NAMD parser for FEP files #72

Merged
merged 4 commits into from
Mar 21, 2019
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions src/alchemlyb/estimators/bar_.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ class BAR(BaseEstimator):
relative_tolerance : float, optional
Set to determine the relative tolerance convergence criteria.

method : str, optional, defualt='false-position'
method : str, optional, default='false-position'
choice of method to solve BAR nonlinear equations,
one of 'self-consistent-iteration' or 'false-position' (default: 'false-position')

Expand Down Expand Up @@ -57,7 +57,7 @@ def fit(self, u_nk):

Parameters
----------
u_nk : DataFrame
u_nk : DataFrame
u_nk[n,k] is the reduced potential energy of uncorrelated
configuration n evaluated at state k.

Expand Down
104 changes: 104 additions & 0 deletions src/alchemlyb/parsing/namd.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
"""Parsers for extracting alchemical data from `Namd <http://www.ks.uiuc.edu/Research/namd/>`_ output files.

"""
import pandas as pd
import numpy as np
from .util import anyopen

def extract_u_nk(fep_file):
"""Return reduced potentials `u_nk` from NAMD fepout file.

Parameters
----------
fepout : str
Path to fepout file to extract data from.

Returns
-------
u_nk : DataFrame
Potential energy for each alchemical state (k) for each frame (n).

"""
# lists to get timesteps and work values of each window
win_ts = []
win_de = []

# create dataframe for results
df = pd.DataFrame(columns=['timestep','fep-lambda'])

# boolean flag to parse data after equil time
parsing = False

# open and get data from fep file.
with anyopen(fep_file, 'r') as f:
data = f.readlines()

for line in data:
l = line.strip().split()

# this line marks end of window; dump data into dataframe
if '#Free' in l:

# convert last window's work and timestep values to np arrays
win_de_arr = np.asarray(win_de)
win_ts_arr = np.asarray(win_ts)

# extract lambda values for finished window
# lambda1 = sampling lambda (row), lambda2 = evaluated lambda (col)
lambda1 = "{0:.2f}".format(float(l[7]))
lambda2 = "{0:.2f}".format(float(l[8]))

# create dataframe of timestep and work values
# this window's data goes in row LAMBDA1 and column LAMBDA2
tempDF = pd.DataFrame({
'timestep':win_ts_arr,
'fep-lambda': np.full(len(win_de_arr),lambda1),
lambda1:0,
lambda2:win_de_arr})

# join the new window's df to existing df
df = pd.concat([df, tempDF])
df.fillna(0, inplace=True)

# reset values for next window of fepout file
win_de = []
win_ts = []
parsing = False

# append work value from 'dE' column of fepout file
if parsing:
win_de.append(float(l[6]))
win_ts.append(float(l[1]))

# turn parsing on after line 'STARTING COLLECTION OF ENSEMBLE AVERAGE'
if '#STARTING' in l:
parsing = True

# create last dataframe for fep-lambda at last LAMBDA2
tempDF = pd.DataFrame({
'timestep':win_ts_arr,
'fep-lambda': lambda2})
df = pd.concat([df, tempDF])
df.fillna(0, inplace=True)

df.set_index(['timestep','fep-lambda'], inplace=True)
return df




def extract_dHdl(fepout):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If it's not implemented then leave it out, better than a pass and weird failures down the line.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also, removing it will improve your coverage ;-)

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Got it, I'll take out this placeholder function.

"""Return gradients `dH/dl` from a NAMD TI fepout file.

Parameters
----------
xvg : str
Path to fepout file to extract data from.

Returns
-------
dH/dl : Series
dH/dl as a function of time.

"""
pass
46 changes: 46 additions & 0 deletions src/alchemlyb/tests/parsing/test_namd.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
"""NAMD parser tests.

"""

from alchemlyb.parsing.namd import extract_u_nk
from alchemtest.namd import load_tyr2ala
from numpy.testing import assert_almost_equal


def test_u_nk():
"""Test that u_nk has the correct form when extracted from files.
"""
dataset = load_tyr2ala()

for direction in dataset['data']:
for filename in dataset['data'][direction]:
u_nk = extract_u_nk(filename)

assert u_nk.index.names == ['timestep', 'fep-lambda']
if direction == 'forward':
assert u_nk.shape == (21021, 21)
elif direction == 'backward':
assert u_nk.shape == (21021, 21)

def test_bar_namd():
"""Test BAR calculation on NAMD data.
"""
from alchemlyb.estimators import BAR
import numpy as np

# load data
dataset = load_tyr2ala()
u_nk1 = extract_u_nk(dataset['data']['forward'][0])
u_nk2 = extract_u_nk(dataset['data']['backward'][0])

# combine dataframes of fwd and rev directions
u_nk1.replace(0, np.nan, inplace=True)
u_nk1[u_nk1.isnull()] = u_nk2
u_nk1.replace(np.nan, 0, inplace=True)
u_nk = u_nk1.sort_index(level=u_nk1.index.names[1:])

# after loading BOTH fwd and rev data, do BAR calculation
bar = BAR()
bar.fit(u_nk)
dg = (bar.delta_f_.iloc[0].iloc[-1])
assert_almost_equal(dg, 6.03126982925, decimal=4)