diff --git a/.gitignore b/.gitignore
index 0d20b648..2a45ed72 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1 +1,2 @@
*.pyc
+.vscode
diff --git a/AUTHORS b/AUTHORS
index a0091c9e..672262b7 100644
--- a/AUTHORS
+++ b/AUTHORS
@@ -33,3 +33,4 @@ Chronological list of authors
2019
- Victoria Lim (@vlim)
- Hyungro Lee (@lee212)
+ - Mohammad S. Barhaghi (@msoroush)
\ No newline at end of file
diff --git a/CHANGES b/CHANGES
index bab67b0c..7a4df338 100644
--- a/CHANGES
+++ b/CHANGES
@@ -25,6 +25,7 @@ Enhancements
- NAMD FEP parser (#7, #75)
- BAR estimator (#40)
- enhanced performance of Gromacs parsers with pandas.read_csv() (#81)
+ - GOMC TI and FEP parser (#77)
Deprecations
diff --git a/docs/parsing.rst b/docs/parsing.rst
index 0fcf060c..c4cc96f4 100644
--- a/docs/parsing.rst
+++ b/docs/parsing.rst
@@ -111,4 +111,5 @@ See the documentation for the package you are using for more details on parser u
gmx
amber
namd
+ gomc
diff --git a/docs/parsing/alchemlyb.parsing.gomc.rst b/docs/parsing/alchemlyb.parsing.gomc.rst
new file mode 100644
index 00000000..200d4a29
--- /dev/null
+++ b/docs/parsing/alchemlyb.parsing.gomc.rst
@@ -0,0 +1,15 @@
+GOMC parsing
+==============
+.. automodule:: alchemlyb.parsing.gomc
+
+The parsers featured in this module are constructed to properly parse `GOMC `_ free energy output files,
+containing the Hamiltonian derivatives (:math:`\frac{dH}{d\lambda}`) for TI-based estimators and Hamiltonian differences (:math:`\Delta H`
+for all lambda states in the alchemical leg) for FEP-based estimators (BAR/MBAR).
+
+
+API Reference
+-------------
+This submodule includes these parsing functions:
+
+.. autofunction:: alchemlyb.parsing.gomc.extract_dHdl
+.. autofunction:: alchemlyb.parsing.gomc.extract_u_nk
diff --git a/src/alchemlyb/parsing/gomc.py b/src/alchemlyb/parsing/gomc.py
new file mode 100644
index 00000000..dc3433b3
--- /dev/null
+++ b/src/alchemlyb/parsing/gomc.py
@@ -0,0 +1,203 @@
+"""Parsers for extracting alchemical data from `GOMC `_ output files.
+
+"""
+import pandas as pd
+
+from .util import anyopen
+
+
+# TODO: perhaps move constants elsewhere?
+# these are the units we need for dealing with GOMC, so not
+# a bad place for it, honestly
+# (kB in kJ/molK)
+k_b = 8.3144621E-3
+
+
+def extract_u_nk(filename, T):
+ """Return reduced potentials `u_nk` from a Hamiltonian differences dat file.
+
+ Parameters
+ ----------
+ filename : str
+ Path to free energy file to extract data from.
+ T : float
+ Temperature in Kelvin at which the simulation was sampled.
+
+ Returns
+ -------
+ u_nk : DataFrame
+ Potential energy for each alchemical state (k) for each frame (n).
+
+ """
+
+ dh_col_match = "dU/dL"
+ h_col_match = "DelE"
+ pv_col_match = 'PV'
+ u_col_match = ['Total_En']
+ beta = 1/(k_b * T)
+
+ state, lambdas, statevec = _extract_state(filename)
+
+ # extract a DataFrame from free energy file data
+ df = _extract_dataframe(filename)
+
+ times = df[df.columns[0]]
+
+ # want to grab only dH columns
+ DHcols = [col for col in df.columns if (h_col_match in col)]
+ dH = df[DHcols]
+
+ # GOMC also gives us pV directly; need this for reduced potential
+ pv_cols = [col for col in df.columns if (pv_col_match in col)]
+ pv = None
+ if pv_cols:
+ pv = df[pv_cols[0]]
+
+ # GOMC also gives us total energy U directly; need this for reduced potential
+ u_cols = [col for col in df.columns if any(single_u_col_match in col for single_u_col_match in u_col_match)]
+ u = None
+ if u_cols:
+ u = df[u_cols[0]]
+
+ u_k = dict()
+ cols = list()
+ for col in dH:
+ u_col = eval(col.split('->')[1][:-1])
+ # calculate reduced potential u_k = dH + pV + U
+ u_k[u_col] = beta * dH[col].values
+ if pv_cols:
+ u_k[u_col] += beta * pv.values
+ if u_cols:
+ u_k[u_col] += beta * u.values
+ cols.append(u_col)
+
+ u_k = pd.DataFrame(u_k, columns=cols,
+ index=pd.Float64Index(times.values, name='time'))
+
+ # Need to modify the lambda name
+ cols = [l + "-lambda" for l in lambdas]
+ # create columns for each lambda, indicating state each row sampled from
+ for i, l in enumerate(cols):
+ u_k[l] = statevec[i]
+
+ # set up new multi-index
+ newind = ['time'] + cols
+ u_k = u_k.reset_index().set_index(newind)
+
+ u_k.name = 'u_nk'
+
+ return u_k
+
+
+def extract_dHdl(filename, T):
+ """Return gradients `dH/dl` from a Hamiltonian differences free energy file.
+
+ Parameters
+ ----------
+ filename : str
+ Path to free energy file to extract data from.
+ T : float
+ Temperature in Kelvin at which the simulation was sampled.
+
+ Returns
+ -------
+ dH/dl : Series
+ dH/dl as a function of step for this lambda window.
+
+ """
+ beta = 1/(k_b * T)
+
+ state, lambdas, statevec = _extract_state(filename)
+
+ # extract a DataFrame from free energy data
+ df = _extract_dataframe(filename)
+
+ times = df[df.columns[0]]
+
+ # want to grab only dH/dl columns
+ dHcols = []
+ for l in lambdas:
+ dHcols.extend([col for col in df.columns if (l in col)])
+
+ dHdl = df[dHcols]
+
+ # make dimensionless
+ dHdl *= beta
+
+ dHdl = pd.DataFrame(dHdl.values, columns=lambdas,
+ index=pd.Float64Index(times.values, name='time'))
+
+ # Need to modify the lambda name
+ cols = [l + "-lambda" for l in lambdas]
+ # create columns for each lambda, indicating state each row sampled from
+ for i, l in enumerate(cols):
+ dHdl[l] = statevec[i]
+
+ # set up new multi-index
+ newind = ['time'] + cols
+ dHdl= dHdl.reset_index().set_index(newind)
+
+ dHdl.name='dH/dl'
+
+ return dHdl
+
+
+def _extract_state(filename):
+ """Extract information on state sampled, names of lambdas.
+
+ """
+ state = None
+ with anyopen(filename, 'r') as f:
+ for line in f:
+ if ('#' in line) and ('State' in line):
+ state = int(line.split('State')[1].split(':')[0])
+ # GOMC always print these two fields
+ lambdas = ['Coulomb', 'VDW']
+ statevec = eval(line.strip().split(' = ')[-1])
+ break
+
+ return state, lambdas, statevec
+
+
+def _extract_dataframe(filename):
+ """Extract a DataFrame from free energy data.
+
+ """
+ dh_col_match = "dU/dL"
+ h_col_match = "DelE"
+ pv_col_match = 'PV'
+ u_col_match = 'Total_En'
+
+ xaxis = "time"
+ with anyopen(filename, 'r') as f:
+ names = []
+ rows = []
+ for line in f:
+ line = line.strip()
+ if len(line) == 0:
+ # avoid parsing empty line
+ continue
+ elif line.startswith('#T'):
+ # this line has state information. No need to be parsed
+ continue
+ elif line.startswith("#Steps"):
+ # read the headers
+ elements = line.split()
+ for i, element in enumerate(elements):
+ if element.startswith(u_col_match):
+ names.append(element)
+ elif element.startswith(dh_col_match):
+ names.append(element)
+ elif element.startswith(h_col_match):
+ names.append(element)
+ elif element.startswith(pv_col_match):
+ names.append(element)
+ else:
+ # parse line as floats
+ row = map(float, line.split())
+ rows.append(row)
+
+ cols = [xaxis]
+ cols.extend(names)
+
+ return pd.DataFrame(rows, columns=cols)
diff --git a/src/alchemlyb/tests/parsing/test_gomc.py b/src/alchemlyb/tests/parsing/test_gomc.py
new file mode 100644
index 00000000..c03a0b6b
--- /dev/null
+++ b/src/alchemlyb/tests/parsing/test_gomc.py
@@ -0,0 +1,32 @@
+"""GOMC parser tests.
+
+"""
+
+from alchemlyb.parsing.gomc import extract_dHdl, extract_u_nk
+from alchemtest.gomc import load_benzene
+
+
+def test_dHdl():
+ """Test that dHdl has the correct form when extracted from files.
+
+ """
+ dataset = load_benzene()
+
+ for filename in dataset['data']:
+ dHdl = extract_dHdl(filename, T=298)
+
+ assert dHdl.index.names == ['time', 'Coulomb-lambda', 'VDW-lambda']
+ assert dHdl.shape == (1000, 2)
+
+def test_u_nk():
+ """Test that u_nk has the correct form when extracted from files.
+
+ """
+ dataset = load_benzene()
+
+ for filename in dataset['data']:
+ u_nk = extract_u_nk(filename, T=298)
+
+ assert u_nk.index.names == ['time', 'Coulomb-lambda', 'VDW-lambda']
+ assert u_nk.shape == (1000, 23)
+
diff --git a/src/alchemlyb/tests/test_fep_estimators.py b/src/alchemlyb/tests/test_fep_estimators.py
index 839d1574..f33a72fd 100644
--- a/src/alchemlyb/tests/test_fep_estimators.py
+++ b/src/alchemlyb/tests/test_fep_estimators.py
@@ -6,10 +6,11 @@
import numpy as np
import pandas as pd
-from alchemlyb.parsing import gmx, amber, namd
+from alchemlyb.parsing import gmx, amber, namd, gomc
from alchemlyb.estimators import MBAR, BAR
import alchemtest.gmx
import alchemtest.amber
+import alchemtest.gomc
import alchemtest.namd
def gmx_benzene_coul_u_nk():
@@ -83,6 +84,14 @@ def amber_bace_example_complex_vdw():
for filename in dataset['data']['complex']['vdw']])
return u_nk
+def gomc_benzene_u_nk():
+ dataset = alchemtest.gomc.load_benzene()
+
+ u_nk = pd.concat([gomc.extract_u_nk(filename, T=298)
+ for filename in dataset['data']])
+
+ return u_nk
+
def namd_tyr2ala():
dataset = alchemtest.namd.load_tyr2ala()
u_nk1 = namd.extract_u_nk(dataset['data']['forward'][0], T=300)
@@ -126,6 +135,7 @@ class TestMBAR(FEPestimatorMixin):
(gmx_water_particle_with_potential_energy, -11.675, 0.083589),
(gmx_water_particle_without_energy, -11.654, 0.083415),
(amber_bace_example_complex_vdw, 2.40200, 0.0618453),
+ (gomc_benzene_u_nk, -0.79994, 0.091579),
])
def X_delta_f(self, request):
get_unk, E, dE = request.param
@@ -152,6 +162,7 @@ class TestBAR(FEPestimatorMixin):
(gmx_water_particle_without_energy, -11.660, 0.064914),
(amber_bace_example_complex_vdw, 2.37846, 0.050899),
(namd_tyr2ala, 11.0044, 0.10235),
+ (gomc_benzene_u_nk, -0.87095, 0.071263),
])
def X_delta_f(self, request):
get_unk, E, dE = request.param
diff --git a/src/alchemlyb/tests/test_ti_estimators.py b/src/alchemlyb/tests/test_ti_estimators.py
index 537c3d08..208e27a9 100644
--- a/src/alchemlyb/tests/test_ti_estimators.py
+++ b/src/alchemlyb/tests/test_ti_estimators.py
@@ -5,10 +5,11 @@
import pandas as pd
-from alchemlyb.parsing import gmx, amber
+from alchemlyb.parsing import gmx, amber, gomc
from alchemlyb.estimators import TI
import alchemtest.gmx
import alchemtest.amber
+import alchemtest.gomc
def gmx_benzene_coul_dHdl():
@@ -91,9 +92,17 @@ def amber_simplesolvated_vdw_dHdl():
return dHdl
+def gomc_benzene_dHdl():
+ dataset = alchemtest.gomc.load_benzene()
+
+ dHdl = pd.concat([gomc.extract_dHdl(filename, T=298)
+ for filename in dataset['data']])
+
+ return dHdl
class TIestimatorMixin:
+
def test_get_delta_f(self, X_delta_f):
dHdl, E, dE = X_delta_f
est = self.cls().fit(dHdl)
@@ -127,5 +136,3 @@ class TestTI(TIestimatorMixin):
def X_delta_f(self, request):
get_dHdl, E, dE = request.param
return get_dHdl(), E, dE
-
-