Skip to content

Commit

Permalink
Add ability to create residue hydrogen bond interaction matrix. (#960)
Browse files Browse the repository at this point in the history
* Fix include

* Start adding solute solute hbond matrix by residue

* Ensure matrix is allocated

* Protect tests in parallel

* Make SetElement a DataSet_2D function

* Add matrix normalization option by frames

* Add resmax norm but disable for now, needs more testing

* Start adding hbond matrix test

* Add uuresmatrixout

* Add test normalizing by frames

* Change default to normalize by # frames

* Add manual entry

* Enable hbond matrix test

* 6.4.5. Revision increase for hbond uuresmatrix
  • Loading branch information
drroe authored Mar 30, 2022
1 parent 1b624bd commit f1783be
Show file tree
Hide file tree
Showing 16 changed files with 1,225 additions and 34 deletions.
43 changes: 41 additions & 2 deletions doc/cpptraj.lyx
Original file line number Diff line number Diff line change
Expand Up @@ -28076,6 +28076,10 @@ hbond
[bseries [bseriesfile <filename>]]
\end_layout

\begin_layout LyX-Code
[uuresmatrix [uuresmatrixnorm {none|frames}] [uuresmatrixout <file>]]
\end_layout

\begin_layout LyX-Code
[splitframe <comma-separated-list>]
\end_layout
Expand Down Expand Up @@ -28261,6 +28265,30 @@ bridgebyatom
<filename>] File to write bridge time series data to.
\end_layout

\end_deeper
\begin_layout Description
[uuresmatrix] If specified, create a matrix with aspect [UUresmat] containing
# of hydrogen bonds between each possible solute residue pair.
\end_layout

\begin_deeper
\begin_layout Description
[uuresmatrixnorm
\begin_inset space ~
\end_inset

{none|frames}] Control how matrix is normalized: none=no normalization,
frames=normalize by total # frames.
\end_layout

\begin_layout Description
[uuresmatrixout
\begin_inset space ~
\end_inset

<file>] If specified, write matrix data to specified file.
\end_layout

\end_deeper
\begin_layout Description
[splitframe
Expand Down Expand Up @@ -28348,8 +28376,11 @@ series
\end_layout

\begin_layout Description
<dsname>[bridge_<indexlist>] (bseries only) Time series for bridge; 1 for
present, 0 for not present.
<dsname>[bridge_<indexlist>] (
\series bold
bseries
\series default
only) Time series for bridge; 1 for present, 0 for not present.
The <indexlist> is an underscore ('_') delimited list of bridged atom/residue
numbers (depending on
\series bold
Expand All @@ -28358,6 +28389,14 @@ bridgebyatom
).
\end_layout

\begin_layout Description
<dsname>[UUresmatr] (
\series bold
uuresmatrix
\series default
only) Solute residue hydrogen bond matrix.
\end_layout

\end_deeper
\begin_layout Standard

Expand Down
125 changes: 123 additions & 2 deletions src/Action_HydrogenBond.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,11 @@
#include "Action_HydrogenBond.h"
#include "CpptrajStdio.h"
#include "Constants.h"
#include "TorsionRoutines.h"
#include "StringRoutines.h" // ByteString
#include "DataSet_2D.h"
#include "DataSet_integer.h"
#include "DistRoutines.h"
#include "StringRoutines.h" // ByteString
#include "TorsionRoutines.h"
#ifdef _OPENMP
# include <omp.h>
#endif
Expand All @@ -18,6 +20,7 @@ Action_HydrogenBond::Action_HydrogenBond() :
NumSolvent_(0),
NumBridge_(0),
BridgeID_(0),
UU_matrix_byRes_(0),
UUseriesout_(0),
UVseriesout_(0),
Bseriesout_(0),
Expand All @@ -29,6 +32,7 @@ Action_HydrogenBond::Action_HydrogenBond() :
bothEnd_(0),
Nframes_(0),
debug_(0),
UUmatByRes_norm_(NORM_FRAMES),
series_(false),
Bseries_(false),
seriesUpdated_(false),
Expand All @@ -52,6 +56,7 @@ void Action_HydrogenBond::Help() const {
"\t[solvout <filename>] [bridgeout <filename>] [bridgebyatom]\n"
"\t[series [uuseries <filename>] [uvseries <filename>]]\n"
"\t[bseries [bseriesfile <filename>]]\n"
"\t[uuresmatrix [uuresmatrixnorm {none|frames}] [uuresmatrixout <file>]]\n"
"\t[splitframe <comma-separated-list>]\n"
" Hydrogen bond is defined as A-HD, where A is acceptor heavy atom, H is\n"
" hydrogen, D is donor heavy atom. Hydrogen bond is formed when\n"
Expand Down Expand Up @@ -91,6 +96,27 @@ Action::RetType Action_HydrogenBond::Init(ArgList& actionArgs, ActionInit& init,
Bseriesout_ = init.DFL().AddDataFile(actionArgs.GetStringKey("bseriesfile"), actionArgs);
init.DSL().SetDataSetsPending(true);
}
bool do_uuResMatrix = actionArgs.hasKey("uuresmatrix");
DataFile* uuResMatrixFile = 0;
if (do_uuResMatrix) {
uuResMatrixFile = init.DFL().AddDataFile(actionArgs.GetStringKey("uuresmatrixout"),
ArgList("nosquare2d"),
actionArgs);
std::string uuResMatrixNorm = actionArgs.GetStringKey("uuresmatrixnorm");
if (!uuResMatrixNorm.empty()) {
if (uuResMatrixNorm == "none")
UUmatByRes_norm_ = NORM_NONE;
else if (uuResMatrixNorm == "frames")
UUmatByRes_norm_ = NORM_FRAMES;
//else if (uuResMatrixNorm == "resmax") // DISABLE for now, needs more testing
// UUmatByRes_norm_ = NORM_RESMAX;
else {
mprinterr("Error: Invalid keyword for 'uuresmatrixnorm'.\n");
return Action::ERR;
}
}

}
std::string avgname = actionArgs.GetStringKey("avgout");
std::string solvname = actionArgs.GetStringKey("solvout");
if (solvname.empty()) solvname = avgname;
Expand Down Expand Up @@ -190,6 +216,16 @@ Action::RetType Action_HydrogenBond::Init(ArgList& actionArgs, ActionInit& init,
solvout_ = init.DFL().AddCpptrajFile(solvname,"Avg. solute-solvent HBonds");
bridgeout_ = init.DFL().AddCpptrajFile(bridgename,"Solvent bridging info");
}
if (do_uuResMatrix) {
UU_matrix_byRes_ = (DataSet_2D*)
init.DSL().AddSet(DataSet::MATRIX_DBL, MetaData(hbsetname_, "UUresmat"));
if (UU_matrix_byRes_ == 0) return Action::ERR;
UU_matrix_byRes_->ModifyDim(Dimension::X).SetLabel("Res");
UU_matrix_byRes_->ModifyDim(Dimension::Y).SetLabel("Res");
if (uuResMatrixFile != 0)
uuResMatrixFile->AddDataSet( UU_matrix_byRes_ );
}

# ifdef _OPENMP
// Each thread needs temp. space to store found hbonds every frame
// to avoid memory clashes when adding/updating in map.
Expand Down Expand Up @@ -264,6 +300,17 @@ Action::RetType Action_HydrogenBond::Init(ArgList& actionArgs, ActionInit& init,
if (Bseriesout_ != 0)
mprintf("\tWriting bridge time series to '%s'\n", Bseriesout_->DataFilename().full());
}
if (UU_matrix_byRes_ != 0) {
mprintf("\tCalculating solute-solute residue matrix: %s\n", UU_matrix_byRes_->legend());
if (uuResMatrixFile != 0)
mprintf("\tWriting solute-solute residue matrix to '%s'\n", uuResMatrixFile->DataFilename().full());
if (UUmatByRes_norm_ == NORM_NONE)
mprintf("\tNot normalizing solute-solute residue matrix.\n");
else if (UUmatByRes_norm_ == NORM_FRAMES)
mprintf("\tNormalizing solute-solute residue matrix by frames.\n");
else if (UUmatByRes_norm_ == NORM_RESMAX)
mprintf("\tNormalizing solute-solute residue matrix by max possible h. bonds between residues.\n");
}
if (imageOpt_.UseImage())
mprintf("\tImaging enabled.\n");
masterDSL_ = init.DslPtr();
Expand Down Expand Up @@ -608,6 +655,63 @@ Action::RetType Action_HydrogenBond::Setup(ActionSetup& setup) {
return Action::SKIP;
}

// Hbond matrix setup
if (UU_matrix_byRes_ != 0) {
// Find highest solute index
int highest_U_idx = 0;
for (int rnum = 0; rnum < setup.Top().Nres(); rnum++)
{
// Find molecule number
int at0 = setup.Top().Res(rnum).FirstAtom();
int mnum = setup.Top()[at0].MolNum();
if (!setup.Top().Mol(mnum).IsSolvent())
highest_U_idx = rnum;
}
mprintf("\tHighest solute residue # = %i\n", highest_U_idx+1);
if (UU_matrix_byRes_->AllocateHalf(highest_U_idx+1)) {
mprinterr("Error: Allocating solute-solute hbond matrix failed.\n");
return Action::ERR;
}
// Set up normalization matrix if necesary
if (UUmatByRes_norm_ == NORM_RESMAX) {
UU_norm_byRes_.AllocateHalf( UU_matrix_byRes_->Nrows() );
for (unsigned int sidx0 = 0; sidx0 < Both_.size(); sidx0++)
{
Site const& Site0 = Both_[sidx0];
int mol0 = (*CurrentParm_)[Site0.Idx()].MolNum();
int rnum0 = (*CurrentParm_)[Site0.Idx()].ResNum();
// Loop over solute sites that can be both donor and acceptor
for (unsigned int sidx1 = sidx0 + 1; sidx1 < bothEnd_; sidx1++)
{
Site const& Site1 = Both_[sidx1];
if (!noIntramol_ || mol0 != (*CurrentParm_)[Site1.Idx()].MolNum()) {
int rnum1 = (*CurrentParm_)[Site1.Idx()].ResNum();
// The max # of hbonds between sites depends on # hydrogens.
// However, given H1-X-H2 Y, you tend to have either Y..H1-X or
// Y..H2-X, not both, so do based on sites only.
//UU_norm_byRes_.UpdateElement(rnum0, rnum1, Site0.n_hydrogens() + Site1.n_hydrogens());
UU_norm_byRes_.UpdateElement(rnum0, rnum1, 2.0);
}
} // END loop over solute sites that are D/A
// Loop over solute acceptor-only
for (Iarray::const_iterator a_atom = Acceptor_.begin(); a_atom != Acceptor_.end(); ++a_atom)
{
if (!noIntramol_ || mol0 != (*CurrentParm_)[*a_atom].MolNum()) {
int rnum1 = (*CurrentParm_)[*a_atom].ResNum();
//UU_norm_byRes_.UpdateElement(rnum0, rnum1, Site0.n_hydrogens());
UU_norm_byRes_.UpdateElement(rnum0, rnum1, 1.0);
}
} // END loop over acceptor atoms
} // END loop over all D/A sites
mprintf("DEBUG: Residue normalization matrix:\n");
for (unsigned int rnum0 = 0; rnum0 < UU_norm_byRes_.Nrows(); rnum0++) {
for (unsigned int rnum1 = rnum0; rnum1 < UU_norm_byRes_.Ncols(); rnum1++) {
mprintf("\t%6i %6i %f\n", rnum0+1, rnum1+1, UU_norm_byRes_.GetElement(rnum1, rnum0));
}
}
}
}

return Action::OK;
}

Expand Down Expand Up @@ -759,6 +863,11 @@ void Action_HydrogenBond::AddUU(double dist, double angle, int fnum, int a_atom,
// mprintf("DBG1: OLD hbond : %8i .. %8i - %8i\n", a_atom+1,h_atom+1,d_atom+1);
}
it->second.Update(dist, angle, fnum, splitFrames_, onum);
if (UU_matrix_byRes_ != 0) {
int a_res = (*CurrentParm_)[a_atom].ResNum();
int d_res = (*CurrentParm_)[d_atom].ResNum();
UU_matrix_byRes_->UpdateElement(a_res, d_res, 1.0);
}
}

// Action_HydrogenBond::CalcSiteHbonds()
Expand Down Expand Up @@ -1417,6 +1526,9 @@ std::string Action_HydrogenBond::MemoryUsage(size_t n_uu_pairs, size_t n_uv_pair
memTotal += sizeof(BmapType);
for (BmapType::const_iterator it = BridgeMap_.begin(); it != BridgeMap_.end(); ++it)
memTotal += (sizeBRmapElt + it->first.size()*sizeof(int));
// Matrices
if (UU_matrix_byRes_ != 0)
memTotal += UU_matrix_byRes_->MemUsageInBytes();

return ByteString( memTotal, BYTE_DECIMAL );
}
Expand Down Expand Up @@ -1489,6 +1601,15 @@ void Action_HydrogenBond::Print() {
# endif
// Ensure all series have been updated for all frames.
UpdateSeries();
// Matrix normalization
if (UU_matrix_byRes_ != 0) {
if (UUmatByRes_norm_ == NORM_FRAMES) {
double norm = 1.0 / ((double)Nframes_);
for (unsigned int r = 0; r != UU_matrix_byRes_->Nrows(); r++)
for (unsigned int c = 0; c != UU_matrix_byRes_->Ncols(); c++)
UU_matrix_byRes_->SetElement(c, r, UU_matrix_byRes_->GetElement(c, r) * norm);
}
}

if (CurrentParm_ == 0) return;
// Calculate necessary column width for strings based on how many residues.
Expand Down
7 changes: 7 additions & 0 deletions src/Action_HydrogenBond.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,13 @@
#include "Action.h"
#include "ImageOption.h"
#include "DataSet_integer.h"
#include "DataSet_MatrixDbl.h"
#include "OnlineVarT.h"
//#inc lude "CpptrajStdio.h" // DEBUG
#ifdef TIMER
# include "Timer.h"
#endif
class DataSet_2D;
class Action_HydrogenBond : public Action {
public:
Action_HydrogenBond();
Expand All @@ -26,6 +28,8 @@ class Action_HydrogenBond : public Action {

typedef std::vector<int> Iarray;

enum MatrixNormType { NORM_NONE = 0, NORM_FRAMES, NORM_RESMAX };

class Site;
class Hbond;
class Bridge;
Expand Down Expand Up @@ -94,6 +98,7 @@ class Action_HydrogenBond : public Action {
AtomMask SolventDonorMask_;
AtomMask SolventAcceptorMask_;
AtomMask Mask_;
DataSet_MatrixDbl UU_norm_byRes_; ///< For normalizing the max possible # hbonds by residue
ImageOption imageOpt_; ///< Used to determine if imaging should be performed
Iarray splitFrames_; ///< For calculating hydrogen bonds by parts
# ifdef TIMER
Expand All @@ -108,6 +113,7 @@ class Action_HydrogenBond : public Action {
DataSet* NumSolvent_; ///< Hold # UV hbonds per frame.
DataSet* NumBridge_; ///< Hold # solute-solvent bridges per frame.
DataSet* BridgeID_; ///< Hold info on each bridge per frame.
DataSet_2D* UU_matrix_byRes_; ///< Record # hbonds between each residue pair.
DataFile* UUseriesout_; ///< File to write UU time series to.
DataFile* UVseriesout_; ///< File to write UV time series to.
DataFile* Bseriesout_; ///< File to write bridge time series to.
Expand All @@ -119,6 +125,7 @@ class Action_HydrogenBond : public Action {
unsigned int bothEnd_; ///< Index in Both_ where donor-only sites begin
int Nframes_; ///< Number of frames action has been active
int debug_;
MatrixNormType UUmatByRes_norm_;
bool series_; ///< If true track hbond time series.
bool Bseries_; ///< If true track bridge time series.
bool seriesUpdated_; ///< If false hbond time series need to be finished.
Expand Down
7 changes: 6 additions & 1 deletion src/DataSet_2D.h
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
#ifndef INC_DATASET_2D_H
#define INC_DATASET_2D_H
#include "DataSet.h"
#include "CpptrajFile.h"
/// Interface for 2D DataSets.
class DataSet_2D : public DataSet {
public:
Expand All @@ -12,16 +11,22 @@ class DataSet_2D : public DataSet {
DataSet(tIn, MATRIX_2D, fIn, 2) {}
// TODO enable Append?
int Append(DataSet*) { return 1; }
// ----- DataSet functions -------------------
int Allocate(SizeArray const& s) { return Allocate2D(s[0], s[1]); }
// ----- DataSet_2D functions ----------------
// TODO: 1 allocate using MatrixKind?
/// Set up matrix for given # columns and rows. // TODO replace with Allocate
virtual int Allocate2D(size_t, size_t) = 0;
/// Set up symmetric matrix with diagonal.
virtual int AllocateHalf(size_t) = 0;
/// Set up symmetrix matrix with no diagonal.
virtual int AllocateTriangle(size_t) = 0;

/// Add given value to element at specified col/row
virtual void UpdateElement(size_t, size_t, double) = 0;
/// Set element at specified col/row to given value
virtual void SetElement(size_t, size_t, double) = 0;

/// \return Data from matrix at col/row
virtual double GetElement(size_t, size_t) const = 0;
/// \return Data from underlying matrix array.
Expand Down
12 changes: 6 additions & 6 deletions src/DataSet_MatrixDbl.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,22 +21,22 @@ class DataSet_MatrixDbl : public DataSet_2D {
void WriteBuffer(CpptrajFile&, SizeArray const&) const;
size_t MemUsageInBytes() const { return mat_.DataSize(); }
// ----- DataSet_2D functions ----------------
void UpdateElement(size_t x,size_t y,double v) { mat_.updateElement(x,y,v); }
int Allocate2D(size_t x,size_t y) { kind_=FULL; return mat_.resize(x,y); }
int AllocateHalf(size_t x) { kind_=HALF; return mat_.resize(x,0); }
int AllocateTriangle(size_t x) { kind_=TRI; return mat_.resize(0,x); }
double GetElement(size_t x,size_t y) const { return mat_.element(x,y); }
double GetElement(size_t i) const { return mat_[i]; }
size_t Nrows() const { return mat_.Nrows(); }
size_t Ncols() const { return mat_.Ncols(); }
void UpdateElement(size_t x,size_t y,double v) { mat_.updateElement(x,y,v); }
void SetElement(size_t x,size_t y,double d) { mat_.setElement(x,y,d); }
double GetElement(size_t x,size_t y) const { return mat_.element(x,y); }
double GetElement(size_t i) const { return mat_[i]; }
size_t Nrows() const { return mat_.Nrows(); }
size_t Ncols() const { return mat_.Ncols(); }
double* MatrixArray() const;
MatrixKindType MatrixKind() const { return kind_; }
// -------------------------------------------
unsigned int Nsnapshots() const { return snap_; }
void IncrementSnapshots() { ++snap_; }
double& Element(size_t x, size_t y) { return mat_.element(x,y); }
int AddElement(double d) { return mat_.addElement(d); }
void SetElement(size_t x,size_t y,double d){ mat_.setElement(x,y,d); }
/// Type definition of iterator over matrix elements.
typedef Matrix<double>::iterator iterator;
iterator begin() { return mat_.begin(); }
Expand Down
Loading

0 comments on commit f1783be

Please sign in to comment.