From a2a9a5cacbb3399b6516a7ac2e735b0903c627fb Mon Sep 17 00:00:00 2001
From: VOD555 <gn8008@hotmail.com>
Date: Wed, 1 Jul 2020 13:56:09 -0700
Subject: [PATCH] fix InterRDF_s(..., density) normalization

- fix #2811
  (see also https://github.com/MDAnalysis/pmda/issues/120)
- add test to compare InterRDF and InterRDF_s
- add description for density option (was not documented since 0.19.0)
- updated docs for rdf
  - updated docs with equationi
  - attributes for InterRDF and InterRDF_s
  - reference equations
  - minor grammar/style fixes
  - remove ill-advised usage of InterRDF_s from docs
- update CHANGELOG
---
 package/CHANGELOG                             |   5 +-
 package/MDAnalysis/analysis/rdf.py            | 141 ++++++++++++++----
 .../MDAnalysisTests/analysis/test_rdf_s.py    |  17 ++-
 3 files changed, 127 insertions(+), 36 deletions(-)
diff --git a/package/CHANGELOG b/package/CHANGELOG
index cb39b82cc82..d0d568b052f 100644
--- a/package/CHANGELOG
+++ b/package/CHANGELOG
@@ -13,14 +13,15 @@ The rules for this file:
   * release numbers follow "Semantic Versioning" http://semver.org
 
 ------------------------------------------------------------------------------
-??/>>/20 orbeckst
+??/??/20 orbeckst, VOD555
 
   * 1.0.1
 
 Fixes
   * Development status changed from beta to mature (Issue #2773)
   * pip installation only requests Python 2.7-compatible packages (#2736)
-
+  * fixed InterRDF_s function now gives correct results if density=True.
+    (Issue #2811, PR #2812)
 
 
 06/09/20 richardjgowers, kain88-de, lilyminium, p-j-smith, bdice, joaomcteixeira,
diff --git a/package/MDAnalysis/analysis/rdf.py b/package/MDAnalysis/analysis/rdf.py
index 6bb37495acb..41f7d1b5534 100644
--- a/package/MDAnalysis/analysis/rdf.py
+++ b/package/MDAnalysis/analysis/rdf.py
@@ -46,6 +46,8 @@
 
 and the average number of :math:`b` particles within radius :math:`r`
 
+.. _equation-countab:
+
 .. math::
 
    N_{ab}(r) = \rho G_{ab}(r)
@@ -55,6 +57,13 @@
 the first solvation shell :math:`N(r_1)` where :math:`r_1` is the position of
 the first minimum in :math:`g(r)`.
 
+In :class:`InterRDF_s`, we provide an option `density`. When `density` is
+`True`, it will return the RDF :math:`g_{ab}(r)`; when `density` is False`,
+it will return the density of particle :math:`b` in a shell at distance
+:math:`r` around a :math:`a` particle, which is
+
+.. math::
+   n_{ab}(r) = \rho g_{ab}(r)
 
 .. _`pair distribution functions`:
    https://en.wikipedia.org/wiki/Pair_distribution_function
@@ -68,7 +77,7 @@
 :class:`InterRDF` is a tool to calculate average radial distribution functions
 between two groups of atoms. Suppose we have two AtomGroups ``A`` and
 ``B``. ``A`` contains atom ``A1``, ``A2``, and ``B`` contains ``B1``,
-``B2``. Give ``A`` and ``B`` to class:`InterRDF`, the output will be the
+``B2``. Given ``A`` and ``B`` to :class:`InterRDF`, the output will be the
 average of RDFs between ``A1`` and ``B1``, ``A1`` and ``B2``, ``A2`` and
 ``B1``, ``A2`` and ``B2``. A typical application is to calculate the RDF of
 solvent with itself or with another solute.
@@ -77,28 +86,82 @@
    :members:
    :inherited-members:
 
+   .. attribute:: bins
+
+      :class:`numpy.ndarray` of the centers of the `nbins` histogram
+      bins.
+
+   .. attribute:: edges
+
+      :class:`numpy.ndarray` of the `nbins + 1` edges of the histogram
+      bins.
+
+   .. attribute:: rdf
+
+      :class:`numpy.ndarray` of the :ref:`radial distribution
+      function<equation-gab>` values for the :attr:`bins`.
+
+   .. attribute:: count
+
+      :class:`numpy.ndarray` representing the radial histogram, i.e.,
+      the raw counts, for all :attr:`bins`.
+
 
 Site-specific radial distribution function
 ------------------------------------------
 
 :class:`InterRDF_s` calculates site-specific radial distribution
 functions. Instead of two groups of atoms it takes as input a list of pairs of
-AtomGroup, ``[[A, B], [C, D], ...]``. Give the same ``A`` and ``B`` to
-:class:`InterRDF_s`, the output will be a list of RDFs between ``A1`` and
-``B1``, ``A1`` and ``B2``, ``A2`` and ``B1``, ``A2`` and ``B2`` (and similarly
-for ``C`` and ``D``). These site-specific radial distribution functions are
-typically calculated if one is interested in the solvation shells of a ligand
-in a binding site or the solvation of specific residues in a protein. A common
-use case is to choose ``A`` and ``C`` to be AtomGroups that only contain a
-single atom and ``W`` all solvent molecules: ``InterRDF_s(u, [[A, W], [B,
-W]])`` will then produce the RDF of solvent around atom ``A[0]`` and around
-atom ``B[0]``.
-
+AtomGroup, ``[[A, B], [C, D], ...]``. Given the same ``A`` and ``B`` to
+:class:`InterRDF_s`, the output will be a list of individual RDFs between
+``A1`` and ``B1``, ``A1`` and ``B2``, ``A2`` and ``B1``, ``A2`` and ``B2`` (and
+similarly for ``C`` and ``D``). These site-specific radial distribution
+functions are typically calculated if one is interested in the solvation shells
+of a ligand in a binding site or the solvation of specific residues in a
+protein.
 
 .. autoclass:: InterRDF_s
    :members:
    :inherited-members:
 
+   .. attribute:: bins
+
+      :class:`numpy.ndarray` of the centers of the `nbins` histogram
+      bins; all individual site-specific RDFs have the same bins.
+
+   .. attribute:: edges
+
+      :class:`numpy.ndarray` of the `nbins + 1` edges of the histogram
+      bins; all individual site-specific RDFs have the same bins.
+
+   .. attribute:: rdf
+
+      :class:`list` of the site-specific :ref:`radial distribution
+      functions<equation-gab>` or :ref:`density
+      functions<equation-nab>` for the :attr:`bins`. The list contains
+      ``len(ags)`` entries. Each entry for the ``i``-th pair ``[A, B]
+      = ags[i]`` in `ags` is a :class:`numpy.ndarray` with shape
+      ``(len(A), len(B))``, i.e., a stack of RDFs. For example,
+      ``rdf[i][0, 2]`` is the RDF between atoms ``A[0]`` and ``B[2]``.
+
+   .. attribute:: count
+
+      :class:`list` of the site-specific radial histograms, i.e., the
+      raw counts, for all :attr:`bins`. The data have the same
+      structure as :attr:`rdf` except that the arrays contain the raw
+      counts.
+
+   .. attribute:: cdf
+
+      :class:`list` of the site-specific :ref:`cumulative
+      counts<equation-countab>`, for all :attr:`bins`. The data have the same
+      structure as :attr:`rdf` except that the arrays contain the cumulative
+      counts.
+
+      This attribute only exists after :meth:`get_cdf` has been run.
+
+
+
 
 .. Not Implemented yet:
 .. - Structure factor?
@@ -115,9 +178,17 @@
 
 
 class InterRDF(AnalysisBase):
-    """Intermolecular pair distribution function
+    r"""Intermolecular pair distribution function
 
-    InterRDF(g1, g2, nbins=75, range=(0.0, 15.0))
+    The :ref:`radial distribution function<equation-gab>` is calculated by
+    histogramming distances between all particles in `g1` and `g2` while taking
+    periodic boundary conditions into account via the minimum image
+    convention.
+
+    The `exclusion_block` keyword may be used to exclude a set of distances
+    from the calculations.
+
+    Results are available in the attributes :attr:`rdf` and :attr:`count`.
 
     Arguments
     ---------
@@ -232,7 +303,7 @@ def _conclude(self):
 
 
 class InterRDF_s(AnalysisBase):
-    """Site-specific intermolecular pair distribution function
+    r"""Site-specific intermolecular pair distribution function
 
     Arguments
     ---------
@@ -245,6 +316,11 @@ class InterRDF_s(AnalysisBase):
           Number of bins in the histogram [75]
     range : tuple or list (optional)
           The size of the RDF [0.0, 15.0]
+    density : bool (optional)
+          Calculate :math:`g_{ab}(r)` if set to ``True``; or calculate
+          :math:`n_{ab}(r)` if set to ``false``; the default is
+          ``True``.
+
 
     Example
     -------
@@ -267,19 +343,20 @@ class InterRDF_s(AnalysisBase):
 
     Results are available through the :attr:`bins` and :attr:`rdf` attributes::
 
-      plt.plot(rdf.bins, rdf.rdf[0][0][0])
+      plt.plot(rdf.bins, rdf.rdf[0][0, 0])
 
     (Which plots the rdf between the first atom in ``s1`` and the first atom in
     ``s2``)
 
-    To generate the *cumulative distribution function* (cdf), use the
+    To generate the *cumulative distribution function* (cdf) in the sense of
+    "particles within radius :math:`r`", i.e., :math:`N_{ab}(r)`, use the
     :meth:`~InterRDF_s.get_cdf` method ::
 
       cdf = rdf.get_cdf()
 
-    Results are available through the :attr:'cdf' attribute::
+    Results are available through the :attr:`cdf` attribute::
 
-      plt.plot(rdf.bins, rdf.cdf[0][0][0])
+      plt.plot(rdf.bins, rdf.cdf[0][0, 0])
 
     (Which plots the cdf between the first atom in ``s1`` and the first atom in
     ``s2``)
@@ -327,8 +404,8 @@ def _single_frame(self):
                                                     box=self.u.dimensions)
 
             for j, (idx1, idx2) in enumerate(pairs):
-                self.count[i][idx1, idx2, :] += np.histogram(dist[j], 
-                                                             **self.rdf_settings)[0]            
+                self.count[i][idx1, idx2, :] += np.histogram(dist[j],
+                                                             **self.rdf_settings)[0]
 
         self.volume += self._ts.volume
 
@@ -344,14 +421,11 @@ def _conclude(self):
 
         for i, (ag1, ag2) in enumerate(self.ags):
             # Number of each selection
-            nA = len(ag1)
-            nB = len(ag2)
-            N = nA * nB
             indices.append([ag1.indices, ag2.indices])
 
             # Average number density
             box_vol = self.volume / self.n_frames
-            density = N / box_vol
+            density = 1 / box_vol
 
             if self._density:
                 rdf.append(self.count[i] / (density * vol * self.n_frames))
@@ -362,13 +436,20 @@ def _conclude(self):
         self.indices = indices
 
     def get_cdf(self):
-        """Calculate the cumulative distribution functions (CDF) for all sites.
-        Note that this is the actual count within a given radius, i.e.,
-        :math:`N(r)`.
+        r"""Calculate the cumulative counts for all sites.
+
+        This is the :ref:`cumulative count<equation-countab>` within a given
+        radius, i.e., :math:`N_{ab}(r)`.
+
+        The result is returned and also stored in the attribute
+        :attr:`cdf`.
+
+
         Returns
         -------
-              cdf : list
-                      list of arrays with the same structure as :attr:`rdf`
+        cdf : list
+              list of arrays with the same structure as :attr:`rdf`
+
         """
         # Calculate cumulative distribution function
         # Empty list to restore CDF
diff --git a/testsuite/MDAnalysisTests/analysis/test_rdf_s.py b/testsuite/MDAnalysisTests/analysis/test_rdf_s.py
index 7a2dd06b3c9..874f414a698 100644
--- a/testsuite/MDAnalysisTests/analysis/test_rdf_s.py
+++ b/testsuite/MDAnalysisTests/analysis/test_rdf_s.py
@@ -27,7 +27,7 @@
 from numpy.testing import assert_almost_equal
 
 import MDAnalysis as mda
-from MDAnalysis.analysis.rdf import InterRDF_s
+from MDAnalysis.analysis.rdf import InterRDF_s, InterRDF
 
 from MDAnalysisTests.datafiles import GRO_MEMPROT, XTC_MEMPROT
 
@@ -40,16 +40,19 @@ def u():
 @pytest.fixture(scope='module')
 def sels(u):
     s1 = u.select_atoms('name ZND and resid 289')
-    s2 = u.select_atoms('(name OD1 or name OD2) and resid 51 and sphzone 5.0 (resid 289)')
+    s2 = u.select_atoms(
+         '(name OD1 or name OD2) and resid 51 and sphzone 5.0 (resid 289)')
     s3 = u.select_atoms('name ZND and (resid 291 or resid 292)')
     s4 = u.select_atoms('(name OD1 or name OD2) and sphzone 5.0 (resid 291)')
     ags = [[s1, s2], [s3, s4]]
     return ags
 
+
 @pytest.fixture(scope='module')
 def rdf(u, sels):
     return InterRDF_s(u, sels).run()
 
+
 def test_nbins(u, sels):
     rdf = InterRDF_s(u, sels, nbins=412).run()
 
@@ -92,15 +95,21 @@ def test_double_run(rdf):
     assert len(rdf.count[0][0][1][rdf.count[0][0][1] == 5]) == 1
     assert len(rdf.count[1][1][0][rdf.count[1][1][0] == 3]) == 1
 
+
 def test_cdf(rdf):
     rdf.get_cdf()
     assert rdf.cdf[0][0][0][-1] == rdf.count[0][0][0].sum()/rdf.n_frames
 
 
 @pytest.mark.parametrize("density, value", [
-    (True, 13275.775440503656),
+    (True, 26551.55088100731),
     (False, 0.021915460340071267)])
-
 def test_density(u, sels, density, value):
     rdf = InterRDF_s(u, sels, density=density).run()
     assert_almost_equal(max(rdf.rdf[0][0][0]), value)
+    if density:
+        s1 = u.select_atoms('name ZND and resid 289')
+        s2 = u.select_atoms(
+                'name OD1 and resid 51 and sphzone 5.0 (resid 289)')
+        rdf_ref = InterRDF(s1, s2).run()
+        assert_almost_equal(rdf_ref.rdf, rdf.rdf[0][0][0])