Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Start adding GRW RV Op and distribution #5298

Merged
merged 13 commits into from
Apr 3, 2022
317 changes: 196 additions & 121 deletions pymc/distributions/timeseries.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,15 +12,20 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import Tuple, Union

import aesara.tensor as at
import numpy as np

from aesara import scan
from scipy import stats
from aesara.tensor.random.op import RandomVariable

from pymc.distributions import distribution, multivariate
from pymc.aesaraf import change_rv_size, floatX, intX
from pymc.distributions import distribution, logprob, multivariate
from pymc.distributions.continuous import Flat, Normal, get_tau_sigma
from pymc.distributions.dist_math import check_parameters
from pymc.distributions.shape_utils import to_tuple
from pymc.util import check_dist_not_registered

__all__ = [
"AR1",
Expand All @@ -33,6 +38,195 @@
]


class GaussianRandomWalkRV(RandomVariable):
    """
    GaussianRandomWalk Random Variable
    """

    name = "GaussianRandomWalk"
    ndim_supp = 1
    ndims_params = [0, 0, 0, 0]
    dtype = "floatX"
    _print_name = ("GaussianRandomWalk", "\\operatorname{GaussianRandomWalk}")

    def make_node(self, rng, size, dtype, mu, sigma, init, steps):
        # Shape/dtype check done eagerly at graph-construction time: a symbolic
        # ``steps`` *value* can only be validated when the graph is evaluated,
        # but we can refuse non-scalar / non-integer ``steps`` immediately.
        steps = at.as_tensor_variable(steps)
        if not steps.ndim == 0 or not steps.dtype.startswith("int"):
            raise ValueError("steps must be an integer scalar (ndim=0).")

        return super().make_node(rng, size, dtype, mu, sigma, init, steps)

    def _supp_shape_from_params(self, dist_params, reop_param_idx=0, param_shapes=None):
        # Support length is the init value plus ``steps`` innovations.
        steps = dist_params[3]

        return (steps + 1,)

    @classmethod
    def rng_fn(
        cls,
        rng: np.random.RandomState,
        mu: Union[np.ndarray, float],
        sigma: Union[np.ndarray, float],
        init: float,
        steps: int,
        size: Tuple[int],
    ) -> np.ndarray:
        """Gaussian Random Walk generator.

        The init value is drawn from the Normal distribution with the same sigma as the
        innovations.

        Notes
        -----
        Currently does not support custom init distribution

        Parameters
        ----------
        rng: np.random.RandomState
            Numpy random number generator
        mu: array_like
            Random walk mean
        sigma: np.ndarray
            Standard deviation of innovation (sigma > 0)
        init: float
            Initialization value for GaussianRandomWalk
        steps: int
            Length of random walk, must be greater than 0. The returned array has
            ``steps + 1`` entries along the last axis, as the first entry is the
            initial value.
        size: int
            The number of Random Walk time series generated

        Returns
        -------
        ndarray
        """

        if steps < 1:
            raise ValueError("Steps must be greater than 0")

        # If size is None then the returned series should be (*implied_dims, 1+steps)
        if size is None:
            # broadcast parameters with each other to find implied dims
            bcast_shape = np.broadcast_shapes(
                np.asarray(mu).shape,
                np.asarray(sigma).shape,
                np.asarray(init).shape,
            )
            dist_shape = (*bcast_shape, int(steps))

        # If size is given then the returned series should be (*size, 1+steps)
        else:
            dist_shape = (*size, int(steps))

        innovations = rng.normal(loc=mu, scale=sigma, size=dist_shape)
        # Broadcast init up to the batch dims so it can be prepended along the
        # time axis; this also accepts scalar (0-d) init values, for which
        # ``init[..., None]`` alone would not line up with ``innovations``.
        init = np.broadcast_to(init, dist_shape[:-1])
        grw = np.concatenate([init[..., None], innovations], axis=-1)
        return np.cumsum(grw, axis=-1)


gaussianrandomwalk = GaussianRandomWalkRV()


class GaussianRandomWalk(distribution.Continuous):
    r"""Random Walk with Normal innovations

    Parameters
    ----------
    mu : tensor_like of float
        innovation drift, defaults to 0.0
    sigma : tensor_like of float, optional
        sigma > 0, innovation standard deviation, defaults to 1.0
    init : Univariate PyMC distribution
        Univariate distribution of the initial value, created with the `.dist()` API.
        Defaults to Normal with same `mu` and `sigma` as the GaussianRandomWalk
    steps : int
        Number of steps in Gaussian Random Walks (steps > 0).
    """

    rv_op = gaussianrandomwalk

    def __new__(cls, name, mu=0.0, sigma=1.0, init=None, steps=None, **kwargs):
        # Refuse an init distribution that was already registered in a model.
        if init is not None:
            check_dist_not_registered(init)
        return super().__new__(cls, name, mu, sigma, init, steps, **kwargs)

    @classmethod
    def dist(
        cls, mu=0.0, sigma=1.0, init=None, steps=None, size=None, **kwargs
    ) -> at.TensorVariable:

        mu = at.as_tensor_variable(floatX(mu))
        sigma = at.as_tensor_variable(floatX(sigma))
        if steps is None:
            raise ValueError("Must specify steps parameter")
        steps = at.as_tensor_variable(intX(steps))

        # Work out the size the init distribution must take, if any: an explicit
        # `size` wins; otherwise a `shape` kwarg implies it (minus the time axis).
        shape = kwargs.get("shape", None)
        if size is not None:
            init_size = to_tuple(size)
        elif shape is not None:
            init_size = to_tuple(shape)[:-1]
        else:
            init_size = None

        if init is None:
            # Default init: a Normal sharing the random walk's mu and sigma
            init = Normal.dist(mu, sigma, size=init_size)
        else:
            is_univariate_rv = (
                isinstance(init, at.TensorVariable)
                and init.owner is not None
                and isinstance(init.owner.op, RandomVariable)
                and init.owner.op.ndim_supp == 0
            )
            if not is_univariate_rv:
                raise TypeError("init must be a univariate distribution variable")

            if init_size is not None:
                init = change_rv_size(init, init_size)
            else:
                # Without an explicit size, the shapes of mu, sigma, and init
                # jointly imply the batch shape
                implied_shape = at.broadcast_arrays(mu, sigma, init)[0].shape
                init = change_rv_size(init, implied_shape)

        # The init var's logprob is folded into this distribution's logp method,
        # so it must not be counted separately.
        init.tag.ignore_logprob = True

        return super().dist([mu, sigma, init, steps], size=size, **kwargs)

    def logp(
        value: at.Variable,
        mu: at.Variable,
        sigma: at.Variable,
        init: at.Variable,
        steps: at.Variable,
    ) -> at.TensorVariable:
        """Calculate log-probability of Gaussian Random Walk distribution at specified value.

        Parameters
        ----------
        value: at.Variable,
        mu: at.Variable,
        sigma: at.Variable,
        init: at.Variable, Not used
        steps: at.Variable, Not used

        Returns
        -------
        TensorVariable
        """

        # The first observation is scored under the init distribution
        init_logp = logprob.logp(init, value[..., 0])

        # Successive differences of the walk are iid Normal innovations
        innovations = value[..., 1:] - value[..., :-1]
        innov_logp = logprob.logp(Normal.dist(mu, sigma), innovations)

        return check_parameters(
            init_logp + innov_logp.sum(axis=-1),
            steps > 0,
            msg="steps > 0",
        )


class AR1(distribution.Continuous):
"""
Autoregressive process with 1 lag.
Expand Down Expand Up @@ -171,125 +365,6 @@ def logp(self, value):
return at.sum(innov_like) + at.sum(init_like)


class GaussianRandomWalk(distribution.Continuous):
    r"""Random Walk with Normal innovations

    Note that this is mainly a user-friendly wrapper to enable an easier specification
    of GRW. You are not restricted to use only Normal innovations but can use any
    distribution: just use `aesara.tensor.cumsum()` to create the random walk behavior.

    Parameters
    ----------
    mu : tensor_like of float, default 0
        innovation drift, defaults to 0.0
        For vector valued `mu`, first dimension must match shape of the random walk, and
        the first element will be discarded (since there is no innovation in the first timestep)
    sigma : tensor_like of float, optional
        `sigma` > 0, innovation standard deviation (only required if `tau` is not specified)
        For vector valued `sigma`, first dimension must match shape of the random walk, and
        the first element will be discarded (since there is no innovation in the first timestep)
    tau : tensor_like of float, optional
        `tau` > 0, innovation precision (only required if `sigma` is not specified)
        For vector valued `tau`, first dimension must match shape of the random walk, and
        the first element will be discarded (since there is no innovation in the first timestep)
    init : pymc.Distribution, optional
        distribution for initial value (Defaults to Flat())
    """

    def __init__(self, tau=None, init=None, sigma=None, mu=0.0, *args, **kwargs):
        # Default to a length-1 walk when no shape is supplied.
        kwargs.setdefault("shape", 1)
        super().__init__(*args, **kwargs)
        # A walk with an all-zero shape has no timesteps to model.
        if sum(self.shape) == 0:
            raise TypeError("GaussianRandomWalk must be supplied a non-zero shape argument!")
        # Exactly one of tau/sigma is required; the helper derives the other.
        tau, sigma = get_tau_sigma(tau=tau, sigma=sigma)
        self.tau = at.as_tensor_variable(tau)
        sigma = at.as_tensor_variable(sigma)
        self.sigma = sigma
        self.mu = at.as_tensor_variable(mu)
        # Initial value defaults to an improper Flat prior.
        self.init = init or Flat.dist()
        self.mean = at.as_tensor_variable(0.0)

    def _mu_and_sigma(self, mu, sigma):
        """Helper to get mu and sigma if they are high dimensional."""
        # Vector-valued parameters drop their first element: there is no
        # innovation in the first timestep (see class docstring).
        if sigma.ndim > 0:
            sigma = sigma[1:]
        if mu.ndim > 0:
            mu = mu[1:]
        return mu, sigma

    def logp(self, x):
        """
        Calculate log-probability of Gaussian Random Walk distribution at specified value.

        Parameters
        ----------
        x : numeric
            Value for which log-probability is calculated.

        Returns
        -------
        TensorVariable
        """
        if x.ndim > 0:
            # Each step x_i given x_{i-1} is Normal(x_{i-1} + mu, sigma);
            # the first value is scored under the init distribution.
            x_im1 = x[:-1]
            x_i = x[1:]
            mu, sigma = self._mu_and_sigma(self.mu, self.sigma)
            innov_like = Normal.dist(mu=x_im1 + mu, sigma=sigma).logp(x_i)
            return self.init.logp(x[0]) + at.sum(innov_like)
        # Scalar value: only the init distribution contributes.
        return self.init.logp(x)

    def random(self, point=None, size=None):
        """Draw random values from GaussianRandomWalk.

        Parameters
        ----------
        point : dict or Point, optional
            Dict of variable values on which random values are to be
            conditioned (uses default point if not specified).
        size : int, optional
            Desired size of random sample (returns one sample if not
            specified).

        Returns
        -------
        array
        """
        # NOTE(review): sampling is currently disabled (no-op); the previous
        # draw_values/generate_samples implementation is kept below for
        # reference during the v4 refactor.
        # sigma, mu = distribution.draw_values([self.sigma, self.mu], point=point, size=size)
        # return distribution.generate_samples(
        #     self._random,
        #     sigma=sigma,
        #     mu=mu,
        #     size=size,
        #     dist_shape=self.shape,
        #     not_broadcast_kwargs={"sample_shape": to_tuple(size)},
        # )
        pass

    def _random(self, sigma, mu, size, sample_shape):
        """Implement a Gaussian random walk as a cumulative sum of normals.
        axis = len(size) - 1 denotes the axis along which cumulative sum would be calculated.
        This might need to be corrected in future when issue #4010 is fixed.
        """
        # NOTE(review): this compares an int element of ``size`` against the
        # tuple ``sample_shape`` — looks suspicious; verify the intended
        # condition before relying on the axis selection here.
        if size[len(sample_shape)] == sample_shape:
            axis = len(sample_shape)
        else:
            axis = len(size) - 1
        rv = stats.norm(mu, sigma)
        data = rv.rvs(size).cumsum(axis=axis)
        data = np.array(data)

        # the following lines center the random walk to start at the origin.
        if len(data.shape) > 1:
            for i in range(data.shape[0]):
                data[i] = data[i] - data[i][0]
        else:
            data = data - data[0]
        return data

    def _distr_parameters_for_repr(self):
        # Only mu and sigma are shown in the model repr.
        return ["mu", "sigma"]


class GARCH11(distribution.Continuous):
r"""
GARCH(1,1) with Normal innovations. The model is specified by
Expand Down
16 changes: 16 additions & 0 deletions pymc/tests/test_distributions.py
Original file line number Diff line number Diff line change
Expand Up @@ -2619,6 +2619,22 @@ def test_interpolated_transform(self, transform):
assert not np.isfinite(m.compile_logp()({"x": -1.0}))
assert not np.isfinite(m.compile_logp()({"x": 11.0}))

def test_gaussianrandomwalk(self):
    def ref_logp(value, mu, sigma, steps):
        # The default init distribution is Normal(mu, sigma), so the first
        # value and the step-to-step increments score under the same density.
        first_value_logp = scipy.stats.norm.logpdf(value[0], mu, sigma)
        increments_logp = scipy.stats.norm.logpdf(np.diff(value), mu, sigma).sum()
        return first_value_logp + increments_logp

    self.check_logp(
        pm.GaussianRandomWalk,
        Vector(R, 4),
        {"mu": R, "sigma": Rplus, "steps": Nat},
        ref_logp,
        decimal=select_by_precision(float64=6, float32=1),
    )


class TestBound:
"""Tests for pm.Bound distribution"""
Expand Down
Loading