Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Start adding GRW RV Op and distribution #5298

Merged
merged 13 commits into from
Apr 3, 2022
317 changes: 196 additions & 121 deletions pymc/distributions/timeseries.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,15 +12,20 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import Tuple, Union

import aesara.tensor as at
import numpy as np

from aesara import scan
from scipy import stats
from aesara.tensor.random.op import RandomVariable

from pymc.distributions import distribution, multivariate
from pymc.aesaraf import change_rv_size, floatX, intX
from pymc.distributions import distribution, logprob, multivariate
from pymc.distributions.continuous import Flat, Normal, get_tau_sigma
from pymc.distributions.dist_math import check_parameters
from pymc.distributions.shape_utils import to_tuple
from pymc.util import check_dist_not_registered

__all__ = [
"AR1",
Expand All @@ -33,6 +38,195 @@
]


class GaussianRandomWalkRV(RandomVariable):
    """
    GaussianRandomWalk Random Variable
    """

    name = "GaussianRandomWalk"
    ndim_supp = 1
    ndims_params = [0, 0, 0, 0]
    dtype = "floatX"
    _print_name = ("GaussianRandomWalk", "\\operatorname{GaussianRandomWalk}")

    def make_node(self, rng, size, dtype, mu, sigma, init, steps):
        # Shape/dtype check done eagerly at graph-construction time: a symbolic
        # ``steps`` *value* can only be validated when the graph is evaluated,
        # but we can refuse non-scalar / non-integer ``steps`` immediately.
        steps = at.as_tensor_variable(steps)
        if not steps.ndim == 0 or not steps.dtype.startswith("int"):
            raise ValueError("steps must be an integer scalar (ndim=0).")

        return super().make_node(rng, size, dtype, mu, sigma, init, steps)

    def _supp_shape_from_params(self, dist_params, reop_param_idx=0, param_shapes=None):
        # Support length is the init value plus ``steps`` innovations.
        steps = dist_params[3]

        return (steps + 1,)

    @classmethod
    def rng_fn(
        cls,
        rng: np.random.RandomState,
        mu: Union[np.ndarray, float],
        sigma: Union[np.ndarray, float],
        init: float,
        steps: int,
        size: Tuple[int],
    ) -> np.ndarray:
        """Gaussian Random Walk generator.

        The init value is drawn from the Normal distribution with the same sigma as the
        innovations.

        Notes
        -----
        Currently does not support custom init distribution

        Parameters
        ----------
        rng: np.random.RandomState
            Numpy random number generator
        mu: array_like
            Random walk mean
        sigma: np.ndarray
            Standard deviation of innovation (sigma > 0)
        init: float
            Initialization value for GaussianRandomWalk
        steps: int
            Length of random walk, must be greater than 0. The returned array has
            ``steps + 1`` entries along the last axis, as the first entry is the
            initial value.
        size: int
            The number of Random Walk time series generated

        Returns
        -------
        ndarray
        """

        if steps < 1:
            raise ValueError("Steps must be greater than 0")

        # If size is None then the returned series should be (*implied_dims, 1+steps)
        if size is None:
            # broadcast parameters with each other to find implied dims
            bcast_shape = np.broadcast_shapes(
                np.asarray(mu).shape,
                np.asarray(sigma).shape,
                np.asarray(init).shape,
            )
            dist_shape = (*bcast_shape, int(steps))

        # If size is given then the returned series should be (*size, 1+steps)
        else:
            dist_shape = (*size, int(steps))

        innovations = rng.normal(loc=mu, scale=sigma, size=dist_shape)
        # Broadcast init up to the batch dims so it can be prepended along the
        # time axis; this also accepts scalar (0-d) init values, for which
        # ``init[..., None]`` alone would not line up with ``innovations``.
        init = np.broadcast_to(init, dist_shape[:-1])
        grw = np.concatenate([init[..., None], innovations], axis=-1)
        return np.cumsum(grw, axis=-1)


gaussianrandomwalk = GaussianRandomWalkRV()


class GaussianRandomWalk(distribution.Continuous):
    r"""Random Walk with Normal innovations

    Parameters
    ----------
    mu : tensor_like of float
        innovation drift, defaults to 0.0
    sigma : tensor_like of float, optional
        sigma > 0, innovation standard deviation, defaults to 1.0
    init : Univariate PyMC distribution
        Univariate distribution of the initial value, created with the `.dist()` API.
        Defaults to Normal with same `mu` and `sigma` as the GaussianRandomWalk
    steps : int
        Number of steps in Gaussian Random Walks (steps > 0).
    """

    rv_op = gaussianrandomwalk

    def __new__(cls, name, mu=0.0, sigma=1.0, init=None, steps=None, **kwargs):
        # Refuse an init distribution that was already registered in a model.
        if init is not None:
            check_dist_not_registered(init)
        return super().__new__(cls, name, mu, sigma, init, steps, **kwargs)

    @classmethod
    def dist(
        cls, mu=0.0, sigma=1.0, init=None, steps=None, size=None, **kwargs
    ) -> at.TensorVariable:

        mu = at.as_tensor_variable(floatX(mu))
        sigma = at.as_tensor_variable(floatX(sigma))
        if steps is None:
            raise ValueError("Must specify steps parameter")
        steps = at.as_tensor_variable(intX(steps))

        # Work out the size the init distribution must take, if any: an explicit
        # `size` wins; otherwise a `shape` kwarg implies it (minus the time axis).
        shape = kwargs.get("shape", None)
        if size is not None:
            init_size = to_tuple(size)
        elif shape is not None:
            init_size = to_tuple(shape)[:-1]
        else:
            init_size = None

        if init is None:
            # Default init: a Normal sharing the random walk's mu and sigma
            init = Normal.dist(mu, sigma, size=init_size)
        else:
            is_univariate_rv = (
                isinstance(init, at.TensorVariable)
                and init.owner is not None
                and isinstance(init.owner.op, RandomVariable)
                and init.owner.op.ndim_supp == 0
            )
            if not is_univariate_rv:
                raise TypeError("init must be a univariate distribution variable")

            if init_size is not None:
                init = change_rv_size(init, init_size)
            else:
                # Without an explicit size, the shapes of mu, sigma, and init
                # jointly imply the batch shape
                implied_shape = at.broadcast_arrays(mu, sigma, init)[0].shape
                init = change_rv_size(init, implied_shape)

        # The init var's logprob is folded into this distribution's logp method,
        # so it must not be counted separately.
        init.tag.ignore_logprob = True

        return super().dist([mu, sigma, init, steps], size=size, **kwargs)

    def logp(
        value: at.Variable,
        mu: at.Variable,
        sigma: at.Variable,
        init: at.Variable,
        steps: at.Variable,
    ) -> at.TensorVariable:
        """Calculate log-probability of Gaussian Random Walk distribution at specified value.

        Parameters
        ----------
        value: at.Variable,
        mu: at.Variable,
        sigma: at.Variable,
        init: at.Variable, Not used
        steps: at.Variable, Not used

        Returns
        -------
        TensorVariable
        """

        # The first observation is scored under the init distribution
        init_logp = logprob.logp(init, value[..., 0])

        # Successive differences of the walk are iid Normal innovations
        innovations = value[..., 1:] - value[..., :-1]
        innov_logp = logprob.logp(Normal.dist(mu, sigma), innovations)

        return check_parameters(
            init_logp + innov_logp.sum(axis=-1),
            steps > 0,
            msg="steps > 0",
        )


class AR1(distribution.Continuous):
"""
Autoregressive process with 1 lag.
Expand Down Expand Up @@ -171,125 +365,6 @@ def logp(self, value):
return at.sum(innov_like) + at.sum(init_like)


class GaussianRandomWalk(distribution.Continuous):
    r"""Random Walk with Normal innovations

    Note that this is mainly a user-friendly wrapper to enable an easier specification
    of GRW. You are not restricted to use only Normal innovations but can use any
    distribution: just use `aesara.tensor.cumsum()` to create the random walk behavior.

    Parameters
    ----------
    mu : tensor_like of float, default 0
        innovation drift, defaults to 0.0
        For vector valued `mu`, first dimension must match shape of the random walk, and
        the first element will be discarded (since there is no innovation in the first timestep)
    sigma : tensor_like of float, optional
        `sigma` > 0, innovation standard deviation (only required if `tau` is not specified)
        For vector valued `sigma`, first dimension must match shape of the random walk, and
        the first element will be discarded (since there is no innovation in the first timestep)
    tau : tensor_like of float, optional
        `tau` > 0, innovation precision (only required if `sigma` is not specified)
        For vector valued `tau`, first dimension must match shape of the random walk, and
        the first element will be discarded (since there is no innovation in the first timestep)
    init : pymc.Distribution, optional
        distribution for initial value (Defaults to Flat())
    """

    def __init__(self, tau=None, init=None, sigma=None, mu=0.0, *args, **kwargs):
        # Default to a length-1 walk when no shape is supplied.
        kwargs.setdefault("shape", 1)
        super().__init__(*args, **kwargs)
        # A walk with an all-zero shape has no timesteps to model.
        if sum(self.shape) == 0:
            raise TypeError("GaussianRandomWalk must be supplied a non-zero shape argument!")
        # Exactly one of tau/sigma is required; the helper derives the other.
        tau, sigma = get_tau_sigma(tau=tau, sigma=sigma)
        self.tau = at.as_tensor_variable(tau)
        sigma = at.as_tensor_variable(sigma)
        self.sigma = sigma
        self.mu = at.as_tensor_variable(mu)
        # Initial value defaults to an improper Flat prior.
        self.init = init or Flat.dist()
        self.mean = at.as_tensor_variable(0.0)

    def _mu_and_sigma(self, mu, sigma):
        """Helper to get mu and sigma if they are high dimensional."""
        # Vector-valued parameters drop their first element: there is no
        # innovation in the first timestep (see class docstring).
        if sigma.ndim > 0:
            sigma = sigma[1:]
        if mu.ndim > 0:
            mu = mu[1:]
        return mu, sigma

    def logp(self, x):
        """
        Calculate log-probability of Gaussian Random Walk distribution at specified value.

        Parameters
        ----------
        x : numeric
            Value for which log-probability is calculated.

        Returns
        -------
        TensorVariable
        """
        if x.ndim > 0:
            # Each step x_i given x_{i-1} is Normal(x_{i-1} + mu, sigma);
            # the first value is scored under the init distribution.
            x_im1 = x[:-1]
            x_i = x[1:]
            mu, sigma = self._mu_and_sigma(self.mu, self.sigma)
            innov_like = Normal.dist(mu=x_im1 + mu, sigma=sigma).logp(x_i)
            return self.init.logp(x[0]) + at.sum(innov_like)
        # Scalar value: only the init distribution contributes.
        return self.init.logp(x)

    def random(self, point=None, size=None):
        """Draw random values from GaussianRandomWalk.

        Parameters
        ----------
        point : dict or Point, optional
            Dict of variable values on which random values are to be
            conditioned (uses default point if not specified).
        size : int, optional
            Desired size of random sample (returns one sample if not
            specified).

        Returns
        -------
        array
        """
        # NOTE(review): sampling is currently disabled (no-op); the previous
        # draw_values/generate_samples implementation is kept below for
        # reference during the v4 refactor.
        # sigma, mu = distribution.draw_values([self.sigma, self.mu], point=point, size=size)
        # return distribution.generate_samples(
        #     self._random,
        #     sigma=sigma,
        #     mu=mu,
        #     size=size,
        #     dist_shape=self.shape,
        #     not_broadcast_kwargs={"sample_shape": to_tuple(size)},
        # )
        pass

    def _random(self, sigma, mu, size, sample_shape):
        """Implement a Gaussian random walk as a cumulative sum of normals.
        axis = len(size) - 1 denotes the axis along which cumulative sum would be calculated.
        This might need to be corrected in future when issue #4010 is fixed.
        """
        # NOTE(review): this compares an int element of ``size`` against the
        # tuple ``sample_shape`` — looks suspicious; verify the intended
        # condition before relying on the axis selection here.
        if size[len(sample_shape)] == sample_shape:
            axis = len(sample_shape)
        else:
            axis = len(size) - 1
        rv = stats.norm(mu, sigma)
        data = rv.rvs(size).cumsum(axis=axis)
        data = np.array(data)

        # the following lines center the random walk to start at the origin.
        if len(data.shape) > 1:
            for i in range(data.shape[0]):
                data[i] = data[i] - data[i][0]
        else:
            data = data - data[0]
        return data

    def _distr_parameters_for_repr(self):
        # Only mu and sigma are shown in the model repr.
        return ["mu", "sigma"]


class GARCH11(distribution.Continuous):
r"""
GARCH(1,1) with Normal innovations. The model is specified by
Expand Down
16 changes: 16 additions & 0 deletions pymc/tests/test_distributions.py
Original file line number Diff line number Diff line change
Expand Up @@ -2619,6 +2619,22 @@ def test_interpolated_transform(self, transform):
assert not np.isfinite(m.compile_logp()({"x": -1.0}))
assert not np.isfinite(m.compile_logp()({"x": 11.0}))

def test_gaussianrandomwalk(self):
    def ref_logp(value, mu, sigma, steps):
        # The default init distribution is Normal(mu, sigma), so the first
        # value and the step-to-step increments score under the same density.
        first_value_logp = scipy.stats.norm.logpdf(value[0], mu, sigma)
        increments_logp = scipy.stats.norm.logpdf(np.diff(value), mu, sigma).sum()
        return first_value_logp + increments_logp

    self.check_logp(
        pm.GaussianRandomWalk,
        Vector(R, 4),
        {"mu": R, "sigma": Rplus, "steps": Nat},
        ref_logp,
        decimal=select_by_precision(float64=6, float32=1),
    )


class TestBound:
"""Tests for pm.Bound distribution"""
Expand Down
Loading