|
1 | 1 | """Kernel to compute the Gelman-Rubin convergence diagnostic (Rhat) online.
|
2 | 2 | """
|
3 |
| -from typing import NamedTuple |
| 3 | +from typing import Callable, NamedTuple, Tuple |
4 | 4 |
|
| 5 | +import jax |
5 | 6 | import jax.numpy as jnp
|
6 | 7 |
|
7 |
| -from mcx.inference.warmup.mass_matrix_adaptation import ( |
8 |
| - WelfordAlgorithmState, |
9 |
| - welford_algorithm, |
10 |
| -) |
| 8 | + |
class WelfordAlgorithmState(NamedTuple):
    """State carried through the Welford algorithm.

    mean
        The running sample mean; `init` allocates it with shape
        ``(n_chains, n_dims)``, one row per chain.
    m2
        The running sum of squared differences from the current mean. See
        documentation of the `welford_algorithm` function for an explanation.
    sample_size
        The number of successive states the previous values have been computed on;
        also the current number of iterations of the algorithm.
    """

    # NOTE: `mean` and `m2` were previously annotated `float`, but the
    # init/update functions store jax arrays here; annotate accordingly.
    mean: jnp.ndarray
    m2: jnp.ndarray
    sample_size: int
11 | 25 |
|
12 | 26 |
|
class GelmanRubinState(NamedTuple):
    """State of the online Gelman-Rubin (Rhat) estimator.

    w_state
        State of the underlying Welford (within-chain variance) estimator.
    rhat
        The per-dimension Rhat values computed at the last update.
    metric
        Scalar summary of `rhat` (the value furthest from 1 — see `update`).
    metric_name
        Name of the summary stored in `metric` (set to "worst_rhat" by `init`).
    """

    w_state: WelfordAlgorithmState
    # `jnp.DeviceArray` was deprecated and removed in JAX 0.4+; these
    # annotations are evaluated at class creation and would crash on import.
    rhat: jnp.ndarray
    metric: jnp.ndarray
    metric_name: str
| 32 | + |
| 33 | + |
def welford_algorithm(is_diagonal_matrix: bool) -> Tuple[Callable, Callable, Callable]:
    """Welford's online estimator of covariance.

    It is possible to compute the variance of a population of values in an
    on-line fashion to avoid storing intermediate results. The naive recurrence
    relations between the sample mean and variance at a step and the next are
    however not numerically stable.

    Welford's algorithm uses the sum of square of differences
    :math:`M_{2,n} = \\sum_{i=1}^n \\left(x_i-\\overline{x_n}\\right)^2`
    where :math:`\\overline{x}_n` is the current mean, and the following
    recurrence relationships

    .. math:

        M_{2,n} = M_{2, n-1} + (x_n-\\overline{x}_{n-1})(x_n-\\overline{x}_n)
        \\sigma_n^2 = \\frac{M_{2,n}}{n}

    Parameters
    ----------
    is_diagonal_matrix
        When True the algorithm adapts and returns a diagonal mass matrix
        (default), otherwise adapts and returns a dense mass matrix.

    Returns
    -------
    A tuple of `init`, `update` and `covariance` functions operating on a
    `WelfordAlgorithmState`.
    """

    def init(n_chains: int, n_dims: int) -> WelfordAlgorithmState:
        """Initialize the covariance estimation.

        When the matrix is diagonal it is sufficient to work with an array that contains
        the diagonal value. Otherwise we need to work with the matrix in full.

        Parameters
        ----------
        n_chains: int
            The number of chains being run
        n_dims: int
            The number of variables
        """
        sample_size = 0
        mean = jnp.zeros((n_chains, n_dims))
        if is_diagonal_matrix:
            m2 = jnp.zeros((n_chains, n_dims))
        else:
            # One dense (n_dims, n_dims) accumulator per chain. The previous
            # shape (n_chains, n_chains, n_dims) could never match the outer
            # product computed in `update`.
            m2 = jnp.zeros((n_chains, n_dims, n_dims))
        return WelfordAlgorithmState(mean, m2, sample_size)

    @jax.jit
    def update(
        state: WelfordAlgorithmState, value: jnp.ndarray
    ) -> WelfordAlgorithmState:
        """Update the M2 matrix using the new value.

        Parameters
        ----------
        state: WelfordAlgorithmState
            The current state of the Welford Algorithm
        value: jax.numpy.ndarray, shape (n_chains, n_dims)
            The new sample (typically position of the chain) used to update m2
        """
        mean, m2, sample_size = state
        sample_size = sample_size + 1

        delta = value - mean
        mean = mean + delta / sample_size
        updated_delta = value - mean
        if is_diagonal_matrix:
            new_m2 = m2 + delta * updated_delta
        else:
            # Batched outer product, one (n_dims, n_dims) update per chain;
            # `jnp.outer` flattens its inputs and is wrong for batched input.
            new_m2 = m2 + jnp.einsum("ci,cj->cij", updated_delta, delta)

        return WelfordAlgorithmState(mean, new_m2, sample_size)

    def covariance(
        state: WelfordAlgorithmState,
    ) -> Tuple[jnp.ndarray, int, jnp.ndarray]:
        """Return the current covariance estimate, sample size and mean.

        Uses the unbiased (ddof=1) estimator, so the result is only finite
        once at least two samples have been observed.
        """
        mean, m2, sample_size = state
        covariance = m2 / (sample_size - 1)
        return covariance, sample_size, mean

    return init, update, covariance
16 | 113 |
|
17 | 114 |
|
def online_gelman_rubin():
    """Online estimation of the Gelman-Rubin diagnostic."""

    # Only per-dimension (diagonal) variances are needed for Rhat.
    w_init, w_update, w_covariance = welford_algorithm(True)

    def init(init_state):
        """Initialise the online Gelman-Rubin estimator.

        Parameters
        ----------
        init_state
            The initial state of the chains; its `position` attribute is
            expected to be an array of shape (n_chains, n_dims).

        Returns
        -------
        GelmanRubinState with all values set to zeros.

        """
        n_chains, n_dims = init_state.position.shape
        w_state = w_init(n_chains, n_dims)
        return GelmanRubinState(w_state, 0, jnp.nan, "worst_rhat")

    def update(chain_state, rhat_state):
        """Update rhat estimates

        Parameters
        ----------
        chain_state: HMCState
            The chain state; `position` has shape (n_chains, n_dims).
        rhat_state: GelmanRubinState
            The GelmanRubinState from the previous draw

        Returns
        -------
        An updated GelmanRubinState object

        Notes
        -----
        The within-chain variance uses the unbiased (ddof=1) estimator, so
        the estimate is only meaningful from the second draw onwards.
        """
        within_state, _, _, metric_name = rhat_state

        positions = chain_state.position
        within_state = w_update(within_state, positions)
        covariance, step, mean = w_covariance(within_state)
        # W: mean of the per-chain variances; B: variance of the per-chain
        # means — both computed per dimension (axis 0 indexes chains).
        within_var = jnp.mean(covariance, axis=0)
        between_var = jnp.var(mean, axis=0, ddof=1)
        estimator = ((step - 1) / step) * within_var + between_var
        rhat = jnp.sqrt(estimator / within_var)
        # Summarize by the dimension whose rhat is furthest from 1.
        worst_rhat = rhat[jnp.argmax(jnp.abs(rhat - 1.0))]

        return GelmanRubinState(within_state, rhat, worst_rhat, metric_name)

    return init, update
|
42 | 165 |
|
|
0 commit comments