Display the number of divergences while sampling

rlouf · rlouf · commit 26c316f2911d · 2021-08-01T22:03:41.000+02:00
We added a callback that displays the largest number of divergences
observed on a single chain while sampling, in the spirit of creating
a fully interactive sampling experience.
diff --git a/mcx/diagnostics/__init__.py b/mcx/diagnostics/__init__.py
@@ -1,5 +1,7 @@
 from .gelman_rubin import online_gelman_rubin
+from .mcmc import divergences
 
 __all__ = [
+    "divergences",
     "online_gelman_rubin",
 ]
diff --git a/mcx/diagnostics/gelman_rubin.py b/mcx/diagnostics/gelman_rubin.py
@@ -28,7 +28,6 @@ class GelmanRubinState(NamedTuple):
     w_state: WelfordAlgorithmState
     rhat: jnp.DeviceArray
     metric: jnp.DeviceArray
-    metric_name: str
 
 
 def welford_algorithm(is_diagonal_matrix: bool) -> Tuple[Callable, Callable, Callable]:
@@ -112,12 +111,12 @@ def covariance(
     return init, update, covariance
 
 
-def online_gelman_rubin():
+def online_gelman_rubin() -> Tuple[str, Callable, Callable]:
     """Online estimation of the Gelman-Rubin diagnostic."""
-
+    metric_name = "worst_rhat"
     w_init, w_update, w_covariance = welford_algorithm(True)
 
-    def init(init_state):
+    def init(init_state) -> GelmanRubinState:
         """Initialise the online gelman/rubin estimator
 
         Parameters
@@ -132,9 +131,10 @@ def init(init_state):
         """
         n_chains, n_dims = init_state.position.shape
         w_state = w_init(n_chains, n_dims)
-        return GelmanRubinState(w_state, 0, jnp.nan, "worst_rhat")
+        return GelmanRubinState(w_state, 0, jnp.nan)
 
-    def update(chain_state, rhat_state):
+    @jax.jit
+    def update(chain_state, _, rhat_state: GelmanRubinState) -> GelmanRubinState:
         """Update rhat estimates
 
         Parameters
@@ -148,7 +148,7 @@ def update(chain_state, rhat_state):
         -------
         An updated GelmanRubinState object
         """
-        within_state, _, _, metric_name = rhat_state
+        within_state, *_ = rhat_state
 
         positions = chain_state.position
         within_state = w_update(within_state, positions)
@@ -159,9 +159,9 @@ def update(chain_state, rhat_state):
         rhat = jnp.sqrt(estimator / within_var)
         worst_rhat = rhat[jnp.argmax(jnp.abs(rhat - 1.0))]
 
-        return GelmanRubinState(within_state, rhat, worst_rhat, metric_name)
+        return GelmanRubinState(within_state, rhat, worst_rhat)
 
-    return init, update
+    return metric_name, init, update
 
 
 def split_gelman_rubin():
diff --git a/mcx/diagnostics/mcmc.py b/mcx/diagnostics/mcmc.py
@@ -0,0 +1,37 @@
+"""Diagnostics that are specific to MCMC algorithms."""
+from typing import Callable, NamedTuple, Tuple
+
+import jax
+import jax.numpy as jnp
+
+
+class DivergencesState(NamedTuple):
+    num_divergences: jnp.ndarray
+    metric: int  # maximum number of divergences
+
+
+def divergences() -> Tuple[str, Callable, Callable]:
+    """Count the number of divergences.
+
+    We keep a count of the current number of divergences for each chain and
+    return the maximum number of divergences to be displayed.
+
+    """
+    metric_name = "divergences"
+
+    def init(init_state) -> DivergencesState:
+        """Initialize the divergence counters."""
+        num_chains, _ = init_state.position.shape
+        num_divergences = jnp.zeros(num_chains)
+        return DivergencesState(num_divergences, 0)
+
+    @jax.jit
+    def update(_, info, divergence_state: DivergencesState) -> DivergencesState:
+        """Update the number of divergences."""
+        num_divergences, *_ = divergence_state
+        is_divergent = info.is_divergent.astype(int)
+        num_divergences = num_divergences + is_divergent
+        max_num_divergences = jnp.max(num_divergences)
+        return DivergencesState(num_divergences, max_num_divergences)
+
+    return metric_name, init, update
diff --git a/mcx/distributions/distribution.py b/mcx/distributions/distribution.py
@@ -63,13 +63,13 @@ def __init__(self, *args) -> None:
 
     @abstractmethod
     def sample(
-        self, rng_key: jax.random.PRNGKey, sample_shape: Union[Tuple[()], Tuple[int]]
+        self, rng_key: jnp.ndarray, sample_shape: Union[Tuple[()], Tuple[int]]
     ) -> jax.numpy.DeviceArray:
         """Obtain samples from the distribution.
 
         Parameters
         ----------
-        rng_key: jax.random.PRNGKey
+        rng_key: jnp.ndarray
             The pseudo random number generator key to use to draw samples.
         sample_shape: Tuple[int]
             The number of independant, identically distributed samples to draw
@@ -84,7 +84,7 @@ def sample(
 
     def forward(
         self,
-        rng_key: jax.random.PRNGKey,
+        rng_key: jnp.ndarray,
         sample_shape: Union[Tuple[()], Tuple[int]] = (),
     ) -> jax.numpy.DeviceArray:
         """Generate forward samples from the distribution. Defined for compatibility with
diff --git a/mcx/inference/adaptation/num_steps_adaptation.py b/mcx/inference/adaptation/num_steps_adaptation.py
diff --git a/mcx/inference/adaptation/stan.py b/mcx/inference/adaptation/stan.py
@@ -83,7 +83,7 @@ def stan_hmc_warmup(
     )
 
     def init(
-        rng_key: jax.random.PRNGKey, initial_state: HMCState, initial_step_size: int
+        rng_key: jnp.ndarray, initial_state: HMCState, initial_step_size: int
     ) -> StanWarmupState:
         """Initialize the warmup.
 
@@ -109,7 +109,7 @@ def init(
 
     @jax.jit
     def update(
-        rng_key: jax.random.PRNGKey,
+        rng_key: jnp.ndarray,
         stage: int,
         is_middle_window_end: bool,
         chain_state: HMCState,
@@ -197,7 +197,7 @@ def init(initial_step_size: float) -> DualAveragingState:
 
     @jax.jit
     def update(
-        state: Tuple[jax.random.PRNGKey, HMCState, HMCInfo, StanWarmupState]
+        state: Tuple[jnp.ndarray, HMCState, HMCInfo, StanWarmupState]
     ) -> StanWarmupState:
         rng_key, chain_state, chain_info, warmup_state = state
 
@@ -244,7 +244,7 @@ def init(chain_state: HMCState) -> MassMatrixAdaptationState:
 
     @jax.jit
     def update(
-        state: Tuple[jax.random.PRNGKey, HMCState, HMCInfo, StanWarmupState]
+        state: Tuple[jnp.ndarray, HMCState, HMCInfo, StanWarmupState]
     ) -> StanWarmupState:
         """Move the warmup by one state when in a slow adaptation interval.
 
diff --git a/mcx/inference/adaptation/step_size_adaptation.py b/mcx/inference/adaptation/step_size_adaptation.py
@@ -171,15 +171,15 @@ class ReasonableStepSizeState(NamedTuple):
         The current step size in the search.
     """
 
-    rng_key: jax.random.PRNGKey
+    rng_key: jnp.ndarray
     direction: int
     previous_direction: int
     step_size: float
 
 
 @partial(jax.jit, static_argnums=(1,))
 def find_reasonable_step_size(
-    rng_key: jax.random.PRNGKey,
+    rng_key: jnp.ndarray,
     kernel_generator: Callable[[float, jnp.DeviceArray], Callable],
     reference_hmc_state: HMCState,
     inverse_mass_matrix: jnp.DeviceArray,
diff --git a/mcx/inference/hmc.py b/mcx/inference/hmc.py
@@ -92,7 +92,7 @@ def make_state(position):
 
     def warmup(
         self,
-        rng_key: jax.random.PRNGKey,
+        rng_key: jax.numpy.ndarray,
         initial_state: HMCState,
         kernel_factory: Callable,
         num_chains,
diff --git a/mcx/inference/kernels.py b/mcx/inference/kernels.py
@@ -105,9 +105,7 @@ def hmc_kernel(
     """
 
     @jax.jit
-    def kernel(
-        rng_key: jax.random.PRNGKey, state: HMCState
-    ) -> Tuple[HMCState, HMCInfo]:
+    def kernel(rng_key: jnp.ndarray, state: HMCState) -> Tuple[HMCState, HMCInfo]:
         """Moves the chain by one step using the Hamiltonian dynamics.
 
         Parameters
@@ -219,9 +217,7 @@ def rwm_kernel(logpdf: Callable, proposal_generator: Callable) -> Callable:
     """
 
     @jax.jit
-    def kernel(
-        rng_key: jax.random.PRNGKey, state: RWMState
-    ) -> Tuple[RWMState, RWMInfo]:
+    def kernel(rng_key: jnp.ndarray, state: RWMState) -> Tuple[RWMState, RWMInfo]:
         """Moves the chain by one step using the Random Walk Metropolis algorithm.
 
         Parameters
diff --git a/mcx/inference/metrics.py b/mcx/inference/metrics.py
@@ -10,7 +10,7 @@
 
 
 KineticEnergy = Callable[[jnp.DeviceArray], float]
-MomentumGenerator = Callable[[jax.random.PRNGKey], jnp.DeviceArray]
+MomentumGenerator = Callable[[jnp.ndarray], jnp.DeviceArray]
 
 
 def gaussian_euclidean_metric(
@@ -35,7 +35,7 @@ def gaussian_euclidean_metric(
         mass_matrix_sqrt = jnp.sqrt(jnp.reciprocal(inverse_mass_matrix))
 
         @jax.jit
-        def momentum_generator(rng_key: jax.random.PRNGKey) -> jnp.DeviceArray:
+        def momentum_generator(rng_key: jnp.ndarray) -> jnp.DeviceArray:
             std = jax.random.normal(rng_key, shape)
             p = jnp.multiply(std, mass_matrix_sqrt)
             return p
@@ -52,7 +52,7 @@ def kinetic_energy(momentum: jnp.DeviceArray) -> float:
         mass_matrix_sqrt = cholesky_of_inverse(inverse_mass_matrix)
 
         @jax.jit
-        def momentum_generator(rng_key: jax.random.PRNGKey) -> jnp.DeviceArray:
+        def momentum_generator(rng_key: jnp.ndarray) -> jnp.DeviceArray:
             std = jax.random.normal(rng_key, shape)
             p = jnp.dot(std, mass_matrix_sqrt)
             return p
@@ -67,7 +67,7 @@ def kinetic_energy(momentum: jnp.DeviceArray) -> float:
     else:
         raise ValueError(
             "The mass matrix has the wrong number of dimensions:"
-            f" expected 1 or 2, got {jnp.dim(inverse_mass_matrix)}."
+            f" expected 1 or 2, got {jnp.ndim(inverse_mass_matrix)}."
         )
 
 
diff --git a/mcx/inference/proposals.py b/mcx/inference/proposals.py
diff --git a/mcx/sample.py b/mcx/sample.py
diff --git a/requirements-dev.txt b/requirements-dev.txt

Original file line number	Diff line number	Diff line change
`@@ -1,5 +1,7 @@`
`1`	`1`	`from .gelman_rubin import online_gelman_rubin`
	`2`	`+from .mcmc import divergences`
`2`	`3`
`3`	`4`	`__all__ = [`
	`5`	`+ "divergences",`
`4`	`6`	`"online_gelman_rubin",`
`5`	`7`	`]`