shap
diff --git a/‎CHANGELOG.md
+3 b/‎CHANGELOG.md
+3
diff --git a/‎tests/benchmark/perturbation.py
+5-2 b/‎tests/benchmark/perturbation.py
+5-2
diff --git a/‎tests/conftest.py
+55 b/‎tests/conftest.py
+55
diff --git a/‎tests/explainers/test_deep.py
+48-26 b/‎tests/explainers/test_deep.py
+48-26
diff --git a/‎tests/explainers/test_gradient.py
+22-16 b/‎tests/explainers/test_gradient.py
+22-16
@@ -108,6 +108,9 @@ into the main shap repository. PRs from this origin are labelled here as `fork#1
   by @connortann)
 - Reduced unit test time by ~5 mins
   ([#3046](https://github.com/slundberg/shap/pull/3046) by @connortann).
+- Introduced fixtures for reproducible fuzz testing
+  ([#3048](https://github.com/slundberg/shap/pull/3048) by @connortann).
+
 
 ## [0.41.0] - 2022-06-16
 
 
@@ -10,9 +10,12 @@ def model(x, y):
 
 sort_order = 'positive'
 perturbation = 'keep'
-X = np.random.random((10,13))
 
-def test_init():
+def test_init(random_seed):
+
+    rs = np.random.RandomState(random_seed)
+    X = rs.random((10,13))
+
     tabular_masker = Independent(X)
     sequential_perturbation = benchmark.perturbation.SequentialPerturbation(model, tabular_masker, sort_order, perturbation)
     assert sequential_perturbation.data_type == "tabular"
 
@@ -0,0 +1,55 @@
+import numpy as np
+import pytest
+
+
+def pytest_addoption(parser):
+    parser.addoption("--random-seed", action="store", help="Fix the random seed")
+
+
+@pytest.fixture()
+def random_seed(request) -> int:
+    """Provides a test-specific random seed for reproducible "fuzz testing".
+
+    Example use in a test:
+
+        def test_thing(random_seed):
+
+            # Numpy
+            rs = np.random.RandomState(seed=random_seed)
+            values = rs.randint(...)
+
+            # Pytorch
+            torch.manual_seed(random_seed)
+
+            # Tensorflow
+            tf.compat.v1.random.set_random_seed(random_seed)
+
+    By default, a new seed is generated on each run of the tests. If a test
+    fails, the random seed used will be displayed in the pytest logs.
+
+    The seed can be fixed by providing a CLI option, e.g.:
+
+        pytest --random-seed 123
+
+    For numpy usage, note that the legacy `RandomState` has stricter
+    version-to-version compatibility guarantees than the new-style `default_rng`:
+    https://numpy.org/doc/stable/reference/random/compatibility.html
+
+    """
+    manual_seed = request.config.getoption("--random-seed")
+    if manual_seed is not None:
+        return int(manual_seed)
+    else:
+        # Otherwise, create a new seed for each test
+        rs = np.random.RandomState()
+        return rs.randint(0, 1000)
+
+
+@pytest.fixture(autouse=True)
+def global_random_seed():
+    """Set the global numpy random seed before each test.
+
+    NB: tests that use random numbers should instantiate a local
+    `np.random.RandomState` rather than use the global numpy random state.
+    """
+    np.random.seed(0)
@@ -15,15 +15,18 @@
 
 # pylint: disable=import-outside-toplevel, no-name-in-module, import-error
 
-def test_tf_eager():
+def test_tf_eager(random_seed):
     """ This is a basic eager example from keras.
     """
-
     tf = pytest.importorskip('tensorflow')
+
+    tf.compat.v1.random.set_random_seed(random_seed)
+    rs = np.random.RandomState(random_seed)
+
     if version.parse(tf.__version__) >= version.parse("2.4.0"):
         pytest.skip("Deep explainer does not work for TF 2.4 in eager mode.")
 
-    x = pd.DataFrame({"B": np.random.random(size=(100,))})
+    x = pd.DataFrame({"B": rs.random(size=(100,))})
     y = x.B
     y = y.map(lambda zz: chr(int(zz * 2 + 65))).str.get_dummies()
 
@@ -39,10 +42,12 @@ def test_tf_eager():
     assert np.abs(e.expected_value[0] + sv[0].sum(-1) - model(x.values)[:, 0]).max() < 1e-4
 
 
-def test_tf_keras_mnist_cnn(): # pylint: disable=too-many-locals
+def test_tf_keras_mnist_cnn(random_seed):
     """ This is the basic mnist cnn example from keras.
     """
     tf = pytest.importorskip('tensorflow')
+    rs = np.random.RandomState(random_seed)
+    tf.compat.v1.random.set_random_seed(random_seed)
 
     from tensorflow import keras
     from tensorflow.compat.v1 import ConfigProto, InteractiveSession
@@ -72,10 +77,10 @@ def test_tf_keras_mnist_cnn(): # pylint: disable=too-many-locals
 
     # the data, split between train and test sets
     # (x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()
-    x_train = np.random.randn(200, 28, 28)
-    y_train = np.random.randint(0, 9, 200)
-    x_test = np.random.randn(200, 28, 28)
-    y_test = np.random.randint(0, 9, 200)
+    x_train = rs.randn(200, 28, 28)
+    y_train = rs.randint(0, 9, 200)
+    x_test = rs.randn(200, 28, 28)
+    y_test = rs.randint(0, 9, 200)
 
     if K.image_data_format() == 'channels_first':
         x_train = x_train.reshape(x_train.shape[0], 1, img_rows, img_cols)
@@ -119,8 +124,7 @@ def test_tf_keras_mnist_cnn(): # pylint: disable=too-many-locals
               validation_data=(x_test[:10, :], y_test[:10, :]))
 
     # explain by passing the tensorflow inputs and outputs
-    np.random.seed(0)
-    inds = np.random.choice(x_train.shape[0], 3, replace=False)
+    inds = rs.choice(x_train.shape[0], 3, replace=False)
     e = shap.DeepExplainer((model.layers[0].input, model.layers[-1].input), x_train[inds, :, :])
     shap_values = e.shap_values(x_test[:1])
 
@@ -136,6 +140,10 @@ def test_tf_keras_mnist_cnn(): # pylint: disable=too-many-locals
 def test_tf_keras_linear():
     """Test verifying that a linear model with linear data gives the correct result.
     """
+
+    # FIXME: this test should ideally pass with any random seed. See #2960
+    random_seed = 0
+
     tf = pytest.importorskip('tensorflow')
 
     from tensorflow.keras.layers import Dense, Input
@@ -144,14 +152,15 @@ def test_tf_keras_linear():
 
     tf.compat.v1.disable_eager_execution()
 
-    np.random.seed(0)
+    tf.compat.v1.random.set_random_seed(random_seed)
+    rs = np.random.RandomState(random_seed)
 
     # coefficients relating y with x1 and x2.
     coef = np.array([1, 2]).T
 
     # generate data following a linear relationship
-    x = np.random.normal(1, 10, size=(1000, len(coef)))
-    y = np.dot(x, coef) + 1 + np.random.normal(scale=0.1, size=1000)
+    x = rs.normal(1, 10, size=(1000, len(coef)))
+    y = np.dot(x, coef) + 1 + rs.normal(scale=0.1, size=1000)
 
     # create a linear model
     inputs = Input(shape=(2,))
@@ -176,10 +185,12 @@ def test_tf_keras_linear():
     np.testing.assert_allclose(expected - values, 0, atol=1e-5)
 
 
-def test_tf_keras_imdb_lstm():
+def test_tf_keras_imdb_lstm(random_seed):
     """ Basic LSTM example using the keras API defined in tensorflow
     """
     tf = pytest.importorskip('tensorflow')
+    rs = np.random.RandomState(random_seed)
+    tf.compat.v1.random.set_random_seed(random_seed)
 
     # this fails right now for new TF versions (there is a warning in the code for this)
     if version.parse(tf.__version__) >= version.parse("2.5.0"):
@@ -193,7 +204,6 @@ def test_tf_keras_imdb_lstm():
     tf.compat.v1.disable_eager_execution()
 
     # load the data from keras
-    np.random.seed(7)
     max_features = 1000
     try:
         (X_train, _), (X_test, _) = imdb.load_data(num_words=max_features)
@@ -211,7 +221,7 @@ def test_tf_keras_imdb_lstm():
     mod.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
 
     # select the background and test samples
-    inds = np.random.choice(X_train.shape[0], 3, replace=False)
+    inds = rs.choice(X_train.shape[0], 3, replace=False)
     background = X_train[inds]
     testx = X_test[10:11]
 
@@ -238,6 +248,12 @@ def test_pytorch_mnist_cnn():
     from torch import nn
     from torch.nn import functional as F
 
+    # FIXME: this test should ideally pass with any random seed. See #2960
+    random_seed = 0
+
+    torch.manual_seed(random_seed)
+    rs = np.random.RandomState(random_seed)
+
     class RandData:
         """ Random test data.
         """
@@ -315,8 +331,7 @@ def train(model, device, train_loader, optimizer, _, cutoff=20):
         train(model, device, train_loader, optimizer, 1)
 
         next_x, _ = next(iter(train_loader))
-        np.random.seed(0)
-        inds = np.random.choice(next_x.shape[0], 3, replace=False)
+        inds = rs.choice(next_x.shape[0], 3, replace=False)
         if interim:
             e = shap.DeepExplainer((model, model.conv_layers[0]), next_x[inds, :, :, :])
         else:
@@ -349,7 +364,7 @@ def train(model, device, train_loader, optimizer, _, cutoff=20):
     run_test(train_loader, test_loader, interim=False)
 
 
-def test_pytorch_custom_nested_models():
+def test_pytorch_custom_nested_models(random_seed):
     """Testing single outputs
     """
     torch = pytest.importorskip('torch')
@@ -359,6 +374,9 @@ def test_pytorch_custom_nested_models():
     from torch.nn import functional as F
     from torch.utils.data import DataLoader, TensorDataset
 
+    torch.manual_seed(random_seed)
+    rs = np.random.RandomState(random_seed)
+
     X, y = fetch_california_housing(return_X_y=True)
     num_features = X.shape[1]
     data = TensorDataset(torch.tensor(X).float(),
@@ -436,8 +454,7 @@ def train(model, device, train_loader, optimizer, epoch):
     train(model, device, loader, optimizer, 1)
 
     next_x, _ = next(iter(loader))
-    np.random.seed(0)
-    inds = np.random.choice(next_x.shape[0], 20, replace=False)
+    inds = rs.choice(next_x.shape[0], 20, replace=False)
     e = shap.DeepExplainer(model, next_x[inds, :])
     test_x, _ = next(iter(loader))
     shap_values = e.shap_values(test_x[:1])
@@ -461,6 +478,11 @@ def test_pytorch_single_output():
     from torch.nn import functional as F
     from torch.utils.data import DataLoader, TensorDataset
 
+    # FIXME: this test should ideally pass with any random seed. See #2960
+    random_seed=0
+    torch.manual_seed(random_seed)
+    rs = np.random.RandomState(random_seed)
+
     X, y = fetch_california_housing(return_X_y=True)
     num_features = X.shape[1]
     data = TensorDataset(torch.tensor(X).float(),
@@ -507,8 +529,7 @@ def train(model, device, train_loader, optimizer, epoch):
     train(model, device, loader, optimizer, 1)
 
     next_x, _ = next(iter(loader))
-    np.random.seed(0)
-    inds = np.random.choice(next_x.shape[0], 20, replace=False)
+    inds = rs.choice(next_x.shape[0], 20, replace=False)
     e = shap.DeepExplainer(model, next_x[inds, :])
     test_x, _ = next(iter(loader))
     shap_values = e.shap_values(test_x[:1])
@@ -522,10 +543,12 @@ def train(model, device, train_loader, optimizer, epoch):
     assert d / np.abs(diff).sum() < 0.001, "Sum of SHAP values does not match difference! %f" % (d / np.abs(diff).sum())
 
 
-def test_pytorch_multiple_inputs():
+def test_pytorch_multiple_inputs(random_seed):
     """ Check a multi-input scenario.
     """
     torch = pytest.importorskip('torch')
+    torch.manual_seed(random_seed)
+    rs = np.random.RandomState(random_seed)
 
     def _run_pytorch_multiple_inputs_test(disconnected):
         """ Testing multiple inputs
@@ -590,8 +613,7 @@ def train(model, device, train_loader, optimizer, epoch):
         train(model, device, loader, optimizer, 1)
 
         next_x1, next_x2, _ = next(iter(loader))
-        np.random.seed(0)
-        inds = np.random.choice(next_x1.shape[0], 20, replace=False)
+        inds = rs.choice(next_x1.shape[0], 20, replace=False)
         background = [next_x1[inds, :], next_x2[inds, :]]
         e = shap.DeepExplainer(model, background)
         test_x1, test_x2, _ = next(iter(loader))
 
@@ -7,10 +7,15 @@
 
 # pylint: disable=import-error, import-outside-toplevel, no-name-in-module, import-error
 
-def test_tf_keras_mnist_cnn():
+def test_tf_keras_mnist_cnn(random_seed):
     """ This is the basic mnist cnn example from keras.
     """
+
     tf = pytest.importorskip('tensorflow')
+
+    rs = np.random.RandomState(random_seed)
+    tf.compat.v1.random.set_random_seed(random_seed)
+
     from tensorflow.compat.v1 import ConfigProto, InteractiveSession
     from tensorflow.keras import backend as K
     from tensorflow.keras.layers import (
@@ -38,10 +43,10 @@ def test_tf_keras_mnist_cnn():
 
     # the data, split between train and test sets
     #(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()
-    x_train = np.random.randn(200, 28, 28)
-    y_train = np.random.randint(0, 9, 200)
-    x_test = np.random.randn(200, 28, 28)
-    y_test = np.random.randint(0, 9, 200)
+    x_train = rs.randn(200, 28, 28)
+    y_train = rs.randint(0, 9, 200)
+    x_test = rs.randn(200, 28, 28)
+    y_test = rs.randint(0, 9, 200)
 
     if K.image_data_format() == 'channels_first':
         x_train = x_train.reshape(x_train.shape[0], 1, img_rows, img_cols)
@@ -88,8 +93,7 @@ def test_tf_keras_mnist_cnn():
     )
 
     # explain by passing the tensorflow inputs and outputs
-    np.random.seed(0)
-    inds = np.random.choice(x_train.shape[0], 20, replace=False)
+    inds = rs.choice(x_train.shape[0], 20, replace=False)
     e = shap.GradientExplainer((model.layers[0].input, model.layers[-1].input), x_train[inds, :, :])
     shap_values = e.shap_values(x_test[:1], nsamples=2000)
 
@@ -102,14 +106,17 @@ def test_tf_keras_mnist_cnn():
     sess.close()
 
 
-def test_pytorch_mnist_cnn():
+def test_pytorch_mnist_cnn(random_seed):
     """The same test as above, but for pytorch
     """
+
     torch = pytest.importorskip('torch')
+    torch.manual_seed(random_seed)
+    rs = np.random.RandomState(random_seed)
 
     from torch import nn
     from torch.nn import functional as F
-    torch.manual_seed(0)
+
 
     batch_size = 128
 
@@ -199,8 +206,7 @@ def train(model, device, train_loader, optimizer, _, cutoff=20):
         train(model, device, train_loader, optimizer, 1)
 
         next_x, _ = next(iter(train_loader))
-        np.random.seed(0)
-        inds = np.random.choice(next_x.shape[0], 3, replace=False)
+        inds = rs.choice(next_x.shape[0], 3, replace=False)
         if interim:
             e = shap.GradientExplainer((model, model.conv1), next_x[inds, :, :, :])
         else:
@@ -225,13 +231,13 @@ def train(model, device, train_loader, optimizer, _, cutoff=20):
     run_test(train_loader, test_loader, False)
 
 
-def test_pytorch_multiple_inputs():
-    """ Test multi-input scenarios.
-    """
-    # pylint: disable=no-member
+def test_pytorch_multiple_inputs(random_seed):
+    """ Test multi-input scenarios."""
+
     torch = pytest.importorskip('torch')
     from torch import nn
-    torch.manual_seed(1)
+
+    torch.manual_seed(random_seed)
     batch_size = 10
     x1 = torch.ones(batch_size, 3)
     x2 = torch.ones(batch_size, 4)