fixes bug with a TabPFN model not working as intended after explanations (#401)

mmschlk · Copilot · web-flow · commit 3c0fd87a8123 · 2025-06-16T13:38:47.000+02:00
* works on fixing the bug * reduced size of tabpfn model * fixes #396 * documents fix in CHANGELOG.md * Update tests/tests_imputer/test_tabpfn_imputer.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * fixed code-quality checks --------- Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -21,6 +21,7 @@
 - suppress a ``RuntimeWarning`` in ``Regression`` approximators ``solve_regression()``method when the solver is not able to find good interim solutions for the regression problem.
 #### Bug Fixes
 - fixed a bug in the `shapiq.waterfall_plot` function that caused the plot to not display correctly resulting in cutoff y_ticks. Additionally, the file was renamed from `watefall.py` to `waterfall.py` to match the function name [#377](https://github.com/mmschlk/shapiq/pull/377)
+- fixes a bug with `TabPFNExplainer`, where the model could not be used for predictions after it was explained. This was due to the model being fitted on a subset of features during the explanation, which caused inconsistencies in the model's predictions afterwards. The fix refits the tabpfn model on the whole dataset (without omitting features) at the end of each call to `TabPFNImputer.value_function`. This means that the original model can be used for predictions after it has been explained. [#396](https://github.com/mmschlk/shapiq/issues/396)
 
 ### v1.2.3 (2025-03-24)
 - substantially improves the runtime of all `Regression` approximators by a) a faster pre-computation of the regression matrices and b) a faster computation of the weighted least squares regression [#340](https://github.com/mmschlk/shapiq/issues/340)
diff --git a/shapiq/games/imputer/tabpfn_imputer.py b/shapiq/games/imputer/tabpfn_imputer.py
@@ -77,6 +77,7 @@ def __init__(
                 accept the model and the data point as input and return the model's predictions. If
                 the model is instantiated via a ``shapiq.Explainer`` object, this function is
                 automatically set to the model's prediction function. Defaults to ``None``.
+
         """
         self.x_train = x_train
         self.y_train = y_train
@@ -136,4 +137,6 @@ def value_function(self, coalitions: np.ndarray) -> np.ndarray:
             self.model.fit(x_train_coal, self.y_train)
             pred = float(self.predict(x_explain_coal))
             output[i] = pred
+        # refit the model on the full training data to ensure it is in a consistent state
+        self.model.fit(self.x_train, self.y_train)
         return output
diff --git a/tests/fixtures/models.py b/tests/fixtures/models.py
@@ -185,7 +185,7 @@ def tabpfn_classification_problem(
 
     data, labels = background_clf_dataset_binary_small
     data, x_test, labels, _ = train_test_split(data, labels, random_state=42, train_size=8)
-    model = tabpfn.TabPFNClassifier()
+    model = tabpfn.TabPFNClassifier(n_estimators=1, fit_mode="low_memory")
     model.fit(data, labels)
     return model, data, labels, x_test
 
@@ -199,7 +199,7 @@ def tabpfn_regression_problem(
 
     data, labels = background_reg_dataset_small
     data, x_test, labels, _ = train_test_split(data, labels, random_state=42, train_size=8)
-    model = tabpfn.TabPFNRegressor()
+    model = tabpfn.TabPFNRegressor(n_estimators=1, fit_mode="low_memory")
     model.fit(data, labels)
     return model, data, labels, x_test
 
diff --git a/tests/tests_explainer/test_explainer_tabpfn.py b/tests/tests_explainer/test_explainer_tabpfn.py
@@ -11,55 +11,77 @@
 
 @skip_if_no_tabpfn
 @pytest.mark.external_libraries
-def test_tabpfn_explainer_clf(tabpfn_classification_problem):
-    """Test the TabPFNExplainer class for classification problems."""
-    import tabpfn
+class TestTabPFNExplainer:
+    """Tests for the TabPFNExplainer class."""
 
-    # setup
-    model, data, labels, x_test = tabpfn_classification_problem
-    x_explain = x_test[0]
-    assert isinstance(model, tabpfn.TabPFNClassifier)
-    if model.n_features_in_ == data.shape[1]:
-        model.fit(data, labels)
-    assert model.n_features_in_ == data.shape[1]
+    def test_tabpfn_explainer_clf(self, tabpfn_classification_problem):
+        """Test the TabPFNExplainer class for classification problems."""
+        import tabpfn
 
-    explainer = TabPFNExplainer(model=model, data=data, labels=labels, x_test=x_test)
-    explanation = explainer.explain(x=x_explain, budget=BUDGET_NR_FEATURES_SMALL)
-    assert isinstance(explanation, InteractionValues)
+        # setup
+        model, data, labels, x_test = tabpfn_classification_problem
+        x_explain = x_test[0]
+        assert isinstance(model, tabpfn.TabPFNClassifier)
+        if model.n_features_in_ == data.shape[1]:
+            model.fit(data, labels)
+        assert model.n_features_in_ == data.shape[1]
 
-    # test that bare explainer gets turned into TabPFNExplainer
-    explainer = Explainer(model=model, data=data, labels=labels, x_test=x_test)
-    assert isinstance(explainer, TabPFNExplainer)
+        explainer = TabPFNExplainer(model=model, data=data, labels=labels, x_test=x_test)
+        explanation = explainer.explain(x=x_explain, budget=BUDGET_NR_FEATURES_SMALL)
+        assert isinstance(explanation, InteractionValues)
 
-    # test that TabularExplainer works as well
-    with pytest.warns(UserWarning):
-        explainer = TabularExplainer(model=model, data=data, class_index=1, imputer="baseline")
-        assert isinstance(explainer, TabularExplainer)
+        # test that bare explainer gets turned into TabPFNExplainer
+        explainer = Explainer(model=model, data=data, labels=labels, x_test=x_test)
+        assert isinstance(explainer, TabPFNExplainer)
+
+        # test that TabularExplainer works as well
+        with pytest.warns(UserWarning):
+            explainer = TabularExplainer(model=model, data=data, class_index=1, imputer="baseline")
+            assert isinstance(explainer, TabularExplainer)
+
+    def test_tabpfn_explainer_reg(self, tabpfn_regression_problem):
+        """Test the TabPFNExplainer class for regression problems."""
+        import tabpfn
+
+        # setup
+        model, data, labels, x_test = tabpfn_regression_problem
+        x_explain = x_test[0]
+        assert isinstance(model, tabpfn.TabPFNRegressor)
+        if model.n_features_in_ == data.shape[1]:
+            model.fit(data, labels)
+        assert model.n_features_in_ == data.shape[1]
+
+        explainer = TabPFNExplainer(model=model, data=data, labels=labels, x_test=x_test)
+        explanation = explainer.explain(x=x_explain, budget=BUDGET_NR_FEATURES_SMALL)
+        assert isinstance(explanation, InteractionValues)
+
+        # test that bare explainer gets turned into TabPFNExplainer
+        explainer = Explainer(model=model, data=data, labels=labels, x_test=x_test)
+        assert isinstance(explainer, TabPFNExplainer)
+
+        # test that TabularExplainer works as well
+        with pytest.warns(UserWarning):
+            explainer = TabularExplainer(model=model, data=data, class_index=1, imputer="baseline")
+            assert isinstance(explainer, TabularExplainer)
 
 
 @skip_if_no_tabpfn
 @pytest.mark.external_libraries
-def test_tabpfn_explainer_reg(tabpfn_regression_problem):
-    """Test the TabPFNExplainer class for regression problems."""
-    import tabpfn
-
-    # setup
-    model, data, labels, x_test = tabpfn_regression_problem
-    x_explain = x_test[0]
-    assert isinstance(model, tabpfn.TabPFNRegressor)
-    if model.n_features_in_ == data.shape[1]:
-        model.fit(data, labels)
-    assert model.n_features_in_ == data.shape[1]
-
-    explainer = TabPFNExplainer(model=model, data=data, labels=labels, x_test=x_test)
-    explanation = explainer.explain(x=x_explain, budget=BUDGET_NR_FEATURES_SMALL)
-    assert isinstance(explanation, InteractionValues)
-
-    # test that bare explainer gets turned into TabPFNExplainer
-    explainer = Explainer(model=model, data=data, labels=labels, x_test=x_test)
-    assert isinstance(explainer, TabPFNExplainer)
-
-    # test that TabularExplainer works as well
-    with pytest.warns(UserWarning):
-        explainer = TabularExplainer(model=model, data=data, class_index=1, imputer="baseline")
-        assert isinstance(explainer, TabularExplainer)
+class TestTabPFNExplainerBugFixes:
+    """Tests for bug fixes conducted in the TabPFNExplainer."""
+
+    def test_after_explanation_prediction(self, tabpfn_regression_problem):
+        """Tests that the model can be used for prediction after explanation.
+
+        This bug was raised in issue [#396](https://github.com/mmschlk/shapiq/issues/396)
+        """
+        model, data, labels, x_test = tabpfn_regression_problem
+        x_explain = x_test[0]
+
+        _ = model.predict(x_explain.reshape(1, -1))
+
+        explainer = TabPFNExplainer(model=model, data=data, labels=labels, x_test=x_test)
+        explainer.explain(x=x_explain, budget=3)
+        assert model.n_features_in_ == data.shape[1]
+
+        model.predict(x_explain.reshape(1, -1))  # should not raise an error
diff --git a/tests/tests_imputer/test_tabpfn_imputer.py b/tests/tests_imputer/test_tabpfn_imputer.py
@@ -36,12 +36,12 @@ def test_tabpfn_imputer(tabpfn_classification_problem):
     imputer.fit(x=x_test[0])
 
     # test the imputer
-    imputer(np.asarray([True, True, True]))  # 3 features should now been fitted
-    assert model.n_features_in_ == 3
-    imputer(np.asarray([True, True, False]))  # 2 features should now been fitted
-    assert model.n_features_in_ == 2
-    imputer(np.asarray([False, True, False]))  # 1 feature should now been fitted
-    assert model.n_features_in_ == 1
+    out_1 = imputer(np.asarray([True, True, True]))  # coalition with all 3 features
+    out_2 = imputer(np.asarray([True, True, False]))  # coalition with 2 features
+    out_3 = imputer(np.asarray([False, True, False]))  # coalition with 1 feature
+    assert out_1 != out_2
+    assert out_1 != out_3
+    assert out_2 != out_3
 
 
 @skip_if_no_tabpfn