[MERGE] Fix mixval (#222)

antoinecollas · web-flow · commit 5e530c720547 · 2024-08-15T15:14:48.000+02:00
* fix mixval

* fix scorer evaluation

* add classification check

* simplify mix_labels computation

* sample_domain

* alpha >= 0.5 and TODO handle multiple target domains

---------

Co-authored-by: Antoine Collas <22830806+antoinecollas@users.noreply.github.com>
diff --git a/skada/metrics.py b/skada/metrics.py
@@ -634,15 +634,20 @@ class MixValScorer(_BaseDomainAwareScorer):
     ----------
     alpha : float, default=0.55
         Mixing parameter for mixup.
-    random_state : int, RandomState instance or None, default=None
-        Controls the randomness of the mixing process.
-    greater_is_better : bool, default=True
-        Whether higher scores are better.
     ice_type : {'both', 'intra', 'inter'}, default='both'
         Type of ICE score to compute:
         - 'both': Compute both intra-cluster and inter-cluster ICE scores (average).
         - 'intra': Compute only intra-cluster ICE score.
         - 'inter': Compute only inter-cluster ICE score.
+    scoring : str or callable, default=None
+        A string (see model evaluation documentation) or
+        a scorer callable object / function with signature
+        ``scorer(estimator, X, y)``.
+        If None, the provided estimator object's `score` method is used.
+    greater_is_better : bool, default=True
+        Whether higher scores are better.
+    random_state : int, RandomState instance or None, default=None
+        Controls the randomness of the mixing process.
 
     Attributes
     ----------
@@ -665,15 +670,17 @@ class MixValScorer(_BaseDomainAwareScorer):
     def __init__(
         self,
         alpha=0.55,
-        random_state=None,
-        greater_is_better=True,
         ice_type="both",
+        scoring=None,
+        greater_is_better=True,
+        random_state=None,
     ):
         super().__init__()
         self.alpha = alpha
-        self.random_state = random_state
-        self._sign = 1 if greater_is_better else -1
         self.ice_type = ice_type
+        self.scoring = scoring
+        self._sign = 1 if greater_is_better else -1
+        self.random_state = random_state
 
         if self.ice_type not in ["both", "intra", "inter"]:
             raise ValueError("ice_type must be 'both', 'intra', or 'inter'")
@@ -698,10 +705,17 @@ def _score(self, estimator, X, y=None, sample_domain=None, **params):
         score : float
             The ICE score.
         """
+        scorer = check_scoring(estimator, self.scoring)
+
         X, _, sample_domain = check_X_y_domain(X, y, sample_domain)
         source_idx = extract_source_indices(sample_domain)
         X_target = X[~source_idx]
 
+        # Check from y values if it is a classification problem
+        y_type = _find_y_type(y)
+        if y_type != Y_Type.DISCRETE:
+            raise ValueError("MixVal scorer only supports classification problems.")
+
         rng = check_random_state(self.random_state)
         rand_idx = rng.permutation(X_target.shape[0])
 
@@ -713,24 +727,29 @@ def _score(self, estimator, X, y=None, sample_domain=None, **params):
         same_idx = (labels_a == labels_b).nonzero()[0]
         diff_idx = (labels_a != labels_b).nonzero()[0]
 
-        # Mixup with images and hard pseudo labels
+        # Mixup with X_target and hard pseudo labels
         mix_inputs = self.alpha * X_target + (1 - self.alpha) * X_target[rand_idx]
-        mix_labels = self.alpha * labels_a + (1 - self.alpha) * labels_b
-
-        # Obtain predictions for the mixed samples
-        mix_pred = estimator.predict(
-            mix_inputs, sample_domain=np.full(mix_inputs.shape[0], -1)
-        )
+        if self.alpha >= 0.5:
+            mix_labels = labels_a
+        else:
+            mix_labels = labels_b
 
         # Calculate ICE scores based on ice_type
+        # TODO: handle multiple target domains
         if self.ice_type in ["both", "intra"]:
-            ice_same = (
-                np.sum(mix_pred[same_idx] == mix_labels[same_idx]) / same_idx.shape[0]
+            ice_same = scorer(
+                estimator,
+                mix_inputs[same_idx],
+                mix_labels[same_idx],
+                sample_domain=np.full(same_idx.shape[0], -1),
             )
 
         if self.ice_type in ["both", "inter"]:
-            ice_diff = (
-                np.sum(mix_pred[diff_idx] == mix_labels[diff_idx]) / diff_idx.shape[0]
+            ice_diff = scorer(
+                estimator,
+                mix_inputs[diff_idx],
+                mix_labels[diff_idx],
+                sample_domain=np.full(diff_idx.shape[0], -1),
             )
 
         if self.ice_type == "both":
diff --git a/skada/tests/test_scorer.py b/skada/tests/test_scorer.py
@@ -297,3 +297,13 @@ def test_mixval_scorer(da_dataset):
     # Test invalid ice_type
     with pytest.raises(ValueError):
         MixValScorer(ice_type="invalid")
+
+
+def test_mixval_scorer_regression(da_reg_dataset):
+    X, y, sample_domain = da_reg_dataset
+
+    estimator = make_da_pipeline(DensityReweightAdapter(), LinearRegression())
+
+    scorer = MixValScorer(alpha=0.55, random_state=42)
+    with pytest.raises(ValueError):
+        scorer(estimator, X, y, sample_domain)