
Commit 07e9c17

Merge pull request #129 from st-tech/add-error-to-detect-negative-reward
Add error to detect negative rewards in IPWLearner
2 parents: cc652ae + a2dfde1

File tree: 6 files changed, +175 −91 lines

obp/dataset/synthetic_continuous.py

Lines changed: 1 addition & 2 deletions
@@ -240,12 +240,11 @@ def calc_ground_truth_policy_value(
 
         """
         check_array(array=context, name="context", expected_dim=2)
+        check_array(array=action, name="action", expected_dim=1)
         if context.shape[1] != self.dim_context:
             raise ValueError(
                 "Expected `context.shape[1] == self.dim_context`, found it False"
             )
-        if not isinstance(action, np.ndarray) or action.ndim != 1:
-            raise ValueError("action must be 1D array")
         if context.shape[0] != action.shape[0]:
             raise ValueError(
                 "Expected `context.shape[0] == action.shape[0]`, but found it False"

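For readers unfamiliar with the helper, a minimal sketch of what the swapped-in validation does (a sketch only; it assumes `obp.utils.check_array` raises `ValueError` on a dimension mismatch, which is what its use in place of the removed manual check implies):

import numpy as np
from obp.utils import check_array

action = np.arange(5)                                     # a valid 1D action array
check_array(array=action, name="action", expected_dim=1)  # passes silently
# check_array(array=action.reshape(5, 1), name="action", expected_dim=1)  # would raise ValueError
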
obp/policy/base.py

Lines changed: 1 addition & 1 deletion
@@ -209,7 +209,7 @@ def __post_init__(self) -> None:
 
         if self.n_actions < self.len_list:
             raise ValueError(
-                f"n_actions >= len_list should hold, but n_actions is {self.n_actions} and len_list is {self.len_list}"
+                f"Expected `n_actions >= len_list`, but got n_actions={self.n_actions} < len_list={self.len_list}"
             )
 
     @property
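As a quick illustration (not part of the diff; it assumes the public `IPWLearner` constructor from `obp.policy`, which inherits this `__post_init__`), the reworded guard now reports both offending values at construction time:

from obp.policy import IPWLearner

IPWLearner(n_actions=3, len_list=5)
# ValueError: Expected `n_actions >= len_list`, but got n_actions=3 < len_list=5
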

obp/policy/offline.py

Lines changed: 19 additions & 26 deletions
@@ -18,8 +18,7 @@
 from tqdm import tqdm
 
 from .base import BaseOfflinePolicyLearner
-
-from ..utils import check_bandit_feedback_inputs
+from ..utils import check_bandit_feedback_inputs, check_array
 
 
 @dataclass
@@ -147,11 +146,20 @@ def fit(
             pscore=pscore,
             position=position,
         )
+        if (reward < 0).any():
+            raise ValueError(
+                "A negative value is found in `reward`."
+                "`obp.policy.IPWLearner` cannot handle negative rewards,"
+                "and please use `obp.policy.NNPolicyLearner` instead."
+            )
         if pscore is None:
             n_actions = np.int(action.max() + 1)
             pscore = np.ones_like(action) / n_actions
-        if position is None or self.len_list == 1:
+        if self.len_list == 1:
             position = np.zeros_like(action, dtype=int)
+        else:
+            if position is None:
+                raise ValueError("When `self.len_list=1`, `position` must be given.")
 
         for position_ in np.arange(self.len_list):
             X, sample_weight, y = self._create_train_data_for_opl(
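The new `(reward < 0).any()` guard is the headline change of this PR. A hedged sketch of how it surfaces to users, assuming the public `IPWLearner.fit` signature with `context`, `action`, `reward`, and `pscore`:

import numpy as np
from obp.policy import IPWLearner

n_rounds, n_actions = 4, 3
context = np.random.normal(size=(n_rounds, 5))
action = np.array([0, 1, 2, 1])
reward = np.array([1.0, 0.0, -1.0, 1.0])          # contains a negative reward
pscore = np.full(n_rounds, 1.0 / n_actions)

learner = IPWLearner(n_actions=n_actions)
learner.fit(context=context, action=action, reward=reward, pscore=pscore)
# ValueError: A negative value is found in `reward`. ... please use `obp.policy.NNPolicyLearner` instead.

The check appears to exist because `IPWLearner` reduces policy learning to weighted classification (see `_create_train_data_for_opl` and `sample_weight` above), where negative importance weights are not meaningful; the error message points to `obp.policy.NNPolicyLearner` as the alternative for such data.
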
@@ -184,8 +192,7 @@ def predict(self, context: np.ndarray) -> np.ndarray:
         If you want a non-repetitive action set, please use the `sample_action` method.
 
         """
-        if not isinstance(context, np.ndarray) or context.ndim != 2:
-            raise ValueError("context must be 2D array")
+        check_array(array=context, name="context", expected_dim=2)
 
         n_rounds = context.shape[0]
         action_dist = np.zeros((n_rounds, self.n_actions, self.len_list))
@@ -214,9 +221,7 @@ def predict_score(self, context: np.ndarray) -> np.ndarray:
         Scores for all possible pairs of action and position predicted by a classifier.
 
         """
-        assert (
-            isinstance(context, np.ndarray) and context.ndim == 2
-        ), "context must be 2D array"
+        check_array(array=context, name="context", expected_dim=2)
 
         n_rounds = context.shape[0]
         score_predicted = np.zeros((n_rounds, self.n_actions, self.len_list))
@@ -271,8 +276,7 @@ def sample_action(
         Action sampled by a trained classifier.
 
         """
-        if not isinstance(context, np.ndarray) or context.ndim != 2:
-            raise ValueError("context must be 2D array")
+        check_array(array=context, name="context", expected_dim=2)
         check_scalar(tau, name="tau", target_type=(int, float), min_val=0)
 
         n_rounds = context.shape[0]
@@ -329,10 +333,8 @@ def predict_proba(
         """
         assert (
             self.len_list == 1
-        ), "predict_proba method can be used only when len_list = 1"
-        assert (
-            isinstance(context, np.ndarray) and context.ndim == 2
-        ), "context must be 2D array"
+        ), "predict_proba method cannot be used when `len_list != 1`"
+        check_array(array=context, name="context", expected_dim=2)
         check_scalar(tau, name="tau", target_type=(int, float), min_val=0)
 
         score_predicted = self.predict_score(context=context)
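One practical consequence of the migration above: `predict_score` and `predict_proba` previously relied on bare `assert` statements, which raise `AssertionError` and are skipped entirely under `python -O`, whereas `check_array` raises `ValueError` consistently across `predict`, `predict_score`, `sample_action`, and `predict_proba`. A minimal sketch, assuming the default `IPWLearner` constructor and `fit` signature:

import numpy as np
from obp.policy import IPWLearner

learner = IPWLearner(n_actions=2)
learner.fit(
    context=np.random.normal(size=(4, 3)),
    action=np.array([0, 1, 0, 1]),
    reward=np.array([1.0, 0.0, 1.0, 1.0]),
)
learner.predict(context=np.random.normal(size=(2, 3)))   # OK: context is a 2D array
# learner.predict(context=np.random.normal(size=3))      # now raises ValueError via check_array
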
@@ -761,19 +763,16 @@ def fit(
             pscore=pscore,
             position=position,
         )
-
         if context.shape[1] != self.dim_context:
             raise ValueError(
                 "Expected `context.shape[1] == self.dim_context`, but found it False"
             )
-
         if pscore is None:
             pscore = np.ones_like(action) / self.n_actions
         if estimated_rewards_by_reg_model is None:
             estimated_rewards_by_reg_model = np.zeros(
                 (context.shape[0], self.n_actions, self.len_list)
             )
-
         if self.len_list == 1:
             position = np.zeros_like(action, dtype=int)
         else:
@@ -900,9 +899,7 @@ def predict(self, context: np.ndarray) -> np.ndarray:
         If you want a non-repetitive action set, please use the `sample_action` method.
 
         """
-        if not isinstance(context, np.ndarray) or context.ndim != 2:
-            raise ValueError("context must be 2D array")
-
+        check_array(array=context, name="context", expected_dim=2)
         if context.shape[1] != self.dim_context:
             raise ValueError(
                 "Expected `context.shape[1] == self.dim_context`, but found it False"
@@ -939,9 +936,7 @@ def sample_action(
         Action sampled by a trained classifier.
 
         """
-        if not isinstance(context, np.ndarray) or context.ndim != 2:
-            raise ValueError("context must be 2D array")
-
+        check_array(array=context, name="context", expected_dim=2)
         if context.shape[1] != self.dim_context:
             raise ValueError(
                 "Expected `context.shape[1] == self.dim_context`, but found it False"
@@ -988,9 +983,7 @@ def predict_proba(
         Action choice probabilities obtained by a trained classifier.
 
         """
-        if not isinstance(context, np.ndarray) or context.ndim != 2:
-            raise ValueError("context must be 2D array")
-
+        check_array(array=context, name="context", expected_dim=2)
         if context.shape[1] != self.dim_context:
             raise ValueError(
                 "Expected `context.shape[1] == self.dim_context`, but found it False"

obp/utils.py

Lines changed: 19 additions & 10 deletions
@@ -204,7 +204,8 @@ def check_bandit_feedback_inputs(
             == expected_reward.shape[0]
         ):
             raise ValueError(
-                "context, action, reward, and expected_reward must have the same number of samples."
+                "Expected `context.shape[0] == action.shape[0] == reward.shape[0] == expected_reward.shape[0]`"
+                ", but found it False"
             )
         if action.max() >= expected_reward.shape[1]:
             raise ValueError(
@@ -216,7 +217,8 @@
             context.shape[0] == action.shape[0] == reward.shape[0] == pscore.shape[0]
         ):
             raise ValueError(
-                "Expected `action.shape[0] == reward.shape[0] == pscore.shape[0]`, but found it False"
+                "Expected `context.shape[0] == action.shape[0] == reward.shape[0] == pscore.shape[0]`"
+                ", but found it False"
             )
         if np.any(pscore <= 0):
             raise ValueError("pscore must be positive")
@@ -227,14 +229,16 @@
             context.shape[0] == action.shape[0] == reward.shape[0] == position.shape[0]
         ):
             raise ValueError(
-                "context, action, reward, and position must have the same number of samples."
+                "Expected `context.shape[0] == action.shape[0] == reward.shape[0] == position.shape[0]`"
+                ", but found it False"
             )
         if not (np.issubdtype(position.dtype, np.integer) and position.min() >= 0):
             raise ValueError("position elements must be non-negative integers")
     else:
         if not (context.shape[0] == action.shape[0] == reward.shape[0]):
             raise ValueError(
-                "context, action, and reward must have the same number of samples."
+                "Expected `context.shape[0] == action.shape[0] == reward.shape[0]`"
+                ", but found it False"
             )
     if action_context is not None:
         check_array(array=action_context, name="action_context", expected_dim=2)
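To show how the reworded messages read in practice, a small sketch (assuming `obp.utils.check_bandit_feedback_inputs` accepts `context`, `action`, and `reward` keyword arguments, as it is called from `IPWLearner.fit`):

import numpy as np
from obp.utils import check_bandit_feedback_inputs

check_bandit_feedback_inputs(
    context=np.random.normal(size=(5, 3)),   # 5 samples
    action=np.zeros(4, dtype=int),           # 4 samples: sample counts disagree
    reward=np.ones(4),
)
# ValueError: Expected `context.shape[0] == action.shape[0] == reward.shape[0]`, but found it False
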
@@ -444,7 +448,8 @@ def check_continuous_ope_inputs(
             != action_by_evaluation_policy.shape[0]
         ):
             raise ValueError(
-                "Expected `estimated_rewards_by_reg_model.shape[0] == action_by_evaluation_policy.shape[0]`, but found if False"
+                "Expected `estimated_rewards_by_reg_model.shape[0] == action_by_evaluation_policy.shape[0]`"
+                ", but found if False"
             )
 
     # action, reward
@@ -457,14 +462,15 @@
     check_array(array=reward, name="reward", expected_dim=1)
     if not (action_by_behavior_policy.shape[0] == reward.shape[0]):
         raise ValueError(
-            "Expected `action_by_behavior_policy.shape[0] == reward.shape[0]`, but found it False"
+            "Expected `action_by_behavior_policy.shape[0] == reward.shape[0]`"
+            ", but found it False"
         )
     if not (
         action_by_behavior_policy.shape[0] == action_by_evaluation_policy.shape[0]
     ):
         raise ValueError(
             "Expected `action_by_behavior_policy.shape[0] == action_by_evaluation_policy.shape[0]`"
-            "but found it False"
+            ", but found it False"
         )
 
     # pscore
@@ -548,7 +554,8 @@ def _check_slate_ope_inputs(
         == evaluation_policy_pscore.shape[0]
     ):
         raise ValueError(
-            f"slate_id, position, reward, {pscore_type}, and evaluation_policy_{pscore_type} must have the same number of samples."
+            f"slate_id, position, reward, {pscore_type}, and evaluation_policy_{pscore_type} "
+            "must have the same number of samples."
         )
 
 
@@ -805,7 +812,8 @@ def check_ope_inputs_tensor(
     if estimated_rewards_by_reg_model is not None:
         if estimated_rewards_by_reg_model.shape != action_dist.shape:
             raise ValueError(
-                "Expected `estimated_rewards_by_reg_model.shape == action_dist.shape`, but found it False"
+                "Expected `estimated_rewards_by_reg_model.shape == action_dist.shape`"
+                ", but found it False"
             )
 
     # action, reward
@@ -831,7 +839,8 @@
             raise ValueError("pscore must be 1-dimensional")
         if not (action.shape[0] == reward.shape[0] == pscore.shape[0]):
             raise ValueError(
-                "Expected `action.shape[0] == reward.shape[0] == pscore.shape[0]`, but found it False"
+                "Expected `action.shape[0] == reward.shape[0] == pscore.shape[0]`"
+                ", but found it False"
             )
         if torch.any(pscore <= 0):
             raise ValueError("pscore must be positive")

tests/ope/test_regression_models.py

Lines changed: 4 additions & 4 deletions
@@ -270,7 +270,7 @@
         generate_action_dist(n_rounds, n_actions, len_list),
         3,
         1,
-        "Expected `action.shape[0]",
+        "Expected `context.shape[0]",
     ),
     (
         np.random.uniform(size=(n_rounds, 7)),
@@ -334,7 +334,7 @@
         None,
         3,
         1,
-        "context, action, reward, and position must have the same number of samples.",
+        "Expected `context.shape[0]",
     ),
     (
         np.random.uniform(size=(n_rounds, 7)),
@@ -382,7 +382,7 @@
         None,
         3,
         1,
-        "context, action, and reward must have the same number of samples",
+        "Expected `context.shape[0]",
    ),
     (
         np.random.uniform(size=(n_rounds, 7)),
@@ -398,7 +398,7 @@
         generate_action_dist(n_rounds, n_actions, len_list),
         3,
         1,
-        "Expected `action.shape[0]",
+        "Expected `context.shape[0]",
     ),
     (
         np.random.uniform(size=(n_rounds, 7)),
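The updated `description` entries are message prefixes rather than full sentences, matching the reworded errors in `obp/utils.py`. A hypothetical sketch of how such a prefix can be asserted (the actual test body lives outside this diff; `assert_raises_with_prefix` is an illustrative helper, not obp API):

import pytest

def assert_raises_with_prefix(func, kwargs, description):
    # Call `func` with invalid inputs and check the raised message starts with the expected prefix.
    with pytest.raises(ValueError) as exc_info:
        func(**kwargs)
    assert str(exc_info.value).startswith(description)
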
