
Add error to detect negative rewards in IPWLearner #129


Merged: 6 commits, Aug 31, 2021
3 changes: 1 addition & 2 deletions obp/dataset/synthetic_continuous.py
@@ -240,12 +240,11 @@ def calc_ground_truth_policy_value(

"""
check_array(array=context, name="context", expected_dim=2)
check_array(array=action, name="action", expected_dim=1)
if context.shape[1] != self.dim_context:
raise ValueError(
"Expected `context.shape[1] == self.dim_context`, found it False"
)
if not isinstance(action, np.ndarray) or action.ndim != 1:
raise ValueError("action must be 1D array")
if context.shape[0] != action.shape[0]:
raise ValueError(
"Expected `context.shape[0] == action.shape[0]`, but found it False"
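Across this PR, hand-rolled `isinstance`/`ndim` checks are replaced by `obp.utils.check_array`, as in the added `check_array(array=action, name="action", expected_dim=1)` line above. A minimal sketch of that validation helper in use (the toy arrays are hypothetical, and the exact wording of the raised message may differ):

```python
import numpy as np
from obp.utils import check_array

context = np.random.normal(size=(10, 5))  # 2D array: passes the check
action = np.arange(10, dtype=float)       # 1D array: passes the check

check_array(array=context, name="context", expected_dim=2)
check_array(array=action, name="action", expected_dim=1)

# anything that is not an ndarray of the expected dimension is rejected,
# replacing the scattered manual `isinstance(...) or ndim != ...` checks
try:
    check_array(array=context.ravel(), name="context", expected_dim=2)
except ValueError as err:
    print(err)
```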
2 changes: 1 addition & 1 deletion obp/policy/base.py
@@ -209,7 +209,7 @@ def __post_init__(self) -> None:

if self.n_actions < self.len_list:
raise ValueError(
f"n_actions >= len_list should hold, but n_actions is {self.n_actions} and len_list is {self.len_list}"
f"Expected `n_actions >= len_list`, but got n_actions={self.n_actions} < len_list={self.len_list}"
)

@property
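The reworded check in `BaseOfflinePolicyLearner.__post_init__` is hit by any offline learner constructed with more recommendation slots than actions. A quick hedged sketch (the constructor arguments are hypothetical toy values):

```python
from obp.policy import IPWLearner

try:
    # len_list (number of slots) must not exceed n_actions
    IPWLearner(n_actions=2, len_list=3)
except ValueError as err:
    print(err)  # roughly: Expected `n_actions >= len_list`, but got n_actions=2 < len_list=3
```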
45 changes: 19 additions & 26 deletions obp/policy/offline.py
@@ -18,8 +18,7 @@
from tqdm import tqdm

from .base import BaseOfflinePolicyLearner

from ..utils import check_bandit_feedback_inputs
from ..utils import check_bandit_feedback_inputs, check_array


@dataclass
@@ -147,11 +146,20 @@ def fit(
pscore=pscore,
position=position,
)
if (reward < 0).any():
raise ValueError(
"A negative value is found in `reward`."
"`obp.policy.IPWLearner` cannot handle negative rewards,"
"and please use `obp.policy.NNPolicyLearner` instead."
)
if pscore is None:
n_actions = np.int(action.max() + 1)
pscore = np.ones_like(action) / n_actions
if position is None or self.len_list == 1:
if self.len_list == 1:
position = np.zeros_like(action, dtype=int)
else:
if position is None:
raise ValueError("When `self.len_list=1`, `position` must be given.")

for position_ in np.arange(self.len_list):
X, sample_weight, y = self._create_train_data_for_opl(
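The new guard is the core of this PR: `IPWLearner` reduces policy learning to weighted classification, using (roughly) `reward / pscore` as the `sample_weight` passed to the base classifier, so negative rewards would produce negative sample weights that the reduction cannot handle. A hedged sketch of the new behaviour on toy data (all values hypothetical):

```python
import numpy as np
from obp.policy import IPWLearner

n_rounds, n_actions, dim_context = 100, 3, 4
rng = np.random.default_rng(12345)

context = rng.normal(size=(n_rounds, dim_context))
action = rng.integers(n_actions, size=n_rounds)
pscore = np.full(n_rounds, 1.0 / n_actions)  # uniform behavior policy
reward = rng.normal(size=n_rounds)
reward[0] = -1.0  # ensure at least one negative reward

learner = IPWLearner(n_actions=n_actions)
try:
    learner.fit(context=context, action=action, reward=reward, pscore=pscore)
except ValueError as err:
    print(err)  # negative rewards are rejected; NNPolicyLearner is suggested instead
```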
@@ -184,8 +192,7 @@ def predict(self, context: np.ndarray) -> np.ndarray:
If you want a non-repetitive action set, please use the `sample_action` method.

"""
if not isinstance(context, np.ndarray) or context.ndim != 2:
raise ValueError("context must be 2D array")
check_array(array=context, name="context", expected_dim=2)

n_rounds = context.shape[0]
action_dist = np.zeros((n_rounds, self.n_actions, self.len_list))
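`predict` keeps its behaviour and only swaps the input validation for `check_array`; it still returns a deterministic one-hot action distribution of shape `(n_rounds, n_actions, len_list)`. A self-contained sketch under hypothetical data (non-negative rewards, explicit `pscore`):

```python
import numpy as np
from obp.policy import IPWLearner

rng = np.random.default_rng(0)
n_rounds, n_actions, dim_context = 100, 3, 4

learner = IPWLearner(n_actions=n_actions)
learner.fit(
    context=rng.normal(size=(n_rounds, dim_context)),
    action=rng.integers(n_actions, size=n_rounds),
    reward=rng.binomial(n=1, p=0.5, size=n_rounds).astype(float),  # non-negative
    pscore=np.full(n_rounds, 1.0 / n_actions),
)

action_dist = learner.predict(context=rng.normal(size=(10, dim_context)))
print(action_dist.shape)                          # (10, 3, 1)
print(np.allclose(action_dist.sum(axis=1), 1.0))  # one chosen action per round and slot
```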
@@ -214,9 +221,7 @@ def predict_score(self, context: np.ndarray) -> np.ndarray:
Scores for all possible pairs of action and position predicted by a classifier.

"""
assert (
isinstance(context, np.ndarray) and context.ndim == 2
), "context must be 2D array"
check_array(array=context, name="context", expected_dim=2)

n_rounds = context.shape[0]
score_predicted = np.zeros((n_rounds, self.n_actions, self.len_list))
@@ -271,8 +276,7 @@ def sample_action(
Action sampled by a trained classifier.

"""
if not isinstance(context, np.ndarray) or context.ndim != 2:
raise ValueError("context must be 2D array")
check_array(array=context, name="context", expected_dim=2)
check_scalar(tau, name="tau", target_type=(int, float), min_val=0)

n_rounds = context.shape[0]
@@ -329,10 +333,8 @@ def predict_proba(
"""
assert (
self.len_list == 1
), "predict_proba method can be used only when len_list = 1"
assert (
isinstance(context, np.ndarray) and context.ndim == 2
), "context must be 2D array"
), "predict_proba method cannot be used when `len_list != 1`"
check_array(array=context, name="context", expected_dim=2)
check_scalar(tau, name="tau", target_type=(int, float), min_val=0)

score_predicted = self.predict_score(context=context)
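`sample_action` and `predict_proba` turn the scores from `predict_score` into a distribution with a temperature softmax controlled by `tau`; smaller `tau` concentrates probability on the top-scoring action. The sketch below is just that formula in plain NumPy, not obp's internal code:

```python
import numpy as np

def tempered_softmax(score: np.ndarray, tau: float = 1.0) -> np.ndarray:
    """Softmax over actions with temperature tau (tau -> 0 approaches argmax)."""
    z = score / tau
    z = z - z.max(axis=1, keepdims=True)  # subtract the max for numerical stability
    e = np.exp(z)
    return e / e.sum(axis=1, keepdims=True)

scores = np.array([[0.2, 1.5, 0.3]])       # hypothetical classifier scores for one round
print(tempered_softmax(scores, tau=1.0))   # soft preference for action 1
print(tempered_softmax(scores, tau=0.05))  # nearly deterministic choice of action 1
```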
@@ -761,19 +763,16 @@ def fit(
pscore=pscore,
position=position,
)

if context.shape[1] != self.dim_context:
raise ValueError(
"Expected `context.shape[1] == self.dim_context`, but found it False"
)

if pscore is None:
pscore = np.ones_like(action) / self.n_actions
if estimated_rewards_by_reg_model is None:
estimated_rewards_by_reg_model = np.zeros(
(context.shape[0], self.n_actions, self.len_list)
)

if self.len_list == 1:
position = np.zeros_like(action, dtype=int)
else:
@@ -900,9 +899,7 @@ def predict(self, context: np.ndarray) -> np.ndarray:
If you want a non-repetitive action set, please use the `sample_action` method.

"""
if not isinstance(context, np.ndarray) or context.ndim != 2:
raise ValueError("context must be 2D array")

check_array(array=context, name="context", expected_dim=2)
if context.shape[1] != self.dim_context:
raise ValueError(
"Expected `context.shape[1] == self.dim_context`, but found it False"
@@ -939,9 +936,7 @@ def sample_action(
Action sampled by a trained classifier.

"""
if not isinstance(context, np.ndarray) or context.ndim != 2:
raise ValueError("context must be 2D array")

check_array(array=context, name="context", expected_dim=2)
if context.shape[1] != self.dim_context:
raise ValueError(
"Expected `context.shape[1] == self.dim_context`, but found it False"
@@ -988,9 +983,7 @@ def predict_proba(
Action choice probabilities obtained by a trained classifier.

"""
if not isinstance(context, np.ndarray) or context.ndim != 2:
raise ValueError("context must be 2D array")

check_array(array=context, name="context", expected_dim=2)
if context.shape[1] != self.dim_context:
raise ValueError(
"Expected `context.shape[1] == self.dim_context`, but found it False"
29 changes: 19 additions & 10 deletions obp/utils.py
@@ -204,7 +204,8 @@ def check_bandit_feedback_inputs(
== expected_reward.shape[0]
):
raise ValueError(
"context, action, reward, and expected_reward must have the same number of samples."
"Expected `context.shape[0] == action.shape[0] == reward.shape[0] == expected_reward.shape[0]`"
", but found it False"
)
if action.max() >= expected_reward.shape[1]:
raise ValueError(
@@ -216,7 +217,8 @@
context.shape[0] == action.shape[0] == reward.shape[0] == pscore.shape[0]
):
raise ValueError(
"Expected `action.shape[0] == reward.shape[0] == pscore.shape[0]`, but found it False"
"Expected `context.shape[0] == action.shape[0] == reward.shape[0] == pscore.shape[0]`"
", but found it False"
)
if np.any(pscore <= 0):
raise ValueError("pscore must be positive")
@@ -227,14 +229,16 @@
context.shape[0] == action.shape[0] == reward.shape[0] == position.shape[0]
):
raise ValueError(
"context, action, reward, and position must have the same number of samples."
"Expected `context.shape[0] == action.shape[0] == reward.shape[0] == position.shape[0]`"
", but found it False"
)
if not (np.issubdtype(position.dtype, np.integer) and position.min() >= 0):
raise ValueError("position elements must be non-negative integers")
else:
if not (context.shape[0] == action.shape[0] == reward.shape[0]):
raise ValueError(
"context, action, and reward must have the same number of samples."
"Expected `context.shape[0] == action.shape[0] == reward.shape[0]`"
", but found it False"
)
if action_context is not None:
check_array(array=action_context, name="action_context", expected_dim=2)
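Each rewritten message in `check_bandit_feedback_inputs` now states the exact equality that failed, following the ``Expected `X == Y`, but found it False`` pattern used elsewhere in obp. A hedged sketch triggering one of them (toy shapes, hypothetical values):

```python
import numpy as np
from obp.utils import check_bandit_feedback_inputs

context = np.random.normal(size=(10, 5))
action = np.random.randint(3, size=10)
reward = np.random.binomial(1, 0.5, size=9)  # deliberately one sample short

try:
    check_bandit_feedback_inputs(context=context, action=action, reward=reward)
except ValueError as err:
    # roughly: Expected `context.shape[0] == action.shape[0] == reward.shape[0]`, but found it False
    print(err)
```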
@@ -444,7 +448,8 @@ def check_continuous_ope_inputs(
!= action_by_evaluation_policy.shape[0]
):
raise ValueError(
"Expected `estimated_rewards_by_reg_model.shape[0] == action_by_evaluation_policy.shape[0]`, but found if False"
"Expected `estimated_rewards_by_reg_model.shape[0] == action_by_evaluation_policy.shape[0]`"
", but found if False"
)

# action, reward
@@ -457,14 +462,15 @@
check_array(array=reward, name="reward", expected_dim=1)
if not (action_by_behavior_policy.shape[0] == reward.shape[0]):
raise ValueError(
"Expected `action_by_behavior_policy.shape[0] == reward.shape[0]`, but found it False"
"Expected `action_by_behavior_policy.shape[0] == reward.shape[0]`"
", but found it False"
)
if not (
action_by_behavior_policy.shape[0] == action_by_evaluation_policy.shape[0]
):
raise ValueError(
"Expected `action_by_behavior_policy.shape[0] == action_by_evaluation_policy.shape[0]`"
"but found it False"
", but found it False"
)

# pscore
@@ -548,7 +554,8 @@ def _check_slate_ope_inputs(
== evaluation_policy_pscore.shape[0]
):
raise ValueError(
f"slate_id, position, reward, {pscore_type}, and evaluation_policy_{pscore_type} must have the same number of samples."
f"slate_id, position, reward, {pscore_type}, and evaluation_policy_{pscore_type} "
"must have the same number of samples."
)


@@ -805,7 +812,8 @@ def check_ope_inputs_tensor(
if estimated_rewards_by_reg_model is not None:
if estimated_rewards_by_reg_model.shape != action_dist.shape:
raise ValueError(
"Expected `estimated_rewards_by_reg_model.shape == action_dist.shape`, but found it False"
"Expected `estimated_rewards_by_reg_model.shape == action_dist.shape`"
", but found it False"
)

# action, reward
Expand All @@ -831,7 +839,8 @@ def check_ope_inputs_tensor(
raise ValueError("pscore must be 1-dimensional")
if not (action.shape[0] == reward.shape[0] == pscore.shape[0]):
raise ValueError(
"Expected `action.shape[0] == reward.shape[0] == pscore.shape[0]`, but found it False"
"Expected `action.shape[0] == reward.shape[0] == pscore.shape[0]`"
", but found it False"
)
if torch.any(pscore <= 0):
raise ValueError("pscore must be positive")
8 changes: 4 additions & 4 deletions tests/ope/test_regression_models.py
@@ -270,7 +270,7 @@
generate_action_dist(n_rounds, n_actions, len_list),
3,
1,
"Expected `action.shape[0]",
"Expected `context.shape[0]",
),
(
np.random.uniform(size=(n_rounds, 7)),
@@ -334,7 +334,7 @@
None,
3,
1,
"context, action, reward, and position must have the same number of samples.",
"Expected `context.shape[0]",
),
(
np.random.uniform(size=(n_rounds, 7)),
@@ -382,7 +382,7 @@
None,
3,
1,
"context, action, and reward must have the same number of samples",
"Expected `context.shape[0]",
),
(
np.random.uniform(size=(n_rounds, 7)),
@@ -398,7 +398,7 @@
generate_action_dist(n_rounds, n_actions, len_list),
3,
1,
"Expected `action.shape[0]",
"Expected `context.shape[0]",
),
(
np.random.uniform(size=(n_rounds, 7)),
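The test changes only swap the expected message prefixes in the parametrized cases to the new ``Expected `context.shape[0]`` wording. Since `pytest.raises(..., match=...)` treats the pattern as a regex searched anywhere in the message, a prefix is enough, provided metacharacters such as `[` and `.` are escaped. A hedged standalone illustration (the test name and arrays are hypothetical, not the repo's actual parametrization):

```python
import numpy as np
import pytest
from obp.utils import check_bandit_feedback_inputs

def test_mismatched_reward_length_raises():
    context = np.random.normal(size=(10, 5))
    action = np.random.randint(3, size=10)
    reward = np.random.binomial(1, 0.5, size=9)  # one sample short on purpose

    # match is a regex: escape the brackets/dots in the new message prefix
    with pytest.raises(ValueError, match=r"Expected `context\.shape\[0\]"):
        check_bandit_feedback_inputs(context=context, action=action, reward=reward)
```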