
Commit 07e9c17

Merge pull request #129 from st-tech/add-error-to-detect-negative-reward
Add error to detect negative rewards in IPWLearner
2 parents: cc652ae + a2dfde1

File tree: 6 files changed, +175 −91 lines

obp/dataset/synthetic_continuous.py

Lines changed: 1 addition & 2 deletions
@@ -240,12 +240,11 @@ def calc_ground_truth_policy_value(
 
         """
         check_array(array=context, name="context", expected_dim=2)
+        check_array(array=action, name="action", expected_dim=1)
         if context.shape[1] != self.dim_context:
             raise ValueError(
                 "Expected `context.shape[1] == self.dim_context`, found it False"
             )
-        if not isinstance(action, np.ndarray) or action.ndim != 1:
-            raise ValueError("action must be 1D array")
         if context.shape[0] != action.shape[0]:
             raise ValueError(
                 "Expected `context.shape[0] == action.shape[0]`, but found it False"

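For readers unfamiliar with the helper, a minimal sketch of what the swapped-in validation does (a sketch only; it assumes `obp.utils.check_array` raises `ValueError` on a dimension mismatch, which is what its use in place of the removed manual check implies):

import numpy as np
from obp.utils import check_array

action = np.arange(5)                                     # a valid 1D action array
check_array(array=action, name="action", expected_dim=1)  # passes silently
# check_array(array=action.reshape(5, 1), name="action", expected_dim=1)  # would raise ValueError
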
obp/policy/base.py

Lines changed: 1 addition & 1 deletion
@@ -209,7 +209,7 @@ def __post_init__(self) -> None:
 
         if self.n_actions < self.len_list:
             raise ValueError(
-                f"n_actions >= len_list should hold, but n_actions is {self.n_actions} and len_list is {self.len_list}"
+                f"Expected `n_actions >= len_list`, but got n_actions={self.n_actions} < len_list={self.len_list}"
             )
 
     @property
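As a quick illustration (not part of the diff; it assumes the public `IPWLearner` constructor from `obp.policy`, which inherits this `__post_init__`), the reworded guard now reports both offending values at construction time:

from obp.policy import IPWLearner

IPWLearner(n_actions=3, len_list=5)
# ValueError: Expected `n_actions >= len_list`, but got n_actions=3 < len_list=5
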

obp/policy/offline.py

Lines changed: 19 additions & 26 deletions
@@ -18,8 +18,7 @@
 from tqdm import tqdm
 
 from .base import BaseOfflinePolicyLearner
-
-from ..utils import check_bandit_feedback_inputs
+from ..utils import check_bandit_feedback_inputs, check_array
 
 
 @dataclass
@@ -147,11 +146,20 @@ def fit(
             pscore=pscore,
             position=position,
         )
+        if (reward < 0).any():
+            raise ValueError(
+                "A negative value is found in `reward`."
+                "`obp.policy.IPWLearner` cannot handle negative rewards,"
+                "and please use `obp.policy.NNPolicyLearner` instead."
+            )
         if pscore is None:
             n_actions = np.int(action.max() + 1)
             pscore = np.ones_like(action) / n_actions
-        if position is None or self.len_list == 1:
+        if self.len_list == 1:
             position = np.zeros_like(action, dtype=int)
+        else:
+            if position is None:
+                raise ValueError("When `self.len_list=1`, `position` must be given.")
 
         for position_ in np.arange(self.len_list):
             X, sample_weight, y = self._create_train_data_for_opl(
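The new `(reward < 0).any()` guard is the headline change of this PR. A hedged sketch of how it surfaces to users, assuming the public `IPWLearner.fit` signature with `context`, `action`, `reward`, and `pscore`:

import numpy as np
from obp.policy import IPWLearner

n_rounds, n_actions = 4, 3
context = np.random.normal(size=(n_rounds, 5))
action = np.array([0, 1, 2, 1])
reward = np.array([1.0, 0.0, -1.0, 1.0])          # contains a negative reward
pscore = np.full(n_rounds, 1.0 / n_actions)

learner = IPWLearner(n_actions=n_actions)
learner.fit(context=context, action=action, reward=reward, pscore=pscore)
# ValueError: A negative value is found in `reward`. ... please use `obp.policy.NNPolicyLearner` instead.

The check appears to exist because `IPWLearner` reduces policy learning to weighted classification (see `_create_train_data_for_opl` and `sample_weight` above), where negative importance weights are not meaningful; the error message points to `obp.policy.NNPolicyLearner` as the alternative for such data.
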
@@ -184,8 +192,7 @@ def predict(self, context: np.ndarray) -> np.ndarray:
         If you want a non-repetitive action set, please use the `sample_action` method.
 
         """
-        if not isinstance(context, np.ndarray) or context.ndim != 2:
-            raise ValueError("context must be 2D array")
+        check_array(array=context, name="context", expected_dim=2)
 
         n_rounds = context.shape[0]
         action_dist = np.zeros((n_rounds, self.n_actions, self.len_list))
@@ -214,9 +221,7 @@ def predict_score(self, context: np.ndarray) -> np.ndarray:
         Scores for all possible pairs of action and position predicted by a classifier.
 
         """
-        assert (
-            isinstance(context, np.ndarray) and context.ndim == 2
-        ), "context must be 2D array"
+        check_array(array=context, name="context", expected_dim=2)
 
         n_rounds = context.shape[0]
         score_predicted = np.zeros((n_rounds, self.n_actions, self.len_list))
@@ -271,8 +276,7 @@ def sample_action(
         Action sampled by a trained classifier.
 
         """
-        if not isinstance(context, np.ndarray) or context.ndim != 2:
-            raise ValueError("context must be 2D array")
+        check_array(array=context, name="context", expected_dim=2)
         check_scalar(tau, name="tau", target_type=(int, float), min_val=0)
 
         n_rounds = context.shape[0]
@@ -329,10 +333,8 @@ def predict_proba(
         """
         assert (
             self.len_list == 1
-        ), "predict_proba method can be used only when len_list = 1"
-        assert (
-            isinstance(context, np.ndarray) and context.ndim == 2
-        ), "context must be 2D array"
+        ), "predict_proba method cannot be used when `len_list != 1`"
+        check_array(array=context, name="context", expected_dim=2)
         check_scalar(tau, name="tau", target_type=(int, float), min_val=0)
 
         score_predicted = self.predict_score(context=context)
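One practical consequence of the migration above: `predict_score` and `predict_proba` previously relied on bare `assert` statements, which raise `AssertionError` and are skipped entirely under `python -O`, whereas `check_array` raises `ValueError` consistently across `predict`, `predict_score`, `sample_action`, and `predict_proba`. A minimal sketch, assuming the default `IPWLearner` constructor and `fit` signature:

import numpy as np
from obp.policy import IPWLearner

learner = IPWLearner(n_actions=2)
learner.fit(
    context=np.random.normal(size=(4, 3)),
    action=np.array([0, 1, 0, 1]),
    reward=np.array([1.0, 0.0, 1.0, 1.0]),
)
learner.predict(context=np.random.normal(size=(2, 3)))   # OK: context is a 2D array
# learner.predict(context=np.random.normal(size=3))      # now raises ValueError via check_array
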
@@ -761,19 +763,16 @@ def fit(
             pscore=pscore,
             position=position,
         )
-
         if context.shape[1] != self.dim_context:
             raise ValueError(
                 "Expected `context.shape[1] == self.dim_context`, but found it False"
             )
-
         if pscore is None:
             pscore = np.ones_like(action) / self.n_actions
         if estimated_rewards_by_reg_model is None:
             estimated_rewards_by_reg_model = np.zeros(
                 (context.shape[0], self.n_actions, self.len_list)
             )
-
         if self.len_list == 1:
             position = np.zeros_like(action, dtype=int)
         else:
@@ -900,9 +899,7 @@ def predict(self, context: np.ndarray) -> np.ndarray:
         If you want a non-repetitive action set, please use the `sample_action` method.
 
         """
-        if not isinstance(context, np.ndarray) or context.ndim != 2:
-            raise ValueError("context must be 2D array")
-
+        check_array(array=context, name="context", expected_dim=2)
         if context.shape[1] != self.dim_context:
             raise ValueError(
                 "Expected `context.shape[1] == self.dim_context`, but found it False"
@@ -939,9 +936,7 @@ def sample_action(
         Action sampled by a trained classifier.
 
         """
-        if not isinstance(context, np.ndarray) or context.ndim != 2:
-            raise ValueError("context must be 2D array")
-
+        check_array(array=context, name="context", expected_dim=2)
         if context.shape[1] != self.dim_context:
             raise ValueError(
                 "Expected `context.shape[1] == self.dim_context`, but found it False"
@@ -988,9 +983,7 @@ def predict_proba(
         Action choice probabilities obtained by a trained classifier.
 
         """
-        if not isinstance(context, np.ndarray) or context.ndim != 2:
-            raise ValueError("context must be 2D array")
-
+        check_array(array=context, name="context", expected_dim=2)
         if context.shape[1] != self.dim_context:
             raise ValueError(
                 "Expected `context.shape[1] == self.dim_context`, but found it False"

obp/utils.py

Lines changed: 19 additions & 10 deletions
@@ -204,7 +204,8 @@ def check_bandit_feedback_inputs(
             == expected_reward.shape[0]
         ):
             raise ValueError(
-                "context, action, reward, and expected_reward must have the same number of samples."
+                "Expected `context.shape[0] == action.shape[0] == reward.shape[0] == expected_reward.shape[0]`"
+                ", but found it False"
             )
         if action.max() >= expected_reward.shape[1]:
             raise ValueError(
@@ -216,7 +217,8 @@
             context.shape[0] == action.shape[0] == reward.shape[0] == pscore.shape[0]
         ):
             raise ValueError(
-                "Expected `action.shape[0] == reward.shape[0] == pscore.shape[0]`, but found it False"
+                "Expected `context.shape[0] == action.shape[0] == reward.shape[0] == pscore.shape[0]`"
+                ", but found it False"
             )
         if np.any(pscore <= 0):
             raise ValueError("pscore must be positive")
@@ -227,14 +229,16 @@
             context.shape[0] == action.shape[0] == reward.shape[0] == position.shape[0]
         ):
             raise ValueError(
-                "context, action, reward, and position must have the same number of samples."
+                "Expected `context.shape[0] == action.shape[0] == reward.shape[0] == position.shape[0]`"
+                ", but found it False"
             )
         if not (np.issubdtype(position.dtype, np.integer) and position.min() >= 0):
             raise ValueError("position elements must be non-negative integers")
     else:
         if not (context.shape[0] == action.shape[0] == reward.shape[0]):
             raise ValueError(
-                "context, action, and reward must have the same number of samples."
+                "Expected `context.shape[0] == action.shape[0] == reward.shape[0]`"
+                ", but found it False"
             )
     if action_context is not None:
         check_array(array=action_context, name="action_context", expected_dim=2)
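To show how the reworded messages read in practice, a small sketch (assuming `obp.utils.check_bandit_feedback_inputs` accepts `context`, `action`, and `reward` keyword arguments, as it is called from `IPWLearner.fit`):

import numpy as np
from obp.utils import check_bandit_feedback_inputs

check_bandit_feedback_inputs(
    context=np.random.normal(size=(5, 3)),   # 5 samples
    action=np.zeros(4, dtype=int),           # 4 samples: sample counts disagree
    reward=np.ones(4),
)
# ValueError: Expected `context.shape[0] == action.shape[0] == reward.shape[0]`, but found it False
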
@@ -444,7 +448,8 @@ def check_continuous_ope_inputs(
             != action_by_evaluation_policy.shape[0]
         ):
             raise ValueError(
-                "Expected `estimated_rewards_by_reg_model.shape[0] == action_by_evaluation_policy.shape[0]`, but found if False"
+                "Expected `estimated_rewards_by_reg_model.shape[0] == action_by_evaluation_policy.shape[0]`"
+                ", but found if False"
             )
 
     # action, reward
@@ -457,14 +462,15 @@
     check_array(array=reward, name="reward", expected_dim=1)
     if not (action_by_behavior_policy.shape[0] == reward.shape[0]):
         raise ValueError(
-            "Expected `action_by_behavior_policy.shape[0] == reward.shape[0]`, but found it False"
+            "Expected `action_by_behavior_policy.shape[0] == reward.shape[0]`"
+            ", but found it False"
         )
     if not (
         action_by_behavior_policy.shape[0] == action_by_evaluation_policy.shape[0]
     ):
         raise ValueError(
             "Expected `action_by_behavior_policy.shape[0] == action_by_evaluation_policy.shape[0]`"
-            "but found it False"
+            ", but found it False"
         )
 
     # pscore
@@ -548,7 +554,8 @@ def _check_slate_ope_inputs(
         == evaluation_policy_pscore.shape[0]
     ):
         raise ValueError(
-            f"slate_id, position, reward, {pscore_type}, and evaluation_policy_{pscore_type} must have the same number of samples."
+            f"slate_id, position, reward, {pscore_type}, and evaluation_policy_{pscore_type} "
+            "must have the same number of samples."
         )
 
 
@@ -805,7 +812,8 @@ def check_ope_inputs_tensor(
     if estimated_rewards_by_reg_model is not None:
         if estimated_rewards_by_reg_model.shape != action_dist.shape:
             raise ValueError(
-                "Expected `estimated_rewards_by_reg_model.shape == action_dist.shape`, but found it False"
+                "Expected `estimated_rewards_by_reg_model.shape == action_dist.shape`"
+                ", but found it False"
             )
 
     # action, reward
@@ -831,7 +839,8 @@
             raise ValueError("pscore must be 1-dimensional")
         if not (action.shape[0] == reward.shape[0] == pscore.shape[0]):
             raise ValueError(
-                "Expected `action.shape[0] == reward.shape[0] == pscore.shape[0]`, but found it False"
+                "Expected `action.shape[0] == reward.shape[0] == pscore.shape[0]`"
+                ", but found it False"
             )
         if torch.any(pscore <= 0):
             raise ValueError("pscore must be positive")

tests/ope/test_regression_models.py

Lines changed: 4 additions & 4 deletions
@@ -270,7 +270,7 @@
         generate_action_dist(n_rounds, n_actions, len_list),
         3,
         1,
-        "Expected `action.shape[0]",
+        "Expected `context.shape[0]",
     ),
     (
         np.random.uniform(size=(n_rounds, 7)),
@@ -334,7 +334,7 @@
         None,
         3,
         1,
-        "context, action, reward, and position must have the same number of samples.",
+        "Expected `context.shape[0]",
     ),
     (
         np.random.uniform(size=(n_rounds, 7)),
@@ -382,7 +382,7 @@
         None,
         3,
         1,
-        "context, action, and reward must have the same number of samples",
+        "Expected `context.shape[0]",
    ),
     (
         np.random.uniform(size=(n_rounds, 7)),
@@ -398,7 +398,7 @@
         generate_action_dist(n_rounds, n_actions, len_list),
         3,
         1,
-        "Expected `action.shape[0]",
+        "Expected `context.shape[0]",
     ),
     (
         np.random.uniform(size=(n_rounds, 7)),
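The updated `description` entries are message prefixes rather than full sentences, matching the reworded errors in `obp/utils.py`. A hypothetical sketch of how such a prefix can be asserted (the actual test body lives outside this diff; `assert_raises_with_prefix` is an illustrative helper, not obp API):

import pytest

def assert_raises_with_prefix(func, kwargs, description):
    # Call `func` with invalid inputs and check the raised message starts with the expected prefix.
    with pytest.raises(ValueError) as exc_info:
        func(**kwargs)
    assert str(exc_info.value).startswith(description)
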
