
Commit 647f961

Merge pull request #23 from st-tech/hotfix/regression-model-fit-predict
Hotfix: potential error in the fit_predict method of RegressionModel
2 parents 0667738 + 45856d5 commit 647f961
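
For context, the fix concerns the cross-fitting path of RegressionModel.fit_predict. Below is a minimal usage sketch; the toy data, the LogisticRegression base model, and the chosen n_folds are illustrative assumptions, with keyword names taken from the signatures visible in the diff that follows.

    import numpy as np
    from sklearn.linear_model import LogisticRegression
    from obp.ope import RegressionModel

    # toy logged bandit feedback (hypothetical shapes and values)
    n_rounds, n_actions, dim_context = 1000, 3, 5
    rng = np.random.default_rng(12345)
    context = rng.normal(size=(n_rounds, dim_context))
    action = rng.integers(n_actions, size=n_rounds)
    reward = rng.binomial(n=1, p=0.5, size=n_rounds)

    regression_model = RegressionModel(
        n_actions=n_actions,
        base_model=LogisticRegression(),
        fitting_method="normal",
    )
    # pscore is omitted here: after this patch a uniform behavior policy is assumed
    estimated_rewards = regression_model.fit_predict(
        context=context,
        action=action,
        reward=reward,
        n_folds=3,  # n_folds > 1 triggers the cross-fitting branch touched by this fix
        random_state=12345,
    )
    # estimated_rewards has shape (n_rounds, n_actions, len_list)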

1 file changed

obp/ope/regression_model.py

Lines changed: 56 additions & 34 deletions
@@ -14,16 +14,16 @@

 @dataclass
 class RegressionModel(BaseEstimator):
-    """Machine learning model to estimate the mean reward function (:math:`q(x,a):= \\mathbb{E}_{r \sim p(r|x,a)} [r|x,a]`).
+    """Machine learning model to estimate the mean reward function (:math:`q(x,a):= \\mathbb{E}[r|x,a]`).

     Note
     -------
-    Reward (or outcome) :math:`Y` must be either binary or continuous.
+    Reward (or outcome) :math:`r` must be either binary or continuous.

     Parameters
     ------------
     base_model: BaseEstimator
-        Model class to be used to estimate the mean reward function.
+        A machine learning model used to estimate the mean reward function.

     n_actions: int
         Number of actions.
@@ -66,7 +66,7 @@ def __post_init__(self) -> None:
             "normal",
             "iw",
             "mrdr",
-        ], f"fitting method must be one of 'normal', 'iw', or 'mrdr', but {self.fitting_method} is given"
+        ], f"fitting_method must be one of 'normal', 'iw', or 'mrdr', but {self.fitting_method} is given"
         assert self.n_actions > 1 and isinstance(
             self.n_actions, int
         ), f"n_actions must be an integer larger than 1, but {self.n_actions} is given"
@@ -101,9 +101,10 @@ def fit(
         reward: array-like, shape (n_rounds,)
             Observed rewards (or outcome) in each round, i.e., :math:`r_t`.

-        pscore: Optional[np.ndarray], default=None
-            Propensity scores, the action choice probabilities by behavior policy,
+        pscore: array-like, shape (n_rounds,), default=None
+            Action choice probabilities (propensity score) of a behavior policy
             in the training logged bandit feedback.
+            When None is given, the behavior policy is assumed to be a uniform one.

         position: array-like, shape (n_rounds,), default=None
             Positions of each round in the given logged bandit feedback.
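
A brief note on the new default documented above (hypothetical values, shown only to make the fallback concrete): omitting pscore is now equivalent to passing a constant 1 / n_actions vector, which is what the next hunk implements.

    import numpy as np
    action = np.array([0, 2, 1, 1])        # 4 logged actions, n_actions = 3 (toy values)
    pscore = np.ones_like(action) / 3      # array([0.333..., 0.333..., 0.333..., 0.333...])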
@@ -123,20 +124,26 @@ def fit(
             position=position,
             action_context=self.action_context,
         )
+        n_rounds = context.shape[0]
+
         if self.len_list == 1:
             position = np.zeros_like(action)
         else:
             assert (
-                position is not None
-            ), "position has to be set when len_list is larger than 1"
+                isinstance(position, np.ndarray) and position.ndim == 1
+            ), f"when len_list > 1, position must be a 1-dimensional ndarray"
         if self.fitting_method in ["iw", "mrdr"]:
             assert (
-                action_dist is not None
-            ), "When either 'iw' or 'mrdr' is used as the 'fitting_method' argument, then action_dist must be given"
-            assert (
-                pscore is not None
-            ), "When either 'iw' or 'mrdr' is used as the 'fitting_method' argument, then pscore must be given"
-        n_data = context.shape[0]
+                isinstance(action_dist, np.ndarray) and action_dist.ndim == 3
+            ), f"when fitting_method is either 'iw' or 'mrdr', action_dist must be a 3-dimensional ndarray"
+            assert action_dist.shape == (
+                n_rounds,
+                self.n_actions,
+                self.len_list,
+            ), f"shape of action_dist must be (n_rounds, n_actions, len_list)=({n_rounds, self.n_actions, self.len_list})"
+        if pscore is None:
+            pscore = np.ones_like(action) / self.n_actions
+
         for position_ in np.arange(self.len_list):
             idx = position == position_
             X = self._pre_process_for_reg_model(
@@ -149,19 +156,19 @@ def fit(
                 self.base_model_list[position_].fit(X, reward[idx])
             else:
                 action_dist_at_position = action_dist[
-                    np.arange(n_data), action, position_ * np.ones(n_data, dtype=int)
+                    np.arange(n_rounds),
+                    action,
+                    position_ * np.ones(n_rounds, dtype=int),
                 ][idx]
                 if self.fitting_method == "iw":
                     sample_weight = action_dist_at_position / pscore[idx]
                     self.base_model_list[position_].fit(
                         X, reward[idx], sample_weight=sample_weight
                     )
                 elif self.fitting_method == "mrdr":
-                    sample_weight = (
-                        action_dist_at_position
-                        * (1.0 - pscore[idx])
-                        / (pscore[idx] ** 2)
-                    )
+                    sample_weight = action_dist_at_position
+                    sample_weight *= 1.0 - pscore[idx]
+                    sample_weight /= pscore[idx] ** 2
                     self.base_model_list[position_].fit(
                         X, reward[idx], sample_weight=sample_weight
                     )
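
The rewritten block above computes the more robust doubly robust (MRDR) sample weight, pi_e(a_i|x_i) * (1 - pi_b(a_i|x_i)) / pi_b(a_i|x_i)^2, as a sequence of in-place operations rather than one chained expression. A standalone sketch of the same quantity, using placeholder names rather than the module's internals:

    import numpy as np

    def mrdr_sample_weight(action_dist_at_position: np.ndarray, pscore: np.ndarray) -> np.ndarray:
        # weight = pi_e(a|x) * (1 - pi_b(a|x)) / pi_b(a|x)^2
        sample_weight = action_dist_at_position.astype(float, copy=True)
        sample_weight *= 1.0 - pscore
        sample_weight /= pscore ** 2
        return sample_weight

In the patched code the in-place updates are applied to action_dist_at_position itself, which is safe here because the fancy-indexed slice action_dist[...][idx] is already a copy of the original array.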
@@ -215,7 +222,7 @@ def fit_predict(
         n_folds: int = 1,
         random_state: Optional[int] = None,
     ) -> None:
-        """Fit the regression model on given logged bandit feedback data and then predict the mean reward function of the same data.
+        """Fit the regression model on given logged bandit feedback data and predict the reward function of the same data.

         Note
         ------
@@ -234,8 +241,9 @@ def fit_predict(
             Observed rewards (or outcome) in each round, i.e., :math:`r_t`.

         pscore: array-like, shape (n_rounds,), default=None
-            Propensity scores, the action choice probabilities by behavior policy,
+            Action choice probabilities (propensity score) of a behavior policy
             in the training logged bandit feedback.
+            When None is given, the behavior policy is assumed to be a uniform one.

         position: array-like, shape (n_rounds,), default=None
             Positions of each round in the given logged bandit feedback.
@@ -248,7 +256,7 @@ def fit_predict(

         n_folds: int, default=1
             Number of folds in the cross-fitting procedure.
-            When 1 is given, then the regression model is trained on the whole logged bandit feedback data.
+            When 1 is given, the regression model is trained on the whole logged bandit feedback data.

         random_state: int, default=None
             `random_state` affects the ordering of the indices, which controls the randomness of each fold.
@@ -260,22 +268,36 @@ def fit_predict(
             Estimated expected rewards for new data by the regression model.

         """
+        check_bandit_feedback_inputs(
+            context=context,
+            action=action,
+            reward=reward,
+            pscore=pscore,
+            position=position,
+            action_context=self.action_context,
+        )
+        n_rounds = context.shape[0]
+
         assert n_folds > 0 and isinstance(
             n_folds, int
         ), f"n_folds must be a positive integer, but {n_folds} is given"
         if self.len_list == 1:
             position = np.zeros_like(action)
         else:
             assert (
-                position is not None
-            ), "position has to be set when len_list is larger than 1"
+                isinstance(position, np.ndarray) and position.ndim == 1
+            ), f"when len_list > 1, position must be a 1-dimensional ndarray"
         if self.fitting_method in ["iw", "mrdr"]:
             assert (
-                action_dist is not None
-            ), "When either 'iw' or 'mrdr' is used as the 'fitting_method' argument, then action_dist must be given"
-            assert (
-                pscore is not None
-            ), "When either 'iw' or 'mrdr' is used as the 'fitting_method' argument, then pscore must be given"
+                isinstance(action_dist, np.ndarray) and action_dist.ndim == 3
+            ), f"when fitting_method is either 'iw' or 'mrdr', action_dist must be a 3-dimensional ndarray"
+            assert action_dist.shape == (
+                n_rounds,
+                self.n_actions,
+                self.len_list,
+            ), f"shape of action_dist must be (n_rounds, n_actions, len_list)={n_rounds, self.n_actions, self.len_list}, but is {action_dist.shape}"
+        if pscore is None:
+            pscore = np.ones_like(action) / self.n_actions

         if n_folds == 1:
             self.fit(
@@ -289,11 +311,11 @@ def fit_predict(
             return self.predict(context=context)
         else:
             estimated_rewards_by_reg_model = np.zeros(
-                (context.shape[0], self.n_actions, self.len_list)
+                (n_rounds, self.n_actions, self.len_list)
             )
-            skf = KFold(n_splits=n_folds, shuffle=True, random_state=random_state)
-            skf.get_n_splits(context)
-            for train_idx, test_idx in skf.split(context):
+            kf = KFold(n_splits=n_folds, shuffle=True, random_state=random_state)
+            kf.get_n_splits(context)
+            for train_idx, test_idx in kf.split(context):
                 action_dist_tr = (
                     action_dist[train_idx] if action_dist is not None else action_dist
                 )
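
For readers less familiar with the renamed kf loop above, this is the standard out-of-fold (cross-fitting) pattern: fit on the training folds, predict on the held-out fold, and assemble the held-out predictions. A generic sketch with a toy estimator and arrays (not the module's internals):

    import numpy as np
    from sklearn.linear_model import LinearRegression
    from sklearn.model_selection import KFold

    rng = np.random.default_rng(0)
    X = rng.normal(size=(100, 5))          # stand-in for context
    y = rng.normal(size=100)               # stand-in for reward

    out_of_fold_pred = np.zeros_like(y)
    kf = KFold(n_splits=3, shuffle=True, random_state=0)
    for train_idx, test_idx in kf.split(X):
        model = LinearRegression()
        model.fit(X[train_idx], y[train_idx])                      # fit on the training folds
        out_of_fold_pred[test_idx] = model.predict(X[test_idx])    # predict on the held-out fold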
