Hotfix: potential error in the fit_predict method of RegressionModel #23

Merged
merged 1 commit on Nov 9, 2020
obp/ope/regression_model.py (90 changes: 56 additions & 34 deletions)
@@ -14,16 +14,16 @@

@dataclass
class RegressionModel(BaseEstimator):
"""Machine learning model to estimate the mean reward function (:math:`q(x,a):= \\mathbb{E}_{r \sim p(r|x,a)} [r|x,a]`).
"""Machine learning model to estimate the mean reward function (:math:`q(x,a):= \\mathbb{E}[r|x,a]`).

Note
-------
Reward (or outcome) :math:`Y` must be either binary or continuous.
Reward (or outcome) :math:`r` must be either binary or continuous.

Parameters
------------
base_model: BaseEstimator
Model class to be used to estimate the mean reward function.
A machine learning model used to estimate the mean reward function.

n_actions: int
Number of actions.
@@ -66,7 +66,7 @@ def __post_init__(self) -> None:
"normal",
"iw",
"mrdr",
], f"fitting method must be one of 'normal', 'iw', or 'mrdr', but {self.fitting_method} is given"
], f"fitting_method must be one of 'normal', 'iw', or 'mrdr', but {self.fitting_method} is given"
assert self.n_actions > 1 and isinstance(
self.n_actions, int
), f"n_actions must be an integer larger than 1, but {self.n_actions} is given"
@@ -101,9 +101,10 @@ def fit(
reward: array-like, shape (n_rounds,)
Observed rewards (or outcome) in each round, i.e., :math:`r_t`.

pscore: Optional[np.ndarray], default=None
Propensity scores, the action choice probabilities by behavior policy,
pscore: array-like, shape (n_rounds,), default=None
Action choice probabilities (propensity score) of a behavior policy
in the training logged bandit feedback.
When None is given, the behavior policy is assumed to be a uniform one.

position: array-like, shape (n_rounds,), default=None
Positions of each round in the given logged bandit feedback.
@@ -123,20 +124,26 @@
position=position,
action_context=self.action_context,
)
n_rounds = context.shape[0]

if self.len_list == 1:
position = np.zeros_like(action)
else:
assert (
position is not None
), "position has to be set when len_list is larger than 1"
isinstance(position, np.ndarray) and position.ndim == 1
), f"when len_list > 1, position must be a 1-dimensional ndarray"
if self.fitting_method in ["iw", "mrdr"]:
assert (
action_dist is not None
), "When either 'iw' or 'mrdr' is used as the 'fitting_method' argument, then action_dist must be given"
assert (
pscore is not None
), "When either 'iw' or 'mrdr' is used as the 'fitting_method' argument, then pscore must be given"
n_data = context.shape[0]
isinstance(action_dist, np.ndarray) and action_dist.ndim == 3
), f"when fitting_method is either 'iw' or 'mrdr', action_dist must be a 3-dimensional ndarray"
assert action_dist.shape == (
n_rounds,
self.n_actions,
self.len_list,
), f"shape of action_dist must be (n_rounds, n_actions, len_list)=({n_rounds, self.n_actions, self.len_list})"
if pscore is None:
pscore = np.ones_like(action) / self.n_actions

for position_ in np.arange(self.len_list):
idx = position == position_
X = self._pre_process_for_reg_model(
@@ -149,19 +156,19 @@
self.base_model_list[position_].fit(X, reward[idx])
else:
action_dist_at_position = action_dist[
np.arange(n_data), action, position_ * np.ones(n_data, dtype=int)
np.arange(n_rounds),
action,
position_ * np.ones(n_rounds, dtype=int),
][idx]
if self.fitting_method == "iw":
sample_weight = action_dist_at_position / pscore[idx]
self.base_model_list[position_].fit(
X, reward[idx], sample_weight=sample_weight
)
elif self.fitting_method == "mrdr":
sample_weight = (
action_dist_at_position
* (1.0 - pscore[idx])
/ (pscore[idx] ** 2)
)
sample_weight = action_dist_at_position
sample_weight *= 1.0 - pscore[idx]
sample_weight /= pscore[idx] ** 2
self.base_model_list[position_].fit(
X, reward[idx], sample_weight=sample_weight
)
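
For reference, a minimal standalone sketch of the sample weights that the loop above computes; the arrays below are made up for the example. Under 'iw' the weight is the importance ratio pi_e(a|x) / pi_b(a|x), and under 'mrdr' it is pi_e(a|x) * (1 - pi_b(a|x)) / pi_b(a|x)^2, which the patch now builds up step by step instead of in one long expression.

import numpy as np

# illustrative values (not from the diff): evaluation policy probabilities
# pi_e(a_t|x_t) and behavior policy probabilities (pscore) pi_b(a_t|x_t)
action_dist_at_position = np.array([0.5, 0.2, 0.9])
pscore = np.array([0.25, 0.5, 0.3])

# 'iw': plain importance weight pi_e / pi_b
iw_weight = action_dist_at_position / pscore  # [2.0, 0.4, 3.0]

# 'mrdr': more robust doubly robust weight pi_e * (1 - pi_b) / pi_b^2
mrdr_weight = action_dist_at_position * (1.0 - pscore) / pscore ** 2  # [6.0, 0.4, 7.0]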
@@ -215,7 +222,7 @@ def fit_predict(
n_folds: int = 1,
random_state: Optional[int] = None,
) -> None:
"""Fit the regression model on given logged bandit feedback data and then predict the mean reward function of the same data.
"""Fit the regression model on given logged bandit feedback data and predict the reward function of the same data.

Note
------
@@ -234,8 +241,9 @@
Observed rewards (or outcome) in each round, i.e., :math:`r_t`.

pscore: array-like, shape (n_rounds,), default=None
Propensity scores, the action choice probabilities by behavior policy,
Action choice probabilities (propensity score) of a behavior policy
in the training logged bandit feedback.
When None is given, the behavior policy is assumed to be a uniform one.

position: array-like, shape (n_rounds,), default=None
Positions of each round in the given logged bandit feedback.
@@ -248,7 +256,7 @@

n_folds: int, default=1
Number of folds in the cross-fitting procedure.
When 1 is given, then the regression model is trained on the whole logged bandit feedback data.
When 1 is given, the regression model is trained on the whole logged bandit feedback data.

random_state: int, default=None
`random_state` affects the ordering of the indices, which controls the randomness of each fold.
@@ -260,22 +268,36 @@
Estimated expected rewards for the given logged bandit feedback data by the regression model.

"""
check_bandit_feedback_inputs(
context=context,
action=action,
reward=reward,
pscore=pscore,
position=position,
action_context=self.action_context,
)
n_rounds = context.shape[0]

assert n_folds > 0 and isinstance(
n_folds, int
), f"n_folds must be a positive integer, but {n_folds} is given"
if self.len_list == 1:
position = np.zeros_like(action)
else:
assert (
position is not None
), "position has to be set when len_list is larger than 1"
isinstance(position, np.ndarray) and position.ndim == 1
), f"when len_list > 1, position must be a 1-dimensional ndarray"
if self.fitting_method in ["iw", "mrdr"]:
assert (
action_dist is not None
), "When either 'iw' or 'mrdr' is used as the 'fitting_method' argument, then action_dist must be given"
assert (
pscore is not None
), "When either 'iw' or 'mrdr' is used as the 'fitting_method' argument, then pscore must be given"
isinstance(action_dist, np.ndarray) and action_dist.ndim == 3
), f"when fitting_method is either 'iw' or 'mrdr', action_dist must be a 3-dimensional ndarray"
assert action_dist.shape == (
n_rounds,
self.n_actions,
self.len_list,
), f"shape of action_dist must be (n_rounds, n_actions, len_list)={n_rounds, self.n_actions, self.len_list}, but is {action_dist.shape}"
if pscore is None:
pscore = np.ones_like(action) / self.n_actions

if n_folds == 1:
self.fit(
Expand All @@ -289,11 +311,11 @@ def fit_predict(
return self.predict(context=context)
else:
estimated_rewards_by_reg_model = np.zeros(
(context.shape[0], self.n_actions, self.len_list)
(n_rounds, self.n_actions, self.len_list)
)
skf = KFold(n_splits=n_folds, shuffle=True, random_state=random_state)
skf.get_n_splits(context)
for train_idx, test_idx in skf.split(context):
kf = KFold(n_splits=n_folds, shuffle=True, random_state=random_state)
kf.get_n_splits(context)
for train_idx, test_idx in kf.split(context):
action_dist_tr = (
action_dist[train_idx] if action_dist is not None else action_dist
)
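
Finally, a minimal usage sketch of the patched fit_predict with cross-fitting. It assumes obp's SyntheticBanditDataset and scikit-learn's LogisticRegression; the dataset settings are illustrative rather than taken from this PR.

from sklearn.linear_model import LogisticRegression
from obp.dataset import SyntheticBanditDataset
from obp.ope import RegressionModel

# illustrative synthetic logged bandit feedback with binary rewards
dataset = SyntheticBanditDataset(n_actions=10, dim_context=5, random_state=12345)
bandit_feedback = dataset.obtain_batch_bandit_feedback(n_rounds=1000)

regression_model = RegressionModel(
    n_actions=dataset.n_actions,
    base_model=LogisticRegression(max_iter=1000, random_state=12345),
    fitting_method="normal",
)

# n_folds > 1 exercises the cross-fitting branch touched by this patch
estimated_rewards = regression_model.fit_predict(
    context=bandit_feedback["context"],
    action=bandit_feedback["action"],
    reward=bandit_feedback["reward"],
    pscore=bandit_feedback["pscore"],
    n_folds=3,
    random_state=12345,
)
print(estimated_rewards.shape)  # (1000, 10, 1) = (n_rounds, n_actions, len_list)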