[Review] Feature: Balanced-OPE estimators #146

Merged
merged 20 commits into from Jan 12, 2022
Changes from 10 commits
1,258 changes: 1,258 additions & 0 deletions examples/quickstart/balanced-ope-deterministic-evaluation-policy.ipynb

Large diffs are not rendered by default.

1,260 changes: 1,260 additions & 0 deletions examples/quickstart/balanced-ope-stochastic-evaluation-policy.ipynb

Large diffs are not rendered by default.
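Since the two quickstart notebooks are not rendered here, the following is a minimal, self-contained sketch of the balanced OPE idea they exercise: train a classifier to distinguish (context, action) pairs generated by the behavior policy from those generated by the evaluation policy, turn its predicted odds into an importance sampling ratio, and plug that ratio into an IPW-style value estimate. The synthetic data, policies, and scikit-learn model below are illustrative assumptions, not the notebooks' actual contents and not obp's new classes.

```python
# Minimal illustration of classifier-based ("balanced") importance weighting.
# Everything here is synthetic and uses scikit-learn directly; it does NOT
# call BalancedInverseProbabilityWeighting or ImportanceSampler.
import numpy as np
from sklearn.linear_model import LogisticRegression

rng = np.random.default_rng(0)
n_rounds, n_actions, dim = 5000, 3, 5
context = rng.normal(size=(n_rounds, dim))

# Behavior policy: uniform; evaluation policy: prefers action 0.
pi_b = np.full((n_rounds, n_actions), 1.0 / n_actions)
pi_e = np.tile(np.array([0.6, 0.3, 0.1]), (n_rounds, 1))

action_b = np.array([rng.choice(n_actions, p=p) for p in pi_b])
action_e = np.array([rng.choice(n_actions, p=p) for p in pi_e])
reward = rng.binomial(1, 0.3 + 0.2 * (action_b == 0))

# Label (context, action) pairs by the policy that generated the action and
# train a discriminator; its predicted odds estimate the ratio pi_e / pi_b.
X_b = np.hstack([context, np.eye(n_actions)[action_b]])
X_e = np.hstack([context, np.eye(n_actions)[action_e]])
X, y = np.vstack([X_b, X_e]), np.r_[np.zeros(n_rounds), np.ones(n_rounds)]

clf = LogisticRegression(max_iter=1000).fit(X, y)
p_e = clf.predict_proba(X_b)[:, 1]
importance_sampling_ratio = p_e / (1.0 - p_e)  # \hat{\rho}(x_t, a_t)

# Balanced IPW estimate of the evaluation policy's value.
print("estimated policy value:", np.mean(importance_sampling_ratio * reward))
```

With known behavior propensities one would use pi_e / pi_b directly; the classifier route is what makes this family of estimators usable when the behavior propensities are unavailable or unreliable.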

6 changes: 6 additions & 0 deletions obp/ope/__init__.py
@@ -7,6 +7,7 @@
from obp.ope.estimators import SelfNormalizedDoublyRobust
from obp.ope.estimators import SelfNormalizedInverseProbabilityWeighting
from obp.ope.estimators import SwitchDoublyRobust
from obp.ope.estimators import BalancedInverseProbabilityWeighting
from obp.ope.estimators_continuous import (
    KernelizedSelfNormalizedInverseProbabilityWeighting,
)
@@ -31,6 +32,8 @@
from obp.ope.meta_continuous import ContinuousOffPolicyEvaluation
from obp.ope.meta_slate import SlateOffPolicyEvaluation
from obp.ope.regression_model import RegressionModel
from obp.ope.classification_model import ImportanceSampler
from obp.ope.classification_model import PropensityScoreEstimator


__all__ = [
@@ -57,6 +60,9 @@
"SelfNormalizedSlateRewardInteractionIPS",
"SelfNormalizedSlateIndependentIPS",
"SelfNormalizedSlateStandardIPS",
"BalancedInverseProbabilityWeighting",
"ImportanceSampler",
"PropensityScoreEstimator",
"BaseContinuousOffPolicyEstimator",
"KernelizedInverseProbabilityWeighting",
"KernelizedSelfNormalizedInverseProbabilityWeighting",
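Because the diff above adds the three new components to obp.ope's imports and __all__, they should be importable directly from the subpackage once this branch is installed:

```python
from obp.ope import (
    BalancedInverseProbabilityWeighting,
    ImportanceSampler,
    PropensityScoreEstimator,
)
```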
666 changes: 666 additions & 0 deletions obp/ope/classification_model.py

Large diffs are not rendered by default.
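classification_model.py is not rendered either. Going only by the exported names, ImportanceSampler presumably fits the discriminator that yields :math:`\hat{\rho}(x_t, a_t)` (as sketched above), while PropensityScoreEstimator fits a multiclass model of the behavior policy to recover :math:`\hat{\pi}_b(a_t|x_t)`. A rough sketch of the latter idea, with scikit-learn standing in for whatever model the module actually wraps:

```python
# Hedged sketch of propensity (behavior-policy) estimation with a multiclass
# classifier; the real PropensityScoreEstimator's interface may differ.
import numpy as np
from sklearn.ensemble import RandomForestClassifier

def estimate_pscore(context: np.ndarray, action: np.ndarray) -> np.ndarray:
    """Return pi_b_hat(a_t | x_t) for the action actually taken in each round."""
    clf = RandomForestClassifier(n_estimators=100, random_state=0)
    clf.fit(context, action)
    proba = clf.predict_proba(context)  # columns follow clf.classes_
    cols = np.searchsorted(clf.classes_, action)
    # In practice cross-fitting would be used so that each round's propensity
    # is predicted by a model not trained on that round.
    return proba[np.arange(len(action)), cols]
```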

367 changes: 339 additions & 28 deletions obp/ope/estimators.py

Large diffs are not rendered by default.

272 changes: 212 additions & 60 deletions obp/ope/estimators_tuning.py

Large diffs are not rendered by default.

213 changes: 198 additions & 15 deletions obp/ope/meta.py

Large diffs are not rendered by default.

12 changes: 12 additions & 0 deletions obp/utils.py
@@ -305,6 +305,7 @@ def check_ope_inputs(
    reward: Optional[np.ndarray] = None,
    pscore: Optional[np.ndarray] = None,
    estimated_rewards_by_reg_model: Optional[np.ndarray] = None,
    importance_sampling_ratio: Optional[np.ndarray] = None,
) -> Optional[ValueError]:
    """Check inputs for ope.

@@ -329,6 +330,9 @@
    estimated_rewards_by_reg_model: array-like, shape (n_rounds, n_actions, len_list), default=None
        Expected rewards given context, action, and position estimated by regression model, i.e., :math:`\\hat{q}(x_t,a_t)`.

    importance_sampling_ratio: array-like or Tensor, shape (n_rounds,), default=None
        Ratio of the probability that the action is sampled by the evaluation policy to the probability that it is sampled by the behavior policy,
        i.e., :math:`\\hat{\\rho}(x_t, a_t)`.
    """
    # action_dist
    check_array(array=action_dist, name="action_dist", expected_dim=3)
@@ -360,6 +364,14 @@
"Expected `estimated_rewards_by_reg_model.shape == action_dist.shape`, but found it False"
)

    if importance_sampling_ratio is not None:
        if not (action.shape[0] == importance_sampling_ratio.shape[0]):
            raise ValueError(
                "Expected `action.shape[0] == importance_sampling_ratio.shape[0]`, but found it False"
            )
        if np.any(importance_sampling_ratio < 0):
            raise ValueError("importance_sampling_ratio must be non-negative")

    # action, reward
    if action is not None or reward is not None:
        check_array(array=action, name="action", expected_dim=1)
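For concreteness, a tiny numeric illustration of the invariants the new check enforces: the ratio has one entry per round (so its length matches action) and is non-negative when computed as :math:`\pi_e(a_t|x_t) / \hat{\pi}_b(a_t|x_t)`. The array values below are made up for the example.

```python
import numpy as np

action = np.array([0, 2, 1, 0])
pi_e_taken = np.array([0.6, 0.1, 0.3, 0.6])       # pi_e(a_t | x_t), illustrative
pscore = np.array([1 / 3, 1 / 3, 1 / 3, 1 / 3])   # pi_b(a_t | x_t), illustrative

importance_sampling_ratio = pi_e_taken / pscore

# These mirror the two conditions added to check_ope_inputs above.
assert importance_sampling_ratio.shape[0] == action.shape[0]
assert not np.any(importance_sampling_ratio < 0)
```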