Skip to content

Feature: Make Hyperparameter tuning method flexible #131

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
Sep 6, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion benchmark/ope/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ It is possible to run multiple experimental settings easily by using the `--mult
For example, the following script sweeps over all simulations including the three campaigns ('all', 'men', and 'women') and two different behavior policies ('random' and 'bts').

```bash
poetry run python benchmark_ope_estimators.py setting.campaign=all,men,women setting.behavior_policy=random.bts --multirun
poetry run python benchmark_ope_estimators.py setting.campaign=all,men,women setting.behavior_policy=random,bts --multirun
```

The experimental results (including the pairwise hypothesis test results) will be stored in the `logs/` directory.
Expand Down
3 changes: 1 addition & 2 deletions benchmark/ope/benchmark_ope_estimators.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,14 +45,13 @@ def main(cfg: DictConfig) -> None:

# compared ope estimators
lambdas = list(dict(cfg.estimator_hyperparams)["lambdas"])
taus = list(dict(cfg.estimator_hyperparams)["taus"])
ope_estimators = [
InverseProbabilityWeighting(estimator_name="IPW"),
SelfNormalizedInverseProbabilityWeighting(estimator_name="SNIPW"),
DirectMethod(estimator_name="DM"),
DoublyRobust(estimator_name="DR"),
SelfNormalizedDoublyRobust(estimator_name="SNDR"),
SwitchDoublyRobustTuning(taus=taus, estimator_name="Switch-DR"),
SwitchDoublyRobustTuning(lambdas=lambdas, estimator_name="Switch-DR"),
DoublyRobustWithShrinkageTuning(lambdas=lambdas, estimator_name="DRos"),
]

Expand Down
10 changes: 0 additions & 10 deletions benchmark/ope/conf/estimator_hyperparams/default.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,3 @@ lambdas:
- 1000
- 5000
- 10000
taus:
- 1
- 5
- 10
- 50
- 100
- 500
- 1000
- 5000
- 10000
4 changes: 2 additions & 2 deletions examples/multiclass/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -76,8 +76,8 @@ python evaluate_off_policy_estimators.py\
# snipw 0.006797 0.004094
# dr 0.007780 0.004492
# sndr 0.007210 0.004089
# switch-dr (tau=1) 0.173282 0.020025
# switch-dr (tau=100) 0.007780 0.004492
# switch-dr (lambda=1) 0.173282 0.020025
# switch-dr (lambda=100) 0.007780 0.004492
# dr-os (lambda=1) 0.079629 0.014008
# dr-os (lambda=100) 0.008031 0.004634
# =============================================
Expand Down
4 changes: 2 additions & 2 deletions examples/multiclass/evaluate_off_policy_estimators.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,8 +48,8 @@
SelfNormalizedInverseProbabilityWeighting(),
DoublyRobust(),
SelfNormalizedDoublyRobust(),
SwitchDoublyRobust(tau=1.0, estimator_name="switch-dr (tau=1)"),
SwitchDoublyRobust(tau=100.0, estimator_name="switch-dr (tau=100)"),
SwitchDoublyRobust(lambda_=1.0, estimator_name="switch-dr (lambda=1)"),
SwitchDoublyRobust(lambda_=100.0, estimator_name="switch-dr (lambda=100)"),
DoublyRobustWithShrinkage(lambda_=1.0, estimator_name="dr-os (lambda=1)"),
DoublyRobustWithShrinkage(lambda_=100.0, estimator_name="dr-os (lambda=100)"),
]
Expand Down
4 changes: 2 additions & 2 deletions examples/synthetic/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -69,8 +69,8 @@ python evaluate_off_policy_estimators.py\
# snipw 0.007543 0.005196
# dr 0.008099 0.006659
# sndr 0.008054 0.004911
# switch-dr (tau=1) 0.195878 0.012146
# switch-dr (tau=100) 0.008099 0.006659
# switch-dr (lambda=1) 0.195878 0.012146
# switch-dr (lambda=100) 0.008099 0.006659
# dr-os (lambda=1) 0.195642 0.012151
# dr-os (lambda=100) 0.175285 0.012801
# =============================================
Expand Down
4 changes: 2 additions & 2 deletions examples/synthetic/evaluate_off_policy_estimators.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,8 @@
SelfNormalizedInverseProbabilityWeighting(),
DoublyRobust(),
SelfNormalizedDoublyRobust(),
SwitchDoublyRobust(tau=1.0, estimator_name="switch-dr (tau=1)"),
SwitchDoublyRobust(tau=100.0, estimator_name="switch-dr (tau=100)"),
SwitchDoublyRobust(lambda_=1.0, estimator_name="switch-dr (lambda=1)"),
SwitchDoublyRobust(lambda_=100.0, estimator_name="switch-dr (lambda=100)"),
DoublyRobustWithShrinkage(lambda_=1.0, estimator_name="dr-os (lambda=1)"),
DoublyRobustWithShrinkage(lambda_=100.0, estimator_name="dr-os (lambda=100)"),
]
Expand Down
161 changes: 120 additions & 41 deletions obp/ope/estimators.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
import torch
from sklearn.utils import check_scalar

from .helper import estimate_high_probability_upper_bound_bias
from .helper import estimate_high_probability_upper_bound_bias, estimate_bias_in_ope
from ..utils import (
estimate_confidence_interval_by_bootstrap,
check_ope_inputs,
Expand Down Expand Up @@ -551,6 +551,8 @@ def _estimate_mse_score(
pscore: np.ndarray,
action_dist: np.ndarray,
position: Optional[np.ndarray] = None,
use_bias_upper_bound: bool = True,
delta: float = 0.05,
**kwargs,
) -> float:
"""Estimate the MSE score of a given clipping hyperparameter to conduct hyperparameter tuning.
Expand All @@ -572,6 +574,13 @@ def _estimate_mse_score(
position: array-like, shape (n_rounds,), default=None
Position of recommendation interface where action was presented in each round of the given logged bandit feedback.

use_bias_upper_bound: bool, default=True
Whether to use bias upper bound in hyperparameter tuning.
If False, direct bias estimator is used to estimate the MSE.

delta: float, default=0.05
A confidence delta to construct a high probability upper bound based on the Bernstein’s inequality.

Returns
----------
estimated_mse_score: float
Expand All @@ -596,12 +605,17 @@ def _estimate_mse_score(

# estimate the (high probability) upper bound of the bias of IPW with clipping
iw = action_dist[np.arange(n_rounds), action, position] / pscore
bias_upper_bound = estimate_high_probability_upper_bound_bias(
reward=reward,
iw=iw,
iw_hat=np.minimum(iw, self.lambda_),
)
estimated_mse_score = sample_variance + (bias_upper_bound ** 2)
if use_bias_upper_bound:
bias_term = estimate_high_probability_upper_bound_bias(
reward=reward, iw=iw, iw_hat=np.minimum(iw, self.lambda_), delta=delta
)
else:
bias_term = estimate_bias_in_ope(
reward=reward,
iw=iw,
iw_hat=np.minimum(iw, self.lambda_),
)
estimated_mse_score = sample_variance + (bias_term ** 2)

return estimated_mse_score

Expand Down Expand Up @@ -1310,6 +1324,8 @@ def _estimate_mse_score(
action_dist: np.ndarray,
estimated_rewards_by_reg_model: np.ndarray,
position: Optional[np.ndarray] = None,
use_bias_upper_bound: bool = True,
delta: float = 0.05,
) -> float:
"""Estimate the MSE score of a given clipping hyperparameter to conduct hyperparameter tuning.

Expand All @@ -1335,6 +1351,13 @@ def _estimate_mse_score(
estimated_rewards_by_reg_model: array-like, shape (n_rounds, n_actions, len_list)
Expected rewards given context, action, and position estimated by regression model, i.e., :math:`\\hat{q}(x_t,a_t)`.

use_bias_upper_bound: bool, default=True
Whether to use bias upper bound in hyperparameter tuning.
If False, direct bias estimator is used to estimate the MSE.

delta: float, default=0.05
A confidence delta to construct a high probability upper bound based on the Bernstein’s inequality.

Returns
----------
estimated_mse_score: float
Expand All @@ -1360,13 +1383,26 @@ def _estimate_mse_score(

# estimate the (high probability) upper bound of the bias of DR with clipping
iw = action_dist[np.arange(n_rounds), action, position] / pscore
bias_upper_bound = estimate_high_probability_upper_bound_bias(
reward=reward,
iw=iw,
iw_hat=np.minimum(iw, self.lambda_),
q_hat=estimated_rewards_by_reg_model[np.arange(n_rounds), action, position],
)
estimated_mse_score = sample_variance + (bias_upper_bound ** 2)
if use_bias_upper_bound:
bias_term = estimate_high_probability_upper_bound_bias(
reward=reward,
iw=iw,
iw_hat=np.minimum(iw, self.lambda_),
q_hat=estimated_rewards_by_reg_model[
np.arange(n_rounds), action, position
],
delta=delta,
)
else:
bias_term = estimate_bias_in_ope(
reward=reward,
iw=iw,
iw_hat=np.minimum(iw, self.lambda_),
q_hat=estimated_rewards_by_reg_model[
np.arange(n_rounds), action, position
],
)
estimated_mse_score = sample_variance + (bias_term ** 2)

return estimated_mse_score

Expand Down Expand Up @@ -1487,20 +1523,20 @@ class SwitchDoublyRobust(DoublyRobust):

.. math::

\\hat{V}_{\\mathrm{SwitchDR}} (\\pi_e; \\mathcal{D}, \\hat{q}, \\tau)
:= \\mathbb{E}_{\\mathcal{D}} [\\hat{q}(x_t,\\pi_e) + w(x_t,a_t) (r_t - \\hat{q}(x_t,a_t)) \\mathbb{I} \\{ w(x_t,a_t) \\le \\tau \\}],
\\hat{V}_{\\mathrm{SwitchDR}} (\\pi_e; \\mathcal{D}, \\hat{q}, \\lambda)
:= \\mathbb{E}_{\\mathcal{D}} [\\hat{q}(x_t,\\pi_e) + w(x_t,a_t) (r_t - \\hat{q}(x_t,a_t)) \\mathbb{I} \\{ w(x_t,a_t) \\le \\lambda \\}],

where :math:`\\mathcal{D}=\\{(x_t,a_t,r_t)\\}_{t=1}^{T}` is logged bandit feedback data with :math:`T` rounds collected by
a behavior policy :math:`\\pi_b`. :math:`w(x,a):=\\pi_e (a|x)/\\pi_b (a|x)` is the importance weight given :math:`x` and :math:`a`.
:math:`\\mathbb{E}_{\\mathcal{D}}[\\cdot]` is the empirical average over :math:`T` observations in :math:`\\mathcal{D}`.
:math:`\\tau (\\ge 0)` is a switching hyperparameter, which decides the threshold for the importance weight.
:math:`\\lambda (\\ge 0)` is a switching hyperparameter, which decides the threshold for the importance weight.
:math:`\\hat{q} (x,a)` is an estimated expected reward given :math:`x` and :math:`a`.
:math:`\\hat{q} (x_t,\\pi):= \\mathbb{E}_{a \\sim \\pi(a|x)}[\\hat{q}(x,a)]` is the expectation of the estimated reward function over :math:`\\pi`.
To estimate the mean reward function, please use `obp.ope.regression_model.RegressionModel`.

Parameters
----------
tau: float, default=np.inf
lambda_: float, default=np.inf
Switching hyperparameter. When importance weight is larger than this parameter, DM is applied, otherwise DR is used.
This hyperparameter should be larger than or equal to 0., otherwise it is meaningless.

Expand All @@ -1520,19 +1556,19 @@ class SwitchDoublyRobust(DoublyRobust):

"""

tau: float = np.inf
lambda_: float = np.inf
estimator_name: str = "switch-dr"

def __post_init__(self) -> None:
"""Initialize Class."""
check_scalar(
self.tau,
name="tau",
self.lambda_,
name="lambda_",
target_type=(int, float),
min_val=0.0,
)
if self.tau != self.tau:
raise ValueError("tau must not be nan")
if self.lambda_ != self.lambda_:
raise ValueError("lambda_ must not be nan")

def _estimate_round_rewards(
self,
Expand Down Expand Up @@ -1576,7 +1612,7 @@ def _estimate_round_rewards(
"""
n_rounds = action.shape[0]
iw = action_dist[np.arange(n_rounds), action, position] / pscore
switch_indicator = np.array(iw <= self.tau, dtype=int)
switch_indicator = np.array(iw <= self.lambda_, dtype=int)
q_hat_at_position = estimated_rewards_by_reg_model[
np.arange(n_rounds), :, position
]
Expand Down Expand Up @@ -1613,6 +1649,8 @@ def _estimate_mse_score(
action_dist: np.ndarray,
estimated_rewards_by_reg_model: np.ndarray,
position: Optional[np.ndarray] = None,
use_bias_upper_bound: bool = False,
delta: float = 0.05,
) -> float:
"""Estimate the MSE score of a given switching hyperparameter to conduct hyperparameter tuning.

Expand All @@ -1638,10 +1676,17 @@ def _estimate_mse_score(
When None is given, the effect of position on the reward will be ignored.
(If only one action is chosen and there is no position, then you can just ignore this argument.)

use_bias_upper_bound: bool, default=False
Whether to use bias upper bound in hyperparameter tuning.
If False, direct bias estimator is used to estimate the MSE.

delta: float, default=0.05
A confidence delta to construct a high probability upper bound based on the Bernstein’s inequality.

Returns
----------
estimated_mse_score: float
Estimated MSE score of a given switching hyperparameter `tau`.
Estimated MSE score of a given switching hyperparameter `lambda_`.
MSE score is the sum of (high probability) upper bound of bias and the sample variance.
This is estimated using the automatic hyperparameter tuning procedure
based on Section 5 of Su et al.(2020).
Expand All @@ -1663,13 +1708,26 @@ def _estimate_mse_score(

# estimate the (high probability) upper bound of the bias of Switch-DR
iw = action_dist[np.arange(n_rounds), action, position] / pscore
bias_upper_bound = estimate_high_probability_upper_bound_bias(
reward=reward,
iw=iw,
iw_hat=iw * np.array(iw <= self.tau, dtype=int),
q_hat=estimated_rewards_by_reg_model[np.arange(n_rounds), action, position],
)
estimated_mse_score = sample_variance + (bias_upper_bound ** 2)
if use_bias_upper_bound:
bias_term = estimate_high_probability_upper_bound_bias(
reward=reward,
iw=iw,
iw_hat=iw * np.array(iw <= self.lambda_, dtype=int),
q_hat=estimated_rewards_by_reg_model[
np.arange(n_rounds), action, position
],
delta=delta,
)
else:
bias_term = estimate_bias_in_ope(
reward=reward,
iw=iw,
iw_hat=iw * np.array(iw <= self.lambda_, dtype=int),
q_hat=estimated_rewards_by_reg_model[
np.arange(n_rounds), action, position
],
)
estimated_mse_score = sample_variance + (bias_term ** 2)

return estimated_mse_score

Expand Down Expand Up @@ -1703,8 +1761,7 @@ class DoublyRobustWithShrinkage(DoublyRobust):
w_{o} (x_t,a_t;\\lambda) := \\frac{\\lambda}{w^2(x_t,a_t) + \\lambda} w(x_t,a_t).

When :math:`\\lambda=0`, we have :math:`w_{o} (x,a;\\lambda)=0` corresponding to the DM estimator.
In contrast, as :math:`\\lambda \\rightarrow \\infty`, :math:`w_{o} (x,a;\\lambda)` increases and in the limit becomes equal to
the original importance weight, corresponding to the standard DR estimator.
In contrast, as :math:`\\lambda \\rightarrow \\infty`, :math:`w_{o} (x,a;\\lambda)` increases and in the limit becomes equal to the original importance weight, corresponding to the standard DR estimator.

Parameters
----------
Expand Down Expand Up @@ -1815,6 +1872,8 @@ def _estimate_mse_score(
action_dist: np.ndarray,
estimated_rewards_by_reg_model: np.ndarray,
position: Optional[np.ndarray] = None,
use_bias_upper_bound: bool = False,
delta: float = 0.05,
) -> float:
"""Estimate the MSE score of a given shrinkage hyperparameter to conduct hyperparameter tuning.

Expand All @@ -1838,6 +1897,13 @@ def _estimate_mse_score(
position: array-like, shape (n_rounds,), default=None
Position of recommendation interface where action was presented in each round of the given logged bandit feedback.

use_bias_upper_bound: bool, default=False
Whether to use bias upper bound in hyperparameter tuning.
If False, direct bias estimator is used to estimate the MSE.

delta: float, default=0.05
A confidence delta to construct a high probability upper bound based on the Bernstein’s inequality.

Returns
----------
estimated_mse_score: float
Expand Down Expand Up @@ -1867,12 +1933,25 @@ def _estimate_mse_score(
iw_hat = (self.lambda_ * iw) / (iw ** 2 + self.lambda_)
else:
iw_hat = iw
bias_upper_bound = estimate_high_probability_upper_bound_bias(
reward=reward,
iw=iw,
iw_hat=iw_hat,
q_hat=estimated_rewards_by_reg_model[np.arange(n_rounds), action, position],
)
estimated_mse_score = sample_variance + (bias_upper_bound ** 2)
if use_bias_upper_bound:
bias_term = estimate_high_probability_upper_bound_bias(
reward=reward,
iw=iw,
iw_hat=iw_hat,
q_hat=estimated_rewards_by_reg_model[
np.arange(n_rounds), action, position
],
delta=delta,
)
else:
bias_term = estimate_bias_in_ope(
reward=reward,
iw=iw,
iw_hat=iw_hat,
q_hat=estimated_rewards_by_reg_model[
np.arange(n_rounds), action, position
],
)
estimated_mse_score = sample_variance + (bias_term ** 2)

return estimated_mse_score
Loading