
Feature policy learner #132


Merged · 16 commits · Sep 6, 2021 · Changes from all commits
17 changes: 9 additions & 8 deletions benchmark/cf_policy_search/run_cf_policy_search.py
@@ -1,26 +1,27 @@
import argparse
from pathlib import Path
import yaml

from custom_dataset import OBDWithInteractionFeatures
from joblib import delayed
from joblib import Parallel
import numpy as np
from pandas import DataFrame
from joblib import Parallel, delayed
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.experimental import enable_hist_gradient_boosting # noqa
from sklearn.ensemble import HistGradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
import yaml

from custom_dataset import OBDWithInteractionFeatures
from obp.policy import IPWLearner
from obp.ope import InverseProbabilityWeighting
from obp.policy import IPWLearner


# hyperparameters of the regression model used in model dependent OPE estimators
with open("./conf/hyperparams.yaml", "rb") as f:
hyperparams = yaml.safe_load(f)

base_model_dict = dict(
logistic_regression=LogisticRegression,
lightgbm=HistGradientBoostingClassifier,
lightgbm=GradientBoostingClassifier,
random_forest=RandomForestClassifier,
)

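The `max_iter` → `n_estimators` renames in the YAML configs further down match this swap: `HistGradientBoostingClassifier` takes `max_iter`, while `GradientBoostingClassifier` takes `n_estimators`. Below is a minimal sketch of how the loaded hyperparameters and `base_model_dict` are plausibly combined; the `base_model` variable stands in for the script's CLI argument and is an assumption, not a verbatim excerpt.

```python
import yaml
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression

# same mapping as in the diff above: "lightgbm" now resolves to sklearn's
# classic GradientBoostingClassifier, which accepts n_estimators
base_model_dict = dict(
    logistic_regression=LogisticRegression,
    lightgbm=GradientBoostingClassifier,
    random_forest=RandomForestClassifier,
)

with open("./conf/hyperparams.yaml", "rb") as f:
    hyperparams = yaml.safe_load(f)

base_model = "lightgbm"  # assumed to come from argparse in the real script
# n_estimators, learning_rate, max_depth, min_samples_leaf are all valid
# GradientBoostingClassifier keyword arguments
clf = base_model_dict[base_model](**hyperparams[base_model])
```
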
34 changes: 17 additions & 17 deletions benchmark/ope/benchmark_ope_estimators.py
@@ -1,30 +1,30 @@
from logging import getLogger
from pathlib import Path
import time
import warnings
from pathlib import Path
from logging import getLogger

import hydra
from omegaconf import DictConfig
import numpy as np
from omegaconf import DictConfig
from pandas import DataFrame
import pingouin as pg
from pyieoe.evaluator import InterpretableOPEEvaluator
from sklearn.ensemble import GradientBoostingClassifier as LightGBM
from sklearn.ensemble import RandomForestClassifier as RandomForest
from sklearn.exceptions import ConvergenceWarning
from sklearn.experimental import enable_hist_gradient_boosting # noqa
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier as RandomForest
from sklearn.ensemble import HistGradientBoostingClassifier as LightGBM

from obp.dataset import OpenBanditDataset
from obp.policy import Random, BernoulliTS
from obp.ope import (
InverseProbabilityWeighting,
SelfNormalizedInverseProbabilityWeighting,
DirectMethod,
DoublyRobust,
SelfNormalizedDoublyRobust,
SwitchDoublyRobustTuning,
DoublyRobustWithShrinkageTuning,
)
from pyieoe.evaluator import InterpretableOPEEvaluator
from obp.ope import DirectMethod
from obp.ope import DoublyRobust
from obp.ope import DoublyRobustWithShrinkageTuning
from obp.ope import InverseProbabilityWeighting
from obp.ope import SelfNormalizedDoublyRobust
from obp.ope import SelfNormalizedInverseProbabilityWeighting
from obp.ope import SwitchDoublyRobustTuning
from obp.policy import BernoulliTS
from obp.policy import Random


logger = getLogger(__name__)
warnings.filterwarnings(action="ignore", category=ConvergenceWarning)
24 changes: 12 additions & 12 deletions benchmark/ope/benchmark_ope_estimators_hypara.py
@@ -1,25 +1,25 @@
from logging import getLogger
from pathlib import Path
import time
import warnings
from pathlib import Path
from logging import getLogger

import hydra
from omegaconf import DictConfig
import numpy as np
from omegaconf import DictConfig
from pandas import DataFrame
import pingouin as pg
from pyieoe.evaluator import InterpretableOPEEvaluator
from sklearn.ensemble import GradientBoostingClassifier as LightGBM
from sklearn.ensemble import RandomForestClassifier as RandomForest
from sklearn.exceptions import ConvergenceWarning
from sklearn.experimental import enable_hist_gradient_boosting # noqa
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier as RandomForest
from sklearn.ensemble import HistGradientBoostingClassifier as LightGBM

from obp.dataset import OpenBanditDataset
from obp.policy import Random, BernoulliTS
from obp.ope import (
DoublyRobustWithShrinkage,
DoublyRobustWithShrinkageTuning,
)
from pyieoe.evaluator import InterpretableOPEEvaluator
from obp.ope import DoublyRobustWithShrinkage
from obp.ope import DoublyRobustWithShrinkageTuning
from obp.policy import BernoulliTS
from obp.policy import Random


logger = getLogger(__name__)
warnings.filterwarnings(action="ignore", category=ConvergenceWarning)
2 changes: 1 addition & 1 deletion benchmark/ope/conf/reg_model_hyperparams/default.yaml
@@ -1,6 +1,6 @@
# @package _group_
lightgbm:
max_iter: 100
n_estimators: 100
learning_rate: 0.01
max_depth: 5
min_samples_leaf: 10
2 changes: 1 addition & 1 deletion examples/multiclass/conf/hyperparams.yaml
@@ -1,5 +1,5 @@
lightgbm:
max_iter: 30
n_estimators: 30
learning_rate: 0.01
max_depth: 5
min_samples_leaf: 10
36 changes: 19 additions & 17 deletions examples/multiclass/evaluate_off_policy_estimators.py
@@ -1,27 +1,29 @@
import argparse
import yaml
from pathlib import Path

from joblib import delayed
from joblib import Parallel
import numpy as np
from pandas import DataFrame
from joblib import Parallel, delayed
from sklearn.datasets import load_breast_cancer, load_digits, load_iris, load_wine
from sklearn.experimental import enable_hist_gradient_boosting # noqa
from sklearn.ensemble import HistGradientBoostingClassifier, RandomForestClassifier
from sklearn.datasets import load_breast_cancer
from sklearn.datasets import load_digits
from sklearn.datasets import load_iris
from sklearn.datasets import load_wine
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
import yaml

from obp.dataset import MultiClassToBanditReduction
from obp.ope import (
RegressionModel,
OffPolicyEvaluation,
InverseProbabilityWeighting,
SelfNormalizedInverseProbabilityWeighting,
DirectMethod,
DoublyRobust,
SelfNormalizedDoublyRobust,
SwitchDoublyRobust,
DoublyRobustWithShrinkage,
)
from obp.ope import DirectMethod
from obp.ope import DoublyRobust
from obp.ope import DoublyRobustWithShrinkage
from obp.ope import InverseProbabilityWeighting
from obp.ope import OffPolicyEvaluation
from obp.ope import RegressionModel
from obp.ope import SelfNormalizedDoublyRobust
from obp.ope import SelfNormalizedInverseProbabilityWeighting
from obp.ope import SwitchDoublyRobust


# hyperparameters of the regression model used in model dependent OPE estimators
@@ -37,7 +39,7 @@

base_model_dict = dict(
logistic_regression=LogisticRegression,
lightgbm=HistGradientBoostingClassifier,
lightgbm=GradientBoostingClassifier,
random_forest=RandomForestClassifier,
)

8 changes: 3 additions & 5 deletions examples/obd/README.md
@@ -52,12 +52,10 @@ python evaluate_off_policy_estimators.py\
# random_state=12345
# ------------------------------
# mean std
# dm 0.180288 0.114694
# dm 0.180269 0.114716
# ipw 0.333113 0.350425
# dr 0.304401 0.347842
# dr 0.304422 0.347866
# ==============================
```

Please refer to [this page](https://zr-obp.readthedocs.io/en/latest/evaluation_ope.html) for the evaluation of OPE protocol using our real-world data.
Please visit [synthetic](../synthetic/) to try the evaluation of OPE estimators with synthetic bandit datasets.
Moreover, in [benchmark/ope](https://github.com/st-tech/zr-obp/tree/master/benchmark/ope), we performed the benchmark experiments on several OPE estimators using the full size Open Bandit Dataset.
Please refer to [this page](https://zr-obp.readthedocs.io/en/latest/evaluation_ope.html) for the evaluation of OPE protocol using our real-world data. Please visit [synthetic](../synthetic/) to try the evaluation of OPE estimators with synthetic bandit datasets. Moreover, in [benchmark/ope](https://github.com/st-tech/zr-obp/tree/master/benchmark/ope), we performed the benchmark experiments on several OPE estimators using the full size Open Bandit Dataset.
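
For orientation, the table above is produced by comparing OPE estimates (DM, IPW, DR) against an on-policy ground truth on the Open Bandit Dataset. The following is a condensed, hedged sketch of that workflow using only the classes imported in the accompanying script; the keyword arguments are indicative and may differ slightly across obp versions.

```python
from sklearn.linear_model import LogisticRegression

from obp.dataset import OpenBanditDataset
from obp.ope import DirectMethod
from obp.ope import DoublyRobust
from obp.ope import InverseProbabilityWeighting
from obp.ope import OffPolicyEvaluation
from obp.ope import RegressionModel
from obp.policy import BernoulliTS

# logged bandit feedback collected by the Random behavior policy
dataset = OpenBanditDataset(behavior_policy="random", campaign="all")
bandit_feedback = dataset.obtain_batch_bandit_feedback()

# action choice probabilities of the evaluation policy (Bernoulli TS)
evaluation_policy = BernoulliTS(
    n_actions=dataset.n_actions,
    len_list=dataset.len_list,
    is_zozotown_prior=True,
    campaign="all",
    random_state=12345,
)
action_dist = evaluation_policy.compute_batch_action_dist(
    n_rounds=bandit_feedback["n_rounds"],
)

# reward regressor required by the model-dependent estimators (DM, DR)
regression_model = RegressionModel(
    n_actions=dataset.n_actions,
    len_list=dataset.len_list,
    action_context=dataset.action_context,
    base_model=LogisticRegression(max_iter=1000, random_state=12345),
)
estimated_rewards = regression_model.fit_predict(
    context=bandit_feedback["context"],
    action=bandit_feedback["action"],
    reward=bandit_feedback["reward"],
    position=bandit_feedback["position"],
    pscore=bandit_feedback["pscore"],
)

ope = OffPolicyEvaluation(
    bandit_feedback=bandit_feedback,
    ope_estimators=[DirectMethod(), InverseProbabilityWeighting(), DoublyRobust()],
)
estimated_policy_values = ope.estimate_policy_values(
    action_dist=action_dist,
    estimated_rewards_by_reg_model=estimated_rewards,
)
print(estimated_policy_values)  # {'dm': ..., 'ipw': ..., 'dr': ...}
```
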
2 changes: 1 addition & 1 deletion examples/obd/conf/hyperparams.yaml
@@ -1,5 +1,5 @@
lightgbm:
max_iter: 30
n_estimators: 30
learning_rate: 0.01
max_depth: 5
min_samples_leaf: 10
27 changes: 14 additions & 13 deletions examples/obd/evaluate_off_policy_estimators.py
@@ -1,23 +1,24 @@
import argparse
import yaml
from pathlib import Path

from joblib import delayed
from joblib import Parallel
import numpy as np
from pandas import DataFrame
from joblib import Parallel, delayed
from sklearn.experimental import enable_hist_gradient_boosting # noqa
from sklearn.ensemble import HistGradientBoostingClassifier, RandomForestClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
import yaml

from obp.dataset import OpenBanditDataset
from obp.policy import Random, BernoulliTS
from obp.ope import (
RegressionModel,
OffPolicyEvaluation,
InverseProbabilityWeighting,
DirectMethod,
DoublyRobust,
)
from obp.ope import DirectMethod
from obp.ope import DoublyRobust
from obp.ope import InverseProbabilityWeighting
from obp.ope import OffPolicyEvaluation
from obp.ope import RegressionModel
from obp.policy import BernoulliTS
from obp.policy import Random


evaluation_policy_dict = dict(bts=BernoulliTS, random=Random)

@@ -27,7 +28,7 @@

base_model_dict = dict(
logistic_regression=LogisticRegression,
lightgbm=HistGradientBoostingClassifier,
lightgbm=GradientBoostingClassifier,
random_forest=RandomForestClassifier,
)

33 changes: 16 additions & 17 deletions examples/online/evaluate_off_policy_estimators.py
@@ -1,26 +1,25 @@
import argparse
from pathlib import Path

from joblib import delayed
from joblib import Parallel
import numpy as np
from pandas import DataFrame
from joblib import Parallel, delayed

from obp.dataset import (
SyntheticBanditDataset,
logistic_reward_function,
)
from obp.policy import (
BernoulliTS,
EpsilonGreedy,
LinEpsilonGreedy,
LinTS,
LinUCB,
LogisticEpsilonGreedy,
LogisticTS,
LogisticUCB,
)
from obp.ope import OffPolicyEvaluation, ReplayMethod
from obp.simulator import calc_ground_truth_policy_value, run_bandit_simulation
from obp.dataset import logistic_reward_function
from obp.dataset import SyntheticBanditDataset
from obp.ope import OffPolicyEvaluation
from obp.ope import ReplayMethod
from obp.policy import BernoulliTS
from obp.policy import EpsilonGreedy
from obp.policy import LinEpsilonGreedy
from obp.policy import LinTS
from obp.policy import LinUCB
from obp.policy import LogisticEpsilonGreedy
from obp.policy import LogisticTS
from obp.policy import LogisticUCB
from obp.simulator import calc_ground_truth_policy_value
from obp.simulator import run_bandit_simulation


ope_estimators = [ReplayMethod()]
27 changes: 11 additions & 16 deletions examples/opl/README.md
@@ -18,10 +18,7 @@ See [our documentation](https://zr-obp.readthedocs.io/en/latest/_autosummary/obp
NNPolicyLearner can use the following OPE estimators as the objective function:
- Direct Method (DM)
- Inverse Probability Weighting (IPW)
- Self-Normalized Inverse Probability Weighting (SNIPW)
- Doubly Robust (DR)
- Self-Normalized Doubly Robust (SNDR)
- Doubly Robust with Optimistic Shrinkage (DRos)

See [our documentation](https://zr-obp.readthedocs.io/en/latest/estimators.html) for the details about these estimators.

@@ -39,7 +36,7 @@ python evaluate_off_policy_learners.py\
--dim_context $dim_context\
--base_model_for_evaluation_policy $base_model_for_evaluation_policy\
--base_model_for_reg_model $base_model_for_reg_model\
--ope_estimator $ope_estimator\
--off_policy_objective $off_policy_objective\
--n_hidden $n_hidden\
--n_layers $n_layers\
--activation $activation\
@@ -50,13 +47,12 @@ python evaluate_off_policy_learners.py\
```
- `$n_rounds` and `$n_actions` specify the number of rounds (or samples) and the number of actions of the synthetic bandit data.
- `$dim_context` specifies the dimension of context vectors.
- `$base_model_for_evaluation_policy` specifies the base ML model for defining evaluation policy and should be one of "logistic_regression", "random_forest", or "lightgbm".
- `$base_model_for_reg_model` specifies the base ML model for defining regression model and should be one of "logistic_regression", "random_forest", or "lightgbm".
- `$ope_estimator` specifies the OPE estimator for NNPolicyLearner and should be one of "dm", "ipw", "sipw", "dr", "sndr" or "dros".
- `$base_model_for_ipw_learner` specifies the base ML model for defining evaluation policy and should be one of "logistic_regression", "random_forest", or "lightgbm".
- `$off_policy_objective` specifies the OPE estimator for NNPolicyLearner and should be one of "dm", "ipw", or "dr".
- `$n_hidden` specifies the size of hidden layers in NNPolicyLearner.
- `$n_layers` specifies the number of hidden layers in NNPolicyLearner.
- `$activation` specifies theadam activation function for NNPolicyLearner and should be one of "identity", "tanh", "logistic", or "relu".
- `$solver` specifies the optimizer for NNPolicyLearner and should be one of "lbfgs", "sgd", or "adam".
- `$activation` specifies the activation function for NNPolicyLearner and should be one of "identity", "tanh", "logistic", or "relu".
- `$solver` specifies the optimizer for NNPolicyLearner and should be one of "adagrad", "sgd", or "adam".
- `$batch_size` specifies the batch size for NNPolicyLearner.
- `$early_stopping` enables early stopping of training of NNPolicyLearner.

@@ -67,9 +63,8 @@ python evaluate_off_policy_learners.py\
--n_rounds 10000\
--n_actions 10\
--dim_context 5\
--base_model_for_evaluation_policy logistic_regression\
--base_model_for_reg_model logistic_regression\
--ope_estimator dr\
--base_model_for_ipw_learner logistic_regression\
--off_policy_objective ipw\
--n_hidden 100\
--n_layers 1\
--activation relu\
@@ -81,10 +76,10 @@
# =============================================
# random_state=12345
# ---------------------------------------------
# policy value
# random_policy 0.604339
# ipw_learner 0.767615
# nn_policy_learner (with dr) 0.764302
# policy value
# random_policy 0.605604
# ipw_learner 0.753016
# nn_policy_learner (with ipw) 0.759228
# =============================================
```

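For reference, here is a rough translation of the example command above into library calls. The `NNPolicyLearner` argument names (`off_policy_objective` as a string, `hidden_layer_size` built from `n_hidden` and `n_layers`) follow this PR's CLI flags and are assumptions rather than a verbatim excerpt of the script.

```python
from sklearn.linear_model import LogisticRegression

from obp.dataset import logistic_reward_function
from obp.dataset import SyntheticBanditDataset
from obp.policy import IPWLearner
from obp.policy import NNPolicyLearner

# synthetic logged bandit feedback: n_rounds=10000, n_actions=10, dim_context=5
dataset = SyntheticBanditDataset(
    n_actions=10,
    dim_context=5,
    reward_function=logistic_reward_function,
    random_state=12345,
)
bandit_feedback = dataset.obtain_batch_bandit_feedback(n_rounds=10000)

# baseline: IPWLearner with a logistic-regression base model
ipw_learner = IPWLearner(
    n_actions=dataset.n_actions,
    base_model=LogisticRegression(random_state=12345),
)
ipw_learner.fit(
    context=bandit_feedback["context"],
    action=bandit_feedback["action"],
    reward=bandit_feedback["reward"],
    pscore=bandit_feedback["pscore"],
)

# NNPolicyLearner trained with the IPW objective (--off_policy_objective ipw)
nn_policy_learner = NNPolicyLearner(
    n_actions=dataset.n_actions,
    dim_context=5,
    off_policy_objective="ipw",
    hidden_layer_size=(100,),  # --n_hidden 100, --n_layers 1
    activation="relu",
    solver="adam",
    random_state=12345,
)
nn_policy_learner.fit(
    context=bandit_feedback["context"],
    action=bandit_feedback["action"],
    reward=bandit_feedback["reward"],
    pscore=bandit_feedback["pscore"],
)

# both learners output action distributions for new contexts,
# which are then evaluated against the synthetic ground truth
action_dist_nn = nn_policy_learner.predict(context=bandit_feedback["context"])
```
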
2 changes: 1 addition & 1 deletion examples/opl/conf/hyperparams.yaml
@@ -1,5 +1,5 @@
lightgbm:
max_iter: 30
n_estimators: 30
learning_rate: 0.01
max_depth: 5
min_samples_leaf: 10