Commit 867eebe

Merge pull request #132 from st-tech/feature-policy-learner
Feature policy learner
2 parents 7b3f5c4 + f7a6a28 commit 867eebe

File tree

81 files changed: +2686 −3031 lines


benchmark/cf_policy_search/run_cf_policy_search.py

Lines changed: 9 additions & 8 deletions
@@ -1,26 +1,27 @@
 import argparse
 from pathlib import Path
-import yaml
 
+from custom_dataset import OBDWithInteractionFeatures
+from joblib import delayed
+from joblib import Parallel
 import numpy as np
 from pandas import DataFrame
-from joblib import Parallel, delayed
-from sklearn.linear_model import LogisticRegression
+from sklearn.ensemble import GradientBoostingClassifier
 from sklearn.ensemble import RandomForestClassifier
-from sklearn.experimental import enable_hist_gradient_boosting  # noqa
-from sklearn.ensemble import HistGradientBoostingClassifier
+from sklearn.linear_model import LogisticRegression
+import yaml
 
-from custom_dataset import OBDWithInteractionFeatures
-from obp.policy import IPWLearner
 from obp.ope import InverseProbabilityWeighting
+from obp.policy import IPWLearner
+
 
 # hyperparameters of the regression model used in model dependent OPE estimators
 with open("./conf/hyperparams.yaml", "rb") as f:
     hyperparams = yaml.safe_load(f)
 
 base_model_dict = dict(
     logistic_regression=LogisticRegression,
-    lightgbm=HistGradientBoostingClassifier,
+    lightgbm=GradientBoostingClassifier,
     random_forest=RandomForestClassifier,
 )
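
For context, the `lightgbm` key in `base_model_dict` now maps to `GradientBoostingClassifier`, so the YAML hyperparameters must use that estimator's parameter names. A minimal sketch of the dict-driven instantiation pattern these scripts follow (the inlined hyperparameters mirror the examples' `hyperparams.yaml` after this commit; the `base_model` variable is a hypothetical stand-in for the script's CLI argument):

```python
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression

# mirrors conf/hyperparams.yaml after this commit (n_estimators, not max_iter)
hyperparams = {
    "lightgbm": dict(n_estimators=30, learning_rate=0.01, max_depth=5, min_samples_leaf=10),
}

base_model_dict = dict(
    logistic_regression=LogisticRegression,
    lightgbm=GradientBoostingClassifier,
    random_forest=RandomForestClassifier,
)

base_model = "lightgbm"  # hypothetical CLI choice
# the YAML section is unpacked directly into the estimator's constructor,
# which is why the key names must match the new base model's signature
clf = base_model_dict[base_model](**hyperparams[base_model])
```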

benchmark/ope/benchmark_ope_estimators.py

Lines changed: 17 additions & 17 deletions
@@ -1,30 +1,30 @@
+from logging import getLogger
+from pathlib import Path
 import time
 import warnings
-from pathlib import Path
-from logging import getLogger
 
 import hydra
-from omegaconf import DictConfig
 import numpy as np
+from omegaconf import DictConfig
 from pandas import DataFrame
 import pingouin as pg
+from pyieoe.evaluator import InterpretableOPEEvaluator
+from sklearn.ensemble import GradientBoostingClassifier as LightGBM
+from sklearn.ensemble import RandomForestClassifier as RandomForest
 from sklearn.exceptions import ConvergenceWarning
-from sklearn.experimental import enable_hist_gradient_boosting  # noqa
 from sklearn.linear_model import LogisticRegression
-from sklearn.ensemble import RandomForestClassifier as RandomForest
-from sklearn.ensemble import HistGradientBoostingClassifier as LightGBM
+
 from obp.dataset import OpenBanditDataset
-from obp.policy import Random, BernoulliTS
-from obp.ope import (
-    InverseProbabilityWeighting,
-    SelfNormalizedInverseProbabilityWeighting,
-    DirectMethod,
-    DoublyRobust,
-    SelfNormalizedDoublyRobust,
-    SwitchDoublyRobustTuning,
-    DoublyRobustWithShrinkageTuning,
-)
-from pyieoe.evaluator import InterpretableOPEEvaluator
+from obp.ope import DirectMethod
+from obp.ope import DoublyRobust
+from obp.ope import DoublyRobustWithShrinkageTuning
+from obp.ope import InverseProbabilityWeighting
+from obp.ope import SelfNormalizedDoublyRobust
+from obp.ope import SelfNormalizedInverseProbabilityWeighting
+from obp.ope import SwitchDoublyRobustTuning
+from obp.policy import BernoulliTS
+from obp.policy import Random
+
 
 logger = getLogger(__name__)
 warnings.filterwarnings(action="ignore", category=ConvergenceWarning)

benchmark/ope/benchmark_ope_estimators_hypara.py

Lines changed: 12 additions & 12 deletions
@@ -1,25 +1,25 @@
+from logging import getLogger
+from pathlib import Path
 import time
 import warnings
-from pathlib import Path
-from logging import getLogger
 
 import hydra
-from omegaconf import DictConfig
 import numpy as np
+from omegaconf import DictConfig
 from pandas import DataFrame
 import pingouin as pg
+from pyieoe.evaluator import InterpretableOPEEvaluator
+from sklearn.ensemble import GradientBoostingClassifier as LightGBM
+from sklearn.ensemble import RandomForestClassifier as RandomForest
 from sklearn.exceptions import ConvergenceWarning
-from sklearn.experimental import enable_hist_gradient_boosting  # noqa
 from sklearn.linear_model import LogisticRegression
-from sklearn.ensemble import RandomForestClassifier as RandomForest
-from sklearn.ensemble import HistGradientBoostingClassifier as LightGBM
+
 from obp.dataset import OpenBanditDataset
-from obp.policy import Random, BernoulliTS
-from obp.ope import (
-    DoublyRobustWithShrinkage,
-    DoublyRobustWithShrinkageTuning,
-)
-from pyieoe.evaluator import InterpretableOPEEvaluator
+from obp.ope import DoublyRobustWithShrinkage
+from obp.ope import DoublyRobustWithShrinkageTuning
+from obp.policy import BernoulliTS
+from obp.policy import Random
+
 
 logger = getLogger(__name__)
 warnings.filterwarnings(action="ignore", category=ConvergenceWarning)

benchmark/ope/conf/reg_model_hyperparams/default.yaml

Lines changed: 1 addition & 1 deletion
@@ -1,6 +1,6 @@
 # @package _group_
 lightgbm:
-  max_iter: 100
+  n_estimators: 100
   learning_rate: 0.01
   max_depth: 5
   min_samples_leaf: 10
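
The key rename tracks the swapped base model: scikit-learn's `HistGradientBoostingClassifier` calls the number of boosting rounds `max_iter`, while `GradientBoostingClassifier` calls it `n_estimators`. A quick side-by-side with the values from the YAML above:

```python
from sklearn.ensemble import GradientBoostingClassifier
# on scikit-learn >= 1.0 this import no longer needs the experimental flag
from sklearn.ensemble import HistGradientBoostingClassifier

# before: HistGradientBoostingClassifier counts boosting rounds via `max_iter`
old_model = HistGradientBoostingClassifier(
    max_iter=100, learning_rate=0.01, max_depth=5, min_samples_leaf=10
)
# after: GradientBoostingClassifier uses `n_estimators` for the same knob
new_model = GradientBoostingClassifier(
    n_estimators=100, learning_rate=0.01, max_depth=5, min_samples_leaf=10
)
```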

examples/multiclass/conf/hyperparams.yaml

Lines changed: 1 addition & 1 deletion
@@ -1,5 +1,5 @@
 lightgbm:
-  max_iter: 30
+  n_estimators: 30
   learning_rate: 0.01
   max_depth: 5
   min_samples_leaf: 10

examples/multiclass/evaluate_off_policy_estimators.py

Lines changed: 19 additions & 17 deletions
@@ -1,27 +1,29 @@
 import argparse
-import yaml
 from pathlib import Path
 
+from joblib import delayed
+from joblib import Parallel
 import numpy as np
 from pandas import DataFrame
-from joblib import Parallel, delayed
-from sklearn.datasets import load_breast_cancer, load_digits, load_iris, load_wine
-from sklearn.experimental import enable_hist_gradient_boosting  # noqa
-from sklearn.ensemble import HistGradientBoostingClassifier, RandomForestClassifier
+from sklearn.datasets import load_breast_cancer
+from sklearn.datasets import load_digits
+from sklearn.datasets import load_iris
+from sklearn.datasets import load_wine
+from sklearn.ensemble import GradientBoostingClassifier
+from sklearn.ensemble import RandomForestClassifier
 from sklearn.linear_model import LogisticRegression
+import yaml
 
 from obp.dataset import MultiClassToBanditReduction
-from obp.ope import (
-    RegressionModel,
-    OffPolicyEvaluation,
-    InverseProbabilityWeighting,
-    SelfNormalizedInverseProbabilityWeighting,
-    DirectMethod,
-    DoublyRobust,
-    SelfNormalizedDoublyRobust,
-    SwitchDoublyRobust,
-    DoublyRobustWithShrinkage,
-)
+from obp.ope import DirectMethod
+from obp.ope import DoublyRobust
+from obp.ope import DoublyRobustWithShrinkage
+from obp.ope import InverseProbabilityWeighting
+from obp.ope import OffPolicyEvaluation
+from obp.ope import RegressionModel
+from obp.ope import SelfNormalizedDoublyRobust
+from obp.ope import SelfNormalizedInverseProbabilityWeighting
+from obp.ope import SwitchDoublyRobust
 
 
 # hyperparameters of the regression model used in model dependent OPE estimators
@@ -37,7 +39,7 @@
 
 base_model_dict = dict(
     logistic_regression=LogisticRegression,
-    lightgbm=HistGradientBoostingClassifier,
+    lightgbm=GradientBoostingClassifier,
     random_forest=RandomForestClassifier,
 )

examples/obd/README.md

Lines changed: 3 additions & 5 deletions
@@ -52,12 +52,10 @@ python evaluate_off_policy_estimators.py\
 # random_state=12345
 # ------------------------------
 #                mean       std
-# dm         0.180288  0.114694
+# dm         0.180269  0.114716
 # ipw        0.333113  0.350425
-# dr         0.304401  0.347842
+# dr         0.304422  0.347866
 # ==============================
 ```
 
-Please refer to [this page](https://zr-obp.readthedocs.io/en/latest/evaluation_ope.html) for the evaluation of OPE protocol using our real-world data.
-Please visit [synthetic](../synthetic/) to try the evaluation of OPE estimators with synthetic bandit datasets.
-Moreover, in [benchmark/ope](https://github.com/st-tech/zr-obp/tree/master/benchmark/ope), we performed the benchmark experiments on several OPE estimators using the full size Open Bandit Dataset.
+Please refer to [this page](https://zr-obp.readthedocs.io/en/latest/evaluation_ope.html) for the evaluation of OPE protocol using our real-world data. Please visit [synthetic](../synthetic/) to try the evaluation of OPE estimators with synthetic bandit datasets. Moreover, in [benchmark/ope](https://github.com/st-tech/zr-obp/tree/master/benchmark/ope), we performed the benchmark experiments on several OPE estimators using the full size Open Bandit Dataset.

examples/obd/conf/hyperparams.yaml

Lines changed: 1 addition & 1 deletion
@@ -1,5 +1,5 @@
 lightgbm:
-  max_iter: 30
+  n_estimators: 30
   learning_rate: 0.01
   max_depth: 5
   min_samples_leaf: 10

examples/obd/evaluate_off_policy_estimators.py

Lines changed: 14 additions & 13 deletions
@@ -1,23 +1,24 @@
 import argparse
-import yaml
 from pathlib import Path
 
+from joblib import delayed
+from joblib import Parallel
 import numpy as np
 from pandas import DataFrame
-from joblib import Parallel, delayed
-from sklearn.experimental import enable_hist_gradient_boosting  # noqa
-from sklearn.ensemble import HistGradientBoostingClassifier, RandomForestClassifier
+from sklearn.ensemble import GradientBoostingClassifier
+from sklearn.ensemble import RandomForestClassifier
 from sklearn.linear_model import LogisticRegression
+import yaml
 
 from obp.dataset import OpenBanditDataset
-from obp.policy import Random, BernoulliTS
-from obp.ope import (
-    RegressionModel,
-    OffPolicyEvaluation,
-    InverseProbabilityWeighting,
-    DirectMethod,
-    DoublyRobust,
-)
+from obp.ope import DirectMethod
+from obp.ope import DoublyRobust
+from obp.ope import InverseProbabilityWeighting
+from obp.ope import OffPolicyEvaluation
+from obp.ope import RegressionModel
+from obp.policy import BernoulliTS
+from obp.policy import Random
+
 
 evaluation_policy_dict = dict(bts=BernoulliTS, random=Random)
 
@@ -27,7 +28,7 @@
 
 base_model_dict = dict(
     logistic_regression=LogisticRegression,
-    lightgbm=HistGradientBoostingClassifier,
+    lightgbm=GradientBoostingClassifier,
     random_forest=RandomForestClassifier,
 )

examples/online/evaluate_off_policy_estimators.py

Lines changed: 16 additions & 17 deletions
@@ -1,26 +1,25 @@
 import argparse
 from pathlib import Path
 
+from joblib import delayed
+from joblib import Parallel
 import numpy as np
 from pandas import DataFrame
-from joblib import Parallel, delayed
 
-from obp.dataset import (
-    SyntheticBanditDataset,
-    logistic_reward_function,
-)
-from obp.policy import (
-    BernoulliTS,
-    EpsilonGreedy,
-    LinEpsilonGreedy,
-    LinTS,
-    LinUCB,
-    LogisticEpsilonGreedy,
-    LogisticTS,
-    LogisticUCB,
-)
-from obp.ope import OffPolicyEvaluation, ReplayMethod
-from obp.simulator import calc_ground_truth_policy_value, run_bandit_simulation
+from obp.dataset import logistic_reward_function
+from obp.dataset import SyntheticBanditDataset
+from obp.ope import OffPolicyEvaluation
+from obp.ope import ReplayMethod
+from obp.policy import BernoulliTS
+from obp.policy import EpsilonGreedy
+from obp.policy import LinEpsilonGreedy
+from obp.policy import LinTS
+from obp.policy import LinUCB
+from obp.policy import LogisticEpsilonGreedy
+from obp.policy import LogisticTS
+from obp.policy import LogisticUCB
+from obp.simulator import calc_ground_truth_policy_value
+from obp.simulator import run_bandit_simulation
 
 
 ope_estimators = [ReplayMethod()]

examples/opl/README.md

Lines changed: 11 additions & 16 deletions
@@ -18,10 +18,7 @@ See [our documentation](https://zr-obp.readthedocs.io/en/latest/_autosummary/obp
 NNPolicyLearner can use the following OPE estimators as the objective function:
 - Direct Method (DM)
 - Inverse Probability Weighting (IPW)
-- Self-Normalized Inverse Probability Weighting (SNIPW)
 - Doubly Robust (DR)
-- Self-Normalized Doubly Robust (SNDR)
-- Doubly Robust with Optimistic Shrinkage (DRos)
 
 See [our documentation](https://zr-obp.readthedocs.io/en/latest/estimators.html) for the details about these estimators.
 
@@ -39,7 +36,7 @@ python evaluate_off_policy_learners.py\
     --dim_context $dim_context\
     --base_model_for_evaluation_policy $base_model_for_evaluation_policy\
     --base_model_for_reg_model $base_model_for_reg_model\
-    --ope_estimator $ope_estimator\
+    --off_policy_objective $off_policy_objective\
     --n_hidden $n_hidden\
     --n_layers $n_layers\
     --activation $activation\
@@ -50,13 +47,12 @@ python evaluate_off_policy_learners.py\
 ```
 - `$n_rounds` and `$n_actions` specify the number of rounds (or samples) and the number of actions of the synthetic bandit data.
 - `$dim_context` specifies the dimension of context vectors.
-- `$base_model_for_evaluation_policy` specifies the base ML model for defining evaluation policy and should be one of "logistic_regression", "random_forest", or "lightgbm".
-- `$base_model_for_reg_model` specifies the base ML model for defining regression model and should be one of "logistic_regression", "random_forest", or "lightgbm".
-- `$ope_estimator` specifies the OPE estimator for NNPolicyLearner and should be one of "dm", "ipw", "sipw", "dr", "sndr" or "dros".
+- `$base_model_for_ipw_learner` specifies the base ML model for defining evaluation policy and should be one of "logistic_regression", "random_forest", or "lightgbm".
+- `$off_policy_objective` specifies the OPE estimator for NNPolicyLearner and should be one of "dm", "ipw", or "dr".
 - `$n_hidden` specifies the size of hidden layers in NNPolicyLearner.
 - `$n_layers` specifies the number of hidden layers in NNPolicyLearner.
-- `$activation` specifies theadam activation function for NNPolicyLearner and should be one of "identity", "tanh", "logistic", or "relu".
-- `$solver` specifies the optimizer for NNPolicyLearner and should be one of "lbfgs", "sgd", or "adam".
+- `$activation` specifies the activation function for NNPolicyLearner and should be one of "identity", "tanh", "logistic", or "relu".
+- `$solver` specifies the optimizer for NNPolicyLearner and should be one of "adagrad", "sgd", or "adam".
 - `$batch_size` specifies the batch size for NNPolicyLearner.
 - `$early_stopping` enables early stopping of training of NNPolicyLearner.
 
@@ -67,9 +63,8 @@ python evaluate_off_policy_learners.py\
     --n_rounds 10000\
     --n_actions 10\
    --dim_context 5\
-    --base_model_for_evaluation_policy logistic_regression\
-    --base_model_for_reg_model logistic_regression\
-    --ope_estimator dr\
+    --base_model_for_ipw_learner logistic_regression\
+    --off_policy_objective ipw\
     --n_hidden 100\
     --n_layers 1\
     --activation relu\
@@ -81,10 +76,10 @@ python evaluate_off_policy_learners.py\
 # =============================================
 # random_state=12345
 # ---------------------------------------------
-#                              policy value
-# random_policy                    0.604339
-# ipw_learner                      0.767615
-# nn_policy_learner (with dr)      0.764302
+#                               policy value
+# random_policy                     0.605604
+# ipw_learner                       0.753016
+# nn_policy_learner (with ipw)      0.759228
 # =============================================
 ```
 
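
For orientation, a rough sketch of how the renamed flags plug into `NNPolicyLearner`. The keyword names are inferred from the flags documented above and may differ between obp versions; the synthetic dataset is just a stand-in for whatever logged data the script generates:

```python
from obp.dataset import logistic_reward_function
from obp.dataset import SyntheticBanditDataset
from obp.policy import NNPolicyLearner

# synthetic logged bandit feedback, matching the example invocation above
dataset = SyntheticBanditDataset(
    n_actions=10,
    dim_context=5,
    reward_function=logistic_reward_function,
    random_state=12345,
)
bandit_feedback = dataset.obtain_batch_bandit_feedback(n_rounds=10000)

# `off_policy_objective` replaces the old `ope_estimator` argument;
# "dm", "ipw", or "dr" selects the training objective
nn_policy = NNPolicyLearner(
    n_actions=10,
    dim_context=5,
    off_policy_objective="ipw",
    hidden_layer_size=(100,),  # --n_hidden 100, --n_layers 1
    activation="relu",
    solver="adam",  # "adagrad", "sgd", or "adam"
)
nn_policy.fit(
    context=bandit_feedback["context"],
    action=bandit_feedback["action"],
    reward=bandit_feedback["reward"],
    pscore=bandit_feedback["pscore"],
)
action_dist = nn_policy.predict(context=bandit_feedback["context"])
```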

examples/opl/conf/hyperparams.yaml

Lines changed: 1 addition & 1 deletion
@@ -1,5 +1,5 @@
 lightgbm:
-  max_iter: 30
+  n_estimators: 30
   learning_rate: 0.01
   max_depth: 5
   min_samples_leaf: 10
