
Feature policy learner #132


Merged · 16 commits · Sep 6, 2021 · Changes from all commits
17 changes: 9 additions & 8 deletions benchmark/cf_policy_search/run_cf_policy_search.py
@@ -1,26 +1,27 @@
import argparse
from pathlib import Path
import yaml

from custom_dataset import OBDWithInteractionFeatures
from joblib import delayed
from joblib import Parallel
import numpy as np
from pandas import DataFrame
from joblib import Parallel, delayed
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.experimental import enable_hist_gradient_boosting # noqa
from sklearn.ensemble import HistGradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
import yaml

from custom_dataset import OBDWithInteractionFeatures
from obp.policy import IPWLearner
from obp.ope import InverseProbabilityWeighting
from obp.policy import IPWLearner


# hyperparameters of the regression model used in model dependent OPE estimators
with open("./conf/hyperparams.yaml", "rb") as f:
hyperparams = yaml.safe_load(f)

base_model_dict = dict(
logistic_regression=LogisticRegression,
lightgbm=HistGradientBoostingClassifier,
lightgbm=GradientBoostingClassifier,
random_forest=RandomForestClassifier,
)

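The `max_iter` → `n_estimators` renames in the YAML configs further down match this swap: `HistGradientBoostingClassifier` takes `max_iter`, while `GradientBoostingClassifier` takes `n_estimators`. Below is a minimal sketch of how the loaded hyperparameters and `base_model_dict` are plausibly combined; the `base_model` variable stands in for the script's CLI argument and is an assumption, not a verbatim excerpt.

```python
import yaml
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression

# same mapping as in the diff above: "lightgbm" now resolves to sklearn's
# classic GradientBoostingClassifier, which accepts n_estimators
base_model_dict = dict(
    logistic_regression=LogisticRegression,
    lightgbm=GradientBoostingClassifier,
    random_forest=RandomForestClassifier,
)

with open("./conf/hyperparams.yaml", "rb") as f:
    hyperparams = yaml.safe_load(f)

base_model = "lightgbm"  # assumed to come from argparse in the real script
# n_estimators, learning_rate, max_depth, min_samples_leaf are all valid
# GradientBoostingClassifier keyword arguments
clf = base_model_dict[base_model](**hyperparams[base_model])
```
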
34 changes: 17 additions & 17 deletions benchmark/ope/benchmark_ope_estimators.py
@@ -1,30 +1,30 @@
from logging import getLogger
from pathlib import Path
import time
import warnings
from pathlib import Path
from logging import getLogger

import hydra
from omegaconf import DictConfig
import numpy as np
from omegaconf import DictConfig
from pandas import DataFrame
import pingouin as pg
from pyieoe.evaluator import InterpretableOPEEvaluator
from sklearn.ensemble import GradientBoostingClassifier as LightGBM
from sklearn.ensemble import RandomForestClassifier as RandomForest
from sklearn.exceptions import ConvergenceWarning
from sklearn.experimental import enable_hist_gradient_boosting # noqa
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier as RandomForest
from sklearn.ensemble import HistGradientBoostingClassifier as LightGBM

from obp.dataset import OpenBanditDataset
from obp.policy import Random, BernoulliTS
from obp.ope import (
InverseProbabilityWeighting,
SelfNormalizedInverseProbabilityWeighting,
DirectMethod,
DoublyRobust,
SelfNormalizedDoublyRobust,
SwitchDoublyRobustTuning,
DoublyRobustWithShrinkageTuning,
)
from pyieoe.evaluator import InterpretableOPEEvaluator
from obp.ope import DirectMethod
from obp.ope import DoublyRobust
from obp.ope import DoublyRobustWithShrinkageTuning
from obp.ope import InverseProbabilityWeighting
from obp.ope import SelfNormalizedDoublyRobust
from obp.ope import SelfNormalizedInverseProbabilityWeighting
from obp.ope import SwitchDoublyRobustTuning
from obp.policy import BernoulliTS
from obp.policy import Random


logger = getLogger(__name__)
warnings.filterwarnings(action="ignore", category=ConvergenceWarning)
24 changes: 12 additions & 12 deletions benchmark/ope/benchmark_ope_estimators_hypara.py
@@ -1,25 +1,25 @@
from logging import getLogger
from pathlib import Path
import time
import warnings
from pathlib import Path
from logging import getLogger

import hydra
from omegaconf import DictConfig
import numpy as np
from omegaconf import DictConfig
from pandas import DataFrame
import pingouin as pg
from pyieoe.evaluator import InterpretableOPEEvaluator
from sklearn.ensemble import GradientBoostingClassifier as LightGBM
from sklearn.ensemble import RandomForestClassifier as RandomForest
from sklearn.exceptions import ConvergenceWarning
from sklearn.experimental import enable_hist_gradient_boosting # noqa
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier as RandomForest
from sklearn.ensemble import HistGradientBoostingClassifier as LightGBM

from obp.dataset import OpenBanditDataset
from obp.policy import Random, BernoulliTS
from obp.ope import (
DoublyRobustWithShrinkage,
DoublyRobustWithShrinkageTuning,
)
from pyieoe.evaluator import InterpretableOPEEvaluator
from obp.ope import DoublyRobustWithShrinkage
from obp.ope import DoublyRobustWithShrinkageTuning
from obp.policy import BernoulliTS
from obp.policy import Random


logger = getLogger(__name__)
warnings.filterwarnings(action="ignore", category=ConvergenceWarning)
2 changes: 1 addition & 1 deletion benchmark/ope/conf/reg_model_hyperparams/default.yaml
@@ -1,6 +1,6 @@
# @package _group_
lightgbm:
max_iter: 100
n_estimators: 100
learning_rate: 0.01
max_depth: 5
min_samples_leaf: 10
2 changes: 1 addition & 1 deletion examples/multiclass/conf/hyperparams.yaml
@@ -1,5 +1,5 @@
lightgbm:
max_iter: 30
n_estimators: 30
learning_rate: 0.01
max_depth: 5
min_samples_leaf: 10
36 changes: 19 additions & 17 deletions examples/multiclass/evaluate_off_policy_estimators.py
@@ -1,27 +1,29 @@
import argparse
import yaml
from pathlib import Path

from joblib import delayed
from joblib import Parallel
import numpy as np
from pandas import DataFrame
from joblib import Parallel, delayed
from sklearn.datasets import load_breast_cancer, load_digits, load_iris, load_wine
from sklearn.experimental import enable_hist_gradient_boosting # noqa
from sklearn.ensemble import HistGradientBoostingClassifier, RandomForestClassifier
from sklearn.datasets import load_breast_cancer
from sklearn.datasets import load_digits
from sklearn.datasets import load_iris
from sklearn.datasets import load_wine
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
import yaml

from obp.dataset import MultiClassToBanditReduction
from obp.ope import (
RegressionModel,
OffPolicyEvaluation,
InverseProbabilityWeighting,
SelfNormalizedInverseProbabilityWeighting,
DirectMethod,
DoublyRobust,
SelfNormalizedDoublyRobust,
SwitchDoublyRobust,
DoublyRobustWithShrinkage,
)
from obp.ope import DirectMethod
from obp.ope import DoublyRobust
from obp.ope import DoublyRobustWithShrinkage
from obp.ope import InverseProbabilityWeighting
from obp.ope import OffPolicyEvaluation
from obp.ope import RegressionModel
from obp.ope import SelfNormalizedDoublyRobust
from obp.ope import SelfNormalizedInverseProbabilityWeighting
from obp.ope import SwitchDoublyRobust


# hyperparameters of the regression model used in model dependent OPE estimators
@@ -37,7 +39,7 @@

base_model_dict = dict(
logistic_regression=LogisticRegression,
lightgbm=HistGradientBoostingClassifier,
lightgbm=GradientBoostingClassifier,
random_forest=RandomForestClassifier,
)

8 changes: 3 additions & 5 deletions examples/obd/README.md
@@ -52,12 +52,10 @@ python evaluate_off_policy_estimators.py\
# random_state=12345
# ------------------------------
# mean std
# dm 0.180288 0.114694
# dm 0.180269 0.114716
# ipw 0.333113 0.350425
# dr 0.304401 0.347842
# dr 0.304422 0.347866
# ==============================
```

Please refer to [this page](https://zr-obp.readthedocs.io/en/latest/evaluation_ope.html) for the evaluation of OPE protocol using our real-world data.
Please visit [synthetic](../synthetic/) to try the evaluation of OPE estimators with synthetic bandit datasets.
Moreover, in [benchmark/ope](https://github.com/st-tech/zr-obp/tree/master/benchmark/ope), we performed the benchmark experiments on several OPE estimators using the full size Open Bandit Dataset.
Please refer to [this page](https://zr-obp.readthedocs.io/en/latest/evaluation_ope.html) for the evaluation of OPE protocol using our real-world data. Please visit [synthetic](../synthetic/) to try the evaluation of OPE estimators with synthetic bandit datasets. Moreover, in [benchmark/ope](https://github.com/st-tech/zr-obp/tree/master/benchmark/ope), we performed the benchmark experiments on several OPE estimators using the full size Open Bandit Dataset.
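
For orientation, the table above is produced by comparing OPE estimates (DM, IPW, DR) against an on-policy ground truth on the Open Bandit Dataset. The following is a condensed, hedged sketch of that workflow using only the classes imported in the accompanying script; the keyword arguments are indicative and may differ slightly across obp versions.

```python
from sklearn.linear_model import LogisticRegression

from obp.dataset import OpenBanditDataset
from obp.ope import DirectMethod
from obp.ope import DoublyRobust
from obp.ope import InverseProbabilityWeighting
from obp.ope import OffPolicyEvaluation
from obp.ope import RegressionModel
from obp.policy import BernoulliTS

# logged bandit feedback collected by the Random behavior policy
dataset = OpenBanditDataset(behavior_policy="random", campaign="all")
bandit_feedback = dataset.obtain_batch_bandit_feedback()

# action choice probabilities of the evaluation policy (Bernoulli TS)
evaluation_policy = BernoulliTS(
    n_actions=dataset.n_actions,
    len_list=dataset.len_list,
    is_zozotown_prior=True,
    campaign="all",
    random_state=12345,
)
action_dist = evaluation_policy.compute_batch_action_dist(
    n_rounds=bandit_feedback["n_rounds"],
)

# reward regressor required by the model-dependent estimators (DM, DR)
regression_model = RegressionModel(
    n_actions=dataset.n_actions,
    len_list=dataset.len_list,
    action_context=dataset.action_context,
    base_model=LogisticRegression(max_iter=1000, random_state=12345),
)
estimated_rewards = regression_model.fit_predict(
    context=bandit_feedback["context"],
    action=bandit_feedback["action"],
    reward=bandit_feedback["reward"],
    position=bandit_feedback["position"],
    pscore=bandit_feedback["pscore"],
)

ope = OffPolicyEvaluation(
    bandit_feedback=bandit_feedback,
    ope_estimators=[DirectMethod(), InverseProbabilityWeighting(), DoublyRobust()],
)
estimated_policy_values = ope.estimate_policy_values(
    action_dist=action_dist,
    estimated_rewards_by_reg_model=estimated_rewards,
)
print(estimated_policy_values)  # {'dm': ..., 'ipw': ..., 'dr': ...}
```
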
2 changes: 1 addition & 1 deletion examples/obd/conf/hyperparams.yaml
@@ -1,5 +1,5 @@
lightgbm:
max_iter: 30
n_estimators: 30
learning_rate: 0.01
max_depth: 5
min_samples_leaf: 10
27 changes: 14 additions & 13 deletions examples/obd/evaluate_off_policy_estimators.py
@@ -1,23 +1,24 @@
import argparse
import yaml
from pathlib import Path

from joblib import delayed
from joblib import Parallel
import numpy as np
from pandas import DataFrame
from joblib import Parallel, delayed
from sklearn.experimental import enable_hist_gradient_boosting # noqa
from sklearn.ensemble import HistGradientBoostingClassifier, RandomForestClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
import yaml

from obp.dataset import OpenBanditDataset
from obp.policy import Random, BernoulliTS
from obp.ope import (
RegressionModel,
OffPolicyEvaluation,
InverseProbabilityWeighting,
DirectMethod,
DoublyRobust,
)
from obp.ope import DirectMethod
from obp.ope import DoublyRobust
from obp.ope import InverseProbabilityWeighting
from obp.ope import OffPolicyEvaluation
from obp.ope import RegressionModel
from obp.policy import BernoulliTS
from obp.policy import Random


evaluation_policy_dict = dict(bts=BernoulliTS, random=Random)

@@ -27,7 +28,7 @@

base_model_dict = dict(
logistic_regression=LogisticRegression,
lightgbm=HistGradientBoostingClassifier,
lightgbm=GradientBoostingClassifier,
random_forest=RandomForestClassifier,
)

33 changes: 16 additions & 17 deletions examples/online/evaluate_off_policy_estimators.py
@@ -1,26 +1,25 @@
import argparse
from pathlib import Path

from joblib import delayed
from joblib import Parallel
import numpy as np
from pandas import DataFrame
from joblib import Parallel, delayed

from obp.dataset import (
SyntheticBanditDataset,
logistic_reward_function,
)
from obp.policy import (
BernoulliTS,
EpsilonGreedy,
LinEpsilonGreedy,
LinTS,
LinUCB,
LogisticEpsilonGreedy,
LogisticTS,
LogisticUCB,
)
from obp.ope import OffPolicyEvaluation, ReplayMethod
from obp.simulator import calc_ground_truth_policy_value, run_bandit_simulation
from obp.dataset import logistic_reward_function
from obp.dataset import SyntheticBanditDataset
from obp.ope import OffPolicyEvaluation
from obp.ope import ReplayMethod
from obp.policy import BernoulliTS
from obp.policy import EpsilonGreedy
from obp.policy import LinEpsilonGreedy
from obp.policy import LinTS
from obp.policy import LinUCB
from obp.policy import LogisticEpsilonGreedy
from obp.policy import LogisticTS
from obp.policy import LogisticUCB
from obp.simulator import calc_ground_truth_policy_value
from obp.simulator import run_bandit_simulation


ope_estimators = [ReplayMethod()]
27 changes: 11 additions & 16 deletions examples/opl/README.md
@@ -18,10 +18,7 @@ See [our documentation](https://zr-obp.readthedocs.io/en/latest/_autosummary/obp
NNPolicyLearner can use the following OPE estimators as the objective function:
- Direct Method (DM)
- Inverse Probability Weighting (IPW)
- Self-Normalized Inverse Probability Weighting (SNIPW)
- Doubly Robust (DR)
- Self-Normalized Doubly Robust (SNDR)
- Doubly Robust with Optimistic Shrinkage (DRos)

See [our documentation](https://zr-obp.readthedocs.io/en/latest/estimators.html) for the details about these estimators.

@@ -39,7 +36,7 @@ python evaluate_off_policy_learners.py\
--dim_context $dim_context\
--base_model_for_evaluation_policy $base_model_for_evaluation_policy\
--base_model_for_reg_model $base_model_for_reg_model\
--ope_estimator $ope_estimator\
--off_policy_objective $off_policy_objective\
--n_hidden $n_hidden\
--n_layers $n_layers\
--activation $activation\
@@ -50,13 +47,12 @@ python evaluate_off_policy_learners.py\
```
- `$n_rounds` and `$n_actions` specify the number of rounds (or samples) and the number of actions of the synthetic bandit data.
- `$dim_context` specifies the dimension of context vectors.
- `$base_model_for_evaluation_policy` specifies the base ML model for defining evaluation policy and should be one of "logistic_regression", "random_forest", or "lightgbm".
- `$base_model_for_reg_model` specifies the base ML model for defining regression model and should be one of "logistic_regression", "random_forest", or "lightgbm".
- `$ope_estimator` specifies the OPE estimator for NNPolicyLearner and should be one of "dm", "ipw", "sipw", "dr", "sndr" or "dros".
- `$base_model_for_ipw_learner` specifies the base ML model for defining evaluation policy and should be one of "logistic_regression", "random_forest", or "lightgbm".
- `$off_policy_objective` specifies the OPE estimator for NNPolicyLearner and should be one of "dm", "ipw", or "dr".
- `$n_hidden` specifies the size of hidden layers in NNPolicyLearner.
- `$n_layers` specifies the number of hidden layers in NNPolicyLearner.
- `$activation` specifies theadam activation function for NNPolicyLearner and should be one of "identity", "tanh", "logistic", or "relu".
- `$solver` specifies the optimizer for NNPolicyLearner and should be one of "lbfgs", "sgd", or "adam".
- `$activation` specifies the activation function for NNPolicyLearner and should be one of "identity", "tanh", "logistic", or "relu".
- `$solver` specifies the optimizer for NNPolicyLearner and should be one of "adagrad", "sgd", or "adam".
- `$batch_size` specifies the batch size for NNPolicyLearner.
- `$early_stopping` enables early stopping of training of NNPolicyLearner.

@@ -67,9 +63,8 @@ python evaluate_off_policy_learners.py\
--n_rounds 10000\
--n_actions 10\
--dim_context 5\
--base_model_for_evaluation_policy logistic_regression\
--base_model_for_reg_model logistic_regression\
--ope_estimator dr\
--base_model_for_ipw_learner logistic_regression\
--off_policy_objective ipw\
--n_hidden 100\
--n_layers 1\
--activation relu\
@@ -81,10 +76,10 @@
# =============================================
# random_state=12345
# ---------------------------------------------
# policy value
# random_policy 0.604339
# ipw_learner 0.767615
# nn_policy_learner (with dr) 0.764302
# policy value
# random_policy 0.605604
# ipw_learner 0.753016
# nn_policy_learner (with ipw) 0.759228
# =============================================
```

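For reference, here is a rough translation of the example command above into library calls. The `NNPolicyLearner` argument names (`off_policy_objective` as a string, `hidden_layer_size` built from `n_hidden` and `n_layers`) follow this PR's CLI flags and are assumptions rather than a verbatim excerpt of the script.

```python
from sklearn.linear_model import LogisticRegression

from obp.dataset import logistic_reward_function
from obp.dataset import SyntheticBanditDataset
from obp.policy import IPWLearner
from obp.policy import NNPolicyLearner

# synthetic logged bandit feedback: n_rounds=10000, n_actions=10, dim_context=5
dataset = SyntheticBanditDataset(
    n_actions=10,
    dim_context=5,
    reward_function=logistic_reward_function,
    random_state=12345,
)
bandit_feedback = dataset.obtain_batch_bandit_feedback(n_rounds=10000)

# baseline: IPWLearner with a logistic-regression base model
ipw_learner = IPWLearner(
    n_actions=dataset.n_actions,
    base_model=LogisticRegression(random_state=12345),
)
ipw_learner.fit(
    context=bandit_feedback["context"],
    action=bandit_feedback["action"],
    reward=bandit_feedback["reward"],
    pscore=bandit_feedback["pscore"],
)

# NNPolicyLearner trained with the IPW objective (--off_policy_objective ipw)
nn_policy_learner = NNPolicyLearner(
    n_actions=dataset.n_actions,
    dim_context=5,
    off_policy_objective="ipw",
    hidden_layer_size=(100,),  # --n_hidden 100, --n_layers 1
    activation="relu",
    solver="adam",
    random_state=12345,
)
nn_policy_learner.fit(
    context=bandit_feedback["context"],
    action=bandit_feedback["action"],
    reward=bandit_feedback["reward"],
    pscore=bandit_feedback["pscore"],
)

# both learners output action distributions for new contexts,
# which are then evaluated against the synthetic ground truth
action_dist_nn = nn_policy_learner.predict(context=bandit_feedback["context"])
```
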
2 changes: 1 addition & 1 deletion examples/opl/conf/hyperparams.yaml
@@ -1,5 +1,5 @@
lightgbm:
max_iter: 30
n_estimators: 30
learning_rate: 0.01
max_depth: 5
min_samples_leaf: 10