@@ -41,11 +41,11 @@ class BaseOffPolicyEstimatorTuning:
41
41
which improves the original SLOPE proposed by Su et al.(2020).
42
42
43
43
use_bias_upper_bound: bool, default=True
44
- Whether to use bias upper bound in hyperparameter tuning.
44
+ Whether to use a bias upper bound in hyperparameter tuning.
45
45
If False, the direct bias estimator is used to estimate the MSE. See Su et al.(2020) for details.
46
46
47
- delta: float, default=0.05
48
- A confidence delta to construct a high probability upper bound based on Bernstein inequality .
47
+ delta: float, default=0.1
48
+ A confidence delta to construct a high probability upper bound used in SLOPE.
49
49
50
50
use_estimated_pscore: bool, default=False.
51
51
If True, `estimated_pscore` is used, otherwise, `pscore` (the true propensity scores) is used.
@@ -70,7 +70,7 @@ class BaseOffPolicyEstimatorTuning:
70
70
lambdas : List [float ] = None
71
71
tuning_method : str = "slope"
72
72
use_bias_upper_bound : bool = True
73
- delta : float = 0.05
73
+ delta : float = 0.1
74
74
use_estimated_pscore : bool = False
75
75
76
76
def __new__ (cls , * args , ** kwargs ):
@@ -151,7 +151,6 @@ def _tune_hyperparam_with_slope(
151
151
) -> float :
152
152
"""Find the best hyperparameter value from the candidate set by SLOPE."""
153
153
C = np .sqrt (6 ) - 1
154
- theta_list , cnf_list = [], []
155
154
theta_list_for_sort , cnf_list_for_sort = [], []
156
155
for hyperparam_ in self .lambdas :
157
156
estimated_round_rewards = self .base_ope_estimator (
@@ -172,6 +171,7 @@ def _tune_hyperparam_with_slope(
172
171
)
173
172
cnf_list_for_sort .append (cnf )
174
173
174
+ theta_list , cnf_list = [], []
175
175
sorted_idx_list = np .argsort (cnf_list_for_sort )[::- 1 ]
176
176
for i , idx in enumerate (sorted_idx_list ):
177
177
cnf_i = cnf_list_for_sort [idx ]
@@ -380,6 +380,8 @@ class InverseProbabilityWeightingTuning(BaseOffPolicyEstimatorTuning):
380
380
A list of candidate clipping hyperparameters.
381
381
The automatic hyperparameter tuning procedure proposed by Su et al.(2020)
382
382
or Tucker and Lee.(2021) will choose the best hyperparameter value from the logged data.
383
+ The candidate hyperparameter values will be sorted automatically to ensure the monotonicity
384
+ assumption of SLOPE.
383
385
384
386
tuning_method: str, default="slope".
385
387
A method used to tune the hyperparameter of an OPE estimator.
@@ -388,11 +390,11 @@ class InverseProbabilityWeightingTuning(BaseOffPolicyEstimatorTuning):
388
390
which improves the original SLOPE proposed by Su et al.(2020).
389
391
390
392
use_bias_upper_bound: bool, default=True
391
- Whether to use bias upper bound in hyperparameter tuning.
393
+ Whether to use a bias upper bound in hyperparameter tuning.
392
394
If False, the direct bias estimator is used to estimate the MSE. See Su et al.(2020) for details.
393
395
394
396
delta: float, default=0.05
395
- A confidence delta to construct a high probability upper bound based on Bernstein inequality .
397
+ A confidence delta to construct a high probability upper bound used in SLOPE.
396
398
397
399
use_estimated_pscore: bool, default=False.
398
400
If True, `estimated_pscore` is used, otherwise, `pscore` (the true propensity scores) is used.
@@ -417,6 +419,7 @@ def __post_init__(self) -> None:
417
419
self .base_ope_estimator = InverseProbabilityWeighting
418
420
super ()._check_lambdas ()
419
421
super ()._check_init_inputs ()
422
+ self .lambdas .sort (reverse = True )
420
423
421
424
def estimate_policy_value (
422
425
self ,
@@ -583,6 +586,8 @@ class DoublyRobustTuning(BaseOffPolicyEstimatorTuning):
583
586
A list of candidate clipping hyperparameters.
584
587
The automatic hyperparameter tuning procedure proposed by Su et al.(2020)
585
588
or Tucker and Lee.(2021) will choose the best hyperparameter value from the logged data.
589
+ The candidate hyperparameter values will be sorted automatically to ensure the monotonicity
590
+ assumption of SLOPE.
586
591
587
592
tuning_method: str, default="slope".
588
593
A method used to tune the hyperparameter of an OPE estimator.
@@ -614,6 +619,7 @@ def __post_init__(self) -> None:
614
619
self .base_ope_estimator = DoublyRobust
615
620
super ()._check_lambdas ()
616
621
super ()._check_init_inputs ()
622
+ self .lambdas .sort (reverse = True )
617
623
618
624
def estimate_policy_value (
619
625
self ,
@@ -801,6 +807,8 @@ class SwitchDoublyRobustTuning(BaseOffPolicyEstimatorTuning):
801
807
A list of candidate switching hyperparameters.
802
808
The automatic hyperparameter tuning procedure proposed by Su et al.(2020)
803
809
or Tucker and Lee.(2021) will choose the best hyperparameter value from the logged data.
810
+ The candidate hyperparameter values will be sorted automatically to ensure the monotonicity
811
+ assumption of SLOPE.
804
812
805
813
tuning_method: str, default="slope".
806
814
A method used to tune the hyperparameter of an OPE estimator.
@@ -831,6 +839,7 @@ def __post_init__(self) -> None:
831
839
self .base_ope_estimator = SwitchDoublyRobust
832
840
super ()._check_lambdas ()
833
841
super ()._check_init_inputs ()
842
+ self .lambdas .sort (reverse = True )
834
843
835
844
def estimate_policy_value (
836
845
self ,
@@ -1018,6 +1027,8 @@ class DoublyRobustWithShrinkageTuning(BaseOffPolicyEstimatorTuning):
1018
1027
A list of candidate shrinkage hyperparameters.
1019
1028
The automatic hyperparameter tuning procedure proposed by Su et al.(2020)
1020
1029
or Tucker and Lee.(2021) will choose the best hyperparameter value from the logged data.
1030
+ The candidate hyperparameter values will be sorted automatically to ensure the monotonicity
1031
+ assumption of SLOPE.
1021
1032
1022
1033
tuning_method: str, default="slope".
1023
1034
A method used to tune the hyperparameter of an OPE estimator.
@@ -1048,6 +1059,7 @@ def __post_init__(self) -> None:
1048
1059
self .base_ope_estimator = DoublyRobustWithShrinkage
1049
1060
super ()._check_lambdas ()
1050
1061
super ()._check_init_inputs ()
1062
+ self .lambdas .sort ()
1051
1063
1052
1064
def estimate_policy_value (
1053
1065
self ,
@@ -1234,6 +1246,8 @@ class SubGaussianInverseProbabilityWeightingTuning(BaseOffPolicyEstimatorTuning)
1234
1246
A list of candidate hyperparameter values, which should be in the range of [0.0, 1.0].
1235
1247
The automatic hyperparameter tuning procedure proposed by Su et al.(2020)
1236
1248
or Tucker and Lee.(2021) will choose the best hyperparameter value from the logged data.
1249
+ The candidate hyperparameter values will be sorted automatically to ensure the monotonicity
1250
+ assumption of SLOPE.
1237
1251
1238
1252
tuning_method: str, default="slope".
1239
1253
A method used to tune the hyperparameter of an OPE estimator.
@@ -1242,11 +1256,11 @@ class SubGaussianInverseProbabilityWeightingTuning(BaseOffPolicyEstimatorTuning)
1242
1256
which improves the original SLOPE proposed by Su et al.(2020).
1243
1257
1244
1258
use_bias_upper_bound: bool, default=True
1245
- Whether to use bias upper bound in hyperparameter tuning.
1259
+ Whether to use a bias upper bound in hyperparameter tuning.
1246
1260
If False, the direct bias estimator is used to estimate the MSE. See Su et al.(2020) for details.
1247
1261
1248
1262
delta: float, default=0.05
1249
- A confidence delta to construct a high probability upper bound based on Bernstein inequality .
1263
+ A confidence delta to construct a high probability upper bound used in SLOPE.
1250
1264
1251
1265
use_estimated_pscore: bool, default=False.
1252
1266
If True, `estimated_pscore` is used, otherwise, `pscore` (the true propensity scores) is used.
@@ -1274,6 +1288,7 @@ def __post_init__(self) -> None:
1274
1288
self .base_ope_estimator = SubGaussianInverseProbabilityWeighting
1275
1289
super ()._check_lambdas (max_val = 1.0 )
1276
1290
super ()._check_init_inputs ()
1291
+ self .lambdas .sort ()
1277
1292
1278
1293
def estimate_policy_value (
1279
1294
self ,
@@ -1437,6 +1452,8 @@ class SubGaussianDoublyRobustTuning(BaseOffPolicyEstimatorTuning):
1437
1452
A list of candidate hyperparameter values, which should be in the range of [0.0, 1.0].
1438
1453
The automatic hyperparameter tuning procedure proposed by Su et al.(2020)
1439
1454
or Tucker and Lee.(2021) will choose the best hyperparameter value from the logged data.
1455
+ The candidate hyperparameter values will be sorted automatically to ensure the monotonicity
1456
+ assumption of SLOPE.
1440
1457
1441
1458
tuning_method: str, default="slope".
1442
1459
A method used to tune the hyperparameter of an OPE estimator.
@@ -1470,6 +1487,7 @@ def __post_init__(self) -> None:
1470
1487
self .base_ope_estimator = SubGaussianDoublyRobust
1471
1488
super ()._check_lambdas (max_val = 1.0 )
1472
1489
super ()._check_init_inputs ()
1490
+ self .lambdas .sort ()
1473
1491
1474
1492
def estimate_policy_value (
1475
1493
self ,
0 commit comments