@@ -41,11 +41,11 @@ class BaseOffPolicyEstimatorTuning:
41
41
which improves the original SLOPE proposed by Su et al.(2020).
42
42
43
43
use_bias_upper_bound: bool, default=True
44
- Whether to use bias upper bound in hyperparameter tuning.
44
+ Whether to use a bias upper bound in hyperparameter tuning.
45
45
If False, the direct bias estimator is used to estimate the MSE. See Su et al.(2020) for details.
46
46
47
- delta: float, default=0.05
48
- A confidence delta to construct a high probability upper bound based on Bernstein inequality .
47
+ delta: float, default=0.1
48
+ A confidence delta to construct a high probability upper bound used in SLOPE.
49
49
50
50
use_estimated_pscore: bool, default=False.
51
51
If True, `estimated_pscore` is used, otherwise, `pscore` (the true propensity scores) is used.
@@ -70,7 +70,7 @@ class BaseOffPolicyEstimatorTuning:
70
70
lambdas : List [float ] = None
71
71
tuning_method : str = "slope"
72
72
use_bias_upper_bound : bool = True
73
- delta : float = 0.05
73
+ delta : float = 0.1
74
74
use_estimated_pscore : bool = False
75
75
76
76
def __new__ (cls , * args , ** kwargs ):
@@ -151,7 +151,6 @@ def _tune_hyperparam_with_slope(
151
151
) -> float :
152
152
"""Find the best hyperparameter value from the candidate set by SLOPE."""
153
153
C = np .sqrt (6 ) - 1
154
- theta_list , cnf_list = [], []
155
154
theta_list_for_sort , cnf_list_for_sort = [], []
156
155
for hyperparam_ in self .lambdas :
157
156
estimated_round_rewards = self .base_ope_estimator (
@@ -172,6 +171,7 @@ def _tune_hyperparam_with_slope(
172
171
)
173
172
cnf_list_for_sort .append (cnf )
174
173
174
+ theta_list , cnf_list = [], []
175
175
sorted_idx_list = np .argsort (cnf_list_for_sort )[::- 1 ]
176
176
for i , idx in enumerate (sorted_idx_list ):
177
177
cnf_i = cnf_list_for_sort [idx ]
@@ -380,6 +380,8 @@ class InverseProbabilityWeightingTuning(BaseOffPolicyEstimatorTuning):
380
380
A list of candidate clipping hyperparameters.
381
381
The automatic hyperparameter tuning procedure proposed by Su et al.(2020)
382
382
or Tucker and Lee.(2021) will choose the best hyperparameter value from the logged data.
383
+ The candidate hyperparameter values will be sorted automatically to ensure the monotonicity
384
+ assumption of SLOPE.
383
385
384
386
tuning_method: str, default="slope".
385
387
A method used to tune the hyperparameter of an OPE estimator.
@@ -388,11 +390,11 @@ class InverseProbabilityWeightingTuning(BaseOffPolicyEstimatorTuning):
388
390
which improves the original SLOPE proposed by Su et al.(2020).
389
391
390
392
use_bias_upper_bound: bool, default=True
391
- Whether to use bias upper bound in hyperparameter tuning.
393
+ Whether to use a bias upper bound in hyperparameter tuning.
392
394
If False, the direct bias estimator is used to estimate the MSE. See Su et al.(2020) for details.
393
395
394
396
delta: float, default=0.05
395
- A confidence delta to construct a high probability upper bound based on Bernstein inequality .
397
+ A confidence delta to construct a high probability upper bound used in SLOPE.
396
398
397
399
use_estimated_pscore: bool, default=False.
398
400
If True, `estimated_pscore` is used, otherwise, `pscore` (the true propensity scores) is used.
@@ -417,6 +419,7 @@ def __post_init__(self) -> None:
417
419
self .base_ope_estimator = InverseProbabilityWeighting
418
420
super ()._check_lambdas ()
419
421
super ()._check_init_inputs ()
422
+ self .lambdas .sort (reverse = True )
420
423
421
424
def estimate_policy_value (
422
425
self ,
@@ -583,6 +586,8 @@ class DoublyRobustTuning(BaseOffPolicyEstimatorTuning):
583
586
A list of candidate clipping hyperparameters.
584
587
The automatic hyperparameter tuning procedure proposed by Su et al.(2020)
585
588
or Tucker and Lee.(2021) will choose the best hyperparameter value from the logged data.
589
+ The candidate hyperparameter values will be sorted automatically to ensure the monotonicity
590
+ assumption of SLOPE.
586
591
587
592
tuning_method: str, default="slope".
588
593
A method used to tune the hyperparameter of an OPE estimator.
@@ -614,6 +619,7 @@ def __post_init__(self) -> None:
614
619
self .base_ope_estimator = DoublyRobust
615
620
super ()._check_lambdas ()
616
621
super ()._check_init_inputs ()
622
+ self .lambdas .sort (reverse = True )
617
623
618
624
def estimate_policy_value (
619
625
self ,
@@ -801,6 +807,8 @@ class SwitchDoublyRobustTuning(BaseOffPolicyEstimatorTuning):
801
807
A list of candidate switching hyperparameters.
802
808
The automatic hyperparameter tuning procedure proposed by Su et al.(2020)
803
809
or Tucker and Lee.(2021) will choose the best hyperparameter value from the logged data.
810
+ The candidate hyperparameter values will be sorted automatically to ensure the monotonicity
811
+ assumption of SLOPE.
804
812
805
813
tuning_method: str, default="slope".
806
814
A method used to tune the hyperparameter of an OPE estimator.
@@ -831,6 +839,7 @@ def __post_init__(self) -> None:
831
839
self .base_ope_estimator = SwitchDoublyRobust
832
840
super ()._check_lambdas ()
833
841
super ()._check_init_inputs ()
842
+ self .lambdas .sort (reverse = True )
834
843
835
844
def estimate_policy_value (
836
845
self ,
@@ -1018,6 +1027,8 @@ class DoublyRobustWithShrinkageTuning(BaseOffPolicyEstimatorTuning):
1018
1027
A list of candidate shrinkage hyperparameters.
1019
1028
The automatic hyperparameter tuning procedure proposed by Su et al.(2020)
1020
1029
or Tucker and Lee.(2021) will choose the best hyperparameter value from the logged data.
1030
+ The candidate hyperparameter values will be sorted automatically to ensure the monotonicity
1031
+ assumption of SLOPE.
1021
1032
1022
1033
tuning_method: str, default="slope".
1023
1034
A method used to tune the hyperparameter of an OPE estimator.
@@ -1048,6 +1059,7 @@ def __post_init__(self) -> None:
1048
1059
self .base_ope_estimator = DoublyRobustWithShrinkage
1049
1060
super ()._check_lambdas ()
1050
1061
super ()._check_init_inputs ()
1062
+ self .lambdas .sort ()
1051
1063
1052
1064
def estimate_policy_value (
1053
1065
self ,
@@ -1234,6 +1246,8 @@ class SubGaussianInverseProbabilityWeightingTuning(BaseOffPolicyEstimatorTuning)
1234
1246
A list of candidate hyperparameter values, which should be in the range of [0.0, 1.0].
1235
1247
The automatic hyperparameter tuning procedure proposed by Su et al.(2020)
1236
1248
or Tucker and Lee.(2021) will choose the best hyperparameter value from the logged data.
1249
+ The candidate hyperparameter values will be sorted automatically to ensure the monotonicity
1250
+ assumption of SLOPE.
1237
1251
1238
1252
tuning_method: str, default="slope".
1239
1253
A method used to tune the hyperparameter of an OPE estimator.
@@ -1242,11 +1256,11 @@ class SubGaussianInverseProbabilityWeightingTuning(BaseOffPolicyEstimatorTuning)
1242
1256
which improves the original SLOPE proposed by Su et al.(2020).
1243
1257
1244
1258
use_bias_upper_bound: bool, default=True
1245
- Whether to use bias upper bound in hyperparameter tuning.
1259
+ Whether to use a bias upper bound in hyperparameter tuning.
1246
1260
If False, the direct bias estimator is used to estimate the MSE. See Su et al.(2020) for details.
1247
1261
1248
1262
delta: float, default=0.05
1249
- A confidence delta to construct a high probability upper bound based on Bernstein inequality .
1263
+ A confidence delta to construct a high probability upper bound used in SLOPE.
1250
1264
1251
1265
use_estimated_pscore: bool, default=False.
1252
1266
If True, `estimated_pscore` is used, otherwise, `pscore` (the true propensity scores) is used.
@@ -1274,6 +1288,7 @@ def __post_init__(self) -> None:
1274
1288
self .base_ope_estimator = SubGaussianInverseProbabilityWeighting
1275
1289
super ()._check_lambdas (max_val = 1.0 )
1276
1290
super ()._check_init_inputs ()
1291
+ self .lambdas .sort ()
1277
1292
1278
1293
def estimate_policy_value (
1279
1294
self ,
@@ -1437,6 +1452,8 @@ class SubGaussianDoublyRobustTuning(BaseOffPolicyEstimatorTuning):
1437
1452
A list of candidate hyperparameter values, which should be in the range of [0.0, 1.0].
1438
1453
The automatic hyperparameter tuning procedure proposed by Su et al.(2020)
1439
1454
or Tucker and Lee.(2021) will choose the best hyperparameter value from the logged data.
1455
+ The candidate hyperparameter values will be sorted automatically to ensure the monotonicity
1456
+ assumption of SLOPE.
1440
1457
1441
1458
tuning_method: str, default="slope".
1442
1459
A method used to tune the hyperparameter of an OPE estimator.
@@ -1470,6 +1487,7 @@ def __post_init__(self) -> None:
1470
1487
self .base_ope_estimator = SubGaussianDoublyRobust
1471
1488
super ()._check_lambdas (max_val = 1.0 )
1472
1489
super ()._check_init_inputs ()
1490
+ self .lambdas .sort ()
1473
1491
1474
1492
def estimate_policy_value (
1475
1493
self ,
0 commit comments