
Commit c870a20

Merge pull request #34 from nmasahiro/black
apply black
2 parents 2652f33 + 2e35a16 commit c870a20

14 files changed: +131 -54 lines changed


benchmark/cf_policy_search/run_cf_policy_search.py

Lines changed: 5 additions & 3 deletions
@@ -140,9 +140,11 @@ def process(b: int):
             action_dist=action_dist,
         )

-    processed = Parallel(backend="multiprocessing", n_jobs=n_jobs, verbose=50,)(
-        [delayed(process)(i) for i in np.arange(n_runs)]
-    )
+    processed = Parallel(
+        backend="multiprocessing",
+        n_jobs=n_jobs,
+        verbose=50,
+    )([delayed(process)(i) for i in np.arange(n_runs)])

     # save counterfactual policy evaluation results in `./logs` directory
     ope_results = np.zeros((n_runs, 2))
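Most hunks in this commit are Black expanding a `Parallel(...)(...)` call whose argument list ends in a trailing comma. A minimal runnable sketch of that joblib pattern, with a toy `process` worker standing in for the benchmark's per-run function (the worker body and run counts here are illustrative, not obp's code):

import numpy as np
from joblib import Parallel, delayed


def process(b: int) -> float:
    # toy stand-in for one experimental run
    return float(np.random.default_rng(b).random())


if __name__ == "__main__":
    # Black keeps the trailing comma, so it writes the call with
    # one keyword argument per line, as in the hunk above.
    processed = Parallel(
        backend="multiprocessing",
        n_jobs=2,
        verbose=50,
    )([delayed(process)(i) for i in np.arange(5)])
    print(processed)  # five floats, one per run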

benchmark/ope/benchmark_off_policy_estimators.py

Lines changed: 18 additions & 8 deletions
@@ -44,7 +44,10 @@
 if __name__ == "__main__":
     parser = argparse.ArgumentParser(description="evaluate off-policy estimators.")
     parser.add_argument(
-        "--n_runs", type=int, default=1, help="number of experimental runs.",
+        "--n_runs",
+        type=int,
+        default=1,
+        help="number of experimental runs.",
     )
     parser.add_argument(
         "--base_model",
@@ -141,7 +144,9 @@
         )
     else:
         policy = Random(
-            n_actions=obd.n_actions, len_list=obd.len_list, random_state=random_state,
+            n_actions=obd.n_actions,
+            len_list=obd.len_list,
+            random_state=random_state,
         )
     action_dist_single_round = policy.compute_batch_action_dist(
         n_sim=n_sim_to_compute_action_dist
@@ -172,7 +177,8 @@ def process(b: int):
         )
         # evaluate the estimation performance of OPE estimators
         ope = OffPolicyEvaluation(
-            bandit_feedback=bandit_feedback, ope_estimators=ope_estimators,
+            bandit_feedback=bandit_feedback,
+            ope_estimators=ope_estimators,
         )
         action_dist = np.tile(
             action_dist_single_round, (bandit_feedback["n_rounds"], 1, 1)
@@ -190,17 +196,21 @@ def process(b: int):

         return relative_ee_b

-    processed = Parallel(backend="multiprocessing", n_jobs=n_jobs, verbose=50,)(
-        [delayed(process)(i) for i in np.arange(n_runs)]
-    )
+    processed = Parallel(
+        backend="multiprocessing",
+        n_jobs=n_jobs,
+        verbose=50,
+    )([delayed(process)(i) for i in np.arange(n_runs)])

     # save results of the evaluation of ope in './logs' directory.
     estimator_names = [est.estimator_name for est in ope_estimators] + ["mrdr"]
     relative_ee = {est: np.zeros(n_runs) for est in estimator_names}
     for b, relative_ee_b in enumerate(processed):
-        for (estimator_name, relative_ee_,) in relative_ee_b.items():
+        for (
+            estimator_name,
+            relative_ee_,
+        ) in relative_ee_b.items():
             relative_ee[estimator_name][b] = relative_ee_
     DataFrame(relative_ee).describe().T.round(6).to_csv(
         log_path / f"eval_ope_results.csv"
     )
-
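Black also wraps the tuple target of the aggregation loop at the end of this script. A small self-contained sketch of that loop with fabricated per-run results (the estimator names and numbers below are made up for illustration):

import numpy as np
from pandas import DataFrame

n_runs = 3
estimator_names = ["ipw", "dm", "dr"]
# fabricated per-run relative estimation errors, one dict per run
processed = [
    {name: float(np.random.default_rng(b).random()) for name in estimator_names}
    for b in range(n_runs)
]

relative_ee = {est: np.zeros(n_runs) for est in estimator_names}
for b, relative_ee_b in enumerate(processed):
    for (
        estimator_name,
        relative_ee_,
    ) in relative_ee_b.items():
        relative_ee[estimator_name][b] = relative_ee_

print(DataFrame(relative_ee).describe().T.round(6))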

benchmark/ope/train_regression_model.py

Lines changed: 15 additions & 8 deletions
@@ -71,7 +71,10 @@ def evaluate_reg_model(
 if __name__ == "__main__":
     parser = argparse.ArgumentParser(description="evaluate off-policy estimators.")
     parser.add_argument(
-        "--n_runs", type=int, default=1, help="number of experimental runs.",
+        "--n_runs",
+        type=int,
+        default=1,
+        help="number of experimental runs.",
     )
     parser.add_argument(
         "--base_model",
@@ -188,7 +191,8 @@ def process(b: int):
         ).astype(bool)
         with open(reg_model_path / f"is_for_reg_model_{b}.pkl", "wb") as f:
             pickle.dump(
-                is_for_reg_model, f,
+                is_for_reg_model,
+                f,
             )
         if is_mrdr:
             reg_model = RegressionModel(
@@ -211,7 +215,8 @@ def process(b: int):
             )
             with open(reg_model_path / f"reg_model_mrdr_{b}.pkl", "wb") as f:
                 pickle.dump(
-                    reg_model, f,
+                    reg_model,
+                    f,
                 )
         else:
             reg_model = RegressionModel(
@@ -230,7 +235,8 @@ def process(b: int):
             )
             with open(reg_model_path / f"reg_model_{b}.pkl", "wb") as f:
                 pickle.dump(
-                    reg_model, f,
+                    reg_model,
+                    f,
                 )
         # evaluate the estimation performance of the regression model by AUC and RCE
         if is_timeseries_split:
@@ -250,9 +256,11 @@ def process(b: int):

         return performance_reg_model_b

-    processed = Parallel(backend="multiprocessing", n_jobs=n_jobs, verbose=50,)(
-        [delayed(process)(i) for i in np.arange(n_runs)]
-    )
+    processed = Parallel(
+        backend="multiprocessing",
+        n_jobs=n_jobs,
+        verbose=50,
+    )([delayed(process)(i) for i in np.arange(n_runs)])
     # save performance of the regression model in './logs' directory.
     if not is_mrdr:
         performance_reg_model = {metric: dict() for metric in ["auc", "rce"]}
@@ -262,4 +270,3 @@ def process(b: int):
         DataFrame(performance_reg_model).describe().T.round(6).to_csv(
             log_path / f"performance_reg_model.csv"
         )
-
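The `pickle.dump(reg_model, f,)` calls that Black splits here follow the usual save-then-load pattern. A minimal sketch of that round trip with a stand-in object (the dictionary and the path below are hypothetical, not the fitted RegressionModel):

import pickle
from pathlib import Path

reg_model = {"coef": [0.1, 0.2]}  # stand-in for a fitted regression model
reg_model_path = Path("./logs")
reg_model_path.mkdir(parents=True, exist_ok=True)

b = 0
with open(reg_model_path / f"reg_model_{b}.pkl", "wb") as f:
    pickle.dump(
        reg_model,
        f,
    )

with open(reg_model_path / f"reg_model_{b}.pkl", "rb") as f:
    print(pickle.load(f))  # {'coef': [0.1, 0.2]}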

examples/examples_with_obd/evaluate_off_policy_estimators.py

Lines changed: 11 additions & 5 deletions
@@ -139,7 +139,8 @@ def process(b: int):
         )
         # evaluate estimators' performances using relative estimation error (relative-ee)
         ope = OffPolicyEvaluation(
-            bandit_feedback=bandit_feedback, ope_estimators=ope_estimators,
+            bandit_feedback=bandit_feedback,
+            ope_estimators=ope_estimators,
         )
         action_dist = np.tile(
             action_dist_single_round, (bandit_feedback["n_rounds"], 1, 1)
@@ -152,12 +153,17 @@ def process(b: int):

         return relative_ee_b

-    processed = Parallel(backend="multiprocessing", n_jobs=n_jobs, verbose=50,)(
-        [delayed(process)(i) for i in np.arange(n_runs)]
-    )
+    processed = Parallel(
+        backend="multiprocessing",
+        n_jobs=n_jobs,
+        verbose=50,
+    )([delayed(process)(i) for i in np.arange(n_runs)])
     relative_ee_dict = {est.estimator_name: dict() for est in ope_estimators}
     for b, relative_ee_b in enumerate(processed):
-        for (estimator_name, relative_ee_,) in relative_ee_b.items():
+        for (
+            estimator_name,
+            relative_ee_,
+        ) in relative_ee_b.items():
             relative_ee_dict[estimator_name][b] = relative_ee_
     relative_ee_df = DataFrame(relative_ee_dict).describe().T.round(6)

examples/examples_with_synthetic/evaluate_off_policy_estimators.py

Lines changed: 11 additions & 5 deletions
@@ -172,7 +172,8 @@ def process(i: int):
         )
         # evaluate estimators' performances using relative estimation error (relative-ee)
         ope = OffPolicyEvaluation(
-            bandit_feedback=bandit_feedback_test, ope_estimators=ope_estimators,
+            bandit_feedback=bandit_feedback_test,
+            ope_estimators=ope_estimators,
         )
         relative_ee_i = ope.evaluate_performance_of_estimators(
             ground_truth_policy_value=ground_truth_policy_value,
@@ -182,12 +183,17 @@ def process(i: int):

         return relative_ee_i

-    processed = Parallel(backend="multiprocessing", n_jobs=n_jobs, verbose=50,)(
-        [delayed(process)(i) for i in np.arange(n_runs)]
-    )
+    processed = Parallel(
+        backend="multiprocessing",
+        n_jobs=n_jobs,
+        verbose=50,
+    )([delayed(process)(i) for i in np.arange(n_runs)])
     relative_ee_dict = {est.estimator_name: dict() for est in ope_estimators}
     for i, relative_ee_i in enumerate(processed):
-        for (estimator_name, relative_ee_,) in relative_ee_i.items():
+        for (
+            estimator_name,
+            relative_ee_,
+        ) in relative_ee_i.items():
             relative_ee_dict[estimator_name][i] = relative_ee_
     relative_ee_df = DataFrame(relative_ee_dict).describe().T.round(6)

obp/dataset/multiclass.py

Lines changed: 5 additions & 3 deletions
@@ -177,7 +177,9 @@ def n_samples(self) -> int:
         return self.y.shape[0]

     def split_train_eval(
-        self, eval_size: Union[int, float] = 0.25, random_state: Optional[int] = None,
+        self,
+        eval_size: Union[int, float] = 0.25,
+        random_state: Optional[int] = None,
     ) -> None:
         """Split the original data into the training (used for policy learning) and evaluation (used for OPE) sets.

@@ -204,7 +206,8 @@ def split_train_eval(
         self.n_samples_ev = self.X_ev.shape[0]

     def obtain_batch_bandit_feedback(
-        self, random_state: Optional[int] = None,
+        self,
+        random_state: Optional[int] = None,
     ) -> BanditFeedback:
         """Obtain batch logged bandit feedback, an evaluation policy, and its ground-truth policy value.

@@ -324,4 +327,3 @@ def calc_ground_truth_policy_value(self, action_dist: np.ndarray) -> np.ndarray:
             action_dist.shape[0] == self.n_samples_ev
         ), "the size of axis 0 of action_dist must be the same as the number of samples in the evaluation set"
         return action_dist[np.arange(self.n_samples_ev), self.y_ev].mean()
-
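The reformatted `split_train_eval` accepts an `eval_size` that may be a fraction or an absolute count, in the style of scikit-learn's splitters. A hedged sketch of that kind of split on toy data, using scikit-learn's `train_test_split` rather than obp's internal code:

import numpy as np
from sklearn.model_selection import train_test_split

X, y = np.arange(20).reshape(10, 2), np.arange(10)

# a fraction (0.25) or an absolute count both work for the split size
X_tr, X_ev, y_tr, y_ev = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=12345,
)
print(X_tr.shape[0], X_ev.shape[0])  # 7 training and 3 evaluation samples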

obp/dataset/synthetic.py

Lines changed: 11 additions & 4 deletions
@@ -194,7 +194,8 @@ def obtain_batch_bandit_feedback(self, n_rounds: int) -> BanditFeedback:
         action = np.array(
             [
                 self.random_.choice(
-                    np.arange(self.n_actions), p=behavior_policy_[i],
+                    np.arange(self.n_actions),
+                    p=behavior_policy_[i],
                 )
                 for i in np.arange(n_rounds)
             ]
@@ -240,7 +241,9 @@ def obtain_batch_bandit_feedback(self, n_rounds: int) -> BanditFeedback:


 def logistic_reward_function(
-    context: np.ndarray, action_context: np.ndarray, random_state: Optional[int] = None,
+    context: np.ndarray,
+    action_context: np.ndarray,
+    random_state: Optional[int] = None,
 ) -> np.ndarray:
     """Logistic mean reward function for synthetic bandit datasets.

@@ -280,7 +283,9 @@ def logistic_reward_function(


 def linear_reward_function(
-    context: np.ndarray, action_context: np.ndarray, random_state: Optional[int] = None,
+    context: np.ndarray,
+    action_context: np.ndarray,
+    random_state: Optional[int] = None,
 ) -> np.ndarray:
     """Linear mean reward function for synthetic bandit datasets.

@@ -320,7 +325,9 @@ def linear_reward_function(


 def linear_behavior_policy(
-    context: np.ndarray, action_context: np.ndarray, random_state: Optional[int] = None,
+    context: np.ndarray,
+    action_context: np.ndarray,
+    random_state: Optional[int] = None,
 ) -> np.ndarray:
     """Linear contextual behavior policy for synthetic bandit datasets.
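The first hunk reformats the per-round `random_.choice(np.arange(self.n_actions), p=behavior_policy_[i])` call that samples actions from the behavior policy. A toy sketch of that sampling step in plain NumPy (the probability vectors are fabricated):

import numpy as np

n_rounds, n_actions = 5, 4
random_ = np.random.RandomState(12345)

# hypothetical behavior policy: one probability vector per round, rows sum to one
behavior_policy_ = random_.dirichlet(np.ones(n_actions), size=n_rounds)

action = np.array(
    [
        random_.choice(
            np.arange(n_actions),
            p=behavior_policy_[i],
        )
        for i in np.arange(n_rounds)
    ]
)
print(action)  # five sampled action indices in [0, n_actions)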

obp/ope/estimators.py

Lines changed: 28 additions & 8 deletions
@@ -126,7 +126,10 @@ def estimate_policy_value(

         """
         return self._estimate_round_rewards(
-            reward=reward, action=action, position=position, action_dist=action_dist,
+            reward=reward,
+            action=action,
+            position=position,
+            action_dist=action_dist,
         ).mean()

     def estimate_interval(
@@ -169,7 +172,10 @@ def estimate_interval(

         """
         estimated_round_rewards = self._estimate_round_rewards(
-            reward=reward, action=action, position=position, action_dist=action_dist,
+            reward=reward,
+            action=action,
+            position=position,
+            action_dist=action_dist,
         )
         return estimate_confidence_interval_by_bootstrap(
             samples=estimated_round_rewards,
@@ -506,7 +512,11 @@ def _estimate_round_rewards(
             np.arange(n_rounds), :, position
         ]
         pi_e_at_position = action_dist[np.arange(n_rounds), :, position]
-        return np.average(q_hat_at_position, weights=pi_e_at_position, axis=1,)
+        return np.average(
+            q_hat_at_position,
+            weights=pi_e_at_position,
+            axis=1,
+        )

     def estimate_policy_value(
         self,
@@ -687,7 +697,9 @@ def _estimate_round_rewards(
         ]
         pi_e_at_position = action_dist[np.arange(n_rounds), :, position]
         estimated_rewards = np.average(
-            q_hat_at_position, weights=pi_e_at_position, axis=1,
+            q_hat_at_position,
+            weights=pi_e_at_position,
+            axis=1,
         )
         estimated_rewards += iw * (reward - q_hat_factual)
         return estimated_rewards
@@ -889,7 +901,9 @@ def _estimate_round_rewards(
         ]
         pi_e_at_position = action_dist[np.arange(n_rounds), :, position]
         estimated_rewards = np.average(
-            q_hat_at_position, weights=pi_e_at_position, axis=1,
+            q_hat_at_position,
+            weights=pi_e_at_position,
+            axis=1,
         )
         q_hat_factual = estimated_rewards_by_reg_model[
             np.arange(n_rounds), action, position
@@ -993,7 +1007,9 @@ def _estimate_round_rewards(
         ]
         pi_e_at_position = action_dist[np.arange(n_rounds), :, position]
         estimated_rewards = (1 - switch_indicator) * np.average(
-            q_hat_at_position, weights=pi_e_at_position, axis=1,
+            q_hat_at_position,
+            weights=pi_e_at_position,
+            axis=1,
         )
         estimated_rewards += switch_indicator * iw * reward
         return estimated_rewards
@@ -1098,7 +1114,9 @@ def _estimate_round_rewards(
         ]
         pi_e_at_position = action_dist[np.arange(n_rounds), :, position]
         estimated_rewards = np.average(
-            q_hat_at_position, weights=pi_e_at_position, axis=1,
+            q_hat_at_position,
+            weights=pi_e_at_position,
+            axis=1,
         )
         estimated_rewards += switch_indicator * iw * (reward - q_hat_factual)
         return estimated_rewards
@@ -1214,7 +1232,9 @@ def _estimate_round_rewards(
         ]
         pi_e_at_position = action_dist[np.arange(n_rounds), :, position]
         estimated_rewards = np.average(
-            q_hat_at_position, weights=pi_e_at_position, axis=1,
+            q_hat_at_position,
+            weights=pi_e_at_position,
+            axis=1,
         )
         estimated_rewards += shrinkage_weight * (reward - q_hat_factual)
         return estimated_rewards
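The expression Black keeps reformatting throughout this file, `np.average(q_hat_at_position, weights=pi_e_at_position, axis=1)`, is the model-based piece of these estimators: a per-round average of predicted rewards weighted by the evaluation policy's action probabilities. A self-contained sketch with made-up arrays (the variable names mirror the diff, the data is random):

import numpy as np

n_rounds, n_actions = 4, 3
rng = np.random.default_rng(0)

# predicted reward for every (round, action) pair at the relevant position
q_hat_at_position = rng.random((n_rounds, n_actions))
# evaluation policy's action probabilities; each row sums to one
pi_e_at_position = rng.dirichlet(np.ones(n_actions), size=n_rounds)

# expected reward per round under the evaluation policy, shape (n_rounds,)
estimated_rewards = np.average(
    q_hat_at_position,
    weights=pi_e_at_position,
    axis=1,
)
print(estimated_rewards.shape)  # (4,)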

obp/ope/regression_model.py

Lines changed: 4 additions & 1 deletion
@@ -333,7 +333,10 @@ def fit_predict(
         return estimated_rewards_by_reg_model

     def _pre_process_for_reg_model(
-        self, context: np.ndarray, action: np.ndarray, action_context: np.ndarray,
+        self,
+        context: np.ndarray,
+        action: np.ndarray,
+        action_context: np.ndarray,
     ) -> np.ndarray:
         """Preprocess feature vectors to train a give regression model.
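`_pre_process_for_reg_model` builds the feature matrix a regression model is trained on from the context, the chosen actions, and the action context. A purely illustrative sketch of one common way to do this, concatenating each round's context with the chosen action's representation (this is an assumption for illustration, not obp's actual preprocessing):

import numpy as np

n_rounds, dim_context, n_actions, dim_action_context = 5, 3, 4, 2
rng = np.random.default_rng(0)

context = rng.random((n_rounds, dim_context))
action = rng.integers(n_actions, size=n_rounds)
action_context = rng.random((n_actions, dim_action_context))

# one possible design matrix: per-round context next to the chosen action's context
X = np.concatenate([context, action_context[action]], axis=1)
print(X.shape)  # (5, 5) = (n_rounds, dim_context + dim_action_context)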

obp/policy/base.py

Lines changed: 3 additions & 1 deletion
@@ -198,7 +198,9 @@ def policy_type(self) -> str:
         return "offline"

     @abstractmethod
-    def fit(self,) -> None:
+    def fit(
+        self,
+    ) -> None:
         """Fits an offline bandit policy using the given logged bandit feedback data."""
         raise NotImplementedError
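The only change here is Black expanding the `fit(self,)` signature of the abstract offline learner. A hedged sketch of how an abstract method like this is overridden in a subclass (`OfflineLearnerBase` is a stand-in name and `MyOfflinePolicy` is illustrative, neither is part of obp):

from abc import ABC, abstractmethod


class OfflineLearnerBase(ABC):
    """Minimal stand-in for the abstract base class touched in this hunk."""

    @abstractmethod
    def fit(
        self,
    ) -> None:
        """Fits an offline bandit policy using the given logged bandit feedback data."""
        raise NotImplementedError


class MyOfflinePolicy(OfflineLearnerBase):
    def fit(
        self,
    ) -> None:
        # a real learner would consume logged bandit feedback here
        print("fitted")


MyOfflinePolicy().fit()  # prints "fitted"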

0 commit comments