
Commit c870a20

Merge pull request #34 from nmasahiro/black
apply black
2 parents 2652f33 + 2e35a16 commit c870a20

14 files changed: +131 -54 lines changed


benchmark/cf_policy_search/run_cf_policy_search.py

Lines changed: 5 additions & 3 deletions
@@ -140,9 +140,11 @@ def process(b: int):
             action_dist=action_dist,
         )

-    processed = Parallel(backend="multiprocessing", n_jobs=n_jobs, verbose=50,)(
-        [delayed(process)(i) for i in np.arange(n_runs)]
-    )
+    processed = Parallel(
+        backend="multiprocessing",
+        n_jobs=n_jobs,
+        verbose=50,
+    )([delayed(process)(i) for i in np.arange(n_runs)])

     # save counterfactual policy evaluation results in `./logs` directory
     ope_results = np.zeros((n_runs, 2))
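Most hunks in this commit are Black expanding a `Parallel(...)(...)` call whose argument list ends in a trailing comma. A minimal runnable sketch of that joblib pattern, with a toy `process` worker standing in for the benchmark's per-run function (the worker body and run counts here are illustrative, not obp's code):

import numpy as np
from joblib import Parallel, delayed


def process(b: int) -> float:
    # toy stand-in for one experimental run
    return float(np.random.default_rng(b).random())


if __name__ == "__main__":
    # Black keeps the trailing comma, so it writes the call with
    # one keyword argument per line, as in the hunk above.
    processed = Parallel(
        backend="multiprocessing",
        n_jobs=2,
        verbose=50,
    )([delayed(process)(i) for i in np.arange(5)])
    print(processed)  # five floats, one per run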

benchmark/ope/benchmark_off_policy_estimators.py

Lines changed: 18 additions & 8 deletions
@@ -44,7 +44,10 @@
 if __name__ == "__main__":
     parser = argparse.ArgumentParser(description="evaluate off-policy estimators.")
     parser.add_argument(
-        "--n_runs", type=int, default=1, help="number of experimental runs.",
+        "--n_runs",
+        type=int,
+        default=1,
+        help="number of experimental runs.",
     )
     parser.add_argument(
         "--base_model",
@@ -141,7 +144,9 @@
         )
     else:
         policy = Random(
-            n_actions=obd.n_actions, len_list=obd.len_list, random_state=random_state,
+            n_actions=obd.n_actions,
+            len_list=obd.len_list,
+            random_state=random_state,
         )
     action_dist_single_round = policy.compute_batch_action_dist(
         n_sim=n_sim_to_compute_action_dist
@@ -172,7 +177,8 @@ def process(b: int):
         )
         # evaluate the estimation performance of OPE estimators
         ope = OffPolicyEvaluation(
-            bandit_feedback=bandit_feedback, ope_estimators=ope_estimators,
+            bandit_feedback=bandit_feedback,
+            ope_estimators=ope_estimators,
         )
         action_dist = np.tile(
             action_dist_single_round, (bandit_feedback["n_rounds"], 1, 1)
@@ -190,17 +196,21 @@ def process(b: int):

         return relative_ee_b

-    processed = Parallel(backend="multiprocessing", n_jobs=n_jobs, verbose=50,)(
-        [delayed(process)(i) for i in np.arange(n_runs)]
-    )
+    processed = Parallel(
+        backend="multiprocessing",
+        n_jobs=n_jobs,
+        verbose=50,
+    )([delayed(process)(i) for i in np.arange(n_runs)])

     # save results of the evaluation of ope in './logs' directory.
     estimator_names = [est.estimator_name for est in ope_estimators] + ["mrdr"]
     relative_ee = {est: np.zeros(n_runs) for est in estimator_names}
     for b, relative_ee_b in enumerate(processed):
-        for (estimator_name, relative_ee_,) in relative_ee_b.items():
+        for (
+            estimator_name,
+            relative_ee_,
+        ) in relative_ee_b.items():
             relative_ee[estimator_name][b] = relative_ee_
     DataFrame(relative_ee).describe().T.round(6).to_csv(
         log_path / f"eval_ope_results.csv"
     )
-
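Black also wraps the tuple target of the aggregation loop at the end of this script. A small self-contained sketch of that loop with fabricated per-run results (the estimator names and numbers below are made up for illustration):

import numpy as np
from pandas import DataFrame

n_runs = 3
estimator_names = ["ipw", "dm", "dr"]
# fabricated per-run relative estimation errors, one dict per run
processed = [
    {name: float(np.random.default_rng(b).random()) for name in estimator_names}
    for b in range(n_runs)
]

relative_ee = {est: np.zeros(n_runs) for est in estimator_names}
for b, relative_ee_b in enumerate(processed):
    for (
        estimator_name,
        relative_ee_,
    ) in relative_ee_b.items():
        relative_ee[estimator_name][b] = relative_ee_

print(DataFrame(relative_ee).describe().T.round(6))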

benchmark/ope/train_regression_model.py

Lines changed: 15 additions & 8 deletions
@@ -71,7 +71,10 @@ def evaluate_reg_model(
 if __name__ == "__main__":
     parser = argparse.ArgumentParser(description="evaluate off-policy estimators.")
     parser.add_argument(
-        "--n_runs", type=int, default=1, help="number of experimental runs.",
+        "--n_runs",
+        type=int,
+        default=1,
+        help="number of experimental runs.",
     )
     parser.add_argument(
         "--base_model",
@@ -188,7 +191,8 @@ def process(b: int):
         ).astype(bool)
         with open(reg_model_path / f"is_for_reg_model_{b}.pkl", "wb") as f:
             pickle.dump(
-                is_for_reg_model, f,
+                is_for_reg_model,
+                f,
             )
         if is_mrdr:
             reg_model = RegressionModel(
@@ -211,7 +215,8 @@ def process(b: int):
             )
             with open(reg_model_path / f"reg_model_mrdr_{b}.pkl", "wb") as f:
                 pickle.dump(
-                    reg_model, f,
+                    reg_model,
+                    f,
                 )
         else:
             reg_model = RegressionModel(
@@ -230,7 +235,8 @@ def process(b: int):
             )
             with open(reg_model_path / f"reg_model_{b}.pkl", "wb") as f:
                 pickle.dump(
-                    reg_model, f,
+                    reg_model,
+                    f,
                 )
         # evaluate the estimation performance of the regression model by AUC and RCE
         if is_timeseries_split:
@@ -250,9 +256,11 @@ def process(b: int):

         return performance_reg_model_b

-    processed = Parallel(backend="multiprocessing", n_jobs=n_jobs, verbose=50,)(
-        [delayed(process)(i) for i in np.arange(n_runs)]
-    )
+    processed = Parallel(
+        backend="multiprocessing",
+        n_jobs=n_jobs,
+        verbose=50,
+    )([delayed(process)(i) for i in np.arange(n_runs)])
     # save performance of the regression model in './logs' directory.
     if not is_mrdr:
         performance_reg_model = {metric: dict() for metric in ["auc", "rce"]}
@@ -262,4 +270,3 @@ def process(b: int):
         DataFrame(performance_reg_model).describe().T.round(6).to_csv(
             log_path / f"performance_reg_model.csv"
         )
-
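The `pickle.dump(reg_model, f,)` calls that Black splits here follow the usual save-then-load pattern. A minimal sketch of that round trip with a stand-in object (the dictionary and the path below are hypothetical, not the fitted RegressionModel):

import pickle
from pathlib import Path

reg_model = {"coef": [0.1, 0.2]}  # stand-in for a fitted regression model
reg_model_path = Path("./logs")
reg_model_path.mkdir(parents=True, exist_ok=True)

b = 0
with open(reg_model_path / f"reg_model_{b}.pkl", "wb") as f:
    pickle.dump(
        reg_model,
        f,
    )

with open(reg_model_path / f"reg_model_{b}.pkl", "rb") as f:
    print(pickle.load(f))  # {'coef': [0.1, 0.2]}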

examples/examples_with_obd/evaluate_off_policy_estimators.py

Lines changed: 11 additions & 5 deletions
@@ -139,7 +139,8 @@ def process(b: int):
         )
         # evaluate estimators' performances using relative estimation error (relative-ee)
         ope = OffPolicyEvaluation(
-            bandit_feedback=bandit_feedback, ope_estimators=ope_estimators,
+            bandit_feedback=bandit_feedback,
+            ope_estimators=ope_estimators,
         )
         action_dist = np.tile(
             action_dist_single_round, (bandit_feedback["n_rounds"], 1, 1)
@@ -152,12 +153,17 @@ def process(b: int):

         return relative_ee_b

-    processed = Parallel(backend="multiprocessing", n_jobs=n_jobs, verbose=50,)(
-        [delayed(process)(i) for i in np.arange(n_runs)]
-    )
+    processed = Parallel(
+        backend="multiprocessing",
+        n_jobs=n_jobs,
+        verbose=50,
+    )([delayed(process)(i) for i in np.arange(n_runs)])
     relative_ee_dict = {est.estimator_name: dict() for est in ope_estimators}
     for b, relative_ee_b in enumerate(processed):
-        for (estimator_name, relative_ee_,) in relative_ee_b.items():
+        for (
+            estimator_name,
+            relative_ee_,
+        ) in relative_ee_b.items():
             relative_ee_dict[estimator_name][b] = relative_ee_
     relative_ee_df = DataFrame(relative_ee_dict).describe().T.round(6)

examples/examples_with_synthetic/evaluate_off_policy_estimators.py

Lines changed: 11 additions & 5 deletions
@@ -172,7 +172,8 @@ def process(i: int):
         )
         # evaluate estimators' performances using relative estimation error (relative-ee)
         ope = OffPolicyEvaluation(
-            bandit_feedback=bandit_feedback_test, ope_estimators=ope_estimators,
+            bandit_feedback=bandit_feedback_test,
+            ope_estimators=ope_estimators,
         )
         relative_ee_i = ope.evaluate_performance_of_estimators(
             ground_truth_policy_value=ground_truth_policy_value,
@@ -182,12 +183,17 @@ def process(i: int):

         return relative_ee_i

-    processed = Parallel(backend="multiprocessing", n_jobs=n_jobs, verbose=50,)(
-        [delayed(process)(i) for i in np.arange(n_runs)]
-    )
+    processed = Parallel(
+        backend="multiprocessing",
+        n_jobs=n_jobs,
+        verbose=50,
+    )([delayed(process)(i) for i in np.arange(n_runs)])
     relative_ee_dict = {est.estimator_name: dict() for est in ope_estimators}
     for i, relative_ee_i in enumerate(processed):
-        for (estimator_name, relative_ee_,) in relative_ee_i.items():
+        for (
+            estimator_name,
+            relative_ee_,
+        ) in relative_ee_i.items():
             relative_ee_dict[estimator_name][i] = relative_ee_
     relative_ee_df = DataFrame(relative_ee_dict).describe().T.round(6)

obp/dataset/multiclass.py

Lines changed: 5 additions & 3 deletions
@@ -177,7 +177,9 @@ def n_samples(self) -> int:
         return self.y.shape[0]

     def split_train_eval(
-        self, eval_size: Union[int, float] = 0.25, random_state: Optional[int] = None,
+        self,
+        eval_size: Union[int, float] = 0.25,
+        random_state: Optional[int] = None,
     ) -> None:
         """Split the original data into the training (used for policy learning) and evaluation (used for OPE) sets.

@@ -204,7 +206,8 @@ def split_train_eval(
         self.n_samples_ev = self.X_ev.shape[0]

     def obtain_batch_bandit_feedback(
-        self, random_state: Optional[int] = None,
+        self,
+        random_state: Optional[int] = None,
     ) -> BanditFeedback:
         """Obtain batch logged bandit feedback, an evaluation policy, and its ground-truth policy value.

@@ -324,4 +327,3 @@ def calc_ground_truth_policy_value(self, action_dist: np.ndarray) -> np.ndarray:
             action_dist.shape[0] == self.n_samples_ev
         ), "the size of axis 0 of action_dist must be the same as the number of samples in the evaluation set"
         return action_dist[np.arange(self.n_samples_ev), self.y_ev].mean()
-
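The reformatted `split_train_eval` accepts an `eval_size` that may be a fraction or an absolute count, in the style of scikit-learn's splitters. A hedged sketch of that kind of split on toy data, using scikit-learn's `train_test_split` rather than obp's internal code:

import numpy as np
from sklearn.model_selection import train_test_split

X, y = np.arange(20).reshape(10, 2), np.arange(10)

# a fraction (0.25) or an absolute count both work for the split size
X_tr, X_ev, y_tr, y_ev = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=12345,
)
print(X_tr.shape[0], X_ev.shape[0])  # 7 training and 3 evaluation samples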

obp/dataset/synthetic.py

Lines changed: 11 additions & 4 deletions
@@ -194,7 +194,8 @@ def obtain_batch_bandit_feedback(self, n_rounds: int) -> BanditFeedback:
         action = np.array(
             [
                 self.random_.choice(
-                    np.arange(self.n_actions), p=behavior_policy_[i],
+                    np.arange(self.n_actions),
+                    p=behavior_policy_[i],
                 )
                 for i in np.arange(n_rounds)
             ]
@@ -240,7 +241,9 @@ def obtain_batch_bandit_feedback(self, n_rounds: int) -> BanditFeedback:


 def logistic_reward_function(
-    context: np.ndarray, action_context: np.ndarray, random_state: Optional[int] = None,
+    context: np.ndarray,
+    action_context: np.ndarray,
+    random_state: Optional[int] = None,
 ) -> np.ndarray:
     """Logistic mean reward function for synthetic bandit datasets.

@@ -280,7 +283,9 @@ def logistic_reward_function(


 def linear_reward_function(
-    context: np.ndarray, action_context: np.ndarray, random_state: Optional[int] = None,
+    context: np.ndarray,
+    action_context: np.ndarray,
+    random_state: Optional[int] = None,
 ) -> np.ndarray:
     """Linear mean reward function for synthetic bandit datasets.

@@ -320,7 +325,9 @@ def linear_reward_function(


 def linear_behavior_policy(
-    context: np.ndarray, action_context: np.ndarray, random_state: Optional[int] = None,
+    context: np.ndarray,
+    action_context: np.ndarray,
+    random_state: Optional[int] = None,
 ) -> np.ndarray:
     """Linear contextual behavior policy for synthetic bandit datasets.
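The first hunk reformats the per-round `random_.choice(np.arange(self.n_actions), p=behavior_policy_[i])` call that samples actions from the behavior policy. A toy sketch of that sampling step in plain NumPy (the probability vectors are fabricated):

import numpy as np

n_rounds, n_actions = 5, 4
random_ = np.random.RandomState(12345)

# hypothetical behavior policy: one probability vector per round, rows sum to one
behavior_policy_ = random_.dirichlet(np.ones(n_actions), size=n_rounds)

action = np.array(
    [
        random_.choice(
            np.arange(n_actions),
            p=behavior_policy_[i],
        )
        for i in np.arange(n_rounds)
    ]
)
print(action)  # five sampled action indices in [0, n_actions)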

obp/ope/estimators.py

Lines changed: 28 additions & 8 deletions
@@ -126,7 +126,10 @@ def estimate_policy_value(

         """
         return self._estimate_round_rewards(
-            reward=reward, action=action, position=position, action_dist=action_dist,
+            reward=reward,
+            action=action,
+            position=position,
+            action_dist=action_dist,
         ).mean()

     def estimate_interval(
@@ -169,7 +172,10 @@ def estimate_interval(

         """
         estimated_round_rewards = self._estimate_round_rewards(
-            reward=reward, action=action, position=position, action_dist=action_dist,
+            reward=reward,
+            action=action,
+            position=position,
+            action_dist=action_dist,
         )
         return estimate_confidence_interval_by_bootstrap(
             samples=estimated_round_rewards,
@@ -506,7 +512,11 @@ def _estimate_round_rewards(
             np.arange(n_rounds), :, position
         ]
         pi_e_at_position = action_dist[np.arange(n_rounds), :, position]
-        return np.average(q_hat_at_position, weights=pi_e_at_position, axis=1,)
+        return np.average(
+            q_hat_at_position,
+            weights=pi_e_at_position,
+            axis=1,
+        )

     def estimate_policy_value(
         self,
@@ -687,7 +697,9 @@ def _estimate_round_rewards(
         ]
         pi_e_at_position = action_dist[np.arange(n_rounds), :, position]
         estimated_rewards = np.average(
-            q_hat_at_position, weights=pi_e_at_position, axis=1,
+            q_hat_at_position,
+            weights=pi_e_at_position,
+            axis=1,
         )
         estimated_rewards += iw * (reward - q_hat_factual)
         return estimated_rewards
@@ -889,7 +901,9 @@ def _estimate_round_rewards(
         ]
         pi_e_at_position = action_dist[np.arange(n_rounds), :, position]
         estimated_rewards = np.average(
-            q_hat_at_position, weights=pi_e_at_position, axis=1,
+            q_hat_at_position,
+            weights=pi_e_at_position,
+            axis=1,
         )
         q_hat_factual = estimated_rewards_by_reg_model[
             np.arange(n_rounds), action, position
@@ -993,7 +1007,9 @@ def _estimate_round_rewards(
         ]
         pi_e_at_position = action_dist[np.arange(n_rounds), :, position]
         estimated_rewards = (1 - switch_indicator) * np.average(
-            q_hat_at_position, weights=pi_e_at_position, axis=1,
+            q_hat_at_position,
+            weights=pi_e_at_position,
+            axis=1,
         )
         estimated_rewards += switch_indicator * iw * reward
         return estimated_rewards
@@ -1098,7 +1114,9 @@ def _estimate_round_rewards(
         ]
         pi_e_at_position = action_dist[np.arange(n_rounds), :, position]
         estimated_rewards = np.average(
-            q_hat_at_position, weights=pi_e_at_position, axis=1,
+            q_hat_at_position,
+            weights=pi_e_at_position,
+            axis=1,
         )
         estimated_rewards += switch_indicator * iw * (reward - q_hat_factual)
         return estimated_rewards
@@ -1214,7 +1232,9 @@ def _estimate_round_rewards(
         ]
         pi_e_at_position = action_dist[np.arange(n_rounds), :, position]
         estimated_rewards = np.average(
-            q_hat_at_position, weights=pi_e_at_position, axis=1,
+            q_hat_at_position,
+            weights=pi_e_at_position,
+            axis=1,
         )
         estimated_rewards += shrinkage_weight * (reward - q_hat_factual)
         return estimated_rewards
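The expression Black keeps reformatting throughout this file, `np.average(q_hat_at_position, weights=pi_e_at_position, axis=1)`, is the model-based piece of these estimators: a per-round average of predicted rewards weighted by the evaluation policy's action probabilities. A self-contained sketch with made-up arrays (the variable names mirror the diff, the data is random):

import numpy as np

n_rounds, n_actions = 4, 3
rng = np.random.default_rng(0)

# predicted reward for every (round, action) pair at the relevant position
q_hat_at_position = rng.random((n_rounds, n_actions))
# evaluation policy's action probabilities; each row sums to one
pi_e_at_position = rng.dirichlet(np.ones(n_actions), size=n_rounds)

# expected reward per round under the evaluation policy, shape (n_rounds,)
estimated_rewards = np.average(
    q_hat_at_position,
    weights=pi_e_at_position,
    axis=1,
)
print(estimated_rewards.shape)  # (4,)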

obp/ope/regression_model.py

Lines changed: 4 additions & 1 deletion
@@ -333,7 +333,10 @@ def fit_predict(
         return estimated_rewards_by_reg_model

     def _pre_process_for_reg_model(
-        self, context: np.ndarray, action: np.ndarray, action_context: np.ndarray,
+        self,
+        context: np.ndarray,
+        action: np.ndarray,
+        action_context: np.ndarray,
     ) -> np.ndarray:
         """Preprocess feature vectors to train a give regression model.
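`_pre_process_for_reg_model` builds the feature matrix a regression model is trained on from the context, the chosen actions, and the action context. A purely illustrative sketch of one common way to do this, concatenating each round's context with the chosen action's representation (this is an assumption for illustration, not obp's actual preprocessing):

import numpy as np

n_rounds, dim_context, n_actions, dim_action_context = 5, 3, 4, 2
rng = np.random.default_rng(0)

context = rng.random((n_rounds, dim_context))
action = rng.integers(n_actions, size=n_rounds)
action_context = rng.random((n_actions, dim_action_context))

# one possible design matrix: per-round context next to the chosen action's context
X = np.concatenate([context, action_context[action]], axis=1)
print(X.shape)  # (5, 5) = (n_rounds, dim_context + dim_action_context)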

obp/policy/base.py

Lines changed: 3 additions & 1 deletion
@@ -198,7 +198,9 @@ def policy_type(self) -> str:
         return "offline"

     @abstractmethod
-    def fit(self,) -> None:
+    def fit(
+        self,
+    ) -> None:
         """Fits an offline bandit policy using the given logged bandit feedback data."""
         raise NotImplementedError
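The only change here is Black expanding the `fit(self,)` signature of the abstract offline learner. A hedged sketch of how an abstract method like this is overridden in a subclass (`OfflineLearnerBase` is a stand-in name and `MyOfflinePolicy` is illustrative, neither is part of obp):

from abc import ABC, abstractmethod


class OfflineLearnerBase(ABC):
    """Minimal stand-in for the abstract base class touched in this hunk."""

    @abstractmethod
    def fit(
        self,
    ) -> None:
        """Fits an offline bandit policy using the given logged bandit feedback data."""
        raise NotImplementedError


class MyOfflinePolicy(OfflineLearnerBase):
    def fit(
        self,
    ) -> None:
        # a real learner would consume logged bandit feedback here
        print("fitted")


MyOfflinePolicy().fit()  # prints "fitted"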

0 commit comments