Fixup pycodestyle issues (#3050)

connortann · thatlittleboy · web-flow · commit 2e9a4e6f80a8 · 2023-07-02T22:55:00.000+08:00
Co-authored-by: Jeremy Goh <30731072+thatlittleboy@users.noreply.github.com>
diff --git a/pyproject.toml b/pyproject.toml
@@ -95,6 +95,14 @@ select = [
   "F",  # pyflakes
   "I",  # isort
   "UP",  # pyupgrade
+  "E",  # pycodestyle
+  "W",  # warning
+]
+ignore = [
+  # Aim to progressively address these codes over time
+  "E501",  # Line too long
+  "E741",  # Ambiguous variable name: `l`
+  "E402",  # Module level import not at top of file
 ]
 target-version = "py37"
 
diff --git a/shap/_explanation.py b/shap/_explanation.py
@@ -532,7 +532,7 @@ def _numpy_func(self, fname, **kwargs):
             if new_self.data is not None:
                 try:
                     new_self.data = getattr(np, fname)(np.array(self.data), **kwargs)
-                except:
+                except Exception:
                     new_self.data = None
             if new_self.base_values is not None and issubclass(type(axis), int) and len(self.base_values.shape) > axis:
                 new_self.base_values = getattr(np, fname)(self.base_values, **kwargs)
diff --git a/shap/benchmark/metrics.py b/shap/benchmark/metrics.py
@@ -432,8 +432,10 @@ def __score_method(X, y, fcounts, model_generator, score_function, method_name,
     """ Test an explanation method.
     """
 
-    try: pickle
-    except NameError: assert False, "The 'dill' package could not be loaded and is needed for the benchmark!"
+    try:
+        pickle
+    except NameError:
+        raise ImportError("The 'dill' package could not be loaded and is needed for the benchmark!")
 
     old_seed = np.random.seed()
     np.random.seed(3293)
diff --git a/shap/datasets.py b/shap/datasets.py
@@ -176,7 +176,7 @@ def adult(display=False, n_points=None):
         raw_data = shap.utils.sample(raw_data, n_points, random_state=0)
 
     data = raw_data.drop(["Education"], axis=1)  # redundant with Education-Num
-    filt_dtypes = list(filter(lambda x: not (x[0] in ["Target", "Education"]), dtypes))
+    filt_dtypes = list(filter(lambda x: x[0] not in ["Target", "Education"], dtypes))
     data["Target"] = data["Target"] == " >50K"
     rcode = {
         "Not-in-family": 0,
@@ -238,7 +238,8 @@ def corrgroups60(display=False, n_points=1_000): # pylint: disable=unused-argume
         C[i,i+1] = C[i+1,i] = 0.99
         C[i,i+2] = C[i+2,i] = 0.99
         C[i+1,i+2] = C[i+2,i+1] = 0.99
-    f = lambda X: np.matmul(X, beta)
+    def f(X):
+        return np.matmul(X, beta)
 
     # Make sure the sample correlation is a perfect match
     X_start = np.random.randn(N, M)
@@ -273,7 +274,8 @@ def independentlinear60(display=False, n_points=1_000): # pylint: disable=unused
     # set one coefficent from each group of 3 to 1
     beta = np.zeros(M)
     beta[0:30:3] = 1
-    f = lambda X: np.matmul(X, beta)
+    def f(X):
+        return np.matmul(X, beta)
 
     # Make sure the sample correlation is a perfect match
     X_start = np.random.randn(N, M)
diff --git a/shap/explainers/_deep/__init__.py b/shap/explainers/_deep/__init__.py
@@ -71,13 +71,13 @@ def __init__(self, model, data, session=None, learning_phase_flags=None):
             try:
                 a.named_parameters()
                 framework = 'pytorch'
-            except:
+            except Exception:
                 framework = 'tensorflow'
         else:
             try:
                 model.named_parameters()
                 framework = 'pytorch'
-            except:
+            except Exception:
                 framework = 'tensorflow'
 
         if framework == 'tensorflow':
diff --git a/shap/explainers/_deep/deep_tf.py b/shap/explainers/_deep/deep_tf.py
@@ -100,7 +100,7 @@ def __init__(self, model, data, session=None, learning_phase_flags=None):
             try:
                 import keras
                 warnings.warn("keras is no longer supported, please use tf.keras instead.")
-            except:
+            except Exception:
                 pass
 
         if version.parse(tf.__version__) >= version.parse("2.4.0"):
@@ -129,7 +129,7 @@ def __init__(self, model, data, session=None, learning_phase_flags=None):
             self.multi_input = False
             if type(self.model_inputs) != list:
                 self.model_inputs = [self.model_inputs]
-        if type(data) != list and (hasattr(data, '__call__')==False):
+        if type(data) != list and (hasattr(data, "__call__") is False):
             data = [data]
         self.data = data
 
diff --git a/shap/explainers/_gradient.py b/shap/explainers/_gradient.py
@@ -65,13 +65,13 @@ def __init__(self, model, data, session=None, batch_size=50, local_smoothing=0):
             try:
                 a.named_parameters()
                 framework = 'pytorch'
-            except:
+            except Exception:
                 framework = 'tensorflow'
         else:
             try:
                 model.named_parameters()
                 framework = 'pytorch'
-            except:
+            except Exception:
                 framework = 'tensorflow'
 
         if isinstance(data, pd.DataFrame):
@@ -159,7 +159,7 @@ def __init__(self, model, data, session=None, batch_size=50, local_smoothing=0):
                 from tensorflow import keras
                 if version.parse(keras.__version__) < version.parse("2.1.0"):
                     warnings.warn("Your Keras version is older than 2.1.0 and not supported.")
-            except:
+            except Exception:
                 pass
 
         # determine the model inputs and outputs
diff --git a/shap/explainers/_kernel.py b/shap/explainers/_kernel.py
@@ -347,7 +347,8 @@ def explain(self, incoming_instance, **kwargs):
 
                 # determine how many subsets (and their complements) are of the current size
                 nsubsets = binom(self.M, subset_size)
-                if subset_size <= num_paired_subset_sizes: nsubsets *= 2
+                if subset_size <= num_paired_subset_sizes:
+                    nsubsets *= 2
                 log.debug(f"subset_size = {subset_size}")
                 log.debug(f"nsubsets = {nsubsets}")
                 log.debug("self.nsamples*weight_vector[subset_size-1] = {}".format(
@@ -366,7 +367,8 @@ def explain(self, incoming_instance, **kwargs):
 
                     # add all the samples of the current subset size
                     w = weight_vector[subset_size - 1] / binom(self.M, subset_size)
-                    if subset_size <= num_paired_subset_sizes: w /= 2.0
+                    if subset_size <= num_paired_subset_sizes:
+                        w /= 2.0
                     for inds in itertools.combinations(group_inds, subset_size):
                         mask[:] = 0.0
                         mask[np.array(inds, dtype='int64')] = 1.0
diff --git a/shap/explainers/_linear.py b/shap/explainers/_linear.py
@@ -281,7 +281,7 @@ def supports_model_with_masker(model, masker):
 
         try:
             Linear._parse_model(model)
-        except:
+        except Exception:
             return False
         return True
 
diff --git a/shap/explainers/_sampling.py b/shap/explainers/_sampling.py
@@ -132,7 +132,8 @@ def explain(self, incoming_instance, **kwargs):
             phi_var /= phi_var.sum(0)[np.newaxis, :]
             nsamples_each2 = (phi_var[self.varyingInds,:].mean(1) * round2_samples).astype(int)
             for i in range(len(nsamples_each2)):
-                if nsamples_each2[i] % 2 == 1: nsamples_each2[i] += 1
+                if nsamples_each2[i] % 2 == 1:
+                    nsamples_each2[i] += 1
             for i in range(len(nsamples_each2)):
                 if nsamples_each2.sum() > round2_samples:
                     nsamples_each2[i] -= 2
diff --git a/shap/explainers/_tree.py b/shap/explainers/_tree.py
@@ -586,7 +586,7 @@ def supports_model_with_masker(model, masker):
 
         try:
             TreeEnsemble(model)
-        except:
+        except Exception:
             return False
         return True
 
@@ -1016,7 +1016,7 @@ def __init__(self, model, data=None, data_missing=None, model_output=None):
             tree_info = self.original_model.dump_model()["tree_info"]
             try:
                 self.trees = [SingleTree(e, data=data, data_missing=data_missing) for e in tree_info]
-            except:
+            except Exception:
                 self.trees = None # we get here because the cext can't handle categorical splits yet
 
             self.objective = objective_name_map.get(model.params.get("objective", "regression"), None)
@@ -1029,7 +1029,7 @@ def __init__(self, model, data=None, data_missing=None, model_output=None):
             tree_info = self.original_model.dump_model()["tree_info"]
             try:
                 self.trees = [SingleTree(e, data=data, data_missing=data_missing) for e in tree_info]
-            except:
+            except Exception:
                 self.trees = None # we get here because the cext can't handle categorical splits yet
 
             self.objective = objective_name_map.get(model.params.get("objective", "regression"), None)
@@ -1042,7 +1042,7 @@ def __init__(self, model, data=None, data_missing=None, model_output=None):
             tree_info = self.original_model.dump_model()["tree_info"]
             try:
                 self.trees = [SingleTree(e, data=data, data_missing=data_missing) for e in tree_info]
-            except:
+            except Exception:
                 self.trees = None # we get here because the cext can't handle categorical splits yet
             self.objective = objective_name_map.get(model.objective, None)
             self.tree_output = tree_output_name_map.get(model.objective, None)
@@ -1056,7 +1056,7 @@ def __init__(self, model, data=None, data_missing=None, model_output=None):
             tree_info = self.original_model.dump_model()["tree_info"]
             try:
                 self.trees = [SingleTree(e, data=data, data_missing=data_missing) for e in tree_info]
-            except:
+            except Exception:
                 self.trees = None # we get here because the cext can't handle categorical splits yet
             # Note: for ranker, leaving tree_output and objective as None as they
             # are not implemented in native code yet
@@ -1069,7 +1069,7 @@ def __init__(self, model, data=None, data_missing=None, model_output=None):
             tree_info = self.original_model.dump_model()["tree_info"]
             try:
                 self.trees = [SingleTree(e, data=data, data_missing=data_missing) for e in tree_info]
-            except:
+            except Exception:
                 self.trees = None # we get here because the cext can't handle categorical splits yet
             self.objective = objective_name_map.get(model.objective, None)
             self.tree_output = tree_output_name_map.get(model.objective, None)
@@ -1089,7 +1089,7 @@ def __init__(self, model, data=None, data_missing=None, model_output=None):
             try:
                 cb_loader = CatBoostTreeModelLoader(model)
                 self.trees = cb_loader.get_trees(data=data, data_missing=data_missing)
-            except:
+            except Exception:
                 self.trees = None # we get here because the cext can't handle categorical splits yet
             self.tree_output = "log_odds"
             self.objective = "binary_crossentropy"
diff --git a/shap/explainers/tf_utils.py b/shap/explainers/tf_utils.py
@@ -27,7 +27,7 @@ def _get_session(session):
     if session is None:
         try:
             session = tf.compat.v1.keras.backend.get_session()
-        except:
+        except Exception:
             session = tf.keras.backend.get_session()
     return tf.get_default_session() if session is None else session
 
diff --git a/shap/maskers/_tabular.py b/shap/maskers/_tabular.py
@@ -36,10 +36,10 @@ def __init__(self, data, max_samples=100, clustering=None):
             The distance metric to use for creating the clustering of the features. The
             distance function can be any valid scipy.spatial.distance.pdist's metric argument.
             However we suggest using 'correlation' in most cases. The full list of options is
-            ‘braycurtis’, ‘canberra’, ‘chebyshev’, ‘cityblock’, ‘correlation’, ‘cosine’, ‘dice’,
-            ‘euclidean’, ‘hamming’, ‘jaccard’, ‘jensenshannon’, ‘kulsinski’, ‘mahalanobis’,
-            ‘matching’, ‘minkowski’, ‘rogerstanimoto’, ‘russellrao’, ‘seuclidean’,
-            ‘sokalmichener’, ‘sokalsneath’, ‘sqeuclidean’, ‘yule’. These are all
+            `braycurtis`, `canberra`, `chebyshev`, `cityblock`, `correlation`, `cosine`, `dice`,
+            `euclidean`, `hamming`, `jaccard`, `jensenshannon`, `kulsinski`, `mahalanobis`,
+            `matching`, `minkowski`, `rogerstanimoto`, `russellrao`, `seuclidean`,
+            `sokalmichener`, `sokalsneath`, `sqeuclidean`, `yule`. These are all
             the options from scipy.spatial.distance.pdist's metric argument.
         """
 
@@ -289,10 +289,10 @@ def __init__(self, data, max_samples=100, clustering="correlation"):
             If a string, then this is the distance metric to use for creating the clustering of
             the features. The distance function can be any valid scipy.spatial.distance.pdist's metric
             argument. However we suggest using 'correlation' in most cases. The full list of options is
-            ‘braycurtis’, ‘canberra’, ‘chebyshev’, ‘cityblock’, ‘correlation’, ‘cosine’, ‘dice’,
-            ‘euclidean’, ‘hamming’, ‘jaccard’, ‘jensenshannon’, ‘kulsinski’, ‘mahalanobis’,
-            ‘matching’, ‘minkowski’, ‘rogerstanimoto’, ‘russellrao’, ‘seuclidean’,
-            ‘sokalmichener’, ‘sokalsneath’, ‘sqeuclidean’, ‘yule’. These are all
+            `braycurtis`, `canberra`, `chebyshev`, `cityblock`, `correlation`, `cosine`, `dice`,
+            `euclidean`, `hamming`, `jaccard`, `jensenshannon`, `kulsinski`, `mahalanobis`,
+            `matching`, `minkowski`, `rogerstanimoto`, `russellrao`, `seuclidean`,
+            `sokalmichener`, `sokalsneath`, `sqeuclidean`, `yule`. These are all
             the options from scipy.spatial.distance.pdist's metric argument.
             If an array, then this is assumed to be the clustering of the features.
         """
diff --git a/shap/maskers/_text.py b/shap/maskers/_text.py
@@ -49,7 +49,7 @@ def __init__(self, tokenizer=None, mask_token=None, collapse_mask_token="auto",
         else:
             try:
                 self.tokenizer = SimpleTokenizer(tokenizer)
-            except:
+            except Exception:
                 raise Exception( # pylint: disable=raise-missing-from
                     "The passed tokenizer cannot be wrapped as a masker because it does not have a __call__ " + \
                     "method, not can it be interpreted as a splitting regexp!"
@@ -457,7 +457,7 @@ def merge_score(group1, group2, special_tokens):
         score -= 100
 
     # attach surrounding an openers and closers a bit later
-    if group1[0].s in openers and not group2[-1] in closers:
+    if group1[0].s in openers and group2[-1] not in closers:
         score -= 2
 
     # reach across connectors later
diff --git a/shap/plots/_bar.py b/shap/plots/_bar.py
@@ -269,7 +269,7 @@ def bar(shap_values, max_display=10, order=Explanation.abs, clustering=None, clu
             try:
                 if round(features[i]) == features[i]:
                     features[i] = int(features[i])
-            except:
+            except Exception:
                 pass # features[i] must not be a number
 
     pl.gca().xaxis.set_ticks_position('bottom')
diff --git a/shap/plots/_beeswarm.py b/shap/plots/_beeswarm.py
@@ -336,7 +336,7 @@ def beeswarm(shap_values, max_display=10, order=Explanation.abs.mean(0),
                 colored_feature = False
             else:
                 fvalues = np.array(fvalues, dtype=np.float64)  # make sure this can be numeric
-        except:
+        except Exception:
             colored_feature = False
         N = len(shaps)
         # hspacing = (np.max(shaps) - np.min(shaps)) / 200
@@ -505,7 +505,8 @@ def summary_legacy(shap_values, features=None, feature_names=None, max_display=N
         if plot_type == 'layered_violin':
             color = "coolwarm"
         elif multi_class:
-            color = lambda i: colors.red_blue_circle(i/len(shap_values))
+            def color(i):
+                return colors.red_blue_circle(i / len(shap_values))
         else:
             color = colors.blue_rgb
 
@@ -659,7 +660,7 @@ def summary_legacy(shap_values, features=None, feature_names=None, max_display=N
                     colored_feature = False
                 else:
                     values = np.array(values, dtype=np.float64)  # make sure this can be numeric
-            except:
+            except Exception:
                 colored_feature = False
             N = len(shaps)
             # hspacing = (np.max(shaps) - np.min(shaps)) / 200
diff --git a/shap/plots/_force_matplotlib.py b/shap/plots/_force_matplotlib.py
@@ -217,9 +217,11 @@ def format_data(data):
 
     # Define link function
     if data['link'] == 'identity':
-        convert_func = lambda x: x
+        def convert_func(x):
+            return x
     elif data['link'] == 'logit':
-        convert_func = lambda x: 1 / (1 + np.exp(-x))
+        def convert_func(x):
+            return 1 / (1 + np.exp(-x))
     else:
         assert False, "ERROR: Unrecognized link function: " + str(data['link'])
 
diff --git a/shap/plots/_scatter.py b/shap/plots/_scatter.py
@@ -305,7 +305,8 @@ def scatter(shap_values, color="#1E88E5", hist=True, axis_color="#333333", cmap=
     # optionally add jitter to feature values
     xv_no_jitter = xv.copy()
     if x_jitter > 0:
-        if x_jitter > 1: x_jitter = 1
+        if x_jitter > 1:
+            x_jitter = 1
         xvals = xv.copy()
         if isinstance(xvals[0], float):
             xvals = xvals.astype(float)
@@ -663,7 +664,8 @@ def dependence_legacy(ind, shap_values=None, features=None, feature_names=None,
 
     # optionally add jitter to feature values
     if x_jitter > 0:
-        if x_jitter > 1: x_jitter = 1
+        if x_jitter > 1:
+            x_jitter = 1
         xvals = xv.copy()
         if isinstance(xvals[0], float):
             xvals = xvals.astype(float)
diff --git a/shap/utils/_keras.py b/shap/utils/_keras.py
@@ -39,7 +39,7 @@ def clone_keras_layers(model, start_layer, stop_layer):
                 # behind the next one in line
                 layers_to_process.append(layer)
                 continue
-            if not layer.output.name in new_layers:
+            if layer.output.name not in new_layers:
                 new_layers[layer.output.name] = layer(layer_inputs)
             if layer.output.name == stop_layer.output.name:
                 break
diff --git a/shap/utils/_legacy.py b/shap/utils/_legacy.py
@@ -121,7 +121,7 @@ def match_model_to_data(model, data):
             out_val = model.f(data.convert_to_df())
         else:
             out_val = model.f(data.data)
-    except:
+    except Exception:
         print("Provided model function fails when applied to the provided data set.")
         raise
 
diff --git a/tests/benchmark/framework.py b/tests/benchmark/framework.py
@@ -2,7 +2,10 @@
 
 import shap
 
-model = lambda x: np.array([np.linalg.norm(x)])
+
+def model(x):
+    return np.array([np.linalg.norm(x)])
+
 X = np.array([[3, 4], [5, 12], [7, 24]])
 y = np.array([5, 13, 25])
 explainer = np.array([[-1, 2], [-4, 2], [1, 2]])
@@ -12,7 +15,8 @@ def test_update():
     """ This is to test the update function within benchmark/framework
     """
     sort_order = 'positive'
-    score_function = lambda true, pred: np.mean(pred)
+    def score_function(true, pred):
+        return np.mean(pred)
     perturbation = 'keep'
     scores = {'name': 'test', 'metrics': list(), 'values': dict()}
 
diff --git a/tests/benchmark/perturbation.py b/tests/benchmark/perturbation.py
diff --git a/tests/explainers/test_tree.py b/tests/explainers/test_tree.py