This repository was archived by the owner on Dec 16, 2022. It is now read-only.

Commit 18daa29

JackKuo666 authored and matt-gardner committed
Fix pearson_correlation.py (#3101)
* fix bug: ZeroDivisionError: float division by zero. Since the input may be, for example, a tensor like [[0., 0., 0., 0.], [0., 0., 0., 0.]], math.sqrt(predictions_variance) or math.sqrt(labels_variance) can be zero, so a check is added to prevent the denominator from being zero. If it is zero, the denominator is assigned a value of 1.
* fix bug: ZeroDivisionError: float division by zero. Same check as above, but if the denominator is zero, pearson_r is assigned a value of 0 instead.
* fix some pylint things
* Update pearson_correlation.py
* Update pearson_correlation_test.py
1 parent e641543 commit 18daa29
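
A minimal sketch of the failure mode described in the commit message (plain Python, with hand-picked values standing in for the statistics accumulated from an all-zeros batch; not part of the commit itself):

import math

# Statistics the metric would accumulate from something like torch.zeros(2, 4):
# both variances are zero, so the old denominator is zero as well.
covariance = 0.0
predictions_variance = 0.0
labels_variance = 0.0

denominator = math.sqrt(predictions_variance) * math.sqrt(labels_variance)
# Old code: pearson_r = covariance / denominator  ->  ZeroDivisionError: float division by zero
# Fixed behaviour: treat a (near-)zero denominator as "no correlation".
pearson_r = 0 if round(denominator, 5) == 0 else covariance / denominator
print(pearson_r)  # 0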

2 files changed: +72 -34 lines changed

allennlp/tests/training/metrics/pearson_correlation_test.py  (+64 -33)

@@ -7,54 +7,85 @@
 from allennlp.training.metrics import PearsonCorrelation
 
 
+def pearson_corrcoef(predictions, labels, fweights=None):
+    covariance_matrices = np.cov(predictions, labels, fweights=fweights)
+    denominator = np.sqrt(covariance_matrices[0, 0] * covariance_matrices[1, 1])
+    if np.around(denominator, decimals=5) == 0:
+        expected_pearson_correlation = 0
+    else:
+        expected_pearson_correlation = covariance_matrices[0, 1] / denominator
+    return expected_pearson_correlation
+
+
 class PearsonCorrelationTest(AllenNlpTestCase):
     def test_pearson_correlation_unmasked_computation(self):
         pearson_correlation = PearsonCorrelation()
         batch_size = 100
         num_labels = 10
-        predictions = np.random.randn(batch_size, num_labels).astype("float32")
-        labels = 0.5 * predictions + np.random.randn(batch_size, num_labels).astype("float32")
+        predictions_1 = np.random.randn(batch_size, num_labels).astype("float32")
+        labels_1 = 0.5 * predictions_1 + np.random.randn(batch_size, num_labels).astype("float32")
+
+        predictions_2 = np.random.randn(1).repeat(num_labels).astype("float32")
+        predictions_2 = predictions_2[np.newaxis, :].repeat(batch_size, axis=0)
+        labels_2 = np.random.randn(1).repeat(num_labels).astype("float32")
+        labels_2 = 0.5 * predictions_2 + labels_2[np.newaxis, :].repeat(batch_size, axis=0)
+
+        # in most cases, the data is constructed like predictions_1, the data of such a batch different.
+        # but in a few cases, for example, predictions_2, the data of such a batch is exactly the same.
+        predictions_labels = [(predictions_1, labels_1), (predictions_2, labels_2)]
 
         stride = 10
 
-        for i in range(batch_size // stride):
-            timestep_predictions = torch.FloatTensor(predictions[stride * i:stride * (i+1), :])
-            timestep_labels = torch.FloatTensor(labels[stride * i:stride * (i+1), :])
-            expected_pearson_correlation = np.corrcoef(predictions[:stride * (i + 1), :].reshape(-1),
-                                                       labels[:stride * (i + 1), :].reshape(-1))[0, 1]
-            pearson_correlation(timestep_predictions, timestep_labels)
-            assert_allclose(expected_pearson_correlation, pearson_correlation.get_metric(), rtol=1e-5)
-        # Test reset
-        pearson_correlation.reset()
-        pearson_correlation(torch.FloatTensor(predictions), torch.FloatTensor(labels))
-        assert_allclose(np.corrcoef(predictions.reshape(-1), labels.reshape(-1))[0, 1],
-                        pearson_correlation.get_metric(), rtol=1e-5)
+        for predictions, labels in predictions_labels:
+            pearson_correlation.reset()
+            for i in range(batch_size // stride):
+                timestep_predictions = torch.FloatTensor(predictions[stride * i:stride * (i + 1), :])
+                timestep_labels = torch.FloatTensor(labels[stride * i:stride * (i + 1), :])
+                expected_pearson_correlation = pearson_corrcoef(predictions[:stride * (i + 1), :].reshape(-1),
+                                                                labels[:stride * (i + 1), :].reshape(-1))
+                pearson_correlation(timestep_predictions, timestep_labels)
+                assert_allclose(expected_pearson_correlation, pearson_correlation.get_metric(), rtol=1e-5)
+            # Test reset
+            pearson_correlation.reset()
+            pearson_correlation(torch.FloatTensor(predictions), torch.FloatTensor(labels))
+            assert_allclose(pearson_corrcoef(predictions.reshape(-1), labels.reshape(-1)),
+                            pearson_correlation.get_metric(), rtol=1e-5)
 
     def test_pearson_correlation_masked_computation(self):
         pearson_correlation = PearsonCorrelation()
         batch_size = 100
         num_labels = 10
-        predictions = np.random.randn(batch_size, num_labels).astype("float32")
-        labels = 0.5 * predictions + np.random.randn(batch_size, num_labels).astype("float32")
+        predictions_1 = np.random.randn(batch_size, num_labels).astype("float32")
+        labels_1 = 0.5 * predictions_1 + np.random.randn(batch_size, num_labels).astype("float32")
+
+        predictions_2 = np.random.randn(1).repeat(num_labels).astype("float32")
+        predictions_2 = predictions_2[np.newaxis, :].repeat(batch_size, axis=0)
+        labels_2 = np.random.randn(1).repeat(num_labels).astype("float32")
+        labels_2 = 0.5 * predictions_2 + labels_2[np.newaxis, :].repeat(batch_size, axis=0)
+
+        predictions_labels = [(predictions_1, labels_1), (predictions_2, labels_2)]
+
         # Random binary mask
         mask = np.random.randint(0, 2, size=(batch_size, num_labels)).astype("float32")
         stride = 10
 
-        for i in range(batch_size // stride):
-            timestep_predictions = torch.FloatTensor(predictions[stride * i:stride * (i+1), :])
-            timestep_labels = torch.FloatTensor(labels[stride * i:stride * (i+1), :])
-            timestep_mask = torch.FloatTensor(mask[stride * i:stride * (i+1), :])
-            covariance_matrices = np.cov(predictions[:stride * (i + 1), :].reshape(-1),
-                                         labels[:stride * (i + 1), :].reshape(-1),
-                                         fweights=mask[:stride * (i + 1), :].reshape(-1))
-            expected_pearson_correlation = covariance_matrices[0, 1] / np.sqrt(covariance_matrices[0, 0] *
-                                                                               covariance_matrices[1, 1])
-            pearson_correlation(timestep_predictions, timestep_labels, timestep_mask)
+        for predictions, labels in predictions_labels:
+            pearson_correlation.reset()
+            for i in range(batch_size // stride):
+                timestep_predictions = torch.FloatTensor(predictions[stride * i:stride * (i + 1), :])
+                timestep_labels = torch.FloatTensor(labels[stride * i:stride * (i + 1), :])
+                timestep_mask = torch.FloatTensor(mask[stride * i:stride * (i + 1), :])
+                expected_pearson_correlation = pearson_corrcoef(predictions[:stride * (i + 1), :].reshape(-1),
+                                                                labels[:stride * (i + 1), :].reshape(-1),
+                                                                fweights=mask[:stride * (i + 1), :].reshape(-1))
+
+                pearson_correlation(timestep_predictions, timestep_labels, timestep_mask)
+                assert_allclose(expected_pearson_correlation, pearson_correlation.get_metric(), rtol=1e-5)
+            # Test reset
+            pearson_correlation.reset()
+            pearson_correlation(torch.FloatTensor(predictions),
+                                torch.FloatTensor(labels), torch.FloatTensor(mask))
+            expected_pearson_correlation = pearson_corrcoef(predictions.reshape(-1), labels.reshape(-1),
+                                                            fweights=mask.reshape(-1))
+
             assert_allclose(expected_pearson_correlation, pearson_correlation.get_metric(), rtol=1e-5)
-        # Test reset
-        pearson_correlation.reset()
-        pearson_correlation(torch.FloatTensor(predictions), torch.FloatTensor(labels), torch.FloatTensor(mask))
-        covariance_matrices = np.cov(predictions.reshape(-1), labels.reshape(-1), fweights=mask.reshape(-1))
-        expected_pearson_correlation = covariance_matrices[0, 1] / np.sqrt(covariance_matrices[0, 0] *
-                                                                           covariance_matrices[1, 1])
-        assert_allclose(expected_pearson_correlation, pearson_correlation.get_metric(), rtol=1e-5)
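
For context (not part of the diff): the tests switch from np.corrcoef to the pearson_corrcoef helper above because np.corrcoef yields nan when one of the inputs is constant, which would make assert_allclose fail on batches like predictions_2. A small sketch assuming only NumPy:

import numpy as np

constant = np.zeros(8, dtype="float32")       # every value identical, variance is 0
noisy = np.random.randn(8).astype("float32")

# np.corrcoef divides by a zero standard deviation: the result is nan
# (NumPy also emits a RuntimeWarning about an invalid value).
print(np.corrcoef(constant, noisy)[0, 1])     # nan

# The pearson_corrcoef helper in the test file guards that division and returns 0
# instead, matching what PearsonCorrelation.get_metric() now reports for such data.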

allennlp/training/metrics/pearson_correlation.py  (+8 -1)

@@ -1,5 +1,6 @@
 from typing import Optional
 import math
+import numpy as np
 
 from overrides import overrides
 import torch
@@ -29,6 +30,8 @@ class PearsonCorrelation(Metric):
     If we have these values, the sample Pearson correlation coefficient is simply:
 
     r = covariance / (sqrt(predictions_variance) * sqrt(labels_variance))
+
+    if predictions_variance or labels_variance is 0, r is 0
     """
     def __init__(self) -> None:
         self._predictions_labels_covariance = Covariance()
@@ -65,7 +68,11 @@ def get_metric(self, reset: bool = False):
         labels_variance = self._labels_variance.get_metric(reset=reset)
         if reset:
             self.reset()
-        pearson_r = covariance / (math.sqrt(predictions_variance) * math.sqrt(labels_variance))
+        denominator = (math.sqrt(predictions_variance) * math.sqrt(labels_variance))
+        if np.around(denominator, decimals=5) == 0:
+            pearson_r = 0
+        else:
+            pearson_r = covariance / denominator
         return pearson_r
 
     @overrides
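
A minimal usage sketch of the fixed metric (assumes an AllenNLP build that includes this change): constant inputs now produce a correlation of 0 instead of crashing in get_metric().

import torch
from allennlp.training.metrics import PearsonCorrelation

metric = PearsonCorrelation()
predictions = torch.zeros(2, 4)  # zero variance in the predictions
labels = torch.zeros(2, 4)       # zero variance in the labels
metric(predictions, labels)
print(metric.get_metric())       # 0 (before this commit: ZeroDivisionError: float division by zero)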
