Skip to content

Commit 953f89c

Browse files
author
Takashi Matsuo
authored
[dlp] fix: mitigate flakiness (#3919)
* [dlp] fix: mitigate flakiness * make the Pub/Sub fixture function level * shorten the timeout for the tests from 300 secs to 30 secs * retrying all the tests in risk_test.py 3 times fixes #3897 fixes #3896 fixes #3895 fixes #3894 fixes #3893 fixes #3892 fixes #3890 fixes #3889 * more retries, comment * 30 seconds operation wait and 20 minutes retry delay * lint fix etc * limit the max retry wait time
1 parent d671543 commit 953f89c

File tree

2 files changed

+63
-13
lines changed

2 files changed

+63
-13
lines changed

dlp/conftest.py

+20
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
# Copyright 2020 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the 'License');
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an 'AS IS' BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
import pytest
16+
17+
18+
# Used in risk_test.py to limit the maximum wait time before the flaky retries.
19+
def pytest_configure(config):
20+
pytest.MAX_FLAKY_WAIT = 3600 # maximum of an hour

dlp/risk_test.py

+43-13
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
# limitations under the License.
1414

1515
import os
16+
import time
1617
import uuid
1718

1819
import google.cloud.bigquery
@@ -36,9 +37,14 @@
3637
BIGQUERY_TABLE_ID = "dlp_test_table" + UNIQUE_STRING
3738
BIGQUERY_HARMFUL_TABLE_ID = "harmful" + UNIQUE_STRING
3839

40+
TIMEOUT = 30
41+
3942

4043
# Create new custom topic/subscription
41-
@pytest.fixture(scope="module")
44+
# We observe sometimes all the tests in this file fail. In a
45+
# hypothesis where DLP service somehow loses the connection to the
46+
# topic, now we use function scope for Pub/Sub fixtures.
47+
@pytest.fixture(scope="function")
4248
def topic_id():
4349
# Creates a pubsub topic, and tears it down.
4450
publisher = google.cloud.pubsub.PublisherClient()
@@ -53,7 +59,7 @@ def topic_id():
5359
publisher.delete_topic(topic_path)
5460

5561

56-
@pytest.fixture(scope="module")
62+
@pytest.fixture(scope="function")
5763
def subscription_id(topic_id):
5864
# Subscribes to a topic.
5965
subscriber = google.cloud.pubsub.SubscriberClient()
@@ -160,7 +166,22 @@ def bigquery_project():
160166
bigquery_client.delete_dataset(dataset_ref, delete_contents=True)
161167

162168

163-
@pytest.mark.flaky
169+
def delay(err, *args):
170+
# 20 mins of delay. This sounds like too long a delay, but we
171+
# occasionally observe consecutive time blocks where operations are
172+
# slow which leads to the test failures. These situations tend to
173+
# get self-healed in 20 minutes or so, so I'm trying this strategy.
174+
#
175+
# There are 10 tests, so we don't want the retry delay happening
176+
# for all the tests. When we exhaust the MAX_FLAKY_WAIT, we retry
177+
# the test immediately.
178+
wait_time = min(pytest.MAX_FLAKY_WAIT, 60*20)
179+
pytest.MAX_FLAKY_WAIT -= wait_time
180+
time.sleep(wait_time)
181+
return True
182+
183+
184+
@pytest.mark.flaky(max_runs=2, min_passes=1, rerun_filter=delay)
164185
def test_numerical_risk_analysis(
165186
topic_id, subscription_id, bigquery_project, capsys
166187
):
@@ -172,13 +193,14 @@ def test_numerical_risk_analysis(
172193
NUMERIC_FIELD,
173194
topic_id,
174195
subscription_id,
196+
timeout=TIMEOUT,
175197
)
176198

177199
out, _ = capsys.readouterr()
178200
assert "Value Range:" in out
179201

180202

181-
@pytest.mark.flaky
203+
@pytest.mark.flaky(max_runs=2, min_passes=1, rerun_filter=delay)
182204
def test_categorical_risk_analysis_on_string_field(
183205
topic_id, subscription_id, bigquery_project, capsys
184206
):
@@ -190,14 +212,14 @@ def test_categorical_risk_analysis_on_string_field(
190212
UNIQUE_FIELD,
191213
topic_id,
192214
subscription_id,
193-
timeout=180,
215+
timeout=TIMEOUT,
194216
)
195217

196218
out, _ = capsys.readouterr()
197219
assert "Most common value occurs" in out
198220

199221

200-
@pytest.mark.flaky
222+
@pytest.mark.flaky(max_runs=2, min_passes=1, rerun_filter=delay)
201223
def test_categorical_risk_analysis_on_number_field(
202224
topic_id, subscription_id, bigquery_project, capsys
203225
):
@@ -209,13 +231,14 @@ def test_categorical_risk_analysis_on_number_field(
209231
NUMERIC_FIELD,
210232
topic_id,
211233
subscription_id,
234+
timeout=TIMEOUT,
212235
)
213236

214237
out, _ = capsys.readouterr()
215238
assert "Most common value occurs" in out
216239

217240

218-
@pytest.mark.flaky
241+
@pytest.mark.flaky(max_runs=2, min_passes=1, rerun_filter=delay)
219242
def test_k_anonymity_analysis_single_field(
220243
topic_id, subscription_id, bigquery_project, capsys
221244
):
@@ -227,14 +250,15 @@ def test_k_anonymity_analysis_single_field(
227250
topic_id,
228251
subscription_id,
229252
[NUMERIC_FIELD],
253+
timeout=TIMEOUT,
230254
)
231255

232256
out, _ = capsys.readouterr()
233257
assert "Quasi-ID values:" in out
234258
assert "Class size:" in out
235259

236260

237-
@pytest.mark.flaky(max_runs=3, min_passes=1)
261+
@pytest.mark.flaky(max_runs=2, min_passes=1, rerun_filter=delay)
238262
def test_k_anonymity_analysis_multiple_fields(
239263
topic_id, subscription_id, bigquery_project, capsys
240264
):
@@ -246,14 +270,15 @@ def test_k_anonymity_analysis_multiple_fields(
246270
topic_id,
247271
subscription_id,
248272
[NUMERIC_FIELD, REPEATED_FIELD],
273+
timeout=TIMEOUT,
249274
)
250275

251276
out, _ = capsys.readouterr()
252277
assert "Quasi-ID values:" in out
253278
assert "Class size:" in out
254279

255280

256-
@pytest.mark.flaky
281+
@pytest.mark.flaky(max_runs=2, min_passes=1, rerun_filter=delay)
257282
def test_l_diversity_analysis_single_field(
258283
topic_id, subscription_id, bigquery_project, capsys
259284
):
@@ -266,6 +291,7 @@ def test_l_diversity_analysis_single_field(
266291
subscription_id,
267292
UNIQUE_FIELD,
268293
[NUMERIC_FIELD],
294+
timeout=TIMEOUT,
269295
)
270296

271297
out, _ = capsys.readouterr()
@@ -274,7 +300,7 @@ def test_l_diversity_analysis_single_field(
274300
assert "Sensitive value" in out
275301

276302

277-
@pytest.mark.flaky(max_runs=3, min_passes=1)
303+
@pytest.mark.flaky(max_runs=2, min_passes=1, rerun_filter=delay)
278304
def test_l_diversity_analysis_multiple_field(
279305
topic_id, subscription_id, bigquery_project, capsys
280306
):
@@ -287,6 +313,7 @@ def test_l_diversity_analysis_multiple_field(
287313
subscription_id,
288314
UNIQUE_FIELD,
289315
[NUMERIC_FIELD, REPEATED_FIELD],
316+
timeout=TIMEOUT,
290317
)
291318

292319
out, _ = capsys.readouterr()
@@ -295,7 +322,7 @@ def test_l_diversity_analysis_multiple_field(
295322
assert "Sensitive value" in out
296323

297324

298-
@pytest.mark.flaky
325+
@pytest.mark.flaky(max_runs=2, min_passes=1, rerun_filter=delay)
299326
def test_k_map_estimate_analysis_single_field(
300327
topic_id, subscription_id, bigquery_project, capsys
301328
):
@@ -308,6 +335,7 @@ def test_k_map_estimate_analysis_single_field(
308335
subscription_id,
309336
[NUMERIC_FIELD],
310337
["AGE"],
338+
timeout=TIMEOUT,
311339
)
312340

313341
out, _ = capsys.readouterr()
@@ -316,7 +344,7 @@ def test_k_map_estimate_analysis_single_field(
316344
assert "Values" in out
317345

318346

319-
@pytest.mark.flaky(max_runs=3, min_passes=1)
347+
@pytest.mark.flaky(max_runs=2, min_passes=1, rerun_filter=delay)
320348
def test_k_map_estimate_analysis_multiple_field(
321349
topic_id, subscription_id, bigquery_project, capsys
322350
):
@@ -329,6 +357,7 @@ def test_k_map_estimate_analysis_multiple_field(
329357
subscription_id,
330358
[NUMERIC_FIELD, STRING_BOOLEAN_FIELD],
331359
["AGE", "GENDER"],
360+
timeout=TIMEOUT,
332361
)
333362

334363
out, _ = capsys.readouterr()
@@ -337,7 +366,7 @@ def test_k_map_estimate_analysis_multiple_field(
337366
assert "Values" in out
338367

339368

340-
@pytest.mark.flaky
369+
@pytest.mark.flaky(max_runs=2, min_passes=1, rerun_filter=delay)
341370
def test_k_map_estimate_analysis_quasi_ids_info_types_equal(
342371
topic_id, subscription_id, bigquery_project
343372
):
@@ -351,4 +380,5 @@ def test_k_map_estimate_analysis_quasi_ids_info_types_equal(
351380
subscription_id,
352381
[NUMERIC_FIELD, STRING_BOOLEAN_FIELD],
353382
["AGE"],
383+
timeout=TIMEOUT,
354384
)

0 commit comments

Comments
 (0)