HypothesisWorks · DRMacIver · Nov 28, 2019 · Nov 27, 2019 · Nov 27, 2019 · Nov 27, 2019
diff --git a/hypothesis-python/RELEASE.rst b/hypothesis-python/RELEASE.rst
@@ -0,0 +1,8 @@
+RELEASE_TYPE: minor
+
+This release significantly improves the data distribution in :doc:`rule based stateful testing <stateful>`,
+by using a technique called `Swarm Testing (Groce, Alex, et al. "Swarm testing."
+Proceedings of the 2012 International Symposium on Software Testing and Analysis. ACM, 2012.) <https://agroce.github.io/issta12.pdf>`_
+to select which rules are run in any given test case. This should allow it to find many issues that it would previously have missed.
+
+This change is likely to be especially beneficial for stateful tests with large numbers of rules.
diff --git a/hypothesis-python/src/hypothesis/core.py b/hypothesis-python/src/hypothesis/core.py
@@ -758,6 +758,12 @@ def run_engine(self):
                 report("".join(traceback.format_exception(type(e), e, tb)))
 
             finally:  # pragma: no cover
+                # Mostly useful for ``find`` and ensuring that objects that
+                # hold on to a reference to ``data`` know that it's now been
+                # finished and they shouldn't attempt to draw more data from
+                # it.
+                ran_example.freeze()
+
                 # This section is in fact entirely covered by the tests in
                 # test_reproduce_failure, but it seems to trigger a lovely set
                 # of coverage bugs: The branches show up as uncovered (despite

diff --git a/hypothesis-python/src/hypothesis/searchstrategy/featureflags.py b/hypothesis-python/src/hypothesis/searchstrategy/featureflags.py
@@ -0,0 +1,129 @@
+# coding=utf-8
+#
+# This file is part of Hypothesis, which may be found at
+# https://github.com/HypothesisWorks/hypothesis/
+#
+# Most of this work is copyright (C) 2013-2019 David R. MacIver
+# ([email protected]), but it contains contributions by others. See
+# CONTRIBUTING.rst for a full list of people who may hold copyright, and
+# consult the git log if you need to determine who owns an individual
+# contribution.
+#
+# This Source Code Form is subject to the terms of the Mozilla Public License,
+# v. 2.0. If a copy of the MPL was not distributed with this file, You can
+# obtain one at https://mozilla.org/MPL/2.0/.
+#
+# END HEADER
+
+from __future__ import absolute_import, division, print_function
+
+import hypothesis.internal.conjecture.utils as cu
+from hypothesis.searchstrategy.strategies import SearchStrategy
+
+FEATURE_LABEL = cu.calc_label_from_name("feature flag")
+
+
+class FeatureFlags(object):
+    """Object that can be used to control a number of feature flags for a
+    given test run.
+
+    This enables an approach to data generation called swarm testing (
+    see Groce, Alex, et al. "Swarm testing." Proceedings of the 2012
+    International Symposium on Software Testing and Analysis. ACM, 2012), in
+    which generation is biased by selectively turning some features off for
+    each test case generated. When there are many interacting features this can
+    find bugs that a pure generation strategy would otherwise have missed.
+
+    FeatureFlags are designed to "shrink open", so that during shrinking they
+    become less restrictive. This allows us to potentially shrink to smaller
+    test cases that were forbidden during the generation phase because they
+    required disabled features.
+    """
+
+    def __init__(self, data=None, enabled=(), disabled=()):
+        self.__data = data
+        self.__decisions = {}
+
+        for f in enabled:
+            self.__decisions[f] = 0
+
+        for f in disabled:
+            self.__decisions[f] = 255
+
+        # In the original swarm testing paper they turn features on or off
+        # uniformly at random. Instead we decide the probability with which to
+        # enable features up front. This can allow for scenarios where all or
+        # no features are enabled, which are vanishingly unlikely in the
+        # original model.
+        #
+        # We implement this as a single 8-bit baseline and enable a feature
+        # when (255 - its drawn value) >= baseline. In particular when
+        # self.__baseline is 0, all features will be enabled. This is so that
+        # we shrink in the direction of more features being enabled.
+        if self.__data is not None:
+            self.__baseline = data.draw_bits(8)
+        else:
+            # data is None means example mode: only the enabled/disabled lists
+            # matter. Baseline 1 keeps value 0 enabled and value 255 disabled.
+            self.__baseline = 1
+
+    def is_enabled(self, name):
+        """Tests whether the feature named ``name`` should be enabled on this
+        test run."""
+        if self.__data is None or self.__data.frozen:
+            # Feature set objects might hang around after data generation has
+            # finished. If this happens then we just report all new features as
+            # enabled, because that's our shrinking direction and they have no
+            # impact on data generation if they weren't used while it was
+            # running.
+            try:
+                return self.__is_value_enabled(self.__decisions[name])
+            except KeyError:
+                return True
+
+        data = self.__data
+
+        data.start_example(label=FEATURE_LABEL)
+        if name in self.__decisions:
+            # If we've already decided on this feature then we don't actually
+            # need to draw anything, but we do write the same decision to the
+            # input stream. This allows us to lazily decide whether a feature
+            # is enabled, because it means that if we happen to delete the part
+            # of the test case where we originally decided, the next point at
+            # which we make this decision just makes the decision it previously
+            # made.
+            value = self.__decisions[name]
+            data.draw_bits(8, forced=value)
+        else:
+            # If the baseline is 0 then everything is enabled so it doesn't
+            # matter what we have here and we might as well make the shrinker's
+            # life easier by forcing it to zero.
+            if self.__baseline == 0:
+                value = 0
+                data.draw_bits(8, forced=0)
+            else:
+                value = data.draw_bits(8)
+            self.__decisions[name] = value
+        data.stop_example()
+        return self.__is_value_enabled(value)
+
+    def __is_value_enabled(self, value):
+        """Check if a given value drawn for a feature counts as enabled. Note
+        that low values are more likely to be enabled. This is again in aid of
+        shrinking open. In particular a value of 0 is always enabled."""
+        return (255 - value) >= self.__baseline
+
+    def __repr__(self):
+        enabled = []
+        disabled = []
+        for k, v in self.__decisions.items():
+            if self.__is_value_enabled(v):
+                enabled.append(k)
+            else:
+                disabled.append(k)
+        return "FeatureFlags(enabled=%r, disabled=%r)" % (enabled, disabled)
+
+
+class FeatureStrategy(SearchStrategy):
+    def do_draw(self, data):
+        return FeatureFlags(data)
diff --git a/hypothesis-python/src/hypothesis/stateful.py b/hypothesis-python/src/hypothesis/stateful.py
@@ -47,6 +47,7 @@
 from hypothesis.internal.reflection import function_digest, nicerepr, proxies, qualname
 from hypothesis.internal.validation import check_type
 from hypothesis.reporting import current_verbosity, report
+from hypothesis.searchstrategy.featureflags import FeatureStrategy
 from hypothesis.searchstrategy.strategies import OneOfStrategy, SearchStrategy
 from hypothesis.vendor.pretty import CUnicodeIO, RepresentationPrinter
 
@@ -610,6 +611,10 @@ def __init__(self, machine):
         self.machine = machine
         self.rules = list(machine.rules())
 
+        self.enabled_rules_strategy = st.shared(
+            FeatureStrategy(), key=("enabled rules", machine),
+        )
+
         # The order is a bit arbitrary. Primarily we're trying to group rules
         # that write to the same location together, and to put rules with no
         # target first as they have less effect on the structure. We order from
@@ -635,12 +640,27 @@ def do_draw(self, data):
         if not any(self.is_valid(rule) for rule in self.rules):
             msg = u"No progress can be made from state %r" % (self.machine,)
             quiet_raise(InvalidDefinition(msg))
-        rule = data.draw(st.sampled_from(self.rules).filter(self.is_valid))
+
+        feature_flags = data.draw(self.enabled_rules_strategy)
+
+        # Note: The order of the filters here is actually quite important,
+        # because checking is_enabled makes choices, so increases the size of
+        # the choice sequence. This means that if we are in a case where many
+        # rules are invalid we will make a lot more choices if we ask if they
+        # are enabled before we ask if they are valid, so our test cases will
+        # be artificially large.
+        rule = data.draw(
+            st.sampled_from(self.rules)
+            .filter(self.is_valid)
+            .filter(lambda r: feature_flags.is_enabled(r.function.__name__))
+        )
+
         return (rule, data.draw(rule.arguments_strategy))
 
     def is_valid(self, rule):
         if rule.precondition and not rule.precondition(self.machine):
             return False
+
         for b in rule.bundles:
             bundle = self.machine.bundle(b.name)
             if not bundle:

diff --git a/hypothesis-python/tests/cover/test_feature_flags.py b/hypothesis-python/tests/cover/test_feature_flags.py
@@ -0,0 +1,84 @@
+# coding=utf-8
+#
+# This file is part of Hypothesis, which may be found at
+# https://github.com/HypothesisWorks/hypothesis/
+#
+# Most of this work is copyright (C) 2013-2019 David R. MacIver
+# ([email protected]), but it contains contributions by others. See
+# CONTRIBUTING.rst for a full list of people who may hold copyright, and
+# consult the git log if you need to determine who owns an individual
+# contribution.
+#
+# This Source Code Form is subject to the terms of the Mozilla Public License,
+# v. 2.0. If a copy of the MPL was not distributed with this file, You can
+# obtain one at https://mozilla.org/MPL/2.0/.
+#
+# END HEADER
+
+from __future__ import absolute_import, division, print_function
+
+from hypothesis import given, strategies as st
+from hypothesis.internal.compat import hrange
+from hypothesis.searchstrategy.featureflags import FeatureFlags, FeatureStrategy
+from tests.common.debug import find_any, minimal
+
+STRAT = FeatureStrategy()
+
+
+def test_can_all_be_enabled():
+    find_any(STRAT, lambda x: all(x.is_enabled(i) for i in hrange(100)))
+
+
+def test_can_all_be_disabled():
+    find_any(STRAT, lambda x: all(not x.is_enabled(i) for i in hrange(100)))
+
+
+def test_minimizes_open():
+    features = hrange(10)
+
+    flags = minimal(STRAT, lambda x: [x.is_enabled(i) for i in features])
+
+    assert all(flags.is_enabled(i) for i in features)
+
+
+def test_minimizes_individual_features_to_open():
+    features = list(hrange(10))
+
+    flags = minimal(
+        STRAT, lambda x: sum([x.is_enabled(i) for i in features]) < len(features)
+    )
+
+    assert all(flags.is_enabled(i) for i in features[:-1])
+    assert not flags.is_enabled(features[-1])
+
+
+def test_marks_unknown_features_as_enabled():
+    x = find_any(STRAT, lambda v: True)
+
+    assert x.is_enabled("fish")
+
+
+def test_by_default_all_enabled():
+    f = FeatureFlags()
+
+    assert f.is_enabled("foo")
+
+
+@given(st.data())
+def test_repr_can_be_evalled(data):
+    flags = data.draw(STRAT)
+
+    features = data.draw(st.lists(st.text(), unique=True))
+
+    for f in features:
+        flags.is_enabled(f)
+
+    flags2 = eval(repr(flags))
+
+    for f in features:
+        assert flags2.is_enabled(f) == flags.is_enabled(f)
+
+    more_features = data.draw(st.lists(st.text().filter(lambda s: s not in features)))
+
+    for f in more_features:
+        assert flags2.is_enabled(f)
diff --git a/hypothesis-python/tests/cover/test_stateful.py b/hypothesis-python/tests/cover/test_stateful.py
@@ -223,12 +223,42 @@ def fail(self, x, y):
         assert False
 
 
+class CanSwarm(RuleBasedStateMachine):
+    """This test will essentially never pass if you choose rules uniformly at
+    random, because every time the snake rule fires we return to the beginning,
+    so we will tend to undo progress well before we make enough progress for
+    the test to fail.
+
+    This tests our swarm testing functionality in stateful testing by ensuring
+    that we can sometimes generate long runs of steps which exclude a
+    particular rule.
+    """
+
+    def __init__(self):
+        super(CanSwarm, self).__init__()
+        self.seen = set()
+
+    # The reason this rule takes a parameter is that it ensures that we do not
+    # achieve "swarming" by just restricting the alphabet for single byte
+    # decisions, which is a thing the underlying conjecture engine will
+    # happily do on its own without knowledge of the rule structure.
+    @rule(move=integers(0, 255))
+    def ladder(self, move):
+        self.seen.add(move)
+        assert len(self.seen) <= 15
+
+    @rule()
+    def snake(self):
+        self.seen.clear()
+
+
 bad_machines = (
     BalancedTrees,
     DepthMachine,
     RoseTreeStateMachine,
     NotTheLastMachine,
     PopulateMultipleTargets,
+    CanSwarm,
 )
 
 for m in bad_machines:
@@ -1164,7 +1194,7 @@ def oops(self):
 
 
 def test_reproduce_failure_works():
-    @reproduce_failure(__version__, base64.b64encode(b"\0\0\0"))
+    @reproduce_failure(__version__, base64.b64encode(b"\0\0\0\0\0"))
     class TrivialMachine(RuleBasedStateMachine):
         @rule()
         def oops(self):
@@ -1175,7 +1205,7 @@ def oops(self):
 
 
 def test_reproduce_failure_fails_if_no_error():
-    @reproduce_failure(__version__, base64.b64encode(b"\0\0\0"))
+    @reproduce_failure(__version__, base64.b64encode(b"\0\0\0\0\0"))
     class TrivialMachine(RuleBasedStateMachine):
         @rule()
         def ok(self):

diff --git a/hypothesis-python/tests/cover/test_statistical_events.py b/hypothesis-python/tests/cover/test_statistical_events.py
@@ -234,4 +234,4 @@ def do(self, item):
 
 def test_stateful_states_are_deduped():
     stats = call_for_statistics(DemoStateMachine.TestCase().runTest)
-    assert len(stats.events) == 1
+    assert len(stats.events) <= 2