Stateful store tests (#2070)

enthusiastdev121 · dcherian · jhamman · enthusiastdev121 · commit 30f2753e2874 · 2024-08-15T14:30:10.000-07:00
* stateful store test

* Deepak's edits

* add data strategies and beginning of testgetpartial

* a few more updates

* troubleshooting get_partial

* edits

* Deepak edits

* bundle changes

* add sts script, delete

* comment out bundles for now

* draft of listdir

* edits

* some changes

* draft of stateful store tests

* precommit

* Update src/zarr/strategies.py

Co-authored-by: Deepak Cherian <dcherian@users.noreply.github.com>

* Update tests/v3/test_store/test_stateful_store.py

Co-authored-by: Deepak Cherian <dcherian@users.noreply.github.com>

* Update tests/v3/test_store/test_stateful_store.py

Co-authored-by: Deepak Cherian <dcherian@users.noreply.github.com>

* Apply suggestions from code review

Co-authored-by: Deepak Cherian <dcherian@users.noreply.github.com>

* Apply suggestions from code review

Co-authored-by: Deepak Cherian <dcherian@users.noreply.github.com>

* moved strategies and edits to stateful store tests

* Update tests/v3/test_store/test_stateful_store.py

Co-authored-by: Deepak Cherian <dcherian@users.noreply.github.com>

* Update tests/v3/test_store/test_stateful_store.py

Co-authored-by: Deepak Cherian <dcherian@users.noreply.github.com>

* Update tests/v3/test_store/test_stateful_store.py

Co-authored-by: Deepak Cherian <dcherian@users.noreply.github.com>

* Update tests/v3/test_store/test_stateful_store.py

Co-authored-by: Deepak Cherian <dcherian@users.noreply.github.com>

* Update tests/v3/test_store/test_stateful_store.py

Co-authored-by: Deepak Cherian <dcherian@users.noreply.github.com>

* Update tests/v3/test_store/test_stateful_store.py

Co-authored-by: Deepak Cherian <dcherian@users.noreply.github.com>

* Update tests/v3/test_store/test_stateful_store.py

Co-authored-by: Deepak Cherian <dcherian@users.noreply.github.com>

* fixed wrapper class and add rules for a few more methods

* incorporate Deepak's edits

* switch to bundles + a few more updates

* remove some notes

* Update src/zarr/testing/strategies.py

Co-authored-by: Deepak Cherian <dcherian@users.noreply.github.com>

* remove bundles

* fix get signature, get invalid keys and some other changes

* add docstrings

* Deepak edits

* fixing imports now that strategies in testing

* revert docstrings

* Update run-hypothesis command

* Fix run-hypothesis command

* Apply suggestions from code review

Co-authored-by: Joe Hamman <jhamman1@gmail.com>

* changes from Joe's review

* Apply suggestions from code review

Co-authored-by: Joe Hamman <jhamman1@gmail.com>

* add review comments

---------

Co-authored-by: Deepak Cherian <deepak@cherian.net>
Co-authored-by: Deepak Cherian <dcherian@users.noreply.github.com>
Co-authored-by: Joe Hamman <jhamman1@gmail.com>
diff --git a/pyproject.toml b/pyproject.toml
@@ -139,7 +139,7 @@ run-coverage = "pytest --cov-config=pyproject.toml --cov=pkg --cov=tests"
 run = "run-coverage --no-cov"
 run-verbose = "run-coverage --verbose"
 run-mypy = "mypy src"
-run-hypothesis = "pytest --hypothesis-profile ci tests/v3/test_properties.py"
+run-hypothesis = "pytest --hypothesis-profile ci tests/v3/test_properties.py tests/v3/test_store/test_stateful*"
 list-env = "pip list"
 
 [tool.hatch.envs.docs]
diff --git a/src/zarr/testing/strategies.py b/src/zarr/testing/strategies.py
@@ -4,8 +4,10 @@
 import hypothesis.strategies as st
 import numpy as np
 from hypothesis import given, settings  # noqa
+from hypothesis.strategies import SearchStrategy
 
-from zarr import Array, Group
+from zarr.core.array import Array
+from zarr.core.group import Group
 from zarr.store import MemoryStore, StoreLike
 
 # Copied from Xarray
@@ -142,3 +144,16 @@ def basic_indices(draw: st.DrawFn, *, shape: tuple[int], **kwargs):  # type: ign
             )
         )
     )
+
+
+def key_ranges(keys: SearchStrategy = node_names) -> SearchStrategy[list]:
+    """Build a strategy producing ``key_ranges`` arguments for ``get_partial_values()``.
+
+    Parameters
+    ----------
+    keys
+        Strategy that the key of every generated entry is drawn from.
+
+    Returns
+    -------
+    SearchStrategy[list]
+        Lists holding between 1 and 10 entries of the form
+        ``(key, (range_start, range_step))``, where ``range_start`` and
+        ``range_step`` are each independently ``None`` or an integer >= 0.
+    """
+    # both members of the byte-range tuple share the same "None or non-negative int" shape
+    optional_offset = st.none() | st.integers(min_value=0)
+    return st.lists(
+        st.tuples(keys, st.tuples(optional_offset, optional_offset)),
+        min_size=1,
+        max_size=10,
+    )
diff --git a/tests/v3/test_properties.py b/tests/v3/test_properties.py
@@ -7,7 +7,7 @@
 import hypothesis.extra.numpy as npst  # noqa
 import hypothesis.strategies as st  # noqa
 from hypothesis import given, settings  # noqa
-from zarr.strategies import arrays, np_arrays, basic_indices  # noqa
+from zarr.testing.strategies import arrays, np_arrays, basic_indices  # noqa
 
 
 @given(st.data())
diff --git a/tests/v3/test_store/test_stateful_store.py b/tests/v3/test_store/test_stateful_store.py
@@ -0,0 +1,229 @@
+# Stateful tests for arbitrary Zarr stores.
+
+
+import hypothesis.strategies as st
+from hypothesis import assume, note
+from hypothesis.stateful import (
+    RuleBasedStateMachine,
+    invariant,
+    precondition,
+    rule,
+)
+
+import zarr
+from zarr.abc.store import AccessMode, Store
+from zarr.core.buffer import Buffer, BufferPrototype, default_buffer_prototype
+from zarr.store import MemoryStore
+from zarr.testing.strategies import key_ranges, paths
+
+
+class SyncStoreWrapper(zarr.core.sync.SyncMixin):
+    """Synchronous Store wrapper
+
+    This class holds synchronous methods that map to async methods of Store classes.
+    The synchronous wrapper is needed because hypothesis' stateful testing infra does
+    not support asyncio so we redefine sync versions of the Store API.
+    https://github.com/HypothesisWorks/hypothesis/issues/3712#issuecomment-1668999041
+
+    Every implemented method forwards to the wrapped ``store`` and blocks on the
+    resulting coroutine (or async iterator) through the ``SyncMixin`` helpers.
+    """
+
+    def __init__(self, store: Store) -> None:
+        """Wrap ``store``; all calls are delegated to it."""
+        self.store = store
+
+    @property
+    def mode(self) -> AccessMode:
+        """Access mode of the wrapped store."""
+        return self.store.mode
+
+    def set(self, key: str, data_buffer: zarr.core.buffer.Buffer) -> None:
+        """Store ``data_buffer`` under ``key``."""
+        return self._sync(self.store.set(key, data_buffer))
+
+    def list(self) -> list:
+        """Collect the wrapped store's ``list()`` async iterator synchronously."""
+        return self._sync_iter(self.store.list())
+
+    # NOTE: from here on the bare name ``list`` inside this class body refers to the
+    # method above, so annotations below deliberately leave it unsubscripted.
+
+    def get(self, key: str, prototype: BufferPrototype) -> Buffer | None:
+        """Return the value stored under ``key``.
+
+        The stateful tests in this module expect ``None`` for a key that was never set.
+        """
+        return self._sync(self.store.get(key, prototype=prototype))
+
+    def get_partial_values(self, key_ranges: list, prototype: BufferPrototype) -> list:
+        """Fetch one result per ``(key, byte_range)`` entry of ``key_ranges``.
+
+        The result is iterated entry by entry by the caller, hence the ``list``
+        return annotation (rather than a single ``Buffer``).
+        """
+        return self._sync(
+            self.store.get_partial_values(prototype=prototype, key_ranges=key_ranges)
+        )
+
+    def delete(self, path: str) -> None:
+        """Delete ``path`` from the wrapped store."""
+        return self._sync(self.store.delete(path))
+
+    def empty(self) -> bool:
+        """Return the wrapped store's ``empty()`` result."""
+        return self._sync(self.store.empty())
+
+    def clear(self) -> None:
+        """Clear the wrapped store."""
+        return self._sync(self.store.clear())
+
+    def exists(self, key: str) -> bool:
+        """Return the wrapped store's ``exists(key)`` result."""
+        return self._sync(self.store.exists(key))
+
+    def list_dir(self, prefix: str):
+        """Not wrapped yet."""
+        raise NotImplementedError
+
+    def list_prefix(self, prefix: str):
+        """Not wrapped yet."""
+        raise NotImplementedError
+
+    def set_partial_values(self, key_start_values):
+        """Not wrapped yet."""
+        raise NotImplementedError
+
+    @property
+    def supports_listing(self) -> bool:
+        return self.store.supports_listing
+
+    @property
+    def supports_partial_writes(self) -> bool:
+        # Delegate to the wrapped store. Reading ``self.supports_partial_writes`` here
+        # would re-enter this property and recurse until RecursionError.
+        return self.store.supports_partial_writes
+
+    @property
+    def supports_writes(self) -> bool:
+        return self.store.supports_writes
+
+
+class ZarrStoreStateMachine(RuleBasedStateMachine):
+    """
+    Zarr store state machine
+
+        This is a subclass of a Hypothesis RuleBasedStateMachine.
+        It is a testing framework to ensure that the state of a Zarr store matches
+        an expected state after a set of random operations. It contains a store
+        (currently, a Zarr MemoryStore) and a model, a simplified version of a
+        zarr store (in this case, a dict). It also contains rules which represent
+        actions that can be applied to a zarr store. Rules apply an action to both
+        the store and the model, and invariants assert that the state of the model
+        is equal to the state of the store. Hypothesis then generates sequences of
+        rules, running invariants after each rule. It raises an error if a sequence
+        produces discontinuity between state of the model and state of the store
+        (ie. an invariant is violated).
+        https://hypothesis.readthedocs.io/en/latest/stateful.html
+    """
+
+    def __init__(self) -> None:
+        super().__init__()
+        # The model keeps the very Buffer objects handed to the store (see ``set``),
+        # so values must be converted with ``to_bytes()`` before comparing.
+        self.model: dict[str, Buffer] = {}
+        self.store = SyncStoreWrapper(MemoryStore(mode="w"))
+        self.prototype = default_buffer_prototype()
+
+    @rule(key=paths, data=st.binary(min_size=0, max_size=100))
+    def set(self, key: str, data: bytes) -> None:
+        """Write ``data`` under ``key`` in both the store and the model."""
+        note(f"(set) Setting {key!r} with {data}")
+        assert not self.store.mode.readonly
+        data_buf = Buffer.from_bytes(data)
+        self.store.set(key, data_buf)
+        self.model[key] = data_buf
+
+    @precondition(lambda self: len(self.model) > 0)
+    @rule(key=paths, data=st.data())
+    def get(self, key: str, data: st.DataObject) -> None:
+        """Read back a key known to the model and compare the bytes."""
+        # The ``key`` drawn by the rule is discarded: an existing key is sampled
+        # from the model instead so the lookup is guaranteed to hit.
+        key = data.draw(
+            st.sampled_from(sorted(self.model.keys()))
+        )  # hypothesis wants to sample from sorted list
+        note("(get)")
+        store_value = self.store.get(key, self.prototype)
+        # to bytes here necessary because data_buf set to model in set()
+        assert self.model[key].to_bytes() == (store_value.to_bytes())
+
+    @rule(key=paths, data=st.data())
+    def get_invalid_keys(self, key: str, data: st.DataObject) -> None:
+        """Reading a key that was never set must yield ``None``."""
+        note("(get_invalid)")
+        assume(key not in self.model)
+        assert self.store.get(key, self.prototype) is None
+
+    @precondition(lambda self: len(self.model) > 0)
+    @rule(data=st.data())
+    def get_partial_values(self, data: st.DataObject) -> None:
+        """Compare partial reads from the store against slices of the model values."""
+        key_range = data.draw(key_ranges(keys=st.sampled_from(sorted(self.model.keys()))))
+        note(f"(get partial) {key_range=}")
+        obs_maybe = self.store.get_partial_values(key_range, self.prototype)
+
+        observed = []
+        for obs in obs_maybe:
+            # every requested key was sampled from the model, so data must come back
+            assert obs is not None
+            observed.append(obs.to_bytes())
+
+        expected = []
+        for key, (range_start, range_step) in key_range:
+            # a missing start means "from the beginning"; a missing step means "to the end"
+            start = range_start or 0
+            stop = start + range_step if range_step is not None else None
+            expected.append(self.model[key][start:stop].to_bytes())
+
+        # list equality also fails on a length mismatch, like zip(..., strict=True) did
+        assert observed == expected, (observed, expected)
+
+    @precondition(lambda self: len(self.model) > 0)
+    @rule(data=st.data())
+    def delete(self, data: st.DataObject) -> None:
+        """Delete an existing key from both the store and the model."""
+        key = data.draw(st.sampled_from(sorted(self.model.keys())))
+        note(f"(delete) Deleting {key=}")
+
+        self.store.delete(key)
+        del self.model[key]
+
+    @rule()
+    def clear(self) -> None:
+        """Empty both the store and the model."""
+        assert not self.store.mode.readonly
+        note("(clear)")
+        self.store.clear()
+        self.model.clear()
+
+        assert len(self.model) == len(list(self.store.list())) == 0
+
+    @rule()
+    def empty(self) -> None:
+        note("(empty)")
+
+        # make sure they either both are or both aren't empty (same state)
+        assert self.store.empty() == (not self.model)
+
+    @rule(key=paths)
+    def exists(self, key: str) -> None:
+        note("(exists)")
+
+        assert self.store.exists(key) == (key in self.model)
+
+    @invariant()
+    def check_paths_equal(self) -> None:
+        note("Checking that paths are equal")
+        # Compare sorted: the check is about which keys exist, and it should not
+        # depend on the order in which a particular store happens to list them.
+        assert sorted(self.model.keys()) == sorted(self.store.list())
+
+    @invariant()
+    def check_vals_equal(self) -> None:
+        note("Checking values equal")
+        for key, expected in self.model.items():
+            store_item = self.store.get(key, self.prototype).to_bytes()
+            assert expected.to_bytes() == store_item
+
+    @invariant()
+    def check_num_keys_equal(self) -> None:
+        note("check num keys equal")
+
+        assert len(self.model) == len(list(self.store.list()))
+
+    @invariant()
+    def check_keys(self) -> None:
+        """``list``, ``empty`` and ``exists`` must agree with one another."""
+        keys = list(self.store.list())
+
+        if not keys:
+            assert self.store.empty() is True
+        else:
+            assert self.store.empty() is False
+
+            for key in keys:
+                assert self.store.exists(key) is True
+        note("checking keys / exists / empty")
+
+
+# Bind the state machine's ``TestCase`` to a module-level name.
+# NOTE(review): presumably this is what lets pytest collect and run the state
+# machine — confirm against the Hypothesis stateful testing docs.
+StatefulStoreTest = ZarrStoreStateMachine.TestCase