Skip to content

Refactor execution as a HypofuzzProvider backend #113

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 24 commits into from
Jun 14, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
101 changes: 56 additions & 45 deletions src/hypofuzz/corpus.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from collections import Counter, defaultdict
from collections.abc import Callable, Iterator, Set
from random import Random
from typing import TYPE_CHECKING, Optional, Union
from typing import TYPE_CHECKING, Union

from hypothesis import settings
from hypothesis.internal.conjecture.choice import (
Expand All @@ -15,11 +15,11 @@
ConjectureData,
ConjectureResult,
Status,
_Overrun,
)
from hypothesis.internal.conjecture.engine import ConjectureRunner
from hypothesis.internal.conjecture.shrinker import Shrinker, sort_key as _sort_key
from hypothesis.internal.escalation import InterestingOrigin
from hypothesis.internal.observability import TestCaseObservation

from hypofuzz.database import ChoicesT, HypofuzzDatabase, Observation

Expand Down Expand Up @@ -104,7 +104,9 @@ def get_shrinker(
class Corpus:
"""Manage the corpus for a fuzz target.

The class tracks the minimal valid example which covers each known branch.
The corpus is responsible for managing all seed state, including saving
novel seeds to the database. This includes tracking how often each branch
has been hit, minimal covering examples for each branch, and so on.
"""

def __init__(self, database: HypofuzzDatabase, database_key: bytes) -> None:
Expand All @@ -126,9 +128,7 @@ def __init__(self, database: HypofuzzDatabase, database_key: bytes) -> None:
# fingerprints. The correctness of this refcounting is validated in
# _check_invariants.
self._count_fingerprints: dict[Choices, int] = defaultdict(int)
self.interesting_examples: dict[
InterestingOrigin, tuple[ConjectureResult, Optional[Observation]]
] = {}
self.interesting_examples: dict[InterestingOrigin, TestCaseObservation] = {}

self.__shrunk_to_nodes: set[NodesT] = set()

Expand Down Expand Up @@ -170,17 +170,18 @@ def _check_invariants(self) -> None:
def _add_fingerprint(
self,
fingerprint: Fingerprint,
result: ConjectureResult,
observation: TestCaseObservation,
*,
observation: Optional[Observation] = None,
save_observation: bool,
) -> None:
self.fingerprints[fingerprint] = result.nodes
choices = Choices(result.choices)
assert observation.metadata.choice_nodes is not None
self.fingerprints[fingerprint] = observation.metadata.choice_nodes
choices = Choices(tuple(n.value for n in observation.metadata.choice_nodes))
if choices not in self.corpus:
self._db.save_corpus(self.database_key, result.choices)
if observation is not None:
self._db.save_corpus(self.database_key, choices)
if save_observation:
self._db.save_corpus_observation(
self.database_key, result.choices, observation
self.database_key, choices, Observation.from_hypothesis(observation)
)
self.corpus.add(choices)
self._count_fingerprints[choices] += 1
Expand All @@ -204,66 +205,76 @@ def _evict_choices(self, choices: Choices) -> None:

def add(
self,
result: Union[ConjectureResult, _Overrun],
observation: TestCaseObservation,
*,
observation: Optional[Observation] = None,
behaviors: Set[Behavior],
save_observation: bool,
) -> bool:
"""Update the corpus with the result of running a test.

Returns whether this changed the corpus.
"""
if result.status < Status.VALID:
if observation.metadata.data_status < Status.VALID:
return False
assert isinstance(result, ConjectureResult)
assert result.extra_information is not None

fingerprint: Fingerprint = result.extra_information.behaviors # type: ignore
assert observation.metadata.choice_nodes is not None

if result.status is Status.INTERESTING:
origin = result.interesting_origin
if observation.metadata.data_status is Status.INTERESTING:
origin = observation.metadata.interesting_origin
assert origin is not None
if origin not in self.interesting_examples or (
sort_key(result) < sort_key(self.interesting_examples[origin][0])
sort_key(observation.metadata.choice_nodes)
< sort_key(self.interesting_examples[origin].metadata.choice_nodes) # type: ignore
):
previous = self.interesting_examples.get(origin)
self.interesting_examples[origin] = (result, observation)
choices = tuple(n.value for n in observation.metadata.choice_nodes)
self.interesting_examples[origin] = observation
# We save interesting examples to the unshrunk/secondary database
# so they can appear immediately without waiting for shrinking to
# finish (and also in case of a fatal hypofuzz error, etc.).
self._db.save_failure(self.database_key, result.choices, shrunk=False)
# observation might be none even for failures if we are replaying
# a failure in Phase.REPLAY, since we know observations already
# exist when replaying.
if observation is not None:
self._db.save_failure_observation(
self.database_key, result.choices, observation
)
#
# Note that `observation` might be None even for failures if we
# are replaying a failure in Phase.REPLAY, since we know observations
# already exist when replaying.
self._db.save_failure(
self.database_key,
choices,
Observation.from_hypothesis(observation),
shrunk=False,
)

if previous is not None:
(previous_node, previous_observation) = previous
assert previous.metadata.choice_nodes is not None
previous_choices = tuple(
n.value for n in previous.metadata.choice_nodes
)
# remove the now-redundant failure we had previously saved.
self._db.delete_failure(
self.database_key, previous_node.choices, shrunk=False
self.database_key,
previous_choices,
Observation.from_hypothesis(previous),
shrunk=False,
)
if previous_observation is not None:
self._db.delete_failure_observation(
self.database_key,
previous_node.choices,
previous_observation,
)
return True

self.behavior_counts.update(fingerprint)
if fingerprint not in self.fingerprints:
self._add_fingerprint(fingerprint, result, observation=observation)
self.behavior_counts.update(behaviors)
if behaviors not in self.fingerprints:
self._add_fingerprint(
behaviors, observation, save_observation=save_observation
)
return True

if sort_key(result.nodes) < sort_key(self.fingerprints[fingerprint]):
if sort_key(observation.metadata.choice_nodes) < sort_key(
self.fingerprints[behaviors]
):
existing_choices = Choices(
tuple(n.value for n in self.fingerprints[fingerprint])
tuple(n.value for n in self.fingerprints[behaviors])
)

self._count_fingerprints[existing_choices] -= 1
self._add_fingerprint(fingerprint, result, observation=observation)
self._add_fingerprint(
behaviors, observation, save_observation=save_observation
)
if self._count_fingerprints[existing_choices] == 0:
self._evict_choices(existing_choices)
return True
Expand Down
59 changes: 31 additions & 28 deletions src/hypofuzz/database.py
Original file line number Diff line number Diff line change
Expand Up @@ -577,44 +577,47 @@ def fetch_corpus_observations(
if observation := Observation.from_json(value):
yield observation

# failures (failure_key)
# failures (failure_key and failure_observation_key)

def save_failure(self, key: bytes, choices: ChoicesT, *, shrunk: bool) -> None:
def save_failure(
self,
key: bytes,
choices: ChoicesT,
observation: Optional[Observation],
*,
shrunk: bool,
) -> None:
self.save(failure_key(key, shrunk=shrunk), choices_to_bytes(choices))

def delete_failure(self, key: bytes, choices: ChoicesT, *, shrunk: bool) -> None:
self.delete(failure_key(key, shrunk=shrunk), choices_to_bytes(choices))

def fetch_failures(self, key: bytes, *, shrunk: bool) -> Iterable[ChoicesT]:
for value in self.fetch(failure_key(key, shrunk=shrunk)):
if (choices := choices_from_bytes(value)) is not None:
yield choices

# failure observation (failure_observation_key)
if observation is not None:
self._check_observation(observation)
existing_observations = list(self.fetch_failure_observations(key, choices))
self.save(failure_observation_key(key, choices), self._encode(observation))
for observation in existing_observations:
self._check_observation(observation)
self.delete(
failure_observation_key(key, choices), self._encode(observation)
)

def save_failure_observation(
def delete_failure(
self,
key: bytes,
choices: ChoicesT,
observation: Observation,
observation: Optional[Observation],
*,
delete: bool = True,
shrunk: bool,
) -> None:
self._check_observation(observation)
if not delete:
self.save(failure_observation_key(key, choices), self._encode(observation))
return

existing = list(self.fetch_failure_observations(key, choices))
self.save(failure_observation_key(key, choices), self._encode(observation))
for observation in existing:
self.delete_failure_observation(key, choices, observation)
self.delete(failure_key(key, shrunk=shrunk), choices_to_bytes(choices))
if observation is not None:
self._check_observation(observation)
self.delete(
failure_observation_key(key, choices), self._encode(observation)
)

def delete_failure_observation(
self, key: bytes, choices: ChoicesT, observation: Observation
) -> None:
self._check_observation(observation)
self.delete(failure_observation_key(key, choices), self._encode(observation))
def fetch_failures(self, key: bytes, *, shrunk: bool) -> Iterable[ChoicesT]:
for value in self.fetch(failure_key(key, shrunk=shrunk)):
if (choices := choices_from_bytes(value)) is not None:
yield choices

def fetch_failure_observation(
self, key: bytes, choices: ChoicesT
Expand Down
3 changes: 3 additions & 0 deletions src/hypofuzz/entrypoint.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@
import click
import hypothesis.extra.cli
import psutil
from hypothesis.internal.conjecture.providers import AVAILABLE_PROVIDERS

AVAILABLE_PROVIDERS["hypofuzz"] = "hypofuzz.provider.HypofuzzProvider"


@hypothesis.extra.cli.main.command() # type: ignore
Expand Down
Loading