diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index ab68597496..458caebece 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -190,7 +190,8 @@ jobs: run: | pip install --upgrade setuptools pip wheel pip install -r requirements/coverage.txt - pip install hypothesis-python/[all] + pip install -r requirements/crosshair.txt + pip install hypothesis-python/ - name: Run tests run: python -m pytest --numprocesses auto ${{ matrix.whichtests == 'nocover' && 'hypothesis-python/tests/nocover' || 'hypothesis-python/tests/ --ignore=hypothesis-python/tests/nocover/ --ignore=hypothesis-python/tests/quality/ --ignore=hypothesis-python/tests/ghostwriter/ --ignore=hypothesis-python/tests/patching/' }} diff --git a/hypothesis-python/RELEASE.rst b/hypothesis-python/RELEASE.rst new file mode 100644 index 0000000000..ae9cb8091d --- /dev/null +++ b/hypothesis-python/RELEASE.rst @@ -0,0 +1,3 @@ +RELEASE_TYPE: patch + +Hypothesis now looks for constant values in the source code of your program, and sometimes uses them while generating examples. This lets Hypothesis generate interesting inputs that are specific to your program. diff --git a/hypothesis-python/src/hypothesis/internal/conjecture/data.py b/hypothesis-python/src/hypothesis/internal/conjecture/data.py index 2f87e84f70..200ea12172 100644 --- a/hypothesis-python/src/hypothesis/internal/conjecture/data.py +++ b/hypothesis-python/src/hypothesis/internal/conjecture/data.py @@ -861,6 +861,16 @@ def draw_float( assert not math.isnan(min_value) assert not math.isnan(max_value) + if smallest_nonzero_magnitude == 0.0: # pragma: no cover + raise FloatingPointError( + "Got allow_subnormal=True, but we can't represent subnormal floats " + "right now, in violation of the IEEE-754 floating-point " + "specification. This is usually because something was compiled with " + "-ffast-math or a similar option, which sets global processor state. " + "See https://simonbyrne.github.io/notes/fastmath/ for a more detailed " + "writeup - and good luck!" + ) + if forced is not None: assert allow_nan or not math.isnan(forced) assert math.isnan(forced) or ( diff --git a/hypothesis-python/src/hypothesis/internal/conjecture/providers.py b/hypothesis-python/src/hypothesis/internal/conjecture/providers.py index beccc75c36..a1b06c86c8 100644 --- a/hypothesis-python/src/hypothesis/internal/conjecture/providers.py +++ b/hypothesis-python/src/hypothesis/internal/conjecture/providers.py @@ -18,7 +18,6 @@ from typing import ( TYPE_CHECKING, Any, - Callable, Literal, Optional, TypedDict, @@ -26,15 +25,19 @@ Union, ) +from sortedcontainers import SortedSet + from hypothesis.errors import HypothesisWarning from hypothesis.internal.cache import LRUCache from hypothesis.internal.compat import WINDOWS, int_from_bytes from hypothesis.internal.conjecture.choice import ( - StringConstraints, + ChoiceConstraintsT, + ChoiceTypeT, + FloatConstraints, choice_constraints_key, choice_permitted, ) -from hypothesis.internal.conjecture.floats import float_to_lex, lex_to_float +from hypothesis.internal.conjecture.floats import lex_to_float from hypothesis.internal.conjecture.junkdrawer import bits_to_bytes from hypothesis.internal.conjecture.utils import ( INT_SIZES, @@ -42,13 +45,13 @@ Sampler, many, ) +from hypothesis.internal.constants_ast import local_constants from hypothesis.internal.floats import ( SIGNALING_NAN, float_to_int, make_float_clamper, next_down, next_up, - sign_aware_lte, ) from hypothesis.internal.intervalsets import IntervalSet @@ -56,6 +59,7 @@ from typing import TypeAlias from hypothesis.internal.conjecture.data import ConjectureData + from hypothesis.internal.constants_ast import ConstantsT, ConstantT T = TypeVar("T") _Lifetime: "TypeAlias" = Literal["test_case", "test_function"] @@ -77,11 +81,11 @@ "hypothesis-urandom": "hypothesis.internal.conjecture.providers.URandomProvider", } FLOAT_INIT_LOGIC_CACHE = LRUCache(4096) -STRING_SAMPLER_CACHE = LRUCache(64) +# cache the choice_permitted constants for a particular set of constraints. +CONSTANTS_CACHE = LRUCache(1024) -NASTY_FLOATS = sorted( +_constant_floats = ( [ - 0.0, 0.5, 1.1, 1.5, @@ -94,7 +98,7 @@ float_info.min, float_info.max, 3.402823466e38, - 9007199254740992, + 9007199254740992.0, 1 - 10e-6, 2 + 10e-6, 1.192092896e-07, @@ -102,96 +106,112 @@ ] + [2.0**-n for n in (24, 14, 149, 126)] # minimum (sub)normals for float16,32 + [float_info.min / n for n in (2, 10, 1000, 100_000)] # subnormal in float64 - + [math.inf, math.nan] * 5 - + [SIGNALING_NAN], - key=float_to_lex, ) -NASTY_FLOATS = list(map(float, NASTY_FLOATS)) -NASTY_FLOATS.extend([-x for x in NASTY_FLOATS]) +_constant_floats.extend([-x for x in _constant_floats]) +assert all(isinstance(f, float) for f in _constant_floats) + +_constant_strings = { + # strings which can be interpreted as code / logic + "undefined", + "null", + "NULL", + "nil", + "NIL", + "true", + "false", + "True", + "False", + "TRUE", + "FALSE", + "None", + "none", + "if", + "then", + "else", + # strings which can be interpreted as a number + "0", + "1e100", + "0..0", + "0/0", + "1/0", + "+0.0", + "Infinity", + "-Infinity", + "Inf", + "INF", + "NaN", + "9" * 30, + # common ascii characters + ",./;'[]\\-=<>?:\"{}|_+!@#$%^&*()`~", + # common unicode characters + "Ω≈ç√∫˜µ≤≥÷åß∂ƒ©˙∆˚¬…æœ∑´®†¥¨ˆøπ“‘¡™£¢∞§¶•ªº–≠¸˛Ç◊ı˜Â¯˘¿ÅÍÎÏ˝ÓÔÒÚÆ☃Œ„´‰ˇÁ¨ˆØ∏”’`⁄€‹›fifl‡°·‚—±", + # characters which increase in length when lowercased + "Ⱥ", + "Ⱦ", + # ligatures + "æœÆŒffʤʨß" + # emoticons + "(╯°□°)╯︵ ┻━┻)", + # emojis + "😍", + "🇺🇸", + # emoji modifiers + "🏻" # U+1F3FB Light Skin Tone, + "👍🏻", # 👍 followed by U+1F3FB + # RTL text + "الكل في المجمو عة", + # Ogham text, which contains the only character in the Space Separators + # unicode category (Zs) that isn't visually blank:  . # noqa: RUF003 + "᚛ᚄᚓᚐᚋᚒᚄ ᚑᚄᚂᚑᚏᚅ᚜", + # readable variations on text (bolt/italic/script) + "𝐓𝐡𝐞 𝐪𝐮𝐢𝐜𝐤 𝐛𝐫𝐨𝐰𝐧 𝐟𝐨𝐱 𝐣𝐮𝐦𝐩𝐬 𝐨𝐯𝐞𝐫 𝐭𝐡𝐞 𝐥𝐚𝐳𝐲 𝐝𝐨𝐠", + "𝕿𝖍𝖊 𝖖𝖚𝖎𝖈𝖐 𝖇𝖗𝖔𝖜𝖓 𝖋𝖔𝖝 𝖏𝖚𝖒𝖕𝖘 𝖔𝖛𝖊𝖗 𝖙𝖍𝖊 𝖑𝖆𝖟𝖞 𝖉𝖔𝖌", + "𝑻𝒉𝒆 𝒒𝒖𝒊𝒄𝒌 𝒃𝒓𝒐𝒘𝒏 𝒇𝒐𝒙 𝒋𝒖𝒎𝒑𝒔 𝒐𝒗𝒆𝒓 𝒕𝒉𝒆 𝒍𝒂𝒛𝒚 𝒅𝒐𝒈", + "𝓣𝓱𝓮 𝓺𝓾𝓲𝓬𝓴 𝓫𝓻𝓸𝔀𝓷 𝓯𝓸𝔁 𝓳𝓾𝓶𝓹𝓼 𝓸𝓿𝓮𝓻 𝓽𝓱𝓮 𝓵𝓪𝔃𝔂 𝓭𝓸𝓰", + "𝕋𝕙𝕖 𝕢𝕦𝕚𝕔𝕜 𝕓𝕣𝕠𝕨𝕟 𝕗𝕠𝕩 𝕛𝕦𝕞𝕡𝕤 𝕠𝕧𝕖𝕣 𝕥𝕙𝕖 𝕝𝕒𝕫𝕪 𝕕𝕠𝕘", + # upsidown text + "ʇǝɯɐ ʇᴉs ɹolop ɯnsdᴉ ɯǝɹo˥", + # reserved strings in windows + "NUL", + "COM1", + "LPT1", + # scunthorpe problem + "Scunthorpe", + # zalgo text + "Ṱ̺̺̕o͞ ̷i̲̬͇̪͙n̝̗͕v̟̜̘̦͟o̶̙̰̠kè͚̮̺̪̹̱̤ ̖t̝͕̳̣̻̪͞h̼͓̲̦̳̘̲e͇̣̰̦̬͎ ̢̼̻̱̘h͚͎͙̜̣̲ͅi̦̲̣̰̤v̻͍e̺̭̳̪̰-m̢iͅn̖̺̞̲̯̰d̵̼̟͙̩̼̘̳ ̞̥̱̳̭r̛̗̘e͙p͠r̼̞̻̭̗e̺̠̣͟s̘͇̳͍̝͉e͉̥̯̞̲͚̬͜ǹ̬͎͎̟̖͇̤t͍̬̤͓̼̭͘ͅi̪̱n͠g̴͉ ͏͉ͅc̬̟h͡a̫̻̯͘o̫̟̖͍̙̝͉s̗̦̲.̨̹͈̣", + # + # examples from https://faultlore.com/blah/text-hates-you/ + "मनीष منش", + "पन्ह पन्ह त्र र्च कृकृ ड्ड न्हृे إلا بسم الله", + "lorem لا بسم الله ipsum 你好1234你好", +} -NASTY_STRINGS = sorted( - [ - # strings which can be interpreted as code / logic - "undefined", - "null", - "NULL", - "nil", - "NIL", - "true", - "false", - "True", - "False", - "TRUE", - "FALSE", - "None", - "none", - "if", - "then", - "else", - # strings which can be interpreted as a number - "0", - "1e100", - "0..0", - "0/0", - "1/0", - "+0.0", - "Infinity", - "-Infinity", - "Inf", - "INF", - "NaN", - "9" * 30, - # common ascii characters - ",./;'[]\\-=<>?:\"{}|_+!@#$%^&*()`~", - # common unicode characters - "Ω≈ç√∫˜µ≤≥÷åß∂ƒ©˙∆˚¬…æœ∑´®†¥¨ˆøπ“‘¡™£¢∞§¶•ªº–≠¸˛Ç◊ı˜Â¯˘¿ÅÍÎÏ˝ÓÔÒÚÆ☃Œ„´‰ˇÁ¨ˆØ∏”’`⁄€‹›fifl‡°·‚—±", - # characters which increase in length when lowercased - "Ⱥ", - "Ⱦ", - # ligatures - "æœÆŒffʤʨß" - # emoticons - "(╯°□°)╯︵ ┻━┻)", - # emojis - "😍", - "🇺🇸", - # emoji modifiers - "🏻" # U+1F3FB Light Skin Tone, - "👍🏻", # 👍 followed by U+1F3FB - # RTL text - "الكل في المجمو عة", - # Ogham text, which contains the only character in the Space Separators - # unicode category (Zs) that isn't visually blank:  . # noqa: RUF003 - "᚛ᚄᚓᚐᚋᚒᚄ ᚑᚄᚂᚑᚏᚅ᚜", - # readable variations on text (bolt/italic/script) - "𝐓𝐡𝐞 𝐪𝐮𝐢𝐜𝐤 𝐛𝐫𝐨𝐰𝐧 𝐟𝐨𝐱 𝐣𝐮𝐦𝐩𝐬 𝐨𝐯𝐞𝐫 𝐭𝐡𝐞 𝐥𝐚𝐳𝐲 𝐝𝐨𝐠", - "𝕿𝖍𝖊 𝖖𝖚𝖎𝖈𝖐 𝖇𝖗𝖔𝖜𝖓 𝖋𝖔𝖝 𝖏𝖚𝖒𝖕𝖘 𝖔𝖛𝖊𝖗 𝖙𝖍𝖊 𝖑𝖆𝖟𝖞 𝖉𝖔𝖌", - "𝑻𝒉𝒆 𝒒𝒖𝒊𝒄𝒌 𝒃𝒓𝒐𝒘𝒏 𝒇𝒐𝒙 𝒋𝒖𝒎𝒑𝒔 𝒐𝒗𝒆𝒓 𝒕𝒉𝒆 𝒍𝒂𝒛𝒚 𝒅𝒐𝒈", - "𝓣𝓱𝓮 𝓺𝓾𝓲𝓬𝓴 𝓫𝓻𝓸𝔀𝓷 𝓯𝓸𝔁 𝓳𝓾𝓶𝓹𝓼 𝓸𝓿𝓮𝓻 𝓽𝓱𝓮 𝓵𝓪𝔃𝔂 𝓭𝓸𝓰", - "𝕋𝕙𝕖 𝕢𝕦𝕚𝕔𝕜 𝕓𝕣𝕠𝕨𝕟 𝕗𝕠𝕩 𝕛𝕦𝕞𝕡𝕤 𝕠𝕧𝕖𝕣 𝕥𝕙𝕖 𝕝𝕒𝕫𝕪 𝕕𝕠𝕘", - # upsidown text - "ʇǝɯɐ ʇᴉs ɹolop ɯnsdᴉ ɯǝɹo˥", - # reserved strings in windows - "NUL", - "COM1", - "LPT1", - # scunthorpe problem - "Scunthorpe", - # zalgo text - "Ṱ̺̺̕o͞ ̷i̲̬͇̪͙n̝̗͕v̟̜̘̦͟o̶̙̰̠kè͚̮̺̪̹̱̤ ̖t̝͕̳̣̻̪͞h̼͓̲̦̳̘̲e͇̣̰̦̬͎ ̢̼̻̱̘h͚͎͙̜̣̲ͅi̦̲̣̰̤v̻͍e̺̭̳̪̰-m̢iͅn̖̺̞̲̯̰d̵̼̟͙̩̼̘̳ ̞̥̱̳̭r̛̗̘e͙p͠r̼̞̻̭̗e̺̠̣͟s̘͇̳͍̝͉e͉̥̯̞̲͚̬͜ǹ̬͎͎̟̖͇̤t͍̬̤͓̼̭͘ͅi̪̱n͠g̴͉ ͏͉ͅc̬̟h͡a̫̻̯͘o̫̟̖͍̙̝͉s̗̦̲.̨̹͈̣", - # - # examples from https://faultlore.com/blah/text-hates-you/ - "मनीष منش", - "पन्ह पन्ह त्र र्च कृकृ ड्ड न्हृे إلا بسم الله", - "lorem لا بسم الله ipsum 你好1234你好", - ], - key=len, -) -# Masks for masking off the first byte of an n-bit buffer. -# The appropriate mask is stored at position n % 8. -BYTE_MASKS = [(1 << n) - 1 for n in range(8)] -BYTE_MASKS[0] = 255 +# we don't actually care what order the constants are sorted in, just that the +# ordering is deterministic. +GLOBAL_CONSTANTS: "ConstantsT" = { + "float": SortedSet(_constant_floats, key=float_to_int), + "string": SortedSet(_constant_strings), + "integer": SortedSet(), + "bytes": SortedSet(), +} + + +_local_constants_hash: Optional[int] = None + + +def _get_local_constants(): + global _local_constants_hash + + constants = local_constants() + constants_hash = hash(tuple((k, tuple(v)) for k, v in constants.items())) + # if we've added new constants since the last time we checked, invalidate + # the cache. + if constants_hash != _local_constants_hash: + CONSTANTS_CACHE.cache.clear() + _local_constants_hash = constants_hash + + return constants class _BackendInfoMsg(TypedDict): @@ -303,10 +323,6 @@ def draw_float( max_value: float = math.inf, allow_nan: bool = True, smallest_nonzero_magnitude: float, - # TODO: consider supporting these float widths at the IR level in the - # future. - # width: Literal[16, 32, 64] = 64, - # exclude_min and exclude_max handled higher up, ) -> float: raise NotImplementedError @@ -355,8 +371,56 @@ class HypothesisProvider(PrimitiveProvider): def __init__(self, conjecturedata: Optional["ConjectureData"], /): super().__init__(conjecturedata) + self.local_constants = _get_local_constants() self._random = None if self._cd is None else self._cd._random + def _maybe_draw_constant( + self, + choice_type: ChoiceTypeT, + constraints: ChoiceConstraintsT, + *, + p: float = 0.05, + ) -> Optional["ConstantT"]: + assert self._random is not None + assert choice_type != "boolean" + + # check whether we even want a constant before spending time computing + # and caching the allowed constants. + if self._random.random() > p: + return None + + key = (choice_type, choice_constraints_key(choice_type, constraints)) + if key not in CONSTANTS_CACHE: + CONSTANTS_CACHE[key] = ( + tuple( + choice + for choice in GLOBAL_CONSTANTS[choice_type] + if choice_permitted(choice, constraints) + ), + tuple( + choice + for choice in self.local_constants[choice_type] + if choice_permitted(choice, constraints) + ), + ) + + # split constants into two pools, so we still have a good chance to draw + # global constants even if there are many local constants. + (global_constants, local_constants) = CONSTANTS_CACHE[key] + constants_lists = ([global_constants] if global_constants else []) + ( + [local_constants] if local_constants else [] + ) + if not constants_lists: + return None + + # At this point, we've decided to use a constant. Now we select which pool + # to draw that constant from. + # + # Note that this approach has a different probability distribution than + # attempting a random.random for both global_constants and local_constants. + constants = self._random.choice(constants_lists) + return self._random.choice(constants) + def draw_boolean( self, p: float = 0.5, @@ -379,6 +443,19 @@ def draw_integer( shrink_towards: int = 0, ) -> int: assert self._cd is not None + if ( + constant := self._maybe_draw_constant( + "integer", + { + "min_value": min_value, + "max_value": max_value, + "weights": weights, + "shrink_towards": shrink_towards, + }, + ) + ) is not None: + assert isinstance(constant, int) + return constant center = 0 if min_value is not None: @@ -436,39 +513,64 @@ def draw_float( max_value: float = math.inf, allow_nan: bool = True, smallest_nonzero_magnitude: float, - # TODO: consider supporting these float widths at the IR level in the - # future. - # width: Literal[16, 32, 64] = 64, - # exclude_min and exclude_max handled higher up, ) -> float: - ( - sampler, - clamper, - nasty_floats, - ) = self._draw_float_init_logic( - min_value=min_value, - max_value=max_value, - allow_nan=allow_nan, + assert self._random is not None + + constraints: FloatConstraints = { + "min_value": min_value, + "max_value": max_value, + "allow_nan": allow_nan, + "smallest_nonzero_magnitude": smallest_nonzero_magnitude, + } + if ( + constant := self._maybe_draw_constant("float", constraints, p=0.15) + ) is not None: + assert isinstance(constant, float) + return constant + + # on top of the probability to draw a constant float, we independently + # upweight 0.0/-0.0, math.inf, -math.inf, nans, and boundary values. + weird_floats = [ + f + for f in [ + 0.0, + -0.0, + math.inf, + -math.inf, + math.nan, + -math.nan, + SIGNALING_NAN, + -SIGNALING_NAN, + min_value, + next_up(min_value), + min_value + 1, + max_value - 1, + next_down(max_value), + max_value, + ] + if choice_permitted(f, constraints) + ] + + if weird_floats and self._random.random() < 0.05: + return self._random.choice(weird_floats) + + clamper = make_float_clamper( + min_value, + max_value, smallest_nonzero_magnitude=smallest_nonzero_magnitude, + allow_nan=allow_nan, ) - assert self._cd is not None - - while True: - i = sampler.sample(self._cd) if sampler else 0 - if i == 0: - result = self._draw_float() - if allow_nan and math.isnan(result): - clamped = result # pragma: no cover - else: - clamped = clamper(result) - if float_to_int(clamped) != float_to_int(result) and not ( - math.isnan(result) and allow_nan - ): - result = clamped - else: - result = nasty_floats[i - 1] - return result + result = self._draw_float() + if allow_nan and math.isnan(result): + clamped = result # pragma: no cover + else: + clamped = clamper(result) + if float_to_int(clamped) != float_to_int(result) and not ( + math.isnan(result) and allow_nan + ): + result = clamped + return result def draw_string( self, @@ -483,14 +585,14 @@ def draw_string( if len(intervals) == 0: return "" - sampler, nasty_strings = self._draw_string_sampler( - intervals=intervals, - min_size=min_size, - max_size=max_size, - ) - - if sampler is not None and self.draw_boolean(p=0.05): - return nasty_strings[sampler.sample(self._cd)] + if ( + constant := self._maybe_draw_constant( + "string", + {"intervals": intervals, "min_size": min_size, "max_size": max_size}, + ) + ) is not None: + assert isinstance(constant, str) + return constant average_size = min( max(min_size * 2, min_size + 5), @@ -526,6 +628,14 @@ def draw_bytes( assert self._cd is not None assert self._random is not None + if ( + constant := self._maybe_draw_constant( + "bytes", {"min_size": min_size, "max_size": max_size} + ) + ) is not None: + assert isinstance(constant, bytes) + return constant + buf = bytearray() average_size = min( max(min_size * 2, min_size + 5), @@ -589,118 +699,11 @@ def _draw_bounded_integer( return self._random.randint(lower, upper) - @classmethod - def _draw_float_init_logic( - cls, - *, - min_value: float, - max_value: float, - allow_nan: bool, - smallest_nonzero_magnitude: float, - ) -> tuple[ - Optional[Sampler], - Callable[[float], float], - list[float], - ]: - """ - Caches initialization logic for draw_float, as an alternative to - computing this for *every* float draw. - """ - # float_to_int allows us to distinguish between e.g. -0.0 and 0.0, - # even in light of hash(-0.0) == hash(0.0) and -0.0 == 0.0. - key = ( - float_to_int(min_value), - float_to_int(max_value), - allow_nan, - float_to_int(smallest_nonzero_magnitude), - ) - if key in FLOAT_INIT_LOGIC_CACHE: - return FLOAT_INIT_LOGIC_CACHE[key] - - result = cls._compute_draw_float_init_logic( - min_value=min_value, - max_value=max_value, - allow_nan=allow_nan, - smallest_nonzero_magnitude=smallest_nonzero_magnitude, - ) - FLOAT_INIT_LOGIC_CACHE[key] = result - return result - - @staticmethod - def _compute_draw_float_init_logic( - *, - min_value: float, - max_value: float, - allow_nan: bool, - smallest_nonzero_magnitude: float, - ) -> tuple[ - Optional[Sampler], - Callable[[float], float], - list[float], - ]: - if smallest_nonzero_magnitude == 0.0: # pragma: no cover - raise FloatingPointError( - "Got allow_subnormal=True, but we can't represent subnormal floats " - "right now, in violation of the IEEE-754 floating-point " - "specification. This is usually because something was compiled with " - "-ffast-math or a similar option, which sets global processor state. " - "See https://simonbyrne.github.io/notes/fastmath/ for a more detailed " - "writeup - and good luck!" - ) - def permitted(f: float) -> bool: - if math.isnan(f): - return allow_nan - if 0 < abs(f) < smallest_nonzero_magnitude: - return False - return sign_aware_lte(min_value, f) and sign_aware_lte(f, max_value) - - boundary_values = [ - min_value, - next_up(min_value), - min_value + 1, - max_value - 1, - next_down(max_value), - max_value, - ] - nasty_floats = [f for f in NASTY_FLOATS + boundary_values if permitted(f)] - weights = [0.2 * len(nasty_floats)] + [0.8] * len(nasty_floats) - sampler = Sampler(weights, observe=False) if nasty_floats else None - - clamper = make_float_clamper( - min_value, - max_value, - smallest_nonzero_magnitude=smallest_nonzero_magnitude, - allow_nan=allow_nan, - ) - return (sampler, clamper, nasty_floats) - - @classmethod - def _draw_string_sampler( - cls, - *, - intervals: IntervalSet, - min_size: int, - max_size: int, - ) -> tuple[Optional[Sampler], list[str]]: - constraints: StringConstraints = { - "intervals": intervals, - "min_size": min_size, - "max_size": max_size, - } - key = choice_constraints_key("string", constraints) - if key in STRING_SAMPLER_CACHE: - return STRING_SAMPLER_CACHE[key] - - nasty_strings = [s for s in NASTY_STRINGS if choice_permitted(s, constraints)] - sampler = ( - Sampler([1 / len(nasty_strings)] * len(nasty_strings), observe=False) - if nasty_strings - else None - ) - result = (sampler, nasty_strings) - STRING_SAMPLER_CACHE[key] = result - return result +# Masks for masking off the first byte of an n-bit buffer. +# The appropriate mask is stored at position n % 8. +BYTE_MASKS = [(1 << n) - 1 for n in range(8)] +BYTE_MASKS[0] = 255 class BytestringProvider(PrimitiveProvider): diff --git a/hypothesis-python/src/hypothesis/internal/constants_ast.py b/hypothesis-python/src/hypothesis/internal/constants_ast.py index c69e5ba707..9b3877b63c 100644 --- a/hypothesis-python/src/hypothesis/internal/constants_ast.py +++ b/hypothesis-python/src/hypothesis/internal/constants_ast.py @@ -14,11 +14,14 @@ import sys from ast import AST, Constant, Expr, NodeVisitor, UnaryOp, USub from functools import lru_cache +from pathlib import Path from types import ModuleType -from typing import TYPE_CHECKING, Optional, Union +from typing import TYPE_CHECKING, AbstractSet, Optional, TypedDict, Union + +from sortedcontainers import SortedSet from hypothesis.internal.escalation import is_hypothesis_file -from hypothesis.internal.scrutineer import ModuleLocation +from hypothesis.internal.floats import float_to_int if TYPE_CHECKING: from typing import TypeAlias @@ -26,13 +29,42 @@ ConstantT: "TypeAlias" = Union[int, float, bytes, str] +class ConstantsT(TypedDict): + integer: AbstractSet[int] + float: AbstractSet[float] + bytes: AbstractSet[bytes] + string: AbstractSet[str] + + class ConstantVisitor(NodeVisitor): def __init__(self): super().__init__() self.constants: set[ConstantT] = set() - def _add_constant(self, constant: object) -> None: - self.constants |= self._unfold_constant(constant) + def _add_constant(self, value: object) -> None: + if isinstance(value, str) and ( + len(value) > 20 or value.isspace() or value == "" + ): + # discard long strings, which are unlikely to be useful. + return + if isinstance(value, bytes) and value == b"": + return + if isinstance(value, bool): + return + if isinstance(value, float) and math.isinf(value): + # we already upweight inf. + return + if isinstance(value, int) and -100 < value < 100: + # we already upweight small integers. + return + + if isinstance(value, (int, float, bytes, str)): + self.constants.add(value) + return + + # I don't kow what case could go here, but am also not confident there + # isn't one. + return # pragma: no cover def visit_UnaryOp(self, node: UnaryOp) -> None: # `a = -1` is actually a combination of a USub and the constant 1. @@ -59,31 +91,13 @@ def visit_JoinedStr(self, node): # in f strings are unlikely to be helpful. return - @classmethod - def _unfold_constant(cls, value: object) -> set[ConstantT]: - if isinstance(value, str) and ( - len(value) > 20 or value.isspace() or value == "" - ): - # discard long strings, which are unlikely to be useful. - return set() - if isinstance(value, bool): - return set() - if isinstance(value, float) and math.isinf(value): - # we already upweight inf. - return set() - if isinstance(value, (int, float, bytes, str)): - return {value} - # I don't kow what case could go here, but am also not confident there - # isn't one. - return set() # pragma: no cover - def visit_Constant(self, node): self._add_constant(node.value) self.generic_visit(node) @lru_cache(1024) -def constants_from_ast(tree: AST) -> set[ConstantT]: +def constants_from_ast(tree: AST) -> AbstractSet[ConstantT]: visitor = ConstantVisitor() visitor.visit(tree) return visitor.constants @@ -100,45 +114,65 @@ def _module_ast(module: ModuleType) -> Optional[AST]: return tree -def local_modules() -> tuple[ModuleType, ...]: - modules = [] - for module in sys.modules.values(): - if ( - not hasattr(module, "__file__") - or module.__file__ is None - # Skip expensive path lookup for stdlib modules. - # This will cause false negatives if a user names their module the - # same as a stdlib module. - # - # sys.stdlib_module_names is new in 3.10 - or ( - sys.version_info >= (3, 10) - and module.__name__ in sys.stdlib_module_names - ) - or ModuleLocation.from_path(module.__file__) is not ModuleLocation.LOCAL - ): - continue - - modules.append(module) - return tuple(modules) - +@lru_cache(4096) +def _is_local_module_file(path: str) -> bool: + from hypothesis.internal.scrutineer import ModuleLocation -def local_constants(): - constants = set() - for module in local_modules(): + return ( + # Skip expensive path lookup for stdlib modules. + # This will cause false negatives if a user names their module the + # same as a stdlib module. + # + # sys.stdlib_module_names is new in 3.10 + not (sys.version_info >= (3, 10) and path in sys.stdlib_module_names) + and ModuleLocation.from_path(path) is ModuleLocation.LOCAL # normally, hypothesis is a third-party library and is not returned # by local_modules. However, if it is installed as an editable package # with pip install -e, then we will pick up on it. Just hardcode an # ignore here. + and not is_hypothesis_file(path) + # avoid collecting constants from test files + and not ( + "test" in (p := Path(path)).parts + or "tests" in p.parts + or p.stem.startswith("test_") + or p.stem.endswith("_test") + ) + ) - # this is actually covered by test_constants_from_running_file, but - # not in the same process. - if is_hypothesis_file(module.__file__): # pragma: no cover - continue +def local_modules() -> tuple[ModuleType, ...]: + return tuple( + module + for module in sys.modules.values() + if ( + getattr(module, "__file__", None) is not None + and _is_local_module_file(module.__file__) + ) + ) + + +def local_constants() -> ConstantsT: + constants: set[ConstantT] = set() + for module in local_modules(): tree = _module_ast(module) if tree is None: # pragma: no cover continue constants |= constants_from_ast(tree) - return constants + local_constants: ConstantsT = { + "integer": SortedSet(), + "float": SortedSet(key=float_to_int), + "bytes": SortedSet(), + "string": SortedSet(), + } + for value in constants: + choice_type = { + int: "integer", + float: "float", + bytes: "bytes", + str: "string", + }[type(value)] + local_constants[choice_type].add(value) # type: ignore # hard to type + + return local_constants diff --git a/hypothesis-python/tests/conjecture/common.py b/hypothesis-python/tests/conjecture/common.py index 2c7f859d91..eabf662a3c 100644 --- a/hypothesis-python/tests/conjecture/common.py +++ b/hypothesis-python/tests/conjecture/common.py @@ -264,8 +264,9 @@ def _collection_constraints(draw, *, forced, use_min_size=None, use_max_size=Non min_value=min_size if forced is None else max(min_size, len(forced)) ) ) - # cap to some reasonable max size to avoid overruns. - max_size = min(max_size, min_size + 100) + if forced is None: + # cap to some reasonable max size to avoid overruns. + max_size = min(max_size, min_size + 100) return {"min_size": min_size, "max_size": max_size} diff --git a/hypothesis-python/tests/conjecture/test_alt_backend.py b/hypothesis-python/tests/conjecture/test_alt_backend.py index b6f9593e08..44d1bc561a 100644 --- a/hypothesis-python/tests/conjecture/test_alt_backend.py +++ b/hypothesis-python/tests/conjecture/test_alt_backend.py @@ -282,7 +282,7 @@ def draw_string(self, *args, **constraints): class InvalidLifetime(TrivialProvider): - lifetime = "forever and a day!" + lifetime = "forever and a day" def test_invalid_lifetime(): @@ -350,8 +350,8 @@ def test_function(n): # we create a new provider each time we *try* to generate an input to the # test function, but this could be filtered out, discarded as duplicate, # etc. We also sometimes try predetermined inputs to the test function, - # such as the zero buffer, which does not entail creating providers. - # These two facts combined mean that the number of inits could be + # such as ChoiceTemplate(type="simplest"), which does not entail creating + # providers. These two facts combined mean that the number of inits could be # anywhere reasonably close to the number of function calls. assert ( test_function_count - 10 diff --git a/hypothesis-python/tests/conjecture/test_ir.py b/hypothesis-python/tests/conjecture/test_choice.py similarity index 100% rename from hypothesis-python/tests/conjecture/test_ir.py rename to hypothesis-python/tests/conjecture/test_choice.py diff --git a/hypothesis-python/tests/conjecture/test_forced.py b/hypothesis-python/tests/conjecture/test_forced.py index 3a467606c5..1c214356e3 100644 --- a/hypothesis-python/tests/conjecture/test_forced.py +++ b/hypothesis-python/tests/conjecture/test_forced.py @@ -13,7 +13,7 @@ import pytest import hypothesis.strategies as st -from hypothesis import HealthCheck, assume, example, given, settings +from hypothesis import HealthCheck, example, given, settings from hypothesis.internal.conjecture import utils as cu from hypothesis.internal.conjecture.choice import choice_equal from hypothesis.internal.conjecture.data import ConjectureData @@ -133,14 +133,6 @@ def test_forced_many(data): @given(choice_types_constraints(use_forced=True)) def test_forced_values(choice_type_and_constraints): (choice_type, constraints) = choice_type_and_constraints - - if choice_type == "float": - # TODO intentionally avoid triggering a bug with forcing nan values - # while both min and max value have the opposite sign. - # Once we fix the aforementioned bug we can remove this intentional - # weakening of the test. - assume(not math.isnan(constraints["forced"])) - forced = constraints["forced"] data = fresh_data() assert choice_equal(getattr(data, f"draw_{choice_type}")(**constraints), forced) diff --git a/hypothesis-python/tests/conjecture/test_local_constants.py b/hypothesis-python/tests/conjecture/test_local_constants.py new file mode 100644 index 0000000000..cc2865a8b6 --- /dev/null +++ b/hypothesis-python/tests/conjecture/test_local_constants.py @@ -0,0 +1,53 @@ +# This file is part of Hypothesis, which may be found at +# https://github.com/HypothesisWorks/hypothesis/ +# +# Copyright the Hypothesis Authors. +# Individual contributors are listed in AUTHORS.rst and the git log. +# +# This Source Code Form is subject to the terms of the Mozilla Public License, +# v. 2.0. If a copy of the MPL was not distributed with this file, You can +# obtain one at https://mozilla.org/MPL/2.0/. + +import math + +import pytest + +from hypothesis import settings, strategies as st +from hypothesis.internal.conjecture import providers +from hypothesis.internal.conjecture.choice import choice_equal + +from tests.common.debug import find_any +from tests.common.utils import Why, xfail_on_crosshair + + +# I tried using @given(st.integers()) here, but I think there is a bad interaction +# with CONSTANTS_CACHE when testing it inside of a hypothesis test. +@pytest.mark.parametrize("value", [2**20 - 50, 2**10 - 10, 129387123, -19827321, 0]) +def test_can_draw_local_constants_integers(monkeypatch, value): + monkeypatch.setattr(providers, "local_constants", lambda: {"integer": {value}}) + find_any(st.integers(), lambda v: choice_equal(v, value)) + + +@xfail_on_crosshair(Why.undiscovered) # I think float_to_int is difficult for crosshair +@pytest.mark.parametrize("value", [1.2938, -1823.0239, 1e999, math.nan]) +def test_can_draw_local_constants_floats(monkeypatch, value): + monkeypatch.setattr(providers, "local_constants", lambda: {"float": {value}}) + find_any(st.floats(), lambda v: choice_equal(v, value)) + + +@pytest.mark.parametrize("value", [b"abdefgh", b"a" * 50]) +def test_can_draw_local_constants_bytes(monkeypatch, value): + monkeypatch.setattr(providers, "local_constants", lambda: {"bytes": {value}}) + find_any(st.binary(), lambda v: choice_equal(v, value)) + + +@pytest.mark.parametrize("value", ["abdefgh", "a" * 50]) +def test_can_draw_local_constants_string(monkeypatch, value): + monkeypatch.setattr(providers, "local_constants", lambda: {"string": {value}}) + # we have a bunch of strings in GLOBAL_CONSTANTS, so it might take a while + # to generate our local constant. + find_any( + st.text(), + lambda v: choice_equal(v, value), + settings=settings(max_examples=5_000), + ) diff --git a/hypothesis-python/tests/conjecture/test_pareto.py b/hypothesis-python/tests/conjecture/test_pareto.py index 241581dc24..b6d8dd01ce 100644 --- a/hypothesis-python/tests/conjecture/test_pareto.py +++ b/hypothesis-python/tests/conjecture/test_pareto.py @@ -76,12 +76,13 @@ def test_database_contains_only_pareto_front(): with deterministic_PRNG(): def test(data): - data.target_observations["1"] = data.draw(st.integers(0, 2**4 - 1)) - data.draw(st.integers(0, 2**64 - 1)) - data.target_observations["2"] = data.draw(st.integers(0, 2**8 - 1)) + data.target_observations["1"] = data.draw(st.integers(0, 5)) + data.draw(st.integers()) + data.target_observations["2"] = data.draw(st.integers(0, 100)) - db = InMemoryExampleDatabase() + assert len(set(db.fetch(b"stuff.pareto"))) == len(runner.pareto_front) + db = InMemoryExampleDatabase() runner = ConjectureRunner( test, settings=settings( @@ -89,20 +90,16 @@ def test(data): ), database_key=b"stuff", ) - runner.run() assert len(runner.pareto_front) <= 500 - for v in runner.pareto_front: assert v.status >= Status.VALID - assert len(db.data) == 1 - - (values,) = db.data.values() - values = set(values) - - assert len(values) == len(runner.pareto_front) + values = set(db.fetch(b"stuff.pareto")) + assert len(values) == len(runner.pareto_front), { + choices_to_bytes(data.choices) for data in runner.pareto_front + }.symmetric_difference(values) for data in runner.pareto_front: assert choices_to_bytes(data.choices) in values diff --git a/hypothesis-python/tests/cover/test_constants_ast.py b/hypothesis-python/tests/cover/test_constants_ast.py index c6b3e2f61f..9da8565d1a 100644 --- a/hypothesis-python/tests/cover/test_constants_ast.py +++ b/hypothesis-python/tests/cover/test_constants_ast.py @@ -17,7 +17,11 @@ import pytest from hypothesis import given, strategies as st -from hypothesis.internal.constants_ast import _module_ast, constants_from_ast +from hypothesis.internal.constants_ast import ( + _is_local_module_file, + _module_ast, + constants_from_ast, +) from tests.common.utils import skipif_emscripten @@ -27,21 +31,24 @@ [ ( """ - a1 = 42 + a1 = 142 a2 = 3.14 a3 = 'test1' a4 = b'test2' - a5 = (1, 2) - a6 = frozenset([3]) + a5 = (101, 102) + a6 = frozenset([103]) """, - {42, 3.14, "test1", b"test2", 1, 2, 3}, + {142, 3.14, 101, 102, 103, "test1", b"test2"}, + ), + ( + "a = (101, (102, 103), frozenset([104, 105]))", + {101, 102, 103, 104, 105}, ), - ("a = (1, (2, 3), frozenset([4, 5]))", {1, 2, 3, 4, 5}), - ("a = {'b': 1}", {"b", 1}), - ("a = [1]", {1}), - ("a = +42", {42}), - ("a = 1 + 2", {1, 2}), - ("a = ~ 42", {42}), + ("a = {'b': 101}", {"b", 101}), + ("a = [101]", {101}), + ("a = +142", {142}), + ("a = 101 + 102", {101, 102}), + ("a = ~ 142", {142}), # the following cases are ignored: # * booleans # * math.inf and math.nan (not constants, but we don't want to collect them @@ -51,6 +58,8 @@ # * pure-whitespace strings # * standalone string expressions (strings not assigned to a variable). # This covers docstrings of all kinds. + # * small integers + # * the empty bytestring b"" ("a = True", set()), ("a = False", set()), ("a = not False", set()), @@ -64,6 +73,9 @@ ('a = "\\n \\n \\n"', set()), ("'test'", set()), ("'test with \\n newlines'", set()), + ("a = 10", set()), + ("a = -1", set()), + ("a = b''", set()), ], ) def test_constants_from_ast(source, expected): @@ -72,16 +84,19 @@ def test_constants_from_ast(source, expected): assert constants_from_ast(tree) == expected -@given(st.integers(max_value=-1)) +@given(st.integers(max_value=-101)) def test_parses_negatives(n): assert constants_from_ast(ast.parse(f"a = {n}")) == {n} constants = st.one_of( - st.integers(), + # constants_from_ast skips small integers + st.integers(max_value=-101), + st.integers(min_value=101), st.floats(allow_nan=False, allow_infinity=False), - st.binary(), - # constants_from_ast ignores the following strings: + # constants_from_ast skips b"" + st.binary(min_size=1), + # constants_from_ast skips the following strings: # * empty strings # * long strings # * strings which are entirely spaces @@ -103,7 +118,7 @@ def test_frozenset_constants(value): @skipif_emscripten def test_constants_from_running_file(tmp_path): - p = tmp_path / "test_constants.py" + p = tmp_path / "my_constants.py" p.write_text( textwrap.dedent( """ @@ -123,25 +138,19 @@ def test_constants_from_running_file(tmp_path): del sys.modules[module] # local - a = 42 + a = 142 b = "test1" c = True d = 3.14 e = b"test2" - f = (1, 2) - g = frozenset([3, 4]) + f = (101, 102) + g = frozenset([103, 104]) actual = local_constants() assert actual == { - "hypofuzz", - 42, - "test1", - True, - 3.14, - b"test2", - 1, - 2, - 3, - 4 + 'string': {'float', 'string', 'bytes', 'integer', 'test1', 'hypofuzz'}, + 'float': {3.14}, + 'bytes': {b'test2'}, + "integer": {142, 101, 102, 103, 104} }, actual """, ), @@ -154,3 +163,16 @@ def test_constants_from_bad_module(): # covering test for the except branch module = ModuleType("nonexistent") assert _module_ast(module) is None + + +@pytest.mark.parametrize( + "path", + [ + "/path/to/tests/module", + "/path/to/test/module", + "/a/test_file.py", + "/a/file_test.py", + ], +) +def test_local_modules_ignores_test_modules(path): + assert not _is_local_module_file(path) diff --git a/hypothesis-python/tests/cover/test_datetimes.py b/hypothesis-python/tests/cover/test_datetimes.py index 51677c7df2..72b3979bc1 100644 --- a/hypothesis-python/tests/cover/test_datetimes.py +++ b/hypothesis-python/tests/cover/test_datetimes.py @@ -73,7 +73,7 @@ def test_bordering_on_a_leap_year(): dt.datetime.min.replace(year=2003), dt.datetime.max.replace(year=2005) ), lambda x: x.month == 2 and x.day == 29, - settings=settings(max_examples=1200), + settings=settings(max_examples=2500), ) assert x.year == 2004 diff --git a/hypothesis-python/tests/nocover/test_floating.py b/hypothesis-python/tests/nocover/test_floating.py index fd679c3429..6acbeb465f 100644 --- a/hypothesis-python/tests/nocover/test_floating.py +++ b/hypothesis-python/tests/nocover/test_floating.py @@ -57,6 +57,7 @@ def test_negation_is_self_inverse(x): @fails @given(lists(floats())) +@TRY_HARDER def test_is_not_nan(xs): assert not any(math.isnan(x) for x in xs) diff --git a/hypothesis-python/tests/nocover/test_stateful.py b/hypothesis-python/tests/nocover/test_stateful.py index 98fb01b280..9c9757f748 100644 --- a/hypothesis-python/tests/nocover/test_stateful.py +++ b/hypothesis-python/tests/nocover/test_stateful.py @@ -29,7 +29,7 @@ def run_to_notes(TestClass): TestCase = TestClass.TestCase # don't add explain phase notes to the error - TestCase.settings = Settings(phases=set(Phase) - {Phase.explain}) + TestCase.settings = Settings(phases=set(Phase) - {Phase.explain}, max_examples=500) try: TestCase().runTest() except AssertionError as err: diff --git a/hypothesis-python/tox.ini b/hypothesis-python/tox.ini index 32965fce8a..6cd6b3790b 100644 --- a/hypothesis-python/tox.ini +++ b/hypothesis-python/tox.ini @@ -145,7 +145,7 @@ commands = [testenv:crosshair-{cover,nocover,niche,custom}] deps = -r../requirements/test.txt - -e .[crosshair] + -r../requirements/crosshair.txt allowlist_externals = bash setenv= diff --git a/requirements/crosshair.in b/requirements/crosshair.in new file mode 100644 index 0000000000..3cad3e7c3f --- /dev/null +++ b/requirements/crosshair.in @@ -0,0 +1,3 @@ +crosshair-tool==0.0.85 +hypothesis-crosshair==0.0.20 +-c test.in # match test.in attrs pin diff --git a/requirements/crosshair.txt b/requirements/crosshair.txt new file mode 100644 index 0000000000..22f9bd19e1 --- /dev/null +++ b/requirements/crosshair.txt @@ -0,0 +1,60 @@ +# +# This file is autogenerated by pip-compile with Python 3.10 +# by the following command: +# +# ./build.sh upgrade-requirements +# +attrs==24.1.0 + # via + # -r requirements/crosshair.in + # cattrs + # hypothesis + # hypothesis (hypothesis-python/pyproject.toml) + # lsprotocol +cattrs==24.1.3 + # via + # lsprotocol + # pygls +crosshair-tool==0.0.85 + # via + # -r requirements/crosshair.in + # hypothesis-crosshair +exceptiongroup==1.2.2 ; python_version < "3.11" + # via + # cattrs + # hypothesis + # hypothesis (hypothesis-python/pyproject.toml) +hypothesis==6.131.0 + # via hypothesis-crosshair +hypothesis-crosshair==0.0.20 + # via -r requirements/crosshair.in +importlib-metadata==8.6.1 + # via crosshair-tool +importlib-resources==6.5.2 + # via typeshed-client +lsprotocol==2023.0.1 + # via pygls +mypy-extensions==1.0.0 + # via typing-inspect +packaging==24.2 + # via crosshair-tool +pygls==1.3.1 + # via crosshair-tool +sortedcontainers==2.4.0 + # via + # hypothesis + # hypothesis (hypothesis-python/pyproject.toml) +typeshed-client==2.7.0 + # via crosshair-tool +typing-extensions==4.13.2 + # via + # cattrs + # crosshair-tool + # typeshed-client + # typing-inspect +typing-inspect==0.9.0 + # via crosshair-tool +z3-solver==4.14.1.0 + # via crosshair-tool +zipp==3.21.0 + # via importlib-metadata