Python Hypothesis: Property-Based Testing Guide
Hypothesis is a Python library for property-based testing — instead of writing specific example inputs, you describe the shape of valid inputs and Hypothesis generates many random test cases automatically (100 per test by default), including tricky edge cases like empty strings, negative numbers, unusual Unicode characters, and NaN. When it finds a failure it automatically shrinks the input to a minimal reproducing example. Property-based testing is one of the most effective techniques for finding bugs that hand-written examples miss.
Table of Contents
Installation and First Test
pip install hypothesis pytest
# test_basics.py
from hypothesis import given
from hypothesis import strategies as st
def encode_decode(s: str) -> str:
    """Round-trip *s* through UTF-8: encode to bytes, then decode back to text."""
    utf8_bytes = s.encode("utf-8")
    return utf8_bytes.decode("utf-8")
@given(st.text())
def test_encode_decode_identity(s: str):
    """Round-trip property: a UTF-8 encode/decode cycle leaves any text unchanged."""
    round_tripped = encode_decode(s)
    assert round_tripped == s
# Run with: pytest test_basics.py -v
# By default Hypothesis generates 100 random strings, including the empty string,
# ASCII, Unicode, emoji and null characters (st.text() excludes surrogates by default).
If you share the .hypothesis/ directory (where Hypothesis stores the failing examples it has found), your CI picks up the same failures immediately without needing to re-discover them.
Built-in Strategies
Strategies are composable generators. Hypothesis ships with strategies for all Python built-in types, and you can combine them to generate complex nested structures matching your domain model.
from hypothesis import given, assume
from hypothesis import strategies as st
@given(st.integers())
def test_integers(n: int):
    """Every draw from st.integers() is an int.

    Typical draws include small values such as 0, -1 and 1 as well as
    integers of very large magnitude (Python ints are unbounded).
    """
    drawn_is_int = isinstance(n, int)
    assert drawn_is_int
@given(st.integers(min_value=1, max_value=100))
def test_bounded_integer(n: int):
    """Draws respect both the lower and the upper bound (inclusive)."""
    assert n >= 1 and n <= 100
@given(
    st.floats(
        min_value=-1e300,
        max_value=1e300,
        allow_nan=False,
        allow_infinity=False,
    )
)
def test_float_arithmetic(f: float):
    """Property: doubling then halving a float returns the original value.

    NaN and infinity are excluded from generation, and the magnitude is
    capped at 1e300.  Without the cap, finite floats above roughly 8.99e307
    overflow to infinity when doubled, so the property would fail even
    though the input itself is finite.
    """
    # Scaling by a power of two is exact in binary floating point as long as
    # the intermediate result does not overflow, so plain equality is safe.
    assert f * 2 / 2 == f
@given(st.text(min_size=1, max_size=50))
def test_non_empty_text(s: str):
    """With min_size=1 the generated text is never empty."""
    assert len(s) > 0
@given(st.lists(st.integers(), min_size=1))
def test_list_max(lst: list[int]):
    """Property: max of a list is always >= every element.

    min_size=1 keeps max() from raising ValueError on an empty list.
    """
    largest = max(lst)
    # Check every element, not only the first one, so the assertion matches
    # the property stated above.
    assert all(largest >= item for item in lst)
@given(st.dictionaries(st.text(), st.integers()))
def test_dict_round_trip(d: dict):
    """Property: a dict of text keys and int values survives a JSON round-trip."""
    import json

    serialized = json.dumps(d)
    restored = json.loads(serialized)
    assert restored == d
@given(st.one_of(st.integers(), st.floats(allow_nan=False), st.text()))
def test_one_of(value):
    """one_of picks one of the given strategies per example and draws from it.

    Here a value is an int, a non-NaN float, or a string — never None.
    """
    assert value is not None
Composite and Custom Strategies
Use @st.composite to build domain-specific strategies from multiple existing strategies. This is how you generate realistic test data matching your application's data model.
from hypothesis import given
from hypothesis import strategies as st
from dataclasses import dataclass
@dataclass
class Order:
    """Plain record describing one order, used as the target of the strategies."""

    # Order identifier.
    order_id: str
    # Identifier of the user who placed the order.
    user_id: int
    # Line items, one dict per item.
    items: list[dict]
    # Order total.
    total: float
@st.composite
def order_strategy(draw) -> Order:
    """Generate a realistic Order whose total is consistent with its items.

    The order has an 8-character ID drawn from upper-case letters and digits,
    a user ID in [1, 1_000_000], and between 1 and 10 line items.  Each item
    is a dict with a "price" (rounded to 2 decimals) and a "qty" in [1, 20].

    Args:
        draw: Function supplied by ``@st.composite`` for drawing values from
            other strategies.

    Returns:
        An Order whose ``total`` is the sum of ``price * qty`` over the
        stored items, rounded to 2 decimals.
    """
    order_id = draw(
        st.text(
            alphabet="ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789",
            min_size=8,
            max_size=8,
        )
    )
    user_id = draw(st.integers(min_value=1, max_value=1_000_000))
    n_items = draw(st.integers(min_value=1, max_value=10))
    items = []
    total = 0.0
    for _ in range(n_items):
        raw_price = draw(
            st.floats(
                min_value=0.01,
                max_value=999.99,
                allow_nan=False,
                allow_infinity=False,
            )
        )
        # Round once, up front, so the stored price and the running total
        # use the same value; otherwise the total drifts away from the items.
        price = round(raw_price, 2)
        qty = draw(st.integers(min_value=1, max_value=20))
        items.append({"price": price, "qty": qty})
        total += price * qty
    return Order(order_id=order_id, user_id=user_id, items=items, total=round(total, 2))
@given(order_strategy())
def test_order_total_is_positive(order: Order):
    """A generated order always has a strictly positive total."""
    assert 0 < order.total
@given(order_strategy())
def test_order_id_length(order: Order):
    """A generated order ID is always exactly 8 characters long."""
    id_length = len(order.order_id)
    assert id_length == 8
# Combining strategies with flatmap: the second strategy depends on the first draw
def _pair_with_larger(n):
    """Build a strategy of (n, m) tuples where m lies in [n + 1, n + 100]."""
    larger = st.integers(min_value=n + 1, max_value=n + 100)
    return st.tuples(st.just(n), larger)


positive_and_larger = st.integers(min_value=1, max_value=100).flatmap(_pair_with_larger)
@given(positive_and_larger)
def test_a_less_than_b(pair):
    """The second element of a generated pair is always larger than the first."""
    smaller, larger = pair
    assert smaller < larger
Writing Good Properties
The key skill in property-based testing is identifying invariants — properties that must hold for all valid inputs. Common property patterns include round-trip, oracle, commutativity, idempotence, and metamorphic relations.
from hypothesis import given
from hypothesis import strategies as st
import json
# 1. Round-trip property
@given(st.binary())
def test_base64_round_trip(data: bytes):
    """Decoding the Base64 encoding of any byte string yields the original bytes."""
    import base64

    encoded = base64.b64encode(data)
    decoded = base64.b64decode(encoded)
    assert decoded == data
# 2. Oracle property (compare fast impl against slow reference impl)
def sort_fast(lst: list[int]) -> list[int]:
    """Return a new ascending-sorted list; stands in for the production sort."""
    ordered = list(lst)
    ordered.sort()
    return ordered
def sort_reference(lst: list[int]) -> list[int]:
    """Return an ascending-sorted copy of *lst* using bubble sort.

    Deliberately simple and slow: it serves as the trusted reference that
    the fast implementation is compared against.  The input is not mutated.
    """
    items = [*lst]
    size = len(items)
    for settled in range(size):
        # After each pass the largest remaining value has bubbled to the end,
        # so the last `settled` positions can be skipped.
        for left in range(size - 1 - settled):
            right = left + 1
            if items[left] > items[right]:
                items[left], items[right] = items[right], items[left]
    return items
@given(st.lists(st.integers()))
def test_sort_matches_reference(lst: list[int]):
    """Oracle property: the fast sort agrees with the reference sort on any list."""
    actual = sort_fast(lst)
    expected = sort_reference(lst)
    assert actual == expected
# 3. Idempotence property
@given(st.text())
def test_strip_idempotent(s: str):
    """Stripping an already-stripped string changes nothing."""
    stripped_once = s.strip()
    assert stripped_once == stripped_once.strip()
# 4. Metamorphic relation
@given(st.lists(st.integers(), min_size=1))
def test_max_after_adding_larger(lst: list[int]):
    """Appending a value above the current max makes that value the new max."""
    bigger = max(lst) + 1
    extended = [*lst, bigger]
    assert max(extended) == bigger
# 5. Structural property
@given(st.lists(st.integers()))
def test_sorted_list_is_ordered(lst: list[int]):
    """Every adjacent pair in the sorted output is in non-decreasing order."""
    result = sorted(lst)
    adjacent_pairs = zip(result, result[1:])
    assert all(left <= right for left, right in adjacent_pairs)
Settings and Profiles
Use @settings to control how many examples Hypothesis generates, the deadline per test, and suppressed health checks. Define profiles in conftest.py to run quick tests locally and thorough tests in CI.
from hypothesis import given, settings, HealthCheck
from hypothesis import strategies as st
# Increase examples for thorough CI testing (and disable the per-example deadline)
@settings(max_examples=1000, deadline=None)
@given(st.text())
def test_thorough(s: str):
    """Lower-casing any text yields a str."""
    lowered = s.lower()
    assert isinstance(lowered, str)
# Suppress health check for slow data generation
@settings(suppress_health_check=[HealthCheck.too_slow], max_examples=200)
@given(st.text(min_size=100, max_size=10000))
def test_large_text(s: str):
    """Generated text honours the requested minimum size of 100."""
    minimum_size = 100
    assert len(s) >= minimum_size
# conftest.py — define profiles
import os

from hypothesis import Phase, settings

# Quick local runs: few examples, 200 ms deadline per example.
settings.register_profile("dev", max_examples=20, deadline=200)

# Thorough CI runs: many examples, no deadline, explicit list of phases.
_CI_PHASES = [Phase.explicit, Phase.reuse, Phase.generate, Phase.shrink]
settings.register_profile("ci", max_examples=500, deadline=None, phases=_CI_PHASES)

# Activate with: HYPOTHESIS_PROFILE=ci pytest
# Falls back to the "dev" profile when the variable is unset.
settings.load_profile(os.environ.get("HYPOTHESIS_PROFILE", "dev"))
Stateful Testing
Stateful (model-based) testing generates sequences of operations on a stateful system and checks invariants after each step. It is ideal for testing data structures, APIs, and anything where the order of operations matters.
from hypothesis.stateful import RuleBasedStateMachine, rule, invariant
from hypothesis import strategies as st
class SetMachine(RuleBasedStateMachine):
    """State machine that drives a set implementation alongside a reference set.

    Both `model` and `impl` are built-in sets here; swap `impl` for the
    implementation you actually want to test.
    """

    def __init__(self):
        super().__init__()
        # Trusted reference implementation.
        self.model = set()
        # Implementation under test.
        self.impl = set()

    @rule(value=st.integers())
    def add_value(self, value: int):
        """Add the same value to the reference and to the implementation."""
        self.model.add(value)
        self.impl.add(value)

    @rule(value=st.integers())
    def discard_value(self, value: int):
        """Discard the same value from both; a no-op when it is absent."""
        self.model.discard(value)
        self.impl.discard(value)

    @invariant()
    def sets_are_equal(self):
        """Both sets must hold the same elements."""
        assert self.model == self.impl

    @invariant()
    def length_matches(self):
        """Both sets must report the same size."""
        model_size = len(self.model)
        impl_size = len(self.impl)
        assert model_size == impl_size


# Convert to a pytest test class
TestSet = SetMachine.TestCase
Testing Django and FastAPI
Hypothesis integrates seamlessly with pytest and Django's test client. For FastAPI, combine Hypothesis with FastAPI's TestClient (which is built on httpx) to property-test entire endpoints.
# FastAPI property test
from hypothesis import given, settings
from hypothesis import strategies as st
from fastapi.testclient import TestClient
from app.main import app
# Module-level test client wrapping the FastAPI app; the endpoint tests call it.
client = TestClient(app)
@given(
    name=st.text(min_size=1, max_size=100),
    age=st.integers(min_value=0, max_value=150),
)
@settings(max_examples=100)
def test_create_user_any_valid_input(name: str, age: int):
    """POST /users with any generated name and age must not produce a server error."""
    payload = {"name": name, "age": age}
    response = client.post("/users", json=payload)
    # Property: valid input never returns 5xx
    assert response.status_code < 500
@given(st.text())
def test_search_never_crashes(query: str):
    """GET /search answers any query text with 200, 400 or 422."""
    acceptable_statuses = {200, 400, 422}  # no 500
    response = client.get("/search", params={"q": query})
    assert response.status_code in acceptable_statuses
Frequently Asked Questions
- How many examples does Hypothesis run by default?
- 100 examples per test. This is usually enough to find common bugs. Set max_examples=1000 in CI for more thorough checking. Hypothesis also caches previously found failures and always re-runs them first.
- What is shrinking?
- When Hypothesis finds a failing example, it automatically tries smaller and simpler inputs until it finds the minimal reproducible case. For example, if a list of 500 integers causes a failure, Hypothesis shrinks it to the smallest list (often 1-2 elements) that still triggers the same assertion error.
- Hypothesis vs pytest-randomly?
- pytest-randomly randomises test order and seeds random functions. Hypothesis generates structured inputs with domain knowledge (empty string, max integer, etc.), shrinks failures, and persists them. They complement each other — use both.