Python Tenacity: Retry Logic and Backoff Strategies
Tenacity is the go-to Python library for retrying operations that fail due to transient errors — network timeouts, database connection resets, rate-limit 429s, or any other intermittent failure. It provides a composable decorator API with configurable stop conditions, wait strategies including exponential backoff with jitter, retry predicates, and before/after callbacks. It supports both synchronous and async code with the same API.
Table of Contents
Basic Retry Decorator
pip install tenacity
from tenacity import retry, stop_after_attempt, wait_fixed
import httpx
@retry(wait=wait_fixed(1), stop=stop_after_attempt(3))
def fetch_user(user_id: int) -> dict:
    """Fetch one user record as JSON, making at most 3 attempts spaced 1 second apart."""
    url = f"https://api.example.com/users/{user_id}"
    resp = httpx.get(url, timeout=5)
    # Turn 4xx/5xx responses into exceptions so that they count as failed attempts.
    resp.raise_for_status()
    return resp.json()
# Callers invoke the function normally; the retry loop is hidden inside it.
try:
    user = fetch_user(42)
except Exception as exc:
    # Reached only once every attempt has failed.
    print(f"All retries exhausted: {exc}")
Stop Conditions
Combine multiple stop conditions with | (logical OR) or & (logical AND) to create precise retry windows. Stop after a maximum number of attempts or after a maximum elapsed time, whichever comes first.
from tenacity import (
retry, stop_after_attempt, stop_after_delay, stop_never
)
# Give up once 5 attempts have been made
@retry(stop=stop_after_attempt(5))
def call_a():
    """Placeholder operation limited to 5 attempts."""
# Give up once 30 seconds have elapsed in total, however many attempts that took
@retry(stop=stop_after_delay(30))
def call_b():
    """Placeholder operation limited to a 30-second retry window."""
# Give up at whichever limit is hit first: 5 attempts OR 30 seconds
@retry(stop=(stop_after_attempt(5) | stop_after_delay(30)))
def call_c():
    """Placeholder operation limited by both attempt count and elapsed time."""
# Retry forever, pausing 60 seconds between attempts — useful for background
# jobs that must complete eventually.
# Use with caution and always pair with a circuit breaker at a higher level.
@retry(wait=wait_fixed(60), stop=stop_never)
def critical_background_sync():
    """Placeholder background job that is retried indefinitely."""
Wait and Backoff Strategies
The wait strategy controls the delay between retries. Always add jitter (randomness) to exponential backoff to prevent the thundering herd problem where many clients retry simultaneously after a service blip.
from tenacity import (
retry, stop_after_attempt,
wait_fixed, wait_random, wait_exponential,
wait_exponential_jitter, wait_combine, wait_random_exponential
)
# Fixed wait — the same 2-second pause before every retry; simple and predictable
@retry(wait=wait_fixed(2), stop=stop_after_attempt(3))
def fixed_wait():
    """Placeholder operation retried with a constant delay."""
# Random wait — a pause drawn from the 1–5 second range, which spreads retries
# out when many clients retry simultaneously
@retry(wait=wait_random(min=1, max=5), stop=stop_after_attempt(3))
def random_wait():
    """Placeholder operation retried with a randomised delay."""
# Exponential backoff: 1s, 2s, 4s, 8s, ... capped at 60s
@retry(
    wait=wait_exponential(multiplier=1, min=1, max=60),
    stop=stop_after_attempt(8),
)
def exponential_backoff():
    """Placeholder operation retried with a doubling delay."""
# Exponential + jitter (RECOMMENDED for production)
# The delay grows exponentially from `initial` (capped at `max`), and a random
# extra of up to `jitter` seconds is added each time so that clients which
# failed together do not all retry at the same instant (thundering herd).
@retry(
    stop=stop_after_attempt(6),
    wait=wait_exponential_jitter(initial=1, max=60, jitter=2),
)
def exponential_jitter():
    """Placeholder operation retried with exponential backoff plus additive jitter."""
# Random exponential — the "full jitter" strategy (AWS recommended)
@retry(
    wait=wait_random_exponential(multiplier=1, max=60),
    stop=stop_after_attempt(6),
)
def full_jitter():
    """Placeholder operation retried with a fully randomised exponential delay."""
# Combine: wait_combine ADDS the strategies together on every retry, so each
# delay is a fixed 0.5s plus the current exponential value (capped at 30s).
# To use one strategy for the first retries and a different one afterwards,
# use wait_chain instead.
@retry(
    stop=stop_after_attempt(5),
    wait=wait_combine(wait_fixed(0.5), wait_exponential(multiplier=0.5, max=30)),
)
def combined_wait():
    """Placeholder operation retried with a fixed-plus-exponential delay."""
Retry Predicates
Control which exceptions trigger a retry and which should propagate immediately. Use retry_if_exception_type, retry_if_exception, or retry_if_result to be precise about what gets retried.
import httpx
from tenacity import (
retry, stop_after_attempt, wait_exponential_jitter,
retry_if_exception_type, retry_if_exception, retry_if_result,
retry_if_not_result
)
def is_transient_http_error(exc: Exception) -> bool:
    """Tell whether *exc* is worth retrying.

    True for connection, timeout and protocol errors, and for HTTP error
    responses whose status is 429, 500, 502, 503 or 504; False otherwise.
    """
    network_errors = (
        httpx.ConnectError,
        httpx.TimeoutException,
        httpx.RemoteProtocolError,
    )
    if isinstance(exc, network_errors):
        return True
    if not isinstance(exc, httpx.HTTPStatusError):
        return False
    return exc.response.status_code in {429, 500, 502, 503, 504}
@retry(
    retry=retry_if_exception(is_transient_http_error),
    wait=wait_exponential_jitter(initial=1, max=30),
    stop=stop_after_attempt(5),
    reraise=True,  # once attempts run out, surface the original exception
)
def call_external_api(endpoint: str) -> dict:
    """GET *endpoint* and return its JSON body, retrying only transient failures."""
    resp = httpx.get(endpoint, timeout=10)
    resp.raise_for_status()
    return resp.json()
# Retry only when one of the listed exception types is raised
@retry(
    retry=retry_if_exception_type((ConnectionError, TimeoutError)),
    wait=wait_exponential_jitter(initial=0.5, max=10),
    stop=stop_after_attempt(3),
)
def connect_to_db():
    """Placeholder connection routine retried on ConnectionError / TimeoutError."""
# Retry driven by the return value: a None result means "not ready yet"
@retry(
    retry=retry_if_result(lambda outcome: outcome is None),
    wait=wait_fixed(5),
    stop=stop_after_attempt(10),
)
def poll_job_status(job_id: str) -> str | None:
    """Return the job's status string once it is done, or None while it is still running."""
    # A real implementation would query the job API, for example:
    #   result = api.get_job(job_id)
    #   return result.get("status") if result.get("done") else None
    return None  # simulated
Before and After Callbacks
Use before, after, and before_sleep callbacks to log retry attempts, emit metrics, or update circuit breaker state. These hooks receive a RetryCallState object with attempt number, outcome, and elapsed time.
import logging
from tenacity import (
retry, stop_after_attempt, wait_exponential_jitter,
before_sleep_log, after_log, RetryCallState
)
log = logging.getLogger(__name__)


def log_retry_attempt(retry_state: RetryCallState):
    """Log a warning describing the attempt that just finished.

    Intended for use as a ``before_sleep`` callback.

    Args:
        retry_state: Tenacity's per-call state; ``fn``, ``attempt_number`` and
            ``outcome`` are read from it.
    """
    # ``fn`` is None when the retry loop is driven by iterating over a
    # ``Retrying`` object instead of decorating a function.
    fn_name = getattr(retry_state.fn, "__name__", "<unknown>")

    outcome = retry_state.outcome
    if outcome is None:
        reason = "no outcome recorded"
    elif outcome.failed:
        reason = outcome.exception()
    else:
        # The retry was triggered by the return value (e.g. retry_if_result),
        # so there is no exception to report.
        reason = f"returned {outcome.result()!r}"

    log.warning(
        "Retrying %s (attempt %d): %s",
        fn_name,
        retry_state.attempt_number,
        reason,
    )
def record_retry_metric(retry_state: RetryCallState):
    """Metrics hook placeholder; currently does nothing."""
    # Emit a counter to your metrics system here, for example:
    #   metrics.increment("api.retry", tags={"fn": retry_state.fn.__name__})
    return None
@retry(
    wait=wait_exponential_jitter(initial=1, max=30),
    stop=stop_after_attempt(4),
    after=record_retry_metric,
    before_sleep=log_retry_attempt,
)
def resilient_call() -> dict:
    """GET the example data endpoint and return its JSON body, logging each retry."""
    resp = httpx.get("https://api.example.com/data", timeout=5)
    resp.raise_for_status()
    return resp.json()
# Tenacity's ready-made logging callbacks, bound to this module's logger
@retry(
    wait=wait_exponential_jitter(initial=0.5, max=10),
    stop=stop_after_attempt(3),
    after=after_log(log, logging.INFO),
    before_sleep=before_sleep_log(log, logging.WARNING),
)
def database_query():
    """Placeholder query retried with the built-in logging hooks attached."""
Async Retry
Tenacity works identically with async functions. Just decorate your async def with @retry as usual — Tenacity detects coroutines and applies asyncio.sleep between retries instead of time.sleep.
import asyncio
import httpx
from tenacity import retry, stop_after_attempt, wait_exponential_jitter, retry_if_exception
def is_retryable(exc: Exception) -> bool:
    """Tell whether *exc* is worth retrying.

    True for connection and timeout errors, and for HTTP error responses whose
    status is 429, 500, 502, 503 or 504; False otherwise.
    """
    if isinstance(exc, httpx.HTTPStatusError):
        return exc.response.status_code in {429, 500, 502, 503, 504}
    return isinstance(exc, (httpx.ConnectError, httpx.TimeoutException))
@retry(
    retry=retry_if_exception(is_retryable),
    wait=wait_exponential_jitter(initial=0.5, max=30),
    stop=stop_after_attempt(5),
    reraise=True,
)
async def fetch_async(url: str) -> dict:
    """GET *url* with a fresh async client and return the JSON body."""
    async with httpx.AsyncClient(timeout=10) as http:
        resp = await http.get(url)
        resp.raise_for_status()
        return resp.json()
async def main():
    """Fetch the example payload and print it, or print the error if every attempt fails."""
    try:
        payload = await fetch_async("https://api.example.com/data")
        print(payload)
    except Exception as exc:
        print(f"All retries failed: {exc}")


asyncio.run(main())
Production Patterns
In production microservices, pair retry logic with timeouts and a circuit breaker. Tenacity handles retries; a circuit breaker (like pybreaker) prevents cascading failures by stopping all calls to a failing dependency after a threshold.
import httpx
from tenacity import (
retry, Retrying, stop_after_attempt,
wait_exponential_jitter, retry_if_exception, RetryError
)
# Retries combined with a per-attempt timeout (configured through httpx)
@retry(
    retry=retry_if_exception(
        lambda err: isinstance(err, (httpx.ConnectError, httpx.TimeoutException))
    ),
    wait=wait_exponential_jitter(initial=1, max=10),
    stop=stop_after_attempt(3),
    reraise=True,
)
def call_with_timeout(url: str) -> dict:
    """GET *url* and return its JSON body, retrying connection and timeout errors."""
    # Bounding each phase of the request keeps a single attempt from blocking too long.
    limits = httpx.Timeout(connect=3, read=10, write=5, pool=2)
    resp = httpx.get(url, timeout=limits)
    resp.raise_for_status()
    return resp.json()
# Programmatic retry (no decorator — handy inside loops or for conditional retry)
def process_items(items: list) -> list:
    """Fetch every item, returning one entry per input in the same order.

    A successful fetch contributes the fetched payload; an item whose retries
    are exhausted contributes ``{"id": item, "error": "max_retries_exceeded"}``.
    """
    # NOTE(review): call_with_timeout is itself decorated with @retry (3 attempts),
    # so the loop below can issue up to 3 x 3 requests per item — confirm that
    # this stacking is intended.
    collected = []
    for item in items:
        retryer = Retrying(
            wait=wait_exponential_jitter(initial=0.5, max=5),
            stop=stop_after_attempt(3),
        )
        try:
            for attempt in retryer:
                with attempt:
                    payload = call_with_timeout(f"https://api.example.com/item/{item}")
            collected.append(payload)
        except RetryError:
            collected.append({"id": item, "error": "max_retries_exceeded"})
    return collected
Frequently Asked Questions
- What's the difference between tenacity and stdlib retrying patterns?
- Hand-written retry loops are hard to compose, usually omit jitter, and are easy to get wrong once exponential backoff is involved. Tenacity gives you a tested, composable, and readable API. It also handles async functions with the same decorator, whereas a manual loop needs a separate async implementation.
- Should I retry all exceptions?
- No. Only retry exceptions that indicate transient failures: network errors, timeouts, and specific HTTP status codes (429, 5xx). Apart from 429, never retry 4xx client errors (400, 401, 403, 404) — they indicate a problem with the request itself and will not succeed until the request is changed.
- How do I test code that uses retry?
- Pass `stop=stop_after_attempt(1)` in tests to disable retries (for example via `my_function.retry_with(stop=stop_after_attempt(1))`), or patch `tenacity.nap.time.sleep` so waits are instant. Alternatively, use `@retry(..., reraise=True)` and mock the function to fail N times then succeed.