From 5abb2c14f75a88bc842354b1306136905f1f6d6f Mon Sep 17 00:00:00 2001 From: zeevdr Date: Sun, 24 May 2026 15:58:57 +0300 Subject: [PATCH 1/2] fix(retry): enforce overall deadline budget across attempts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add `total_timeout` to `RetryConfig` so the wall-clock budget is shared across all retry attempts. Without this, a caller with `timeout=10` and `max_attempts=3` could wait up to `3*(10+backoff)` seconds total. When set, backoff sleeps are clipped to the remaining budget and the loop exits immediately when the budget is exhausted — either at the loop-top check or when `remaining <= 0` after a failure. `write_safe_config` propagates `total_timeout` to the derived config. Closes #58 Co-Authored-By: Claude --- sdk/src/opendecree/_retry.py | 28 +++++++++++++- sdk/tests/test_retry.py | 74 ++++++++++++++++++++++++++++++++++++ 2 files changed, 100 insertions(+), 2 deletions(-) diff --git a/sdk/src/opendecree/_retry.py b/sdk/src/opendecree/_retry.py index 5ae165c..f00f85d 100644 --- a/sdk/src/opendecree/_retry.py +++ b/sdk/src/opendecree/_retry.py @@ -28,6 +28,10 @@ class RetryConfig: max_backoff: Maximum backoff duration in seconds. multiplier: Backoff multiplier between attempts. retryable_codes: gRPC status codes that trigger a retry. + total_timeout: Overall wall-clock budget in seconds shared across all + attempts. When set, backoff sleeps are clipped to the remaining + budget and no further attempt is made once the budget is exhausted. + None means no global limit (original behavior). """ max_attempts: int = 3 @@ -40,6 +44,7 @@ class RetryConfig: grpc.StatusCode.DEADLINE_EXCEEDED, ) ) + total_timeout: float | None = None def write_safe_config(base: RetryConfig | None) -> RetryConfig | None: @@ -60,6 +65,7 @@ def write_safe_config(base: RetryConfig | None) -> RetryConfig | None: max_backoff=base.max_backoff, multiplier=base.multiplier, retryable_codes=safe_codes, + total_timeout=base.total_timeout, ) @@ -68,10 +74,13 @@ def with_retry(config: RetryConfig | None, fn: Callable[[], T]) -> T: if config is None: return fn() + deadline = time.monotonic() + config.total_timeout if config.total_timeout is not None else None last_err: Exception | None = None backoff = config.initial_backoff for attempt in range(config.max_attempts): + if deadline is not None and time.monotonic() >= deadline: + break try: return fn() except grpc.RpcError as e: @@ -80,7 +89,13 @@ def with_retry(config: RetryConfig | None, fn: Callable[[], T]) -> T: raise last_err = e jitter = random.uniform(0.5, 1.5) - time.sleep(backoff * jitter) + sleep_time = backoff * jitter + if deadline is not None: + remaining = deadline - time.monotonic() + if remaining <= 0: + raise + sleep_time = min(sleep_time, remaining) + time.sleep(sleep_time) backoff = min(backoff * config.multiplier, config.max_backoff) raise last_err # type: ignore[misc] # pragma: no cover @@ -91,10 +106,13 @@ async def async_with_retry(config: RetryConfig | None, fn: Callable[[], Awaitabl if config is None: return await fn() + deadline = time.monotonic() + config.total_timeout if config.total_timeout is not None else None last_err: Exception | None = None backoff = config.initial_backoff for attempt in range(config.max_attempts): + if deadline is not None and time.monotonic() >= deadline: + break try: return await fn() except grpc.aio.AioRpcError as e: @@ -103,7 +121,13 @@ async def async_with_retry(config: RetryConfig | None, fn: Callable[[], Awaitabl raise last_err = e jitter = random.uniform(0.5, 1.5) - await asyncio.sleep(backoff * jitter) + sleep_time = backoff * jitter + if deadline is not None: + remaining = deadline - time.monotonic() + if remaining <= 0: + raise + sleep_time = min(sleep_time, remaining) + await asyncio.sleep(sleep_time) backoff = min(backoff * config.multiplier, config.max_backoff) raise last_err # type: ignore[misc] # pragma: no cover diff --git a/sdk/tests/test_retry.py b/sdk/tests/test_retry.py index 156fa30..e152a50 100644 --- a/sdk/tests/test_retry.py +++ b/sdk/tests/test_retry.py @@ -138,3 +138,77 @@ async def fn() -> str: with patch("opendecree._retry.asyncio.sleep", new_callable=AsyncMock): with pytest.raises(grpc.aio.AioRpcError): await async_with_retry(RetryConfig(max_attempts=2), fn) + + +# --- Deadline budget --- + + +def test_deadline_clips_sleep(): + """Sleep is clipped to remaining budget so total wall time stays bounded.""" + err = FakeRpcError(grpc.StatusCode.UNAVAILABLE) + fn = MagicMock(side_effect=[err, "ok"]) + slept: list[float] = [] + + with patch("opendecree._retry.time.sleep", side_effect=lambda s: slept.append(s)): + # monotonic calls: [deadline_start=0.0, loop-top-0=0.0, remaining-check=0.05, loop-top-1=0.05] + with patch("opendecree._retry.time.monotonic", side_effect=[0.0, 0.0, 0.05, 0.05]): + result = with_retry(RetryConfig(max_attempts=3, total_timeout=0.1), fn) + + assert result == "ok" + assert slept[0] <= 0.05 + 1e-9 # clipped to remaining budget + + +def test_deadline_exhausted_raises_immediately(): + """When budget is gone after a failure, raises without sleeping.""" + err = FakeRpcError(grpc.StatusCode.UNAVAILABLE) + fn = MagicMock(side_effect=err) + slept: list[float] = [] + + with patch("opendecree._retry.time.sleep", side_effect=lambda s: slept.append(s)): + # monotonic calls: [deadline_start=0.0, loop-top-0=0.0, remaining-check=0.2 (over budget)] + with patch("opendecree._retry.time.monotonic", side_effect=[0.0, 0.0, 0.2]): + with pytest.raises(grpc.RpcError): + with_retry(RetryConfig(max_attempts=3, total_timeout=0.1), fn) + + assert slept == [] + + +def test_deadline_already_passed_before_second_attempt(): + """Loop-top deadline check stops further attempts once budget is exhausted.""" + err = FakeRpcError(grpc.StatusCode.UNAVAILABLE) + fn = MagicMock(side_effect=[err, "ok"]) + + with patch("opendecree._retry.time.sleep"): + # monotonic calls: [deadline_start=0.0, loop-top-0=0.0, remaining=0.05 (ok, sleep), loop-top-1=0.2 (over)] + with patch("opendecree._retry.time.monotonic", side_effect=[0.0, 0.0, 0.05, 0.2]): + with pytest.raises(grpc.RpcError): + with_retry(RetryConfig(max_attempts=3, total_timeout=0.1), fn) + + assert fn.call_count == 1 + + +def test_write_safe_config_preserves_total_timeout(): + base = RetryConfig(total_timeout=30.0) + safe = write_safe_config(base) + assert safe is not None + assert safe.total_timeout == 30.0 + + +@pytest.mark.asyncio +async def test_async_deadline_exhausted_raises_immediately(): + err = FakeRpcError(grpc.StatusCode.UNAVAILABLE) + slept: list[float] = [] + + async def fn() -> str: + raise err + + async def fake_sleep(s: float) -> None: + slept.append(s) + + with patch("opendecree._retry.asyncio.sleep", side_effect=fake_sleep): + # monotonic: [deadline_start=0.0, loop-top-0=0.0, remaining-check=0.2 (over budget)] + with patch("opendecree._retry.time.monotonic", side_effect=[0.0, 0.0, 0.2]): + with pytest.raises(grpc.aio.AioRpcError): + await async_with_retry(RetryConfig(max_attempts=3, total_timeout=0.1), fn) + + assert slept == [] From 91dfe35dc28c0fe91a490390e1758916b0358d16 Mon Sep 17 00:00:00 2001 From: zeevdr Date: Sun, 24 May 2026 16:00:22 +0300 Subject: [PATCH 2/2] style(retry): shorten test comments to fit line limit Co-Authored-By: Claude --- sdk/tests/test_retry.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sdk/tests/test_retry.py b/sdk/tests/test_retry.py index e152a50..73d3c40 100644 --- a/sdk/tests/test_retry.py +++ b/sdk/tests/test_retry.py @@ -150,7 +150,7 @@ def test_deadline_clips_sleep(): slept: list[float] = [] with patch("opendecree._retry.time.sleep", side_effect=lambda s: slept.append(s)): - # monotonic calls: [deadline_start=0.0, loop-top-0=0.0, remaining-check=0.05, loop-top-1=0.05] + # monotonic: [deadline=0.0, loop-top-0=0.0, remaining=0.05, loop-top-1=0.05] with patch("opendecree._retry.time.monotonic", side_effect=[0.0, 0.0, 0.05, 0.05]): result = with_retry(RetryConfig(max_attempts=3, total_timeout=0.1), fn) @@ -179,7 +179,7 @@ def test_deadline_already_passed_before_second_attempt(): fn = MagicMock(side_effect=[err, "ok"]) with patch("opendecree._retry.time.sleep"): - # monotonic calls: [deadline_start=0.0, loop-top-0=0.0, remaining=0.05 (ok, sleep), loop-top-1=0.2 (over)] + # monotonic: [deadline=0.0, loop-top-0=0.0, remaining=0.05 (ok), loop-top-1=0.2 (over)] with patch("opendecree._retry.time.monotonic", side_effect=[0.0, 0.0, 0.05, 0.2]): with pytest.raises(grpc.RpcError): with_retry(RetryConfig(max_attempts=3, total_timeout=0.1), fn)