mirror of
https://github.com/bytedance/deer-flow.git
synced 2026-05-25 09:26:00 +00:00
fix(actor): harden lifecycle, supervision, Redis mailbox, and add comprehensive tests
- Fix spawn() zombie cell: clean up registry on start() failure
- Fix shutdown(): cancel + await tasks that exceed graceful timeout
- Fix _shutdown(): await mailbox.close() to release backend resources
- Fix escalate directive: stop failing child before propagating to grandparent
- Fix RedisMailbox.put(): wrap Redis errors in try/except, return False on failure
- Fix retry.py: replace assert with proper raise for last_exc
- Add put_batch() to Mailbox abstraction for single-roundtrip bulk enqueue
- Add RedisMailbox.put_batch() with atomic Lua script for bounded queues
- Add MailboxFullError exception type for semantic backpressure handling
- Add redis>=7.4.0 dependency with public PyPI sources in uv.lock

Tests added (31 total, up from 27):
- test_middleware_on_restart_hook: verifies middleware.on_restart() on supervision restart
- test_ask_propagates_actor_exception: ask() re-raises original exception type
- test_ask_propagates_exception_while_supervised: exception propagates; root actor survives
- test_ask_timeout_late_reply_no_exception: late reply after timeout is silent no-op
- test_actor_backpressure.py: MailboxFullError + dead letter on full mailbox
- test_actor_retry.py: ask_with_retry with exponential backoff
- test_mailbox_redis.py: RedisMailbox put/get/batch/close
- bench_actor_redis.py: RedisMailbox throughput benchmarks
This commit is contained in:
@@ -0,0 +1,142 @@
|
||||
"""Retry + idempotency helpers for Actor ask/tell patterns.
|
||||
|
||||
This module provides:
|
||||
- Message envelope carrying retry/idempotency metadata
|
||||
- In-memory idempotency store (process-local)
|
||||
- ask_with_retry helper (bounded retries + exponential backoff + jitter)
|
||||
|
||||
Design notes:
|
||||
- Keep transport-agnostic; works with current in-memory mailbox.
|
||||
- Business handlers must opt in by using ``IdempotentActorMixin`` and
|
||||
wrapping logic with ``handle_idempotent``.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import random
|
||||
import time
|
||||
import uuid
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class RetryEnvelope:
|
||||
"""Metadata wrapper for idempotent/retriable messages."""
|
||||
|
||||
payload: Any
|
||||
message_id: str = field(default_factory=lambda: uuid.uuid4().hex)
|
||||
idempotency_key: str | None = None
|
||||
attempt: int = 1
|
||||
max_attempts: int = 1
|
||||
created_at_ms: int = field(default_factory=lambda: int(time.time() * 1000))
|
||||
|
||||
@classmethod
|
||||
def wrap(
|
||||
cls,
|
||||
payload: Any,
|
||||
*,
|
||||
idempotency_key: str | None = None,
|
||||
attempt: int = 1,
|
||||
max_attempts: int = 1,
|
||||
) -> "RetryEnvelope":
|
||||
return cls(
|
||||
payload=payload,
|
||||
idempotency_key=idempotency_key,
|
||||
attempt=attempt,
|
||||
max_attempts=max_attempts,
|
||||
)
|
||||
|
||||
|
||||
class IdempotencyStore:
    """Process-local idempotency result store.

    Maps an idempotency key to the result recorded for it. By default the
    store keeps every entry for its whole lifetime, so it grows with each
    distinct key it is given. Pass ``max_entries`` to cap it; once the cap is
    exceeded the least recently written keys are dropped first.
    """

    def __init__(self, max_entries: int | None = None) -> None:
        """Create an empty store.

        Args:
            max_entries: Upper bound on the number of retained results, or
                ``None`` (the default) for no bound.

        Raises:
            ValueError: If ``max_entries`` is given and is smaller than 1.
        """
        if max_entries is not None and max_entries < 1:
            raise ValueError("max_entries must be >= 1 or None")
        self._max_entries = max_entries
        # Plain dicts preserve insertion order, which doubles as write order.
        self._results: dict[str, Any] = {}

    def has(self, key: str) -> bool:
        """Return whether a result is currently stored for ``key``."""
        return key in self._results

    def get(self, key: str) -> Any:
        """Return the result stored for ``key``; raise ``KeyError`` if absent."""
        return self._results[key]

    def set(self, key: str, value: Any) -> None:
        """Record ``value`` for ``key``, replacing any earlier result."""
        # Drop and re-insert so an overwritten key counts as the newest write.
        self._results.pop(key, None)
        self._results[key] = value
        if self._max_entries is None:
            return
        while len(self._results) > self._max_entries:
            oldest_key = next(iter(self._results))
            del self._results[oldest_key]
|
||||
|
||||
|
||||
class IdempotentActorMixin:
    """Mixin that lets an actor skip re-running work for a repeated key.

    Usage in actor::

        class MyActor(IdempotentActorMixin, Actor):
            async def on_receive(self, message):
                return await self.handle_idempotent(message, self._handle)

            async def _handle(self, payload):
                ...
    """

    def _idempotency_store(self) -> IdempotencyStore:
        """Return this instance's result store, creating it on first use."""
        existing = getattr(self, "_idem_store", None)
        if existing is not None:
            return existing
        created = IdempotencyStore()
        self._idem_store = created
        return created

    async def handle_idempotent(self, message: Any, handler):
        """Run ``handler`` for ``message``, reusing a stored result when possible.

        * A message that is not a ``RetryEnvelope`` is passed to ``handler``
          unchanged.
        * An envelope without an idempotency key has its payload handled with
          no caching.
        * An envelope with a key returns the stored result if one exists;
          otherwise the payload is handled and the result is stored under
          that key before being returned.

        A result is stored only after ``handler`` returns, so a handler that
        raises leaves nothing behind for its key.
        """
        if not isinstance(message, RetryEnvelope):
            return await handler(message)

        dedup_key = message.idempotency_key
        if dedup_key:
            cache = self._idempotency_store()
            if cache.has(dedup_key):
                return cache.get(dedup_key)
            outcome = await handler(message.payload)
            cache.set(dedup_key, outcome)
            return outcome

        # No key attached: nothing to deduplicate against.
        return await handler(message.payload)
|
||||
|
||||
|
||||
async def ask_with_retry(
    ref,
    payload: Any,
    *,
    timeout: float = 5.0,
    max_attempts: int = 3,
    base_backoff_s: float = 0.1,
    max_backoff_s: float = 5.0,
    jitter_ratio: float = 0.3,
    retry_exceptions: tuple[type[BaseException], ...] = (asyncio.TimeoutError,),
    idempotency_key: str | None = None,
) -> Any:
    """Ask an actor with bounded retries and envelope metadata.

    Each attempt sends a new ``RetryEnvelope`` around ``payload``. All
    attempts of one call share the same idempotency key, so the receiver can
    recognise them as the same request.

    Args:
        ref: Object exposing ``await ref.ask(message, timeout=...)``.
        payload: Business message placed inside the envelope.
        timeout: Passed unchanged to every ``ref.ask`` call.
        max_attempts: Total number of tries, including the first one.
        base_backoff_s: Delay before the second attempt; doubles for each
            later attempt.
        max_backoff_s: Cap applied to the doubled delay before jitter is
            added, so the actual sleep can reach
            ``max_backoff_s * (1 + jitter_ratio)``.
        jitter_ratio: Upper bound of the random extra delay, as a fraction of
            the capped backoff.
        retry_exceptions: Exception types that trigger another attempt. Any
            other exception propagates immediately.
        idempotency_key: Key shared by all attempts; a random one is
            generated when this is ``None`` or empty.

    Returns:
        Whatever ``ref.ask`` returns on the first successful attempt.

    Raises:
        ValueError: If ``max_attempts`` is smaller than 1.
        BaseException: The exception from the final attempt when every
            attempt failed with one of ``retry_exceptions``.
    """
    if max_attempts < 1:
        raise ValueError("max_attempts must be >= 1")

    key = idempotency_key or uuid.uuid4().hex
    # 2.0 ** 1023 is the largest power of two a float can represent. Capping
    # the exponent keeps very long retry loops from raising OverflowError
    # while computing a delay that max_backoff_s is about to clamp anyway.
    max_exponent = 1023

    for attempt in range(1, max_attempts + 1):
        msg = RetryEnvelope.wrap(
            payload,
            idempotency_key=key,
            attempt=attempt,
            max_attempts=max_attempts,
        )
        try:
            return await ref.ask(msg, timeout=timeout)
        except retry_exceptions:
            if attempt >= max_attempts:
                # Out of attempts: surface the last failure unchanged.
                raise

        # Sleep outside the handler so the caught exception is released
        # before waiting.
        exponent = min(attempt - 1, max_exponent)
        backoff = min(max_backoff_s, base_backoff_s * 2.0**exponent)
        jitter = backoff * jitter_ratio * random.random()
        # A non-positive delay already meant "do not wait"; make it explicit.
        await asyncio.sleep(max(0.0, backoff + jitter))

    raise AssertionError("unreachable: the final attempt returns or re-raises")
|
||||
Reference in New Issue
Block a user