mirror of
https://github.com/bytedance/deer-flow.git
synced 2026-05-24 00:45:57 +00:00
fix(actor): harden lifecycle, supervision, Redis mailbox, and add comprehensive tests
- Fix spawn() zombie cell: clean up registry on start() failure
- Fix shutdown(): cancel + await tasks that exceed graceful timeout
- Fix _shutdown(): await mailbox.close() to release backend resources
- Fix escalate directive: stop failing child before propagating to grandparent
- Fix RedisMailbox.put(): wrap Redis errors in try/except, return False on failure
- Fix retry.py: replace assert with proper raise for last_exc
- Add put_batch() to Mailbox abstraction for single-roundtrip bulk enqueue
- Add RedisMailbox.put_batch() with atomic Lua script for bounded queues
- Add MailboxFullError exception type for semantic backpressure handling
- Add redis>=7.4.0 dependency with public PyPI sources in uv.lock

Tests added (31 total, up from 27):
- test_middleware_on_restart_hook: verifies middleware.on_restart() on supervision restart
- test_ask_propagates_actor_exception: ask() re-raises original exception type
- test_ask_propagates_exception_while_supervised: exception propagates; root actor survives
- test_ask_timeout_late_reply_no_exception: late reply after timeout is silent no-op
- test_actor_backpressure.py: MailboxFullError + dead letter on full mailbox
- test_actor_retry.py: ask_with_retry with exponential backoff
- test_mailbox_redis.py: RedisMailbox put/get/batch/close
- bench_actor_redis.py: RedisMailbox throughput benchmarks
This commit is contained in:
@@ -440,3 +440,95 @@ class TestMiddleware:
|
||||
# tell goes through middleware too
|
||||
assert any("before:" in entry for entry in mw.log) is False
|
||||
await system.shutdown()
|
||||
|
||||
@pytest.mark.anyio
async def test_middleware_on_restart_hook(self):
    """on_restart is called on the middleware when a child actor is restarted.

    A parent actor spawns a ``CrashActor`` child with a recording middleware
    attached. The child is then sent ``"crash"`` and the test checks that the
    middleware's ``on_restart`` hook ran exactly once and received a
    ``ValueError``.
    """

    class RestartTrackingMiddleware(Middleware):
        """Middleware that records every error handed to ``on_restart``."""

        def __init__(self):
            # NOTE(review): Middleware.__init__ is not invoked here — confirm
            # the base class needs no initialisation of its own.
            self.restart_errors: list[Exception] = []

        async def on_restart(self, actor_ref, error):
            self.restart_errors.append(error)

    mw = RestartTrackingMiddleware()

    class ChildSpawningParent(Actor):
        """Parent that spawns the crashing child on request and returns its ref."""

        async def on_receive(self, message):
            if message == "spawn":
                ref = await self.context.spawn(CrashActor, "child", middlewares=[mw])
                return ref

    system = ActorSystem("test")
    # Shut the system down even when an assertion fails, so a failing run
    # does not leave actor tasks alive for the rest of the test session.
    try:
        parent = await system.spawn(ChildSpawningParent, "parent")
        child = await parent.ask("spawn")

        # Crash the child — parent supervisor will restart it
        try:
            await child.ask("crash")
        except ValueError:
            # The crash itself is expected; only the restart hook is under test.
            pass
        # Give the supervisor time to run the restart before inspecting the log.
        await asyncio.sleep(0.1)

        assert len(mw.restart_errors) == 1
        assert isinstance(mw.restart_errors[0], ValueError)
    finally:
        await system.shutdown()
|
||||
|
||||
|
||||
class TestAskErrorPropagation:
    """Tests for how ``ask()`` surfaces actor failures and timeouts to the caller."""

    @pytest.mark.anyio
    async def test_ask_propagates_actor_exception(self):
        """ask() re-raises the original exception type when on_receive crashes."""

        class BoomActor(Actor):
            """Actor whose handler always raises ``ValueError``."""

            async def on_receive(self, message):
                raise ValueError("intentional crash")

        system = ActorSystem("test")
        # Shut down in ``finally`` so a failed expectation does not leak the system.
        try:
            ref = await system.spawn(BoomActor, "boom")
            with pytest.raises(ValueError, match="intentional crash"):
                await ref.ask("trigger")
        finally:
            await system.shutdown()

    @pytest.mark.anyio
    async def test_ask_propagates_exception_while_supervised(self):
        """ask() gets the exception even when the actor is supervised (not stopped)."""

        class SometimesCrashActor(Actor):
            """Actor that raises on ``"crash"`` and answers ``"ok"`` otherwise."""

            async def on_receive(self, message):
                if message == "crash":
                    raise RuntimeError("supervised crash")
                return "ok"

        system = ActorSystem("test")
        # Shut down in ``finally`` so a failed expectation does not leak the system.
        try:
            ref = await system.spawn(SometimesCrashActor, "sca")
            with pytest.raises(RuntimeError, match="supervised crash"):
                await ref.ask("crash")
            # Root actor keeps running after a crash (consecutive_failures, not restart)
            result = await ref.ask("hello", timeout=2.0)
            assert result == "ok"
        finally:
            await system.shutdown()

    @pytest.mark.anyio
    async def test_ask_timeout_late_reply_no_exception(self):
        """Late reply arriving after ask() timeout is silently dropped — no exception, no orphaned future."""

        class SlowActor(Actor):
            """Actor that takes 0.3 s to answer, longer than the first ask's timeout."""

            async def on_receive(self, message):
                await asyncio.sleep(0.3)
                return "late"

        system = ActorSystem("test")
        # Shut down in ``finally`` so a failed expectation does not leak the system.
        try:
            ref = await system.spawn(SlowActor, "slow")

            with pytest.raises(asyncio.TimeoutError):
                await ref.ask("go", timeout=0.05)

            # Wait for actor to finish processing — late reply arrives, should be a no-op
            await asyncio.sleep(0.4)
            # System still functional: no orphaned futures, no leaked state
            assert ref.is_alive
            result = await ref.ask("go", timeout=2.0)
            assert result == "late"
        finally:
            await system.shutdown()
|
||||
|
||||
Reference in New Issue
Block a user