mirror of
https://github.com/bytedance/deer-flow.git
synced 2026-05-22 16:06:50 +00:00
fix(sandbox): avoid blocking sandbox readiness polling (#2822)
* fix(sandbox): offload async sandbox acquisition Run blocking sandbox provider acquisition through the async provider hook so eager sandbox setup does not stall the event loop. * fix(sandbox): add async readiness polling Introduce an async sandbox readiness poller using httpx and asyncio.sleep while preserving the existing synchronous API. * test(sandbox): cover async readiness polling Lock in non-blocking readiness behavior so the async helper does not regress to requests.get or time.sleep. * fix(sandbox): allow anonymous backend creation * fix(sandbox): use async readiness in provider acquisition * fix(sandbox): use async acquisition for lazy tools * test(sandbox): cover anonymous remote creation * fix(sandbox): clamp async readiness timeout budget * fix(sandbox): offload async lock file handling * fix(sandbox): delegate async middleware fallthrough * docs(sandbox): document async acquisition path * fix(sandbox): offload async sandbox release * docs(sandbox): mention async release hook * fix(sandbox): address async lock review Reduce duplicate sync/async sandbox acquisition state handling and move async thread-lock waits onto a dedicated executor with cancellation-safe cleanup. * chore: retrigger ci Retrigger GitHub Actions after upstream main fixed the stale PR merge lint failure. * test(sandbox): sync backend unit fixtures --------- Co-authored-by: Willem Jiang <willem.jiang@gmail.com>
This commit is contained in:
@@ -1,5 +1,6 @@
|
||||
"""Tests for AioSandboxProvider mount helpers."""
|
||||
|
||||
import asyncio
|
||||
import importlib
|
||||
from types import SimpleNamespace
|
||||
from unittest.mock import MagicMock, patch
|
||||
@@ -140,6 +141,182 @@ def test_discover_or_create_only_unlocks_when_lock_succeeds(tmp_path, monkeypatc
|
||||
assert unlock_calls == []
|
||||
|
||||
|
||||
@pytest.mark.anyio
|
||||
async def test_acquire_async_uses_async_readiness_polling(monkeypatch):
|
||||
"""AioSandboxProvider async creation must not use sync readiness polling."""
|
||||
aio_mod = importlib.import_module("deerflow.community.aio_sandbox.aio_sandbox_provider")
|
||||
provider = _make_provider(None)
|
||||
provider._config = {"replicas": 3}
|
||||
provider._thread_locks = {}
|
||||
provider._warm_pool = {}
|
||||
provider._sandbox_infos = {}
|
||||
provider._thread_sandboxes = {}
|
||||
provider._last_activity = {}
|
||||
provider._lock = aio_mod.threading.Lock()
|
||||
provider._backend = SimpleNamespace(
|
||||
create=MagicMock(return_value=aio_mod.SandboxInfo(sandbox_id="sandbox-async", sandbox_url="http://sandbox")),
|
||||
destroy=MagicMock(),
|
||||
discover=MagicMock(return_value=None),
|
||||
)
|
||||
|
||||
async_readiness_calls: list[tuple[str, int]] = []
|
||||
|
||||
async def fake_wait_for_sandbox_ready_async(sandbox_url: str, timeout: int = 30, poll_interval: float = 1.0) -> bool:
|
||||
async_readiness_calls.append((sandbox_url, timeout))
|
||||
return True
|
||||
|
||||
monkeypatch.setattr(aio_mod, "wait_for_sandbox_ready_async", fake_wait_for_sandbox_ready_async)
|
||||
monkeypatch.setattr(
|
||||
aio_mod,
|
||||
"wait_for_sandbox_ready",
|
||||
lambda *_args, **_kwargs: (_ for _ in ()).throw(AssertionError("sync readiness should not be used")),
|
||||
)
|
||||
|
||||
sandbox_id = await provider._create_sandbox_async("thread-async", "sandbox-async")
|
||||
|
||||
assert sandbox_id == "sandbox-async"
|
||||
assert async_readiness_calls == [("http://sandbox", 60)]
|
||||
assert provider._backend.destroy.call_count == 0
|
||||
assert provider._thread_sandboxes["thread-async"] == "sandbox-async"
|
||||
|
||||
|
||||
@pytest.mark.anyio
|
||||
async def test_discover_or_create_with_lock_async_offloads_lock_file_open_and_close(tmp_path, monkeypatch):
|
||||
"""Async lock path must not open or close lock files on the event loop."""
|
||||
aio_mod = importlib.import_module("deerflow.community.aio_sandbox.aio_sandbox_provider")
|
||||
provider = _make_provider(tmp_path)
|
||||
provider._discover_or_create_with_lock_async = aio_mod.AioSandboxProvider._discover_or_create_with_lock_async.__get__(
|
||||
provider,
|
||||
aio_mod.AioSandboxProvider,
|
||||
)
|
||||
provider._thread_locks = {}
|
||||
provider._warm_pool = {}
|
||||
provider._sandbox_infos = {}
|
||||
provider._thread_sandboxes = {"thread-async-lock": "sandbox-async-lock"}
|
||||
provider._sandboxes = {"sandbox-async-lock": aio_mod.AioSandbox(id="sandbox-async-lock", base_url="http://sandbox")}
|
||||
provider._last_activity = {}
|
||||
provider._lock = aio_mod.threading.Lock()
|
||||
provider._backend = SimpleNamespace(discover=MagicMock(return_value=None))
|
||||
|
||||
monkeypatch.setattr(aio_mod, "get_paths", lambda: Paths(base_dir=tmp_path))
|
||||
|
||||
to_thread_calls: list[object] = []
|
||||
|
||||
async def fake_to_thread(func, /, *args, **kwargs):
|
||||
to_thread_calls.append(func)
|
||||
return func(*args, **kwargs)
|
||||
|
||||
monkeypatch.setattr(aio_mod.asyncio, "to_thread", fake_to_thread)
|
||||
|
||||
sandbox_id = await provider._discover_or_create_with_lock_async("thread-async-lock", "sandbox-async-lock")
|
||||
|
||||
assert sandbox_id == "sandbox-async-lock"
|
||||
assert aio_mod._open_lock_file in to_thread_calls
|
||||
assert any(getattr(func, "__name__", "") == "close" for func in to_thread_calls)
|
||||
|
||||
|
||||
@pytest.mark.anyio
|
||||
async def test_acquire_thread_lock_async_uses_dedicated_executor(monkeypatch):
|
||||
"""Per-thread lock waits should not consume the default asyncio.to_thread pool."""
|
||||
aio_mod = importlib.import_module("deerflow.community.aio_sandbox.aio_sandbox_provider")
|
||||
lock = aio_mod.threading.Lock()
|
||||
|
||||
async def fail_to_thread(*_args, **_kwargs):
|
||||
raise AssertionError("thread-lock acquisition must not use asyncio.to_thread")
|
||||
|
||||
monkeypatch.setattr(aio_mod.asyncio, "to_thread", fail_to_thread)
|
||||
|
||||
await aio_mod._acquire_thread_lock_async(lock)
|
||||
try:
|
||||
assert not lock.acquire(blocking=False)
|
||||
finally:
|
||||
lock.release()
|
||||
|
||||
|
||||
@pytest.mark.anyio
|
||||
async def test_acquire_async_cancellation_does_not_leak_thread_lock(tmp_path):
|
||||
"""Cancelled async lock waiters must not leave the per-thread lock held."""
|
||||
aio_mod = importlib.import_module("deerflow.community.aio_sandbox.aio_sandbox_provider")
|
||||
provider = _make_provider(tmp_path)
|
||||
provider._thread_locks = {}
|
||||
provider._warm_pool = {}
|
||||
provider._sandbox_infos = {}
|
||||
provider._thread_sandboxes = {}
|
||||
provider._last_activity = {}
|
||||
provider._lock = aio_mod.threading.Lock()
|
||||
|
||||
thread_id = "thread-cancel-lock"
|
||||
thread_lock = provider._get_thread_lock(thread_id)
|
||||
thread_lock.acquire()
|
||||
|
||||
task = asyncio.create_task(provider.acquire_async(thread_id))
|
||||
await asyncio.sleep(0.05)
|
||||
task.cancel()
|
||||
|
||||
try:
|
||||
await task
|
||||
except asyncio.CancelledError:
|
||||
pass
|
||||
|
||||
thread_lock.release()
|
||||
deadline = asyncio.get_running_loop().time() + 1
|
||||
while asyncio.get_running_loop().time() < deadline:
|
||||
acquired = thread_lock.acquire(blocking=False)
|
||||
if acquired:
|
||||
thread_lock.release()
|
||||
return
|
||||
await asyncio.sleep(0.01)
|
||||
|
||||
pytest.fail("provider thread lock was leaked after cancelling acquire_async")
|
||||
|
||||
|
||||
@pytest.mark.anyio
|
||||
async def test_acquire_async_cancelled_waiter_does_not_block_successor(tmp_path, monkeypatch):
|
||||
"""A cancelled waiter must not prevent the next live waiter from acquiring."""
|
||||
aio_mod = importlib.import_module("deerflow.community.aio_sandbox.aio_sandbox_provider")
|
||||
provider = _make_provider(tmp_path)
|
||||
provider._thread_locks = {}
|
||||
provider._warm_pool = {}
|
||||
provider._sandbox_infos = {}
|
||||
provider._thread_sandboxes = {}
|
||||
provider._last_activity = {}
|
||||
provider._lock = aio_mod.threading.Lock()
|
||||
|
||||
async def fake_acquire_internal_async(thread_id: str | None) -> str:
|
||||
assert thread_id == "thread-successor-lock"
|
||||
await asyncio.sleep(0)
|
||||
return "sandbox-successor"
|
||||
|
||||
monkeypatch.setattr(provider, "_acquire_internal_async", fake_acquire_internal_async)
|
||||
|
||||
thread_id = "thread-successor-lock"
|
||||
thread_lock = provider._get_thread_lock(thread_id)
|
||||
thread_lock.acquire()
|
||||
|
||||
cancelled_waiter = asyncio.create_task(provider.acquire_async(thread_id))
|
||||
await asyncio.sleep(0.05)
|
||||
cancelled_waiter.cancel()
|
||||
try:
|
||||
await cancelled_waiter
|
||||
except asyncio.CancelledError:
|
||||
pass
|
||||
|
||||
live_waiter = asyncio.create_task(provider.acquire_async(thread_id))
|
||||
thread_lock.release()
|
||||
|
||||
assert await asyncio.wait_for(live_waiter, timeout=1) == "sandbox-successor"
|
||||
|
||||
deadline = asyncio.get_running_loop().time() + 1
|
||||
while asyncio.get_running_loop().time() < deadline:
|
||||
acquired = thread_lock.acquire(blocking=False)
|
||||
if acquired:
|
||||
thread_lock.release()
|
||||
return
|
||||
await asyncio.sleep(0.01)
|
||||
|
||||
pytest.fail("provider thread lock was not released after successor acquire_async")
|
||||
|
||||
|
||||
def test_remote_backend_create_forwards_effective_user_id(monkeypatch):
|
||||
"""Provisioner mode must receive user_id so PVC subPath matches user isolation."""
|
||||
remote_mod = importlib.import_module("deerflow.community.aio_sandbox.remote_backend")
|
||||
|
||||
Reference in New Issue
Block a user