Files
deer-flow/backend/packages/harness/deerflow/community/aio_sandbox/backend.py
T
AochenShen99 8b697245eb fix(sandbox): avoid blocking sandbox readiness polling (#2822)
* fix(sandbox): offload async sandbox acquisition

Run blocking sandbox provider acquisition through the async provider hook so eager sandbox setup does not stall the event loop.

* fix(sandbox): add async readiness polling

Introduce an async sandbox readiness poller using httpx and asyncio.sleep while preserving the existing synchronous API.

* test(sandbox): cover async readiness polling

Lock in non-blocking readiness behavior so the async helper does not regress to requests.get or time.sleep.

* fix(sandbox): allow anonymous backend creation

* fix(sandbox): use async readiness in provider acquisition

* fix(sandbox): use async acquisition for lazy tools

* test(sandbox): cover anonymous remote creation

* fix(sandbox): clamp async readiness timeout budget

* fix(sandbox): offload async lock file handling

* fix(sandbox): delegate async middleware fallthrough

* docs(sandbox): document async acquisition path

* fix(sandbox): offload async sandbox release

* docs(sandbox): mention async release hook

* fix(sandbox): address async lock review

Reduce duplicate sync/async sandbox acquisition state handling and move async thread-lock waits onto a dedicated executor with cancellation-safe cleanup.

* chore: retrigger ci

Retrigger GitHub Actions after upstream main fixed the stale PR merge lint failure.

* test(sandbox): sync backend unit fixtures

---------

Co-authored-by: Willem Jiang <willem.jiang@gmail.com>
2026-05-21 14:44:34 +08:00

145 lines
4.7 KiB
Python

"""Abstract base class for sandbox provisioning backends."""
from __future__ import annotations
import asyncio
import logging
import time
from abc import ABC, abstractmethod
import httpx
import requests
from .sandbox_info import SandboxInfo
logger = logging.getLogger(__name__)
def wait_for_sandbox_ready(sandbox_url: str, timeout: int = 30) -> bool:
"""Poll sandbox health endpoint until ready or timeout.
Args:
sandbox_url: URL of the sandbox (e.g. http://k3s:30001).
timeout: Maximum time to wait in seconds.
Returns:
True if sandbox is ready, False otherwise.
"""
start_time = time.time()
while time.time() - start_time < timeout:
try:
response = requests.get(f"{sandbox_url}/v1/sandbox", timeout=5)
if response.status_code == 200:
return True
except requests.exceptions.RequestException:
pass
time.sleep(1)
return False
async def wait_for_sandbox_ready_async(sandbox_url: str, timeout: int = 30, poll_interval: float = 1.0) -> bool:
"""Async variant of sandbox readiness polling.
Use this from async runtime paths so sandbox startup waits do not block the
event loop. The synchronous ``wait_for_sandbox_ready`` function remains for
existing synchronous backend/provider call sites.
"""
loop = asyncio.get_running_loop()
deadline = loop.time() + timeout
async with httpx.AsyncClient(timeout=5) as client:
while True:
remaining = deadline - loop.time()
if remaining <= 0:
break
try:
response = await client.get(f"{sandbox_url}/v1/sandbox", timeout=min(5.0, remaining))
if response.status_code == 200:
return True
except httpx.RequestError:
pass
remaining = deadline - loop.time()
if remaining <= 0:
break
await asyncio.sleep(min(poll_interval, remaining))
return False
class SandboxBackend(ABC):
"""Abstract base for sandbox provisioning backends.
Two implementations:
- LocalContainerBackend: starts Docker/Apple Container locally, manages ports
- RemoteSandboxBackend: connects to a pre-existing URL (K8s service, external)
"""
@abstractmethod
def create(self, thread_id: str | None, sandbox_id: str, extra_mounts: list[tuple[str, str, bool]] | None = None) -> SandboxInfo:
"""Create/provision a new sandbox.
Args:
thread_id: Thread ID for which the sandbox is being created. Useful for backends that want to organize sandboxes by thread.
sandbox_id: Deterministic sandbox identifier.
extra_mounts: Additional volume mounts as (host_path, container_path, read_only) tuples.
Ignored by backends that don't manage containers (e.g., remote).
Returns:
SandboxInfo with connection details.
"""
...
@abstractmethod
def destroy(self, info: SandboxInfo) -> None:
"""Destroy/cleanup a sandbox and release its resources.
Args:
info: The sandbox metadata to destroy.
"""
...
@abstractmethod
def is_alive(self, info: SandboxInfo) -> bool:
"""Quick check whether a sandbox is still alive.
This should be a lightweight check (e.g., container inspect)
rather than a full health check.
Args:
info: The sandbox metadata to check.
Returns:
True if the sandbox appears to be alive.
"""
...
@abstractmethod
def discover(self, sandbox_id: str) -> SandboxInfo | None:
"""Try to discover an existing sandbox by its deterministic ID.
Used for cross-process recovery: when another process started a sandbox,
this process can discover it by the deterministic container name or URL.
Args:
sandbox_id: The deterministic sandbox ID to look for.
Returns:
SandboxInfo if found and healthy, None otherwise.
"""
...
def list_running(self) -> list[SandboxInfo]:
"""Enumerate all running sandboxes managed by this backend.
Used for startup reconciliation: when the process restarts, it needs
to discover containers started by previous processes so they can be
adopted into the warm pool or destroyed if idle too long.
The default implementation returns an empty list, which is correct
for backends that don't manage local containers (e.g., RemoteSandboxBackend
delegates lifecycle to the provisioner which handles its own cleanup).
Returns:
A list of SandboxInfo for all currently running sandboxes.
"""
return []