0b6fa8b9e1
* fix(sandbox): add startup reconciliation to prevent orphaned container leaks Sandbox containers were never cleaned up when the managing process restarted, because all lifecycle tracking lived in in-memory dictionaries. This adds startup reconciliation that enumerates running containers via `docker ps` and either destroys orphans (age > idle_timeout) or adopts them into the warm pool. Closes #1972 * fix(sandbox): address Copilot review — adopt-all strategy, improved error handling - Reconciliation now adopts all containers into warm pool unconditionally, letting the idle checker decide cleanup. Avoids destroying containers that another concurrent process may still be using. - list_running() logs stderr on docker ps failure and catches FileNotFoundError/OSError. - Signal handler test restores SIGTERM/SIGINT in addition to SIGHUP. - E2E test docstring corrected to match actual coverage scope. * fix(sandbox): address maintainer review — batch inspect, lock tightening, import hygiene - _reconcile_orphans(): merge check-and-insert into a single lock acquisition per container to eliminate the TOCTOU window. - list_running(): batch the per-container docker inspect into a single call. Total subprocess calls drop from 2N+1 to 2 (one ps + one batch inspect). Parse port and created_at from the inspect JSON payload. - Extract _parse_docker_timestamp() and _extract_host_port() as module-level pure helpers and test them directly. - Move datetime/json imports to module top level. - _make_provider_for_reconciliation(): document the __new__ bypass and the lockstep coupling to AioSandboxProvider.__init__. - Add assertion that list_running() makes exactly ONE inspect call.
115 lines
3.6 KiB
Python
115 lines
3.6 KiB
Python
"""Abstract base class for sandbox provisioning backends."""
|
|
|
|
from __future__ import annotations
|
|
|
|
import logging
|
|
import time
|
|
from abc import ABC, abstractmethod
|
|
|
|
import requests
|
|
|
|
from .sandbox_info import SandboxInfo
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
def wait_for_sandbox_ready(sandbox_url: str, timeout: int = 30) -> bool:
|
|
"""Poll sandbox health endpoint until ready or timeout.
|
|
|
|
Args:
|
|
sandbox_url: URL of the sandbox (e.g. http://k3s:30001).
|
|
timeout: Maximum time to wait in seconds.
|
|
|
|
Returns:
|
|
True if sandbox is ready, False otherwise.
|
|
"""
|
|
start_time = time.time()
|
|
while time.time() - start_time < timeout:
|
|
try:
|
|
response = requests.get(f"{sandbox_url}/v1/sandbox", timeout=5)
|
|
if response.status_code == 200:
|
|
return True
|
|
except requests.exceptions.RequestException:
|
|
pass
|
|
time.sleep(1)
|
|
return False
|
|
|
|
|
|
class SandboxBackend(ABC):
|
|
"""Abstract base for sandbox provisioning backends.
|
|
|
|
Two implementations:
|
|
- LocalContainerBackend: starts Docker/Apple Container locally, manages ports
|
|
- RemoteSandboxBackend: connects to a pre-existing URL (K8s service, external)
|
|
"""
|
|
|
|
@abstractmethod
|
|
def create(self, thread_id: str, sandbox_id: str, extra_mounts: list[tuple[str, str, bool]] | None = None) -> SandboxInfo:
|
|
"""Create/provision a new sandbox.
|
|
|
|
Args:
|
|
thread_id: Thread ID for which the sandbox is being created. Useful for backends that want to organize sandboxes by thread.
|
|
sandbox_id: Deterministic sandbox identifier.
|
|
extra_mounts: Additional volume mounts as (host_path, container_path, read_only) tuples.
|
|
Ignored by backends that don't manage containers (e.g., remote).
|
|
|
|
Returns:
|
|
SandboxInfo with connection details.
|
|
"""
|
|
...
|
|
|
|
@abstractmethod
|
|
def destroy(self, info: SandboxInfo) -> None:
|
|
"""Destroy/cleanup a sandbox and release its resources.
|
|
|
|
Args:
|
|
info: The sandbox metadata to destroy.
|
|
"""
|
|
...
|
|
|
|
@abstractmethod
|
|
def is_alive(self, info: SandboxInfo) -> bool:
|
|
"""Quick check whether a sandbox is still alive.
|
|
|
|
This should be a lightweight check (e.g., container inspect)
|
|
rather than a full health check.
|
|
|
|
Args:
|
|
info: The sandbox metadata to check.
|
|
|
|
Returns:
|
|
True if the sandbox appears to be alive.
|
|
"""
|
|
...
|
|
|
|
@abstractmethod
|
|
def discover(self, sandbox_id: str) -> SandboxInfo | None:
|
|
"""Try to discover an existing sandbox by its deterministic ID.
|
|
|
|
Used for cross-process recovery: when another process started a sandbox,
|
|
this process can discover it by the deterministic container name or URL.
|
|
|
|
Args:
|
|
sandbox_id: The deterministic sandbox ID to look for.
|
|
|
|
Returns:
|
|
SandboxInfo if found and healthy, None otherwise.
|
|
"""
|
|
...
|
|
|
|
def list_running(self) -> list[SandboxInfo]:
|
|
"""Enumerate all running sandboxes managed by this backend.
|
|
|
|
Used for startup reconciliation: when the process restarts, it needs
|
|
to discover containers started by previous processes so they can be
|
|
adopted into the warm pool or destroyed if idle too long.
|
|
|
|
The default implementation returns an empty list, which is correct
|
|
for backends that don't manage local containers (e.g., RemoteSandboxBackend
|
|
delegates lifecycle to the provisioner which handles its own cleanup).
|
|
|
|
Returns:
|
|
A list of SandboxInfo for all currently running sandboxes.
|
|
"""
|
|
return []
|