mirror of
https://github.com/bytedance/deer-flow.git
synced 2026-05-24 17:06:00 +00:00
fix(sandbox): auto-restart crashed containers transparently (#2788)
When a sandbox container crashes (e.g. due to an internal error), the agent enters a connection-refused loop because AioSandboxProvider.get() returns a cached but dead sandbox object. Add a liveness check in get() that detects crashed containers via backend.is_alive() and evicts them from all caches, allowing ensure_sandbox_initialized() to transparently recreate a fresh container on the next acquire(). The behavior is controlled by a new config option (default: true). Set to false to skip health checks and preserve the old behavior of returning stale cached sandboxes. Closes #2788
This commit is contained in:
@@ -23,6 +23,9 @@ class SandboxConfig(BaseModel):
|
||||
replicas: Maximum number of concurrent sandbox containers (default: 3). When the limit is reached the least-recently-used sandbox is evicted to make room.
|
||||
container_prefix: Prefix for container names (default: deer-flow-sandbox)
|
||||
idle_timeout: Idle timeout in seconds before sandbox is released (default: 600 = 10 minutes). Set to 0 to disable.
|
||||
auto_restart: Automatically restart sandbox containers that have crashed (default: true). When a tool call
|
||||
detects the container is no longer alive, the sandbox is evicted from cache and transparently recreated
|
||||
on the next acquire. Set to false to disable.
|
||||
mounts: List of volume mounts to share directories with the container
|
||||
environment: Environment variables to inject into the container (values starting with $ are resolved from host env)
|
||||
"""
|
||||
@@ -55,6 +58,10 @@ class SandboxConfig(BaseModel):
|
||||
default=None,
|
||||
description="Idle timeout in seconds before sandbox is released (default: 600 = 10 minutes). Set to 0 to disable.",
|
||||
)
|
||||
auto_restart: bool = Field(
|
||||
default=True,
|
||||
description="Automatically restart sandbox containers that have crashed. When a tool call detects the container is no longer alive, the sandbox is evicted from cache and transparently recreated on the next acquire.",
|
||||
)
|
||||
mounts: list[VolumeMountConfig] = Field(
|
||||
default_factory=list,
|
||||
description="List of volume mounts to share directories between host and container",
|
||||
|
||||
Reference in New Issue
Block a user