mirror of
https://github.com/bytedance/deer-flow.git
synced 2026-06-11 18:05:58 +00:00
[codex] Fix stale AIO sandbox cache reuse (#3494)
* Fix stale AIO sandbox cache reuse
* Address AIO sandbox review feedback
* Distinguish sandbox health check failures
* Keep local discovery recoverable when the runtime check fails

  LocalContainerBackend.discover() shares _is_container_running, which now raises on transient daemon errors instead of returning False. Discovery has no exception handling in _discover_or_create_with_lock(_async), so a brief Docker hiccup turned a recoverable "could not verify, create instead" into a hard acquire failure. Catch the check failure inside discover() and return None so an unverifiable container is simply not adopted, restoring the pre-change fall-through while keeping raise-on-unknown semantics protecting the destroy path.

  Reported by fancy-agent on PR #3494.

  Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>

* Narrow the not-found match in container inspect error handling

  A bare "not found" substring also matches transient failures like "command not found" or "context not found", which would misclassify a check error as "container definitely gone" and bypass the raise-on-unknown contract. Keep Docker's specific "No such object"/"No such container" phrases, and only trust a generic "not found" (Apple Container) when the message names the inspected container or refers to a container/object.

  Reported by WillemJiang on PR #3494.

  Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>

---------

Co-authored-by: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
@@ -1,7 +1,10 @@
|
||||
import logging
|
||||
import os
|
||||
import subprocess
|
||||
from types import SimpleNamespace
|
||||
|
||||
import pytest
|
||||
|
||||
from deerflow.community.aio_sandbox.local_backend import (
|
||||
LocalContainerBackend,
|
||||
_format_container_command_for_log,
|
||||
@@ -234,3 +237,99 @@ def test_start_container_keeps_apple_container_port_format(monkeypatch):
|
||||
captured_cmd = _capture_start_container_command(monkeypatch, backend, runtime="container")
|
||||
|
||||
assert captured_cmd[captured_cmd.index("-p") + 1] == "18080:8080"
|
||||
|
||||
|
||||
def _backend_for_inspect_tests() -> LocalContainerBackend:
    """Return a LocalContainerBackend with fixed settings for the inspect tests.

    The ``_runtime`` attribute is overwritten with ``"docker"`` after
    construction, so every test using this helper runs against that value.
    """
    settings = {
        "image": "sandbox:latest",
        "base_port": 8080,
        "container_prefix": "sandbox",
        "config_mounts": [],
        "environment": {},
    }
    instance = LocalContainerBackend(**settings)
    # NOTE(review): presumably pins the runtime so no detection result leaks
    # into the tests -- confirm against LocalContainerBackend.
    instance._runtime = "docker"
    return instance
|
||||
|
||||
|
||||
def test_is_container_running_false_when_container_missing(monkeypatch):
    """Docker's "No such object" inspect failure is reported as not running."""

    def inspect_reports_missing(cmd, **kwargs):
        # Failed inspect whose stderr carries Docker's missing-object phrase.
        return SimpleNamespace(
            returncode=1,
            stdout="",
            stderr="Error: No such object: sandbox-missing",
        )

    monkeypatch.setattr(subprocess, "run", inspect_reports_missing)
    target = _backend_for_inspect_tests()

    running = target._is_container_running("sandbox-missing")

    assert running is False
|
||||
|
||||
|
||||
def test_is_container_running_raises_on_runtime_error(monkeypatch):
    """A daemon connection failure during inspect must raise RuntimeError."""

    def daemon_unreachable(cmd, **kwargs):
        # Non-zero exit with an error that says nothing about the container.
        return SimpleNamespace(
            returncode=1,
            stdout="",
            stderr="Cannot connect to the Docker daemon",
        )

    monkeypatch.setattr(subprocess, "run", daemon_unreachable)
    target = _backend_for_inspect_tests()

    with pytest.raises(RuntimeError) as excinfo:
        target._is_container_running("sandbox-busy")

    # ExceptionInfo.match applies the same re.search check as ``match=``.
    excinfo.match("Failed to inspect container sandbox-busy")
|
||||
|
||||
|
||||
def test_is_container_running_raises_on_timeout(monkeypatch):
    """A subprocess timeout during inspect must surface as RuntimeError."""

    def inspect_times_out(cmd, **kwargs):
        # Indexing (not .get) keeps the requirement that a timeout is passed.
        limit = kwargs["timeout"]
        raise subprocess.TimeoutExpired(cmd, limit)

    monkeypatch.setattr(subprocess, "run", inspect_times_out)
    target = _backend_for_inspect_tests()

    with pytest.raises(RuntimeError) as excinfo:
        target._is_container_running("sandbox-timeout")

    # ExceptionInfo.match applies the same re.search check as ``match=``.
    excinfo.match("Timed out checking container sandbox-timeout")
|
||||
|
||||
|
||||
def test_discover_returns_none_when_runtime_check_fails(monkeypatch):
    """discover() yields None, rather than raising, when the daemon is unreachable.

    A transient daemon error during discovery must fall through to create,
    not fail acquire.
    """

    def daemon_unreachable(cmd, **kwargs):
        return SimpleNamespace(
            returncode=1,
            stdout="",
            stderr="Cannot connect to the Docker daemon",
        )

    monkeypatch.setattr(subprocess, "run", daemon_unreachable)
    target = _backend_for_inspect_tests()

    discovered = target.discover("sandbox-blip")

    assert discovered is None
|
||||
|
||||
|
||||
def test_discover_returns_none_when_runtime_check_times_out(monkeypatch):
    """discover() yields None when the inspect call times out; nothing propagates."""

    def inspect_times_out(cmd, **kwargs):
        # Indexing (not .get) keeps the requirement that a timeout is passed.
        limit = kwargs["timeout"]
        raise subprocess.TimeoutExpired(cmd, limit)

    monkeypatch.setattr(subprocess, "run", inspect_times_out)
    target = _backend_for_inspect_tests()

    discovered = target.discover("sandbox-timeout")

    assert discovered is None
|
||||
|
||||
|
||||
def test_is_container_running_false_on_apple_container_not_found(monkeypatch):
    """A generic "not found" that names the inspected container means not running.

    This is the Apple Container message shape; it is trusted because the
    quoted name in stderr is the container being inspected.
    """

    def inspect_names_container(cmd, **kwargs):
        return SimpleNamespace(
            returncode=1,
            stdout="",
            stderr='Error: not found: "sandbox-apple"',
        )

    monkeypatch.setattr(subprocess, "run", inspect_names_container)
    target = _backend_for_inspect_tests()

    running = target._is_container_running("sandbox-apple")

    assert running is False
|
||||
|
||||
|
||||
def test_is_container_running_raises_on_unrelated_not_found_error(monkeypatch):
    """An unrelated "not found" error must raise, not be read as a dead container."""

    def helper_missing(cmd, **kwargs):
        # stderr contains "not found" but does not refer to the container.
        return SimpleNamespace(
            returncode=1,
            stdout="",
            stderr="Error: credential helper not found in $PATH",
        )

    monkeypatch.setattr(subprocess, "run", helper_missing)
    target = _backend_for_inspect_tests()

    with pytest.raises(RuntimeError) as excinfo:
        target._is_container_running("sandbox-busy")

    # ExceptionInfo.match applies the same re.search check as ``match=``.
    excinfo.match("Failed to inspect container sandbox-busy")
|
||||
|
||||
Reference in New Issue
Block a user