fix(sandbox): serialize concurrent exec_command calls in AioSandbox (#1435)

* fix(sandbox): serialize concurrent exec_command calls in AioSandbox

  The AIO sandbox container maintains a single persistent shell session that becomes corrupted when multiple exec_command requests arrive concurrently (e.g. when ToolNode issues parallel tool_calls). The corrupted session returns 'ErrorObservation' strings as output, cascading into subsequent commands.

  Add a threading.Lock to AioSandbox to serialize shell commands. As a secondary defense, detect ErrorObservation in output and retry with a fresh session ID.

  Fixes #1433

  Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* fix(sandbox): address Copilot review findings

  - Fix shell injection in list_dir: use shlex.quote(path) to escape user-provided paths in the find command
  - Narrow ErrorObservation retry condition from broad substring match to the specific corruption signature to prevent false retries
  - Improve test_lock_prevents_concurrent_execution: use threading.Barrier to ensure all workers contend for the lock simultaneously
  - Improve test_list_dir_uses_lock: assert lock.locked() is True during exec_command to verify lock acquisition

* style: auto-format with ruff

---------

Co-authored-by: Matt Van Horn <455140+mvanhorn@users.noreply.github.com>
Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
2026-05-21 23:46:50 +00:00 · 2026-03-31 07:33:35 -07:00
parent aae59a8ba8
commit a3bfea631c
2 changed files with 173 additions and 18 deletions
@@ -1,5 +1,8 @@
 import base64
 import logging
+import shlex
+import threading
+import uuid

 from agent_sandbox import Sandbox as AioSandboxClient

@@ -7,11 +10,15 @@ from deerflow.sandbox.sandbox import Sandbox

 logger = logging.getLogger(__name__)

+_ERROR_OBSERVATION_SIGNATURE = "'ErrorObservation' object has no attribute 'exit_code'"
+

 class AioSandbox(Sandbox):
    """Sandbox implementation using the agent-infra/sandbox Docker container.

    This sandbox connects to a running AIO sandbox container via HTTP API.
+    A threading lock serializes shell commands to prevent concurrent requests
+    from corrupting the container's single persistent session (see #1433).
    """

    def __init__(self, id: str, base_url: str, home_dir: str | None = None):
@@ -26,6 +33,7 @@ class AioSandbox(Sandbox):
        self._base_url = base_url
        self._client = AioSandboxClient(base_url=base_url, timeout=600)
        self._home_dir = home_dir
+        self._lock = threading.Lock()

    @property
    def base_url(self) -> str:
@@ -42,19 +50,34 @@ class AioSandbox(Sandbox):
    def execute_command(self, command: str) -> str:
        """Execute a shell command in the sandbox.

+        Uses a lock to serialize concurrent requests. The AIO sandbox
+        container maintains a single persistent shell session that
+        corrupts when hit with concurrent exec_command calls (returns
+        ``ErrorObservation`` instead of real output). If corruption is
+        detected despite the lock (e.g. multiple processes sharing a
+        sandbox), the command is retried on a fresh session.
+
        Args:
            command: The command to execute.

        Returns:
            The output of the command.
        """
-        try:
-            result = self._client.shell.exec_command(command=command)
-            output = result.data.output if result.data else ""
-            return output if output else "(no output)"
-        except Exception as e:
-            logger.error(f"Failed to execute command in sandbox: {e}")
-            return f"Error: {e}"
+        with self._lock:
+            try:
+                result = self._client.shell.exec_command(command=command)
+                output = result.data.output if result.data else ""
+
+                if output and _ERROR_OBSERVATION_SIGNATURE in output:
+                    logger.warning("ErrorObservation detected in sandbox output, retrying with a fresh session")
+                    fresh_id = str(uuid.uuid4())
+                    result = self._client.shell.exec_command(command=command, id=fresh_id)
+                    output = result.data.output if result.data else ""
+
+                return output if output else "(no output)"
+            except Exception as e:
+                logger.error(f"Failed to execute command in sandbox: {e}")
+                return f"Error: {e}"

    def read_file(self, path: str) -> str:
        """Read the content of a file in the sandbox.
@@ -82,17 +105,16 @@ class AioSandbox(Sandbox):
        Returns:
            The contents of the directory.
        """
-        try:
-            # Use shell command to list directory with depth limit
-            # The -L flag limits the depth for the tree command
-            result = self._client.shell.exec_command(command=f"find {path} -maxdepth {max_depth} -type f -o -type d 2>/dev/null | head -500")
-            output = result.data.output if result.data else ""
-            if output:
-                return [line.strip() for line in output.strip().split("\n") if line.strip()]
-            return []
-        except Exception as e:
-            logger.error(f"Failed to list directory in sandbox: {e}")
-            return []
+        with self._lock:
+            try:
+                result = self._client.shell.exec_command(command=f"find {shlex.quote(path)} -maxdepth {max_depth} -type f -o -type d 2>/dev/null | head -500")
+                output = result.data.output if result.data else ""
+                if output:
+                    return [line.strip() for line in output.strip().split("\n") if line.strip()]
+                return []
+            except Exception as e:
+                logger.error(f"Failed to list directory in sandbox: {e}")
+                return []

    def write_file(self, path: str, content: str, append: bool = False) -> None:
        """Write content to a file in the sandbox.