mirror of
https://github.com/bytedance/deer-flow.git
synced 2026-06-10 17:35:57 +00:00
fix(middleware): externalize oversized tool output into sandbox for non-mounted sandboxes
ToolOutputBudgetMiddleware persisted oversized tool results to the host
filesystem and returned a /mnt/user-data/outputs virtual path. For sandboxes
that do not use thread-data mounts (e.g. remote AIO sandbox), that virtual
path does not exist inside the sandbox, so the model's read_file tool could
not read it back and reported 'file not found'.
Branch on SandboxProvider.uses_thread_data_mounts:
- Mounted sandboxes (local Docker, AIO + LocalContainerBackend) keep the
original host-disk path; the host outputs dir is bind-mounted to the same
virtual path inside the sandbox, so behavior is unchanged.
- Non-mounted (remote) sandboxes externalize into the sandbox itself via
execute_command('mkdir -p ...') + write_file + 'test -s' validation. The
validation step is required because AIO sandbox execute_command returns
'Error: ...' as a string on failure instead of raising, so a silent mkdir
failure would otherwise leak through.
Any failure (rejected subdir, mkdir/write/validate error) falls back to the
existing inline head+tail truncation, so an unreadable path is never returned
to the model.
The sandbox resolver reads the sandbox_id that SandboxMiddleware already
writes into runtime.state['sandbox']; it never calls provider.acquire(),
keeping the tool-call hot path free of blocking I/O. Tools that do not use a
sandbox (web_search, MCP, ...) resolve to None and fall through to inline
truncation, which is the safe behavior for them.
Fixes #3416
This commit is contained in:
+157
-21
@@ -11,10 +11,11 @@ from __future__ import annotations
|
|||||||
import asyncio
|
import asyncio
|
||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
|
import shlex
|
||||||
import uuid
|
import uuid
|
||||||
from collections.abc import Awaitable, Callable
|
from collections.abc import Awaitable, Callable
|
||||||
from dataclasses import replace as dc_replace
|
from dataclasses import replace as dc_replace
|
||||||
from typing import Any, override
|
from typing import TYPE_CHECKING, Any, override
|
||||||
|
|
||||||
from langchain.agents import AgentState
|
from langchain.agents import AgentState
|
||||||
from langchain.agents.middleware import AgentMiddleware
|
from langchain.agents.middleware import AgentMiddleware
|
||||||
@@ -24,9 +25,19 @@ from langgraph.prebuilt.tool_node import ToolCallRequest
|
|||||||
from langgraph.types import Command
|
from langgraph.types import Command
|
||||||
|
|
||||||
from deerflow.config.tool_output_config import ToolOutputConfig
|
from deerflow.config.tool_output_config import ToolOutputConfig
|
||||||
|
from deerflow.sandbox.sandbox_provider import get_sandbox_provider
|
||||||
|
|
||||||
|
if TYPE_CHECKING:
|
||||||
|
from deerflow.sandbox.sandbox import Sandbox
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
# Virtual outputs root inside the sandbox. Host-mounted sandboxes map this to
|
||||||
|
# the thread outputs dir on the host; for non-mounted (remote) sandboxes the
|
||||||
|
# same path is written directly into the sandbox filesystem so the model's
|
||||||
|
# ``read_file`` tool can read it back (issue #3416).
|
||||||
|
_VIRTUAL_OUTPUTS_BASE = "/mnt/user-data/outputs"
|
||||||
|
|
||||||
|
|
||||||
def _default_config() -> ToolOutputConfig:
|
def _default_config() -> ToolOutputConfig:
|
||||||
return ToolOutputConfig()
|
return ToolOutputConfig()
|
||||||
@@ -94,6 +105,18 @@ def _sanitize_tool_name(name: str) -> str:
|
|||||||
return safe or "unknown"
|
return safe or "unknown"
|
||||||
|
|
||||||
|
|
||||||
|
def _build_externalized_filename(*, tool_name: str, tool_call_id: str) -> str:
    """Return a fresh filename for an externalized tool output.

    The name has the form ``<sanitized-tool-name>-<12 hex chars>.<ext>``.
    The extension is looked up in ``_EXT_MAP`` by the raw *tool_name* and
    defaults to ``txt``. The hex part comes from a new random UUID, so each
    call yields a different name.

    *tool_call_id* is accepted so that every externalization path can call
    this helper with the same keyword arguments; it is not used in the name.
    """
    stem = _sanitize_tool_name(tool_name)
    suffix = _EXT_MAP.get(tool_name, "txt")
    unique = uuid.uuid4().hex[:12]
    return "-".join((stem, unique)) + "." + suffix
|
||||||
|
|
||||||
|
|
||||||
def _externalize(
|
def _externalize(
|
||||||
content: str,
|
content: str,
|
||||||
*,
|
*,
|
||||||
@@ -111,10 +134,7 @@ def _externalize(
|
|||||||
except OSError:
|
except OSError:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
safe_name = _sanitize_tool_name(tool_name)
|
filename = _build_externalized_filename(tool_name=tool_name, tool_call_id=tool_call_id)
|
||||||
ext = _EXT_MAP.get(tool_name, "txt")
|
|
||||||
short_id = uuid.uuid4().hex[:12]
|
|
||||||
filename = f"{safe_name}-{short_id}.{ext}"
|
|
||||||
filepath = os.path.join(storage_dir, filename)
|
filepath = os.path.join(storage_dir, filename)
|
||||||
|
|
||||||
if not os.path.abspath(filepath).startswith(os.path.abspath(storage_dir)):
|
if not os.path.abspath(filepath).startswith(os.path.abspath(storage_dir)):
|
||||||
@@ -126,8 +146,56 @@ def _externalize(
|
|||||||
except OSError:
|
except OSError:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
virtual_base = "/mnt/user-data/outputs"
|
return f"{_VIRTUAL_OUTPUTS_BASE}/{storage_subdir}/{filename}"
|
||||||
return f"{virtual_base}/{storage_subdir}/{filename}"
|
|
||||||
|
|
||||||
|
def _externalize_to_sandbox(
|
||||||
|
content: str,
|
||||||
|
*,
|
||||||
|
tool_name: str,
|
||||||
|
tool_call_id: str,
|
||||||
|
storage_subdir: str,
|
||||||
|
sandbox: Sandbox,
|
||||||
|
) -> str | None:
|
||||||
|
"""Write *content* into the sandbox filesystem and return the virtual path.
|
||||||
|
|
||||||
|
Used when the sandbox does not use thread-data mounts (e.g. a remote AIO
|
||||||
|
sandbox): the host-side :func:`_externalize` virtual path would not exist
|
||||||
|
inside the sandbox, so the model's ``read_file`` tool could not read it
|
||||||
|
back (issue #3416). Returns the same virtual-path contract on success, or
|
||||||
|
``None`` to signal the caller to fall back to inline truncation.
|
||||||
|
"""
|
||||||
|
if os.path.isabs(storage_subdir) or ".." in storage_subdir:
|
||||||
|
return None
|
||||||
|
filename = _build_externalized_filename(tool_name=tool_name, tool_call_id=tool_call_id)
|
||||||
|
virtual_dir = f"{_VIRTUAL_OUTPUTS_BASE}/{storage_subdir}"
|
||||||
|
virtual_path = f"{virtual_dir}/{filename}"
|
||||||
|
try:
|
||||||
|
# AIO sandbox write_file does NOT create parent directories, so create
|
||||||
|
# them explicitly before writing. execute_command returns its stdout
|
||||||
|
# verbatim (including an "Error: ..." string on failure) rather than
|
||||||
|
# raising, so we cannot rely on exception propagation here.
|
||||||
|
sandbox.execute_command(f"mkdir -p {shlex.quote(virtual_dir)}")
|
||||||
|
sandbox.write_file(virtual_path, content)
|
||||||
|
# Validate the file landed: execute_command may have silently failed
|
||||||
|
# to create the directory, and write_file backends differ. Refuse to
|
||||||
|
# hand the model an unreadable read_file path.
|
||||||
|
check = sandbox.execute_command(f"test -s {shlex.quote(virtual_path)} && echo OK || echo MISSING")
|
||||||
|
if not isinstance(check, str) or "OK" not in check:
|
||||||
|
logger.warning(
|
||||||
|
"Sandbox externalize validation failed: path=%s, check=%r",
|
||||||
|
virtual_path,
|
||||||
|
check,
|
||||||
|
)
|
||||||
|
return None
|
||||||
|
except Exception:
|
||||||
|
logger.exception(
|
||||||
|
"Failed to externalize %s output to sandbox (call_id=%s)",
|
||||||
|
tool_name,
|
||||||
|
tool_call_id,
|
||||||
|
)
|
||||||
|
return None
|
||||||
|
return virtual_path
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
@@ -227,6 +295,33 @@ def _resolve_outputs_path(request: ToolCallRequest) -> str | None:
|
|||||||
return outputs_path if isinstance(outputs_path, str) else None
|
return outputs_path if isinstance(outputs_path, str) else None
|
||||||
|
|
||||||
|
|
||||||
|
def _resolve_sandbox(request: ToolCallRequest) -> Sandbox | None:
|
||||||
|
"""Resolve the active sandbox for the current tool call, or ``None``.
|
||||||
|
|
||||||
|
Reads the sandbox_id that ``SandboxMiddleware`` (and the sandbox tools
|
||||||
|
themselves) write into ``runtime.state["sandbox"]``. We intentionally do
|
||||||
|
NOT call ``provider.acquire`` here: acquiring a sandbox can trigger
|
||||||
|
blocking remote I/O, and this resolver runs on every tool call. Tools
|
||||||
|
that do not use a sandbox (``web_search``, MCP, ...) will return ``None``
|
||||||
|
here, which is fine -- the caller falls back to inline truncation.
|
||||||
|
"""
|
||||||
|
runtime = getattr(request, "runtime", None)
|
||||||
|
state = getattr(runtime, "state", None)
|
||||||
|
if not isinstance(state, dict):
|
||||||
|
return None
|
||||||
|
sandbox_state = state.get("sandbox")
|
||||||
|
if not isinstance(sandbox_state, dict):
|
||||||
|
return None
|
||||||
|
sandbox_id = sandbox_state.get("sandbox_id")
|
||||||
|
if not sandbox_id:
|
||||||
|
return None
|
||||||
|
try:
|
||||||
|
return get_sandbox_provider().get(sandbox_id)
|
||||||
|
except Exception:
|
||||||
|
logger.exception("Failed to look up sandbox %s for tool-output externalization", sandbox_id)
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
def _budget_content(
|
def _budget_content(
|
||||||
content: str,
|
content: str,
|
||||||
*,
|
*,
|
||||||
@@ -234,6 +329,7 @@ def _budget_content(
|
|||||||
tool_call_id: str,
|
tool_call_id: str,
|
||||||
outputs_path: str | None,
|
outputs_path: str | None,
|
||||||
config: ToolOutputConfig,
|
config: ToolOutputConfig,
|
||||||
|
sandbox: Sandbox | None = None,
|
||||||
) -> str | None:
|
) -> str | None:
|
||||||
"""Apply budget to *content*. Returns ``None`` if no change needed."""
|
"""Apply budget to *content*. Returns ``None`` if no change needed."""
|
||||||
threshold = config.tool_overrides.get(tool_name, config.externalize_min_chars)
|
threshold = config.tool_overrides.get(tool_name, config.externalize_min_chars)
|
||||||
@@ -242,14 +338,32 @@ def _budget_content(
|
|||||||
if len(content) <= threshold and len(content) <= config.fallback_max_chars:
|
if len(content) <= threshold and len(content) <= config.fallback_max_chars:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
if threshold > 0 and len(content) > threshold and outputs_path:
|
if threshold > 0 and len(content) > threshold:
|
||||||
virtual_path = _externalize(
|
virtual_path: str | None = None
|
||||||
content,
|
provider = get_sandbox_provider()
|
||||||
tool_name=tool_name,
|
if getattr(provider, "uses_thread_data_mounts", False):
|
||||||
tool_call_id=tool_call_id,
|
# Host-mounted sandbox: the host outputs path is bind-mounted into
|
||||||
outputs_path=outputs_path,
|
# the sandbox at the same virtual path, so writing host-side is
|
||||||
storage_subdir=config.storage_subdir,
|
# equivalent to writing sandbox-side. Preserve the original
|
||||||
)
|
# behavior to avoid extra sandbox round-trips.
|
||||||
|
if outputs_path:
|
||||||
|
virtual_path = _externalize(
|
||||||
|
content,
|
||||||
|
tool_name=tool_name,
|
||||||
|
tool_call_id=tool_call_id,
|
||||||
|
outputs_path=outputs_path,
|
||||||
|
storage_subdir=config.storage_subdir,
|
||||||
|
)
|
||||||
|
elif sandbox is not None:
|
||||||
|
# Non-mounted (remote) sandbox: write into the sandbox itself so
|
||||||
|
# the model's read_file tool can read it back. Issue #3416.
|
||||||
|
virtual_path = _externalize_to_sandbox(
|
||||||
|
content,
|
||||||
|
tool_name=tool_name,
|
||||||
|
tool_call_id=tool_call_id,
|
||||||
|
storage_subdir=config.storage_subdir,
|
||||||
|
sandbox=sandbox,
|
||||||
|
)
|
||||||
if virtual_path is not None:
|
if virtual_path is not None:
|
||||||
logger.info(
|
logger.info(
|
||||||
"Externalized %s output (%d chars) to %s",
|
"Externalized %s output (%d chars) to %s",
|
||||||
@@ -288,7 +402,12 @@ def _budget_content(
|
|||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
def _patch_tool_message(msg: ToolMessage, config: ToolOutputConfig, outputs_path: str | None) -> ToolMessage:
|
def _patch_tool_message(
|
||||||
|
msg: ToolMessage,
|
||||||
|
config: ToolOutputConfig,
|
||||||
|
outputs_path: str | None,
|
||||||
|
sandbox: Sandbox | None = None,
|
||||||
|
) -> ToolMessage:
|
||||||
"""Apply budget to a single ToolMessage. Returns the original if unchanged."""
|
"""Apply budget to a single ToolMessage. Returns the original if unchanged."""
|
||||||
tool_name = msg.name or "unknown"
|
tool_name = msg.name or "unknown"
|
||||||
if tool_name in config.exempt_tools:
|
if tool_name in config.exempt_tools:
|
||||||
@@ -304,6 +423,7 @@ def _patch_tool_message(msg: ToolMessage, config: ToolOutputConfig, outputs_path
|
|||||||
tool_call_id=msg.tool_call_id or "",
|
tool_call_id=msg.tool_call_id or "",
|
||||||
outputs_path=outputs_path,
|
outputs_path=outputs_path,
|
||||||
config=config,
|
config=config,
|
||||||
|
sandbox=sandbox,
|
||||||
)
|
)
|
||||||
if replacement is None:
|
if replacement is None:
|
||||||
return msg
|
return msg
|
||||||
@@ -355,10 +475,15 @@ def _needs_budget(result: ToolMessage | Command, config: ToolOutputConfig) -> bo
|
|||||||
return False
|
return False
|
||||||
|
|
||||||
|
|
||||||
def _patch_result(result: ToolMessage | Command, config: ToolOutputConfig, outputs_path: str | None) -> ToolMessage | Command:
|
def _patch_result(
|
||||||
|
result: ToolMessage | Command,
|
||||||
|
config: ToolOutputConfig,
|
||||||
|
outputs_path: str | None,
|
||||||
|
sandbox: Sandbox | None = None,
|
||||||
|
) -> ToolMessage | Command:
|
||||||
"""Apply budget to a tool call result (ToolMessage or Command)."""
|
"""Apply budget to a tool call result (ToolMessage or Command)."""
|
||||||
if isinstance(result, ToolMessage):
|
if isinstance(result, ToolMessage):
|
||||||
return _patch_tool_message(result, config, outputs_path)
|
return _patch_tool_message(result, config, outputs_path, sandbox)
|
||||||
|
|
||||||
update = getattr(result, "update", None)
|
update = getattr(result, "update", None)
|
||||||
if not isinstance(update, dict):
|
if not isinstance(update, dict):
|
||||||
@@ -372,7 +497,7 @@ def _patch_result(result: ToolMessage | Command, config: ToolOutputConfig, outpu
|
|||||||
changed = False
|
changed = False
|
||||||
for msg in messages:
|
for msg in messages:
|
||||||
if isinstance(msg, ToolMessage):
|
if isinstance(msg, ToolMessage):
|
||||||
patched = _patch_tool_message(msg, config, outputs_path)
|
patched = _patch_tool_message(msg, config, outputs_path, sandbox)
|
||||||
if patched is not msg:
|
if patched is not msg:
|
||||||
changed = True
|
changed = True
|
||||||
new_messages.append(patched)
|
new_messages.append(patched)
|
||||||
@@ -392,6 +517,11 @@ def _patch_model_messages(messages: list[Any], config: ToolOutputConfig) -> list
|
|||||||
ToolMessage exceeds the budget — the common case once every result has
|
ToolMessage exceeds the budget — the common case once every result has
|
||||||
already been budgeted at tool-call time, so a long history is not rebuilt
|
already been budgeted at tool-call time, so a long history is not rebuilt
|
||||||
on every model call.
|
on every model call.
|
||||||
|
|
||||||
|
Historical messages do not get a ``sandbox`` argument: any oversized tool
|
||||||
|
message in history was already budgeted (and possibly externalized) at
|
||||||
|
tool-call time, so the only thing left for the history path to do is
|
||||||
|
inline fallback truncation, which needs no sandbox.
|
||||||
"""
|
"""
|
||||||
if not any(isinstance(msg, ToolMessage) and _tool_message_over_budget(msg, config) for msg in messages):
|
if not any(isinstance(msg, ToolMessage) and _tool_message_over_budget(msg, config) for msg in messages):
|
||||||
return None
|
return None
|
||||||
@@ -442,7 +572,8 @@ class ToolOutputBudgetMiddleware(AgentMiddleware[AgentState]):
|
|||||||
if not _needs_budget(result, self._config):
|
if not _needs_budget(result, self._config):
|
||||||
return result
|
return result
|
||||||
outputs_path = _resolve_outputs_path(request)
|
outputs_path = _resolve_outputs_path(request)
|
||||||
return _patch_result(result, self._config, outputs_path)
|
sandbox = _resolve_sandbox(request)
|
||||||
|
return _patch_result(result, self._config, outputs_path, sandbox)
|
||||||
|
|
||||||
@override
|
@override
|
||||||
async def awrap_tool_call(
|
async def awrap_tool_call(
|
||||||
@@ -456,7 +587,12 @@ class ToolOutputBudgetMiddleware(AgentMiddleware[AgentState]):
|
|||||||
if not _needs_budget(result, self._config):
|
if not _needs_budget(result, self._config):
|
||||||
return result
|
return result
|
||||||
outputs_path = _resolve_outputs_path(request)
|
outputs_path = _resolve_outputs_path(request)
|
||||||
return await asyncio.to_thread(_patch_result, result, self._config, outputs_path)
|
# _resolve_sandbox only touches runtime.state and the provider's
|
||||||
|
# in-memory sandbox registry, so it is safe to call on the event
|
||||||
|
# loop. The actual sandbox I/O (mkdir/write/test) happens inside
|
||||||
|
# _patch_result, which is offloaded to a worker thread below.
|
||||||
|
sandbox = _resolve_sandbox(request)
|
||||||
|
return await asyncio.to_thread(_patch_result, result, self._config, outputs_path, sandbox)
|
||||||
|
|
||||||
# -- model call hooks (historical message truncation) ------------------
|
# -- model call hooks (historical message truncation) ------------------
|
||||||
|
|
||||||
|
|||||||
@@ -888,3 +888,298 @@ class TestConfigVersion:
|
|||||||
assert tool_output["enabled"] is True
|
assert tool_output["enabled"] is True
|
||||||
assert tool_output["externalize_min_chars"] == 12000
|
assert tool_output["externalize_min_chars"] == 12000
|
||||||
assert "read_file" in tool_output["exempt_tools"]
|
assert "read_file" in tool_output["exempt_tools"]
|
||||||
|
|
||||||
|
|
||||||
|
# ===========================================================================
|
||||||
|
# externalize into sandbox for non-mounted (remote) sandboxes
|
||||||
|
# ===========================================================================
|
||||||
|
|
||||||
|
|
||||||
|
class _FakeSandbox:
|
||||||
|
"""In-memory stand-in for a Sandbox. Records calls and supports failure injection."""
|
||||||
|
|
||||||
|
def __init__(self, *, write_ok: bool = True, check_result: str = "OK") -> None:
|
||||||
|
self.commands: list[str] = []
|
||||||
|
self.writes: list[tuple[str, str]] = []
|
||||||
|
self._write_ok = write_ok
|
||||||
|
self._check_result = check_result
|
||||||
|
|
||||||
|
def execute_command(self, command: str) -> str:
|
||||||
|
self.commands.append(command)
|
||||||
|
if command.startswith("test -s"):
|
||||||
|
return self._check_result
|
||||||
|
return ""
|
||||||
|
|
||||||
|
def write_file(self, path: str, content: str, append: bool = False) -> None:
|
||||||
|
if not self._write_ok:
|
||||||
|
raise RuntimeError("simulated write failure")
|
||||||
|
self.writes.append((path, content))
|
||||||
|
|
||||||
|
|
||||||
|
class _FakeProvider:
|
||||||
|
"""Minimal SandboxProvider stand-in for monkeypatching get_sandbox_provider."""
|
||||||
|
|
||||||
|
def __init__(self, *, uses_thread_data_mounts: bool, sandbox: _FakeSandbox | None = None) -> None:
|
||||||
|
self.uses_thread_data_mounts = uses_thread_data_mounts
|
||||||
|
self._sandbox = sandbox
|
||||||
|
|
||||||
|
def get(self, sandbox_id: str):
|
||||||
|
return self._sandbox
|
||||||
|
|
||||||
|
|
||||||
|
class TestExternalizeToSandbox:
    """Unit tests for ``_externalize_to_sandbox`` against an in-memory fake sandbox."""

    def test_writes_and_returns_virtual_path(self):
        from deerflow.agents.middlewares import tool_output_budget_middleware as mod

        payload = "x" * 100
        fake = _FakeSandbox()
        path = mod._externalize_to_sandbox(
            payload,
            tool_name="bash",
            tool_call_id="tc-1",
            storage_subdir=".tool-results",
            sandbox=fake,
        )

        # The returned path follows the virtual-outputs naming contract.
        assert path is not None
        assert path.startswith("/mnt/user-data/outputs/.tool-results/bash-")
        assert path.endswith(".log")
        # Directory creation and validation were both issued.
        assert any(cmd.startswith("mkdir -p ") for cmd in fake.commands)
        assert any(cmd.startswith("test -s ") for cmd in fake.commands)
        # The full content was written to exactly the returned path.
        assert fake.writes
        written_path, written_content = fake.writes[0]
        assert written_path == path
        assert written_content == payload

    def test_returns_none_when_write_raises(self):
        from deerflow.agents.middlewares import tool_output_budget_middleware as mod

        failing = _FakeSandbox(write_ok=False)
        outcome = mod._externalize_to_sandbox(
            "x" * 100,
            tool_name="web_fetch",
            tool_call_id="tc-2",
            storage_subdir=".tool-results",
            sandbox=failing,
        )
        assert outcome is None

    def test_returns_none_when_validation_fails(self):
        from deerflow.agents.middlewares import tool_output_budget_middleware as mod

        unverified = _FakeSandbox(check_result="MISSING")
        outcome = mod._externalize_to_sandbox(
            "x" * 100,
            tool_name="bash",
            tool_call_id="tc-3",
            storage_subdir=".tool-results",
            sandbox=unverified,
        )
        assert outcome is None

    def test_rejects_unsafe_storage_subdir(self):
        from deerflow.agents.middlewares import tool_output_budget_middleware as mod

        fake = _FakeSandbox()
        unsafe_cases = (("tc-4", "../escape"), ("tc-5", "/abs/path"))
        for call_id, subdir in unsafe_cases:
            outcome = mod._externalize_to_sandbox(
                "x" * 100,
                tool_name="bash",
                tool_call_id=call_id,
                storage_subdir=subdir,
                sandbox=fake,
            )
            assert outcome is None
        # Sandbox must not be touched when the subdir is rejected up-front.
        assert fake.commands == []
        assert fake.writes == []

    def test_default_extension_for_unknown_tool(self):
        from deerflow.agents.middlewares import tool_output_budget_middleware as mod

        path = mod._externalize_to_sandbox(
            "data",
            tool_name="unknown_tool",
            tool_call_id="tc-6",
            storage_subdir=".tool-results",
            sandbox=_FakeSandbox(),
        )
        assert path is not None
        assert path.endswith(".txt")
|
||||||
|
|
||||||
|
|
||||||
|
class TestBudgetContentSandboxDispatch:
    """_budget_content must branch on uses_thread_data_mounts (issue #3416)."""

    def test_mounted_sandbox_uses_host_disk(self, monkeypatch, tmp_path):
        from deerflow.agents.middlewares import tool_output_budget_middleware as mod

        fake = _FakeSandbox()
        provider = _FakeProvider(uses_thread_data_mounts=True, sandbox=fake)
        monkeypatch.setattr(mod, "get_sandbox_provider", lambda: provider)

        cfg = ToolOutputConfig(externalize_min_chars=50, preview_head_chars=20, preview_tail_chars=10)
        budgeted = mod._budget_content(
            "x" * 500,
            tool_name="remote_executor",
            tool_call_id="tc-m",
            outputs_path=str(tmp_path),
            config=cfg,
            sandbox=fake,
        )

        assert budgeted is not None
        assert "Full remote_executor output saved to /mnt/user-data/outputs/" in budgeted
        # Mounted path must NOT touch the sandbox.
        assert fake.commands == []
        assert fake.writes == []
        # And the host file must exist.
        host_dir = tmp_path / ".tool-results"
        assert host_dir.is_dir()
        host_files = list(host_dir.iterdir())
        assert len(host_files) == 1

    def test_non_mounted_sandbox_writes_to_sandbox(self, monkeypatch, tmp_path):
        from deerflow.agents.middlewares import tool_output_budget_middleware as mod

        fake = _FakeSandbox()
        provider = _FakeProvider(uses_thread_data_mounts=False, sandbox=fake)
        monkeypatch.setattr(mod, "get_sandbox_provider", lambda: provider)

        payload = "x" * 500
        cfg = ToolOutputConfig(externalize_min_chars=50, preview_head_chars=20, preview_tail_chars=10)
        budgeted = mod._budget_content(
            payload,
            tool_name="remote_executor",
            tool_call_id="tc-n",
            outputs_path=str(tmp_path),  # present, but ignored on non-mounted path
            config=cfg,
            sandbox=fake,
        )

        assert budgeted is not None
        assert "Full remote_executor output saved to /mnt/user-data/outputs/" in budgeted
        # Non-mounted path MUST write into the sandbox.
        assert fake.writes
        assert fake.writes[0][1] == payload
        # And MUST NOT touch the host.
        assert not (tmp_path / ".tool-results").exists()

    def test_non_mounted_without_sandbox_falls_back(self, monkeypatch):
        from deerflow.agents.middlewares import tool_output_budget_middleware as mod

        provider = _FakeProvider(uses_thread_data_mounts=False, sandbox=None)
        monkeypatch.setattr(mod, "get_sandbox_provider", lambda: provider)

        cfg = ToolOutputConfig(
            externalize_min_chars=50,
            fallback_max_chars=500,
            fallback_head_chars=100,
            fallback_tail_chars=50,
        )
        budgeted = mod._budget_content(
            "x" * 5000,
            tool_name="web_search",
            tool_call_id="tc-fb",
            outputs_path=None,
            config=cfg,
            sandbox=None,
        )

        # With neither a host path nor a sandbox, inline truncation is used.
        assert budgeted is not None
        assert "Persistent storage unavailable" in budgeted
|
||||||
|
|
||||||
|
|
||||||
|
class TestResolveSandbox:
    """Unit tests for ``_resolve_sandbox`` covering missing state and provider failures."""

    def test_returns_none_when_no_state(self):
        from deerflow.agents.middlewares import tool_output_budget_middleware as mod

        request = SimpleNamespace(runtime=None)
        assert mod._resolve_sandbox(request) is None

    def test_returns_none_when_state_has_no_sandbox(self):
        from deerflow.agents.middlewares import tool_output_budget_middleware as mod

        runtime = SimpleNamespace(state={})
        request = SimpleNamespace(runtime=runtime)
        assert mod._resolve_sandbox(request) is None

    def test_returns_none_when_sandbox_id_missing(self):
        from deerflow.agents.middlewares import tool_output_budget_middleware as mod

        runtime = SimpleNamespace(state={"sandbox": {}})
        request = SimpleNamespace(runtime=runtime)
        assert mod._resolve_sandbox(request) is None

    def test_returns_sandbox_from_provider(self, monkeypatch):
        from deerflow.agents.middlewares import tool_output_budget_middleware as mod

        fake = _FakeSandbox()
        provider = _FakeProvider(uses_thread_data_mounts=False, sandbox=fake)
        monkeypatch.setattr(mod, "get_sandbox_provider", lambda: provider)

        runtime = SimpleNamespace(state={"sandbox": {"sandbox_id": "sb-1"}})
        request = SimpleNamespace(runtime=runtime)
        assert mod._resolve_sandbox(request) is fake

    def test_returns_none_on_provider_exception(self, monkeypatch):
        from deerflow.agents.middlewares import tool_output_budget_middleware as mod

        class _Boom:
            """Provider whose lookup always fails."""

            def get(self, sandbox_id):
                raise RuntimeError("boom")

        monkeypatch.setattr(mod, "get_sandbox_provider", lambda: _Boom())

        runtime = SimpleNamespace(state={"sandbox": {"sandbox_id": "sb-x"}})
        request = SimpleNamespace(runtime=runtime)
        # The lookup error is swallowed and reported as "no sandbox".
        assert mod._resolve_sandbox(request) is None
|
||||||
|
|
||||||
|
|
||||||
|
class TestWrapToolCallSandboxIntegration:
    """End-to-end via wrap_tool_call for the non-mounted path (issue #3416)."""

    def test_oversized_output_lands_in_sandbox_not_host(self, monkeypatch, tmp_path):
        from deerflow.agents.middlewares import tool_output_budget_middleware as mod

        fake = _FakeSandbox()
        provider = _FakeProvider(uses_thread_data_mounts=False, sandbox=fake)
        monkeypatch.setattr(mod, "get_sandbox_provider", lambda: provider)

        cfg = ToolOutputConfig(externalize_min_chars=50, preview_head_chars=20, preview_tail_chars=10)
        middleware = ToolOutputBudgetMiddleware(config=cfg)
        payload = "x" * 500
        tool_message = _tm(payload, name="remote_executor")

        # Request carries BOTH outputs_path (host) AND a sandbox_id; the
        # non-mounted branch must ignore outputs_path and write into sandbox.
        state = {
            "thread_data": {"outputs_path": str(tmp_path)},
            "sandbox": {"sandbox_id": "sb-1"},
        }
        request = SimpleNamespace(
            tool_call={"name": "remote_executor", "id": "tc-1"},
            runtime=SimpleNamespace(state=state),
        )

        patched = middleware.wrap_tool_call(request, lambda _: tool_message)

        assert isinstance(patched, ToolMessage)
        assert "Full remote_executor output saved to /mnt/user-data/outputs/" in patched.content
        assert fake.writes
        assert fake.writes[0][1] == payload
        # Host disk must not have been written.
        assert not (tmp_path / ".tool-results").exists()
|
||||||
|
|||||||
Reference in New Issue
Block a user