From d6e2d25b3209a990bcc56f3346bc620ff3273ee0 Mon Sep 17 00:00:00 2001 From: Huixin615 Date: Mon, 8 Jun 2026 09:32:29 +0800 Subject: [PATCH] fix(tool-output-budget): only consult sandbox provider when a sandbox is resolved The previous revision called get_sandbox_provider() whenever externalization was triggered, including on the legacy host-disk path. In environments without a configured sandbox -- in particular CI runners without a config.yaml -- that call would raise FileNotFoundError, which was caught, and the middleware silently fell back to inline truncation. That defeated the host-disk externalization path that predates this PR and was the root cause of the regressing legacy tests. Restructure the branching so the provider is only consulted when a sandbox has actually been resolved for the current tool call: - sandbox resolved + provider.uses_thread_data_mounts + outputs_path: host-disk write (bind-mounted into the sandbox, equivalent to a sandbox-side write). - sandbox resolved + non-mounted provider (or provider lookup failed): sandbox write (#3416). - no sandbox + outputs_path: host-disk write (legacy / non-sandbox tools, no provider call at all). - otherwise: inline fallback. No test changes; the legacy externalization tests are provider-agnostic by construction and now pass without monkeypatching. 
Refs: #3416 --- .../tool_output_budget_middleware.py | 41 +++++++++++++------ 1 file changed, 28 insertions(+), 13 deletions(-) diff --git a/backend/packages/harness/deerflow/agents/middlewares/tool_output_budget_middleware.py b/backend/packages/harness/deerflow/agents/middlewares/tool_output_budget_middleware.py index df95a22e0..22b1a59f2 100644 --- a/backend/packages/harness/deerflow/agents/middlewares/tool_output_budget_middleware.py +++ b/backend/packages/harness/deerflow/agents/middlewares/tool_output_budget_middleware.py @@ -340,32 +340,47 @@ def _budget_content( if threshold > 0 and len(content) > threshold: virtual_path: str | None = None - provider = None - if outputs_path or sandbox is not None: + # Decide persistence target based on what's available, without touching + # the sandbox provider unless a sandbox was actually resolved for this + # call. This keeps the legacy host-disk path provider-free, so callers + # without a configured sandbox (and CI environments without a + # config.yaml) continue to externalize to the host as before. + if sandbox is not None: + provider = None try: provider = get_sandbox_provider() except Exception: logger.exception("Failed to get sandbox provider for tool-output externalization; falling back to inline truncation") - if provider is not None and getattr(provider, "uses_thread_data_mounts", False): - # Host-mounted sandbox: the host outputs path is bind-mounted into - # the sandbox at the same virtual path, so writing host-side is - # equivalent to writing sandbox-side. Preserve the original - # behavior to avoid extra sandbox round-trips. - if outputs_path: - virtual_path = _externalize( + if provider is not None and getattr(provider, "uses_thread_data_mounts", False): + # Host-mounted sandbox: host outputs path is bind-mounted into + # the sandbox at the same virtual path, so writing host-side is + # equivalent. Preserve the original behavior to avoid extra + # sandbox round-trips. 
+ if outputs_path: + virtual_path = _externalize( + content, + tool_name=tool_name, + tool_call_id=tool_call_id, + outputs_path=outputs_path, + storage_subdir=config.storage_subdir, + ) + else: + virtual_path = _externalize_to_sandbox( content, tool_name=tool_name, tool_call_id=tool_call_id, - outputs_path=outputs_path, storage_subdir=config.storage_subdir, + sandbox=sandbox, ) - elif sandbox is not None: - virtual_path = _externalize_to_sandbox( + elif outputs_path: + # No sandbox in this call (legacy / non-sandbox tools): write to + # host outputs path directly, no provider needed. + virtual_path = _externalize( content, tool_name=tool_name, tool_call_id=tool_call_id, + outputs_path=outputs_path, storage_subdir=config.storage_subdir, - sandbox=sandbox, ) if virtual_path is not None: logger.info(