fix(tool-output-budget): only consult sandbox provider when a sandbox is resolved

The previous revision called get_sandbox_provider() whenever externalization
was triggered, including on the legacy host-disk path. In environments
without a configured sandbox -- in particular CI runners without a
config.yaml -- that call raised FileNotFoundError. The exception was caught
and logged, no provider was available, and the code fell back to inline
truncation without attempting the host-disk write. That defeated the
host-disk externalization path that predates this PR and was the root cause
of the legacy test regressions.

Restructure the branching so the provider is only consulted when a sandbox
has actually been resolved for the current tool call:

  - sandbox resolved + provider.uses_thread_data_mounts: host-disk write
    (bind-mounted into the sandbox, equivalent to a sandbox-side write).
  - sandbox resolved + non-mounted provider:             sandbox write (#3416).
  - no sandbox + outputs_path:                           host-disk write
    (legacy / non-sandbox tools, no provider call at all).
  - otherwise:                                           inline fallback.

No test changes; the legacy externalization tests are provider-agnostic by
construction and now pass without monkeypatching.

Refs: #3416
This commit is contained in:
Huixin615
2026-06-08 09:32:29 +08:00
parent c837768242
commit d6e2d25b32
@@ -340,32 +340,47 @@ def _budget_content(
if threshold > 0 and len(content) > threshold: if threshold > 0 and len(content) > threshold:
virtual_path: str | None = None virtual_path: str | None = None
provider = None # Decide persistence target based on what's available, without touching
if outputs_path or sandbox is not None: # the sandbox provider unless a sandbox was actually resolved for this
# call. This keeps the legacy host-disk path provider-free, so callers
# without a configured sandbox (and CI environments without a
# config.yaml) continue to externalize to the host as before.
if sandbox is not None:
provider = None
try: try:
provider = get_sandbox_provider() provider = get_sandbox_provider()
except Exception: except Exception:
logger.exception("Failed to get sandbox provider for tool-output externalization; falling back to inline truncation") logger.exception("Failed to get sandbox provider for tool-output externalization; falling back to inline truncation")
if provider is not None and getattr(provider, "uses_thread_data_mounts", False): if provider is not None and getattr(provider, "uses_thread_data_mounts", False):
# Host-mounted sandbox: the host outputs path is bind-mounted into # Host-mounted sandbox: host outputs path is bind-mounted into
# the sandbox at the same virtual path, so writing host-side is # the sandbox at the same virtual path, so writing host-side is
# equivalent to writing sandbox-side. Preserve the original # equivalent. Preserve the original behavior to avoid extra
# behavior to avoid extra sandbox round-trips. # sandbox round-trips.
if outputs_path: if outputs_path:
virtual_path = _externalize( virtual_path = _externalize(
content,
tool_name=tool_name,
tool_call_id=tool_call_id,
outputs_path=outputs_path,
storage_subdir=config.storage_subdir,
)
else:
virtual_path = _externalize_to_sandbox(
content, content,
tool_name=tool_name, tool_name=tool_name,
tool_call_id=tool_call_id, tool_call_id=tool_call_id,
outputs_path=outputs_path,
storage_subdir=config.storage_subdir, storage_subdir=config.storage_subdir,
sandbox=sandbox,
) )
elif sandbox is not None: elif outputs_path:
virtual_path = _externalize_to_sandbox( # No sandbox in this call (legacy / non-sandbox tools): write to
# host outputs path directly, no provider needed.
virtual_path = _externalize(
content, content,
tool_name=tool_name, tool_name=tool_name,
tool_call_id=tool_call_id, tool_call_id=tool_call_id,
outputs_path=outputs_path,
storage_subdir=config.storage_subdir, storage_subdir=config.storage_subdir,
sandbox=sandbox,
) )
if virtual_path is not None: if virtual_path is not None:
logger.info( logger.info(