mirror of
https://github.com/bytedance/deer-flow.git
synced 2026-06-10 09:25:57 +00:00
fix(agents): offload UploadsMiddleware uploads scan off the event loop (#3311)
UploadsMiddleware defines only the sync `before_agent` hook. LangChain wires a sync-only hook as `RunnableCallable(before_agent, None)`, and LangGraph's `ainvoke` runs it directly on the event loop when `afunc is None` — so the per-message uploads-directory scan (`exists`/`iterdir`/`stat` plus reading sibling `.md` outlines) blocks the asyncio event loop on every message that has an uploads directory. Add `abefore_agent` that offloads the scan to a worker thread via `run_in_executor`; it copies the current context, preserving the `user_id` contextvar read by `get_effective_user_id()`. Add a runtime anchor under `tests/blocking_io/` that drives the real `create_agent` graph via `ainvoke` under the strict Blockbuster gate, so a regression back onto the event loop fails CI. Update blocking-IO docs.
This commit is contained in:
@@ -0,0 +1,56 @@
|
||||
"""Regression anchor: UploadsMiddleware must not block the event loop.
|
||||
|
||||
``before_agent`` scans the thread uploads directory (``exists`` / ``iterdir`` /
|
||||
``stat`` plus reading sibling ``.md`` outlines). LangChain wires a sync-only
|
||||
``before_agent`` as ``RunnableCallable(before_agent, None)``; langgraph's
|
||||
``ainvoke`` runs it directly on the event loop when ``afunc is None``. So the
|
||||
filesystem scan must be offloaded (the middleware provides ``abefore_agent``).
|
||||
|
||||
This anchor drives the real ``create_agent`` graph via ``ainvoke`` under the
|
||||
strict Blockbuster gate. If the scan regresses back onto the event loop,
|
||||
Blockbuster raises ``BlockingError`` and this test fails.
|
||||
|
||||
The graph/middleware construction is offloaded with ``asyncio.to_thread`` only
|
||||
because ``Paths.__init__`` resolves paths synchronously; the surface under test
|
||||
(``before_agent``'s directory scan) is exercised on the event loop, not
|
||||
bypassed.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
from langchain_core.language_models.fake_chat_models import FakeMessagesListChatModel
|
||||
from langchain_core.messages import AIMessage, HumanMessage
|
||||
|
||||
pytestmark = pytest.mark.asyncio
|
||||
|
||||
|
||||
class _FakeModel(FakeMessagesListChatModel):
    """Scripted fake chat model usable with ``create_agent``.

    Behaves like ``FakeMessagesListChatModel`` except that ``bind_tools`` is
    overridden as a no-op, so binding tools simply hands back this model.
    """

    def bind_tools(self, tools, **kwargs):  # type: ignore[override]
        """Disregard ``tools`` and ``kwargs`` and return this same instance."""
        return self
|
||||
|
||||
|
||||
async def test_before_agent_uploads_scan_does_not_block_event_loop(tmp_path: Path) -> None:
    """Invoke the real agent graph via ``ainvoke`` with a seeded uploads dir.

    An ``UploadsMiddleware`` rooted at ``tmp_path`` is built, one file is
    placed in the uploads directory of thread ``"t1"``, and a ``create_agent``
    graph using that middleware is awaited through ``ainvoke``. The test
    passes when the invocation completes and the result carries messages.
    """
    from langchain.agents import create_agent

    from deerflow.agents.middlewares.uploads_middleware import UploadsMiddleware
    from deerflow.runtime.user_context import get_effective_user_id

    thread_id = "t1"

    # Middleware construction and path resolution are pushed to a worker
    # thread; only the agent invocation below is awaited directly.
    middleware = await asyncio.to_thread(UploadsMiddleware, str(tmp_path))
    resolve_uploads_dir = middleware._paths.sandbox_uploads_dir
    current_user_id = get_effective_user_id()
    seeded_dir = await asyncio.to_thread(resolve_uploads_dir, thread_id, user_id=current_user_id)

    # Test-side seeding (not in scanned_modules), so it runs synchronously here.
    seeded_dir.mkdir(parents=True, exist_ok=True)
    (seeded_dir / "existing.txt").write_text("hello", encoding="utf-8")

    def _build_agent():
        # Model and graph are both created inside the worker thread.
        scripted_model = _FakeModel(responses=[AIMessage(content="ok")])
        return create_agent(model=scripted_model, tools=[], middleware=[middleware])

    agent = await asyncio.to_thread(_build_agent)

    graph_input = {"messages": [HumanMessage(content="hi")]}
    run_config = {"configurable": {"thread_id": thread_id}}
    result = await agent.ainvoke(graph_input, run_config)

    assert result["messages"]
|
||||
Reference in New Issue
Block a user