mirror of
https://github.com/bytedance/deer-flow.git
synced 2026-06-13 19:06:01 +00:00
fix(channels): offload blocking filesystem IO in inbound file ingestion (#3529)
_ingest_inbound_files performed three blocking filesystem operations directly on the event loop: it ensured the thread uploads dir existed (mkdir), enumerated it (iterdir/is_file) to de-duplicate names, and wrote each downloaded attachment (write_upload_file_no_symlink).

Offload the directory prep and every per-file write via asyncio.to_thread; the genuinely async network read (file_reader) stays on the loop. Externally observable behavior is unchanged.

Found via `make detect-blocking-io` (HIGH: iterdir on an async path). Add tests/blocking_io/test_channels_ingest.py anchor, verified red->green under the strict Blockbuster gate.

Co-authored-by: ly-wang19 <ly-wang19@users.noreply.github.com>
Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -608,8 +608,14 @@ async def _ingest_inbound_files(thread_id: str, msg: InboundMessage) -> list[dic
|
||||
write_upload_file_no_symlink,
|
||||
)
|
||||
|
||||
uploads_dir = ensure_uploads_dir(thread_id)
|
||||
seen_names = {entry.name for entry in uploads_dir.iterdir() if entry.is_file()}
|
||||
def _prepare_uploads_dir() -> tuple[Path, set[str]]:
|
||||
# Worker thread: ensure_uploads_dir's mkdir and the iterdir enumeration are
|
||||
# blocking filesystem IO that must stay off the event loop.
|
||||
target = ensure_uploads_dir(thread_id)
|
||||
existing = {entry.name for entry in target.iterdir() if entry.is_file()}
|
||||
return target, existing
|
||||
|
||||
uploads_dir, seen_names = await asyncio.to_thread(_prepare_uploads_dir)
|
||||
|
||||
created: list[dict[str, Any]] = []
|
||||
file_reader = INBOUND_FILE_READERS.get(msg.channel_name, _read_http_inbound_file)
|
||||
@@ -657,7 +663,7 @@ async def _ingest_inbound_files(thread_id: str, msg: InboundMessage) -> list[dic
|
||||
|
||||
dest = uploads_dir / safe_name
|
||||
try:
|
||||
dest = write_upload_file_no_symlink(uploads_dir, safe_name, data)
|
||||
dest = await asyncio.to_thread(write_upload_file_no_symlink, uploads_dir, safe_name, data)
|
||||
except UnsafeUploadPathError:
|
||||
logger.warning("[Manager] skipping inbound file with unsafe destination: %s", safe_name)
|
||||
continue
|
||||
|
||||
Reference in New Issue
Block a user