mirror of
https://github.com/bytedance/deer-flow.git
synced 2026-06-13 10:55:59 +00:00
b8f5ed360f
* fix(skills): keep skill archive installation off the event loop

  ainstall_skill_from_archive — the async entry point awaited by the gateway POST /skills/install route — ran its entire filesystem pipeline inline on the event loop: zip extraction, frontmatter validation, rglob enumeration, per-file read_text, shutil.copytree staging, and tempdir cleanup.

  Restructure into offloaded phases: prepare (extract + validate) and commit (stage + move) run via asyncio.to_thread, the tempdir lifecycle is offloaded, and the security scanner's file enumeration and reads move off the loop — only the per-file LLM scan (genuinely async) stays awaited. Security decision logic and exception contract are unchanged.

  Anchor: tests/blocking_io/test_skills_install.py drives the real install pipeline (real .skill archive, real FS; only scan_skill_content stubbed) under the strict Blockbuster gate. Verified red on pre-fix code (BlockingError: os.stat), green with the fix.

* fix(skills): log temp-dir cleanup failures instead of swallowing them

  Review follow-up on the install offload: rmtree(ignore_errors=True) kept the primary install exception but silently leaked the extraction dir on cleanup failure. Keep the never-mask behaviour, add a warning log.

* fix(skills): bound install tmp cleanup and pass skill_dir explicitly (review)

  - Wrap the best-effort temp-dir cleanup in asyncio.wait_for (5s) so a hung filesystem in the finally block cannot stall or mask the install outcome; timeout is logged like the existing OSError path.
  - Hoist _collect_scannable_files to module level with skill_dir as an explicit argument instead of a closure capture.
74 lines
2.9 KiB
Python
74 lines
2.9 KiB
Python
"""Regression anchor: skill archive installation must not block the event loop.

``LocalSkillStorage.ainstall_skill_from_archive`` is the async entry point the
gateway ``POST /skills/install`` route awaits. It extracts the archive,
validates frontmatter, security-scans every installable file, and stages the
skill into the custom directory — all filesystem work that previously ran
inline on the event loop (zip extract, ``rglob`` enumeration, ``read_text``,
``shutil.copytree``). The fix offloads those phases via ``asyncio.to_thread``
while keeping the per-file LLM security scan as the only awaited work; if any
phase regresses back onto the loop, the strict Blockbuster gate raises
``BlockingError`` and this test fails.

Only the external LLM boundary (``scan_skill_content``) is stubbed — the
archive, extraction, validation, and staging all run against the real local
filesystem. Test-side setup IO is itself offloaded with ``asyncio.to_thread``
(matching ``test_agents_router``) so only the production path is exercised on
the loop.
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import asyncio
|
|
import zipfile
|
|
from pathlib import Path
|
|
from types import SimpleNamespace
|
|
|
|
import pytest
|
|
|
|
from deerflow.skills.storage.local_skill_storage import LocalSkillStorage
|
|
|
|
# Apply pytest's ``asyncio`` marker to every test in this module.
pytestmark = pytest.mark.asyncio
|
|
|
|
_SKILL_MD = """---
|
|
name: loop-skill
|
|
description: Anchor fixture skill for the blocking-IO gate.
|
|
---
|
|
|
|
# Loop Skill
|
|
|
|
Drives the full install pipeline under the Blockbuster gate.
|
|
"""
|
|
|
|
_SUPPORT_MD = "Reference notes scanned by the per-file security pass.\n"
|
|
|
|
|
|
def _build_archive(archive: Path) -> None:
    """Write the fixture skill zip to *archive*.

    The zip holds two text members under a top-level ``loop-skill/`` folder:
    ``SKILL.md`` (from ``_SKILL_MD``) and ``references/usage.md`` (from
    ``_SUPPORT_MD``). This is synchronous, blocking file IO.
    """
    members = (
        ("loop-skill/SKILL.md", _SKILL_MD),
        ("loop-skill/references/usage.md", _SUPPORT_MD),
    )
    with zipfile.ZipFile(archive, "w") as bundle:
        for member_name, text in members:
            bundle.writestr(member_name, text)
|
|
|
|
|
|
async def test_install_skill_archive_does_not_block_event_loop(tmp_path: Path, monkeypatch) -> None:
    """Install a real ``.skill`` archive through the async entry point.

    The archive is built on disk, the content scanner is replaced with an
    always-allow stub, and ``ainstall_skill_from_archive`` is awaited directly.
    The test then checks the reported result and that both archive members
    landed under ``<tmp_path>/skills/custom/loop-skill``. All test-side
    filesystem calls go through ``asyncio.to_thread`` so they do not run on
    the event loop themselves.
    """

    async def _allow_scan(content: str, *, executable: bool = False, location: str = "SKILL.md", app_config=None):
        # Stand-in scanner result: always approve, whatever the content.
        return SimpleNamespace(decision="allow", reason="anchor stub")

    # External dependency boundary only: the security scanner is an LLM call.
    monkeypatch.setattr("deerflow.skills.installer.scan_skill_content", _allow_scan)

    skills_root = tmp_path / "skills"
    archive_path = tmp_path / "loop-skill.skill"
    await asyncio.to_thread(_build_archive, archive_path)

    # Constructor resolves paths (one-time, cached in production via
    # get_or_new_skill_storage); offloaded here so the anchor exercises only
    # the install pipeline itself on the loop.
    storage = await asyncio.to_thread(LocalSkillStorage, host_path=str(skills_root))

    outcome = await storage.ainstall_skill_from_archive(archive_path)

    assert outcome["success"] is True
    assert outcome["skill_name"] == "loop-skill"

    # Both archive members must exist under the custom skills directory.
    installed_root = skills_root / "custom" / "loop-skill"
    skill_md_present = await asyncio.to_thread((installed_root / "SKILL.md").exists)
    assert skill_md_present
    usage_md_present = await asyncio.to_thread((installed_root / "references" / "usage.md").exists)
    assert usage_md_present
|