mirror of
https://github.com/bytedance/deer-flow.git
synced 2026-06-13 10:55:59 +00:00
feat(skill): add blocking-io-guard — SOP skill for blocking-IO triage and runtime anchors (#3503)
* feat(blocking-io): add changed-lines blocking-IO scanner (L1) * feat(blocking-io): add scan-changed CLI wrapper * feat(skill): add blocking-io-guard developer SOP skill * docs(blocking-io): point contributors at the blocking-io-guard skill * style(blocking-io): apply ruff format to scanner and tests * docs(backend): document changed-lines blocking-IO scanner in CLAUDE.md * feat(skill): add post-fix re-scan check and PR batching policy * refactor(skill): fix SOP step ordering, align template with repo conventions - Move re-scan into an explicit 'apply the fix' step (was wedged after anchor generation while telling you to go back before the anchor) - Renumber steps 0-6; drop undefined 'L1' jargon - Mode A: document that the diff is <base>...HEAD (commit first) - Mode B: prefer make detect-blocking-io + findings JSON file - anchor template: module-level pytestmark per tests/blocking_io convention - CLAUDE.md: fix 'git diff --base' phrasing * fix(skill): catch findings introduced without touching the blocking line Review follow-up: changed-line intersection alone misses the case where a new async caller exposes an old sync helper — the static finding sits on the untouched blocking line, so Mode A returned empty and the SOP stopped on a false 'no blocking-IO surface'. Selection is now a union over the changed files: - findings on added lines of git diff <base>...HEAD (kept: a second identical symbol in an already-flagged function collides on the stable key and only this selection sees it); - findings new versus the merge base, matched by (path, function, symbol) — never line numbers. Base sources are materialized via git show <merge-base>:<path>; files absent at base count every head finding as new. SKILL.md now states the residual same-file-only blind spot (cross-file async callers) instead of treating an empty list as proof of zero exposure, and only requires reading sop-skeleton.md when generalizing to another detector domain. * docs(skill): examples teach test-writing, the teeth check defines the rule All examples in the references/template are filesystem-flavored; make explicit that they are instances, not the SOP's boundary — the same rules apply to every detector category (FILE_IO, HTTP, SUBPROCESS, SLEEP) and acceptance is always red/green teeth, never similarity to an example. Neutralize the template's arrange comment accordingly. * fix(blocking-io): harden changed-lines scanner per review - Dedup the union selection by the stable key (path, function, symbol) instead of dict identity, so a future selector returning copied dicts cannot silently empty the result. - parse_changed_lines now handles any unified diff: context lines advance the new-file counter, \-markers and deletions do not, and the counter resets at each +++ header. Previously correct only for --unified=0. - Add blocking_io_static.scan_source (in-memory scan); base-version comparison no longer round-trips through temp files. - Empty Mode A report now prints the same-file-only reachability caveat at the point of use instead of relying on the SOP text alone. * docs(skill): bound best-effort cleanup when the offload sits in finally Lesson from the #3505 review: the SOP routinely drives 'offload the cleanup branch' transformations, and an awaited cleanup in finally can mask or stall the primary exception. One sentence in Step 2 closes that gap at the point where the fix is written.
This commit is contained in:
@@ -0,0 +1,175 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import textwrap
|
||||
from pathlib import Path
|
||||
|
||||
from support.detectors import blocking_io_changed as changed
|
||||
from support.detectors import blocking_io_static as static
|
||||
|
||||
|
||||
def _write_python(path: Path, source: str) -> Path:
|
||||
path.write_text(textwrap.dedent(source).strip() + "\n", encoding="utf-8")
|
||||
return path
|
||||
|
||||
|
||||
_CLEANUP_BRANCH_SOURCE = """
|
||||
import shutil
|
||||
from pathlib import Path
|
||||
|
||||
async def create_agent(path: Path) -> None:
|
||||
path.mkdir()
|
||||
try:
|
||||
await _save(path)
|
||||
except Exception:
|
||||
shutil.rmtree(path)
|
||||
raise
|
||||
"""
|
||||
|
||||
|
||||
def test_parse_changed_lines_records_added_lines_only() -> None:
|
||||
diff = textwrap.dedent(
|
||||
"""\
|
||||
diff --git a/backend/app/x.py b/backend/app/x.py
|
||||
--- a/backend/app/x.py
|
||||
+++ b/backend/app/x.py
|
||||
@@ -10,0 +11,2 @@ def f():
|
||||
+ a = 1
|
||||
+ b = 2
|
||||
@@ -20 +22,0 @@ def g():
|
||||
- gone = 1
|
||||
"""
|
||||
)
|
||||
assert changed.parse_changed_lines(diff) == {"backend/app/x.py": {11, 12}}
|
||||
|
||||
|
||||
def test_parse_changed_lines_handles_context_diffs() -> None:
|
||||
diff = textwrap.dedent(
|
||||
"""\
|
||||
diff --git a/backend/app/x.py b/backend/app/x.py
|
||||
--- a/backend/app/x.py
|
||||
+++ b/backend/app/x.py
|
||||
@@ -8,7 +8,8 @@ def f():
|
||||
ctx1
|
||||
ctx2
|
||||
- removed
|
||||
+ added_one
|
||||
ctx3
|
||||
+ added_two
|
||||
ctx4
|
||||
\\ No newline at end of file
|
||||
"""
|
||||
)
|
||||
assert changed.parse_changed_lines(diff) == {"backend/app/x.py": {10, 12}}
|
||||
|
||||
|
||||
def test_parse_changed_lines_ignores_deleted_files() -> None:
|
||||
diff = textwrap.dedent(
|
||||
"""\
|
||||
diff --git a/x.py b/x.py
|
||||
+++ /dev/null
|
||||
@@ -1,2 +0,0 @@
|
||||
-gone
|
||||
"""
|
||||
)
|
||||
assert changed.parse_changed_lines(diff) == {}
|
||||
|
||||
|
||||
def test_select_findings_keeps_only_touched_candidates(tmp_path: Path) -> None:
|
||||
src = _write_python(tmp_path / "agents.py", _CLEANUP_BRANCH_SOURCE)
|
||||
findings = [f.to_dict() for f in static.scan_file(src, repo_root=tmp_path)]
|
||||
rmtree = next(f for f in findings if f["blocking_call"]["symbol"] == "shutil.rmtree")
|
||||
other = next(f for f in findings if f["blocking_call"]["symbol"] != "shutil.rmtree")
|
||||
|
||||
changed_lines = {"agents.py": {rmtree["location"]["line"]}}
|
||||
selected = changed.select_findings_on_changed_lines(findings, changed_lines)
|
||||
|
||||
assert [f["blocking_call"]["symbol"] for f in selected] == ["shutil.rmtree"]
|
||||
assert other not in selected
|
||||
|
||||
|
||||
def test_find_changed_blocking_io_surfaces_only_changed_candidate(tmp_path: Path, monkeypatch) -> None:
|
||||
src = _write_python(tmp_path / "agents.py", _CLEANUP_BRANCH_SOURCE)
|
||||
all_findings = [f.to_dict() for f in static.scan_file(src, repo_root=tmp_path)]
|
||||
rmtree_line = next(f["location"]["line"] for f in all_findings if f["blocking_call"]["symbol"] == "shutil.rmtree")
|
||||
|
||||
# Stub only the git boundary; the static scan runs for real against tmp_path.
|
||||
monkeypatch.setattr(
|
||||
changed,
|
||||
"changed_python_lines",
|
||||
lambda base, repo_root: {"agents.py": {rmtree_line}},
|
||||
)
|
||||
# Base content identical to head: every finding already existed, so only
|
||||
# the changed-line selection contributes (and the union must not double).
|
||||
monkeypatch.setattr(
|
||||
changed,
|
||||
"base_python_contents",
|
||||
lambda base, paths, repo_root: {"agents.py": src.read_text(encoding="utf-8")},
|
||||
)
|
||||
|
||||
result = changed.find_changed_blocking_io("origin/main", repo_root=tmp_path)
|
||||
|
||||
assert [f["blocking_call"]["symbol"] for f in result] == ["shutil.rmtree"]
|
||||
|
||||
|
||||
_SYNC_HELPER_BASE = """
|
||||
from pathlib import Path
|
||||
|
||||
def load(path: Path) -> str:
|
||||
return path.read_text()
|
||||
"""
|
||||
|
||||
_SYNC_HELPER_HEAD = """
|
||||
from pathlib import Path
|
||||
|
||||
def load(path: Path) -> str:
|
||||
return path.read_text()
|
||||
|
||||
async def route(path: Path) -> str:
|
||||
return load(path)
|
||||
"""
|
||||
|
||||
|
||||
def test_new_async_caller_exposing_old_sync_helper_is_reported(tmp_path: Path, monkeypatch) -> None:
|
||||
"""The blocking line is NOT in the diff — only the new async caller is.
|
||||
|
||||
The finding sits on the untouched `read_text` line, so changed-line
|
||||
selection alone would return empty; the new-vs-base comparison must
|
||||
surface it.
|
||||
"""
|
||||
src = _write_python(tmp_path / "mod.py", _SYNC_HELPER_HEAD)
|
||||
head_findings = [f.to_dict() for f in static.scan_file(src, repo_root=tmp_path)]
|
||||
read_text_line = next(f["location"]["line"] for f in head_findings if f["blocking_call"]["symbol"] == "path.read_text")
|
||||
added_lines = {line for line in range(1, len(src.read_text().splitlines()) + 1) if line > read_text_line}
|
||||
|
||||
monkeypatch.setattr(changed, "changed_python_lines", lambda base, repo_root: {"mod.py": added_lines})
|
||||
monkeypatch.setattr(
|
||||
changed,
|
||||
"base_python_contents",
|
||||
lambda base, paths, repo_root: {"mod.py": textwrap.dedent(_SYNC_HELPER_BASE).strip() + "\n"},
|
||||
)
|
||||
|
||||
result = changed.find_changed_blocking_io("origin/main", repo_root=tmp_path)
|
||||
|
||||
assert len(result) == 1
|
||||
assert result[0]["blocking_call"]["symbol"] == "path.read_text"
|
||||
assert result[0]["event_loop_exposure"] == "ASYNC_REACHABLE_SAME_FILE"
|
||||
|
||||
|
||||
def test_select_findings_new_vs_base_matches_by_stable_key(tmp_path: Path) -> None:
|
||||
head = _write_python(tmp_path / "mod.py", _SYNC_HELPER_HEAD)
|
||||
head_findings = [f.to_dict() for f in static.scan_file(head, repo_root=tmp_path)]
|
||||
|
||||
base_findings = changed.scan_python_contents({"mod.py": textwrap.dedent(_SYNC_HELPER_BASE).strip() + "\n"})
|
||||
assert base_findings == [] # no async exposure at base -> detector is silent
|
||||
|
||||
new = changed.select_findings_new_vs_base(head_findings, base_findings)
|
||||
assert [f["blocking_call"]["symbol"] for f in new] == ["path.read_text"]
|
||||
|
||||
# Same content at base and head -> nothing is new, regardless of line drift.
|
||||
assert changed.select_findings_new_vs_base(head_findings, head_findings) == []
|
||||
|
||||
|
||||
def test_format_report_empty_warns_about_cross_file_blind_spot() -> None:
|
||||
report = changed.format_report([], base="origin/main")
|
||||
assert "No blocking-IO candidates" in report
|
||||
assert "defined in another file" in report
|
||||
Reference in New Issue
Block a user