mirror of
https://github.com/bytedance/deer-flow.git
synced 2026-06-13 10:55:59 +00:00
chore(blocking-io): fail-loud repo-root resolution and shared detector CLI shim (#3512)
* chore(blocking-io): fail-loud repo-root resolution and shared detector CLI shim The three detectors resolved REPO_ROOT with depth-indexed Path(__file__).resolve().parents[4]. If a detector file ever moves to a different directory depth, scan roots resolve under the wrong directory and the detector reports zero findings with no error — a silent-zero failure shape for a detection tool. - Add support/detectors/repo_root.py: resolve the repo root by walking upward to the .git marker (checked with exists() so git worktrees, where .git is a file, also resolve), raising RuntimeError when no marker is found. All three detectors use it at import time, so a relocated detector fails loudly instead of scanning an empty tree. - Extract scripts/_detector_cli.py from the three character-identical CLI shims; the sys.path computation lives in one place and raises when backend/tests cannot be found. - tests/test_detector_repo_root.py pins: resolution from an unmarked location raises instead of returning an empty scan; all three detectors share the resolved root; each CLI shim delegates to its detector. Testing: backend `make test` (4278 passed); smoke-ran `make detect-blocking-io`, `make detect-thread-boundaries`, and `scripts/scan_changed_blocking_io.py --base upstream/main`. Closes #3510 (review follow-up to #3503). * chore(blocking-io): declare detector modules import-only, drop script-mode residue Adversarial review caught that blocking_io_static.py and thread_boundaries.py kept shebangs and __main__ blocks but can no longer run as plain scripts: the new `from support.detectors.repo_root import` executes before anything puts backend/tests on sys.path, so direct invocation dies with ModuleNotFoundError before argparse. Direct execution was never a documented entry point (Makefile targets, the scripts/ shims, the blocking-io-guard skill, and tests all go through the support.detectors package), so converge on import-only instead of re-adding per-module bootstrap: drop the shebangs and the now unreachable __main__ blocks (plus the `import sys` they kept alive) and state the supported entry points in each module docstring. The shim delegation tests in test_detector_repo_root.py pin the supported CLI paths. Testing: backend `make test` (4278 passed); `make detect-blocking-io` and `make detect-thread-boundaries` smoke-ran.
This commit is contained in:
@@ -7,6 +7,9 @@ new versus the merge base — the latter catches exposure created without
|
||||
touching the blocking line itself (a new async caller making an old sync
|
||||
helper async-reachable). Used by the `blocking-io-guard` skill as the
|
||||
deterministic scope step.
|
||||
|
||||
Not directly executable: import as `support.detectors.blocking_io_changed` or
|
||||
run via the CLI shim `scripts/scan_changed_blocking_io.py`.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
@@ -15,14 +18,14 @@ import argparse
|
||||
import json
|
||||
import re
|
||||
import subprocess
|
||||
import sys
|
||||
from collections import defaultdict
|
||||
from collections.abc import Sequence
|
||||
from pathlib import Path
|
||||
|
||||
from support.detectors import blocking_io_static as static
|
||||
from support.detectors.repo_root import resolve_repo_root
|
||||
|
||||
REPO_ROOT = Path(__file__).resolve().parents[4]
|
||||
REPO_ROOT = resolve_repo_root(Path(__file__))
|
||||
SCAN_ROOTS = (
|
||||
"backend/app",
|
||||
"backend/packages/harness/deerflow",
|
||||
@@ -207,7 +210,3 @@ def main(argv: Sequence[str] | None = None) -> int:
|
||||
else:
|
||||
print(format_report(findings, args.base))
|
||||
return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
sys.exit(main())
|
||||
|
||||
@@ -1,9 +1,11 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Static inventory for likely backend event-loop blocking IO.
|
||||
|
||||
This detector parses backend business source with AST so untested paths are
|
||||
still visible during review. Findings are prioritized static candidates, not
|
||||
automatic bug decisions.
|
||||
|
||||
Not directly executable: import as `support.detectors.blocking_io_static` or
|
||||
run via the CLI shim `scripts/detect_blocking_io_static.py`.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
@@ -12,13 +14,14 @@ import argparse
|
||||
import ast
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
from collections import Counter, defaultdict, deque
|
||||
from collections.abc import Callable, Iterable, Sequence
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
|
||||
REPO_ROOT = Path(__file__).resolve().parents[4]
|
||||
from support.detectors.repo_root import resolve_repo_root
|
||||
|
||||
REPO_ROOT = resolve_repo_root(Path(__file__))
|
||||
DEFAULT_SCAN_PATHS = (
|
||||
REPO_ROOT / "backend" / "app",
|
||||
REPO_ROOT / "backend" / "packages" / "harness" / "deerflow",
|
||||
@@ -889,7 +892,3 @@ def main(argv: Sequence[str] | None = None) -> int:
|
||||
else:
|
||||
print(format_text(findings))
|
||||
return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
sys.exit(main())
|
||||
|
||||
@@ -0,0 +1,31 @@
|
||||
"""Fail-loud repository-root resolution shared by the detectors.
|
||||
|
||||
Depth-indexed resolution (`Path(__file__).resolve().parents[N]`) fails
|
||||
silently when a detector file moves to a different directory depth: scan
|
||||
roots resolve under the wrong directory, nothing is scanned, and the
|
||||
detector reports zero findings with no error. Walking upward to a
|
||||
repository marker turns that into an immediate error instead.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
REPO_ROOT_MARKER = ".git"
|
||||
|
||||
|
||||
def resolve_repo_root(start: Path) -> Path:
|
||||
"""Return the repository root above `start` (the directory containing `.git`).
|
||||
|
||||
`.git` is checked with `exists()` rather than `is_dir()` so git worktrees
|
||||
(where `.git` is a file) resolve correctly.
|
||||
|
||||
Raises:
|
||||
RuntimeError: when no marker is found above `start`, so a relocated
|
||||
detector fails loudly instead of silently scanning an empty tree.
|
||||
"""
|
||||
resolved = start.resolve()
|
||||
for candidate in (resolved, *resolved.parents):
|
||||
if (candidate / REPO_ROOT_MARKER).exists():
|
||||
return candidate
|
||||
raise RuntimeError(f"could not resolve the repository root: no '{REPO_ROOT_MARKER}' marker found above {resolved}; refusing to guess scan paths")
|
||||
@@ -1,9 +1,11 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Inventory async/thread boundary points for developer review.
|
||||
|
||||
This detector is intentionally non-invasive: it parses Python source with AST
|
||||
and reports places where code crosses sync/async/thread boundaries. Findings
|
||||
are review evidence, not automatic bug decisions.
|
||||
|
||||
Not directly executable: import as `support.detectors.thread_boundaries` or
|
||||
run via the CLI shim `scripts/detect_thread_boundaries.py`.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
@@ -12,12 +14,13 @@ import argparse
|
||||
import ast
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
from collections.abc import Iterable, Sequence
|
||||
from dataclasses import asdict, dataclass
|
||||
from pathlib import Path
|
||||
|
||||
REPO_ROOT = Path(__file__).resolve().parents[4]
|
||||
from support.detectors.repo_root import resolve_repo_root
|
||||
|
||||
REPO_ROOT = resolve_repo_root(Path(__file__))
|
||||
DEFAULT_SCAN_PATHS = (
|
||||
REPO_ROOT / "backend" / "app",
|
||||
REPO_ROOT / "backend" / "packages" / "harness" / "deerflow",
|
||||
@@ -501,7 +504,3 @@ def main(argv: Sequence[str] | None = None) -> int:
|
||||
else:
|
||||
print(format_text(findings))
|
||||
return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
sys.exit(main())
|
||||
|
||||
@@ -0,0 +1,53 @@
|
||||
"""Pin fail-loud repo-root resolution and the shared CLI shim for the detector tooling.
|
||||
|
||||
The failure mode being guarded: depth-indexed `parents[N]` resolution from a
|
||||
relocated detector file silently resolves a wrong root, scans nothing, and
|
||||
reports zero findings. Resolution must instead raise when no repository
|
||||
marker is found.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
from support.detectors import blocking_io_changed, blocking_io_static, thread_boundaries
|
||||
from support.detectors.repo_root import resolve_repo_root
|
||||
|
||||
REPO_ROOT = Path(__file__).resolve().parents[2]
|
||||
|
||||
|
||||
def test_resolve_repo_root_finds_marker_from_detector_location():
|
||||
resolved = resolve_repo_root(Path(blocking_io_static.__file__))
|
||||
assert resolved == REPO_ROOT
|
||||
assert (resolved / ".git").exists()
|
||||
|
||||
|
||||
def test_all_detectors_share_the_resolved_root():
|
||||
assert blocking_io_static.REPO_ROOT == REPO_ROOT
|
||||
assert blocking_io_changed.REPO_ROOT == REPO_ROOT
|
||||
assert thread_boundaries.REPO_ROOT == REPO_ROOT
|
||||
|
||||
|
||||
def test_unmarked_location_raises_instead_of_scanning_nothing(tmp_path: Path):
|
||||
start = tmp_path / "moved" / "detectors" / "blocking_io_static.py"
|
||||
with pytest.raises(RuntimeError, match=r"\.git"):
|
||||
resolve_repo_root(start)
|
||||
|
||||
|
||||
def test_cli_shims_delegate_to_their_detectors(capsys: pytest.CaptureFixture[str]):
|
||||
# conftest puts scripts/ on sys.path; --help proves the shim resolves and
|
||||
# invokes the right detector main without running a scan.
|
||||
import detect_blocking_io_static
|
||||
import detect_thread_boundaries
|
||||
import scan_changed_blocking_io
|
||||
|
||||
for shim, description_fragment in (
|
||||
(detect_blocking_io_static, "Statically inventory blocking IO calls"),
|
||||
(detect_thread_boundaries, "Detect async/thread boundary points"),
|
||||
(scan_changed_blocking_io, "blocking-IO candidates this change introduces"),
|
||||
):
|
||||
with pytest.raises(SystemExit) as excinfo:
|
||||
shim.main(["--help"])
|
||||
assert excinfo.value.code == 0
|
||||
assert description_fragment in capsys.readouterr().out
|
||||
@@ -0,0 +1,27 @@
|
||||
"""Shared bootstrap for the detector CLI shims in this directory.
|
||||
|
||||
The detectors live under `backend/tests/support/detectors/` so they can be
|
||||
exercised by the test suite; the shims here only put that package on
|
||||
`sys.path` and delegate. Keeping the path computation in one place means a
|
||||
layout change breaks loudly in exactly one file instead of silently drifting
|
||||
across copies.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import importlib
|
||||
import sys
|
||||
from collections.abc import Sequence
|
||||
from pathlib import Path
|
||||
|
||||
TEST_SUPPORT_PATH = Path(__file__).resolve().parents[1] / "backend" / "tests"
|
||||
|
||||
|
||||
def run_detector(module_name: str, argv: Sequence[str] | None = None) -> int:
|
||||
"""Import a `support.detectors.*` module and run its `main(argv)`."""
|
||||
if not TEST_SUPPORT_PATH.is_dir():
|
||||
raise RuntimeError(f"detector support path not found: {TEST_SUPPORT_PATH}; the scripts/ directory has moved relative to backend/tests")
|
||||
if str(TEST_SUPPORT_PATH) not in sys.path:
|
||||
sys.path.insert(0, str(TEST_SUPPORT_PATH))
|
||||
module = importlib.import_module(module_name)
|
||||
return module.main(argv)
|
||||
@@ -5,18 +5,12 @@ from __future__ import annotations
|
||||
|
||||
import sys
|
||||
from collections.abc import Sequence
|
||||
from pathlib import Path
|
||||
|
||||
REPO_ROOT = Path(__file__).resolve().parents[1]
|
||||
TEST_SUPPORT_PATH = REPO_ROOT / "backend" / "tests"
|
||||
if str(TEST_SUPPORT_PATH) not in sys.path:
|
||||
sys.path.insert(0, str(TEST_SUPPORT_PATH))
|
||||
from _detector_cli import run_detector
|
||||
|
||||
|
||||
def main(argv: Sequence[str] | None = None) -> int:
|
||||
from support.detectors.blocking_io_static import main as detector_main
|
||||
|
||||
return detector_main(argv)
|
||||
return run_detector("support.detectors.blocking_io_static", argv)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
@@ -5,18 +5,12 @@ from __future__ import annotations
|
||||
|
||||
import sys
|
||||
from collections.abc import Sequence
|
||||
from pathlib import Path
|
||||
|
||||
REPO_ROOT = Path(__file__).resolve().parents[1]
|
||||
TEST_SUPPORT_PATH = REPO_ROOT / "backend" / "tests"
|
||||
if str(TEST_SUPPORT_PATH) not in sys.path:
|
||||
sys.path.insert(0, str(TEST_SUPPORT_PATH))
|
||||
from _detector_cli import run_detector
|
||||
|
||||
|
||||
def main(argv: Sequence[str] | None = None) -> int:
|
||||
from support.detectors.thread_boundaries import main as detector_main
|
||||
|
||||
return detector_main(argv)
|
||||
return run_detector("support.detectors.thread_boundaries", argv)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
@@ -5,18 +5,12 @@ from __future__ import annotations
|
||||
|
||||
import sys
|
||||
from collections.abc import Sequence
|
||||
from pathlib import Path
|
||||
|
||||
REPO_ROOT = Path(__file__).resolve().parents[1]
|
||||
TEST_SUPPORT_PATH = REPO_ROOT / "backend" / "tests"
|
||||
if str(TEST_SUPPORT_PATH) not in sys.path:
|
||||
sys.path.insert(0, str(TEST_SUPPORT_PATH))
|
||||
from _detector_cli import run_detector
|
||||
|
||||
|
||||
def main(argv: Sequence[str] | None = None) -> int:
|
||||
from support.detectors.blocking_io_changed import main as scanner_main
|
||||
|
||||
return scanner_main(argv)
|
||||
return run_detector("support.detectors.blocking_io_changed", argv)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
Reference in New Issue
Block a user