From a838546a2b369d024c32d319d3445a5f4e9264a8 Mon Sep 17 00:00:00 2001 From: AochenShen99 Date: Fri, 12 Jun 2026 17:16:01 +0800 Subject: [PATCH] chore(blocking-io): fail-loud repo-root resolution and shared detector CLI shim (#3512) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * chore(blocking-io): fail-loud repo-root resolution and shared detector CLI shim The three detectors resolved REPO_ROOT with depth-indexed Path(__file__).resolve().parents[4]. If a detector file ever moves to a different directory depth, scan roots resolve under the wrong directory and the detector reports zero findings with no error — a silent-zero failure shape for a detection tool. - Add support/detectors/repo_root.py: resolve the repo root by walking upward to the .git marker (checked with exists() so git worktrees, where .git is a file, also resolve), raising RuntimeError when no marker is found. All three detectors use it at import time, so a relocated detector fails loudly instead of scanning an empty tree. - Extract scripts/_detector_cli.py from the three character-identical CLI shims; the sys.path computation lives in one place and raises when backend/tests cannot be found. - tests/test_detector_repo_root.py pins: resolution from an unmarked location raises instead of returning an empty scan; all three detectors share the resolved root; each CLI shim delegates to its detector. Testing: backend `make test` (4278 passed); smoke-ran `make detect-blocking-io`, `make detect-thread-boundaries`, and `scripts/scan_changed_blocking_io.py --base upstream/main`. Closes #3510 (review follow-up to #3503). * chore(blocking-io): declare detector modules import-only, drop script-mode residue Adversarial review caught that blocking_io_static.py and thread_boundaries.py kept shebangs and __main__ blocks but can no longer run as plain scripts: the new `from support.detectors.repo_root import` executes before anything puts backend/tests on sys.path, so direct invocation dies with ModuleNotFoundError before argparse. Direct execution was never a documented entry point (Makefile targets, the scripts/ shims, the blocking-io-guard skill, and tests all go through the support.detectors package), so converge on import-only instead of re-adding per-module bootstrap: drop the shebangs and the now unreachable __main__ blocks (plus the `import sys` they kept alive) and state the supported entry points in each module docstring. The shim delegation tests in test_detector_repo_root.py pin the supported CLI paths. Testing: backend `make test` (4278 passed); `make detect-blocking-io` and `make detect-thread-boundaries` smoke-ran. --- .../support/detectors/blocking_io_changed.py | 11 ++-- .../support/detectors/blocking_io_static.py | 13 +++-- backend/tests/support/detectors/repo_root.py | 31 +++++++++++ .../support/detectors/thread_boundaries.py | 13 +++-- backend/tests/test_detector_repo_root.py | 53 +++++++++++++++++++ scripts/_detector_cli.py | 27 ++++++++++ scripts/detect_blocking_io_static.py | 10 +--- scripts/detect_thread_boundaries.py | 10 +--- scripts/scan_changed_blocking_io.py | 10 +--- 9 files changed, 134 insertions(+), 44 deletions(-) create mode 100644 backend/tests/support/detectors/repo_root.py create mode 100644 backend/tests/test_detector_repo_root.py create mode 100644 scripts/_detector_cli.py diff --git a/backend/tests/support/detectors/blocking_io_changed.py b/backend/tests/support/detectors/blocking_io_changed.py index 8aae54fd7..648e491a6 100644 --- a/backend/tests/support/detectors/blocking_io_changed.py +++ b/backend/tests/support/detectors/blocking_io_changed.py @@ -7,6 +7,9 @@ new versus the merge base — the latter catches exposure created without touching the blocking line itself (a new async caller making an old sync helper async-reachable). Used by the `blocking-io-guard` skill as the deterministic scope step. + +Not directly executable: import as `support.detectors.blocking_io_changed` or +run via the CLI shim `scripts/scan_changed_blocking_io.py`. """ from __future__ import annotations @@ -15,14 +18,14 @@ import argparse import json import re import subprocess -import sys from collections import defaultdict from collections.abc import Sequence from pathlib import Path from support.detectors import blocking_io_static as static +from support.detectors.repo_root import resolve_repo_root -REPO_ROOT = Path(__file__).resolve().parents[4] +REPO_ROOT = resolve_repo_root(Path(__file__)) SCAN_ROOTS = ( "backend/app", "backend/packages/harness/deerflow", @@ -207,7 +210,3 @@ def main(argv: Sequence[str] | None = None) -> int: else: print(format_report(findings, args.base)) return 0 - - -if __name__ == "__main__": - sys.exit(main()) diff --git a/backend/tests/support/detectors/blocking_io_static.py b/backend/tests/support/detectors/blocking_io_static.py index 14f4a5571..e09ac54ba 100644 --- a/backend/tests/support/detectors/blocking_io_static.py +++ b/backend/tests/support/detectors/blocking_io_static.py @@ -1,9 +1,11 @@ -#!/usr/bin/env python3 """Static inventory for likely backend event-loop blocking IO. This detector parses backend business source with AST so untested paths are still visible during review. Findings are prioritized static candidates, not automatic bug decisions. + +Not directly executable: import as `support.detectors.blocking_io_static` or +run via the CLI shim `scripts/detect_blocking_io_static.py`. """ from __future__ import annotations @@ -12,13 +14,14 @@ import argparse import ast import json import os -import sys from collections import Counter, defaultdict, deque from collections.abc import Callable, Iterable, Sequence from dataclasses import dataclass from pathlib import Path -REPO_ROOT = Path(__file__).resolve().parents[4] +from support.detectors.repo_root import resolve_repo_root + +REPO_ROOT = resolve_repo_root(Path(__file__)) DEFAULT_SCAN_PATHS = ( REPO_ROOT / "backend" / "app", REPO_ROOT / "backend" / "packages" / "harness" / "deerflow", @@ -889,7 +892,3 @@ def main(argv: Sequence[str] | None = None) -> int: else: print(format_text(findings)) return 0 - - -if __name__ == "__main__": - sys.exit(main()) diff --git a/backend/tests/support/detectors/repo_root.py b/backend/tests/support/detectors/repo_root.py new file mode 100644 index 000000000..e90dce0e8 --- /dev/null +++ b/backend/tests/support/detectors/repo_root.py @@ -0,0 +1,31 @@ +"""Fail-loud repository-root resolution shared by the detectors. + +Depth-indexed resolution (`Path(__file__).resolve().parents[N]`) fails +silently when a detector file moves to a different directory depth: scan +roots resolve under the wrong directory, nothing is scanned, and the +detector reports zero findings with no error. Walking upward to a +repository marker turns that into an immediate error instead. +""" + +from __future__ import annotations + +from pathlib import Path + +REPO_ROOT_MARKER = ".git" + + +def resolve_repo_root(start: Path) -> Path: + """Return the repository root above `start` (the directory containing `.git`). + + `.git` is checked with `exists()` rather than `is_dir()` so git worktrees + (where `.git` is a file) resolve correctly. + + Raises: + RuntimeError: when no marker is found above `start`, so a relocated + detector fails loudly instead of silently scanning an empty tree. + """ + resolved = start.resolve() + for candidate in (resolved, *resolved.parents): + if (candidate / REPO_ROOT_MARKER).exists(): + return candidate + raise RuntimeError(f"could not resolve the repository root: no '{REPO_ROOT_MARKER}' marker found above {resolved}; refusing to guess scan paths") diff --git a/backend/tests/support/detectors/thread_boundaries.py b/backend/tests/support/detectors/thread_boundaries.py index b1d043d47..6ea065064 100644 --- a/backend/tests/support/detectors/thread_boundaries.py +++ b/backend/tests/support/detectors/thread_boundaries.py @@ -1,9 +1,11 @@ -#!/usr/bin/env python3 """Inventory async/thread boundary points for developer review. This detector is intentionally non-invasive: it parses Python source with AST and reports places where code crosses sync/async/thread boundaries. Findings are review evidence, not automatic bug decisions. + +Not directly executable: import as `support.detectors.thread_boundaries` or +run via the CLI shim `scripts/detect_thread_boundaries.py`. """ from __future__ import annotations @@ -12,12 +14,13 @@ import argparse import ast import json import os -import sys from collections.abc import Iterable, Sequence from dataclasses import asdict, dataclass from pathlib import Path -REPO_ROOT = Path(__file__).resolve().parents[4] +from support.detectors.repo_root import resolve_repo_root + +REPO_ROOT = resolve_repo_root(Path(__file__)) DEFAULT_SCAN_PATHS = ( REPO_ROOT / "backend" / "app", REPO_ROOT / "backend" / "packages" / "harness" / "deerflow", @@ -501,7 +504,3 @@ def main(argv: Sequence[str] | None = None) -> int: else: print(format_text(findings)) return 0 - - -if __name__ == "__main__": - sys.exit(main()) diff --git a/backend/tests/test_detector_repo_root.py b/backend/tests/test_detector_repo_root.py new file mode 100644 index 000000000..340345735 --- /dev/null +++ b/backend/tests/test_detector_repo_root.py @@ -0,0 +1,53 @@ +"""Pin fail-loud repo-root resolution and the shared CLI shim for the detector tooling. + +The failure mode being guarded: depth-indexed `parents[N]` resolution from a +relocated detector file silently resolves a wrong root, scans nothing, and +reports zero findings. Resolution must instead raise when no repository +marker is found. +""" + +from __future__ import annotations + +from pathlib import Path + +import pytest +from support.detectors import blocking_io_changed, blocking_io_static, thread_boundaries +from support.detectors.repo_root import resolve_repo_root + +REPO_ROOT = Path(__file__).resolve().parents[2] + + +def test_resolve_repo_root_finds_marker_from_detector_location(): + resolved = resolve_repo_root(Path(blocking_io_static.__file__)) + assert resolved == REPO_ROOT + assert (resolved / ".git").exists() + + +def test_all_detectors_share_the_resolved_root(): + assert blocking_io_static.REPO_ROOT == REPO_ROOT + assert blocking_io_changed.REPO_ROOT == REPO_ROOT + assert thread_boundaries.REPO_ROOT == REPO_ROOT + + +def test_unmarked_location_raises_instead_of_scanning_nothing(tmp_path: Path): + start = tmp_path / "moved" / "detectors" / "blocking_io_static.py" + with pytest.raises(RuntimeError, match=r"\.git"): + resolve_repo_root(start) + + +def test_cli_shims_delegate_to_their_detectors(capsys: pytest.CaptureFixture[str]): + # conftest puts scripts/ on sys.path; --help proves the shim resolves and + # invokes the right detector main without running a scan. + import detect_blocking_io_static + import detect_thread_boundaries + import scan_changed_blocking_io + + for shim, description_fragment in ( + (detect_blocking_io_static, "Statically inventory blocking IO calls"), + (detect_thread_boundaries, "Detect async/thread boundary points"), + (scan_changed_blocking_io, "blocking-IO candidates this change introduces"), + ): + with pytest.raises(SystemExit) as excinfo: + shim.main(["--help"]) + assert excinfo.value.code == 0 + assert description_fragment in capsys.readouterr().out diff --git a/scripts/_detector_cli.py b/scripts/_detector_cli.py new file mode 100644 index 000000000..11989e165 --- /dev/null +++ b/scripts/_detector_cli.py @@ -0,0 +1,27 @@ +"""Shared bootstrap for the detector CLI shims in this directory. + +The detectors live under `backend/tests/support/detectors/` so they can be +exercised by the test suite; the shims here only put that package on +`sys.path` and delegate. Keeping the path computation in one place means a +layout change breaks loudly in exactly one file instead of silently drifting +across copies. +""" + +from __future__ import annotations + +import importlib +import sys +from collections.abc import Sequence +from pathlib import Path + +TEST_SUPPORT_PATH = Path(__file__).resolve().parents[1] / "backend" / "tests" + + +def run_detector(module_name: str, argv: Sequence[str] | None = None) -> int: + """Import a `support.detectors.*` module and run its `main(argv)`.""" + if not TEST_SUPPORT_PATH.is_dir(): + raise RuntimeError(f"detector support path not found: {TEST_SUPPORT_PATH}; the scripts/ directory has moved relative to backend/tests") + if str(TEST_SUPPORT_PATH) not in sys.path: + sys.path.insert(0, str(TEST_SUPPORT_PATH)) + module = importlib.import_module(module_name) + return module.main(argv) diff --git a/scripts/detect_blocking_io_static.py b/scripts/detect_blocking_io_static.py index 7a9c94a71..fbfddfae5 100644 --- a/scripts/detect_blocking_io_static.py +++ b/scripts/detect_blocking_io_static.py @@ -5,18 +5,12 @@ from __future__ import annotations import sys from collections.abc import Sequence -from pathlib import Path -REPO_ROOT = Path(__file__).resolve().parents[1] -TEST_SUPPORT_PATH = REPO_ROOT / "backend" / "tests" -if str(TEST_SUPPORT_PATH) not in sys.path: - sys.path.insert(0, str(TEST_SUPPORT_PATH)) +from _detector_cli import run_detector def main(argv: Sequence[str] | None = None) -> int: - from support.detectors.blocking_io_static import main as detector_main - - return detector_main(argv) + return run_detector("support.detectors.blocking_io_static", argv) if __name__ == "__main__": diff --git a/scripts/detect_thread_boundaries.py b/scripts/detect_thread_boundaries.py index c4a59132e..c1d18a839 100644 --- a/scripts/detect_thread_boundaries.py +++ b/scripts/detect_thread_boundaries.py @@ -5,18 +5,12 @@ from __future__ import annotations import sys from collections.abc import Sequence -from pathlib import Path -REPO_ROOT = Path(__file__).resolve().parents[1] -TEST_SUPPORT_PATH = REPO_ROOT / "backend" / "tests" -if str(TEST_SUPPORT_PATH) not in sys.path: - sys.path.insert(0, str(TEST_SUPPORT_PATH)) +from _detector_cli import run_detector def main(argv: Sequence[str] | None = None) -> int: - from support.detectors.thread_boundaries import main as detector_main - - return detector_main(argv) + return run_detector("support.detectors.thread_boundaries", argv) if __name__ == "__main__": diff --git a/scripts/scan_changed_blocking_io.py b/scripts/scan_changed_blocking_io.py index bc70f21ff..77c0608c1 100644 --- a/scripts/scan_changed_blocking_io.py +++ b/scripts/scan_changed_blocking_io.py @@ -5,18 +5,12 @@ from __future__ import annotations import sys from collections.abc import Sequence -from pathlib import Path -REPO_ROOT = Path(__file__).resolve().parents[1] -TEST_SUPPORT_PATH = REPO_ROOT / "backend" / "tests" -if str(TEST_SUPPORT_PATH) not in sys.path: - sys.path.insert(0, str(TEST_SUPPORT_PATH)) +from _detector_cli import run_detector def main(argv: Sequence[str] | None = None) -> int: - from support.detectors.blocking_io_changed import main as scanner_main - - return scanner_main(argv) + return run_detector("support.detectors.blocking_io_changed", argv) if __name__ == "__main__":