chore(blocking-io): fail-loud repo-root resolution and shared detector CLI shim (#3512)

* chore(blocking-io): fail-loud repo-root resolution and shared detector CLI shim

The three detectors resolved REPO_ROOT with depth-indexed
Path(__file__).resolve().parents[4]. If a detector file ever moves to a
different directory depth, scan roots resolve under the wrong directory
and the detector reports zero findings with no error — a silent-zero
failure shape for a detection tool.

- Add support/detectors/repo_root.py: resolve the repo root by walking
  upward to the .git marker (checked with exists() so git worktrees,
  where .git is a file, also resolve), raising RuntimeError when no
  marker is found. All three detectors use it at import time, so a
  relocated detector fails loudly instead of scanning an empty tree.
- Extract scripts/_detector_cli.py from the three character-identical
  CLI shims; the sys.path computation lives in one place and raises
  when backend/tests cannot be found.
- tests/test_detector_repo_root.py pins: resolution from an unmarked
  location raises instead of returning an empty scan; all three
  detectors share the resolved root; each CLI shim delegates to its
  detector.

Testing: backend `make test` (4278 passed); smoke-ran
`make detect-blocking-io`, `make detect-thread-boundaries`, and
`scripts/scan_changed_blocking_io.py --base upstream/main`.

Closes #3510 (review follow-up to #3503).

* chore(blocking-io): declare detector modules import-only, drop script-mode residue

Adversarial review caught that blocking_io_static.py and
thread_boundaries.py kept shebangs and __main__ blocks but can no longer
run as plain scripts: the new `from support.detectors.repo_root import`
executes before anything puts backend/tests on sys.path, so direct
invocation dies with ModuleNotFoundError before argparse.

Direct execution was never a documented entry point (Makefile targets,
the scripts/ shims, the blocking-io-guard skill, and tests all go
through the support.detectors package), so converge on import-only
instead of re-adding per-module bootstrap: drop the shebangs and the now
unreachable __main__ blocks (plus the `import sys` they kept alive) and
state the supported entry points in each module docstring. The shim
delegation tests in test_detector_repo_root.py pin the supported CLI
paths.

Testing: backend `make test` (4278 passed); `make detect-blocking-io`
and `make detect-thread-boundaries` smoke-ran.
This commit is contained in:
AochenShen99
2026-06-12 17:16:01 +08:00
committed by GitHub
parent bbce6c0ac0
commit a838546a2b
9 changed files with 134 additions and 44 deletions
@@ -7,6 +7,9 @@ new versus the merge base — the latter catches exposure created without
touching the blocking line itself (a new async caller making an old sync
helper async-reachable). Used by the `blocking-io-guard` skill as the
deterministic scope step.
Not directly executable: import as `support.detectors.blocking_io_changed` or
run via the CLI shim `scripts/scan_changed_blocking_io.py`.
"""
from __future__ import annotations
@@ -15,14 +18,14 @@ import argparse
import json
import re
import subprocess
import sys
from collections import defaultdict
from collections.abc import Sequence
from pathlib import Path
from support.detectors import blocking_io_static as static
from support.detectors.repo_root import resolve_repo_root
REPO_ROOT = Path(__file__).resolve().parents[4]
REPO_ROOT = resolve_repo_root(Path(__file__))
SCAN_ROOTS = (
"backend/app",
"backend/packages/harness/deerflow",
@@ -207,7 +210,3 @@ def main(argv: Sequence[str] | None = None) -> int:
else:
print(format_report(findings, args.base))
return 0
if __name__ == "__main__":
sys.exit(main())
@@ -1,9 +1,11 @@
#!/usr/bin/env python3
"""Static inventory for likely backend event-loop blocking IO.
This detector parses backend business source with AST so untested paths are
still visible during review. Findings are prioritized static candidates, not
automatic bug decisions.
Not directly executable: import as `support.detectors.blocking_io_static` or
run via the CLI shim `scripts/detect_blocking_io_static.py`.
"""
from __future__ import annotations
@@ -12,13 +14,14 @@ import argparse
import ast
import json
import os
import sys
from collections import Counter, defaultdict, deque
from collections.abc import Callable, Iterable, Sequence
from dataclasses import dataclass
from pathlib import Path
REPO_ROOT = Path(__file__).resolve().parents[4]
from support.detectors.repo_root import resolve_repo_root
REPO_ROOT = resolve_repo_root(Path(__file__))
DEFAULT_SCAN_PATHS = (
REPO_ROOT / "backend" / "app",
REPO_ROOT / "backend" / "packages" / "harness" / "deerflow",
@@ -889,7 +892,3 @@ def main(argv: Sequence[str] | None = None) -> int:
else:
print(format_text(findings))
return 0
if __name__ == "__main__":
sys.exit(main())
@@ -0,0 +1,31 @@
"""Fail-loud repository-root resolution shared by the detectors.
Depth-indexed resolution (`Path(__file__).resolve().parents[N]`) fails
silently when a detector file moves to a different directory depth: scan
roots resolve under the wrong directory, nothing is scanned, and the
detector reports zero findings with no error. Walking upward to a
repository marker turns that into an immediate error instead.
"""
from __future__ import annotations
from pathlib import Path
REPO_ROOT_MARKER = ".git"
def resolve_repo_root(start: Path) -> Path:
"""Return the repository root above `start` (the directory containing `.git`).
`.git` is checked with `exists()` rather than `is_dir()` so git worktrees
(where `.git` is a file) resolve correctly.
Raises:
RuntimeError: when no marker is found above `start`, so a relocated
detector fails loudly instead of silently scanning an empty tree.
"""
resolved = start.resolve()
for candidate in (resolved, *resolved.parents):
if (candidate / REPO_ROOT_MARKER).exists():
return candidate
raise RuntimeError(f"could not resolve the repository root: no '{REPO_ROOT_MARKER}' marker found above {resolved}; refusing to guess scan paths")
@@ -1,9 +1,11 @@
#!/usr/bin/env python3
"""Inventory async/thread boundary points for developer review.
This detector is intentionally non-invasive: it parses Python source with AST
and reports places where code crosses sync/async/thread boundaries. Findings
are review evidence, not automatic bug decisions.
Not directly executable: import as `support.detectors.thread_boundaries` or
run via the CLI shim `scripts/detect_thread_boundaries.py`.
"""
from __future__ import annotations
@@ -12,12 +14,13 @@ import argparse
import ast
import json
import os
import sys
from collections.abc import Iterable, Sequence
from dataclasses import asdict, dataclass
from pathlib import Path
REPO_ROOT = Path(__file__).resolve().parents[4]
from support.detectors.repo_root import resolve_repo_root
REPO_ROOT = resolve_repo_root(Path(__file__))
DEFAULT_SCAN_PATHS = (
REPO_ROOT / "backend" / "app",
REPO_ROOT / "backend" / "packages" / "harness" / "deerflow",
@@ -501,7 +504,3 @@ def main(argv: Sequence[str] | None = None) -> int:
else:
print(format_text(findings))
return 0
if __name__ == "__main__":
sys.exit(main())
+53
View File
@@ -0,0 +1,53 @@
"""Pin fail-loud repo-root resolution and the shared CLI shim for the detector tooling.
The failure mode being guarded: depth-indexed `parents[N]` resolution from a
relocated detector file silently resolves a wrong root, scans nothing, and
reports zero findings. Resolution must instead raise when no repository
marker is found.
"""
from __future__ import annotations
from pathlib import Path
import pytest
from support.detectors import blocking_io_changed, blocking_io_static, thread_boundaries
from support.detectors.repo_root import resolve_repo_root
REPO_ROOT = Path(__file__).resolve().parents[2]
def test_resolve_repo_root_finds_marker_from_detector_location():
resolved = resolve_repo_root(Path(blocking_io_static.__file__))
assert resolved == REPO_ROOT
assert (resolved / ".git").exists()
def test_all_detectors_share_the_resolved_root():
assert blocking_io_static.REPO_ROOT == REPO_ROOT
assert blocking_io_changed.REPO_ROOT == REPO_ROOT
assert thread_boundaries.REPO_ROOT == REPO_ROOT
def test_unmarked_location_raises_instead_of_scanning_nothing(tmp_path: Path):
start = tmp_path / "moved" / "detectors" / "blocking_io_static.py"
with pytest.raises(RuntimeError, match=r"\.git"):
resolve_repo_root(start)
def test_cli_shims_delegate_to_their_detectors(capsys: pytest.CaptureFixture[str]):
# conftest puts scripts/ on sys.path; --help proves the shim resolves and
# invokes the right detector main without running a scan.
import detect_blocking_io_static
import detect_thread_boundaries
import scan_changed_blocking_io
for shim, description_fragment in (
(detect_blocking_io_static, "Statically inventory blocking IO calls"),
(detect_thread_boundaries, "Detect async/thread boundary points"),
(scan_changed_blocking_io, "blocking-IO candidates this change introduces"),
):
with pytest.raises(SystemExit) as excinfo:
shim.main(["--help"])
assert excinfo.value.code == 0
assert description_fragment in capsys.readouterr().out
+27
View File
@@ -0,0 +1,27 @@
"""Shared bootstrap for the detector CLI shims in this directory.
The detectors live under `backend/tests/support/detectors/` so they can be
exercised by the test suite; the shims here only put that package on
`sys.path` and delegate. Keeping the path computation in one place means a
layout change breaks loudly in exactly one file instead of silently drifting
across copies.
"""
from __future__ import annotations
import importlib
import sys
from collections.abc import Sequence
from pathlib import Path
TEST_SUPPORT_PATH = Path(__file__).resolve().parents[1] / "backend" / "tests"
def run_detector(module_name: str, argv: Sequence[str] | None = None) -> int:
"""Import a `support.detectors.*` module and run its `main(argv)`."""
if not TEST_SUPPORT_PATH.is_dir():
raise RuntimeError(f"detector support path not found: {TEST_SUPPORT_PATH}; the scripts/ directory has moved relative to backend/tests")
if str(TEST_SUPPORT_PATH) not in sys.path:
sys.path.insert(0, str(TEST_SUPPORT_PATH))
module = importlib.import_module(module_name)
return module.main(argv)
+2 -8
View File
@@ -5,18 +5,12 @@ from __future__ import annotations
import sys
from collections.abc import Sequence
from pathlib import Path
REPO_ROOT = Path(__file__).resolve().parents[1]
TEST_SUPPORT_PATH = REPO_ROOT / "backend" / "tests"
if str(TEST_SUPPORT_PATH) not in sys.path:
sys.path.insert(0, str(TEST_SUPPORT_PATH))
from _detector_cli import run_detector
def main(argv: Sequence[str] | None = None) -> int:
from support.detectors.blocking_io_static import main as detector_main
return detector_main(argv)
return run_detector("support.detectors.blocking_io_static", argv)
if __name__ == "__main__":
+2 -8
View File
@@ -5,18 +5,12 @@ from __future__ import annotations
import sys
from collections.abc import Sequence
from pathlib import Path
REPO_ROOT = Path(__file__).resolve().parents[1]
TEST_SUPPORT_PATH = REPO_ROOT / "backend" / "tests"
if str(TEST_SUPPORT_PATH) not in sys.path:
sys.path.insert(0, str(TEST_SUPPORT_PATH))
from _detector_cli import run_detector
def main(argv: Sequence[str] | None = None) -> int:
from support.detectors.thread_boundaries import main as detector_main
return detector_main(argv)
return run_detector("support.detectors.thread_boundaries", argv)
if __name__ == "__main__":
+2 -8
View File
@@ -5,18 +5,12 @@ from __future__ import annotations
import sys
from collections.abc import Sequence
from pathlib import Path
REPO_ROOT = Path(__file__).resolve().parents[1]
TEST_SUPPORT_PATH = REPO_ROOT / "backend" / "tests"
if str(TEST_SUPPORT_PATH) not in sys.path:
sys.path.insert(0, str(TEST_SUPPORT_PATH))
from _detector_cli import run_detector
def main(argv: Sequence[str] | None = None) -> int:
from support.detectors.blocking_io_changed import main as scanner_main
return scanner_main(argv)
return run_detector("support.detectors.blocking_io_changed", argv)
if __name__ == "__main__":