mirror of
https://github.com/bytedance/deer-flow.git
synced 2026-06-13 19:06:01 +00:00
chore(blocking-io): fail-loud repo-root resolution and shared detector CLI shim (#3512)
* chore(blocking-io): fail-loud repo-root resolution and shared detector CLI shim

The three detectors resolved REPO_ROOT with depth-indexed Path(__file__).resolve().parents[4]. If a detector file ever moves to a different directory depth, scan roots resolve under the wrong directory and the detector reports zero findings with no error — a silent-zero failure shape for a detection tool.

- Add support/detectors/repo_root.py: resolve the repo root by walking upward to the .git marker (checked with exists() so git worktrees, where .git is a file, also resolve), raising RuntimeError when no marker is found. All three detectors use it at import time, so a relocated detector fails loudly instead of scanning an empty tree.
- Extract scripts/_detector_cli.py from the three character-identical CLI shims; the sys.path computation lives in one place and raises when backend/tests cannot be found.
- tests/test_detector_repo_root.py pins: resolution from an unmarked location raises instead of returning an empty scan; all three detectors share the resolved root; each CLI shim delegates to its detector.

Testing: backend `make test` (4278 passed); smoke-ran `make detect-blocking-io`, `make detect-thread-boundaries`, and `scripts/scan_changed_blocking_io.py --base upstream/main`.

Closes #3510 (review follow-up to #3503).

* chore(blocking-io): declare detector modules import-only, drop script-mode residue

Adversarial review caught that blocking_io_static.py and thread_boundaries.py kept shebangs and __main__ blocks but can no longer run as plain scripts: the new `from support.detectors.repo_root import` executes before anything puts backend/tests on sys.path, so direct invocation dies with ModuleNotFoundError before argparse.
Direct execution was never a documented entry point (Makefile targets, the scripts/ shims, the blocking-io-guard skill, and tests all go through the support.detectors package), so converge on import-only instead of re-adding per-module bootstrap: drop the shebangs and the now-unreachable __main__ blocks (plus the `import sys` they kept alive) and state the supported entry points in each module docstring. The shim delegation tests in test_detector_repo_root.py pin the supported CLI paths.

Testing: backend `make test` (4278 passed); `make detect-blocking-io` and `make detect-thread-boundaries` smoke-ran.
This commit is contained in:
@@ -7,6 +7,9 @@ new versus the merge base — the latter catches exposure created without
|
||||
touching the blocking line itself (a new async caller making an old sync
|
||||
helper async-reachable). Used by the `blocking-io-guard` skill as the
|
||||
deterministic scope step.
|
||||
|
||||
Not directly executable: import as `support.detectors.blocking_io_changed` or
|
||||
run via the CLI shim `scripts/scan_changed_blocking_io.py`.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
@@ -15,14 +18,14 @@ import argparse
|
||||
import json
|
||||
import re
|
||||
import subprocess
|
||||
import sys
|
||||
from collections import defaultdict
|
||||
from collections.abc import Sequence
|
||||
from pathlib import Path
|
||||
|
||||
from support.detectors import blocking_io_static as static
|
||||
from support.detectors.repo_root import resolve_repo_root
|
||||
|
||||
REPO_ROOT = Path(__file__).resolve().parents[4]
|
||||
REPO_ROOT = resolve_repo_root(Path(__file__))
|
||||
SCAN_ROOTS = (
|
||||
"backend/app",
|
||||
"backend/packages/harness/deerflow",
|
||||
@@ -207,7 +210,3 @@ def main(argv: Sequence[str] | None = None) -> int:
|
||||
else:
|
||||
print(format_report(findings, args.base))
|
||||
return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
sys.exit(main())
|
||||
|
||||
@@ -1,9 +1,11 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Static inventory for likely backend event-loop blocking IO.
|
||||
|
||||
This detector parses backend business source with AST so untested paths are
|
||||
still visible during review. Findings are prioritized static candidates, not
|
||||
automatic bug decisions.
|
||||
|
||||
Not directly executable: import as `support.detectors.blocking_io_static` or
|
||||
run via the CLI shim `scripts/detect_blocking_io_static.py`.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
@@ -12,13 +14,14 @@ import argparse
|
||||
import ast
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
from collections import Counter, defaultdict, deque
|
||||
from collections.abc import Callable, Iterable, Sequence
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
|
||||
REPO_ROOT = Path(__file__).resolve().parents[4]
|
||||
from support.detectors.repo_root import resolve_repo_root
|
||||
|
||||
REPO_ROOT = resolve_repo_root(Path(__file__))
|
||||
DEFAULT_SCAN_PATHS = (
|
||||
REPO_ROOT / "backend" / "app",
|
||||
REPO_ROOT / "backend" / "packages" / "harness" / "deerflow",
|
||||
@@ -889,7 +892,3 @@ def main(argv: Sequence[str] | None = None) -> int:
|
||||
else:
|
||||
print(format_text(findings))
|
||||
return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
sys.exit(main())
|
||||
|
||||
@@ -0,0 +1,31 @@
|
||||
"""Fail-loud repository-root resolution shared by the detectors.
|
||||
|
||||
Depth-indexed resolution (`Path(__file__).resolve().parents[N]`) fails
|
||||
silently when a detector file moves to a different directory depth: scan
|
||||
roots resolve under the wrong directory, nothing is scanned, and the
|
||||
detector reports zero findings with no error. Walking upward to a
|
||||
repository marker turns that into an immediate error instead.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
REPO_ROOT_MARKER = ".git"
|
||||
|
||||
|
||||
def resolve_repo_root(start: Path) -> Path:
    """Locate the repository root by walking upward from `start`.

    The resolved `start` path is probed first, then each of its ancestors,
    nearest first; the first one that contains a `.git` entry is returned.
    The probe uses `exists()` rather than `is_dir()` because in a git
    worktree `.git` is a file, not a directory.

    Args:
        start: Path to begin the search from; it is resolved before walking.

    Returns:
        The nearest path at or above the resolved `start` that contains the
        `.git` marker.

    Raises:
        RuntimeError: when no marker is found at or above `start`, so a
            relocated detector fails loudly instead of silently scanning an
            empty tree.
    """
    resolved = start.resolve()
    # Lazily probe each level; `None` signals that the walk reached the
    # filesystem root without seeing the marker.
    root = next(
        (level for level in (resolved, *resolved.parents) if (level / REPO_ROOT_MARKER).exists()),
        None,
    )
    if root is None:
        raise RuntimeError(f"could not resolve the repository root: no '{REPO_ROOT_MARKER}' marker found above {resolved}; refusing to guess scan paths")
    return root
|
||||
@@ -1,9 +1,11 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Inventory async/thread boundary points for developer review.
|
||||
|
||||
This detector is intentionally non-invasive: it parses Python source with AST
|
||||
and reports places where code crosses sync/async/thread boundaries. Findings
|
||||
are review evidence, not automatic bug decisions.
|
||||
|
||||
Not directly executable: import as `support.detectors.thread_boundaries` or
|
||||
run via the CLI shim `scripts/detect_thread_boundaries.py`.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
@@ -12,12 +14,13 @@ import argparse
|
||||
import ast
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
from collections.abc import Iterable, Sequence
|
||||
from dataclasses import asdict, dataclass
|
||||
from pathlib import Path
|
||||
|
||||
REPO_ROOT = Path(__file__).resolve().parents[4]
|
||||
from support.detectors.repo_root import resolve_repo_root
|
||||
|
||||
REPO_ROOT = resolve_repo_root(Path(__file__))
|
||||
DEFAULT_SCAN_PATHS = (
|
||||
REPO_ROOT / "backend" / "app",
|
||||
REPO_ROOT / "backend" / "packages" / "harness" / "deerflow",
|
||||
@@ -501,7 +504,3 @@ def main(argv: Sequence[str] | None = None) -> int:
|
||||
else:
|
||||
print(format_text(findings))
|
||||
return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
sys.exit(main())
|
||||
|
||||
Reference in New Issue
Block a user