mirror of
https://github.com/bytedance/deer-flow.git
synced 2026-06-10 17:35:57 +00:00
89ae74d4f4
* fix(skills): surface offending line and quoting hint on SKILL.md YAML errors
When a SKILL.md front-matter fails to parse, the existing log only
echoes PyYAML's raw message, leaving authors to grep the file for the
offending line. This is especially painful for the very common
LLM-authored mistake of an unquoted scalar containing ': '
(e.g. 'description: foo: bar'), which fails with
'mapping values are not allowed here' and silently drops the skill.
Enrich the error log with:
- the source line PyYAML pointed at via problem_mark
- a targeted, copy-pasteable quoting hint when (and only when) the
error is the well-known 'mapping values are not allowed' scanner
error on an unquoted value
The skill is still rejected (no semantics are guessed or rewritten);
only the diagnostic is improved.
Fixes #3333
* improve(skills): address CR feedback on SKILL.md YAML error diagnostics
Per review on #3335:
- Log the file line number (mark.line + 2) instead of the
front-matter-internal line number, so authors land on the right
row in their editor.
- Use exc.problem == "mapping values are not allowed here" for a
tighter match than substring-scanning str(exc).
- Preserve the offending key's leading whitespace in the quoting
hint so nested mappings stay nested when authors paste the fix
back.
- Rewrite the regression test to actually exercise the new
behaviour: PyYAML's own message already echoes the offending
line (and truncates it with "..."), so the old assertion
passed on main. New assertions pin (a) the file-line number,
(b) the full untruncated line, and (c) the copy-pasteable hint.
- Add a guard test for nested-key indentation so the
partition()/strip() shape cannot regress silently.
Refs #3333, #3335
* fix(skills): escape backslashes in YAML quoting hint
The hint emitted by _format_yaml_error previously escaped only double
quotes, so values containing backslashes (e.g. Windows paths like
C:\Temp or regex escapes like \d) produced a suggested scalar that
was either invalid YAML or silently re-interpreted by PyYAML's
double-quoted escape rules when pasted back. Escape order matters:
backslashes first, then double quotes.
Adds two regression tests covering Windows-path and regex-style
backslashes.
Address Copilot CR feedback on PR #3335.
142 lines
5.3 KiB
Python
142 lines
5.3 KiB
Python
import logging
|
|
import re
|
|
from pathlib import Path
|
|
|
|
import yaml
|
|
|
|
from .types import SKILL_MD_FILE, Skill, SkillCategory
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
def _format_yaml_error(skill_file: Path, exc: yaml.YAMLError, source: str) -> str:
|
|
"""Render a developer-friendly explanation of a YAML front-matter error."""
|
|
|
|
lines = [f"Invalid YAML front-matter in {skill_file}: {exc}"]
|
|
|
|
mark = getattr(exc, "problem_mark", None)
|
|
source_lines = source.splitlines()
|
|
if mark is not None and 0 <= mark.line < len(source_lines):
|
|
offending = source_lines[mark.line]
|
|
|
|
# mark.line is 0-based within the front-matter body; +1 makes it
|
|
# 1-based, +1 more accounts for the leading `---` fence that the
|
|
# front-matter regex strips before yaml.safe_load sees it. The
|
|
# result matches the line number an author sees in their editor.
|
|
file_line_number = mark.line + 2
|
|
lines.append(f" line {file_line_number}: {offending}")
|
|
|
|
# Targeted hint for the most common authoring mistake: an unquoted
|
|
# scalar value whose body contains ``: ``. We only surface the hint
|
|
# when we are confident it applies, to avoid misleading authors who
|
|
# hit unrelated YAML errors.
|
|
if getattr(exc, "problem", "") == "mapping values are not allowed here" and ":" in offending:
|
|
key, _, value = offending.partition(":")
|
|
value = value.strip()
|
|
if value and value[0] not in {'"', "'", "|", ">", "[", "{"}:
|
|
escaped = value.replace("\\", "\\\\").replace('"', '\\"')
|
|
lines.append(f' hint: values containing ":" must be quoted, e.g. {key}: "{escaped}"')
|
|
|
|
return "\n".join(lines)
|
|
|
|
|
|
def parse_allowed_tools(raw: object, skill_file: Path) -> list[str] | None:
|
|
"""Parse the optional allowed-tools frontmatter field.
|
|
|
|
Returns None when the field is omitted. Returns a list when the field is a
|
|
YAML sequence of strings, including an empty list for explicit no-tool
|
|
skills. Raises ValueError for malformed values.
|
|
"""
|
|
if raw is None:
|
|
return None
|
|
if not isinstance(raw, list):
|
|
raise ValueError(f"allowed-tools in {skill_file} must be a list of strings")
|
|
|
|
allowed_tools: list[str] = []
|
|
for item in raw:
|
|
if not isinstance(item, str):
|
|
raise ValueError(f"allowed-tools in {skill_file} must contain only strings")
|
|
tool_name = item.strip()
|
|
if not tool_name:
|
|
raise ValueError(f"allowed-tools in {skill_file} cannot contain empty tool names")
|
|
allowed_tools.append(tool_name)
|
|
return allowed_tools
|
|
|
|
|
|
def parse_skill_file(skill_file: Path, category: SkillCategory, relative_path: Path | None = None) -> Skill | None:
|
|
"""Parse a SKILL.md file and extract metadata.
|
|
|
|
Args:
|
|
skill_file: Path to the SKILL.md file.
|
|
category: Category of the skill.
|
|
relative_path: Relative path from the category root to the skill
|
|
directory. Defaults to the skill directory name when omitted.
|
|
|
|
Returns:
|
|
Skill object if parsing succeeds, None otherwise.
|
|
"""
|
|
if not skill_file.exists() or skill_file.name != SKILL_MD_FILE:
|
|
return None
|
|
|
|
try:
|
|
content = skill_file.read_text(encoding="utf-8")
|
|
|
|
# Extract YAML front-matter block between leading ``---`` fences.
|
|
front_matter_match = re.match(r"^---\s*\n(.*?)\n---\s*\n", content, re.DOTALL)
|
|
if not front_matter_match:
|
|
return None
|
|
|
|
front_matter_text = front_matter_match.group(1)
|
|
|
|
try:
|
|
metadata = yaml.safe_load(front_matter_text)
|
|
except yaml.YAMLError as exc:
|
|
logger.error("%s", _format_yaml_error(skill_file, exc, front_matter_text))
|
|
return None
|
|
|
|
if not isinstance(metadata, dict):
|
|
logger.error("Front-matter in %s is not a YAML mapping", skill_file)
|
|
return None
|
|
|
|
# Extract required fields. Both must be non-empty strings.
|
|
name = metadata.get("name")
|
|
description = metadata.get("description")
|
|
|
|
if not name or not isinstance(name, str):
|
|
return None
|
|
if not description or not isinstance(description, str):
|
|
return None
|
|
|
|
# Normalise: strip surrounding whitespace that YAML may preserve.
|
|
name = name.strip()
|
|
description = description.strip()
|
|
|
|
if not name or not description:
|
|
return None
|
|
|
|
license_text = metadata.get("license")
|
|
if license_text is not None:
|
|
license_text = str(license_text).strip() or None
|
|
|
|
try:
|
|
allowed_tools = parse_allowed_tools(metadata.get("allowed-tools"), skill_file)
|
|
except ValueError as exc:
|
|
logger.error("Invalid allowed-tools in %s: %s", skill_file, exc)
|
|
return None
|
|
|
|
return Skill(
|
|
name=name,
|
|
description=description,
|
|
license=license_text,
|
|
skill_dir=skill_file.parent,
|
|
skill_file=skill_file,
|
|
relative_path=relative_path or Path(skill_file.parent.name),
|
|
category=category,
|
|
allowed_tools=allowed_tools,
|
|
enabled=True, # Actual state comes from the extensions config file.
|
|
)
|
|
|
|
except Exception:
|
|
logger.exception("Unexpected error parsing skill file %s", skill_file)
|
|
return None
|