Files
deer-flow/backend/packages/harness/deerflow/skills/parser.py
T
Huixin615 89ae74d4f4 fix(skills): surface offending line and quoting hint on SKILL.md YAML… (#3335)
* fix(skills): surface offending line and quoting hint on SKILL.md YAML errors

When a SKILL.md front-matter fails to parse, the existing log only
echoes PyYAML's raw message, leaving authors to grep the file for the
offending line. This is especially painful for the very common
LLM-authored mistake of an unquoted scalar containing ': '
(e.g. 'description: foo: bar'), which fails with
'mapping values are not allowed here' and silently drops the skill.

Enrich the error log with:
  - the source line PyYAML pointed at via problem_mark
  - a targeted, copy-pasteable quoting hint when (and only when) the
    error is the well-known 'mapping values are not allowed' scanner
    error on an unquoted value

The skill is still rejected (no semantics are guessed or rewritten);
only the diagnostic is improved.

Fixes #3333

* improve(skills): address CR feedback on SKILL.md YAML error diagnostics

Per review on #3335:

- Log the file line number (mark.line + 2) instead of the
  front-matter-internal line number, so authors land on the right
  row in their editor.
- Use exc.problem == "mapping values are not allowed here" for a
  tighter match than substring-scanning str(exc).
- Preserve the offending key's leading whitespace in the quoting
  hint so nested mappings stay nested when authors paste the fix
  back.
- Rewrite the regression test to actually exercise the new
  behaviour: PyYAML's own message already echoes the offending
  line (and truncates it with "..."), so the old assertion
  passed on main. New assertions pin (a) the file-line number,
  (b) the full untruncated line, and (c) the copy-pasteable hint.
- Add a guard test for nested-key indentation so the
  partition()/strip() shape cannot regress silently.

Refs #3333, #3335

* fix(skills): escape backslashes in YAML quoting hint

The hint emitted by _format_yaml_error previously escaped only double
quotes, so values containing backslashes (e.g. Windows paths like
C:\Temp or regex escapes like \d) produced a suggested scalar that
was either invalid YAML or silently re-interpreted by PyYAML's
double-quoted escape rules when pasted back. Escape order matters:
backslashes first, then double quotes.

Adds two regression tests covering Windows-path and regex-style
backslashes.

Address Copilot CR feedback on PR #3335.
2026-06-03 21:53:52 +08:00

142 lines
5.3 KiB
Python

import logging
import re
from pathlib import Path
import yaml
from .types import SKILL_MD_FILE, Skill, SkillCategory
logger = logging.getLogger(__name__)
def _format_yaml_error(skill_file: Path, exc: yaml.YAMLError, source: str) -> str:
"""Render a developer-friendly explanation of a YAML front-matter error."""
lines = [f"Invalid YAML front-matter in {skill_file}: {exc}"]
mark = getattr(exc, "problem_mark", None)
source_lines = source.splitlines()
if mark is not None and 0 <= mark.line < len(source_lines):
offending = source_lines[mark.line]
# mark.line is 0-based within the front-matter body; +1 makes it
# 1-based, +1 more accounts for the leading `---` fence that the
# front-matter regex strips before yaml.safe_load sees it. The
# result matches the line number an author sees in their editor.
file_line_number = mark.line + 2
lines.append(f" line {file_line_number}: {offending}")
# Targeted hint for the most common authoring mistake: an unquoted
# scalar value whose body contains ``: ``. We only surface the hint
# when we are confident it applies, to avoid misleading authors who
# hit unrelated YAML errors.
if getattr(exc, "problem", "") == "mapping values are not allowed here" and ":" in offending:
key, _, value = offending.partition(":")
value = value.strip()
if value and value[0] not in {'"', "'", "|", ">", "[", "{"}:
escaped = value.replace("\\", "\\\\").replace('"', '\\"')
lines.append(f' hint: values containing ":" must be quoted, e.g. {key}: "{escaped}"')
return "\n".join(lines)
def parse_allowed_tools(raw: object, skill_file: Path) -> list[str] | None:
"""Parse the optional allowed-tools frontmatter field.
Returns None when the field is omitted. Returns a list when the field is a
YAML sequence of strings, including an empty list for explicit no-tool
skills. Raises ValueError for malformed values.
"""
if raw is None:
return None
if not isinstance(raw, list):
raise ValueError(f"allowed-tools in {skill_file} must be a list of strings")
allowed_tools: list[str] = []
for item in raw:
if not isinstance(item, str):
raise ValueError(f"allowed-tools in {skill_file} must contain only strings")
tool_name = item.strip()
if not tool_name:
raise ValueError(f"allowed-tools in {skill_file} cannot contain empty tool names")
allowed_tools.append(tool_name)
return allowed_tools
def parse_skill_file(skill_file: Path, category: SkillCategory, relative_path: Path | None = None) -> Skill | None:
"""Parse a SKILL.md file and extract metadata.
Args:
skill_file: Path to the SKILL.md file.
category: Category of the skill.
relative_path: Relative path from the category root to the skill
directory. Defaults to the skill directory name when omitted.
Returns:
Skill object if parsing succeeds, None otherwise.
"""
if not skill_file.exists() or skill_file.name != SKILL_MD_FILE:
return None
try:
content = skill_file.read_text(encoding="utf-8")
# Extract YAML front-matter block between leading ``---`` fences.
front_matter_match = re.match(r"^---\s*\n(.*?)\n---\s*\n", content, re.DOTALL)
if not front_matter_match:
return None
front_matter_text = front_matter_match.group(1)
try:
metadata = yaml.safe_load(front_matter_text)
except yaml.YAMLError as exc:
logger.error("%s", _format_yaml_error(skill_file, exc, front_matter_text))
return None
if not isinstance(metadata, dict):
logger.error("Front-matter in %s is not a YAML mapping", skill_file)
return None
# Extract required fields. Both must be non-empty strings.
name = metadata.get("name")
description = metadata.get("description")
if not name or not isinstance(name, str):
return None
if not description or not isinstance(description, str):
return None
# Normalise: strip surrounding whitespace that YAML may preserve.
name = name.strip()
description = description.strip()
if not name or not description:
return None
license_text = metadata.get("license")
if license_text is not None:
license_text = str(license_text).strip() or None
try:
allowed_tools = parse_allowed_tools(metadata.get("allowed-tools"), skill_file)
except ValueError as exc:
logger.error("Invalid allowed-tools in %s: %s", skill_file, exc)
return None
return Skill(
name=name,
description=description,
license=license_text,
skill_dir=skill_file.parent,
skill_file=skill_file,
relative_path=relative_path or Path(skill_file.parent.name),
category=category,
allowed_tools=allowed_tools,
enabled=True, # Actual state comes from the extensions config file.
)
except Exception:
logger.exception("Unexpected error parsing skill file %s", skill_file)
return None