mirror of
https://github.com/bytedance/deer-flow.git
synced 2026-05-23 08:25:57 +00:00
* Refactor tests for SKILL.md parser

  Updated tests for SKILL.md parser to handle quoted names and descriptions correctly. Added new tests for parsing plain and single-quoted names, and ensured multi-line descriptions are processed properly.

* Implement tool name validation and deduplication

  Add tool name mismatch warning and deduplication logic

* Refactor skill file parsing and error handling

* Add tests for tool name deduplication

  Added tests for tool name deduplication in get_available_tools(). Ensured that duplicates are not returned, the first occurrence is kept, and warnings are logged for skipped duplicates.

* Apply suggestions from code review

  Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com>

* Update minimal config to include tools list

* Update test for nonexistent skill file

  Ensure the test for nonexistent files checks for None.

* Refactor tool loading and add skill management support

  Refactor tool loading logic to include skill management tools based on configuration and clean up comments.

* Enhance code comments for tool loading logic

  Added comments to clarify the purpose of various code sections related to tool loading and configuration.

* Fix assertion for duplicate tool name warning

* Fix indentation issues in tools.py

* Fix the lint error of test_tool_deduplication

* Fix the lint error of tools.py

* Fix the lint error

* Fix the lint error

* make format

---------

Co-authored-by: Willem Jiang <willem.jiang@gmail.com>
Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com>
This commit is contained in:
@@ -2,21 +2,24 @@ import logging
|
||||
import re
|
||||
from pathlib import Path
|
||||
|
||||
import yaml
|
||||
|
||||
from .types import Skill
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def parse_skill_file(skill_file: Path, category: str, relative_path: Path | None = None) -> Skill | None:
|
||||
"""
|
||||
Parse a SKILL.md file and extract metadata.
|
||||
"""Parse a SKILL.md file and extract metadata.
|
||||
|
||||
Args:
|
||||
skill_file: Path to the SKILL.md file
|
||||
category: Category of the skill ('public' or 'custom')
|
||||
skill_file: Path to the SKILL.md file.
|
||||
category: Category of the skill ('public' or 'custom').
|
||||
relative_path: Relative path from the category root to the skill
|
||||
directory. Defaults to the skill directory name when omitted.
|
||||
|
||||
Returns:
|
||||
Skill object if parsing succeeds, None otherwise
|
||||
Skill object if parsing succeeds, None otherwise.
|
||||
"""
|
||||
if not skill_file.exists() or skill_file.name != "SKILL.md":
|
||||
return None
|
||||
@@ -24,90 +27,42 @@ def parse_skill_file(skill_file: Path, category: str, relative_path: Path | None
|
||||
try:
|
||||
content = skill_file.read_text(encoding="utf-8")
|
||||
|
||||
# Extract YAML front matter
|
||||
# Pattern: ---\nkey: value\n---
|
||||
# Extract YAML front-matter block between leading ``---`` fences.
|
||||
front_matter_match = re.match(r"^---\s*\n(.*?)\n---\s*\n", content, re.DOTALL)
|
||||
|
||||
if not front_matter_match:
|
||||
return None
|
||||
|
||||
front_matter = front_matter_match.group(1)
|
||||
front_matter_text = front_matter_match.group(1)
|
||||
|
||||
# Parse YAML front matter with basic multiline string support
|
||||
metadata = {}
|
||||
lines = front_matter.split("\n")
|
||||
current_key = None
|
||||
current_value = []
|
||||
is_multiline = False
|
||||
multiline_style = None
|
||||
indent_level = None
|
||||
try:
|
||||
metadata = yaml.safe_load(front_matter_text)
|
||||
except yaml.YAMLError as exc:
|
||||
logger.error("Invalid YAML front-matter in %s: %s", skill_file, exc)
|
||||
return None
|
||||
|
||||
for line in lines:
|
||||
if is_multiline:
|
||||
if not line.strip():
|
||||
current_value.append("")
|
||||
continue
|
||||
if not isinstance(metadata, dict):
|
||||
logger.error("Front-matter in %s is not a YAML mapping", skill_file)
|
||||
return None
|
||||
|
||||
current_indent = len(line) - len(line.lstrip())
|
||||
|
||||
if indent_level is None:
|
||||
if current_indent > 0:
|
||||
indent_level = current_indent
|
||||
current_value.append(line[indent_level:])
|
||||
continue
|
||||
elif current_indent >= indent_level:
|
||||
current_value.append(line[indent_level:])
|
||||
continue
|
||||
|
||||
# If we reach here, it's either a new key or the end of multiline
|
||||
if current_key and is_multiline:
|
||||
if multiline_style == "|":
|
||||
metadata[current_key] = "\n".join(current_value).rstrip()
|
||||
else:
|
||||
text = "\n".join(current_value).rstrip()
|
||||
# Replace single newlines with spaces for folded blocks
|
||||
metadata[current_key] = re.sub(r"(?<!\n)\n(?!\n)", " ", text)
|
||||
|
||||
current_key = None
|
||||
current_value = []
|
||||
is_multiline = False
|
||||
multiline_style = None
|
||||
indent_level = None
|
||||
|
||||
if not line.strip():
|
||||
continue
|
||||
|
||||
if ":" in line:
|
||||
# Handle nested dicts simply by ignoring indentation for now,
|
||||
# or just extracting top-level keys
|
||||
key, value = line.split(":", 1)
|
||||
key = key.strip()
|
||||
value = value.strip()
|
||||
|
||||
if value in (">", "|"):
|
||||
current_key = key
|
||||
is_multiline = True
|
||||
multiline_style = value
|
||||
current_value = []
|
||||
indent_level = None
|
||||
else:
|
||||
metadata[key] = value
|
||||
|
||||
if current_key and is_multiline:
|
||||
if multiline_style == "|":
|
||||
metadata[current_key] = "\n".join(current_value).rstrip()
|
||||
else:
|
||||
text = "\n".join(current_value).rstrip()
|
||||
metadata[current_key] = re.sub(r"(?<!\n)\n(?!\n)", " ", text)
|
||||
|
||||
# Extract required fields
|
||||
# Extract required fields. Both must be non-empty strings.
|
||||
name = metadata.get("name")
|
||||
description = metadata.get("description")
|
||||
|
||||
if not name or not isinstance(name, str):
|
||||
return None
|
||||
if not description or not isinstance(description, str):
|
||||
return None
|
||||
|
||||
# Normalise: strip surrounding whitespace that YAML may preserve.
|
||||
name = name.strip()
|
||||
description = description.strip()
|
||||
|
||||
if not name or not description:
|
||||
return None
|
||||
|
||||
license_text = metadata.get("license")
|
||||
if license_text is not None:
|
||||
license_text = str(license_text).strip() or None
|
||||
|
||||
return Skill(
|
||||
name=name,
|
||||
@@ -117,9 +72,9 @@ def parse_skill_file(skill_file: Path, category: str, relative_path: Path | None
|
||||
skill_file=skill_file,
|
||||
relative_path=relative_path or Path(skill_file.parent.name),
|
||||
category=category,
|
||||
enabled=True, # Default to enabled, actual state comes from config file
|
||||
enabled=True, # Actual state comes from the extensions config file.
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
logger.error("Error parsing skill file %s: %s", skill_file, e)
|
||||
except Exception:
|
||||
logger.exception("Unexpected error parsing skill file %s", skill_file)
|
||||
return None
|
||||
|
||||
@@ -59,7 +59,22 @@ def get_available_tools(
|
||||
if not is_host_bash_allowed(config):
|
||||
tool_configs = [tool for tool in tool_configs if not _is_host_bash_tool(tool)]
|
||||
|
||||
loaded_tools = [resolve_variable(tool.use, BaseTool) for tool in tool_configs]
|
||||
loaded_tools_raw = [(cfg, resolve_variable(cfg.use, BaseTool)) for cfg in tool_configs]
|
||||
|
||||
# Warn when the config ``name`` field and the tool object's ``.name``
|
||||
# attribute diverge — this mismatch is the root cause of issue #1803 where
|
||||
# the LLM receives one name in its tool schema but the runtime router
|
||||
# recognises a different name, producing "not a valid tool" errors.
|
||||
for cfg, loaded in loaded_tools_raw:
|
||||
if cfg.name != loaded.name:
|
||||
logger.warning(
|
||||
"Tool name mismatch: config name %r does not match tool .name %r (use: %s). The tool's own .name will be used for binding.",
|
||||
cfg.name,
|
||||
loaded.name,
|
||||
cfg.use,
|
||||
)
|
||||
|
||||
loaded_tools = [t for _, t in loaded_tools_raw]
|
||||
|
||||
# Conditionally add tools based on config
|
||||
builtin_tools = BUILTIN_TOOLS.copy()
|
||||
@@ -134,4 +149,20 @@ def get_available_tools(
|
||||
logger.warning(f"Failed to load ACP tool: {e}")
|
||||
|
||||
logger.info(f"Total tools loaded: {len(loaded_tools)}, built-in tools: {len(builtin_tools)}, MCP tools: {len(mcp_tools)}, ACP tools: {len(acp_tools)}")
|
||||
return loaded_tools + builtin_tools + mcp_tools + acp_tools
|
||||
|
||||
# Deduplicate by tool name — config-loaded tools take priority, followed by
|
||||
# built-ins, MCP tools, and ACP tools. Duplicate names cause the LLM to
|
||||
# receive ambiguous or concatenated function schemas (issue #1803).
|
||||
all_tools = loaded_tools + builtin_tools + mcp_tools + acp_tools
|
||||
seen_names: set[str] = set()
|
||||
unique_tools: list[BaseTool] = []
|
||||
for t in all_tools:
|
||||
if t.name not in seen_names:
|
||||
unique_tools.append(t)
|
||||
seen_names.add(t.name)
|
||||
else:
|
||||
logger.warning(
|
||||
"Duplicate tool name %r detected and skipped — check your config.yaml and MCP server registrations (issue #1803).",
|
||||
t.name,
|
||||
)
|
||||
return unique_tools
|
||||
|
||||
Reference in New Issue
Block a user