fix: resolve tool duplication and skill parser YAML inconsistencies (#1803) (#2107)

* Refactor tests for SKILL.md parser

Updated tests for SKILL.md parser to handle quoted names and descriptions correctly. Added new tests for parsing plain and single-quoted names, and ensured multi-line descriptions are processed properly.

* Implement tool name validation and deduplication

Add tool name mismatch warning and deduplication logic

* Refactor skill file parsing and error handling

* Add tests for tool name deduplication

Added tests for tool name deduplication in get_available_tools(). Ensured that duplicates are not returned, the first occurrence is kept, and warnings are logged for skipped duplicates.

* Apply suggestions from code review

Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com>

* Update minimal config to include tools list

* Update test for nonexistent skill file

Ensure the test for nonexistent files checks for None.

* Refactor tool loading and add skill management support

Refactor tool loading logic to include skill management tools based on configuration and clean up comments.

* Enhance code comments for tool loading logic

Added comments to clarify the purpose of various code sections related to tool loading and configuration.

* Fix assertion for duplicate tool name warning

* Fix indentation issues in tools.py

* Fix the lint error in test_tool_deduplication

* Fix the lint error in tools.py

* Fix the lint error

* Fix the lint error

* make format

---------

Co-authored-by: Willem Jiang <willem.jiang@gmail.com>
Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com>
This commit is contained in:
Ansel
2026-04-20 08:25:03 -04:00
committed by GitHub
parent fc94e90f6c
commit 6dce26a52e
4 changed files with 282 additions and 178 deletions
@@ -2,21 +2,24 @@ import logging
import re
from pathlib import Path
import yaml
from .types import Skill
logger = logging.getLogger(__name__)
def parse_skill_file(skill_file: Path, category: str, relative_path: Path | None = None) -> Skill | None:
"""
Parse a SKILL.md file and extract metadata.
"""Parse a SKILL.md file and extract metadata.
Args:
skill_file: Path to the SKILL.md file
category: Category of the skill ('public' or 'custom')
skill_file: Path to the SKILL.md file.
category: Category of the skill ('public' or 'custom').
relative_path: Relative path from the category root to the skill
directory. Defaults to the skill directory name when omitted.
Returns:
Skill object if parsing succeeds, None otherwise
Skill object if parsing succeeds, None otherwise.
"""
if not skill_file.exists() or skill_file.name != "SKILL.md":
return None
@@ -24,90 +27,42 @@ def parse_skill_file(skill_file: Path, category: str, relative_path: Path | None
try:
content = skill_file.read_text(encoding="utf-8")
# Extract YAML front matter
# Pattern: ---\nkey: value\n---
# Extract YAML front-matter block between leading ``---`` fences.
front_matter_match = re.match(r"^---\s*\n(.*?)\n---\s*\n", content, re.DOTALL)
if not front_matter_match:
return None
front_matter = front_matter_match.group(1)
front_matter_text = front_matter_match.group(1)
# Parse YAML front matter with basic multiline string support
metadata = {}
lines = front_matter.split("\n")
current_key = None
current_value = []
is_multiline = False
multiline_style = None
indent_level = None
try:
metadata = yaml.safe_load(front_matter_text)
except yaml.YAMLError as exc:
logger.error("Invalid YAML front-matter in %s: %s", skill_file, exc)
return None
for line in lines:
if is_multiline:
if not line.strip():
current_value.append("")
continue
if not isinstance(metadata, dict):
logger.error("Front-matter in %s is not a YAML mapping", skill_file)
return None
current_indent = len(line) - len(line.lstrip())
if indent_level is None:
if current_indent > 0:
indent_level = current_indent
current_value.append(line[indent_level:])
continue
elif current_indent >= indent_level:
current_value.append(line[indent_level:])
continue
# If we reach here, it's either a new key or the end of multiline
if current_key and is_multiline:
if multiline_style == "|":
metadata[current_key] = "\n".join(current_value).rstrip()
else:
text = "\n".join(current_value).rstrip()
# Replace single newlines with spaces for folded blocks
metadata[current_key] = re.sub(r"(?<!\n)\n(?!\n)", " ", text)
current_key = None
current_value = []
is_multiline = False
multiline_style = None
indent_level = None
if not line.strip():
continue
if ":" in line:
# Handle nested dicts simply by ignoring indentation for now,
# or just extracting top-level keys
key, value = line.split(":", 1)
key = key.strip()
value = value.strip()
if value in (">", "|"):
current_key = key
is_multiline = True
multiline_style = value
current_value = []
indent_level = None
else:
metadata[key] = value
if current_key and is_multiline:
if multiline_style == "|":
metadata[current_key] = "\n".join(current_value).rstrip()
else:
text = "\n".join(current_value).rstrip()
metadata[current_key] = re.sub(r"(?<!\n)\n(?!\n)", " ", text)
# Extract required fields
# Extract required fields. Both must be non-empty strings.
name = metadata.get("name")
description = metadata.get("description")
if not name or not isinstance(name, str):
return None
if not description or not isinstance(description, str):
return None
# Normalise: strip surrounding whitespace that YAML may preserve.
name = name.strip()
description = description.strip()
if not name or not description:
return None
license_text = metadata.get("license")
if license_text is not None:
license_text = str(license_text).strip() or None
return Skill(
name=name,
@@ -117,9 +72,9 @@ def parse_skill_file(skill_file: Path, category: str, relative_path: Path | None
skill_file=skill_file,
relative_path=relative_path or Path(skill_file.parent.name),
category=category,
enabled=True, # Default to enabled, actual state comes from config file
enabled=True, # Actual state comes from the extensions config file.
)
except Exception as e:
logger.error("Error parsing skill file %s: %s", skill_file, e)
except Exception:
logger.exception("Unexpected error parsing skill file %s", skill_file)
return None
@@ -59,7 +59,22 @@ def get_available_tools(
if not is_host_bash_allowed(config):
tool_configs = [tool for tool in tool_configs if not _is_host_bash_tool(tool)]
loaded_tools = [resolve_variable(tool.use, BaseTool) for tool in tool_configs]
loaded_tools_raw = [(cfg, resolve_variable(cfg.use, BaseTool)) for cfg in tool_configs]
# Warn when the config ``name`` field and the tool object's ``.name``
# attribute diverge — this mismatch is the root cause of issue #1803 where
# the LLM receives one name in its tool schema but the runtime router
# recognises a different name, producing "not a valid tool" errors.
for cfg, loaded in loaded_tools_raw:
if cfg.name != loaded.name:
logger.warning(
"Tool name mismatch: config name %r does not match tool .name %r (use: %s). The tool's own .name will be used for binding.",
cfg.name,
loaded.name,
cfg.use,
)
loaded_tools = [t for _, t in loaded_tools_raw]
# Conditionally add tools based on config
builtin_tools = BUILTIN_TOOLS.copy()
@@ -134,4 +149,20 @@ def get_available_tools(
logger.warning(f"Failed to load ACP tool: {e}")
logger.info(f"Total tools loaded: {len(loaded_tools)}, built-in tools: {len(builtin_tools)}, MCP tools: {len(mcp_tools)}, ACP tools: {len(acp_tools)}")
return loaded_tools + builtin_tools + mcp_tools + acp_tools
# Deduplicate by tool name — config-loaded tools take priority, followed by
# built-ins, MCP tools, and ACP tools. Duplicate names cause the LLM to
# receive ambiguous or concatenated function schemas (issue #1803).
all_tools = loaded_tools + builtin_tools + mcp_tools + acp_tools
seen_names: set[str] = set()
unique_tools: list[BaseTool] = []
for t in all_tools:
if t.name not in seen_names:
unique_tools.append(t)
seen_names.add(t.name)
else:
logger.warning(
"Duplicate tool name %r detected and skipped — check your config.yaml and MCP server registrations (issue #1803).",
t.name,
)
return unique_tools