fix: resolve tool duplication and skill parser YAML inconsistencies (#1803) (#2107)

* Refactor tests for SKILL.md parser
  Updated tests for SKILL.md parser to handle quoted names and descriptions correctly. Added new tests for parsing plain and single-quoted names, and ensured multi-line descriptions are processed properly.
* Implement tool name validation and deduplication
  Add tool name mismatch warning and deduplication logic
* Refactor skill file parsing and error handling
* Add tests for tool name deduplication
  Added tests for tool name deduplication in get_available_tools(). Ensured that duplicates are not returned, the first occurrence is kept, and warnings are logged for skipped duplicates.
* Apply suggestions from code review
  Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com>
* Update minimal config to include tools list
* Update test for nonexistent skill file
  Ensure the test for nonexistent files checks for None.
* Refactor tool loading and add skill management support
  Refactor tool loading logic to include skill management tools based on configuration and clean up comments.
* Enhance code comments for tool loading logic
  Added comments to clarify the purpose of various code sections related to tool loading and configuration.
* Fix assertion for duplicate tool name warning
* Fix indentation issues in tools.py
* Fix the lint error of test_tool_deduplication
* Fix the lint error of tools.py
* Fix the lint error
* Fix the lint error
* make format

---------

Co-authored-by: Willem Jiang <willem.jiang@gmail.com>
Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com>
2026-05-23 08:25:57 +00:00 · 2026-04-20 08:25:03 -04:00
parent fc94e90f6c
commit 6dce26a52e
4 changed files with 282 additions and 178 deletions
@@ -2,21 +2,24 @@ import logging
 import re
 from pathlib import Path

+import yaml
+
 from .types import Skill

 logger = logging.getLogger(__name__)


 def parse_skill_file(skill_file: Path, category: str, relative_path: Path | None = None) -> Skill | None:
-    """
-    Parse a SKILL.md file and extract metadata.
+    """Parse a SKILL.md file and extract metadata.

    Args:
-        skill_file: Path to the SKILL.md file
-        category: Category of the skill ('public' or 'custom')
+        skill_file: Path to the SKILL.md file.
+        category: Category of the skill ('public' or 'custom').
+        relative_path: Relative path from the category root to the skill
+            directory.  Defaults to the skill directory name when omitted.

    Returns:
-        Skill object if parsing succeeds, None otherwise
+        Skill object if parsing succeeds, None otherwise.
    """
    if not skill_file.exists() or skill_file.name != "SKILL.md":
        return None
@@ -24,90 +27,42 @@ def parse_skill_file(skill_file: Path, category: str, relative_path: Path | None
    try:
        content = skill_file.read_text(encoding="utf-8")

-        # Extract YAML front matter
-        # Pattern: ---\nkey: value\n---
+        # Extract YAML front-matter block between leading ``---`` fences.
        front_matter_match = re.match(r"^---\s*\n(.*?)\n---\s*\n", content, re.DOTALL)
-
        if not front_matter_match:
            return None

-        front_matter = front_matter_match.group(1)
+        front_matter_text = front_matter_match.group(1)

-        # Parse YAML front matter with basic multiline string support
-        metadata = {}
-        lines = front_matter.split("\n")
-        current_key = None
-        current_value = []
-        is_multiline = False
-        multiline_style = None
-        indent_level = None
+        try:
+            metadata = yaml.safe_load(front_matter_text)
+        except yaml.YAMLError as exc:
+            logger.error("Invalid YAML front-matter in %s: %s", skill_file, exc)
+            return None

-        for line in lines:
-            if is_multiline:
-                if not line.strip():
-                    current_value.append("")
-                    continue
+        if not isinstance(metadata, dict):
+            logger.error("Front-matter in %s is not a YAML mapping", skill_file)
+            return None

-                current_indent = len(line) - len(line.lstrip())
-
-                if indent_level is None:
-                    if current_indent > 0:
-                        indent_level = current_indent
-                        current_value.append(line[indent_level:])
-                        continue
-                elif current_indent >= indent_level:
-                    current_value.append(line[indent_level:])
-                    continue
-
-            # If we reach here, it's either a new key or the end of multiline
-            if current_key and is_multiline:
-                if multiline_style == "|":
-                    metadata[current_key] = "\n".join(current_value).rstrip()
-                else:
-                    text = "\n".join(current_value).rstrip()
-                    # Replace single newlines with spaces for folded blocks
-                    metadata[current_key] = re.sub(r"(?<!\n)\n(?!\n)", " ", text)
-
-                current_key = None
-                current_value = []
-                is_multiline = False
-                multiline_style = None
-                indent_level = None
-
-            if not line.strip():
-                continue
-
-            if ":" in line:
-                # Handle nested dicts simply by ignoring indentation for now,
-                # or just extracting top-level keys
-                key, value = line.split(":", 1)
-                key = key.strip()
-                value = value.strip()
-
-                if value in (">", "|"):
-                    current_key = key
-                    is_multiline = True
-                    multiline_style = value
-                    current_value = []
-                    indent_level = None
-                else:
-                    metadata[key] = value
-
-        if current_key and is_multiline:
-            if multiline_style == "|":
-                metadata[current_key] = "\n".join(current_value).rstrip()
-            else:
-                text = "\n".join(current_value).rstrip()
-                metadata[current_key] = re.sub(r"(?<!\n)\n(?!\n)", " ", text)
-
-        # Extract required fields
+        # Extract required fields.  Both must be non-empty strings.
        name = metadata.get("name")
        description = metadata.get("description")

+        if not name or not isinstance(name, str):
+            return None
+        if not description or not isinstance(description, str):
+            return None
+
+        # Normalise: strip surrounding whitespace that YAML may preserve.
+        name = name.strip()
+        description = description.strip()
+
        if not name or not description:
            return None

        license_text = metadata.get("license")
+        if license_text is not None:
+            license_text = str(license_text).strip() or None

        return Skill(
            name=name,
@@ -117,9 +72,9 @@ def parse_skill_file(skill_file: Path, category: str, relative_path: Path | None
            skill_file=skill_file,
            relative_path=relative_path or Path(skill_file.parent.name),
            category=category,
-            enabled=True,  # Default to enabled, actual state comes from config file
+            enabled=True,  # Actual state comes from the extensions config file.
        )

-    except Exception as e:
-        logger.error("Error parsing skill file %s: %s", skill_file, e)
+    except Exception:
+        logger.exception("Unexpected error parsing skill file %s", skill_file)
        return None
@@ -59,7 +59,22 @@ def get_available_tools(
    if not is_host_bash_allowed(config):
        tool_configs = [tool for tool in tool_configs if not _is_host_bash_tool(tool)]

-    loaded_tools = [resolve_variable(tool.use, BaseTool) for tool in tool_configs]
+    loaded_tools_raw = [(cfg, resolve_variable(cfg.use, BaseTool)) for cfg in tool_configs]
+
+    # Warn when the config ``name`` field and the tool object's ``.name``
+    # attribute diverge — this mismatch is the root cause of issue #1803 where
+    # the LLM receives one name in its tool schema but the runtime router
+    # recognises a different name, producing "not a valid tool" errors.
+    for cfg, loaded in loaded_tools_raw:
+        if cfg.name != loaded.name:
+            logger.warning(
+                "Tool name mismatch: config name %r does not match tool .name %r (use: %s). The tool's own .name will be used for binding.",
+                cfg.name,
+                loaded.name,
+                cfg.use,
+            )
+
+    loaded_tools = [t for _, t in loaded_tools_raw]

    # Conditionally add tools based on config
    builtin_tools = BUILTIN_TOOLS.copy()
@@ -134,4 +149,20 @@ def get_available_tools(
        logger.warning(f"Failed to load ACP tool: {e}")

    logger.info(f"Total tools loaded: {len(loaded_tools)}, built-in tools: {len(builtin_tools)}, MCP tools: {len(mcp_tools)}, ACP tools: {len(acp_tools)}")
-    return loaded_tools + builtin_tools + mcp_tools + acp_tools
+
+    # Deduplicate by tool name — config-loaded tools take priority, followed by
+    # built-ins, MCP tools, and ACP tools.  Duplicate names cause the LLM to
+    # receive ambiguous or concatenated function schemas (issue #1803).
+    all_tools = loaded_tools + builtin_tools + mcp_tools + acp_tools
+    seen_names: set[str] = set()
+    unique_tools: list[BaseTool] = []
+    for t in all_tools:
+        if t.name not in seen_names:
+            unique_tools.append(t)
+            seen_names.add(t.name)
+        else:
+            logger.warning(
+                "Duplicate tool name %r detected and skipped — check your config.yaml and MCP server registrations (issue #1803).",
+                t.name,
+            )
+    return unique_tools