fix(suggestions): strip inline <think> reasoning before parsing follow-up questions (#3435)

Reasoning models such as MiniMax-M3 inline their chain-of-thought into the message content as <think>...</think> (reasoning_split defaults to false) instead of a separate reasoning_content field. The follow-up-suggestions endpoint extracted the JSON array via find('[') / rfind(']'), which silently broke whenever the reasoning text contained '[' or ']' — or when long thinking hit max_tokens and truncated before the array was emitted — returning empty suggestions.

- Add _strip_think_blocks() and apply it before JSON extraction; it removes complete <think>...</think> blocks (case-insensitive) and drops an unclosed <think> left by max_tokens truncation.
- Document the MiniMax thinking toggle in config.example.yaml (when_thinking_enabled: adaptive / when_thinking_disabled: disabled) so thinking_enabled=False actually disables reasoning on M3; note that M2.x models always think and rely on the defensive strip above.
- Tests cover complete/unclosed think blocks, brackets-inside-think, think + code-fence, and an end-to-end suggestions case reproducing the empty-result bug.

Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-10 09:25:57 +00:00 · 2026-06-08 15:48:00 +08:00
parent 88759015e4
commit 3b105d1e5f
4 changed files with 115 additions and 2 deletions
@@ -25,6 +25,60 @@ def test_parse_json_string_list_rejects_non_list():
    assert suggestions._parse_json_string_list(text) is None


+def test_strip_think_blocks_removes_complete_block():  # a closed <think>...</think> block must be removed
+    text = "<think>\nreasoning here\n</think>\nanswer"  # reasoning block, newline, then the real answer
+    assert suggestions._strip_think_blocks(text) == "answer"  # the newline after </think> is not expected to survive
+
+
+def test_strip_think_blocks_is_case_insensitive():  # tag casing must not affect stripping
+    text = "<Think>reasoning</THINK>\nanswer"  # opening and closing tags deliberately use different casing
+    assert suggestions._strip_think_blocks(text) == "answer"  # same result as for lowercase tags
+
+
+def test_strip_think_blocks_drops_unclosed_block():
+    # A reasoning model cut off at max_tokens leaves a <think> with no closing tag; everything from that tag onward is expected to be discarded.
+    text = "<think>\nreasoning that never finished because tokens ran out"
+    assert suggestions._strip_think_blocks(text) == ""  # nothing precedes the tag, so the result is empty
+
+
+def test_strip_think_blocks_keeps_text_without_think():  # input without any <think> tag is left alone
+    text = '["a", "b"]'  # plain JSON array, as a non-reasoning model would return it
+    assert suggestions._strip_think_blocks(text) == '["a", "b"]'  # returned unchanged
+
+
+def test_parse_json_string_list_ignores_brackets_inside_think_block():
+    # MiniMax-M3 inlines its chain-of-thought as <think>...</think> in content
+    # (reasoning_split=false). When that reasoning contains '[' / ']', the old
+    # find('[')/rfind(']') logic grabbed the wrong span and parsing failed.
+    text = '<think>\nMaybe a list like ["x", "y"] could work. Let me craft 3.\n</think>\n["Q1", "Q2", "Q3"]'
+    assert suggestions._parse_json_string_list(text) == ["Q1", "Q2", "Q3"]  # only the array after </think> counts; ["x", "y"] in the reasoning is ignored
+
+
+def test_parse_json_string_list_strips_think_then_code_fence():  # think block followed by a fenced JSON answer
+    text = '<think>reasoning</think>\n```json\n["Q1", "Q2"]\n```'  # the array sits inside a ```json fence after the reasoning
+    assert suggestions._parse_json_string_list(text) == ["Q1", "Q2"]  # both the think block and the fence must be handled
+
+
+def test_generate_suggestions_strips_inline_think_block(monkeypatch):
+    # End-to-end: the stubbed model returns inline thinking (containing '[' and ']') followed by the JSON array; the endpoint must still return the three questions.
+    req = suggestions.SuggestionsRequest(
+        messages=[
+            suggestions.SuggestionMessage(role="user", content="介绍深度学习"),
+            suggestions.SuggestionMessage(role="assistant", content="深度学习是机器学习的分支。"),
+        ],
+        n=3,  # matches the three questions in the canned reply below
+        model_name=None,
+    )
+    content = '<think>\nThe user asked about deep learning. Options: maybe [1] frameworks, [2] math basics.\n</think>\n["深度学习和机器学习的区别？", "常用框架有哪些？", "需要什么数学基础？"]'
+    fake_model = MagicMock()  # stand-in chat model, so no real model is constructed
+    fake_model.ainvoke = AsyncMock(return_value=MagicMock(content=content))  # awaiting ainvoke yields an object whose .content is the canned reply
+    monkeypatch.setattr(suggestions, "create_chat_model", lambda **kwargs: fake_model)  # any keyword arguments produce the fake
+    # NOTE(review): __wrapped__ presumably reaches the undecorated coroutine (functools.wraps convention) - confirm against the route's decorators.
+    result = asyncio.run(suggestions.generate_suggestions.__wrapped__("t1", req, request=None, config=SimpleNamespace()))
+
+    assert result.suggestions == ["深度学习和机器学习的区别？", "常用框架有哪些？", "需要什么数学基础？"]  # the [1] / [2] in the reasoning must not leak into the result
+
+
 def test_format_conversation_formats_roles():
    messages = [
        suggestions.SuggestionMessage(role="User", content="Hi"),