mirror of
https://github.com/bytedance/deer-flow.git
synced 2026-06-10 17:35:57 +00:00
3b105d1e5f
Reasoning models such as MiniMax-M3 inline their chain-of-thought into the
message content as <think>...</think> (reasoning_split defaults to false)
instead of a separate reasoning_content field. The follow-up-suggestions
endpoint extracted the JSON array via find('[') / rfind(']'), which silently
broke whenever the reasoning text contained '[' or ']' — or when long thinking
hit max_tokens and truncated before the array was emitted — returning empty
suggestions.
- Add _strip_think_blocks() and apply it before JSON extraction; it removes
complete <think>...</think> blocks (case-insensitive) and drops an unclosed
<think> left by max_tokens truncation.
- Document the MiniMax thinking toggle in config.example.yaml
(when_thinking_enabled: adaptive / when_thinking_disabled: disabled) so
thinking_enabled=False actually disables reasoning on M3; note that M2.x
models always think and rely on the defensive strip above.
- Tests cover complete/unclosed think blocks, brackets-inside-think, think +
code-fence, and an end-to-end suggestions case reproducing the empty-result
bug.
Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
169 lines
6.4 KiB
Python
169 lines
6.4 KiB
Python
import json
|
|
import logging
|
|
import re
|
|
|
|
from fastapi import APIRouter, Depends, Request
|
|
from langchain_core.messages import HumanMessage, SystemMessage
|
|
from pydantic import BaseModel, Field
|
|
|
|
from app.gateway.authz import require_permission
|
|
from app.gateway.deps import get_config
|
|
from deerflow.config.app_config import AppConfig
|
|
from deerflow.models import create_chat_model
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
router = APIRouter(prefix="/api", tags=["suggestions"])
|
|
|
|
|
|
# A single conversation turn supplied by the client as context for
# follow-up question generation.
class SuggestionMessage(BaseModel):
    # Speaker of the turn.
    role: str = Field(
        ...,
        description="Message role: user|assistant",
    )
    # Text of the turn.
    content: str = Field(
        ...,
        description="Message content as plain text",
    )
|
|
|
|
|
|
# Request payload for the follow-up suggestions endpoint.
class SuggestionsRequest(BaseModel):
    # Conversation turns used as context; required.
    messages: list[SuggestionMessage] = Field(
        ...,
        description="Recent conversation messages",
    )
    # How many questions to ask the model for; validated to the range 1..5.
    n: int = Field(
        default=3,
        ge=1,
        le=5,
        description="Number of suggestions to generate",
    )
    # Name of the model to use; None leaves the choice to the model factory.
    model_name: str | None = Field(
        default=None,
        description="Optional model override",
    )
|
|
|
|
|
|
# Response payload for the follow-up suggestions endpoint.
class SuggestionsResponse(BaseModel):
    # Generated questions; defaults to a fresh empty list per instance.
    suggestions: list[str] = Field(
        default_factory=list,
        description="Suggested follow-up questions",
    )
|
|
|
|
|
|
# Matches a complete <think>...</think> block (case-insensitive, spans newlines).
|
|
_THINK_BLOCK_RE = re.compile(r"<think\b[^>]*>.*?</think\s*>", re.IGNORECASE | re.DOTALL)
|
|
# Matches a dangling, unclosed <think> (model truncated at max_tokens mid-thought).
|
|
_OPEN_THINK_RE = re.compile(r"<think\b[^>]*>", re.IGNORECASE)
|
|
|
|
|
|
def _strip_think_blocks(text: str) -> str:
|
|
"""Remove reasoning-model ``<think>...</think>`` blocks from the response.
|
|
|
|
Reasoning models such as MiniMax-M3 inline their chain-of-thought into the
|
|
message ``content`` wrapped in ``<think>...</think>`` (``reasoning_split``
|
|
defaults to false), rather than exposing a separate ``reasoning_content``
|
|
field. The thinking text frequently contains ``[`` / ``]`` characters, which
|
|
corrupted the downstream ``find('[')`` / ``rfind(']')`` JSON extraction and
|
|
produced empty suggestions. We strip the reasoning before parsing so only
|
|
the actual answer remains.
|
|
"""
|
|
text = _THINK_BLOCK_RE.sub("", text)
|
|
# Drop any unclosed <think> (and everything after it) left by truncation.
|
|
open_match = _OPEN_THINK_RE.search(text)
|
|
if open_match:
|
|
text = text[: open_match.start()]
|
|
return text.strip()
|
|
|
|
|
|
def _strip_markdown_code_fence(text: str) -> str:
|
|
stripped = text.strip()
|
|
if not stripped.startswith("```"):
|
|
return stripped
|
|
lines = stripped.splitlines()
|
|
if len(lines) >= 3 and lines[0].startswith("```") and lines[-1].startswith("```"):
|
|
return "\n".join(lines[1:-1]).strip()
|
|
return stripped
|
|
|
|
|
|
def _parse_json_string_list(text: str) -> list[str] | None:
    """Extract a JSON array of strings from a model response.

    The text is first cleaned with ``_strip_think_blocks`` and
    ``_strip_markdown_code_fence``. Each ``[`` is then tried, left to right,
    as the start of a JSON array. Decoding stops at the array's own closing
    bracket, so prose before or after the array may itself contain ``[`` or
    ``]`` without breaking extraction (slicing from the first ``[`` to the
    last ``]`` would fail on such input).

    Args:
        text: Raw response text from the model.

    Returns:
        The trimmed, non-empty string items of the first array that contains
        at least one such item. If arrays were found but none held a usable
        string, an empty list. ``None`` when no JSON array could be decoded.
    """
    candidate = _strip_markdown_code_fence(_strip_think_blocks(text))
    decoder = json.JSONDecoder()
    fallback: list[str] | None = None

    pos = candidate.find("[")
    while pos != -1:
        try:
            data, stop = decoder.raw_decode(candidate, pos)
        except (ValueError, RecursionError):
            # Not a valid array at this bracket (or pathologically nested);
            # try the next opening bracket.
            pos = candidate.find("[", pos + 1)
            continue

        if isinstance(data, list):
            out = [s for item in data if isinstance(item, str) and (s := item.strip())]
            if out:
                return out
            if fallback is None:
                # Remember that a well-formed array was seen, so the result is
                # [] rather than None when no array holds any strings.
                fallback = out

        # Resume after the decoded value so nested arrays are not re-scanned.
        pos = candidate.find("[", stop)

    return fallback
|
|
|
|
|
|
def _extract_response_text(content: object) -> str:
|
|
if isinstance(content, str):
|
|
return content
|
|
if isinstance(content, list):
|
|
parts: list[str] = []
|
|
for block in content:
|
|
if isinstance(block, str):
|
|
parts.append(block)
|
|
elif isinstance(block, dict) and block.get("type") in {"text", "output_text"}:
|
|
text = block.get("text")
|
|
if isinstance(text, str):
|
|
parts.append(text)
|
|
return "\n".join(parts) if parts else ""
|
|
if content is None:
|
|
return ""
|
|
return str(content)
|
|
|
|
|
|
def _format_conversation(messages: list[SuggestionMessage]) -> str:
|
|
parts: list[str] = []
|
|
for m in messages:
|
|
role = m.role.strip().lower()
|
|
if role in ("user", "human"):
|
|
parts.append(f"User: {m.content.strip()}")
|
|
elif role in ("assistant", "ai"):
|
|
parts.append(f"Assistant: {m.content.strip()}")
|
|
else:
|
|
parts.append(f"{m.role}: {m.content.strip()}")
|
|
return "\n".join(parts).strip()
|
|
|
|
|
|
@router.post(
    "/threads/{thread_id}/suggestions",
    response_model=SuggestionsResponse,
    summary="Generate Follow-up Questions",
    description="Generate short follow-up questions a user might ask next, based on recent conversation context.",
)
@require_permission("threads", "read", owner_check=True)
async def generate_suggestions(
    thread_id: str,
    body: SuggestionsRequest,
    request: Request,
    config: AppConfig = Depends(get_config),
) -> SuggestionsResponse:
    """Ask a chat model for follow-up questions based on the given messages.

    An empty response is returned when there are no messages or the formatted
    conversation is empty. Otherwise the model is invoked with
    ``thinking_enabled=False`` and its reply is parsed as a JSON array of
    strings, flattened to single lines, and capped at ``body.n`` entries.
    This endpoint is best-effort: any exception raised while creating the
    model, invoking it, or parsing its reply is logged and results in an
    empty suggestion list.
    """
    if not body.messages or not (transcript := _format_conversation(body.messages)):
        return SuggestionsResponse(suggestions=[])

    count = body.n
    system_prompt = (
        "You are generating follow-up questions to help the user continue the conversation.\n"
        f"Based on the conversation below, produce EXACTLY {count} short questions the user might ask next.\n"
        "Requirements:\n"
        "- Questions must be relevant to the preceding conversation.\n"
        "- Questions must be written in the same language as the user.\n"
        "- Keep each question concise (ideally <= 20 words / <= 40 Chinese characters).\n"
        "- Do NOT include numbering, markdown, or any extra text.\n"
        "- Output MUST be a JSON array of strings only.\n"
    )
    user_prompt = f"Conversation Context:\n{transcript}\n\nGenerate {count} follow-up questions"

    try:
        llm = create_chat_model(name=body.model_name, thinking_enabled=False, app_config=config)
        prompt_messages = [
            SystemMessage(content=system_prompt),
            HumanMessage(content=user_prompt),
        ]
        reply = await llm.ainvoke(prompt_messages, config={"run_name": "suggest_agent"})
        reply_text = _extract_response_text(reply.content)
        parsed = _parse_json_string_list(reply_text) or []
        # Flatten embedded newlines so each suggestion is a single line, then
        # keep at most the requested number.
        flattened = [item.replace("\n", " ").strip() for item in parsed if item.strip()]
        return SuggestionsResponse(suggestions=flattened[:count])
    except Exception as exc:
        logger.exception("Failed to generate suggestions: thread_id=%s err=%s", thread_id, exc)
        return SuggestionsResponse(suggestions=[])
|