feat(models): add StepFun reasoning model adapter (#3461)

Add PatchedChatStepFun adapter for StepFun reasoning models (step-3.7-flash, step-3.5-flash). Captures reasoning from both streaming and non-streaming responses and replays it on historical assistant messages for multi-turn tool-call conversations. - New: PatchedChatStepFun adapter with streaming/non-streaming reasoning capture - Support both reasoning and reasoning_content field names - 17 unit tests covering all response paths - Updated: config.example.yaml with StepFun configuration example
2026-06-11 09:55:59 +00:00 · 2026-06-09 18:01:43 +08:00
parent 8db16bb3d8
commit 37337b77f9
4 changed files with 507 additions and 0 deletions
@@ -21,6 +21,7 @@ INFOQUEST_API_KEY=your-infoquest-api-key
 # DEEPSEEK_API_KEY=your-deepseek-api-key
 # NOVITA_API_KEY=your-novita-api-key  # OpenAI-compatible, see https://novita.ai
 # MINIMAX_API_KEY=your-minimax-api-key  # OpenAI-compatible, see https://platform.minimax.io
 # STEPFUN_API_KEY=your-stepfun-api-key  # OpenAI-compatible, see https://platform.stepfun.com
 # VLLM_API_KEY=your-vllm-api-key  # OpenAI-compatible
 # FEISHU_APP_ID=your-feishu-app-id
 # FEISHU_APP_SECRET=your-feishu-app-secret
@@ -0,0 +1,175 @@
 """Patched ChatOpenAI adapter for StepFun reasoning models.
 StepFun returns ``reasoning`` (or ``reasoning_content`` when ``reasoning_format`` is ``deepseek-style``) in
 both streaming deltas and non-streaming responses. Standard ``ChatOpenAI``
 ignores these non-standard fields, so reasoning content is silently dropped.
 This adapter captures reasoning from all response paths and replays it on
 historical assistant messages for multi-turn tool-call conversations.
 """
 from __future__ import annotations
 from collections.abc import Mapping
 from typing import Any
 from langchain_core.language_models import LanguageModelInput
 from langchain_core.messages import AIMessage, AIMessageChunk
 from langchain_core.outputs import ChatGeneration, ChatGenerationChunk, ChatResult
 from langchain_openai import ChatOpenAI
 from deerflow.models.assistant_payload_replay import (
    restore_assistant_payloads,
    restore_reasoning_content,
 )
 _MISSING = object()
 def _extract_reasoning(value: Any) -> str | object:
    """Look up reasoning text on a mapping or an attribute-style object.

    Both ``reasoning_content`` (deepseek-style) and ``reasoning`` (StepFun
    default) are consulted, in that order; a field whose value is ``None``
    counts as absent.

    Args:
        value: A mapping, or any object whose reasoning may live in an
            attribute or inside a ``model_extra`` mapping.

    Returns:
        The first non-``None`` value found, or the ``_MISSING`` sentinel when
        neither field yields one.
    """
    candidates = ("reasoning_content", "reasoning")

    def _first_present(mapping: Mapping) -> Any:
        # First candidate key that exists with a non-None value, else _MISSING.
        return next(
            (mapping[key] for key in candidates if key in mapping and mapping[key] is not None),
            _MISSING,
        )

    if isinstance(value, Mapping):
        return _first_present(value)

    # Attribute access covers Pydantic / SDK objects.
    for key in candidates:
        found = getattr(value, key, _MISSING)
        if not (found is _MISSING or found is None):
            return found

    # Some SDK versions keep unknown fields in ``model_extra`` instead.
    extras = getattr(value, "model_extra", None)
    return _first_present(extras) if isinstance(extras, Mapping) else _MISSING
 def _with_reasoning_content(message: AIMessage | AIMessageChunk, reasoning: str) -> AIMessage | AIMessageChunk:
    """Copy *message*, recording *reasoning* under ``additional_kwargs["reasoning_content"]``.

    The input message and its ``additional_kwargs`` are left untouched; the
    copy is produced with ``model_copy``.
    """
    # Setting the key unconditionally gives the same mapping as writing only
    # when the stored value differs.
    merged_kwargs = {**message.additional_kwargs, "reasoning_content": reasoning}
    return message.model_copy(update={"additional_kwargs": merged_kwargs})
 def _get_typed_choice_message(response: Any, index: int) -> Any:
    """Return ``response.choices[index].message``, or ``None`` when it cannot be reached.

    ``None`` is returned when ``choices`` is absent or ``None``, when *index*
    is out of range, when ``choices`` is not indexable, or when the selected
    choice has no ``message`` attribute.
    """
    typed_choices = getattr(response, "choices", None)
    if typed_choices is None:
        return None
    try:
        typed_message = typed_choices[index].message
    except (AttributeError, IndexError, TypeError):
        typed_message = None
    return typed_message
 class PatchedChatStepFun(ChatOpenAI):
    """``ChatOpenAI`` subclass that keeps StepFun reasoning text.

    Reasoning found under ``reasoning`` / ``reasoning_content`` in streaming
    deltas and in non-streaming choice messages is copied into the produced
    message's ``additional_kwargs["reasoning_content"]``. When a request is
    built, ``reasoning_content`` is restored on historical assistant messages
    for multi-turn tool-call conversations.
    """

    @classmethod
    def is_lc_serializable(cls) -> bool:
        """Report this class as LangChain-serializable."""
        return True

    @property
    def lc_secrets(self) -> dict[str, str]:
        """Map both API-key field names to ``STEPFUN_API_KEY``."""
        return dict.fromkeys(("api_key", "openai_api_key"), "STEPFUN_API_KEY")

    # --- Request payload replay ---
    def _get_request_payload(
        self,
        input_: LanguageModelInput,
        *,
        stop: list[str] | None = None,
        **kwargs: Any,
    ) -> dict:
        """Build the request payload and restore ``reasoning_content`` on past assistant turns.

        The converted input messages are handed to ``restore_assistant_payloads``
        together with the payload's ``messages`` list (or an empty list when the
        key is absent) and the ``restore_reasoning_content`` callback.
        """
        history = self._convert_input(input_).to_messages()
        request = super()._get_request_payload(input_, stop=stop, **kwargs)
        outgoing = request.get("messages", [])
        restore_assistant_payloads(outgoing, history, restore_reasoning_content)
        return request

    # --- Streaming reasoning capture ---
    def _convert_chunk_to_generation_chunk(
        self,
        chunk: dict,
        default_chunk_class: type,
        base_generation_info: dict | None,
    ) -> ChatGenerationChunk | None:
        """Convert a streaming chunk and attach any reasoning carried by its first delta.

        Returns the parent's result unchanged (including ``None``) when the
        chunk has no choices, the delta carries no reasoning, or the converted
        message is not an ``AIMessageChunk``.
        """
        converted = super()._convert_chunk_to_generation_chunk(
            chunk,
            default_chunk_class,
            base_generation_info,
        )
        if converted is None:
            return None

        stream_choices = chunk.get("choices", [])
        if not stream_choices:
            return converted

        # Only the first choice's delta is inspected.
        first_delta = stream_choices[0].get("delta") or {}
        reasoning = _extract_reasoning(first_delta)
        if reasoning is _MISSING or not isinstance(converted.message, AIMessageChunk):
            return converted

        return ChatGenerationChunk(
            message=_with_reasoning_content(converted.message, reasoning),
            generation_info=converted.generation_info,
        )

    # --- Non-streaming reasoning capture ---
    def _create_chat_result(
        self,
        response: dict | Any,
        generation_info: dict | None = None,
    ) -> ChatResult:
        """Build the chat result and attach reasoning from each choice's message.

        For every generation the matching choice in the dict form of *response*
        is searched first; for non-dict responses the typed choice message is
        used as a fallback. Generations without reasoning are kept as-is.
        """
        result = super()._create_chat_result(response, generation_info)

        is_plain_dict = isinstance(response, dict)
        payload = response if is_plain_dict else response.model_dump()
        raw_choices = payload.get("choices", [])

        # Position -> rebuilt generation, only for entries that gained reasoning.
        replacements: dict[int, ChatGeneration] = {}
        for position, generation in enumerate(result.generations):
            raw_message: Any = {}
            if position < len(raw_choices):
                raw_choice = raw_choices[position]
                if isinstance(raw_choice, Mapping):
                    raw_message = raw_choice.get("message", {})

            reasoning = _extract_reasoning(raw_message)
            if reasoning is _MISSING and not is_plain_dict:
                # The dumped dict had nothing; try the SDK object itself.
                reasoning = _extract_reasoning(_get_typed_choice_message(response, position))

            if reasoning is _MISSING or not isinstance(generation.message, AIMessage):
                continue
            replacements[position] = ChatGeneration(
                message=_with_reasoning_content(generation.message, reasoning),
                generation_info=generation.generation_info,
            )

        if replacements:
            generations = [
                replacements.get(position, generation)
                for position, generation in enumerate(result.generations)
            ]
        else:
            generations = result.generations
        return ChatResult(generations=generations, llm_output=result.llm_output)
@@ -0,0 +1,305 @@
 """Tests for deerflow.models.patched_stepfun.PatchedChatStepFun."""
 from __future__ import annotations
 from unittest.mock import MagicMock, patch
 from langchain_core.messages import AIMessage, AIMessageChunk, HumanMessage
 def _make_model(**kwargs):
    """Build a ``PatchedChatStepFun`` with fixed test settings plus any extra keyword arguments."""
    from deerflow.models.patched_stepfun import PatchedChatStepFun

    fixed_settings = {
        "model": "step-3.7-flash",
        "api_key": "test-key",
        "base_url": "https://api.stepfun.com/v1",
    }
    return PatchedChatStepFun(**fixed_settings, **kwargs)
 # ---------------------------------------------------------------------------
 # Basic properties
 # ---------------------------------------------------------------------------
 def test_is_lc_serializable_returns_true():
    """The adapter class reports itself as LangChain-serializable."""
    from deerflow.models.patched_stepfun import PatchedChatStepFun as adapter_cls

    serializable = adapter_cls.is_lc_serializable()
    assert serializable is True
 def test_lc_secrets_contains_stepfun_api_key_mapping():
    """Both API-key field names map to ``STEPFUN_API_KEY``."""
    adapter = _make_model()
    for secret_field in ("api_key", "openai_api_key"):
        assert adapter.lc_secrets[secret_field] == "STEPFUN_API_KEY"
 # ---------------------------------------------------------------------------
 # _extract_reasoning helper
 # ---------------------------------------------------------------------------
 def test_extract_reasoning_from_dict_with_reasoning():
    """A dict carrying only ``reasoning`` yields that value."""
    from deerflow.models.patched_stepfun import _extract_reasoning

    extracted = _extract_reasoning({"reasoning": "thinking..."})
    assert extracted == "thinking..."
 def test_extract_reasoning_from_dict_with_reasoning_content():
    """A dict carrying only ``reasoning_content`` yields that value."""
    from deerflow.models.patched_stepfun import _extract_reasoning

    extracted = _extract_reasoning({"reasoning_content": "thinking..."})
    assert extracted == "thinking..."
 def test_extract_reasoning_prefers_reasoning_content_over_reasoning():
    """When both fields are present, ``reasoning_content`` wins."""
    from deerflow.models.patched_stepfun import _extract_reasoning

    both_fields = {"reasoning_content": "deepseek", "reasoning": "native"}
    assert _extract_reasoning(both_fields) == "deepseek"
 def test_extract_reasoning_missing_returns_sentinel():
    """An empty dict and a ``None``-valued field both yield the ``_MISSING`` sentinel."""
    from deerflow.models.patched_stepfun import _MISSING, _extract_reasoning

    for without_reasoning in ({}, {"reasoning": None}):
        assert _extract_reasoning(without_reasoning) is _MISSING
 # ---------------------------------------------------------------------------
 # Request payload replay (_get_request_payload)
 # ---------------------------------------------------------------------------
 def test_reasoning_content_injected_into_assistant_tool_call_message():
    """Reasoning stored on the historical AI message reappears on the outgoing assistant payload."""
    adapter = _make_model()
    user_turn = HumanMessage(content="Check Beijing weather.")
    assistant_turn = AIMessage(
        content="",
        additional_kwargs={"reasoning_content": "I need to call the weather tool."},
    )
    weather_call = {
        "id": "call_weather",
        "type": "function",
        "function": {"name": "get_weather", "arguments": '{"location":"Beijing"}'},
    }
    assistant_payload = {"role": "assistant", "content": "", "tool_calls": [weather_call]}
    parent_payload = {
        "messages": [
            {"role": "user", "content": "Check Beijing weather."},
            assistant_payload,
        ]
    }
    parent_cls = type(adapter).__bases__[0]
    conversation = [user_turn, assistant_turn]

    # Stub the parent's payload builder and the input conversion so only the
    # replay step of the adapter runs.
    with patch.object(parent_cls, "_get_request_payload", return_value=parent_payload), patch.object(adapter, "_convert_input") as mock_convert:
        mock_convert.return_value = MagicMock(to_messages=lambda: [user_turn, assistant_turn])
        built = adapter._get_request_payload(conversation)

    assert built["messages"][1]["reasoning_content"] == "I need to call the weather tool."
 def test_reasoning_content_is_noop_when_missing():
    """No ``reasoning_content`` key is added when the historical AI message has none."""
    adapter = _make_model()
    user_turn = HumanMessage(content="hello")
    assistant_turn = AIMessage(content="hi", additional_kwargs={})
    parent_payload = {
        "messages": [
            {"role": "user", "content": "hello"},
            {"role": "assistant", "content": "hi"},
        ]
    }
    parent_cls = type(adapter).__bases__[0]

    with patch.object(parent_cls, "_get_request_payload", return_value=parent_payload), patch.object(adapter, "_convert_input") as mock_convert:
        mock_convert.return_value = MagicMock(to_messages=lambda: [user_turn, assistant_turn])
        built = adapter._get_request_payload([user_turn, assistant_turn])

    assistant_payload = built["messages"][1]
    assert "reasoning_content" not in assistant_payload
 # ---------------------------------------------------------------------------
 # Streaming reasoning capture (_convert_chunk_to_generation_chunk)
 # ---------------------------------------------------------------------------
 def test_convert_chunk_captures_reasoning_field():
    """StepFun default format: ``delta.reasoning`` lands in ``reasoning_content``."""
    adapter = _make_model()
    raw_chunk = {"choices": [{"delta": {"role": "assistant", "reasoning": "I need "}}]}

    converted = adapter._convert_chunk_to_generation_chunk(raw_chunk, AIMessageChunk, {})

    assert converted is not None
    captured = converted.message.additional_kwargs["reasoning_content"]
    assert captured == "I need "
 def test_convert_chunk_captures_reasoning_content_field():
    """StepFun deepseek-style format: ``delta.reasoning_content`` is kept."""
    adapter = _make_model()
    raw_chunk = {"choices": [{"delta": {"role": "assistant", "reasoning_content": "I need "}}]}

    converted = adapter._convert_chunk_to_generation_chunk(raw_chunk, AIMessageChunk, {})

    assert converted is not None
    captured = converted.message.additional_kwargs["reasoning_content"]
    assert captured == "I need "
 def test_convert_chunk_streams_reasoning_then_content():
    """Full streaming flow: two reasoning deltas followed by a content delta merge correctly."""
    adapter = _make_model()
    raw_chunks = (
        {"choices": [{"delta": {"role": "assistant", "reasoning": "I need "}}]},
        {"choices": [{"delta": {"reasoning": "a tool."}}]},
        {"choices": [{"delta": {"content": "Done."}, "finish_reason": "stop"}], "model": "step-3.7-flash"},
    )

    # Convert in stream order: first reasoning, second reasoning, then the answer.
    converted = [
        adapter._convert_chunk_to_generation_chunk(raw, AIMessageChunk, {})
        for raw in raw_chunks
    ]
    for piece in converted:
        assert piece is not None

    first, second, answer = converted
    merged = first.message + second.message + answer.message
    assert merged.additional_kwargs["reasoning_content"] == "I need a tool."
    assert merged.content == "Done."
 def test_convert_chunk_noop_when_no_reasoning():
    """A content-only delta produces a chunk without ``reasoning_content``."""
    adapter = _make_model()
    raw_chunk = {
        "choices": [{"delta": {"content": "Hello."}, "finish_reason": "stop"}],
        "model": "step-3.7-flash",
    }

    converted = adapter._convert_chunk_to_generation_chunk(raw_chunk, AIMessageChunk, {})

    assert converted is not None
    assert "reasoning_content" not in converted.message.additional_kwargs
 # ---------------------------------------------------------------------------
 # Non-streaming reasoning capture (_create_chat_result)
 # ---------------------------------------------------------------------------
 def test_create_chat_result_extracts_reasoning_field():
    """StepFun default format: ``message.reasoning`` is stored as ``reasoning_content``."""
    adapter = _make_model()
    assistant_message = {
        "role": "assistant",
        "content": "The weather is sunny.",
        "reasoning": "The tool returned sunny weather.",
    }
    raw_response = {
        "choices": [{"message": assistant_message, "finish_reason": "stop"}],
        "model": "step-3.7-flash",
    }

    produced = adapter._create_chat_result(raw_response).generations[0].message

    assert produced.content == "The weather is sunny."
    assert produced.additional_kwargs["reasoning_content"] == "The tool returned sunny weather."
 def test_create_chat_result_extracts_reasoning_content_field():
    """StepFun deepseek-style format: ``message.reasoning_content`` is preserved."""
    adapter = _make_model()
    assistant_message = {
        "role": "assistant",
        "content": "The weather is sunny.",
        "reasoning_content": "The tool returned sunny weather.",
    }
    raw_response = {
        "choices": [{"message": assistant_message, "finish_reason": "stop"}],
        "model": "step-3.7-flash",
    }

    produced = adapter._create_chat_result(raw_response).generations[0].message

    assert produced.content == "The weather is sunny."
    assert produced.additional_kwargs["reasoning_content"] == "The tool returned sunny weather."
 def test_create_chat_result_reads_reasoning_from_sdk_object():
    """Reasoning missing from ``model_dump()`` is still read from the typed message attribute."""
    adapter = _make_model()

    # The dumped dict deliberately omits reasoning so only the attribute
    # fallback on the typed object can supply it.
    dumped = {
        "choices": [
            {
                "message": {"role": "assistant", "content": "Answer."},
                "finish_reason": "stop",
            }
        ],
        "model": "step-3.7-flash",
    }

    class StubMessage:
        reasoning = "Reasoning stored on the SDK message object."
        reasoning_content = None
        model_extra = None

    class StubChoice:
        message = StubMessage()

    class StubResponse:
        choices = [StubChoice()]

        def model_dump(self, **kwargs):
            return dumped

    produced = adapter._create_chat_result(StubResponse()).generations[0].message
    assert produced.additional_kwargs["reasoning_content"] == "Reasoning stored on the SDK message object."
 def test_create_chat_result_noop_when_no_reasoning():
    """A response without reasoning fields leaves ``additional_kwargs`` free of ``reasoning_content``."""
    adapter = _make_model()
    assistant_message = {"role": "assistant", "content": "Hello!"}
    raw_response = {
        "choices": [{"message": assistant_message, "finish_reason": "stop"}],
        "model": "step-3.7-flash",
    }

    produced = adapter._create_chat_result(raw_response).generations[0].message

    assert "reasoning_content" not in produced.additional_kwargs
@@ -274,6 +274,32 @@ models:
  #       thinking:
  #         type: disabled
  # Example: StepFun (阶跃星辰) reasoning models
  # StepFun provides an OpenAI-compatible API with reasoning models.
  # With reasoning_format: deepseek-style, the API returns reasoning_content
  # (same field as DeepSeek), which must be replayed on historical assistant
  # messages in multi-turn tool-call conversations.
  # Use PatchedChatStepFun instead of plain ChatOpenAI.
  # Docs: https://platform.stepfun.com/docs/api-reference/chat-completions
  # - name: step-3.7-flash
  #   display_name: Step 3.7 Flash
  #   use: deerflow.models.patched_stepfun:PatchedChatStepFun
  #   model: step-3.7-flash
  #   api_key: $STEPFUN_API_KEY
  #   base_url: https://api.stepfun.com/v1
  #   request_timeout: 600.0
  #   max_retries: 2
  #   max_tokens: 4096
  #   supports_thinking: true
  #   supports_reasoning_effort: true
  #   supports_vision: true
  #   when_thinking_enabled:
  #     extra_body:
  #       reasoning_format: deepseek-style
  #   when_thinking_disabled:
  #     extra_body:
  #       reasoning_format: deepseek-style
  # Example: MiniMax (OpenAI-compatible) - International Edition
  # MiniMax provides high-performance models with 512K context window and 128K max output
  # Docs: https://platform.minimax.io/docs/api-reference/text-openai-api