fix(middleware): fix LLM fallback run status (#3321)

* Fix LLM fallback run status
* optimize LLM fallback marker extraction in streaming path
2026-06-10 17:35:57 +00:00 · 2026-05-31 16:42:13 +02:00
parent 9f3be2a9fa
commit 79cc227917
5 changed files with 362 additions and 5 deletions
@@ -94,6 +94,31 @@ def test_async_model_call_returns_user_message_for_quota_errors() -> None:

    assert isinstance(result, AIMessage)
    assert "out of quota" in str(result.content)
+    assert result.additional_kwargs["deerflow_error_fallback"] is True
+    assert result.additional_kwargs["error_reason"] == "quota"
+    assert result.additional_kwargs["error_type"] == "FakeError"
+
+
+def test_async_model_call_marks_transient_retry_exhaustion_as_error_fallback(
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    middleware = _build_middleware(retry_max_attempts=2, retry_base_delay_ms=25, retry_cap_delay_ms=25)
+
+    async def fake_sleep(_delay: float) -> None:
+        return None
+
+    async def handler(_request) -> AIMessage:
+        raise FakeError("Connection error.", status_code=503)
+
+    monkeypatch.setattr("asyncio.sleep", fake_sleep)
+
+    result = asyncio.run(middleware.awrap_model_call(SimpleNamespace(), handler))
+
+    assert isinstance(result, AIMessage)
+    assert "temporarily unavailable" in str(result.content)
+    assert result.additional_kwargs["deerflow_error_fallback"] is True
+    assert result.additional_kwargs["error_reason"] == "transient"
+    assert result.additional_kwargs["error_detail"] == "Connection error."


 def test_sync_model_call_uses_retry_after_header(monkeypatch: pytest.MonkeyPatch) -> None: