feat: static system prompt with DynamicContextMiddleware for prefix-cache optimization (#2801)
* feat(middleware): inject dynamic context via DynamicContextMiddleware
Move memory and current date out of the system prompt and into a
dedicated <system-reminder> HumanMessage injected once per session
(frozen-snapshot pattern) via a new DynamicContextMiddleware.
This keeps the system prompt byte-exact across all users and sessions,
enabling maximum Anthropic/Bedrock prefix-cache reuse.
Key design decisions:
- ID-swap technique: reminder takes the first HumanMessage's ID
(replacing it in-place via add_messages), original content gets a
derived `{id}__user` ID (appended after). Preserves correct ordering.
- hide_from_ui: True on reminder messages so frontend filters them out.
- Midnight crossing: date-update reminder injected before the current
turn's HumanMessage when the conversation spans midnight.
- INFO-level logging for production diagnostics.
Also adds prompt-caching breakpoint budget enforcement tests and
updates ClaudeChatModel docs to reference the new pattern.
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
* feat(token-usage): log input/output token detail breakdown in middleware
Extend the LLM token usage log line to include input_token_details and
output_token_details (cache_creation, cache_read, reasoning, audio, etc.)
when present. Adds tests covering Anthropic cache detail logging from
both usage_metadata and response_metadata.
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
* fix: fix nginx
* fix(middleware): always inject date; gate memory on injection_enabled
Date injection is now unconditional — it is part of the static system
prompt replacement and should always be present. Memory injection
remains gated by `memory.injection_enabled` in the app config.
Previously the entire DynamicContextMiddleware was skipped when
injection_enabled was False, which also suppressed the date.
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
* fix(lint): format files and correct test assertions for token usage middleware
- ruff format dynamic_context_middleware.py and test_claude_provider_prompt_caching.py
- Remove unused pytest import from test_dynamic_context_middleware.py
- Fix two tests that asserted response_metadata fallback logic that
doesn't exist: replace with tests that match actual middleware behavior
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
* fix(middleware): address Copilot review comments on DynamicContextMiddleware
- Use additional_kwargs flag for reminder detection instead of content
substring matching, so user messages containing '<system-reminder>'
are not mistakenly treated as injected reminders
- Generate stable UUID when original HumanMessage.id is None to prevent
ambiguous 'None__user' derived IDs and message collisions
- Downgrade per-turn no-op log to DEBUG; keep actual injection events at INFO
- Add two new tests: missing-id UUID fallback and user-text false-positive
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---------
Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -258,6 +258,12 @@ def _build_middlewares(
|
|||||||
resolved_app_config = app_config or get_app_config()
|
resolved_app_config = app_config or get_app_config()
|
||||||
middlewares = build_lead_runtime_middlewares(app_config=resolved_app_config, lazy_init=True)
|
middlewares = build_lead_runtime_middlewares(app_config=resolved_app_config, lazy_init=True)
|
||||||
|
|
||||||
|
# Always inject the current date (and, when enabled, memory) as a <system-reminder>
|
||||||
|
# message ahead of the first HumanMessage so the system prompt stays fully static for prefix-cache reuse.
|
||||||
|
from deerflow.agents.middlewares.dynamic_context_middleware import DynamicContextMiddleware
|
||||||
|
|
||||||
|
middlewares.append(DynamicContextMiddleware(agent_name=agent_name, app_config=resolved_app_config))
|
||||||
|
|
||||||
# Add summarization middleware if enabled
|
# Add summarization middleware if enabled
|
||||||
summarization_middleware = _create_summarization_middleware(app_config=resolved_app_config)
|
summarization_middleware = _create_summarization_middleware(app_config=resolved_app_config)
|
||||||
if summarization_middleware is not None:
|
if summarization_middleware is not None:
|
||||||
|
|||||||
@@ -3,7 +3,6 @@ from __future__ import annotations
|
|||||||
import asyncio
|
import asyncio
|
||||||
import logging
|
import logging
|
||||||
import threading
|
import threading
|
||||||
from datetime import datetime
|
|
||||||
from functools import lru_cache
|
from functools import lru_cache
|
||||||
from typing import TYPE_CHECKING
|
from typing import TYPE_CHECKING
|
||||||
|
|
||||||
@@ -368,8 +367,6 @@ You are {agent_name}, an open-source super agent.
|
|||||||
|
|
||||||
{soul}
|
{soul}
|
||||||
{self_update_section}
|
{self_update_section}
|
||||||
{memory_context}
|
|
||||||
|
|
||||||
<thinking_style>
|
<thinking_style>
|
||||||
- Think concisely and strategically about the user's request BEFORE taking action
|
- Think concisely and strategically about the user's request BEFORE taking action
|
||||||
- Break down the task: What is clear? What is ambiguous? What is missing?
|
- Break down the task: What is clear? What is ambiguous? What is missing?
|
||||||
@@ -776,9 +773,6 @@ def apply_prompt_template(
|
|||||||
available_skills: set[str] | None = None,
|
available_skills: set[str] | None = None,
|
||||||
app_config: AppConfig | None = None,
|
app_config: AppConfig | None = None,
|
||||||
) -> str:
|
) -> str:
|
||||||
# Get memory context
|
|
||||||
memory_context = _get_memory_context(agent_name, app_config=app_config)
|
|
||||||
|
|
||||||
# Include subagent section only if enabled (from runtime parameter)
|
# Include subagent section only if enabled (from runtime parameter)
|
||||||
n = max_concurrent_subagents
|
n = max_concurrent_subagents
|
||||||
subagent_section = _build_subagent_section(n, app_config=app_config) if subagent_enabled else ""
|
subagent_section = _build_subagent_section(n, app_config=app_config) if subagent_enabled else ""
|
||||||
@@ -812,18 +806,18 @@ def apply_prompt_template(
|
|||||||
custom_mounts_section = _build_custom_mounts_section(app_config=app_config)
|
custom_mounts_section = _build_custom_mounts_section(app_config=app_config)
|
||||||
acp_and_mounts_section = "\n".join(section for section in (acp_section, custom_mounts_section) if section)
|
acp_and_mounts_section = "\n".join(section for section in (acp_section, custom_mounts_section) if section)
|
||||||
|
|
||||||
# Format the prompt with dynamic skills and memory
|
# Build and return the fully static system prompt.
|
||||||
prompt = SYSTEM_PROMPT_TEMPLATE.format(
|
# Memory and current date are injected once per conversation by DynamicContextMiddleware
|
||||||
|
# as a dedicated <system-reminder> HumanMessage placed before the first user message,
|
||||||
|
# keeping this prompt identical across users and sessions for maximum prefix-cache reuse.
|
||||||
|
return SYSTEM_PROMPT_TEMPLATE.format(
|
||||||
agent_name=agent_name or "DeerFlow 2.0",
|
agent_name=agent_name or "DeerFlow 2.0",
|
||||||
soul=get_agent_soul(agent_name),
|
soul=get_agent_soul(agent_name),
|
||||||
self_update_section=_build_self_update_section(agent_name),
|
self_update_section=_build_self_update_section(agent_name),
|
||||||
skills_section=skills_section,
|
skills_section=skills_section,
|
||||||
deferred_tools_section=deferred_tools_section,
|
deferred_tools_section=deferred_tools_section,
|
||||||
memory_context=memory_context,
|
|
||||||
subagent_section=subagent_section,
|
subagent_section=subagent_section,
|
||||||
subagent_reminder=subagent_reminder,
|
subagent_reminder=subagent_reminder,
|
||||||
subagent_thinking=subagent_thinking,
|
subagent_thinking=subagent_thinking,
|
||||||
acp_section=acp_and_mounts_section,
|
acp_section=acp_and_mounts_section,
|
||||||
)
|
)
|
||||||
|
|
||||||
return prompt + f"\n<current_date>{datetime.now().strftime('%Y-%m-%d, %A')}</current_date>"
|
|
||||||
|
|||||||
@@ -0,0 +1,193 @@
|
|||||||
|
"""Middleware to inject dynamic context (memory, current date) as a system-reminder.
|
||||||
|
|
||||||
|
The system prompt is kept fully static for maximum prefix-cache reuse across users
|
||||||
|
and sessions. The current date is always injected. Per-user memory is also injected
|
||||||
|
when ``memory.injection_enabled`` is True in the app config. Both are delivered once
|
||||||
|
per conversation as a dedicated <system-reminder> HumanMessage inserted before the
|
||||||
|
first user message (frozen-snapshot pattern).
|
||||||
|
|
||||||
|
When a conversation spans midnight the middleware detects the date change and injects
|
||||||
|
a lightweight date-update reminder as a separate HumanMessage before the current turn.
|
||||||
|
This correction is persisted so subsequent turns on the new day see a consistent history
|
||||||
|
and do not re-inject.
|
||||||
|
|
||||||
|
Reminder format:
|
||||||
|
|
||||||
|
<system-reminder>
|
||||||
|
<memory>...</memory>
|
||||||
|
|
||||||
|
<current_date>2026-05-08, Friday</current_date>
|
||||||
|
</system-reminder>
|
||||||
|
|
||||||
|
Date-update format:
|
||||||
|
|
||||||
|
<system-reminder>
|
||||||
|
<current_date>2026-05-09, Saturday</current_date>
|
||||||
|
</system-reminder>
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import re
|
||||||
|
import uuid
|
||||||
|
from datetime import datetime
|
||||||
|
from typing import TYPE_CHECKING, override
|
||||||
|
|
||||||
|
from langchain.agents.middleware import AgentMiddleware
|
||||||
|
from langchain_core.messages import HumanMessage
|
||||||
|
from langgraph.runtime import Runtime
|
||||||
|
|
||||||
|
if TYPE_CHECKING:
|
||||||
|
from deerflow.config.app_config import AppConfig
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
_DATE_RE = re.compile(r"<current_date>([^<]+)</current_date>")
|
||||||
|
_DYNAMIC_CONTEXT_REMINDER_KEY = "dynamic_context_reminder"
|
||||||
|
|
||||||
|
|
||||||
|
def _extract_date(content: str) -> str | None:
|
||||||
|
"""Return the first <current_date> value found in *content*, or None."""
|
||||||
|
m = _DATE_RE.search(content)
|
||||||
|
return m.group(1) if m else None
|
||||||
|
|
||||||
|
|
||||||
|
def _last_injected_date(messages: list) -> str | None:
    """Return the date carried by the newest injected reminder in *messages*.

    A message counts as an injected reminder only when it is a HumanMessage whose
    ``additional_kwargs`` holds a truthy ``dynamic_context_reminder`` entry. The
    flag is used instead of searching the text for ``<system-reminder>`` so that
    user-authored messages mentioning that tag are never mistaken for reminders.

    Returns None when no flagged message exists, or when the newest flagged
    message contains no ``<current_date>`` element.
    """
    newest_reminder = next(
        (
            candidate
            for candidate in reversed(messages)
            if isinstance(candidate, HumanMessage) and candidate.additional_kwargs.get(_DYNAMIC_CONTEXT_REMINDER_KEY)
        ),
        None,
    )
    if newest_reminder is None:
        return None

    # Non-string content (e.g. a list of content blocks) is stringified before the search.
    body = newest_reminder.content
    if not isinstance(body, str):
        body = str(body)
    return _extract_date(body)
|
||||||
|
|
||||||
|
|
||||||
|
class DynamicContextMiddleware(AgentMiddleware):
    """Inject memory and the current date as a standalone <system-reminder> message.

    First turn
    ----------
    When the history holds no injected reminder, a full reminder (optional
    memory + date) is emitted as its own HumanMessage. The reminder reuses the
    ID of the first HumanMessage in state, and that message's original content
    is re-emitted under a derived ``{id}__user`` ID (see
    ``_make_reminder_and_user_messages``). On later turns of the same day no
    update is returned, so the reminder text is not rewritten.

    Midnight crossing
    -----------------
    When the date stored in the most recent reminder differs from today's date,
    a date-only reminder is emitted the same way, this time targeting the
    **last** HumanMessage in state. Subsequent turns on the new day find that
    reminder and skip re-injection.

    NOTE(review): the ID swap relies on the ``add_messages`` reducer replacing
    the message whose ID matches and appending the one with the new ID at the end
    of the list. The user message therefore lands directly after the reminder
    only when the targeted HumanMessage is the final message in state — confirm
    the behaviour for histories where it is not (e.g. first injection into a
    conversation that already has later messages).
    """

    def __init__(self, agent_name: str | None = None, *, app_config: AppConfig | None = None):
        """Store the agent name and app config used to look up memory.

        Args:
            agent_name: Passed through to ``_get_memory_context``.
            app_config: Optional app config; when omitted, memory injection is
                treated as enabled.
        """
        super().__init__()
        self._agent_name = agent_name
        self._app_config = app_config

    @staticmethod
    def _today() -> str:
        """Return the current local date formatted like ``2026-05-08, Friday``."""
        return datetime.now().strftime("%Y-%m-%d, %A")

    def _build_full_reminder(self, current_date: str | None = None) -> str:
        """Build the first-turn reminder: optional memory followed by the date.

        Args:
            current_date: Pre-formatted date to embed. When None the clock is
                read here.

        Returns:
            The ``<system-reminder>`` block as a single newline-joined string.
        """
        # Imported lazily, as in the original; presumably to avoid an import cycle — TODO confirm.
        from deerflow.agents.lead_agent.prompt import _get_memory_context

        # Memory injection is gated by injection_enabled; date is always included.
        injection_enabled = self._app_config.memory.injection_enabled if self._app_config else True
        memory_context = _get_memory_context(self._agent_name, app_config=self._app_config) if injection_enabled else ""
        if current_date is None:
            current_date = self._today()

        lines: list[str] = ["<system-reminder>"]
        if memory_context:
            lines.append(memory_context.strip())
            lines.append("")  # blank line separating memory from date
        lines.append(f"<current_date>{current_date}</current_date>")
        lines.append("</system-reminder>")

        return "\n".join(lines)

    def _build_date_update_reminder(self, current_date: str | None = None) -> str:
        """Build the date-only reminder used after a midnight crossing.

        Args:
            current_date: Pre-formatted date to embed. When None the clock is
                read here.
        """
        if current_date is None:
            current_date = self._today()
        return "\n".join(
            [
                "<system-reminder>",
                f"<current_date>{current_date}</current_date>",
                "</system-reminder>",
            ]
        )

    @staticmethod
    def _make_reminder_and_user_messages(original: HumanMessage, reminder_content: str) -> tuple[HumanMessage, HumanMessage]:
        """Return (reminder_msg, user_msg) using the ID-swap technique.

        reminder_msg takes the original message's ID so that add_messages replaces
        the original in place. user_msg carries the original content, name and
        additional_kwargs under a derived ``{id}__user`` ID.

        If the original message has no ID a UUID is generated so the derived
        ``{id}__user`` ID never collapses to the ambiguous ``None__user`` string.
        """
        stable_id = original.id or str(uuid.uuid4())
        reminder_msg = HumanMessage(
            content=reminder_content,
            id=stable_id,
            additional_kwargs={"hide_from_ui": True, _DYNAMIC_CONTEXT_REMINDER_KEY: True},
        )
        user_msg = HumanMessage(
            content=original.content,
            id=f"{stable_id}__user",
            name=original.name,
            # Shallow copy so the new message does not share a mutable dict with the original.
            additional_kwargs=dict(original.additional_kwargs),
        )
        return reminder_msg, user_msg

    def _inject(self, state) -> dict | None:
        """Compute the state update for this turn, or None when nothing is needed.

        Returns:
            ``{"messages": [reminder_msg, user_msg]}`` on the first turn or after a
            date change; None when state has no messages, no HumanMessage, or the
            stored date already equals today's date.
        """
        messages = list(state.get("messages", []))
        if not messages:
            return None

        # Read the clock once so the date compared below is the date that gets injected.
        current_date = self._today()
        last_date = _last_injected_date(messages)
        logger.debug(
            "DynamicContextMiddleware._inject: msg_count=%d last_date=%r current_date=%r",
            len(messages),
            last_date,
            current_date,
        )

        if last_date is None:
            # ── First turn: inject full reminder as a separate HumanMessage ─────
            first_idx = next((i for i, m in enumerate(messages) if isinstance(m, HumanMessage)), None)
            if first_idx is None:
                return None
            full_reminder = self._build_full_reminder(current_date)
            logger.info(
                "DynamicContextMiddleware: injecting full reminder (len=%d, has_memory=%s) into first HumanMessage id=%r",
                len(full_reminder),
                "<memory>" in full_reminder,
                messages[first_idx].id,
            )
            reminder_msg, user_msg = self._make_reminder_and_user_messages(messages[first_idx], full_reminder)
            return {"messages": [reminder_msg, user_msg]}

        if last_date == current_date:
            # ── Same day: nothing to do ──────────────────────────────────────────
            return None

        # ── Midnight crossed: inject date-update reminder as a separate HumanMessage ──
        last_human_idx = next((i for i in reversed(range(len(messages))) if isinstance(messages[i], HumanMessage)), None)
        if last_human_idx is None:
            return None

        reminder_msg, user_msg = self._make_reminder_and_user_messages(
            messages[last_human_idx],
            self._build_date_update_reminder(current_date),
        )
        logger.info("DynamicContextMiddleware: midnight crossing detected — injected date update before current turn")
        return {"messages": [reminder_msg, user_msg]}

    @override
    def before_agent(self, state, runtime: Runtime) -> dict | None:
        """Synchronous hook: delegate to ``_inject``; *runtime* is unused."""
        return self._inject(state)

    @override
    async def abefore_agent(self, state, runtime: Runtime) -> dict | None:
        """Async hook: delegate to ``_inject``; *runtime* is unused."""
        return self._inject(state)
|
||||||
@@ -267,11 +267,20 @@ class TokenUsageMiddleware(AgentMiddleware):
|
|||||||
|
|
||||||
usage = getattr(last, "usage_metadata", None)
|
usage = getattr(last, "usage_metadata", None)
|
||||||
if usage:
|
if usage:
|
||||||
|
input_token_details = usage.get("input_token_details") or {}
|
||||||
|
output_token_details = usage.get("output_token_details") or {}
|
||||||
|
detail_parts = []
|
||||||
|
if input_token_details:
|
||||||
|
detail_parts.append(f"input_token_details={input_token_details}")
|
||||||
|
if output_token_details:
|
||||||
|
detail_parts.append(f"output_token_details={output_token_details}")
|
||||||
|
detail_suffix = f" {' '.join(detail_parts)}" if detail_parts else ""
|
||||||
logger.info(
|
logger.info(
|
||||||
"LLM token usage: input=%s output=%s total=%s",
|
"LLM token usage: input=%s output=%s total=%s%s",
|
||||||
usage.get("input_tokens", "?"),
|
usage.get("input_tokens", "?"),
|
||||||
usage.get("output_tokens", "?"),
|
usage.get("output_tokens", "?"),
|
||||||
usage.get("total_tokens", "?"),
|
usage.get("total_tokens", "?"),
|
||||||
|
detail_suffix,
|
||||||
)
|
)
|
||||||
|
|
||||||
todos = state.get("todos") or []
|
todos = state.get("todos") or []
|
||||||
|
|||||||
@@ -196,6 +196,10 @@ class ClaudeChatModel(ChatAnthropic):
|
|||||||
enforced by both the Anthropic API and AWS Bedrock. Breakpoints are
|
enforced by both the Anthropic API and AWS Bedrock. Breakpoints are
|
||||||
placed on the *last* eligible blocks because later breakpoints cover a
|
placed on the *last* eligible blocks because later breakpoints cover a
|
||||||
larger prefix and yield better cache hit rates.
|
larger prefix and yield better cache hit rates.
|
||||||
|
|
||||||
|
The system prompt is expected to be fully static (no per-user memory or
|
||||||
|
current date). Dynamic context is injected once per conversation by
|
||||||
|
DynamicContextMiddleware as a separate <system-reminder> HumanMessage before the first user message.
|
||||||
"""
|
"""
|
||||||
MAX_CACHE_BREAKPOINTS = 4
|
MAX_CACHE_BREAKPOINTS = 4
|
||||||
|
|
||||||
|
|||||||
@@ -110,6 +110,22 @@ def test_auth_post_allows_forwarded_same_origin():
|
|||||||
assert response.cookies.get("csrf_token")
|
assert response.cookies.get("csrf_token")
|
||||||
|
|
||||||
|
|
||||||
|
def test_auth_post_allows_forwarded_same_origin_with_non_default_port():
    """A login POST whose Origin matches the X-Forwarded proto/host on port 2026 succeeds."""
    forwarded_headers = {
        "Origin": "http://localhost:2026",
        "X-Forwarded-Proto": "http",
        "X-Forwarded-Host": "localhost:2026",
    }
    client = TestClient(_make_app(), base_url="http://internal:8000")

    response = client.post("/api/v1/auth/login/local", headers=forwarded_headers)

    assert response.status_code == 200
    assert response.cookies.get("csrf_token")
|
||||||
|
|
||||||
|
|
||||||
def test_auth_post_allows_rfc_forwarded_same_origin():
|
def test_auth_post_allows_rfc_forwarded_same_origin():
|
||||||
client = TestClient(_make_app(), base_url="http://internal:8000")
|
client = TestClient(_make_app(), base_url="http://internal:8000")
|
||||||
|
|
||||||
|
|||||||
@@ -0,0 +1,312 @@
|
|||||||
|
"""Tests for DynamicContextMiddleware.
|
||||||
|
|
||||||
|
Verifies that memory and current date are injected as a <system-reminder> into
|
||||||
|
the first HumanMessage exactly once per session (frozen-snapshot pattern).
|
||||||
|
"""
|
||||||
|
|
||||||
|
from types import SimpleNamespace
|
||||||
|
from unittest import mock
|
||||||
|
|
||||||
|
from langchain_core.messages import AIMessage, HumanMessage
|
||||||
|
|
||||||
|
from deerflow.agents.middlewares.dynamic_context_middleware import (
|
||||||
|
_DYNAMIC_CONTEXT_REMINDER_KEY,
|
||||||
|
DynamicContextMiddleware,
|
||||||
|
)
|
||||||
|
|
||||||
|
_SYSTEM_REMINDER_TAG = "<system-reminder>"
|
||||||
|
|
||||||
|
|
||||||
|
def _make_middleware(**kwargs) -> DynamicContextMiddleware:
    """Construct the middleware under test, forwarding any keyword arguments."""
    middleware = DynamicContextMiddleware(**kwargs)
    return middleware
|
||||||
|
|
||||||
|
|
||||||
|
def _fake_runtime():
|
||||||
|
return SimpleNamespace(context={})
|
||||||
|
|
||||||
|
|
||||||
|
def _reminder_msg(content: str, msg_id: str) -> HumanMessage:
    """Create a HumanMessage carrying the same flags the middleware puts on its reminders."""
    reminder_flags = {"hide_from_ui": True, _DYNAMIC_CONTEXT_REMINDER_KEY: True}
    return HumanMessage(content=content, id=msg_id, additional_kwargs=reminder_flags)
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Basic injection
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
def test_injects_system_reminder_into_first_human_message():
    """First turn: a flagged reminder is returned first, followed by the re-issued user message."""
    middleware = _make_middleware()
    initial_state = {"messages": [HumanMessage(content="Hello", id="msg-1")]}

    with (
        mock.patch("deerflow.agents.lead_agent.prompt._get_memory_context", return_value=""),
        mock.patch("deerflow.agents.middlewares.dynamic_context_middleware.datetime") as fake_datetime,
    ):
        fake_datetime.now.return_value.strftime.return_value = "2026-05-08, Friday"
        update = middleware.before_agent(initial_state, _fake_runtime())

    assert update is not None
    assert len(update["messages"]) == 2
    reminder, user = update["messages"]

    # The reminder occupies the original message's ID (position swap) and holds no user text.
    assert isinstance(reminder, HumanMessage)
    assert reminder.id == "msg-1"
    assert reminder.additional_kwargs.get(_DYNAMIC_CONTEXT_REMINDER_KEY) is True
    assert _SYSTEM_REMINDER_TAG in reminder.content
    assert "<current_date>2026-05-08, Friday</current_date>" in reminder.content
    assert "Hello" not in reminder.content

    # The user text is re-issued unchanged under the derived ID.
    assert isinstance(user, HumanMessage)
    assert user.id == "msg-1__user"
    assert user.content == "Hello"
|
||||||
|
|
||||||
|
|
||||||
|
def test_memory_included_when_present():
    """Memory returned by ``_get_memory_context`` appears in the reminder alongside the date."""
    middleware = _make_middleware()
    initial_state = {"messages": [HumanMessage(content="Hi", id="msg-1")]}
    memory_patch = mock.patch(
        "deerflow.agents.lead_agent.prompt._get_memory_context",
        return_value="<memory>\nUser prefers Python.\n</memory>",
    )
    datetime_patch = mock.patch("deerflow.agents.middlewares.dynamic_context_middleware.datetime")

    with memory_patch, datetime_patch as fake_datetime:
        fake_datetime.now.return_value.strftime.return_value = "2026-05-08, Friday"
        update = middleware.before_agent(initial_state, _fake_runtime())

    # Index 0 is the reminder; index 1 is the user's original query.
    reminder_text = update["messages"][0].content
    assert "User prefers Python." in reminder_text
    assert "<current_date>2026-05-08, Friday</current_date>" in reminder_text
    assert update["messages"][1].content == "Hi"
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Frozen-snapshot: no re-injection within a session
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
def test_skips_injection_if_already_present():
    """Second turn on the same day: the history already has a reminder, so no update is returned."""
    middleware = _make_middleware()
    existing_reminder = _reminder_msg(
        "<system-reminder>\n<current_date>2026-05-08, Friday</current_date>\n</system-reminder>",
        "msg-1",
    )
    history = [
        existing_reminder,
        HumanMessage(content="Hello", id="msg-1__user"),
        AIMessage(content="Hi there"),
        HumanMessage(content="Follow-up", id="msg-2"),
    ]

    with mock.patch("deerflow.agents.middlewares.dynamic_context_middleware.datetime") as fake_datetime:
        fake_datetime.now.return_value.strftime.return_value = "2026-05-08, Friday"
        update = middleware.before_agent({"messages": history}, _fake_runtime())

    assert update is None  # stored date equals today's date
|
||||||
|
|
||||||
|
|
||||||
|
def test_injects_only_into_first_human_message_not_later_ones():
    """The returned update targets the first HumanMessage only; later messages are not in it."""
    middleware = _make_middleware()
    history = [
        HumanMessage(content="First", id="msg-1"),
        AIMessage(content="Reply"),
        HumanMessage(content="Second", id="msg-2"),
    ]

    with (
        mock.patch("deerflow.agents.lead_agent.prompt._get_memory_context", return_value=""),
        mock.patch("deerflow.agents.middlewares.dynamic_context_middleware.datetime") as fake_datetime,
    ):
        fake_datetime.now.return_value.strftime.return_value = "2026-05-08, Friday"
        update = middleware.before_agent({"messages": history}, _fake_runtime())

    assert update is not None
    returned = update["messages"]
    # Exactly two messages come back: the reminder and the re-issued first query.
    assert len(returned) == 2
    reminder, first_query = returned
    assert reminder.id == "msg-1"
    assert reminder.additional_kwargs.get(_DYNAMIC_CONTEXT_REMINDER_KEY) is True
    assert _SYSTEM_REMINDER_TAG in reminder.content
    assert first_query.id == "msg-1__user"
    assert first_query.content == "First"
    # "Second" (msg-2) is absent from the update, i.e. left as it was.
    assert all(message.id != "msg-2" for message in returned)
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Edge cases
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
def test_no_messages_returns_none():
    """An empty message list yields no update."""
    empty_state = {"messages": []}
    update = _make_middleware().before_agent(empty_state, _fake_runtime())
    assert update is None
|
||||||
|
|
||||||
|
|
||||||
|
def test_no_human_message_returns_none():
    """A history without any HumanMessage yields no update."""
    middleware = _make_middleware()
    assistant_only = {"messages": [AIMessage(content="assistant only")]}
    memory_patch = mock.patch("deerflow.agents.lead_agent.prompt._get_memory_context", return_value="")

    with memory_patch:
        update = middleware.before_agent(assistant_only, _fake_runtime())

    assert update is None
|
||||||
|
|
||||||
|
|
||||||
|
def test_list_content_message_handled_as_separate_reminder():
    """List-content (e.g. multi-modal) user messages come back intact; the reminder is separate."""
    middleware = _make_middleware()
    blocks = [{"type": "text", "text": "Hello"}]
    initial_state = {"messages": [HumanMessage(content=blocks, id="msg-1")]}

    with (
        mock.patch("deerflow.agents.lead_agent.prompt._get_memory_context", return_value=""),
        mock.patch("deerflow.agents.middlewares.dynamic_context_middleware.datetime") as fake_datetime,
    ):
        fake_datetime.now.return_value.strftime.return_value = "2026-05-08, Friday"
        update = middleware.before_agent(initial_state, _fake_runtime())

    assert update is not None
    returned = update["messages"]
    assert len(returned) == 2
    reminder, user = returned
    # The reminder is a plain-string message carrying the reminder flag.
    assert isinstance(reminder.content, str)
    assert reminder.additional_kwargs.get(_DYNAMIC_CONTEXT_REMINDER_KEY) is True
    assert _SYSTEM_REMINDER_TAG in reminder.content
    # The list content of the user message is unchanged.
    assert user.content == blocks
|
||||||
|
|
||||||
|
|
||||||
|
def test_reminder_uses_original_id_user_message_uses_derived_id():
    """The reminder keeps the original ID (position swap); the user message gets ``{id}__user``."""
    middleware = _make_middleware()
    source_id = "original-id-abc"
    initial_state = {"messages": [HumanMessage(content="Hello", id=source_id)]}

    with (
        mock.patch("deerflow.agents.lead_agent.prompt._get_memory_context", return_value=""),
        mock.patch("deerflow.agents.middlewares.dynamic_context_middleware.datetime") as fake_datetime,
    ):
        fake_datetime.now.return_value.strftime.return_value = "2026-05-08, Friday"
        update = middleware.before_agent(initial_state, _fake_runtime())

    assert update["messages"][0].id == source_id
    assert update["messages"][1].id == f"{source_id}__user"
|
||||||
|
|
||||||
|
|
||||||
|
def test_message_without_id_gets_stable_uuid():
    """With no ID on the original message, one generated ID is shared by both returned messages."""
    middleware = _make_middleware()
    initial_state = {"messages": [HumanMessage(content="Hello", id=None)]}

    with (
        mock.patch("deerflow.agents.lead_agent.prompt._get_memory_context", return_value=""),
        mock.patch("deerflow.agents.middlewares.dynamic_context_middleware.datetime") as fake_datetime,
    ):
        fake_datetime.now.return_value.strftime.return_value = "2026-05-08, Friday"
        update = middleware.before_agent(initial_state, _fake_runtime())

    assert update is not None
    generated_id = update["messages"][0].id
    derived_id = update["messages"][1].id
    assert generated_id is not None
    assert generated_id != "None"
    assert derived_id == f"{generated_id}__user"
|
||||||
|
|
||||||
|
|
||||||
|
def test_user_message_containing_system_reminder_tag_does_not_prevent_injection():
    """User text that mentions '<system-reminder>' is not treated as an injected reminder."""
    middleware = _make_middleware()
    tricky_message = HumanMessage(content="What is <system-reminder>?", id="msg-1")

    with (
        mock.patch("deerflow.agents.lead_agent.prompt._get_memory_context", return_value=""),
        mock.patch("deerflow.agents.middlewares.dynamic_context_middleware.datetime") as fake_datetime,
    ):
        fake_datetime.now.return_value.strftime.return_value = "2026-05-08, Friday"
        update = middleware.before_agent({"messages": [tricky_message]}, _fake_runtime())

    # The user message lacks the reminder flag, so injection still takes place.
    assert update is not None
    assert update["messages"][0].additional_kwargs.get(_DYNAMIC_CONTEXT_REMINDER_KEY) is True
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Midnight crossing
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
def test_midnight_crossing_injects_date_update_as_separate_message():
    """A changed date yields a standalone date-update reminder ahead of the new turn.

    State holds a reminder dated 2026-05-08 while the mocked clock reports
    2026-05-09. The expected update has exactly two messages: the reminder,
    which takes over the latest HumanMessage's ID ("msg-2"), followed by the
    original user text re-issued under the derived ID "msg-2__user".
    """
    middleware = _make_middleware()
    previous_day_reminder = "<system-reminder>\n<current_date>2026-05-08, Friday</current_date>\n</system-reminder>"
    history = [
        _reminder_msg(previous_day_reminder, "msg-1"),
        HumanMessage(content="Hello", id="msg-1__user"),
        AIMessage(content="Response"),
        HumanMessage(content="Good morning", id="msg-2"),
    ]

    with mock.patch("deerflow.agents.middlewares.dynamic_context_middleware.datetime") as mock_dt:
        mock_dt.now.return_value.strftime.return_value = "2026-05-09, Saturday"
        result = middleware.before_agent({"messages": history}, _fake_runtime())

    assert result is not None
    msgs = result["messages"]
    assert len(msgs) == 2
    date_update, user_turn = msgs

    # Date-update reminder takes the current message's ID
    assert date_update.id == "msg-2"
    assert date_update.additional_kwargs.get(_DYNAMIC_CONTEXT_REMINDER_KEY) is True
    assert _SYSTEM_REMINDER_TAG in date_update.content
    assert "<current_date>2026-05-09, Saturday</current_date>" in date_update.content
    assert "Good morning" not in date_update.content  # reminder only

    # Original user text appended with derived ID
    assert user_turn.id == "msg-2__user"
    assert user_turn.content == "Good morning"
|
||||||
|
|
||||||
|
|
||||||
|
def test_midnight_crossing_id_swap():
    """Date-update reminder uses original ID; user message uses {id}__user.

    State contains a reminder dated 2026-05-08 followed by a new HumanMessage
    with ID "msg-2"; the mocked clock reports 2026-05-09. The returned update
    is expected to place the reminder under "msg-2" and the user text under
    "msg-2__user".
    """
    mw = _make_middleware()
    reminder_content = "<system-reminder>\n<current_date>2026-05-08, Friday</current_date>\n</system-reminder>"
    state = {
        "messages": [
            _reminder_msg(reminder_content, "msg-1"),
            HumanMessage(content="Next day message", id="msg-2"),
        ]
    }

    with mock.patch("deerflow.agents.middlewares.dynamic_context_middleware.datetime") as mock_dt:
        mock_dt.now.return_value.strftime.return_value = "2026-05-09, Saturday"
        result = mw.before_agent(state, _fake_runtime())

    # Guard first, as the sibling injection tests do: if no update is returned
    # the failure should be a clear assertion, not a TypeError from subscripting None.
    assert result is not None
    assert result["messages"][0].id == "msg-2"
    assert result["messages"][1].id == "msg-2__user"
|
||||||
|
|
||||||
|
|
||||||
|
def test_no_second_midnight_injection_once_date_updated():
    """Once a date-update reminder for today is in history, nothing more is injected.

    The history carries an initial reminder (2026-05-08) and a later
    date-update reminder (2026-05-09). With the mocked clock still on
    2026-05-09, ``before_agent`` is expected to return ``None`` for the
    third turn.
    """
    middleware = _make_middleware()
    first_day_content = "<system-reminder>\n<current_date>2026-05-08, Friday</current_date>\n</system-reminder>"
    date_update_content = "<system-reminder>\n<current_date>2026-05-09, Saturday</current_date>\n</system-reminder>"
    history = [
        _reminder_msg(first_day_content, "msg-1"),
        HumanMessage(content="Hello", id="msg-1__user"),
        AIMessage(content="Response"),
        _reminder_msg(date_update_content, "msg-2"),
        HumanMessage(content="Good morning", id="msg-2__user"),
        AIMessage(content="Good morning!"),
        HumanMessage(content="Third turn", id="msg-3"),
    ]

    with mock.patch("deerflow.agents.middlewares.dynamic_context_middleware.datetime") as mock_dt:
        mock_dt.now.return_value.strftime.return_value = "2026-05-09, Saturday"
        result = middleware.before_agent({"messages": history}, _fake_runtime())

    assert result is None  # same day as last injected date → no update
|
||||||
@@ -1,5 +1,6 @@
|
|||||||
"""Tests for TokenUsageMiddleware attribution annotations."""
|
"""Tests for TokenUsageMiddleware attribution annotations."""
|
||||||
|
|
||||||
|
import logging
|
||||||
from unittest.mock import MagicMock
|
from unittest.mock import MagicMock
|
||||||
|
|
||||||
from langchain_core.messages import AIMessage
|
from langchain_core.messages import AIMessage
|
||||||
@@ -17,6 +18,82 @@ def _make_runtime():
|
|||||||
|
|
||||||
|
|
||||||
class TestTokenUsageMiddleware:
|
class TestTokenUsageMiddleware:
|
||||||
|
def test_logs_cache_token_details(self, caplog):
    """Token detail breakdowns in usage_metadata appear in the usage log line.

    Feeds ``after_model`` an AIMessage whose ``usage_metadata`` includes both
    ``input_token_details`` and ``output_token_details`` and checks that the
    captured INFO output contains the totals and both detail dicts.
    """
    usage = {
        "input_tokens": 350,
        "output_tokens": 240,
        "total_tokens": 590,
        "input_token_details": {
            "audio": 10,
            "cache_creation": 200,
            "cache_read": 100,
        },
        "output_token_details": {
            "audio": 10,
            "reasoning": 200,
        },
    }
    message = AIMessage(content="Here is the final answer.", usage_metadata=usage)
    middleware = TokenUsageMiddleware()
    state = {"messages": [message]}

    with caplog.at_level(logging.INFO, logger="deerflow.agents.middlewares.token_usage_middleware"):
        result = middleware.after_model(state, _make_runtime())

    assert result is not None
    captured = caplog.text
    assert "LLM token usage: input=350 output=240 total=590" in captured
    assert "input_token_details={'audio': 10, 'cache_creation': 200, 'cache_read': 100}" in captured
    assert "output_token_details={'audio': 10, 'reasoning': 200}" in captured
|
||||||
|
|
||||||
|
def test_logs_basic_tokens_when_no_detail_fields_in_usage_metadata(self, caplog):
    """With only totals in usage_metadata, the log line carries counts and no details.

    The AIMessage supplies ``input_tokens``, ``output_tokens`` and
    ``total_tokens`` only; the captured output must contain the totals line
    and must not mention ``input_token_details``.
    """
    totals_only = {
        "input_tokens": 350,
        "output_tokens": 240,
        "total_tokens": 590,
    }
    message = AIMessage(content="Here is the final answer.", usage_metadata=totals_only)
    middleware = TokenUsageMiddleware()
    state = {"messages": [message]}

    with caplog.at_level(logging.INFO, logger="deerflow.agents.middlewares.token_usage_middleware"):
        result = middleware.after_model(state, _make_runtime())

    assert result is not None
    captured = caplog.text
    assert "LLM token usage: input=350 output=240 total=590" in captured
    assert "input_token_details" not in captured
|
||||||
|
|
||||||
|
def test_no_log_when_usage_metadata_is_missing(self, caplog):
    """No token usage line is emitted when the message lacks usage_metadata.

    Token counts are supplied only under ``response_metadata["usage"]``; the
    test expects ``after_model`` to return a non-None result while the
    captured output contains no "LLM token usage" text.
    """
    provider_usage = {
        "input_tokens": 350,
        "output_tokens": 240,
        "total_tokens": 590,
    }
    message = AIMessage(
        content="Here is the final answer.",
        response_metadata={"usage": provider_usage},
    )
    middleware = TokenUsageMiddleware()
    state = {"messages": [message]}

    with caplog.at_level(logging.INFO, logger="deerflow.agents.middlewares.token_usage_middleware"):
        result = middleware.after_model(state, _make_runtime())

    assert result is not None
    assert "LLM token usage" not in caplog.text
|
||||||
|
|
||||||
def test_annotates_todo_updates_with_structured_actions(self):
|
def test_annotates_todo_updates_with_structured_actions(self):
|
||||||
middleware = TokenUsageMiddleware()
|
middleware = TokenUsageMiddleware()
|
||||||
message = AIMessage(
|
message = AIMessage(
|
||||||
|
|||||||
Reference in New Issue
Block a user