"""Integration test: worker.run_agent injects Langfuse trace metadata. Verifies that the agent factory's resulting graph receives a ``RunnableConfig`` whose ``metadata`` carries the Langfuse reserved keys (``langfuse_session_id`` / ``langfuse_user_id`` / ``langfuse_trace_name``). """ from __future__ import annotations import asyncio import pytest from deerflow.runtime.runs.manager import RunRecord from deerflow.runtime.runs.schemas import DisconnectMode, RunStatus from deerflow.runtime.runs.worker import RunContext, run_agent class _FakeAgent: """Minimal LangGraph-like graph that captures the runnable config.""" def __init__(self) -> None: self.captured_config: dict | None = None self.metadata: dict = {} # Worker may assign these attributes; need them to exist. self.checkpointer = None self.store = None self.interrupt_before_nodes: list[str] = [] self.interrupt_after_nodes: list[str] = [] async def astream(self, graph_input, *, config, stream_mode, **kwargs): self.captured_config = config # Empty async generator — no chunks produced. return yield # pragma: no cover (makes this an async generator) class _FakeRunManager: async def set_status(self, *_args, **_kwargs) -> None: return None async def update_model_name(self, *_args, **_kwargs) -> None: return None async def update_run_completion(self, *_args, **_kwargs) -> None: return None class _FakeBridge: def __init__(self) -> None: self.events: list[tuple[str, object]] = [] async def publish(self, _run_id, event, payload) -> None: self.events.append((event, payload)) async def publish_end(self, _run_id) -> None: self.events.append(("end", None)) async def cleanup(self, _run_id, *, delay: int = 0) -> None: return None @pytest.fixture(autouse=True) def _clear_tracing_env(monkeypatch): from deerflow.config.tracing_config import reset_tracing_config for name in ("LANGFUSE_TRACING", "LANGFUSE_PUBLIC_KEY", "LANGFUSE_SECRET_KEY", "LANGFUSE_BASE_URL"): monkeypatch.delenv(name, raising=False) reset_tracing_config() yield reset_tracing_config() @pytest.mark.asyncio async def test_run_agent_injects_langfuse_metadata(monkeypatch): monkeypatch.setenv("LANGFUSE_TRACING", "true") monkeypatch.setenv("LANGFUSE_PUBLIC_KEY", "pk-lf-test") monkeypatch.setenv("LANGFUSE_SECRET_KEY", "sk-lf-test") from deerflow.config.tracing_config import reset_tracing_config reset_tracing_config() fake_agent = _FakeAgent() def agent_factory(config): return fake_agent record = RunRecord( run_id="run-1", thread_id="thread-xyz", assistant_id="lead-agent", status=RunStatus.pending, on_disconnect=DisconnectMode.cancel, model_name="gpt-4o", ) record.abort_event = asyncio.Event() ctx = RunContext(checkpointer=None) await run_agent( _FakeBridge(), _FakeRunManager(), record, ctx=ctx, agent_factory=agent_factory, graph_input={"messages": []}, config={"configurable": {"thread_id": "thread-xyz"}}, ) assert fake_agent.captured_config is not None, "astream was not invoked" metadata = fake_agent.captured_config.get("metadata") or {} assert metadata.get("langfuse_session_id") == "thread-xyz" # conftest.py autouse fixture injects ``test-user-autouse`` into the # contextvar — the worker should read it via ``get_effective_user_id``. user_id = metadata.get("langfuse_user_id") assert user_id == "test-user-autouse", f"expected test-user-autouse, got {user_id}" assert metadata.get("langfuse_trace_name") == "lead-agent" tags = metadata.get("langfuse_tags") or [] assert "model:gpt-4o" in tags @pytest.mark.asyncio async def test_run_agent_falls_back_to_default_user_when_unset(monkeypatch): """When no user is in the contextvar, langfuse_user_id falls back to 'default'. Uses ``monkeypatch.setattr`` to redirect ``get_effective_user_id`` to return ``"default"`` rather than directly mutating the contextvar — direct contextvar operations across pytest test boundaries have produced spooky cross-file pollution when combined with the langfuse OTel global tracer provider. """ monkeypatch.setenv("LANGFUSE_TRACING", "true") monkeypatch.setenv("LANGFUSE_PUBLIC_KEY", "pk-lf-test") monkeypatch.setenv("LANGFUSE_SECRET_KEY", "sk-lf-test") from deerflow.config.tracing_config import reset_tracing_config from deerflow.runtime.runs import worker as worker_module from deerflow.runtime.user_context import DEFAULT_USER_ID reset_tracing_config() monkeypatch.setattr(worker_module, "get_effective_user_id", lambda: DEFAULT_USER_ID) fake_agent = _FakeAgent() def agent_factory(config): return fake_agent record = RunRecord( run_id="run-fallback", thread_id="thread-fb", assistant_id="lead-agent", status=RunStatus.pending, on_disconnect=DisconnectMode.cancel, ) record.abort_event = asyncio.Event() ctx = RunContext(checkpointer=None) await run_agent( _FakeBridge(), _FakeRunManager(), record, ctx=ctx, agent_factory=agent_factory, graph_input={"messages": []}, config={"configurable": {"thread_id": "thread-fb"}}, ) metadata = fake_agent.captured_config.get("metadata") or {} assert metadata.get("langfuse_user_id") == "default" @pytest.mark.asyncio async def test_run_agent_preserves_caller_metadata_overrides(monkeypatch): """Caller-provided langfuse_* keys must NOT be overridden by the default injection.""" monkeypatch.setenv("LANGFUSE_TRACING", "true") monkeypatch.setenv("LANGFUSE_PUBLIC_KEY", "pk-lf-test") monkeypatch.setenv("LANGFUSE_SECRET_KEY", "sk-lf-test") from deerflow.config.tracing_config import reset_tracing_config reset_tracing_config() fake_agent = _FakeAgent() def agent_factory(config): return fake_agent record = RunRecord( run_id="run-2", thread_id="thread-default", assistant_id="lead-agent", status=RunStatus.pending, on_disconnect=DisconnectMode.cancel, ) record.abort_event = asyncio.Event() ctx = RunContext(checkpointer=None) await run_agent( _FakeBridge(), _FakeRunManager(), record, ctx=ctx, agent_factory=agent_factory, graph_input={"messages": []}, config={ "configurable": {"thread_id": "thread-default"}, "metadata": { "langfuse_session_id": "custom-session-id", "langfuse_user_id": "explicit-user", }, }, ) metadata = fake_agent.captured_config.get("metadata") or {} # Caller-supplied keys win. assert metadata["langfuse_session_id"] == "custom-session-id" assert metadata["langfuse_user_id"] == "explicit-user" # Worker still fills in keys that the caller didn't set. assert metadata["langfuse_trace_name"] == "lead-agent" @pytest.mark.asyncio async def test_run_agent_skips_metadata_when_langfuse_disabled(monkeypatch): fake_agent = _FakeAgent() def agent_factory(config): return fake_agent record = RunRecord( run_id="run-3", thread_id="thread-noop", assistant_id="lead-agent", status=RunStatus.pending, on_disconnect=DisconnectMode.cancel, ) record.abort_event = asyncio.Event() ctx = RunContext(checkpointer=None) await run_agent( _FakeBridge(), _FakeRunManager(), record, ctx=ctx, agent_factory=agent_factory, graph_input={"messages": []}, config={"configurable": {"thread_id": "thread-noop"}}, ) metadata = fake_agent.captured_config.get("metadata") or {} assert "langfuse_session_id" not in metadata assert "langfuse_user_id" not in metadata assert "langfuse_trace_name" not in metadata