mirror of
https://github.com/bytedance/deer-flow.git
synced 2026-05-20 23:21:06 +00:00
4b139fb689
Add request-scoped contextvar-based owner filtering to threads_meta,
runs, run_events, and feedback repositories. Router code is unchanged
— isolation is enforced at the storage layer so that any caller that
forgets to pass owner_id still gets filtered results, and new routes
cannot accidentally leak data.
Core infrastructure
-------------------
- deerflow/runtime/user_context.py (new):
- ContextVar[CurrentUser | None] with default None
- runtime_checkable CurrentUser Protocol (structural subtype with .id)
- set/reset/get/require helpers
- AUTO sentinel + resolve_owner_id(value, method_name) for sentinel
three-state resolution: AUTO reads contextvar, explicit str
overrides, explicit None bypasses the filter (for migration/CLI)
Repository changes
------------------
- ThreadMetaRepository: create/get/search/update_*/delete gain
owner_id=AUTO kwarg; read paths filter by owner, writes stamp it,
mutations check ownership before applying
- RunRepository: put/get/list_by_thread/delete gain owner_id=AUTO kwarg
- FeedbackRepository: create/get/list_by_run/list_by_thread/delete
gain owner_id=AUTO kwarg
- DbRunEventStore: list_messages/list_events/list_messages_by_run/
count_messages/delete_by_thread/delete_by_run gain owner_id=AUTO
kwarg. Write paths (put/put_batch) read contextvar softly: when a
request-scoped user is available, owner_id is stamped; background
worker writes without a user context pass None which is valid
(orphan row to be bound by migration)
Schema
------
- persistence/models/run_event.py: RunEventRow.owner_id: Mapped[
str | None] = mapped_column(String(64), nullable=True, index=True)
- No alembic migration needed: 2.0 ships fresh, Base.metadata.create_all
picks up the new column automatically
Middleware
----------
- auth_middleware.py: after cookie check, call get_optional_user_from_
request to load the real User, stamp it into request.state.user AND
the contextvar via set_current_user, reset in a try/finally. Public
paths and unauthenticated requests continue without contextvar, and
@require_auth handles the strict 401 path
Test infrastructure
-------------------
- tests/conftest.py: @pytest.fixture(autouse=True) _auto_user_context
sets a default SimpleNamespace(id="test-user-autouse") on every test
unless marked @pytest.mark.no_auto_user. Keeps existing 20+
persistence tests passing without modification
- pyproject.toml [tool.pytest.ini_options]: register no_auto_user
marker so pytest does not emit warnings for opt-out tests
- tests/test_user_context.py: 6 tests covering three-state semantics,
Protocol duck typing, and require/optional APIs
- tests/test_thread_meta_repo.py: one test updated to pass owner_id=
None explicitly where it was previously relying on the old default
Test results
------------
- test_user_context.py: 6 passed
- test_auth*.py + test_langgraph_auth.py + test_ensure_admin.py: 127 passed
- test_run_event_store / test_run_repository / test_thread_meta_repo
/ test_feedback: 92 passed
- Full backend suite: 1905 passed, 2 failed (both @requires_llm flaky
integration tests unrelated to auth), 1 skipped
149 lines
5.2 KiB
Python
149 lines
5.2 KiB
Python
"""Request-scoped user context for owner-based authorization.
|
|
|
|
This module holds a :class:`~contextvars.ContextVar` that the gateway's
|
|
auth middleware sets after a successful authentication. Repository
|
|
methods read the contextvar via a sentinel default parameter, letting
|
|
routers stay free of ``owner_id`` boilerplate.
|
|
|
|
Three-state semantics for the repository ``owner_id`` parameter (the
|
|
consumer side of this module lives in ``deerflow.persistence.*``):
|
|
|
|
- ``AUTO`` (module-level sentinel, default): read from contextvar;
|
|
raise :class:`RuntimeError` if unset.
|
|
- Explicit ``str``: use the provided value, overriding contextvar.
|
|
- Explicit ``None``: no WHERE clause — used only by migration scripts
|
|
and admin CLIs that intentionally bypass isolation.
|
|
|
|
Dependency direction
|
|
--------------------
|
|
``persistence`` (lower layer) reads from this module; ``gateway.auth``
|
|
(higher layer) writes to it. ``CurrentUser`` is defined here as a
|
|
:class:`typing.Protocol` so that ``persistence`` never needs to import
|
|
the concrete ``User`` class from ``gateway.auth.models``. Any object
|
|
with an ``.id: str`` attribute structurally satisfies the protocol.
|
|
|
|
Asyncio semantics
|
|
-----------------
|
|
``ContextVar`` is task-local under asyncio, not thread-local. Each
|
|
FastAPI request runs in its own task, so the context is naturally
|
|
isolated. ``asyncio.create_task`` and ``asyncio.to_thread`` inherit the
|
|
parent task's context, which is typically the intended behaviour; if
|
|
a background task must *not* see the foreground user, run it inside a
|
|
fresh ``contextvars.Context()`` (``copy_context()`` would carry the user along).
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
from contextvars import ContextVar, Token
|
|
from typing import Final, Protocol, runtime_checkable
|
|
|
|
|
|
@runtime_checkable
class CurrentUser(Protocol):
    """Minimal structural view of an authenticated user.

    The only member required is ``id``. Because the protocol is
    decorated with :func:`typing.runtime_checkable`, ``isinstance``
    checks against it are supported; such checks only verify that an
    ``id`` attribute is present, not that it is a ``str``.
    """

    # Identifier of the user; the sole member of the protocol.
    id: str
|
|
|
|
|
|
# Context variable holding the user bound to the current execution
# context; it resolves to ``None`` until a user has been set.
_current_user: Final[ContextVar["CurrentUser | None"]] = ContextVar("deerflow_current_user", default=None)
|
|
|
|
|
|
def set_current_user(user: CurrentUser) -> Token[CurrentUser | None]:
    """Bind ``user`` to the current-user context variable.

    Returns:
        The token produced by ``ContextVar.set``. Hand it to
        :func:`reset_current_user` (ideally from a ``finally`` block)
        to put the previous value back.
    """
    reset_token = _current_user.set(user)
    return reset_token
|
|
|
|
|
|
def reset_current_user(token: Token[CurrentUser | None]) -> None:
    """Undo a :func:`set_current_user` call.

    Args:
        token: The token that the matching ``set`` call returned; the
            context variable goes back to the value it held before
            that call.
    """
    _current_user.reset(token)
    return None
|
|
|
|
|
|
def get_current_user() -> CurrentUser | None:
    """Look up the user bound to the current context, if any.

    Never raises: when nothing has been set, the context variable's
    default (``None``) is returned, so callers that can work without a
    user may call this unconditionally.
    """
    current = _current_user.get()
    return current
|
|
|
|
|
|
def require_current_user() -> CurrentUser:
    """Return the user bound to the current context, insisting one exists.

    Returns:
        The user currently stored in the context variable.

    Raises:
        RuntimeError: If no user has been set in the current context.
    """
    current = _current_user.get()
    if current is not None:
        return current
    raise RuntimeError("repository accessed without user context")
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Sentinel-based owner_id resolution
|
|
# ---------------------------------------------------------------------------
|
|
#
|
|
# Repository methods accept an ``owner_id`` keyword-only argument that
|
|
# defaults to ``AUTO``. The three possible values drive distinct
|
|
# behaviours; see the docstring on :func:`resolve_owner_id`.
|
|
|
|
|
|
class _AutoSentinel:
    """Marker type whose single instance means 'take owner_id from the contextvar'."""

    # Cached instance; stays ``None`` until the first construction.
    _instance: "_AutoSentinel | None" = None

    def __new__(cls) -> "_AutoSentinel":
        # Reuse the cached instance when there is one, so that every
        # construction yields the same object.
        existing = cls._instance
        if existing is not None:
            return existing
        created = super().__new__(cls)
        cls._instance = created
        return created

    def __repr__(self) -> str:
        return "<AUTO>"


# The sentinel instance used as the default for ``owner_id`` parameters.
AUTO: Final[_AutoSentinel] = _AutoSentinel()
|
|
|
|
|
|
def resolve_owner_id(
    value: "str | None | _AutoSentinel",
    *,
    method_name: str = "repository method",
) -> str | None:
    """Turn a repository method's ``owner_id`` argument into a concrete value.

    Args:
        value: One of three things:

            - the ``AUTO`` sentinel: the id of the user currently held
              in the context variable is used;
            - an explicit ``str``: returned unchanged, whatever the
              context variable holds;
            - an explicit ``None``: returned unchanged, signalling that
              the caller wants no owner filter (migration scripts and
              CLI tools that deliberately bypass isolation).
        method_name: Name of the calling method; only used to build the
            error message.

    Returns:
        The resolved owner id, or ``None`` when the caller opted out.

    Raises:
        RuntimeError: If ``value`` is the sentinel and no user is set
            in the current context.
    """
    # Explicit values (a str or None) pass straight through.
    if not isinstance(value, _AutoSentinel):
        return value

    current = _current_user.get()
    if current is not None:
        return current.id

    raise RuntimeError(
        f"{method_name} called with owner_id=AUTO but no user context is set; "
        "pass an explicit owner_id, set the contextvar via auth middleware, "
        "or opt out with owner_id=None for migration/CLI paths."
    )
|