refactor(persistence): rename owner_id to user_id and thread_meta_repo to thread_store

Rename owner_id to user_id across all persistence models, repositories, stores, routers, and tests for clearer semantics. Rename thread_meta_repo to thread_store for consistency with run_store/run_event_store naming. Add ThreadMetaStore return type annotation to get_thread_store(). Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-05-23 08:25:57 +00:00 · 2026-04-10 15:05:10 +08:00
parent 03952eca53
commit 8da1903168
32 changed files with 256 additions and 276 deletions
@@ -3,16 +3,16 @@
 The production gateway runs ``AuthMiddleware`` (validates the JWT cookie)
 ahead of every router, plus ``@require_permission(owner_check=True)``
 decorators that read ``request.state.auth`` and call
-``thread_meta_repo.check_access``. Router-level unit tests construct
+``thread_store.check_access``. Router-level unit tests construct
 **bare** FastAPI apps that include only one router — they have neither
-the auth middleware nor a real thread_meta_repo, so the decorators raise
+the auth middleware nor a real thread_store, so the decorators raise
 401 (TestClient path) or ValueError (direct-call path).

 This module provides two surfaces:

 1. :func:`make_authed_test_app` — wraps ``FastAPI()`` with a tiny
   ``BaseHTTPMiddleware`` that stamps a fake user / AuthContext on every
-   request, plus a permissive ``thread_meta_repo`` mock on
+   request, plus a permissive ``thread_store`` mock on
   ``app.state``. Use from TestClient-based router tests.

 2. :func:`call_unwrapped` — invokes the underlying function bypassing
@@ -86,20 +86,20 @@ def make_authed_test_app(
    user_factory: Callable[[], User] | None = None,
    owner_check_passes: bool = True,
 ) -> FastAPI:
-    """Build a FastAPI test app with stub auth + permissive thread_meta_repo.
+    """Build a FastAPI test app with stub auth + permissive thread_store.

    Args:
        user_factory: Override the default test user. Must return a fully
            populated :class:`User`. Useful for cross-user isolation tests
            that need a stable id across requests.
-        owner_check_passes: When True (default), ``thread_meta_repo.check_access``
+        owner_check_passes: When True (default), ``thread_store.check_access``
            returns True for every call so ``@require_permission(owner_check=True)``
            never blocks the route under test. Pass False to verify that
            permission failures surface correctly.

    Returns:
        A ``FastAPI`` app with the stub middleware installed and
-        ``app.state.thread_meta_repo`` set to a permissive mock. The
+        ``app.state.thread_store`` set to a permissive mock. The
        caller is still responsible for ``app.include_router(...)``.
    """
    factory = user_factory or _make_stub_user
@@ -108,7 +108,7 @@ def make_authed_test_app(

    repo = MagicMock()
    repo.check_access = AsyncMock(return_value=owner_check_passes)
-    app.state.thread_meta_repo = repo
+    app.state.thread_store = repo

    return app

@@ -60,7 +60,7 @@ def provisioner_module():
 # Auto-set user context for every test unless marked no_auto_user
 # ---------------------------------------------------------------------------
 #
-# Repository methods read ``owner_id`` from a contextvar by default
+# Repository methods read ``user_id`` from a contextvar by default
 # (see ``deerflow.runtime.user_context``). Without this fixture, every
 # pre-existing persistence test would raise RuntimeError because the
 # contextvar is unset. The fixture sets a default test user on every
@@ -6,13 +6,13 @@ from unittest.mock import AsyncMock, MagicMock, patch
 import pytest

 import deerflow.config.app_config as app_config_module
-from deerflow.runtime.checkpointer import get_checkpointer, reset_checkpointer
 from deerflow.config.checkpointer_config import (
    CheckpointerConfig,
    get_checkpointer_config,
    load_checkpointer_config_from_dict,
    set_checkpointer_config,
 )
+from deerflow.runtime.checkpointer import get_checkpointer, reset_checkpointer


@pytest.fixture(autouse=True)
@@ -199,12 +199,12 @@ def test_migration_failure_is_non_fatal():
 # ── Section 5.1-5.6 upgrade path: orphan thread migration ────────────────


-def test_migrate_orphaned_threads_stamps_owner_id_on_unowned_rows():
+def test_migrate_orphaned_threads_stamps_user_id_on_unowned_rows():
    """First boot finds Store-only legacy threads → stamps admin's id.

    Validates the **TC-UPG-02 upgrade story**: an operator running main
    (no auth) accumulates threads in the LangGraph Store namespace
-    ``("threads",)`` with no ``metadata.owner_id``. After upgrading to
+    ``("threads",)`` with no ``metadata.user_id``. After upgrading to
    feat/auth-on-2.0-rc, the first ``_ensure_admin_user`` boot should
    rewrite each unowned item with the freshly created admin's id.
    """
@@ -215,7 +215,7 @@ def test_migrate_orphaned_threads_stamps_owner_id_on_unowned_rows():
        SimpleNamespace(key="t1", value={"metadata": {"title": "old-thread-1"}}),
        SimpleNamespace(key="t2", value={"metadata": {"title": "old-thread-2"}}),
        SimpleNamespace(key="t3", value={"metadata": {}}),
-        SimpleNamespace(key="t4", value={"metadata": {"owner_id": "someone-else", "title": "preserved"}}),
+        SimpleNamespace(key="t4", value={"metadata": {"user_id": "someone-else", "title": "preserved"}}),
    ]
    store = AsyncMock()
    # asearch returns the entire batch on first call, then an empty page
@@ -235,11 +235,11 @@ def test_migrate_orphaned_threads_stamps_owner_id_on_unowned_rows():
    assert len(aput_calls) == 3
    rewritten_keys = {call[1] for call in aput_calls}
    assert rewritten_keys == {"t1", "t2", "t3"}
-    # Each rewrite carries the new owner_id; titles preserved where present.
+    # Each rewrite carries the new user_id; titles preserved where present.
    by_key = {call[1]: call[2] for call in aput_calls}
-    assert by_key["t1"]["metadata"]["owner_id"] == "admin-id-42"
+    assert by_key["t1"]["metadata"]["user_id"] == "admin-id-42"
    assert by_key["t1"]["metadata"]["title"] == "old-thread-1"
-    assert by_key["t3"]["metadata"]["owner_id"] == "admin-id-42"
+    assert by_key["t3"]["metadata"]["user_id"] == "admin-id-42"
    # The pre-owned item must NOT have been rewritten.
    assert "t4" not in rewritten_keys

@@ -60,8 +60,8 @@ class TestFeedbackRepository:
    @pytest.mark.anyio
    async def test_create_with_owner(self, tmp_path):
        repo = await _make_feedback_repo(tmp_path)
-        record = await repo.create(run_id="r1", thread_id="t1", rating=1, owner_id="user-1")
-        assert record["owner_id"] == "user-1"
+        record = await repo.create(run_id="r1", thread_id="t1", rating=1, user_id="user-1")
+        assert record["user_id"] == "user-1"
        await _cleanup()

    @pytest.mark.anyio
@@ -175,46 +175,46 @@ def _make_ctx(user_id):
 def test_filter_injects_user_id():
    value = {}
    asyncio.run(add_owner_filter(_make_ctx("user-a"), value))
-    assert value["metadata"]["owner_id"] == "user-a"
+    assert value["metadata"]["user_id"] == "user-a"


 def test_filter_preserves_existing_metadata():
    value = {"metadata": {"title": "hello"}}
    asyncio.run(add_owner_filter(_make_ctx("user-a"), value))
-    assert value["metadata"]["owner_id"] == "user-a"
+    assert value["metadata"]["user_id"] == "user-a"
    assert value["metadata"]["title"] == "hello"


 def test_filter_returns_user_id_dict():
    result = asyncio.run(add_owner_filter(_make_ctx("user-x"), {}))
-    assert result == {"owner_id": "user-x"}
+    assert result == {"user_id": "user-x"}


 def test_filter_read_write_consistency():
    value = {}
    filter_dict = asyncio.run(add_owner_filter(_make_ctx("user-1"), value))
-    assert value["metadata"]["owner_id"] == filter_dict["owner_id"]
+    assert value["metadata"]["user_id"] == filter_dict["user_id"]


 def test_different_users_different_filters():
    f_a = asyncio.run(add_owner_filter(_make_ctx("a"), {}))
    f_b = asyncio.run(add_owner_filter(_make_ctx("b"), {}))
-    assert f_a["owner_id"] != f_b["owner_id"]
+    assert f_a["user_id"] != f_b["user_id"]


 def test_filter_overrides_conflicting_user_id():
    """If value already has a different user_id in metadata, it gets overwritten."""
-    value = {"metadata": {"owner_id": "attacker"}}
+    value = {"metadata": {"user_id": "attacker"}}
    asyncio.run(add_owner_filter(_make_ctx("real-owner"), value))
-    assert value["metadata"]["owner_id"] == "real-owner"
+    assert value["metadata"]["user_id"] == "real-owner"


 def test_filter_with_empty_metadata():
    """Explicit empty metadata dict is fine."""
    value = {"metadata": {}}
    result = asyncio.run(add_owner_filter(_make_ctx("user-z"), value))
-    assert value["metadata"]["owner_id"] == "user-z"
-    assert result == {"owner_id": "user-z"}
+    assert value["metadata"]["user_id"] == "user-z"
+    assert result == {"user_id": "user-z"}


 # ── Gateway parity ───────────────────────────────────────────────────────
@@ -9,8 +9,8 @@ These tests bypass the HTTP layer and exercise the storage-layer
 owner filter directly by switching the ``user_context`` contextvar
 between two users. The safety property under test is:

-  After a repository write with owner_id=A, a subsequent read with
-  owner_id=B must not return the row, and vice versa.
+  After a repository write with user_id=A, a subsequent read with
+  user_id=B must not return the row, and vice versa.

 The HTTP layer is covered by test_auth_middleware.py, which proves
 that a request cookie reaches the ``set_current_user`` call. Together
@@ -431,13 +431,13 @@ async def test_repository_without_context_raises(tmp_path):
        await cleanup()


-# ── Escape hatch: explicit owner_id=None bypasses filter (for migration) ──
+# ── Escape hatch: explicit user_id=None bypasses filter (for migration) ──


@pytest.mark.anyio
@pytest.mark.no_auto_user
 async def test_explicit_none_bypasses_filter(tmp_path):
-    """Migration scripts pass owner_id=None to see all rows regardless of owner."""
+    """Migration scripts pass user_id=None to see all rows regardless of owner."""
    from deerflow.persistence.engine import get_session_factory
    from deerflow.persistence.thread_meta import ThreadMetaRepository

@@ -452,14 +452,14 @@ async def test_explicit_none_bypasses_filter(tmp_path):
            await repo.create("t-beta")

        # Migration-style read: no contextvar, explicit None bypass.
-        all_rows = await repo.search(owner_id=None)
+        all_rows = await repo.search(user_id=None)
        thread_ids = {r["thread_id"] for r in all_rows}
        assert thread_ids == {"t-alpha", "t-beta"}

        # Explicit get with None does not apply the filter either.
-        row_a = await repo.get("t-alpha", owner_id=None)
+        row_a = await repo.get("t-alpha", user_id=None)
        assert row_a is not None
-        row_b = await repo.get("t-beta", owner_id=None)
+        row_b = await repo.get("t-beta", user_id=None)
        assert row_b is not None
    finally:
        await cleanup()
@@ -2,7 +2,7 @@

 Tests:
 1. DatabaseConfig property derivation (paths, URLs)
-2. MemoryRunStore CRUD + owner_id filtering
+2. MemoryRunStore CRUD + user_id filtering
 3. Base.to_dict() via inspect mixin
 4. Engine init/close lifecycle (memory + SQLite)
 5. Postgres missing-dep error message
@@ -106,17 +106,17 @@ class TestMemoryRunStore:

    @pytest.mark.anyio
    async def test_list_by_thread_owner_filter(self, store):
-        await store.put("r1", thread_id="t1", owner_id="alice")
-        await store.put("r2", thread_id="t1", owner_id="bob")
-        rows = await store.list_by_thread("t1", owner_id="alice")
+        await store.put("r1", thread_id="t1", user_id="alice")
+        await store.put("r2", thread_id="t1", user_id="bob")
+        rows = await store.list_by_thread("t1", user_id="alice")
        assert len(rows) == 1
-        assert rows[0]["owner_id"] == "alice"
+        assert rows[0]["user_id"] == "alice"

    @pytest.mark.anyio
    async def test_owner_none_returns_all(self, store):
-        await store.put("r1", thread_id="t1", owner_id="alice")
-        await store.put("r2", thread_id="t1", owner_id="bob")
-        rows = await store.list_by_thread("t1", owner_id=None)
+        await store.put("r1", thread_id="t1", user_id="alice")
+        await store.put("r2", thread_id="t1", user_id="bob")
+        rows = await store.list_by_thread("t1", user_id=None)
        assert len(rows) == 2

    @pytest.mark.anyio
@@ -73,11 +73,11 @@ class TestRunRepository:
    @pytest.mark.anyio
    async def test_list_by_thread_owner_filter(self, tmp_path):
        repo = await _make_repo(tmp_path)
-        await repo.put("r1", thread_id="t1", owner_id="alice")
-        await repo.put("r2", thread_id="t1", owner_id="bob")
-        rows = await repo.list_by_thread("t1", owner_id="alice")
+        await repo.put("r1", thread_id="t1", user_id="alice")
+        await repo.put("r2", thread_id="t1", user_id="bob")
+        rows = await repo.list_by_thread("t1", user_id="alice")
        assert len(rows) == 1
-        assert rows[0]["owner_id"] == "alice"
+        assert rows[0]["user_id"] == "alice"
        await _cleanup()

    @pytest.mark.anyio
@@ -189,8 +189,8 @@ class TestRunRepository:
    @pytest.mark.anyio
    async def test_owner_none_returns_all(self, tmp_path):
        repo = await _make_repo(tmp_path)
-        await repo.put("r1", thread_id="t1", owner_id="alice")
-        await repo.put("r2", thread_id="t1", owner_id="bob")
-        rows = await repo.list_by_thread("t1", owner_id=None)
+        await repo.put("r1", thread_id="t1", user_id="alice")
+        await repo.put("r2", thread_id="t1", user_id="bob")
+        rows = await repo.list_by_thread("t1", user_id=None)
        assert len(rows) == 2
        await _cleanup()
@@ -47,7 +47,7 @@ def test_generate_suggestions_parses_and_limits(monkeypatch):
    monkeypatch.setattr(suggestions, "create_chat_model", lambda **kwargs: fake_model)

    # Bypass the require_permission decorator (which needs request +
-    # thread_meta_repo) — these tests cover the parsing logic.
+    # thread_store) — these tests cover the parsing logic.
    result = asyncio.run(suggestions.generate_suggestions.__wrapped__("t1", req, request=None))

    assert result.suggestions == ["Q1", "Q2", "Q3"]
@@ -67,7 +67,7 @@ def test_generate_suggestions_parses_list_block_content(monkeypatch):
    monkeypatch.setattr(suggestions, "create_chat_model", lambda **kwargs: fake_model)

    # Bypass the require_permission decorator (which needs request +
-    # thread_meta_repo) — these tests cover the parsing logic.
+    # thread_store) — these tests cover the parsing logic.
    result = asyncio.run(suggestions.generate_suggestions.__wrapped__("t1", req, request=None))

    assert result.suggestions == ["Q1", "Q2"]
@@ -87,7 +87,7 @@ def test_generate_suggestions_parses_output_text_block_content(monkeypatch):
    monkeypatch.setattr(suggestions, "create_chat_model", lambda **kwargs: fake_model)

    # Bypass the require_permission decorator (which needs request +
-    # thread_meta_repo) — these tests cover the parsing logic.
+    # thread_store) — these tests cover the parsing logic.
    result = asyncio.run(suggestions.generate_suggestions.__wrapped__("t1", req, request=None))

    assert result.suggestions == ["Q1", "Q2"]
@@ -104,7 +104,7 @@ def test_generate_suggestions_returns_empty_on_model_error(monkeypatch):
    monkeypatch.setattr(suggestions, "create_chat_model", lambda **kwargs: fake_model)

    # Bypass the require_permission decorator (which needs request +
-    # thread_meta_repo) — these tests cover the parsing logic.
+    # thread_store) — these tests cover the parsing logic.
    result = asyncio.run(suggestions.generate_suggestions.__wrapped__("t1", req, request=None))

    assert result.suggestions == []
@@ -43,8 +43,8 @@ class TestThreadMetaRepository:
    @pytest.mark.anyio
    async def test_create_with_owner_and_display_name(self, tmp_path):
        repo = await _make_repo(tmp_path)
-        record = await repo.create("t1", owner_id="user1", display_name="My Thread")
-        assert record["owner_id"] == "user1"
+        record = await repo.create("t1", user_id="user1", display_name="My Thread")
+        assert record["user_id"] == "user1"
        assert record["display_name"] == "My Thread"
        await _cleanup()

@@ -61,26 +61,6 @@ class TestThreadMetaRepository:
        assert await repo.get("nonexistent") is None
        await _cleanup()

-    @pytest.mark.anyio
-    async def test_list_by_owner(self, tmp_path):
-        repo = await _make_repo(tmp_path)
-        await repo.create("t1", owner_id="user1")
-        await repo.create("t2", owner_id="user1")
-        await repo.create("t3", owner_id="user2")
-        results = await repo.list_by_owner("user1")
-        assert len(results) == 2
-        assert all(r["owner_id"] == "user1" for r in results)
-        await _cleanup()
-
-    @pytest.mark.anyio
-    async def test_list_by_owner_with_limit_and_offset(self, tmp_path):
-        repo = await _make_repo(tmp_path)
-        for i in range(5):
-            await repo.create(f"t{i}", owner_id="user1")
-        results = await repo.list_by_owner("user1", limit=2, offset=1)
-        assert len(results) == 2
-        await _cleanup()
-
    @pytest.mark.anyio
    async def test_check_access_no_record_allows(self, tmp_path):
        repo = await _make_repo(tmp_path)
@@ -90,23 +70,23 @@ class TestThreadMetaRepository:
    @pytest.mark.anyio
    async def test_check_access_owner_matches(self, tmp_path):
        repo = await _make_repo(tmp_path)
-        await repo.create("t1", owner_id="user1")
+        await repo.create("t1", user_id="user1")
        assert await repo.check_access("t1", "user1") is True
        await _cleanup()

    @pytest.mark.anyio
    async def test_check_access_owner_mismatch(self, tmp_path):
        repo = await _make_repo(tmp_path)
-        await repo.create("t1", owner_id="user1")
+        await repo.create("t1", user_id="user1")
        assert await repo.check_access("t1", "user2") is False
        await _cleanup()

    @pytest.mark.anyio
    async def test_check_access_no_owner_allows_all(self, tmp_path):
        repo = await _make_repo(tmp_path)
-        # Explicit owner_id=None to bypass the new AUTO default that
+        # Explicit user_id=None to bypass the new AUTO default that
        # would otherwise pick up the test user from the autouse fixture.
-        await repo.create("t1", owner_id=None)
+        await repo.create("t1", user_id=None)
        assert await repo.check_access("t1", "anyone") is True
        await _cleanup()

@@ -125,27 +105,27 @@ class TestThreadMetaRepository:
    @pytest.mark.anyio
    async def test_check_access_strict_owner_match_allowed(self, tmp_path):
        repo = await _make_repo(tmp_path)
-        await repo.create("t1", owner_id="user1")
+        await repo.create("t1", user_id="user1")
        assert await repo.check_access("t1", "user1", require_existing=True) is True
        await _cleanup()

    @pytest.mark.anyio
    async def test_check_access_strict_owner_mismatch_denied(self, tmp_path):
        repo = await _make_repo(tmp_path)
-        await repo.create("t1", owner_id="user1")
+        await repo.create("t1", user_id="user1")
        assert await repo.check_access("t1", "user2", require_existing=True) is False
        await _cleanup()

    @pytest.mark.anyio
    async def test_check_access_strict_null_owner_still_allowed(self, tmp_path):
-        """Even in strict mode, a row with NULL owner_id stays shared.
+        """Even in strict mode, a row with NULL user_id stays shared.

        The strict flag tightens the *missing row* case, not the *shared
        row* case — legacy pre-auth rows that survived a clean migration
        without an owner are still everyone's.
        """
        repo = await _make_repo(tmp_path)
-        await repo.create("t1", owner_id=None)
+        await repo.create("t1", user_id=None)
        assert await repo.check_access("t1", "anyone", require_existing=True) is True
        await _cleanup()

@@ -113,14 +113,8 @@ def test_delete_thread_data_returns_generic_500_error(tmp_path):
 # ── Server-reserved metadata key stripping ──────────────────────────────────


-def test_strip_reserved_metadata_removes_owner_id():
-    """Client-supplied owner_id is dropped to prevent reflection attacks."""
-    out = threads._strip_reserved_metadata({"owner_id": "victim-id", "title": "ok"})
-    assert out == {"title": "ok"}
-
-
 def test_strip_reserved_metadata_removes_user_id():
-    """user_id is also reserved (defense in depth for any future use)."""
+    """Client-supplied user_id is dropped to prevent reflection attacks."""
    out = threads._strip_reserved_metadata({"user_id": "victim-id", "title": "ok"})
    assert out == {"title": "ok"}

@@ -136,6 +130,6 @@ def test_strip_reserved_metadata_empty_input():
    assert threads._strip_reserved_metadata({}) == {}


-def test_strip_reserved_metadata_strips_both_simultaneously():
-    out = threads._strip_reserved_metadata({"owner_id": "x", "user_id": "y", "keep": "me"})
+def test_strip_reserved_metadata_strips_all_reserved_keys():
+    out = threads._strip_reserved_metadata({"user_id": "x", "keep": "me"})
    assert out == {"keep": "me"}