From 31513c2ccb26a3e2a4b637e4ebcae312140ecdf9 Mon Sep 17 00:00:00 2001 From: Xinmin Zeng <135568692+fancyboi999@users.noreply.github.com> Date: Thu, 21 May 2026 16:22:09 +0800 Subject: [PATCH] fix(persistence): emit tz-aware timestamps from SQLite-backed stores (#3130) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit SQLAlchemy's DateTime(timezone=True) is a no-op on SQLite (the backend has no native tz type), so values round-tripped through the DB come back as naive datetimes. The four SQL _row_to_dict helpers were calling .isoformat() directly on those naive values, shipping timezone-less strings like "2026-05-20T06:10:22.970977" out of the API. The browser's new Date(...) then parses them as local time, shifting recent threads in /threads/search by the local UTC offset (about 8h in Asia/Shanghai). Route the four call sites through coerce_iso() instead — it already normalizes naive values as UTC and emits "+00:00" so the wire format always carries tz. No data migration is needed; existing SQLite rows read back via the corrected serializer. PostgreSQL deployments are unaffected because timestamptz preserves tzinfo end-to-end. Closes #3120 --- .../deerflow/persistence/feedback/sql.py | 4 +- .../harness/deerflow/persistence/run/sql.py | 7 +- .../deerflow/persistence/thread_meta/sql.py | 5 +- .../deerflow/runtime/events/store/db.py | 5 +- backend/tests/test_persistence_timezone.py | 106 ++++++++++++++++++ 5 files changed, 122 insertions(+), 5 deletions(-) create mode 100644 backend/tests/test_persistence_timezone.py diff --git a/backend/packages/harness/deerflow/persistence/feedback/sql.py b/backend/packages/harness/deerflow/persistence/feedback/sql.py index 1db74ce84..cdb5db89b 100644 --- a/backend/packages/harness/deerflow/persistence/feedback/sql.py +++ b/backend/packages/harness/deerflow/persistence/feedback/sql.py @@ -13,6 +13,7 @@ from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker from deerflow.persistence.feedback.model import FeedbackRow from deerflow.runtime.user_context import AUTO, _AutoSentinel, resolve_user_id +from deerflow.utils.time import coerce_iso class FeedbackRepository: @@ -24,7 +25,8 @@ class FeedbackRepository: d = row.to_dict() val = d.get("created_at") if isinstance(val, datetime): - d["created_at"] = val.isoformat() + # SQLite drops tzinfo on read; normalize via ``coerce_iso`` so output is always tz-aware. + d["created_at"] = coerce_iso(val) return d async def create( diff --git a/backend/packages/harness/deerflow/persistence/run/sql.py b/backend/packages/harness/deerflow/persistence/run/sql.py index d586a2b13..5679cc68f 100644 --- a/backend/packages/harness/deerflow/persistence/run/sql.py +++ b/backend/packages/harness/deerflow/persistence/run/sql.py @@ -17,6 +17,7 @@ from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker from deerflow.persistence.run.model import RunRow from deerflow.runtime.runs.store.base import RunStore from deerflow.runtime.user_context import AUTO, _AutoSentinel, resolve_user_id +from deerflow.utils.time import coerce_iso class RunRepository(RunStore): @@ -68,11 +69,13 @@ class RunRepository(RunStore): # Remap JSON columns to match RunStore interface d["metadata"] = d.pop("metadata_json", {}) d["kwargs"] = d.pop("kwargs_json", {}) - # Convert datetime to ISO string for consistency with MemoryRunStore + # Convert datetime to ISO string for consistency with MemoryRunStore. + # SQLite drops tzinfo on read despite ``DateTime(timezone=True)`` — + # ``coerce_iso`` normalizes naive datetimes as UTC. for key in ("created_at", "updated_at"): val = d.get(key) if isinstance(val, datetime): - d[key] = val.isoformat() + d[key] = coerce_iso(val) return d async def put( diff --git a/backend/packages/harness/deerflow/persistence/thread_meta/sql.py b/backend/packages/harness/deerflow/persistence/thread_meta/sql.py index 0d3f587de..930128087 100644 --- a/backend/packages/harness/deerflow/persistence/thread_meta/sql.py +++ b/backend/packages/harness/deerflow/persistence/thread_meta/sql.py @@ -13,6 +13,7 @@ from deerflow.persistence.json_compat import json_match from deerflow.persistence.thread_meta.base import InvalidMetadataFilterError, ThreadMetaStore from deerflow.persistence.thread_meta.model import ThreadMetaRow from deerflow.runtime.user_context import AUTO, _AutoSentinel, resolve_user_id +from deerflow.utils.time import coerce_iso logger = logging.getLogger(__name__) @@ -28,7 +29,9 @@ class ThreadMetaRepository(ThreadMetaStore): for key in ("created_at", "updated_at"): val = d.get(key) if isinstance(val, datetime): - d[key] = val.isoformat() + # SQLite drops tzinfo despite ``DateTime(timezone=True)``; + # ``coerce_iso`` normalizes naive values as UTC so the wire format always carries tz. + d[key] = coerce_iso(val) return d async def create( diff --git a/backend/packages/harness/deerflow/runtime/events/store/db.py b/backend/packages/harness/deerflow/runtime/events/store/db.py index b7e54754f..7bb55133e 100644 --- a/backend/packages/harness/deerflow/runtime/events/store/db.py +++ b/backend/packages/harness/deerflow/runtime/events/store/db.py @@ -17,6 +17,7 @@ from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker from deerflow.persistence.models.run_event import RunEventRow from deerflow.runtime.events.store.base import RunEventStore from deerflow.runtime.user_context import AUTO, _AutoSentinel, get_current_user, resolve_user_id +from deerflow.utils.time import coerce_iso logger = logging.getLogger(__name__) @@ -32,7 +33,9 @@ class DbRunEventStore(RunEventStore): d["metadata"] = d.pop("event_metadata", {}) val = d.get("created_at") if isinstance(val, datetime): - d["created_at"] = val.isoformat() + # SQLite drops tzinfo on read despite ``DateTime(timezone=True)``; + # ``coerce_iso`` normalizes naive datetimes as UTC. + d["created_at"] = coerce_iso(val) d.pop("id", None) # Restore structured content that was JSON-serialized on write. raw = d.get("content", "") diff --git a/backend/tests/test_persistence_timezone.py b/backend/tests/test_persistence_timezone.py new file mode 100644 index 000000000..7cd7b3310 --- /dev/null +++ b/backend/tests/test_persistence_timezone.py @@ -0,0 +1,106 @@ +"""Regression tests for #3120: SQLite-backed stores must emit tz-aware ISO timestamps. + +SQLAlchemy's ``DateTime(timezone=True)`` is a no-op on SQLite because the +backend has no native timezone type, so values read back are naive +``datetime`` instances. The four SQL ``_row_to_dict`` helpers therefore +have to normalize through :func:`deerflow.utils.time.coerce_iso` instead +of calling ``.isoformat()`` directly; otherwise the API ships +timezone-less strings (e.g. ``"2026-05-20T06:10:22.970977"``) and the +frontend's ``new Date(...)`` parses them as local time, shifting recent +threads by the local UTC offset. +""" + +import re + +import pytest + +_TZ_SUFFIX_RE = re.compile(r"(?:\+\d{2}:\d{2}|Z)$") + + +def _assert_tz_aware(value: str | None, *, context: str) -> None: + assert value, f"{context}: expected ISO string, got {value!r}" + assert _TZ_SUFFIX_RE.search(value), f"{context}: timestamp lacks tz suffix: {value!r}" + + +async def _init_sqlite(tmp_path): + from deerflow.persistence.engine import get_session_factory, init_engine + + url = f"sqlite+aiosqlite:///{tmp_path / 'tz.db'}" + await init_engine("sqlite", url=url, sqlite_dir=str(tmp_path)) + return get_session_factory() + + +async def _cleanup(): + from deerflow.persistence.engine import close_engine + + await close_engine() + + +@pytest.mark.anyio +async def test_thread_meta_emits_tz_aware_timestamps(tmp_path): + from deerflow.persistence.thread_meta import ThreadMetaRepository + + repo = ThreadMetaRepository(await _init_sqlite(tmp_path)) + try: + created = await repo.create("t-tz", user_id="u1", display_name="tz") + _assert_tz_aware(created["created_at"], context="thread_meta.create.created_at") + _assert_tz_aware(created["updated_at"], context="thread_meta.create.updated_at") + + # Second read from DB exercises the same _row_to_dict path on a + # value that SQLite has round-tripped (where tzinfo is lost). + fetched = await repo.get("t-tz", user_id="u1") + _assert_tz_aware(fetched["created_at"], context="thread_meta.get.created_at") + _assert_tz_aware(fetched["updated_at"], context="thread_meta.get.updated_at") + + listed = await repo.search(user_id="u1") + assert listed, "search must return the created row" + _assert_tz_aware(listed[0]["created_at"], context="thread_meta.search.created_at") + _assert_tz_aware(listed[0]["updated_at"], context="thread_meta.search.updated_at") + finally: + await _cleanup() + + +@pytest.mark.anyio +async def test_run_repository_emits_tz_aware_timestamps(tmp_path): + from deerflow.persistence.run import RunRepository + + repo = RunRepository(await _init_sqlite(tmp_path)) + try: + await repo.put("r-tz", thread_id="t-tz", user_id="u1") + row = await repo.get("r-tz", user_id="u1") + _assert_tz_aware(row["created_at"], context="run.get.created_at") + _assert_tz_aware(row["updated_at"], context="run.get.updated_at") + finally: + await _cleanup() + + +@pytest.mark.anyio +async def test_feedback_repository_emits_tz_aware_timestamps(tmp_path): + from deerflow.persistence.feedback import FeedbackRepository + + repo = FeedbackRepository(await _init_sqlite(tmp_path)) + try: + record = await repo.create(run_id="r-tz", thread_id="t-tz", rating=1, user_id="u1") + _assert_tz_aware(record["created_at"], context="feedback.create.created_at") + finally: + await _cleanup() + + +@pytest.mark.anyio +async def test_run_event_store_emits_tz_aware_timestamps(tmp_path): + from deerflow.runtime.events.store.db import DbRunEventStore + + store = DbRunEventStore(await _init_sqlite(tmp_path)) + try: + await store.put( + thread_id="t-tz", + run_id="r-tz", + event_type="log", + category="log", + content="hello", + ) + events = await store.list_events("t-tz", "r-tz", user_id=None) + assert events, "expected at least one event" + _assert_tz_aware(events[0]["created_at"], context="run_event.list.created_at") + finally: + await _cleanup()