mirror of
https://github.com/bytedance/deer-flow.git
synced 2026-05-22 16:06:50 +00:00
fix(persistence): address 22 review comments from CodeQL, Copilot, and Code Quality
Bug fixes: - Sanitize log params to prevent log injection (CodeQL) - Reset threads_meta.status to idle/error when run completes - Attach messages only to latest checkpoint in /history response - Write threads_meta on POST /threads so new threads appear in search Lint fixes: - Remove unused imports (journal.py, migrations/env.py, test_converters.py) - Convert lambda to named function (engine.py, Ruff E731) - Remove unused logger definitions in repos (Ruff F841) - Add logging to JSONL decode errors and empty except blocks - Separate assert side-effects in tests (CodeQL) - Remove unused local variables in tests (Ruff F841) - Fix max_trace_content truncation to use byte length, not char length Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -10,13 +10,15 @@ None and fall back to in-memory implementations.
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
|
||||
import json
|
||||
import logging
|
||||
|
||||
from sqlalchemy.ext.asyncio import AsyncEngine, AsyncSession, async_sessionmaker, create_async_engine
|
||||
|
||||
_json_serializer = lambda obj: json.dumps(obj, ensure_ascii=False)
|
||||
|
||||
def _json_serializer(obj: object) -> str:
|
||||
"""JSON serializer with ensure_ascii=False for Chinese character support."""
|
||||
return json.dumps(obj, ensure_ascii=False)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -106,7 +108,9 @@ async def init_engine(
|
||||
try:
|
||||
import deerflow.persistence.models # noqa: F401
|
||||
except ImportError:
|
||||
pass
|
||||
# Models package not yet available — tables won't be auto-created.
|
||||
# This is expected during initial scaffolding or minimal installs.
|
||||
logger.debug("deerflow.persistence.models not found; skipping auto-create tables")
|
||||
|
||||
try:
|
||||
async with _engine.begin() as conn:
|
||||
|
||||
@@ -8,6 +8,7 @@ have their own schema lifecycle and must not be touched by Alembic.
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
from logging.config import fileConfig
|
||||
|
||||
from alembic import context
|
||||
@@ -17,9 +18,13 @@ from deerflow.persistence.base import Base
|
||||
|
||||
# Import all models so metadata is populated.
|
||||
try:
|
||||
import deerflow.persistence.models # noqa: F401
|
||||
import deerflow.persistence.models # noqa: F401 — register ORM models with Base.metadata
|
||||
except ImportError:
|
||||
pass
|
||||
# Models not available — migration will work with existing metadata only.
|
||||
logging.getLogger(__name__).warning(
|
||||
"Could not import deerflow.persistence.models; "
|
||||
"Alembic may not detect all tables"
|
||||
)
|
||||
|
||||
config = context.config
|
||||
if config.config_file_name is not None:
|
||||
|
||||
@@ -5,7 +5,6 @@ Each method acquires its own short-lived session.
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import uuid
|
||||
from datetime import UTC, datetime
|
||||
|
||||
@@ -14,8 +13,6 @@ from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker
|
||||
|
||||
from deerflow.persistence.models.feedback import FeedbackRow
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class FeedbackRepository:
|
||||
def __init__(self, session_factory: async_sessionmaker[AsyncSession]) -> None:
|
||||
|
||||
@@ -8,7 +8,6 @@ minutes -- we don't hold connections across long execution.
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
from datetime import UTC, datetime
|
||||
from typing import Any
|
||||
|
||||
@@ -18,8 +17,6 @@ from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker
|
||||
from deerflow.persistence.models.run import RunRow
|
||||
from deerflow.runtime.runs.store.base import RunStore
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class RunRepository(RunStore):
|
||||
def __init__(self, session_factory: async_sessionmaker[AsyncSession]) -> None:
|
||||
|
||||
@@ -2,7 +2,6 @@
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from datetime import UTC, datetime
|
||||
from typing import Any
|
||||
|
||||
@@ -11,8 +10,6 @@ from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker
|
||||
|
||||
from deerflow.persistence.models.thread_meta import ThreadMetaRow
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class ThreadMetaRepository:
|
||||
def __init__(self, session_factory: async_sessionmaker[AsyncSession]) -> None:
|
||||
|
||||
@@ -7,6 +7,7 @@ at ``max_trace_content`` bytes to avoid bloating the database.
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
from datetime import UTC, datetime
|
||||
|
||||
from sqlalchemy import delete, func, select
|
||||
@@ -15,6 +16,8 @@ from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker
|
||||
from deerflow.persistence.models.run_event import RunEventRow
|
||||
from deerflow.runtime.events.store.base import RunEventStore
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class DbRunEventStore(RunEventStore):
|
||||
def __init__(self, session_factory: async_sessionmaker[AsyncSession], *, max_trace_content: int = 10240):
|
||||
@@ -35,15 +38,19 @@ class DbRunEventStore(RunEventStore):
|
||||
try:
|
||||
d["content"] = json.loads(raw)
|
||||
except (json.JSONDecodeError, ValueError):
|
||||
pass
|
||||
# Content looked like JSON (content_is_dict flag) but failed to parse;
|
||||
# keep the raw string as-is.
|
||||
logger.debug("Failed to deserialize content as JSON for event seq=%s", d.get("seq"))
|
||||
return d
|
||||
|
||||
def _truncate_trace(self, category: str, content: str | dict, metadata: dict | None) -> tuple[str | dict, dict]:
|
||||
if category == "trace":
|
||||
text = json.dumps(content, default=str, ensure_ascii=False) if isinstance(content, dict) else content
|
||||
if len(text) > self._max_trace_content:
|
||||
content = text[: self._max_trace_content]
|
||||
metadata = {**(metadata or {}), "content_truncated": True}
|
||||
encoded = text.encode("utf-8")
|
||||
if len(encoded) > self._max_trace_content:
|
||||
# Truncate by bytes, then decode back (may cut a multi-byte char, so use errors="ignore")
|
||||
content = encoded[: self._max_trace_content].decode("utf-8", errors="ignore")
|
||||
metadata = {**(metadata or {}), "content_truncated": True, "original_byte_length": len(encoded)}
|
||||
return content, metadata or {}
|
||||
|
||||
async def put(self, *, thread_id, run_id, event_type, category, content="", metadata=None, created_at=None):
|
||||
|
||||
@@ -51,6 +51,7 @@ class JsonlRunEventStore(RunEventStore):
|
||||
record = json.loads(line)
|
||||
max_seq = max(max_seq, record.get("seq", 0))
|
||||
except json.JSONDecodeError:
|
||||
logger.debug("Skipping malformed JSONL line in %s", f)
|
||||
continue
|
||||
self._seq_counters[thread_id] = max_seq
|
||||
|
||||
@@ -73,6 +74,7 @@ class JsonlRunEventStore(RunEventStore):
|
||||
try:
|
||||
events.append(json.loads(line))
|
||||
except json.JSONDecodeError:
|
||||
logger.debug("Skipping malformed JSONL line in %s", f)
|
||||
continue
|
||||
events.sort(key=lambda e: e.get("seq", 0))
|
||||
return events
|
||||
@@ -89,6 +91,7 @@ class JsonlRunEventStore(RunEventStore):
|
||||
try:
|
||||
events.append(json.loads(line))
|
||||
except json.JSONDecodeError:
|
||||
logger.debug("Skipping malformed JSONL line in %s", path)
|
||||
continue
|
||||
events.sort(key=lambda e: e.get("seq", 0))
|
||||
return events
|
||||
|
||||
@@ -135,7 +135,7 @@ class RunJournal(BaseCallbackHandler):
|
||||
self._llm_start_times[str(run_id)] = time.monotonic()
|
||||
|
||||
def on_llm_end(self, response: Any, *, run_id: UUID, **kwargs: Any) -> None:
|
||||
from deerflow.runtime.converters import langchain_to_openai_completion, langchain_to_openai_message
|
||||
from deerflow.runtime.converters import langchain_to_openai_completion
|
||||
|
||||
try:
|
||||
message = response.generations[0][0].message
|
||||
|
||||
@@ -17,7 +17,10 @@ from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
from typing import Any, Literal
|
||||
from typing import TYPE_CHECKING, Any, Literal
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from langchain_core.messages import HumanMessage
|
||||
|
||||
from deerflow.runtime.serialization import serialize
|
||||
from deerflow.runtime.stream_bridge import StreamBridge
|
||||
@@ -273,6 +276,14 @@ async def run_agent(
|
||||
except Exception:
|
||||
logger.debug("Failed to sync title for thread %s (non-fatal)", thread_id)
|
||||
|
||||
# Update threads_meta status based on run outcome
|
||||
if thread_meta_repo is not None:
|
||||
try:
|
||||
final_status = "idle" if record.status == RunStatus.success else record.status.value
|
||||
await thread_meta_repo.update_status(thread_id, final_status)
|
||||
except Exception:
|
||||
logger.debug("Failed to update thread_meta status for %s (non-fatal)", thread_id)
|
||||
|
||||
await bridge.publish_end(run_id)
|
||||
asyncio.create_task(bridge.cleanup(run_id, delay=60))
|
||||
|
||||
@@ -294,7 +305,7 @@ def _lg_mode_to_sse_event(mode: str) -> str:
|
||||
return mode
|
||||
|
||||
|
||||
def _extract_human_message(graph_input: dict) -> "HumanMessage | None":
|
||||
def _extract_human_message(graph_input: dict) -> HumanMessage | None:
|
||||
"""Extract or construct a HumanMessage from graph_input for event recording.
|
||||
|
||||
Returns a LangChain HumanMessage so callers can use .model_dump() to get
|
||||
|
||||
Reference in New Issue
Block a user