Mirror of https://github.com/bytedance/deer-flow.git (last synced 2026-05-21 23:46:50 +00:00)
fix: Memory update system has cache corruption, data loss, and thread-safety bugs (#2251)
* fix(memory): cache corruption, thread-safety, and caller mutation bugs
Bug 1 (updater.py): deep-copy current_memory before passing to
_apply_updates() so a subsequent save() failure cannot leave a
partially-mutated object in the storage cache.
Bug 3 (storage.py): add _cache_lock (threading.Lock) to
FileMemoryStorage and acquire it around every read/write of
_memory_cache, fixing concurrent-access races between the background
timer thread and HTTP reload calls.
Bug 4 (storage.py): replace the in-place mutation
    memory_data["lastUpdated"] = ...
with a shallow copy
    memory_data = {**memory_data, "lastUpdated": ...}
so that save() no longer silently modifies the caller's dict.
Regression tests for all three bugs (Bug 1, Bug 3, and Bug 4) were added in
test_memory_storage.py and test_memory_updater.py.
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
* style: format test_memory_updater.py with ruff
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
* style: remove stale bug-number labels from code comments and docstrings
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---------
Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -67,6 +67,8 @@ class FileMemoryStorage(MemoryStorage):
|
||||
# Per-agent memory cache: keyed by agent_name (None = global)
|
||||
# Value: (memory_data, file_mtime)
|
||||
self._memory_cache: dict[str | None, tuple[dict[str, Any], float | None]] = {}
|
||||
# Guards all reads and writes to _memory_cache across concurrent callers.
|
||||
self._cache_lock = threading.Lock()
|
||||
|
||||
def _validate_agent_name(self, agent_name: str) -> None:
|
||||
"""Validate that the agent name is safe to use in filesystem paths.
|
||||
@@ -115,14 +117,17 @@ class FileMemoryStorage(MemoryStorage):
|
||||
except OSError:
|
||||
current_mtime = None
|
||||
|
||||
cached = self._memory_cache.get(agent_name)
|
||||
with self._cache_lock:
|
||||
cached = self._memory_cache.get(agent_name)
|
||||
if cached is not None and cached[1] == current_mtime:
|
||||
return cached[0]
|
||||
|
||||
if cached is None or cached[1] != current_mtime:
|
||||
memory_data = self._load_memory_from_file(agent_name)
|
||||
memory_data = self._load_memory_from_file(agent_name)
|
||||
|
||||
with self._cache_lock:
|
||||
self._memory_cache[agent_name] = (memory_data, current_mtime)
|
||||
return memory_data
|
||||
|
||||
return cached[0]
|
||||
return memory_data
|
||||
|
||||
def reload(self, agent_name: str | None = None) -> dict[str, Any]:
|
||||
"""Reload memory data from file, forcing cache invalidation."""
|
||||
@@ -134,7 +139,8 @@ class FileMemoryStorage(MemoryStorage):
|
||||
except OSError:
|
||||
mtime = None
|
||||
|
||||
self._memory_cache[agent_name] = (memory_data, mtime)
|
||||
with self._cache_lock:
|
||||
self._memory_cache[agent_name] = (memory_data, mtime)
|
||||
return memory_data
|
||||
|
||||
def save(self, memory_data: dict[str, Any], agent_name: str | None = None) -> bool:
|
||||
@@ -143,7 +149,10 @@ class FileMemoryStorage(MemoryStorage):
|
||||
|
||||
try:
|
||||
file_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
memory_data["lastUpdated"] = utc_now_iso_z()
|
||||
# Shallow-copy before adding lastUpdated so the caller's dict is not
|
||||
# mutated as a side-effect, and the cache reference is not silently
|
||||
# updated before the file write succeeds.
|
||||
memory_data = {**memory_data, "lastUpdated": utc_now_iso_z()}
|
||||
|
||||
temp_path = file_path.with_suffix(f".{uuid.uuid4().hex}.tmp")
|
||||
with open(temp_path, "w", encoding="utf-8") as f:
|
||||
@@ -156,7 +165,8 @@ class FileMemoryStorage(MemoryStorage):
|
||||
except OSError:
|
||||
mtime = None
|
||||
|
||||
self._memory_cache[agent_name] = (memory_data, mtime)
|
||||
with self._cache_lock:
|
||||
self._memory_cache[agent_name] = (memory_data, mtime)
|
||||
logger.info("Memory saved to %s", file_path)
|
||||
return True
|
||||
except OSError as e:
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
import asyncio
|
||||
import atexit
|
||||
import concurrent.futures
|
||||
import copy
|
||||
import json
|
||||
import logging
|
||||
import math
|
||||
@@ -380,7 +381,9 @@ class MemoryUpdater:
|
||||
response_text = "\n".join(lines[1:-1] if lines[-1] == "```" else lines[1:])
|
||||
|
||||
update_data = json.loads(response_text)
|
||||
updated_memory = self._apply_updates(current_memory, update_data, thread_id)
|
||||
# Deep-copy before in-place mutation so a subsequent save() failure
|
||||
# cannot corrupt the still-cached original object reference.
|
||||
updated_memory = self._apply_updates(copy.deepcopy(current_memory), update_data, thread_id)
|
||||
updated_memory = _strip_upload_mentions_from_memory(updated_memory)
|
||||
return get_memory_storage().save(updated_memory, agent_name)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user