mirror of
https://github.com/bytedance/deer-flow.git
synced 2026-05-21 23:46:50 +00:00
feat(client): add thread query methods list_threads and get_thread (#1609)
* feat(client): add thread query methods `list_threads` and `get_thread` Implemented two public API methods in `DeerFlowClient` to query threads using the underlying `checkpointer`. * Update backend/packages/harness/deerflow/client.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * Update backend/packages/harness/deerflow/client.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * Update backend/tests/test_client.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * Update backend/packages/harness/deerflow/client.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * fix(deerflow): Fix possible KeyError issue when sorting threads * fix unit test --------- Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
This commit is contained in:
@@ -315,6 +315,108 @@ class DeerFlowClient:
|
||||
return "\n".join(pieces) if pieces else ""
|
||||
return str(content)
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Public API — threads
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def list_threads(self, limit: int = 10) -> dict:
|
||||
"""List the recent N threads.
|
||||
|
||||
Args:
|
||||
limit: Maximum number of threads to return. Default is 10.
|
||||
|
||||
Returns:
|
||||
Dict with "thread_list" key containing list of thread info dicts,
|
||||
sorted by thread creation time descending.
|
||||
"""
|
||||
checkpointer = self._checkpointer
|
||||
if checkpointer is None:
|
||||
from deerflow.agents.checkpointer.provider import get_checkpointer
|
||||
|
||||
checkpointer = get_checkpointer()
|
||||
|
||||
thread_info_map = {}
|
||||
|
||||
for cp in checkpointer.list(config=None, limit=limit):
|
||||
cfg = cp.config.get("configurable", {})
|
||||
thread_id = cfg.get("thread_id")
|
||||
if not thread_id:
|
||||
continue
|
||||
|
||||
ts = cp.checkpoint.get("ts")
|
||||
checkpoint_id = cfg.get("checkpoint_id")
|
||||
|
||||
if thread_id not in thread_info_map:
|
||||
channel_values = cp.checkpoint.get("channel_values", {})
|
||||
thread_info_map[thread_id] = {
|
||||
"thread_id": thread_id,
|
||||
"created_at": ts,
|
||||
"updated_at": ts,
|
||||
"latest_checkpoint_id": checkpoint_id,
|
||||
"title": channel_values.get("title"),
|
||||
}
|
||||
else:
|
||||
# Explicitly compare timestamps to ensure accuracy when iterating over unordered namespaces.
|
||||
# Treat None as "missing" and only compare when existing values are non-None.
|
||||
if ts is not None:
|
||||
current_created = thread_info_map[thread_id]["created_at"]
|
||||
if current_created is None or ts < current_created:
|
||||
thread_info_map[thread_id]["created_at"] = ts
|
||||
|
||||
current_updated = thread_info_map[thread_id]["updated_at"]
|
||||
if current_updated is None or ts > current_updated:
|
||||
thread_info_map[thread_id]["updated_at"] = ts
|
||||
thread_info_map[thread_id]["latest_checkpoint_id"] = checkpoint_id
|
||||
channel_values = cp.checkpoint.get("channel_values", {})
|
||||
thread_info_map[thread_id]["title"] = channel_values.get("title")
|
||||
|
||||
threads = list(thread_info_map.values())
|
||||
threads.sort(key=lambda x: x.get("created_at") or "", reverse=True)
|
||||
|
||||
return {"thread_list": threads[:limit]}
|
||||
|
||||
def get_thread(self, thread_id: str) -> dict:
|
||||
"""Get the complete thread record, including all node execution records.
|
||||
|
||||
Args:
|
||||
thread_id: Thread ID.
|
||||
|
||||
Returns:
|
||||
Dict containing the thread's full checkpoint history.
|
||||
"""
|
||||
checkpointer = self._checkpointer
|
||||
if checkpointer is None:
|
||||
from deerflow.agents.checkpointer.provider import get_checkpointer
|
||||
|
||||
checkpointer = get_checkpointer()
|
||||
|
||||
config = {"configurable": {"thread_id": thread_id}}
|
||||
checkpoints = []
|
||||
|
||||
for cp in checkpointer.list(config):
|
||||
channel_values = dict(cp.checkpoint.get("channel_values", {}))
|
||||
if "messages" in channel_values:
|
||||
channel_values["messages"] = [self._serialize_message(m) if hasattr(m, "content") else m for m in channel_values["messages"]]
|
||||
|
||||
cfg = cp.config.get("configurable", {})
|
||||
parent_cfg = cp.parent_config.get("configurable", {}) if cp.parent_config else {}
|
||||
|
||||
checkpoints.append(
|
||||
{
|
||||
"checkpoint_id": cfg.get("checkpoint_id"),
|
||||
"parent_checkpoint_id": parent_cfg.get("checkpoint_id"),
|
||||
"ts": cp.checkpoint.get("ts"),
|
||||
"metadata": cp.metadata,
|
||||
"values": channel_values,
|
||||
"pending_writes": [{"task_id": w[0], "channel": w[1], "value": w[2]} for w in getattr(cp, "pending_writes", [])],
|
||||
}
|
||||
)
|
||||
|
||||
# Sort globally by timestamp to prevent partial ordering issues caused by different namespaces (e.g., subgraphs)
|
||||
checkpoints.sort(key=lambda x: x["ts"] if x["ts"] else "")
|
||||
|
||||
return {"thread_id": thread_id, "checkpoints": checkpoints}
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Public API — conversation
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
Reference in New Issue
Block a user