mirror of
https://github.com/bytedance/deer-flow.git
synced 2026-05-24 08:55:59 +00:00
refactor(runtime): restructure runs module with new execution architecture
Major refactoring of deerflow/runtime/: - runs/callbacks/ - new callback system (builder, events, title, tokens) - runs/internal/ - execution internals (executor, supervisor, stream_logic, registry) - runs/internal/execution/ - execution artifacts and events handling - runs/facade.py - high-level run facade - runs/observer.py - run observation protocol - runs/types.py - type definitions - runs/store/ - simplified store interfaces (create, delete, query, event) Refactor stream_bridge/: - Replace old providers with contract.py and exceptions.py - Remove async_provider.py, base.py, memory.py Add documentation: - README.md and README_zh.md for runtime module Remove deprecated: - manager.py moved to internal/ - worker.py, schemas.py - user_context.py Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -1,21 +1,47 @@
|
||||
"""Stream bridge — decouples agent workers from SSE endpoints.
|
||||
"""Stream bridge public surface.
|
||||
|
||||
A ``StreamBridge`` sits between the background task that runs an agent
|
||||
(producer) and the HTTP endpoint that pushes Server-Sent Events to
|
||||
the client (consumer). This package provides an abstract protocol
|
||||
(:class:`StreamBridge`) plus a default in-memory implementation backed
|
||||
by :mod:`asyncio.Queue`.
|
||||
The harness package owns the stream abstraction and event semantics.
|
||||
Concrete backends are intentionally not part of the public API here so
|
||||
applications can inject infra-specific implementations.
|
||||
"""
|
||||
|
||||
from .async_provider import make_stream_bridge
|
||||
from .base import END_SENTINEL, HEARTBEAT_SENTINEL, StreamBridge, StreamEvent
|
||||
from .memory import MemoryStreamBridge
|
||||
from .contract import (
|
||||
CANCELLED_SENTINEL,
|
||||
END_SENTINEL,
|
||||
HEARTBEAT_SENTINEL,
|
||||
JSONScalar,
|
||||
JSONValue,
|
||||
TERMINAL_STATES,
|
||||
ResumeResult,
|
||||
StreamBridge,
|
||||
StreamEvent,
|
||||
StreamStatus,
|
||||
)
|
||||
from .exceptions import (
|
||||
BridgeClosedError,
|
||||
StreamBridgeError,
|
||||
StreamCapacityExceededError,
|
||||
StreamNotFoundError,
|
||||
StreamTerminatedError,
|
||||
)
|
||||
|
||||
__all__ = [
|
||||
# Sentinels
|
||||
"CANCELLED_SENTINEL",
|
||||
"END_SENTINEL",
|
||||
"HEARTBEAT_SENTINEL",
|
||||
"MemoryStreamBridge",
|
||||
# Types
|
||||
"JSONScalar",
|
||||
"JSONValue",
|
||||
"ResumeResult",
|
||||
"StreamBridge",
|
||||
"StreamEvent",
|
||||
"make_stream_bridge",
|
||||
"StreamStatus",
|
||||
"TERMINAL_STATES",
|
||||
# Exceptions
|
||||
"BridgeClosedError",
|
||||
"StreamBridgeError",
|
||||
"StreamCapacityExceededError",
|
||||
"StreamNotFoundError",
|
||||
"StreamTerminatedError",
|
||||
]
|
||||
|
||||
@@ -1,52 +0,0 @@
|
||||
"""Async stream bridge factory.
|
||||
|
||||
Provides an **async context manager** aligned with
|
||||
:func:`deerflow.runtime.checkpointer.async_provider.make_checkpointer`.
|
||||
|
||||
Usage (e.g. FastAPI lifespan)::
|
||||
|
||||
from deerflow.runtime.stream_bridge import make_stream_bridge
|
||||
|
||||
async with make_stream_bridge() as bridge:
|
||||
app.state.stream_bridge = bridge
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import contextlib
|
||||
import logging
|
||||
from collections.abc import AsyncIterator
|
||||
|
||||
from deerflow.config.stream_bridge_config import get_stream_bridge_config
|
||||
|
||||
from .base import StreamBridge
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@contextlib.asynccontextmanager
async def make_stream_bridge(config=None) -> AsyncIterator[StreamBridge]:
    """Yield a :class:`StreamBridge` for the duration of an ``async with`` block.

    Args:
        config: Stream bridge configuration. When ``None`` the value returned
            by ``get_stream_bridge_config()`` is used instead.

    Yields:
        A ``MemoryStreamBridge`` when the resolved configuration is ``None``
        or has ``type == "memory"``. Its ``queue_maxsize`` comes from the
        configuration, or is 256 when there is no configuration at all.
        The bridge is closed when the context exits.

    Raises:
        NotImplementedError: The resolved configuration asks for ``"redis"``.
        ValueError: The resolved configuration names any other backend type.
    """
    resolved = get_stream_bridge_config() if config is None else config
    bridge_type = "memory" if resolved is None else resolved.type

    # Reject unsupported backends up front so only the memory path remains.
    if bridge_type == "redis":
        raise NotImplementedError("Redis stream bridge planned for Phase 2")
    if bridge_type != "memory":
        raise ValueError(f"Unknown stream bridge type: {bridge_type!r}")

    # Imported lazily, exactly where the memory backend is needed.
    from deerflow.runtime.stream_bridge.memory import MemoryStreamBridge

    maxsize = 256 if resolved is None else resolved.queue_maxsize
    bridge = MemoryStreamBridge(queue_maxsize=maxsize)
    logger.info("Stream bridge initialised: memory (queue_maxsize=%d)", maxsize)
    try:
        yield bridge
    finally:
        await bridge.close()
|
||||
@@ -1,72 +0,0 @@
|
||||
"""Abstract stream bridge protocol.
|
||||
|
||||
StreamBridge decouples agent workers (producers) from SSE endpoints
|
||||
(consumers), aligning with LangGraph Platform's Queue + StreamManager
|
||||
architecture.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import abc
|
||||
from collections.abc import AsyncIterator
|
||||
from dataclasses import dataclass
|
||||
from typing import Any
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class StreamEvent:
    """Immutable record describing one event on a run's stream.

    Attributes:
        id: Monotonically increasing event ID. It is emitted as the SSE
            ``id:`` field and is what a client sends back as
            ``Last-Event-ID`` when reconnecting.
        event: SSE event name, for example ``"metadata"``, ``"updates"``,
            ``"events"``, ``"error"`` or ``"end"``.
        data: JSON-serialisable payload carried by the event.
    """

    id: str
    event: str
    data: Any
|
||||
|
||||
|
||||
# Marker events: both have an empty ID and no payload, and are told apart
# only by their reserved event names.
HEARTBEAT_SENTINEL = StreamEvent(id="", event="__heartbeat__", data=None)  # idle keep-alive marker
END_SENTINEL = StreamEvent(id="", event="__end__", data=None)  # end-of-stream marker
|
||||
|
||||
|
||||
class StreamBridge(abc.ABC):
    """Interface implemented by every stream bridge backend.

    Producers feed a run's stream through :meth:`publish` and
    :meth:`publish_end`; consumers read it back with :meth:`subscribe`.
    """

    @abc.abstractmethod
    async def publish(self, run_id: str, event: str, data: Any) -> None:
        """Producer side: enqueue one event on the stream of *run_id*."""

    @abc.abstractmethod
    async def publish_end(self, run_id: str) -> None:
        """Producer side: declare that *run_id* will emit no further events."""

    @abc.abstractmethod
    def subscribe(
        self,
        run_id: str,
        *,
        last_event_id: str | None = None,
        heartbeat_interval: float = 15.0,
    ) -> AsyncIterator[StreamEvent]:
        """Consumer side: return an async iterator over the events of *run_id*.

        The iterator yields :data:`HEARTBEAT_SENTINEL` whenever
        *heartbeat_interval* seconds pass without a new event, and yields
        :data:`END_SENTINEL` after the producer has called
        :meth:`publish_end`.
        """

    @abc.abstractmethod
    async def cleanup(self, run_id: str, *, delay: float = 0) -> None:
        """Free whatever the bridge holds for *run_id*.

        A positive *delay* asks the implementation to wait that long before
        releasing, so late subscribers can still drain remaining events.
        """

    async def close(self) -> None:
        """Release backend resources; the base implementation does nothing."""
|
||||
@@ -0,0 +1,112 @@
|
||||
"""Stream bridge contract and public types."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import abc
|
||||
from collections.abc import AsyncIterator
|
||||
from dataclasses import dataclass
|
||||
from enum import Enum
|
||||
from typing import Literal
|
||||
|
||||
type JSONScalar = None | bool | int | float | str
|
||||
type JSONValue = JSONScalar | list["JSONValue"] | dict[str, "JSONValue"]
|
||||
|
||||
|
||||
class StreamStatus(str, Enum):
    """Lifecycle states of a stream.

    Members are also ``str`` instances, so each one compares equal to its
    lowercase value (``StreamStatus.ACTIVE == "active"``).
    """

    ACTIVE = "active"
    ENDED = "ended"
    CANCELLED = "cancelled"
    ERRORED = "errored"
    CLOSED = "closed"
|
||||
|
||||
|
||||
# Statuses counted as terminal. ACTIVE and CLOSED are not members.
TERMINAL_STATES = frozenset(
    (
        StreamStatus.ENDED,
        StreamStatus.CANCELLED,
        StreamStatus.ERRORED,
    )
)
|
||||
|
||||
|
||||
@dataclass(frozen=True, slots=True)
|
||||
class StreamEvent:
|
||||
"""Single stream event."""
|
||||
|
||||
id: str
|
||||
event: str
|
||||
data: JSONValue
|
||||
|
||||
|
||||
@dataclass(frozen=True, slots=True)
|
||||
class ResumeResult:
|
||||
"""Result of resolving Last-Event-ID."""
|
||||
|
||||
next_offset: int
|
||||
status: Literal["fresh", "resumed", "evicted", "invalid", "unknown"]
|
||||
gap_count: int = 0
|
||||
|
||||
|
||||
# Marker events: each has an empty ID and no payload, and is identified
# solely by its reserved event name.
HEARTBEAT_SENTINEL = StreamEvent(id="", event="__heartbeat__", data=None)
END_SENTINEL = StreamEvent(id="", event="__end__", data=None)
CANCELLED_SENTINEL = StreamEvent(id="", event="__cancelled__", data=None)
|
||||
|
||||
|
||||
class StreamBridge(abc.ABC):
    """Abstract base for stream bridges.

    ``StreamBridge`` defines runtime stream semantics, not storage semantics.
    Concrete backends may live outside the harness package and be injected by
    the application composition root.

    Important boundary rules:

    - Terminal run events (``end``/``cancel``/``error``) are real replayable
      events and belong to run-level semantics.
    - ``close()`` is bridge-level shutdown and must not be treated as a run
      cancellation signal.
    """

    @abc.abstractmethod
    async def publish(self, run_id: str, event: str, data: JSONValue) -> str:
        """Enqueue a single event for *run_id* and return its event ID."""

    @abc.abstractmethod
    async def publish_end(self, run_id: str) -> str:
        """Signal that no more events will be produced for *run_id*.

        Returns:
            An ID string chosen by the backend.
        """

    async def publish_terminal(
        self,
        run_id: str,
        kind: StreamStatus,
        data: JSONValue = None,
    ) -> str:
        """Publish a terminal event (end/cancel/error).

        This default implementation does not distinguish between terminal
        kinds: *kind* and *data* are ignored and the call is forwarded to
        :meth:`publish_end`. Backends that need distinct cancel or error
        events must override this method.

        Args:
            run_id: Run whose stream is being terminated.
            kind: Requested terminal status (unused by the default).
            data: Optional payload (unused by the default).

        Returns:
            The ID returned by :meth:`publish_end`.
        """
        # Hand the backend's ID back to the caller instead of discarding it.
        return await self.publish_end(run_id)

    @abc.abstractmethod
    def subscribe(
        self,
        run_id: str,
        *,
        last_event_id: str | None = None,
        heartbeat_interval: float = 15.0,
    ) -> AsyncIterator[StreamEvent]:
        """Yield replayable stream events for *run_id*."""

    @abc.abstractmethod
    async def cleanup(self, run_id: str, *, delay: float = 0) -> None:
        """Release resources associated with *run_id*."""

    async def cancel(self, run_id: str) -> None:
        """Cancel a run and notify all subscribers.

        Implemented as ``publish_terminal(run_id, StreamStatus.CANCELLED)``.
        """
        await self.publish_terminal(run_id, StreamStatus.CANCELLED)

    async def mark_awaiting_input(self, run_id: str) -> None:
        """Mark stream as awaiting human input. Default is a no-op."""

    async def start(self) -> None:
        """Start background tasks, if needed. Default is a no-op."""

    async def close(self) -> None:
        """Release bridge-level backend resources. Default is a no-op."""
|
||||
@@ -0,0 +1,23 @@
|
||||
"""Stream bridge exceptions."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
|
||||
class StreamBridgeError(Exception):
    """Root of the stream bridge exception hierarchy."""


class BridgeClosedError(StreamBridgeError):
    """An operation was attempted on a bridge that is already closed."""


class StreamCapacityExceededError(StreamBridgeError):
    """``max_active_streams`` has been reached and nothing can be evicted."""


class StreamTerminatedError(StreamBridgeError):
    """A publish was attempted on a stream that is in a terminal state."""


class StreamNotFoundError(StreamBridgeError):
    """The referenced stream does not exist."""
|
||||
@@ -1,133 +0,0 @@
|
||||
"""In-memory stream bridge backed by an in-process event log."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
import time
|
||||
from collections.abc import AsyncIterator
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any
|
||||
|
||||
from .base import END_SENTINEL, HEARTBEAT_SENTINEL, StreamBridge, StreamEvent
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@dataclass
class _RunStream:
    """Per-run state kept by the in-memory bridge."""

    # Retained events, oldest first.
    events: list[StreamEvent] = field(default_factory=list)
    # Guards ``events``/``ended`` and wakes waiting subscribers.
    condition: asyncio.Condition = field(default_factory=asyncio.Condition)
    # True once the producer has signalled the end of the run.
    ended: bool = False
    # Absolute offset of ``events[0]``; advanced when old events are trimmed.
    start_offset: int = 0
|
||||
|
||||
|
||||
class MemoryStreamBridge(StreamBridge):
    """In-process stream bridge that keeps a bounded event log per run.

    Each run retains at most ``queue_maxsize`` of its most recent events, so
    late subscribers and reconnecting clients can replay whatever is still
    buffered, starting after their ``Last-Event-ID``.
    """

    def __init__(self, *, queue_maxsize: int = 256) -> None:
        self._maxsize = queue_maxsize
        self._streams: dict[str, _RunStream] = {}
        self._counters: dict[str, int] = {}

    # -- helpers ---------------------------------------------------------------

    def _get_or_create_stream(self, run_id: str) -> _RunStream:
        """Return the stream for *run_id*, creating it and its counter if absent."""
        existing = self._streams.get(run_id)
        if existing is not None:
            return existing
        created = _RunStream()
        self._streams[run_id] = created
        self._counters[run_id] = 0
        return created

    def _next_id(self, run_id: str) -> str:
        """Build the next event ID as ``<epoch-millis>-<per-run sequence>``."""
        seq = self._counters.get(run_id, 0)
        self._counters[run_id] = seq + 1
        millis = int(time.time() * 1000)
        return f"{millis}-{seq}"

    def _resolve_start_offset(self, stream: _RunStream, last_event_id: str | None) -> int:
        """Translate *last_event_id* into the absolute offset to resume from.

        Returns the offset just past the matching event, or the earliest
        retained offset when no ID was given or the ID is no longer buffered.
        """
        if last_event_id is None:
            return stream.start_offset

        position = next(
            (idx for idx, item in enumerate(stream.events) if item.id == last_event_id),
            None,
        )
        if position is not None:
            return stream.start_offset + position + 1

        if stream.events:
            logger.warning(
                "last_event_id=%s not found in retained buffer; replaying from earliest retained event",
                last_event_id,
            )
        return stream.start_offset

    # -- StreamBridge API ------------------------------------------------------

    async def publish(self, run_id: str, event: str, data: Any) -> None:
        """Append an event to the log of *run_id* and wake its subscribers."""
        stream = self._get_or_create_stream(run_id)
        record = StreamEvent(id=self._next_id(run_id), event=event, data=data)
        async with stream.condition:
            stream.events.append(record)
            # Drop the oldest entries once the log exceeds its bound.
            excess = len(stream.events) - self._maxsize
            if excess > 0:
                del stream.events[:excess]
                stream.start_offset += excess
            stream.condition.notify_all()

    async def publish_end(self, run_id: str) -> None:
        """Mark the stream of *run_id* as ended and wake its subscribers."""
        stream = self._get_or_create_stream(run_id)
        async with stream.condition:
            stream.ended = True
            stream.condition.notify_all()

    async def subscribe(
        self,
        run_id: str,
        *,
        last_event_id: str | None = None,
        heartbeat_interval: float = 15.0,
    ) -> AsyncIterator[StreamEvent]:
        """Yield the events of *run_id*, replaying retained ones first.

        Yields ``HEARTBEAT_SENTINEL`` whenever *heartbeat_interval* seconds
        pass without a wake-up. Once the stream has ended and every retained
        event has been delivered, yields ``END_SENTINEL`` and stops.
        """
        stream = self._get_or_create_stream(run_id)
        async with stream.condition:
            cursor = self._resolve_start_offset(stream, last_event_id)

        while True:
            async with stream.condition:
                # The log may have been trimmed past this subscriber's position.
                if cursor < stream.start_offset:
                    logger.warning(
                        "subscriber for run %s fell behind retained buffer; resuming from offset %s",
                        run_id,
                        stream.start_offset,
                    )
                    cursor = stream.start_offset

                index = cursor - stream.start_offset
                if 0 <= index < len(stream.events):
                    item = stream.events[index]
                    cursor += 1
                elif stream.ended:
                    item = END_SENTINEL
                else:
                    try:
                        await asyncio.wait_for(stream.condition.wait(), timeout=heartbeat_interval)
                    except TimeoutError:
                        item = HEARTBEAT_SENTINEL
                    else:
                        # Woken by a publisher: re-evaluate the log state.
                        continue

            yield item
            if item is END_SENTINEL:
                return

    async def cleanup(self, run_id: str, *, delay: float = 0) -> None:
        """Forget the stream and counter of *run_id*, after *delay* seconds if positive."""
        if delay > 0:
            await asyncio.sleep(delay)
        for table in (self._streams, self._counters):
            table.pop(run_id, None)

    async def close(self) -> None:
        """Drop every stream and counter held by this bridge."""
        self._streams.clear()
        self._counters.clear()
|
||||
Reference in New Issue
Block a user