mirror of
https://github.com/bytedance/deer-flow.git
synced 2026-06-11 09:55:59 +00:00
Merge remote-tracking branch 'origin/main' into codex/im-channel-connections
# Conflicts: # backend/app/gateway/services.py # frontend/src/app/workspace/chats/page.tsx
This commit is contained in:
+20
-14
@@ -185,21 +185,27 @@ async def lifespan(app: FastAPI) -> AsyncGenerator[None, None]:
|
||||
# Pre-warm tiktoken encoding cache so the first memory-injection request
|
||||
# never blocks on the BPE data download (which hits an OpenAI/Azure URL
|
||||
# that may be unreachable in restricted networks — see issue #3402).
|
||||
try:
|
||||
from deerflow.agents.memory.prompt import warm_tiktoken_cache
|
||||
# When memory.token_counting is "char", token counting never touches
|
||||
# tiktoken, so skip the warm-up entirely (avoids even the 5s probe in
|
||||
# network-restricted deployments — see issue #3429).
|
||||
if startup_config.memory.token_counting == "char":
|
||||
logger.info("memory.token_counting='char'; skipping tiktoken warm-up (network-free token estimation)")
|
||||
else:
|
||||
try:
|
||||
from deerflow.agents.memory.prompt import warm_tiktoken_cache
|
||||
|
||||
warmed = await asyncio.wait_for(
|
||||
asyncio.to_thread(warm_tiktoken_cache),
|
||||
timeout=5,
|
||||
)
|
||||
if warmed:
|
||||
logger.info("tiktoken encoding cache warmed successfully")
|
||||
else:
|
||||
logger.warning("tiktoken encoding cache warm-up failed; token counting will use character-based fallback")
|
||||
except TimeoutError:
|
||||
logger.warning("tiktoken encoding cache warm-up timed out; token counting will use character-based fallback")
|
||||
except Exception:
|
||||
logger.warning("tiktoken warm-up skipped", exc_info=True)
|
||||
warmed = await asyncio.wait_for(
|
||||
asyncio.to_thread(warm_tiktoken_cache),
|
||||
timeout=5,
|
||||
)
|
||||
if warmed:
|
||||
logger.info("tiktoken encoding cache warmed successfully")
|
||||
else:
|
||||
logger.warning("tiktoken encoding cache warm-up failed; token counting will use character-based fallback until tiktoken loads successfully")
|
||||
except TimeoutError:
|
||||
logger.warning("tiktoken encoding cache warm-up timed out; token counting will use character-based fallback until tiktoken loads successfully")
|
||||
except Exception:
|
||||
logger.warning("tiktoken warm-up skipped", exc_info=True)
|
||||
|
||||
# Initialize LangGraph runtime components (StreamBridge, RunManager, checkpointer, store)
|
||||
async with langgraph_runtime(app, startup_config):
|
||||
|
||||
@@ -98,6 +98,7 @@ class MemoryConfigResponse(BaseModel):
|
||||
fact_confidence_threshold: float = Field(..., description="Minimum confidence threshold for facts")
|
||||
injection_enabled: bool = Field(..., description="Whether memory injection is enabled")
|
||||
max_injection_tokens: int = Field(..., description="Maximum tokens for memory injection")
|
||||
token_counting: str = Field(..., description="Token counting strategy for memory injection ('tiktoken' or 'char')")
|
||||
|
||||
|
||||
class MemoryStatusResponse(BaseModel):
|
||||
@@ -310,7 +311,8 @@ async def get_memory_config_endpoint() -> MemoryConfigResponse:
|
||||
"max_facts": 100,
|
||||
"fact_confidence_threshold": 0.7,
|
||||
"injection_enabled": true,
|
||||
"max_injection_tokens": 2000
|
||||
"max_injection_tokens": 2000,
|
||||
"token_counting": "tiktoken"
|
||||
}
|
||||
```
|
||||
"""
|
||||
@@ -323,6 +325,7 @@ async def get_memory_config_endpoint() -> MemoryConfigResponse:
|
||||
fact_confidence_threshold=config.fact_confidence_threshold,
|
||||
injection_enabled=config.injection_enabled,
|
||||
max_injection_tokens=config.max_injection_tokens,
|
||||
token_counting=config.token_counting,
|
||||
)
|
||||
|
||||
|
||||
@@ -351,6 +354,7 @@ async def get_memory_status() -> MemoryStatusResponse:
|
||||
fact_confidence_threshold=config.fact_confidence_threshold,
|
||||
injection_enabled=config.injection_enabled,
|
||||
max_injection_tokens=config.max_injection_tokens,
|
||||
token_counting=config.token_counting,
|
||||
),
|
||||
data=MemoryResponse(**memory_data),
|
||||
)
|
||||
|
||||
@@ -318,6 +318,21 @@ async def start_run(
|
||||
)
|
||||
|
||||
owner_user_id = get_trusted_internal_owner_user_id(request)
|
||||
# Stateless run endpoints carry thread_id in the request *body*, so the
|
||||
# @require_permission(owner_check=True) decorator -- which resolves ownership
|
||||
# from the path param -- cannot protect them. Enforce thread ownership here,
|
||||
# before any run is created, so one user cannot start runs on (or read /wait
|
||||
# checkpoint state from) another user's thread. Missing rows (auto-created
|
||||
# temp threads) and NULL-owner rows (shared / pre-auth data) stay accessible
|
||||
# via check_access; only a thread already owned by another user is rejected
|
||||
# with 404, matching thread_runs.py's anti-enumeration behaviour. Internal
|
||||
# channel runs act on behalf of IM users they do not own (see
|
||||
# inject_authenticated_user_context), so the internal system role is exempt.
|
||||
user = getattr(request.state, "user", None)
|
||||
if user is not None and getattr(user, "system_role", None) != INTERNAL_SYSTEM_ROLE:
|
||||
if not await run_ctx.thread_store.check_access(thread_id, str(user.id)):
|
||||
raise HTTPException(status_code=404, detail=f"Thread {thread_id} not found")
|
||||
|
||||
owner_context_token = set_current_user(SimpleNamespace(id=owner_user_id)) if owner_user_id else None
|
||||
try:
|
||||
try:
|
||||
|
||||
Reference in New Issue
Block a user