Merge remote-tracking branch 'origin/main' into codex/im-channel-connections

# Conflicts:
#	backend/app/channels/discord.py
#	backend/app/channels/manager.py
#	backend/app/channels/slack.py
#	backend/app/channels/telegram.py
This commit is contained in:
taohe
2026-06-10 21:13:02 +08:00
85 changed files with 5575 additions and 253 deletions
+2
View File
@@ -6,6 +6,7 @@ from contextlib import asynccontextmanager
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from app.gateway.auth_disabled import warn_if_auth_disabled_enabled
from app.gateway.auth_middleware import AuthMiddleware
from app.gateway.config import get_gateway_config
from app.gateway.csrf_middleware import CSRFMiddleware, get_configured_cors_origins
@@ -173,6 +174,7 @@ async def lifespan(app: FastAPI) -> AsyncGenerator[None, None]:
startup_config = get_app_config()
apply_logging_level(startup_config.log_level)
logger.info("Configuration loaded successfully")
warn_if_auth_disabled_enabled()
except Exception as e:
error_msg = f"Failed to load configuration during gateway startup: {e}"
logger.exception(error_msg)
+54
View File
@@ -0,0 +1,54 @@
"""Shared helpers for local/E2E auth-disabled mode."""
from __future__ import annotations
import logging
import os
from types import SimpleNamespace
AUTH_DISABLED_ENV_VAR = "DEER_FLOW_AUTH_DISABLED"
AUTH_DISABLED_USER_ID = "e2e-user"
AUTH_DISABLED_USER_EMAIL = "e2e@test.local"
AUTH_SOURCE_SESSION = "session"
AUTH_SOURCE_INTERNAL = "internal"
AUTH_SOURCE_AUTH_DISABLED = "auth_disabled"
_PRODUCTION_ENV_VARS: tuple[str, ...] = ("DEER_FLOW_ENV", "ENVIRONMENT")
_PRODUCTION_ENV_VALUES: frozenset[str] = frozenset({"prod", "production"})
logger = logging.getLogger(__name__)
def is_explicit_production_environment() -> bool:
return any(os.environ.get(name, "").strip().lower() in _PRODUCTION_ENV_VALUES for name in _PRODUCTION_ENV_VARS)
def is_auth_disabled_requested() -> bool:
return os.environ.get(AUTH_DISABLED_ENV_VAR) == "1"
def is_auth_disabled() -> bool:
return is_auth_disabled_requested() and not is_explicit_production_environment()
def warn_if_auth_disabled_enabled() -> None:
if not is_auth_disabled():
return
logger.warning(
"%s=1 is active: authentication is bypassed and anonymous requests run as synthetic admin user %r. Do not enable this in shared or production deployments.",
AUTH_DISABLED_ENV_VAR,
AUTH_DISABLED_USER_ID,
)
def get_auth_disabled_user():
return SimpleNamespace(
id=AUTH_DISABLED_USER_ID,
email=AUTH_DISABLED_USER_EMAIL,
password_hash=None,
system_role="admin",
needs_setup=False,
token_version=0,
)
+39 -22
View File
@@ -17,6 +17,13 @@ from starlette.responses import JSONResponse
from starlette.types import ASGIApp
from app.gateway.auth.errors import AuthErrorCode, AuthErrorResponse
from app.gateway.auth_disabled import (
AUTH_SOURCE_AUTH_DISABLED,
AUTH_SOURCE_INTERNAL,
AUTH_SOURCE_SESSION,
get_auth_disabled_user,
is_auth_disabled,
)
from app.gateway.authz import _ALL_PERMISSIONS, AuthContext
from app.gateway.internal_auth import INTERNAL_AUTH_HEADER_NAME, get_internal_user, is_valid_internal_auth_token
from deerflow.runtime.user_context import reset_current_user, set_current_user
@@ -83,8 +90,38 @@ class AuthMiddleware(BaseHTTPMiddleware):
if is_valid_internal_auth_token(request.headers.get(INTERNAL_AUTH_HEADER_NAME)):
internal_user = get_internal_user()
auth_source = AUTH_SOURCE_SESSION
access_token = request.cookies.get("access_token")
# Non-public path: require session cookie
if internal_user is None and not request.cookies.get("access_token"):
if internal_user is not None:
user = internal_user
auth_source = AUTH_SOURCE_INTERNAL
elif access_token:
# Strict JWT validation: reject junk/expired tokens with 401
# right here instead of silently passing through. This closes
# the "junk cookie bypass" gap (AUTH_TEST_PLAN test 7.5.8):
# without this, non-isolation routes like /api/models would
# accept any cookie-shaped string as authentication.
#
# We call the *strict* resolver so that fine-grained error
# codes (token_expired, token_invalid, user_not_found, …)
# propagate from AuthErrorCode, not get flattened into one
# generic code. BaseHTTPMiddleware doesn't let HTTPException
# bubble up, so we catch and render it as JSONResponse here.
from app.gateway.deps import get_current_user_from_request
try:
user = await get_current_user_from_request(request)
except HTTPException as exc:
if not is_auth_disabled():
return JSONResponse(status_code=exc.status_code, content={"detail": exc.detail})
user = get_auth_disabled_user()
auth_source = AUTH_SOURCE_AUTH_DISABLED
elif is_auth_disabled():
user = get_auth_disabled_user()
auth_source = AUTH_SOURCE_AUTH_DISABLED
else:
return JSONResponse(
status_code=401,
content={
@@ -95,32 +132,12 @@ class AuthMiddleware(BaseHTTPMiddleware):
},
)
# Strict JWT validation: reject junk/expired tokens with 401
# right here instead of silently passing through. This closes
# the "junk cookie bypass" gap (AUTH_TEST_PLAN test 7.5.8):
# without this, non-isolation routes like /api/models would
# accept any cookie-shaped string as authentication.
#
# We call the *strict* resolver so that fine-grained error
# codes (token_expired, token_invalid, user_not_found, …)
# propagate from AuthErrorCode, not get flattened into one
# generic code. BaseHTTPMiddleware doesn't let HTTPException
# bubble up, so we catch and render it as JSONResponse here.
from app.gateway.deps import get_current_user_from_request
if internal_user is not None:
user = internal_user
else:
try:
user = await get_current_user_from_request(request)
except HTTPException as exc:
return JSONResponse(status_code=exc.status_code, content={"detail": exc.detail})
# Stamp both request.state.user (for the contextvar pattern)
# and request.state.auth (so @require_permission's "auth is
# None" branch short-circuits instead of running the entire
# JWT-decode + DB-lookup pipeline a second time per request).
request.state.user = user
request.state.auth_source = auth_source
request.state.auth = AuthContext(user=user, permissions=_ALL_PERMISSIONS)
token = set_current_user(user)
try:
+5
View File
@@ -14,6 +14,8 @@ from starlette.middleware.base import BaseHTTPMiddleware
from starlette.responses import JSONResponse
from starlette.types import ASGIApp
from app.gateway.auth_disabled import is_auth_disabled
CSRF_COOKIE_NAME = "csrf_token"
CSRF_HEADER_NAME = "X-CSRF-Token"
CSRF_TOKEN_LENGTH = 64 # bytes
@@ -38,6 +40,9 @@ def should_check_csrf(request: Request) -> bool:
if request.method not in ("POST", "PUT", "DELETE", "PATCH"):
return False
if is_auth_disabled():
return False
path = request.url.path.rstrip("/")
if path.startswith("/api/channels/webhooks/"):
return False
+11
View File
@@ -331,6 +331,17 @@ async def get_current_user_from_request(request: Request):
Raises HTTPException 401 if not authenticated.
"""
state = getattr(request, "state", None)
state_user = getattr(state, "user", None)
from app.gateway.auth_disabled import AUTH_SOURCE_AUTH_DISABLED, AUTH_SOURCE_INTERNAL, AUTH_SOURCE_SESSION
if state_user is not None and getattr(state, "auth_source", None) in {
AUTH_SOURCE_SESSION,
AUTH_SOURCE_AUTH_DISABLED,
AUTH_SOURCE_INTERNAL,
}:
return state_user
from app.gateway.auth import decode_token
from app.gateway.auth.errors import AuthErrorCode, AuthErrorResponse, TokenError, token_error_to_code
+7
View File
@@ -20,6 +20,7 @@ from langgraph_sdk import Auth
from app.gateway.auth.errors import TokenError
from app.gateway.auth.jwt import decode_token
from app.gateway.auth_disabled import AUTH_DISABLED_USER_ID, is_auth_disabled
from app.gateway.deps import get_local_provider
auth = Auth()
@@ -38,6 +39,9 @@ def _check_csrf(request) -> None:
if method.upper() not in _CSRF_METHODS:
return
if is_auth_disabled():
return
cookie_token = request.cookies.get("csrf_token")
header_token = request.headers.get("x-csrf-token")
@@ -66,6 +70,9 @@ async def authenticate(request):
# are rejected early, even if the cookie carries a valid JWT.
_check_csrf(request)
if is_auth_disabled():
return AUTH_DISABLED_USER_ID
token = request.cookies.get("access_token")
if not token:
raise Auth.exceptions.HTTPException(
+70 -45
View File
@@ -1,5 +1,6 @@
"""CRUD API for custom agents."""
import asyncio
import logging
import re
import shutil
@@ -213,48 +214,61 @@ async def create_agent_endpoint(request: AgentCreateRequest) -> AgentResponse:
user_id = get_effective_user_id()
paths = get_paths()
agent_dir = paths.user_agent_dir(user_id, normalized_name)
legacy_dir = paths.agent_dir(normalized_name)
def _create_agent() -> AgentResponse | None:
# Worker thread: base-dir resolution, existence checks, directory/file
# creation, read-back, and failure cleanup are all blocking filesystem
# IO that must stay off the event loop.
agent_dir = paths.user_agent_dir(user_id, normalized_name)
legacy_dir = paths.agent_dir(normalized_name)
if agent_dir.exists() or legacy_dir.exists():
raise HTTPException(status_code=409, detail=f"Agent '{normalized_name}' already exists")
if legacy_dir.exists():
return None # signals 409 to the caller
try:
try:
agent_dir.mkdir(parents=True, exist_ok=False)
except FileExistsError:
return None # signals 409 to the caller
# Write config.yaml
config_data: dict = {"name": normalized_name}
if request.description:
config_data["description"] = request.description
if request.model is not None:
config_data["model"] = request.model
if request.tool_groups is not None:
config_data["tool_groups"] = request.tool_groups
if request.skills is not None:
config_data["skills"] = request.skills
config_file = agent_dir / "config.yaml"
with open(config_file, "w", encoding="utf-8") as f:
yaml.dump(config_data, f, default_flow_style=False, allow_unicode=True)
# Write SOUL.md
soul_file = agent_dir / "SOUL.md"
soul_file.write_text(request.soul, encoding="utf-8")
logger.info(f"Created agent '{normalized_name}' at {agent_dir}")
agent_cfg = load_agent_config(normalized_name, user_id=user_id)
return _agent_config_to_response(agent_cfg, include_soul=True, user_id=user_id)
except Exception:
# Clean up partial state on failure before surfacing the error.
if agent_dir.exists():
shutil.rmtree(agent_dir)
raise
try:
agent_dir.mkdir(parents=True, exist_ok=True)
# Write config.yaml
config_data: dict = {"name": normalized_name}
if request.description:
config_data["description"] = request.description
if request.model is not None:
config_data["model"] = request.model
if request.tool_groups is not None:
config_data["tool_groups"] = request.tool_groups
if request.skills is not None:
config_data["skills"] = request.skills
config_file = agent_dir / "config.yaml"
with open(config_file, "w", encoding="utf-8") as f:
yaml.dump(config_data, f, default_flow_style=False, allow_unicode=True)
# Write SOUL.md
soul_file = agent_dir / "SOUL.md"
soul_file.write_text(request.soul, encoding="utf-8")
logger.info(f"Created agent '{normalized_name}' at {agent_dir}")
agent_cfg = load_agent_config(normalized_name, user_id=user_id)
return _agent_config_to_response(agent_cfg, include_soul=True, user_id=user_id)
except HTTPException:
raise
response = await asyncio.to_thread(_create_agent)
except Exception as e:
# Clean up on failure
if agent_dir.exists():
shutil.rmtree(agent_dir)
logger.error(f"Failed to create agent '{request.name}': {e}", exc_info=True)
raise HTTPException(status_code=500, detail=f"Failed to create agent: {str(e)}")
if response is None:
raise HTTPException(status_code=409, detail=f"Agent '{normalized_name}' already exists")
return response
@router.put(
"/agents/{name}",
@@ -428,19 +442,30 @@ async def delete_agent(name: str) -> None:
name = _normalize_agent_name(name)
user_id = get_effective_user_id()
paths = get_paths()
agent_dir = paths.user_agent_dir(user_id, name)
if not agent_dir.exists():
if paths.agent_dir(name).exists():
raise HTTPException(
status_code=409,
detail=(f"Agent '{name}' only exists in the legacy shared layout and is not scoped to a user. Run scripts/migrate_user_isolation.py to move legacy agents into the per-user layout before deleting."),
)
raise HTTPException(status_code=404, detail=f"Agent '{name}' not found")
def _remove_agent_dir() -> tuple[str, str]:
# Runs in a worker thread: resolving the base dir, probing the directory
# (`exists`), and removing it (`rmtree`) are all blocking filesystem IO
# that must stay off the event loop.
agent_dir = paths.user_agent_dir(user_id, name)
if not agent_dir.exists():
outcome = "legacy" if paths.agent_dir(name).exists() else "missing"
return outcome, str(agent_dir)
shutil.rmtree(agent_dir)
return "deleted", str(agent_dir)
try:
shutil.rmtree(agent_dir)
logger.info(f"Deleted agent '{name}' from {agent_dir}")
outcome, agent_dir = await asyncio.to_thread(_remove_agent_dir)
except Exception as e:
logger.error(f"Failed to delete agent '{name}': {e}", exc_info=True)
raise HTTPException(status_code=500, detail=f"Failed to delete agent: {str(e)}")
if outcome == "legacy":
raise HTTPException(
status_code=409,
detail=(f"Agent '{name}' only exists in the legacy shared layout and is not scoped to a user. Run scripts/migrate_user_isolation.py to move legacy agents into the per-user layout before deleting."),
)
if outcome == "missing":
raise HTTPException(status_code=404, detail=f"Agent '{name}' not found")
logger.info(f"Deleted agent '{name}' from {agent_dir}")
+10
View File
@@ -341,9 +341,19 @@ async def change_password(request: Request, response: Response, body: ChangePass
- Re-issues session cookie with new token_version
"""
from app.gateway.auth.password import hash_password_async, verify_password_async
from app.gateway.auth_disabled import AUTH_SOURCE_AUTH_DISABLED
user = await get_current_user_from_request(request)
if getattr(request.state, "auth_source", None) == AUTH_SOURCE_AUTH_DISABLED:
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail=AuthErrorResponse(
code=AuthErrorCode.INVALID_CREDENTIALS,
message="Password changes are not available when DEER_FLOW_AUTH_DISABLED=1.",
).model_dump(),
)
if user.password_hash is None:
raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=AuthErrorResponse(code=AuthErrorCode.INVALID_CREDENTIALS, message="OAuth users cannot change password").model_dump())