mirror of
https://github.com/bytedance/deer-flow.git
synced 2026-05-21 23:46:50 +00:00
fix(security): harden auth system and fix run journal logic bug (#2593)
* fix(security): harden auth system and fix run journal logic bug
- Fix inverted condition in RunJournal.on_chat_model_start that prevented
first human message capture (not messages → messages)
- Pre-hash passwords with SHA-256 before bcrypt to avoid silent 72-byte
truncation vulnerability
- Move load_dotenv() from module scope into get_auth_config() to prevent
import-time os.environ mutation breaking test isolation
- Return generic ‘Invalid token’ instead of exposing specific error
variants (expired, malformed, invalid_signature) to clients
- Make @require_auth independently enforce 401 instead of silently
passing through when AuthMiddleware is absent
- Rate-limit /setup-status endpoint with per-IP cooldown to mitigate
initialization-state information leak
- Document in-process rate limiter limitation for multi-worker deployments
* fix(security): return 429+Retry-After on setup-status rate limit, bound cooldown dict
Agent-Logs-Url: https://github.com/bytedance/deer-flow/sessions/070d0be8-99a5-46c8-85bb-6b81b5284021
Co-authored-by: WillemJiang <219644+WillemJiang@users.noreply.github.com>
* fix(security): add versioned password hashes with auto-migration on login
The SHA-256 pre-hash change silently broke verification for any existing
bcrypt-only password hashes. Introduce a $dfv<N>$ prefix scheme so hashes
are self-describing:
- v2 (current): bcrypt(b64(sha256(password))) with $dfv2$ prefix
- v1 (legacy): plain bcrypt, prefixed $dfv1$ or bare (no prefix)
verify_password auto-detects the version and falls back to v1 for older
hashes. LocalAuthProvider.authenticate() now rehashes legacy hashes to v2
on successful login via needs_rehash(), so existing users upgrade
transparently without a dedicated migration step.
* fix(auth): harden verify_password, best-effort rehash, update require_auth docstring, downgrade journal logging
- password.py: wrap bcrypt.checkpw in try/except → return False for malformed/corrupt hashes instead of crashing
- local_provider.py: wrap auto-rehash update_user() in try/except so transient DB errors don't fail valid logins
- authz.py: update require_auth docstring to reflect independent 401 enforcement
- journal.py: downgrade on_chat_model_start from INFO to DEBUG, log only metadata (batch_count, message_counts) instead of full serialized/messages content
Agent-Logs-Url: https://github.com/bytedance/deer-flow/sessions/48c5cf31-a4ab-418a-982a-6343c37bb299
Co-authored-by: WillemJiang <219644+WillemJiang@users.noreply.github.com>
* fix(auth): address code review - narrow ValueError catch, add rehash warning log, rename num_batches
Agent-Logs-Url: https://github.com/bytedance/deer-flow/sessions/48c5cf31-a4ab-418a-982a-6343c37bb299
Co-authored-by: WillemJiang <219644+WillemJiang@users.noreply.github.com>
---------
Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com>
This commit is contained in:
+101
-4
@@ -4,12 +4,14 @@ from datetime import timedelta
|
||||
from unittest.mock import AsyncMock, MagicMock, patch
|
||||
from uuid import uuid4
|
||||
|
||||
import bcrypt
|
||||
import pytest
|
||||
from fastapi import FastAPI, HTTPException
|
||||
from fastapi.testclient import TestClient
|
||||
|
||||
from app.gateway.auth import create_access_token, decode_token, hash_password, verify_password
|
||||
from app.gateway.auth.models import User
|
||||
from app.gateway.auth.password import needs_rehash
|
||||
from app.gateway.authz import (
|
||||
AuthContext,
|
||||
Permissions,
|
||||
@@ -26,6 +28,7 @@ def test_hash_password_and_verify():
|
||||
password = "s3cr3tP@ssw0rd!"
|
||||
hashed = hash_password(password)
|
||||
assert hashed != password
|
||||
assert hashed.startswith("$dfv2$")
|
||||
assert verify_password(password, hashed) is True
|
||||
assert verify_password("wrongpassword", hashed) is False
|
||||
|
||||
@@ -47,6 +50,47 @@ def test_verify_password_rejects_empty():
|
||||
assert verify_password("", hashed) is False
|
||||
|
||||
|
||||
def test_hash_produces_v2_prefix():
    """A freshly generated hash carries the ``$dfv2$`` version marker."""
    assert hash_password("anypassword123").startswith("$dfv2$")
|
||||
|
||||
|
||||
def test_verify_v1_prefixed_hash():
    """A ``$dfv1$``-prefixed plain-bcrypt hash verifies only for the right password."""
    plaintext = "legacyP@ssw0rd"
    salt = bcrypt.gensalt()
    bcrypt_digest = bcrypt.hashpw(plaintext.encode("utf-8"), salt).decode("utf-8")
    # Legacy v1 layout: version marker followed directly by the bcrypt digest.
    legacy_hash = "$dfv1$" + bcrypt_digest

    assert verify_password(plaintext, legacy_hash) is True
    assert verify_password("wrong", legacy_hash) is False
|
||||
|
||||
|
||||
def test_verify_bare_bcrypt_hash():
    """An unprefixed bcrypt digest is accepted by verify_password as a v1 hash."""
    plaintext = "oldstyleP@ss"
    salt = bcrypt.gensalt()
    # No version marker at all: the stored value is the raw bcrypt output.
    bare_digest = bcrypt.hashpw(plaintext.encode("utf-8"), salt).decode("utf-8")

    assert verify_password(plaintext, bare_digest) is True
    assert verify_password("wrong", bare_digest) is False
|
||||
|
||||
|
||||
def test_needs_rehash_returns_false_for_v2():
    """Output of hash_password is already current, so no rehash is requested."""
    assert needs_rehash(hash_password("something")) is False
|
||||
|
||||
|
||||
def test_needs_rehash_returns_true_for_v1():
    """A ``$dfv1$``-prefixed hash is flagged for rehashing."""
    bcrypt_digest = bcrypt.hashpw(b"pw", bcrypt.gensalt()).decode("utf-8")
    legacy_hash = "$dfv1$" + bcrypt_digest
    assert needs_rehash(legacy_hash) is True
|
||||
|
||||
|
||||
def test_needs_rehash_returns_true_for_bare_bcrypt():
    """An unprefixed bcrypt digest is flagged for rehashing."""
    salt = bcrypt.gensalt()
    bare_digest = bcrypt.hashpw(b"pw", salt).decode("utf-8")
    assert needs_rehash(bare_digest) is True
|
||||
|
||||
|
||||
# ── JWT ─────────────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
@@ -166,7 +210,7 @@ def test_get_auth_context_set():
|
||||
|
||||
|
||||
def test_require_auth_sets_auth_context():
|
||||
"""require_auth sets auth context on request from cookie."""
|
||||
"""require_auth rejects unauthenticated requests with 401."""
|
||||
from fastapi import Request
|
||||
|
||||
app = FastAPI()
|
||||
@@ -178,10 +222,9 @@ def test_require_auth_sets_auth_context():
|
||||
return {"authenticated": ctx.is_authenticated}
|
||||
|
||||
with TestClient(app) as client:
|
||||
# No cookie → anonymous
|
||||
# No cookie → 401 (require_auth independently enforces authentication)
|
||||
response = client.get("/test")
|
||||
assert response.status_code == 200
|
||||
assert response.json()["authenticated"] is False
|
||||
assert response.status_code == 401
|
||||
|
||||
|
||||
def test_require_auth_requires_request_param():
|
||||
@@ -652,3 +695,57 @@ def test_missing_jwt_secret_generates_ephemeral(monkeypatch, caplog):
|
||||
|
||||
# Cleanup
|
||||
config_module._auth_config = None
|
||||
|
||||
|
||||
# ── Auto-rehash on login ──────────────────────────────────────────────────
|
||||
|
||||
|
||||
def test_authenticate_auto_rehashes_legacy_hash():
    """A successful login against a bare bcrypt hash yields a ``$dfv2$`` hash and one update."""
    import asyncio

    from app.gateway.auth.local_provider import LocalAuthProvider

    plaintext = "rehashTest123"
    # Stored credential is a bare bcrypt digest, i.e. the legacy format.
    legacy_hash = bcrypt.hashpw(plaintext.encode("utf-8"), bcrypt.gensalt()).decode("utf-8")
    stored_user = User(id=uuid4(), email="rehash@test.com", password_hash=legacy_hash)

    # Repository double: the lookup returns the legacy user, the update echoes it back.
    repo = MagicMock()
    repo.get_user_by_email = AsyncMock(return_value=stored_user)
    repo.update_user = AsyncMock(return_value=stored_user)

    credentials = {"email": "rehash@test.com", "password": plaintext}
    authenticated = asyncio.run(LocalAuthProvider(repo).authenticate(credentials))

    assert authenticated is not None
    assert authenticated.password_hash.startswith("$dfv2$")
    repo.update_user.assert_called_once()
|
||||
|
||||
|
||||
def test_authenticate_skips_rehash_for_v2_hash():
    """A login against a hash produced by hash_password must not call update_user."""
    import asyncio

    from app.gateway.auth.local_provider import LocalAuthProvider

    plaintext = "alreadyv2Pass!"
    current_hash = hash_password(plaintext)
    stored_user = User(id=uuid4(), email="v2@test.com", password_hash=current_hash)

    # Repository double: update_user is mocked only so the test can assert it is never awaited.
    repo = MagicMock()
    repo.get_user_by_email = AsyncMock(return_value=stored_user)
    repo.update_user = AsyncMock(return_value=stored_user)

    credentials = {"email": "v2@test.com", "password": plaintext}
    authenticated = asyncio.run(LocalAuthProvider(repo).authenticate(credentials))

    assert authenticated is not None
    repo.update_user.assert_not_called()
|
||||
|
||||
@@ -22,6 +22,7 @@ _TEST_SECRET = "test-secret-key-initialize-admin-min-32"
|
||||
def _setup_auth(tmp_path):
|
||||
"""Fresh SQLite engine + auth config per test."""
|
||||
from app.gateway import deps
|
||||
from app.gateway.routers.auth import _SETUP_STATUS_COOLDOWN
|
||||
from deerflow.persistence.engine import close_engine, init_engine
|
||||
|
||||
set_auth_config(AuthConfig(jwt_secret=_TEST_SECRET))
|
||||
@@ -29,11 +30,13 @@ def _setup_auth(tmp_path):
|
||||
asyncio.run(init_engine("sqlite", url=url, sqlite_dir=str(tmp_path)))
|
||||
deps._cached_local_provider = None
|
||||
deps._cached_repo = None
|
||||
_SETUP_STATUS_COOLDOWN.clear()
|
||||
try:
|
||||
yield
|
||||
finally:
|
||||
deps._cached_local_provider = None
|
||||
deps._cached_repo = None
|
||||
_SETUP_STATUS_COOLDOWN.clear()
|
||||
asyncio.run(close_engine())
|
||||
|
||||
|
||||
@@ -163,3 +166,17 @@ def test_setup_status_false_when_only_regular_user_exists(client):
|
||||
resp = client.get("/api/v1/auth/setup-status")
|
||||
assert resp.status_code == 200
|
||||
assert resp.json()["needs_setup"] is True
|
||||
|
||||
|
||||
def test_setup_status_rate_limited_on_second_call(client):
    """Two back-to-back /setup-status calls: the first returns 200, the second 429 with Retry-After."""
    endpoint = "/api/v1/auth/setup-status"

    # The initial request is served normally.
    first = client.get(endpoint)
    assert first.status_code == 200

    # An immediate repeat is rejected by the rate limit.
    second = client.get(endpoint)
    assert second.status_code == 429
    assert "Retry-After" in second.headers

    # The advertised wait must be a whole number of seconds between 1 and 60.
    wait_seconds = int(second.headers["Retry-After"])
    assert 1 <= wait_seconds <= 60
|
||||
|
||||
@@ -63,7 +63,7 @@ def test_invalid_jwt_raises_401():
|
||||
with pytest.raises(Auth.exceptions.HTTPException) as exc:
|
||||
asyncio.run(authenticate(_req({"access_token": "garbage"})))
|
||||
assert exc.value.status_code == 401
|
||||
assert "Token error" in str(exc.value.detail)
|
||||
assert "Invalid token" in str(exc.value.detail)
|
||||
|
||||
|
||||
def test_expired_jwt_raises_401():
|
||||
@@ -295,7 +295,7 @@ def test_csrf_post_matching_token_proceeds_to_jwt():
|
||||
)
|
||||
# Past CSRF, rejected by JWT decode
|
||||
assert exc.value.status_code == 401
|
||||
assert "Token error" in str(exc.value.detail)
|
||||
assert "Invalid token" in str(exc.value.detail)
|
||||
|
||||
|
||||
def test_csrf_put_requires_token():
|
||||
|
||||
Reference in New Issue
Block a user