mirror of
https://github.com/bytedance/deer-flow.git
synced 2026-05-22 07:56:48 +00:00
fix(sandbox): scope provisioner PVC data by user (#2973)
* fix(sandbox): scope provisioner PVC data by user
* Address provisioner PVC review feedback
This commit is contained in:
@@ -37,7 +37,7 @@ services:
|
||||
- THREADS_HOST_PATH=${DEER_FLOW_ROOT}/backend/.deer-flow/threads
|
||||
# Production: use PVC instead of hostPath to avoid data loss on node failure.
|
||||
# When set, hostPath vars above are ignored for the corresponding volume.
|
||||
# USERDATA_PVC_NAME uses subPath (threads/{thread_id}/user-data) automatically.
|
||||
# USERDATA_PVC_NAME uses subPath (deer-flow/users/{user_id}/threads/{thread_id}/user-data) automatically.
|
||||
# - SKILLS_PVC_NAME=deer-flow-skills-pvc
|
||||
# - USERDATA_PVC_NAME=deer-flow-userdata-pvc
|
||||
- KUBECONFIG_PATH=/root/.kube/config
|
||||
|
||||
@@ -20,7 +20,7 @@ The **Sandbox Provisioner** is a FastAPI service that dynamically manages sandbo
|
||||
|
||||
### How It Works
|
||||
|
||||
1. **Backend Request**: When the backend needs to execute code, it sends a `POST /api/sandboxes` request with a `sandbox_id` and `thread_id`.
|
||||
1. **Backend Request**: When the backend needs to execute code, it sends a `POST /api/sandboxes` request with a `sandbox_id`, a `thread_id`, and an optional `user_id`.
|
||||
|
||||
2. **Pod Creation**: The provisioner creates a dedicated Pod in the `deer-flow` namespace with:
|
||||
- The sandbox container image (all-in-one-sandbox)
|
||||
@@ -70,10 +70,13 @@ Create a new sandbox Pod + Service.
|
||||
```json
|
||||
{
|
||||
"sandbox_id": "abc-123",
|
||||
"thread_id": "thread-456"
|
||||
"thread_id": "thread-456",
|
||||
"user_id": "user-789"
|
||||
}
|
||||
```
|
||||
|
||||
`user_id` is optional for backwards compatibility and defaults to `default`. When `USERDATA_PVC_NAME` is set, the provisioner uses `user_id` to isolate PVC-backed user-data directories per user.
|
||||
|
||||
**Response**:
|
||||
```json
|
||||
{
|
||||
@@ -138,11 +141,25 @@ The provisioner is configured via environment variables (set in [docker-compose-
|
||||
| `SKILLS_HOST_PATH` | - | **Host machine** path to skills directory (must be absolute) |
|
||||
| `THREADS_HOST_PATH` | - | **Host machine** path to threads data directory (must be absolute) |
|
||||
| `SKILLS_PVC_NAME` | empty (use hostPath) | PVC name for skills volume; when set, sandbox Pods use PVC instead of hostPath |
|
||||
| `USERDATA_PVC_NAME` | empty (use hostPath) | PVC name for user-data volume; when set, uses PVC with `subPath: threads/{thread_id}/user-data` |
|
||||
| `USERDATA_PVC_NAME` | empty (use hostPath) | PVC name for user-data volume; when set, uses PVC with `subPath: deer-flow/users/{user_id}/threads/{thread_id}/user-data` |
|
||||
| `KUBECONFIG_PATH` | `/root/.kube/config` | Path to kubeconfig **inside** the provisioner container |
|
||||
| `NODE_HOST` | `host.docker.internal` | Hostname that backend containers use to reach host NodePorts |
|
||||
| `K8S_API_SERVER` | (from kubeconfig) | Override K8s API server URL (e.g., `https://host.docker.internal:26443`) |
|
||||
|
||||
### PVC User-Data Upgrade Note
|
||||
|
||||
Older provisioner versions mounted PVC user-data from `threads/{thread_id}/user-data`. The user-scoped layout mounts from `deer-flow/users/{user_id}/threads/{thread_id}/user-data`.
|
||||
|
||||
If an existing deployment already has PVC-backed user-data under the legacy layout, migrate the DeerFlow data directory before relying on the new PVC subPath. Mount the same PVC path that the gateway uses as its DeerFlow base directory, then run the existing user-isolation migration script:
|
||||
|
||||
```bash
|
||||
cd backend
|
||||
PYTHONPATH=. python scripts/migrate_user_isolation.py --dry-run
|
||||
PYTHONPATH=. python scripts/migrate_user_isolation.py --user-id <target-user-id>
|
||||
```
|
||||
|
||||
This moves legacy `threads/{thread_id}/user-data` data under `users/<target-user-id>/threads/{thread_id}/user-data`. When the gateway base directory is mounted at `deer-flow/` on the PVC, that location corresponds to the new provisioner PVC subPath `deer-flow/users/{user_id}/threads/{thread_id}/user-data`. Use `default` as the target user only when the legacy data should remain in the default no-auth user namespace. Run the migration while no gateway or sandbox Pods are writing to those paths.
|
||||
|
||||
### Important: K8S_API_SERVER Override
|
||||
|
||||
If your kubeconfig uses `localhost`, `127.0.0.1`, or `0.0.0.0` as the API server address (common with OrbStack, minikube, kind), the provisioner **cannot** reach it from inside the Docker container.
|
||||
@@ -213,7 +230,7 @@ curl http://localhost:8002/health
|
||||
# Create a sandbox (via provisioner container for internal DNS)
|
||||
docker exec deer-flow-provisioner curl -X POST http://localhost:8002/api/sandboxes \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{"sandbox_id":"test-001","thread_id":"thread-001"}'
|
||||
-d '{"sandbox_id":"test-001","thread_id":"thread-001","user_id":"user-001"}'
|
||||
|
||||
# Check sandbox status
|
||||
docker exec deer-flow-provisioner curl http://localhost:8002/api/sandboxes/test-001
|
||||
|
||||
+13
-15
@@ -63,6 +63,8 @@ THREADS_HOST_PATH = os.environ.get("THREADS_HOST_PATH", "/.deer-flow/threads")
|
||||
SKILLS_PVC_NAME = os.environ.get("SKILLS_PVC_NAME", "")
|
||||
USERDATA_PVC_NAME = os.environ.get("USERDATA_PVC_NAME", "")
|
||||
SAFE_THREAD_ID_PATTERN = r"^[A-Za-z0-9_\-]+$"
|
||||
SAFE_USER_ID_PATTERN = r"^[A-Za-z0-9_\-]+$"
|
||||
DEFAULT_USER_ID = "default"
|
||||
|
||||
# Path to the kubeconfig *inside* the provisioner container.
|
||||
# Typically the host's ~/.kube/config is mounted here.
|
||||
@@ -95,14 +97,6 @@ def join_host_path(base: str, *parts: str) -> str:
|
||||
return str(result)
|
||||
|
||||
|
||||
def _validate_thread_id(thread_id: str) -> str:
|
||||
if not re.match(SAFE_THREAD_ID_PATTERN, thread_id):
|
||||
raise ValueError(
|
||||
"Invalid thread_id: only alphanumeric characters, hyphens, and underscores are allowed."
|
||||
)
|
||||
return thread_id
|
||||
|
||||
|
||||
# ── K8s client setup ────────────────────────────────────────────────────
|
||||
|
||||
core_v1: k8s_client.CoreV1Api | None = None
|
||||
@@ -221,6 +215,7 @@ app = FastAPI(title="DeerFlow Sandbox Provisioner", lifespan=lifespan)
|
||||
class CreateSandboxRequest(BaseModel):
|
||||
sandbox_id: str
|
||||
thread_id: str = Field(pattern=SAFE_THREAD_ID_PATTERN)
|
||||
user_id: str = Field(default=DEFAULT_USER_ID, pattern=SAFE_USER_ID_PATTERN)
|
||||
|
||||
|
||||
class SandboxResponse(BaseModel):
|
||||
@@ -283,7 +278,7 @@ def _build_volumes(thread_id: str) -> list[k8s_client.V1Volume]:
|
||||
return [skills_vol, userdata_vol]
|
||||
|
||||
|
||||
def _build_volume_mounts(thread_id: str) -> list[k8s_client.V1VolumeMount]:
|
||||
def _build_volume_mounts(thread_id: str, user_id: str = DEFAULT_USER_ID) -> list[k8s_client.V1VolumeMount]:
|
||||
"""Build volume mount list, using subPath for PVC user-data."""
|
||||
userdata_mount = k8s_client.V1VolumeMount(
|
||||
name="user-data",
|
||||
@@ -291,7 +286,7 @@ def _build_volume_mounts(thread_id: str) -> list[k8s_client.V1VolumeMount]:
|
||||
read_only=False,
|
||||
)
|
||||
if USERDATA_PVC_NAME:
|
||||
userdata_mount.sub_path = f"threads/{thread_id}/user-data"
|
||||
userdata_mount.sub_path = f"deer-flow/users/{user_id}/threads/{thread_id}/user-data"
|
||||
|
||||
return [
|
||||
k8s_client.V1VolumeMount(
|
||||
@@ -303,9 +298,8 @@ def _build_volume_mounts(thread_id: str) -> list[k8s_client.V1VolumeMount]:
|
||||
]
|
||||
|
||||
|
||||
def _build_pod(sandbox_id: str, thread_id: str) -> k8s_client.V1Pod:
|
||||
def _build_pod(sandbox_id: str, thread_id: str, user_id: str = DEFAULT_USER_ID) -> k8s_client.V1Pod:
|
||||
"""Construct a Pod manifest for a single sandbox."""
|
||||
thread_id = _validate_thread_id(thread_id)
|
||||
return k8s_client.V1Pod(
|
||||
metadata=k8s_client.V1ObjectMeta(
|
||||
name=_pod_name(sandbox_id),
|
||||
@@ -362,7 +356,7 @@ def _build_pod(sandbox_id: str, thread_id: str) -> k8s_client.V1Pod:
|
||||
"ephemeral-storage": "500Mi",
|
||||
},
|
||||
),
|
||||
volume_mounts=_build_volume_mounts(thread_id),
|
||||
volume_mounts=_build_volume_mounts(thread_id, user_id=user_id),
|
||||
security_context=k8s_client.V1SecurityContext(
|
||||
privileged=False,
|
||||
allow_privilege_escalation=True,
|
||||
@@ -445,9 +439,13 @@ async def create_sandbox(req: CreateSandboxRequest):
|
||||
"""
|
||||
sandbox_id = req.sandbox_id
|
||||
thread_id = req.thread_id
|
||||
user_id = req.user_id
|
||||
|
||||
logger.info(
|
||||
f"Received request to create sandbox '{sandbox_id}' for thread '{thread_id}'"
|
||||
"Received request to create sandbox '%s' for thread '%s' user '%s'",
|
||||
sandbox_id,
|
||||
thread_id,
|
||||
user_id,
|
||||
)
|
||||
|
||||
# ── Fast path: sandbox already exists ────────────────────────────
|
||||
@@ -461,7 +459,7 @@ async def create_sandbox(req: CreateSandboxRequest):
|
||||
|
||||
# ── Create Pod ───────────────────────────────────────────────────
|
||||
try:
|
||||
core_v1.create_namespaced_pod(K8S_NAMESPACE, _build_pod(sandbox_id, thread_id))
|
||||
core_v1.create_namespaced_pod(K8S_NAMESPACE, _build_pod(sandbox_id, thread_id, user_id=user_id))
|
||||
logger.info(f"Created Pod {_pod_name(sandbox_id)}")
|
||||
except ApiException as exc:
|
||||
if exc.status != 409: # 409 = AlreadyExists
|
||||
|
||||
Reference in New Issue
Block a user