mirror of
https://github.com/bytedance/deer-flow.git
synced 2026-05-24 17:06:00 +00:00
Fix duplicate gateway upload filenames (#2789)
This commit is contained in:
@@ -520,6 +520,7 @@ Multi-file upload with automatic document conversion:
|
|||||||
- Rejects directory inputs before copying so uploads stay all-or-nothing
|
- Rejects directory inputs before copying so uploads stay all-or-nothing
|
||||||
- Reuses one conversion worker per request when called from an active event loop
|
- Reuses one conversion worker per request when called from an active event loop
|
||||||
- Files stored in thread-isolated directories
|
- Files stored in thread-isolated directories
|
||||||
|
- Duplicate filenames in a single upload request are auto-renamed with `_N` suffixes so later files do not truncate earlier files
|
||||||
- Agent receives uploaded file list via `UploadsMiddleware`
|
- Agent receives uploaded file list via `UploadsMiddleware`
|
||||||
|
|
||||||
See [docs/FILE_UPLOAD.md](docs/FILE_UPLOAD.md) for details.
|
See [docs/FILE_UPLOAD.md](docs/FILE_UPLOAD.md) for details.
|
||||||
|
|||||||
+1
-1
@@ -124,7 +124,7 @@ FastAPI application providing REST endpoints for frontend integration:
|
|||||||
| `POST /api/memory/reload` | Force memory reload |
|
| `POST /api/memory/reload` | Force memory reload |
|
||||||
| `GET /api/memory/config` | Memory configuration |
|
| `GET /api/memory/config` | Memory configuration |
|
||||||
| `GET /api/memory/status` | Combined config + data |
|
| `GET /api/memory/status` | Combined config + data |
|
||||||
| `POST /api/threads/{id}/uploads` | Upload files (auto-converts PDF/PPT/Excel/Word to Markdown, rejects directory paths) |
|
| `POST /api/threads/{id}/uploads` | Upload files (auto-converts PDF/PPT/Excel/Word to Markdown, rejects directory paths, auto-renames duplicate filenames in one request) |
|
||||||
| `GET /api/threads/{id}/uploads/list` | List uploaded files |
|
| `GET /api/threads/{id}/uploads/list` | List uploaded files |
|
||||||
| `DELETE /api/threads/{id}` | Delete DeerFlow-managed local thread data after LangGraph thread deletion; unexpected failures are logged server-side and return a generic 500 detail |
|
| `DELETE /api/threads/{id}` | Delete DeerFlow-managed local thread data after LangGraph thread deletion; unexpected failures are logged server-side and return a generic 500 detail |
|
||||||
| `GET /api/threads/{id}/artifacts/{path}` | Serve generated artifacts |
|
| `GET /api/threads/{id}/artifacts/{path}` | Serve generated artifacts |
|
||||||
|
|||||||
@@ -16,6 +16,7 @@ from deerflow.sandbox.sandbox_provider import SandboxProvider, get_sandbox_provi
|
|||||||
from deerflow.uploads.manager import (
|
from deerflow.uploads.manager import (
|
||||||
PathTraversalError,
|
PathTraversalError,
|
||||||
UnsafeUploadPathError,
|
UnsafeUploadPathError,
|
||||||
|
claim_unique_filename,
|
||||||
delete_file_safe,
|
delete_file_safe,
|
||||||
enrich_file_listing,
|
enrich_file_listing,
|
||||||
ensure_uploads_dir,
|
ensure_uploads_dir,
|
||||||
@@ -192,6 +193,10 @@ async def upload_files(
|
|||||||
sandbox_sync_targets = []
|
sandbox_sync_targets = []
|
||||||
skipped_files = []
|
skipped_files = []
|
||||||
total_size = 0
|
total_size = 0
|
||||||
|
# Track filenames within this request so duplicate form parts do not
|
||||||
|
# silently truncate each other. Existing uploads keep the historical
|
||||||
|
# overwrite behavior for a single replacement upload.
|
||||||
|
seen_filenames: set[str] = set()
|
||||||
|
|
||||||
sandbox_provider = get_sandbox_provider()
|
sandbox_provider = get_sandbox_provider()
|
||||||
sync_to_sandbox = not _uses_thread_data_mounts(sandbox_provider)
|
sync_to_sandbox = not _uses_thread_data_mounts(sandbox_provider)
|
||||||
@@ -208,7 +213,8 @@ async def upload_files(
|
|||||||
continue
|
continue
|
||||||
|
|
||||||
try:
|
try:
|
||||||
safe_filename = normalize_filename(file.filename)
|
original_filename = normalize_filename(file.filename)
|
||||||
|
safe_filename = claim_unique_filename(original_filename, seen_filenames)
|
||||||
except ValueError:
|
except ValueError:
|
||||||
logger.warning(f"Skipping file with unsafe filename: {file.filename!r}")
|
logger.warning(f"Skipping file with unsafe filename: {file.filename!r}")
|
||||||
continue
|
continue
|
||||||
@@ -236,6 +242,8 @@ async def upload_files(
|
|||||||
"virtual_path": virtual_path,
|
"virtual_path": virtual_path,
|
||||||
"artifact_url": upload_artifact_url(thread_id, safe_filename),
|
"artifact_url": upload_artifact_url(thread_id, safe_filename),
|
||||||
}
|
}
|
||||||
|
if safe_filename != original_filename:
|
||||||
|
file_info["original_filename"] = original_filename
|
||||||
|
|
||||||
logger.info(f"Saved file: {safe_filename} ({file_size} bytes) to {file_info['path']}")
|
logger.info(f"Saved file: {safe_filename} ({file_size} bytes) to {file_info['path']}")
|
||||||
|
|
||||||
|
|||||||
@@ -61,6 +61,39 @@ def test_upload_files_writes_thread_storage_and_skips_local_sandbox_sync(tmp_pat
|
|||||||
sandbox.update_file.assert_not_called()
|
sandbox.update_file.assert_not_called()
|
||||||
|
|
||||||
|
|
||||||
|
def test_upload_files_auto_renames_duplicate_form_filenames(tmp_path):
|
||||||
|
thread_uploads_dir = tmp_path / "uploads"
|
||||||
|
thread_uploads_dir.mkdir(parents=True)
|
||||||
|
|
||||||
|
provider = MagicMock()
|
||||||
|
provider.uses_thread_data_mounts = True
|
||||||
|
|
||||||
|
with (
|
||||||
|
patch.object(uploads, "get_uploads_dir", return_value=thread_uploads_dir),
|
||||||
|
patch.object(uploads, "ensure_uploads_dir", return_value=thread_uploads_dir),
|
||||||
|
patch.object(uploads, "get_sandbox_provider", return_value=provider),
|
||||||
|
):
|
||||||
|
result = asyncio.run(
|
||||||
|
call_unwrapped(
|
||||||
|
uploads.upload_files,
|
||||||
|
"thread-local",
|
||||||
|
request=MagicMock(),
|
||||||
|
files=[
|
||||||
|
UploadFile(filename="data.txt", file=BytesIO(b"first")),
|
||||||
|
UploadFile(filename="data.txt", file=BytesIO(b"second")),
|
||||||
|
],
|
||||||
|
config=SimpleNamespace(),
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
assert result.success is True
|
||||||
|
assert [file_info["filename"] for file_info in result.files] == ["data.txt", "data_1.txt"]
|
||||||
|
assert "original_filename" not in result.files[0]
|
||||||
|
assert result.files[1]["original_filename"] == "data.txt"
|
||||||
|
assert (thread_uploads_dir / "data.txt").read_bytes() == b"first"
|
||||||
|
assert (thread_uploads_dir / "data_1.txt").read_bytes() == b"second"
|
||||||
|
|
||||||
|
|
||||||
def test_upload_files_skips_acquire_when_thread_data_is_mounted(tmp_path):
|
def test_upload_files_skips_acquire_when_thread_data_is_mounted(tmp_path):
|
||||||
thread_uploads_dir = tmp_path / "uploads"
|
thread_uploads_dir = tmp_path / "uploads"
|
||||||
thread_uploads_dir.mkdir(parents=True)
|
thread_uploads_dir.mkdir(parents=True)
|
||||||
|
|||||||
Reference in New Issue
Block a user