mirror of
https://github.com/bytedance/deer-flow.git
synced 2026-05-21 07:26:50 +00:00
fix: improve JSON repair handling for markdown code blocks (#841)
* fix: improve JSON repair handling for markdown code blocks
* unified import path
* compress_crawl_udf
* fix
* reverse
This commit is contained in:
@@ -7,6 +7,7 @@ import re
|
||||
from typing import Any
|
||||
|
||||
import json_repair
|
||||
import re
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -121,6 +122,27 @@ def repair_json_output(content: str) -> str:
|
||||
if not content:
|
||||
return content
|
||||
|
||||
# Handle markdown code blocks (```json, ```ts, or ```)
|
||||
# This must be checked first, as content may start with ``` instead of { or [
|
||||
if "```" in content:
|
||||
# Remove opening markdown code block markers (```json, ```ts, or ```), allowing
|
||||
# optional leading spaces and multiple blank lines after the fence.
|
||||
content = re.sub(
|
||||
r'^[ \t]*```(?:json|ts)?[ \t]*\n+',
|
||||
'',
|
||||
content,
|
||||
flags=re.IGNORECASE | re.MULTILINE,
|
||||
)
|
||||
# Remove closing markdown code block markers (```), allowing optional
|
||||
# leading newlines and trailing spaces.
|
||||
content = re.sub(
|
||||
r'\n*```[ \t]*$',
|
||||
'',
|
||||
content,
|
||||
flags=re.MULTILINE,
|
||||
)
|
||||
content = content.strip()
|
||||
|
||||
# First attempt: try to extract valid JSON if there are extra tokens
|
||||
content = _extract_json_from_content(content)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user