fix(checkpointer): use AsyncConnectionPool for postgres to prevent stale connection errors (#3223) (#3226)

* fix(checkpointer): use AsyncConnectionPool for postgres to prevent stale connection errors (#3223)

  Replace AsyncPostgresSaver.from_conn_string() with an explicit
  AsyncConnectionPool that has check_connection enabled, so dead idle
  connections are detected and replaced on checkout instead of raising
  OperationalError.

* Potential fix for pull request finding

Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com>

* Fixed the unit test and lint errors

* fix(checkpointer): add TCP keepalive to postgres connection pool (#3254)

  Enable TCP keepalive probes on the AsyncConnectionPool to prevent
  idle postgres connections from being dropped by the server or network
  middleware. Combined with the existing check_connection callback, this
  provides defense-in-depth against stale connection errors.

  Fixes #3254

* Changed the code as suggested in review

---------

Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com>
This commit is contained in:
Willem Jiang
2026-06-01 09:05:11 +08:00
committed by GitHub
parent d6a604d5a1
commit 031d6fbcbe
2 changed files with 136 additions and 12 deletions
+93
View File
@@ -326,6 +326,99 @@ class TestAsyncCheckpointer:
mock_saver_cls.from_conn_string.assert_called_once_with("/tmp/resolved/test.db")
mock_saver.setup.assert_awaited_once()
@pytest.mark.anyio
async def test_postgres_uses_connection_pool(self):
    """Postgres checkpointer config should build the saver on an AsyncConnectionPool.

    The postgres saver module, ``psycopg.rows`` and ``psycopg_pool`` are all
    replaced with mocks in ``sys.modules``, so the test only checks how
    ``make_checkpointer`` wires them together: the pool receives the
    connection string and the ``check_connection`` callback, and the saver
    is constructed directly with ``conn=<pool>``.
    """
    from deerflow.runtime.checkpointer.async_provider import make_checkpointer

    conn_string = "postgresql://localhost/db"
    app_config = MagicMock()
    app_config.checkpointer = CheckpointerConfig(type="postgres", connection_string=conn_string)

    saver_instance = AsyncMock()
    saver_factory = MagicMock(return_value=saver_instance)

    # The pool instance doubles as its own async context manager result.
    pool = AsyncMock()
    pool.__aenter__.return_value = pool
    pool.__aexit__.return_value = False
    pool_factory = MagicMock(return_value=pool)
    pool_factory.check_connection = AsyncMock()

    fake_modules = {
        "langgraph.checkpoint.postgres.aio": MagicMock(AsyncPostgresSaver=saver_factory),
        "psycopg.rows": MagicMock(dict_row=MagicMock()),
        "psycopg_pool": MagicMock(AsyncConnectionPool=pool_factory),
    }

    with (
        patch("deerflow.runtime.checkpointer.async_provider.get_app_config", return_value=app_config),
        patch.dict(sys.modules, fake_modules),
    ):
        async with make_checkpointer() as saver:
            assert saver is saver_instance

        # Verify the pool was constructed with check_connection.
        pool_factory.assert_called_once()
        pool_call = pool_factory.call_args
        assert pool_call.args[0] == conn_string
        assert pool_call.kwargs["check"] is pool_factory.check_connection

        # Verify the saver was constructed with the pool (not via from_conn_string).
        saver_factory.assert_called_once_with(conn=pool)
        saver_instance.setup.assert_awaited_once()
@pytest.mark.anyio
async def test_database_postgres_uses_connection_pool(self):
    """Unified database postgres path should use AsyncConnectionPool with keepalive.

    Here ``config.checkpointer`` is ``None`` and the postgres URL comes from
    ``config.database``. The saver module, ``psycopg.rows`` and
    ``psycopg_pool`` are mocked in ``sys.modules``; the test checks that the
    pool receives the URL and the ``check_connection`` callback and that the
    saver is constructed with ``conn=<pool>``.
    """
    from deerflow.config.database_config import DatabaseConfig
    from deerflow.runtime.checkpointer.async_provider import make_checkpointer

    postgres_url = "postgresql://localhost/db"
    app_config = MagicMock()
    app_config.checkpointer = None
    app_config.database = DatabaseConfig(backend="postgres", postgres_url=postgres_url)

    saver_instance = AsyncMock()
    saver_factory = MagicMock(return_value=saver_instance)

    # The pool instance doubles as its own async context manager result.
    pool = AsyncMock()
    pool.__aenter__.return_value = pool
    pool.__aexit__.return_value = False
    pool_factory = MagicMock(return_value=pool)
    pool_factory.check_connection = AsyncMock()

    fake_modules = {
        "langgraph.checkpoint.postgres.aio": MagicMock(AsyncPostgresSaver=saver_factory),
        "psycopg.rows": MagicMock(dict_row=MagicMock()),
        "psycopg_pool": MagicMock(AsyncConnectionPool=pool_factory),
    }

    with (
        patch("deerflow.runtime.checkpointer.async_provider.get_app_config", return_value=app_config),
        patch.dict(sys.modules, fake_modules),
    ):
        async with make_checkpointer() as saver:
            assert saver is saver_instance

        # NOTE(review): only the URL and the ``check`` callback are asserted;
        # the keepalive settings named in the docstring are not verified here.
        pool_factory.assert_called_once()
        pool_call = pool_factory.call_args
        assert pool_call.args[0] == postgres_url
        assert pool_call.kwargs["check"] is pool_factory.check_connection

        saver_factory.assert_called_once_with(conn=pool)
        saver_instance.setup.assert_awaited_once()
@pytest.mark.anyio
async def test_database_sqlite_creates_parent_dir_via_to_thread(self):
"""Unified database SQLite setup should also move path IO off the event loop."""