fix: separate Telegram connection pools and add timeout retry to prevent pool exhaustion
The root cause of "Pool timeout" errors is that long-polling (getUpdates) and outbound API calls (send_message, send_photo, etc.) shared the same HTTPXRequest pool — polling holds connections indefinitely, starving sends under concurrent load (e.g. cron jobs + user chat). - Split into two independent pools: API calls (default 32) and polling (4) - Expose connection_pool_size / pool_timeout in TelegramConfig for tuning - Add _call_with_retry() with exponential backoff (3 attempts) on TimedOut - Apply retry to _send_text and remote media URL sends
This commit is contained in:
@@ -18,6 +18,10 @@ class _FakeHTTPXRequest:
|
||||
self.kwargs = kwargs
|
||||
self.__class__.instances.append(self)
|
||||
|
||||
@classmethod
|
||||
def clear(cls) -> None:
|
||||
cls.instances.clear()
|
||||
|
||||
|
||||
class _FakeUpdater:
|
||||
def __init__(self, on_start_polling) -> None:
|
||||
@@ -144,7 +148,8 @@ def _make_telegram_update(
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_start_uses_request_proxy_without_builder_proxy(monkeypatch) -> None:
|
||||
async def test_start_creates_separate_pools_with_proxy(monkeypatch) -> None:
|
||||
_FakeHTTPXRequest.clear()
|
||||
config = TelegramConfig(
|
||||
enabled=True,
|
||||
token="123:abc",
|
||||
@@ -164,10 +169,106 @@ async def test_start_uses_request_proxy_without_builder_proxy(monkeypatch) -> No
|
||||
|
||||
await channel.start()
|
||||
|
||||
assert len(_FakeHTTPXRequest.instances) == 1
|
||||
assert _FakeHTTPXRequest.instances[0].kwargs["proxy"] == config.proxy
|
||||
assert builder.request_value is _FakeHTTPXRequest.instances[0]
|
||||
assert builder.get_updates_request_value is _FakeHTTPXRequest.instances[0]
|
||||
assert len(_FakeHTTPXRequest.instances) == 2
|
||||
api_req, poll_req = _FakeHTTPXRequest.instances
|
||||
assert api_req.kwargs["proxy"] == config.proxy
|
||||
assert poll_req.kwargs["proxy"] == config.proxy
|
||||
assert api_req.kwargs["connection_pool_size"] == 32
|
||||
assert poll_req.kwargs["connection_pool_size"] == 4
|
||||
assert builder.request_value is api_req
|
||||
assert builder.get_updates_request_value is poll_req
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_start_respects_custom_pool_config(monkeypatch) -> None:
|
||||
_FakeHTTPXRequest.clear()
|
||||
config = TelegramConfig(
|
||||
enabled=True,
|
||||
token="123:abc",
|
||||
allow_from=["*"],
|
||||
connection_pool_size=32,
|
||||
pool_timeout=10.0,
|
||||
)
|
||||
bus = MessageBus()
|
||||
channel = TelegramChannel(config, bus)
|
||||
app = _FakeApp(lambda: setattr(channel, "_running", False))
|
||||
builder = _FakeBuilder(app)
|
||||
|
||||
monkeypatch.setattr("nanobot.channels.telegram.HTTPXRequest", _FakeHTTPXRequest)
|
||||
monkeypatch.setattr(
|
||||
"nanobot.channels.telegram.Application",
|
||||
SimpleNamespace(builder=lambda: builder),
|
||||
)
|
||||
|
||||
await channel.start()
|
||||
|
||||
api_req = _FakeHTTPXRequest.instances[0]
|
||||
poll_req = _FakeHTTPXRequest.instances[1]
|
||||
assert api_req.kwargs["connection_pool_size"] == 32
|
||||
assert api_req.kwargs["pool_timeout"] == 10.0
|
||||
assert poll_req.kwargs["pool_timeout"] == 10.0
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_send_text_retries_on_timeout() -> None:
|
||||
"""_send_text retries on TimedOut before succeeding."""
|
||||
from telegram.error import TimedOut
|
||||
|
||||
channel = TelegramChannel(
|
||||
TelegramConfig(enabled=True, token="123:abc", allow_from=["*"]),
|
||||
MessageBus(),
|
||||
)
|
||||
channel._app = _FakeApp(lambda: None)
|
||||
|
||||
call_count = 0
|
||||
original_send = channel._app.bot.send_message
|
||||
|
||||
async def flaky_send(**kwargs):
|
||||
nonlocal call_count
|
||||
call_count += 1
|
||||
if call_count <= 2:
|
||||
raise TimedOut()
|
||||
return await original_send(**kwargs)
|
||||
|
||||
channel._app.bot.send_message = flaky_send
|
||||
|
||||
import nanobot.channels.telegram as tg_mod
|
||||
orig_delay = tg_mod._SEND_RETRY_BASE_DELAY
|
||||
tg_mod._SEND_RETRY_BASE_DELAY = 0.01
|
||||
try:
|
||||
await channel._send_text(123, "hello", None, {})
|
||||
finally:
|
||||
tg_mod._SEND_RETRY_BASE_DELAY = orig_delay
|
||||
|
||||
assert call_count == 3
|
||||
assert len(channel._app.bot.sent_messages) == 1
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_send_text_gives_up_after_max_retries() -> None:
|
||||
"""_send_text raises TimedOut after exhausting all retries."""
|
||||
from telegram.error import TimedOut
|
||||
|
||||
channel = TelegramChannel(
|
||||
TelegramConfig(enabled=True, token="123:abc", allow_from=["*"]),
|
||||
MessageBus(),
|
||||
)
|
||||
channel._app = _FakeApp(lambda: None)
|
||||
|
||||
async def always_timeout(**kwargs):
|
||||
raise TimedOut()
|
||||
|
||||
channel._app.bot.send_message = always_timeout
|
||||
|
||||
import nanobot.channels.telegram as tg_mod
|
||||
orig_delay = tg_mod._SEND_RETRY_BASE_DELAY
|
||||
tg_mod._SEND_RETRY_BASE_DELAY = 0.01
|
||||
try:
|
||||
await channel._send_text(123, "hello", None, {})
|
||||
finally:
|
||||
tg_mod._SEND_RETRY_BASE_DELAY = orig_delay
|
||||
|
||||
assert channel._app.bot.sent_messages == []
|
||||
|
||||
|
||||
def test_derive_topic_session_key_uses_thread_id() -> None:
|
||||
|
||||
Reference in New Issue
Block a user