feat: add LLM retry with exponential backoff for transient errors
provider.chat() had no retry logic — a transient 429 rate limit, 502 gateway error, or network timeout would permanently fail the entire message. For a system running cron jobs and heartbeats 24/7, even a brief provider blip causes lost tasks. Adds _chat_with_retry() that:
- Retries up to 3 times with 1s/2s/4s exponential backoff
- Only retries transient errors (429, 5xx, timeout, connection)
- Returns immediately on permanent errors (400, 401, etc.)
- Falls through to the final attempt if all retries exhaust
This commit is contained in:
@@ -159,6 +159,33 @@ class AgentLoop:
|
|||||||
if hasattr(tool, "set_context"):
|
if hasattr(tool, "set_context"):
|
||||||
tool.set_context(channel, chat_id, *([message_id] if name == "message" else []))
|
tool.set_context(channel, chat_id, *([message_id] if name == "message" else []))
|
||||||
|
|
||||||
|
_RETRY_DELAYS = (1, 2, 4)  # seconds — exponential backoff for transient LLM errors

async def _chat_with_retry(self, **kwargs: Any) -> Any:
    """Call provider.chat() with retry on transient errors (429, 5xx, network)."""
    from nanobot.providers.base import LLMResponse

    # Substrings in an error payload that suggest the failure may clear on retry.
    transient_markers = (
        "429", "rate limit", "500", "502", "503", "504",
        "overloaded", "timeout", "connection", "server error",
    )
    last_response: LLMResponse | None = None

    for attempt, delay in enumerate(self._RETRY_DELAYS):
        result = await self.provider.chat(**kwargs)
        if result.finish_reason != "error":
            return result
        # Inspect the error text to decide whether waiting could help.
        detail = (result.content or "").lower()
        if not any(marker in detail for marker in transient_markers):
            # Permanent failure (400, 401, etc.) — retrying won't change the outcome.
            return result
        last_response = result
        logger.warning("LLM transient error (attempt {}/{}), retrying in {}s: {}",
                       attempt + 1, len(self._RETRY_DELAYS), delay, detail[:120])
        await asyncio.sleep(delay)

    # Backoff budget spent — one final call; fall back to the last error seen.
    final = await self.provider.chat(**kwargs)
    return final if final.finish_reason != "error" else (last_response or final)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _strip_think(text: str | None) -> str | None:
|
def _strip_think(text: str | None) -> str | None:
|
||||||
"""Remove <think>…</think> blocks that some models embed in content."""
|
"""Remove <think>…</think> blocks that some models embed in content."""
|
||||||
@@ -191,7 +218,7 @@ class AgentLoop:
|
|||||||
while iteration < self.max_iterations:
|
while iteration < self.max_iterations:
|
||||||
iteration += 1
|
iteration += 1
|
||||||
|
|
||||||
response = await self.provider.chat(
|
response = await self._chat_with_retry(
|
||||||
messages=messages,
|
messages=messages,
|
||||||
tools=self.tools.get_definitions(),
|
tools=self.tools.get_definitions(),
|
||||||
model=self.model,
|
model=self.model,
|
||||||
|
|||||||
Reference in New Issue
Block a user