fix: handle CancelledError in MCP tool calls to prevent process crash

The MCP SDK's anyio cancel scopes can leak asyncio.CancelledError on
timeout or failure paths. Because CancelledError derives from
BaseException rather than Exception (since Python 3.8), it escapes both
MCPToolWrapper.execute() and ToolRegistry.execute(), crashing the agent
loop.

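The MCP tool wrapper now catches CancelledError and returns a graceful
error message to the LLM, while still re-raising genuine cancellations
of the current task (e.g. from /stop). A genuine cancellation is
detected via asyncio.current_task().cancelling() > 0; note that
Task.cancelling() requires Python 3.11+.
It also catches general Exception for other MCP failures (connection
drops, invalid responses, etc.).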

Related: #1055
This commit is contained in:
Alfredo Arenas
2026-03-08 08:05:18 -06:00
committed by GitHub
parent 0b68360286
commit ed3b9c16f9

View File

@@ -44,13 +44,24 @@ class MCPToolWrapper(Tool):
except asyncio.TimeoutError: except asyncio.TimeoutError:
logger.warning("MCP tool '{}' timed out after {}s", self._name, self._tool_timeout) logger.warning("MCP tool '{}' timed out after {}s", self._name, self._tool_timeout)
return f"(MCP tool call timed out after {self._tool_timeout}s)" return f"(MCP tool call timed out after {self._tool_timeout}s)"
except asyncio.CancelledError:
# MCP SDK's anyio cancel scopes can leak CancelledError on timeout/failure.
# Re-raise only if our task was externally cancelled (e.g. /stop).
task = asyncio.current_task()
if task is not None and task.cancelling() > 0:
raise
logger.warning("MCP tool '{}' was cancelled by server/SDK", self._name)
return f"(MCP tool call was cancelled)"
except Exception as exc:
logger.warning("MCP tool '{}' failed: {}: {}", self._name, type(exc).__name__, exc)
return f"(MCP tool call failed: {type(exc).__name__})"
parts = [] parts = []
for block in result.content: for block in result.content:
if isinstance(block, types.TextContent): if isinstance(block, types.TextContent):
parts.append(block.text) parts.append(block.text)
else: else:
parts.append(str(block)) parts.append(str(block))
return "\n".join(parts) or "(no output)" return "\n".join(parts) or "(no output)
async def connect_mcp_servers( async def connect_mcp_servers(