improve agent reliability: behavioral constraints, full tool history, error hints

This commit is contained in:
Re-bin
2026-02-23 09:13:08 +00:00
parent 491739223d
commit d9462284e1
5 changed files with 58 additions and 28 deletions

View File

@@ -16,7 +16,7 @@
⚡️ Delivers core agent functionality in just **~4,000** lines of code — **99% smaller** than Clawdbot's 430k+ lines. ⚡️ Delivers core agent functionality in just **~4,000** lines of code — **99% smaller** than Clawdbot's 430k+ lines.
📏 Real-time line count: **3,862 lines** (run `bash core_agent_lines.sh` to verify anytime) 📏 Real-time line count: **3,897 lines** (run `bash core_agent_lines.sh` to verify anytime)
## 📢 News ## 📢 News

View File

@@ -96,14 +96,18 @@ Your workspace is at: {workspace_path}
- History log: {workspace_path}/memory/HISTORY.md (grep-searchable) - History log: {workspace_path}/memory/HISTORY.md (grep-searchable)
- Custom skills: {workspace_path}/skills/{{skill-name}}/SKILL.md - Custom skills: {workspace_path}/skills/{{skill-name}}/SKILL.md
IMPORTANT: When responding to direct questions or conversations, reply directly with your text response. Reply directly with text for conversations. Only use the 'message' tool to send to a specific chat channel.
Only use the 'message' tool when you need to send a message to a specific chat channel (like WhatsApp).
For normal conversation, just respond with text - do not call the message tool.
Always be helpful, accurate, and concise. Before calling tools, briefly tell the user what you're about to do (one short sentence in the user's language). ## Tool Call Guidelines
If you need to use tools, call them directly — never send a preliminary message like "Let me check" without actually calling a tool. - Before calling tools, you may briefly state your intent (e.g. "Let me check that"), but NEVER predict or describe the expected result before receiving it.
When remembering something important, write to {workspace_path}/memory/MEMORY.md - Before modifying a file, read it first to confirm its current content.
To recall past events, grep {workspace_path}/memory/HISTORY.md""" - Do not assume a file or directory exists — use list_dir or read_file to verify.
- After writing or editing a file, re-read it if accuracy matters.
- If a tool call fails, analyze the error before retrying with a different approach.
## Memory
- Remember important facts: write to {workspace_path}/memory/MEMORY.md
- Recall past events: grep {workspace_path}/memory/HISTORY.md"""
def _load_bootstrap_files(self) -> str: def _load_bootstrap_files(self) -> str:
"""Load all bootstrap files from workspace.""" """Load all bootstrap files from workspace."""

View File

@@ -49,10 +49,10 @@ class AgentLoop:
provider: LLMProvider, provider: LLMProvider,
workspace: Path, workspace: Path,
model: str | None = None, model: str | None = None,
max_iterations: int = 20, max_iterations: int = 40,
temperature: float = 0.1, temperature: float = 0.1,
max_tokens: int = 4096, max_tokens: int = 4096,
memory_window: int = 50, memory_window: int = 100,
brave_api_key: str | None = None, brave_api_key: str | None = None,
exec_config: ExecToolConfig | None = None, exec_config: ExecToolConfig | None = None,
cron_service: CronService | None = None, cron_service: CronService | None = None,
@@ -175,8 +175,8 @@ class AgentLoop:
self, self,
initial_messages: list[dict], initial_messages: list[dict],
on_progress: Callable[..., Awaitable[None]] | None = None, on_progress: Callable[..., Awaitable[None]] | None = None,
) -> tuple[str | None, list[str]]: ) -> tuple[str | None, list[str], list[dict]]:
"""Run the agent iteration loop. Returns (final_content, tools_used).""" """Run the agent iteration loop. Returns (final_content, tools_used, messages)."""
messages = initial_messages messages = initial_messages
iteration = 0 iteration = 0
final_content = None final_content = None
@@ -228,7 +228,14 @@ class AgentLoop:
final_content = self._strip_think(response.content) final_content = self._strip_think(response.content)
break break
return final_content, tools_used if final_content is None and iteration >= self.max_iterations:
logger.warning("Max iterations ({}) reached", self.max_iterations)
final_content = (
f"I reached the maximum number of tool call iterations ({self.max_iterations}) "
"without completing the task. You can try breaking the task into smaller steps."
)
return final_content, tools_used, messages
async def run(self) -> None: async def run(self) -> None:
"""Run the agent loop, processing messages from the bus.""" """Run the agent loop, processing messages from the bus."""
@@ -301,13 +308,13 @@ class AgentLoop:
key = f"{channel}:{chat_id}" key = f"{channel}:{chat_id}"
session = self.sessions.get_or_create(key) session = self.sessions.get_or_create(key)
self._set_tool_context(channel, chat_id, msg.metadata.get("message_id")) self._set_tool_context(channel, chat_id, msg.metadata.get("message_id"))
history = session.get_history(max_messages=self.memory_window)
messages = self.context.build_messages( messages = self.context.build_messages(
history=session.get_history(max_messages=self.memory_window), history=history,
current_message=msg.content, channel=channel, chat_id=chat_id, current_message=msg.content, channel=channel, chat_id=chat_id,
) )
final_content, _ = await self._run_agent_loop(messages) final_content, _, all_msgs = await self._run_agent_loop(messages)
session.add_message("user", f"[System: {msg.sender_id}] {msg.content}") self._save_turn(session, all_msgs, 1 + len(history))
session.add_message("assistant", final_content or "Background task completed.")
self.sessions.save(session) self.sessions.save(session)
return OutboundMessage(channel=channel, chat_id=chat_id, return OutboundMessage(channel=channel, chat_id=chat_id,
content=final_content or "Background task completed.") content=final_content or "Background task completed.")
@@ -377,8 +384,9 @@ class AgentLoop:
if isinstance(message_tool, MessageTool): if isinstance(message_tool, MessageTool):
message_tool.start_turn() message_tool.start_turn()
history = session.get_history(max_messages=self.memory_window)
initial_messages = self.context.build_messages( initial_messages = self.context.build_messages(
history=session.get_history(max_messages=self.memory_window), history=history,
current_message=msg.content, current_message=msg.content,
media=msg.media if msg.media else None, media=msg.media if msg.media else None,
channel=msg.channel, chat_id=msg.chat_id, channel=msg.channel, chat_id=msg.chat_id,
@@ -392,7 +400,7 @@ class AgentLoop:
channel=msg.channel, chat_id=msg.chat_id, content=content, metadata=meta, channel=msg.channel, chat_id=msg.chat_id, content=content, metadata=meta,
)) ))
final_content, tools_used = await self._run_agent_loop( final_content, _, all_msgs = await self._run_agent_loop(
initial_messages, on_progress=on_progress or _bus_progress, initial_messages, on_progress=on_progress or _bus_progress,
) )
@@ -402,9 +410,7 @@ class AgentLoop:
preview = final_content[:120] + "..." if len(final_content) > 120 else final_content preview = final_content[:120] + "..." if len(final_content) > 120 else final_content
logger.info("Response to {}:{}: {}", msg.channel, msg.sender_id, preview) logger.info("Response to {}:{}: {}", msg.channel, msg.sender_id, preview)
session.add_message("user", msg.content) self._save_turn(session, all_msgs, 1 + len(history))
session.add_message("assistant", final_content,
tools_used=tools_used if tools_used else None)
self.sessions.save(session) self.sessions.save(session)
if message_tool := self.tools.get("message"): if message_tool := self.tools.get("message"):
@@ -416,6 +422,21 @@ class AgentLoop:
metadata=msg.metadata or {}, metadata=msg.metadata or {},
) )
_TOOL_RESULT_MAX_CHARS = 500
def _save_turn(self, session: Session, messages: list[dict], skip: int) -> None:
    """Append the messages produced in this turn to the session.

    Args:
        session: Session whose ``messages`` list receives the new entries
            and whose ``updated_at`` is refreshed.
        messages: Full message list returned by the agent loop.
        skip: Number of leading messages to leave out. Callers pass
            ``1 + len(history)`` (presumably the system prompt plus the
            history that is already stored -- confirm against
            ``build_messages``).

    Each stored entry is a shallow copy of the message without its
    ``reasoning_content`` key. String content of ``tool`` messages longer
    than ``_TOOL_RESULT_MAX_CHARS`` is cut and marked as truncated. Entries
    lacking a ``timestamp`` get the current time in ISO format.
    """
    from datetime import datetime

    for message in messages[skip:]:
        # Copy first so the caller's message dicts are never mutated.
        record = dict(message)
        record.pop("reasoning_content", None)

        body = record.get("content")
        if record.get("role") == "tool" and isinstance(body, str):
            limit = self._TOOL_RESULT_MAX_CHARS
            if len(body) > limit:
                record["content"] = body[:limit] + "\n... (truncated)"

        if "timestamp" not in record:
            record["timestamp"] = datetime.now().isoformat()
        session.messages.append(record)

    session.updated_at = datetime.now()
async def _consolidate_memory(self, session, archive_all: bool = False) -> bool: async def _consolidate_memory(self, session, archive_all: bool = False) -> bool:
"""Delegate to MemoryStore.consolidate(). Returns True on success.""" """Delegate to MemoryStore.consolidate(). Returns True on success."""
return await MemoryStore(self.workspace).consolidate( return await MemoryStore(self.workspace).consolidate(

View File

@@ -49,17 +49,22 @@ class ToolRegistry:
Raises: Raises:
KeyError: If tool not found. KeyError: If tool not found.
""" """
_HINT = "\n\n[Analyze the error above and try a different approach.]"
tool = self._tools.get(name) tool = self._tools.get(name)
if not tool: if not tool:
return f"Error: Tool '{name}' not found" return f"Error: Tool '{name}' not found. Available: {', '.join(self.tool_names)}"
try: try:
errors = tool.validate_params(params) errors = tool.validate_params(params)
if errors: if errors:
return f"Error: Invalid parameters for tool '{name}': " + "; ".join(errors) return f"Error: Invalid parameters for tool '{name}': " + "; ".join(errors) + _HINT
return await tool.execute(**params) result = await tool.execute(**params)
if isinstance(result, str) and result.startswith("Error"):
return result + _HINT
return result
except Exception as e: except Exception as e:
return f"Error executing {name}: {str(e)}" return f"Error executing {name}: {str(e)}" + _HINT
@property @property
def tool_names(self) -> list[str]: def tool_names(self) -> list[str]:

View File

@@ -188,8 +188,8 @@ class AgentDefaults(Base):
model: str = "anthropic/claude-opus-4-5" model: str = "anthropic/claude-opus-4-5"
max_tokens: int = 8192 max_tokens: int = 8192
temperature: float = 0.1 temperature: float = 0.1
max_tool_iterations: int = 20 max_tool_iterations: int = 40
memory_window: int = 50 memory_window: int = 100
class AgentsConfig(Base): class AgentsConfig(Base):