feat: add reasoning_effort config to enable LLM thinking mode

Author: Re-bin
Date: 2026-02-28 17:18:05 +00:00
parent 0a5f3b6194
commit f9d72e2e74
9 changed files with 23 additions and 2 deletions
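
In short: this commit adds an optional reasoning_effort setting (low / medium / high) to the agent defaults and threads it through AgentLoop, SubagentManager, the CLI entry points, and every LLM provider; when left unset, nothing changes. As the CLI hunks below show, the value is read from config.agents.defaults.reasoning_effort, so enabling thinking mode amounts to setting that one field, e.g.:

config.agents.defaults.reasoning_effort = "high"  # accepted values per the config comment: "low" / "medium" / "high"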

View File

@@ -16,7 +16,7 @@
⚡️ Delivers core agent functionality in just **~4,000** lines of code — **99% smaller** than Clawdbot's 430k+ lines.
-📏 Real-time line count: **3,927 lines** (run `bash core_agent_lines.sh` to verify anytime)
+📏 Real-time line count: **3,935 lines** (run `bash core_agent_lines.sh` to verify anytime)
## 📢 News

View File

@@ -56,6 +56,7 @@ class AgentLoop:
temperature: float = 0.1,
max_tokens: int = 4096,
memory_window: int = 100,
reasoning_effort: str | None = None,
brave_api_key: str | None = None,
exec_config: ExecToolConfig | None = None,
cron_service: CronService | None = None,
@@ -74,6 +75,7 @@ class AgentLoop:
self.temperature = temperature
self.max_tokens = max_tokens
self.memory_window = memory_window
self.reasoning_effort = reasoning_effort
self.brave_api_key = brave_api_key
self.exec_config = exec_config or ExecToolConfig()
self.cron_service = cron_service
@@ -89,6 +91,7 @@ class AgentLoop:
model=self.model,
temperature=self.temperature,
max_tokens=self.max_tokens,
reasoning_effort=reasoning_effort,
brave_api_key=brave_api_key,
exec_config=self.exec_config,
restrict_to_workspace=restrict_to_workspace,
@@ -191,6 +194,7 @@ class AgentLoop:
model=self.model,
temperature=self.temperature,
max_tokens=self.max_tokens,
reasoning_effort=self.reasoning_effort,
)
if response.has_tool_calls:
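
A hedged usage sketch of the new knob (keyword names taken from the hunks above; any other required AgentLoop arguments are elided, so this is illustrative rather than runnable as-is):

loop = AgentLoop(
    temperature=0.1,
    max_tokens=4096,
    memory_window=100,
    reasoning_effort="medium",  # low / medium / high; None (the default) leaves thinking mode off
)

Per the @@ -191 hunk, the stored value is then forwarded on every chat call as reasoning_effort=self.reasoning_effort.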

View File

@@ -28,6 +28,7 @@ class SubagentManager:
model: str | None = None,
temperature: float = 0.7,
max_tokens: int = 4096,
reasoning_effort: str | None = None,
brave_api_key: str | None = None,
exec_config: "ExecToolConfig | None" = None,
restrict_to_workspace: bool = False,
@@ -39,6 +40,7 @@ class SubagentManager:
self.model = model or provider.get_default_model()
self.temperature = temperature
self.max_tokens = max_tokens
self.reasoning_effort = reasoning_effort
self.brave_api_key = brave_api_key
self.exec_config = exec_config or ExecToolConfig()
self.restrict_to_workspace = restrict_to_workspace
@@ -124,6 +126,7 @@ class SubagentManager:
model=self.model,
temperature=self.temperature,
max_tokens=self.max_tokens,
reasoning_effort=self.reasoning_effort,
)
if response.has_tool_calls:

View File

@@ -283,6 +283,7 @@ def gateway(
max_tokens=config.agents.defaults.max_tokens,
max_iterations=config.agents.defaults.max_tool_iterations,
memory_window=config.agents.defaults.memory_window,
reasoning_effort=config.agents.defaults.reasoning_effort,
brave_api_key=config.tools.web.search.api_key or None,
exec_config=config.tools.exec,
cron_service=cron,
@@ -441,6 +442,7 @@ def agent(
max_tokens=config.agents.defaults.max_tokens,
max_iterations=config.agents.defaults.max_tool_iterations,
memory_window=config.agents.defaults.memory_window,
reasoning_effort=config.agents.defaults.reasoning_effort,
brave_api_key=config.tools.web.search.api_key or None,
exec_config=config.tools.exec,
cron_service=cron,
@@ -932,6 +934,7 @@ def cron_run(
max_tokens=config.agents.defaults.max_tokens,
max_iterations=config.agents.defaults.max_tool_iterations,
memory_window=config.agents.defaults.memory_window,
reasoning_effort=config.agents.defaults.reasoning_effort,
brave_api_key=config.tools.web.search.api_key or None,
exec_config=config.tools.exec,
restrict_to_workspace=config.tools.restrict_to_workspace,

View File

@@ -226,6 +226,7 @@ class AgentDefaults(Base):
temperature: float = 0.1
max_tool_iterations: int = 40
memory_window: int = 100
reasoning_effort: str | None = None # low / medium / high — enables LLM thinking mode
class AgentsConfig(Base):
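
The comment documents three accepted values, but no validation appears in this excerpt. If a guard were wanted, a standalone sketch (hypothetical helper, not part of this commit) could be:

VALID_EFFORTS = {"low", "medium", "high"}

def check_reasoning_effort(value: str | None) -> str | None:
    # None means thinking mode stays off; anything else must be a documented level.
    if value is not None and value not in VALID_EFFORTS:
        raise ValueError(f"reasoning_effort must be one of {sorted(VALID_EFFORTS)}, got {value!r}")
    return value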

View File

@@ -88,6 +88,7 @@ class LLMProvider(ABC):
model: str | None = None,
max_tokens: int = 4096,
temperature: float = 0.7,
reasoning_effort: str | None = None,
) -> LLMResponse:
"""
Send a chat completion request.

View File

@@ -18,13 +18,16 @@ class CustomProvider(LLMProvider):
self._client = AsyncOpenAI(api_key=api_key, base_url=api_base)
async def chat(self, messages: list[dict[str, Any]], tools: list[dict[str, Any]] | None = None,
-model: str | None = None, max_tokens: int = 4096, temperature: float = 0.7) -> LLMResponse:
+model: str | None = None, max_tokens: int = 4096, temperature: float = 0.7,
+reasoning_effort: str | None = None) -> LLMResponse:
kwargs: dict[str, Any] = {
"model": model or self.default_model,
"messages": self._sanitize_empty_content(messages),
"max_tokens": max(1, max_tokens),
"temperature": temperature,
}
if reasoning_effort:
kwargs["reasoning_effort"] = reasoning_effort
if tools:
kwargs.update(tools=tools, tool_choice="auto")
try:
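
Because CustomProvider wraps AsyncOpenAI, the new key rides along on a standard chat-completions request: the openai Python SDK accepts reasoning_effort on chat.completions.create, and setting it only when configured avoids sending an extra field to OpenAI-compatible servers that might reject it. A standalone sketch (endpoint and model are placeholders):

import asyncio

from openai import AsyncOpenAI

async def main() -> None:
    client = AsyncOpenAI(api_key="sk-...", base_url="https://example.com/v1")  # placeholder endpoint
    response = await client.chat.completions.create(
        model="o3-mini",  # placeholder; must be a reasoning-capable model
        messages=[{"role": "user", "content": "Summarize the CAP theorem in one sentence."}],
        reasoning_effort="low",  # the same key CustomProvider sets above
    )
    print(response.choices[0].message.content)

asyncio.run(main())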

View File

@@ -178,6 +178,7 @@ class LiteLLMProvider(LLMProvider):
model: str | None = None,
max_tokens: int = 4096,
temperature: float = 0.7,
reasoning_effort: str | None = None,
) -> LLMResponse:
"""
Send a chat completion request via LiteLLM.
@@ -224,6 +225,10 @@ class LiteLLMProvider(LLMProvider):
if self.extra_headers:
kwargs["extra_headers"] = self.extra_headers
if reasoning_effort:
kwargs["reasoning_effort"] = reasoning_effort
kwargs["drop_params"] = True
if tools:
kwargs["tools"] = tools
kwargs["tool_choice"] = "auto"
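
The design choice worth noting here is drop_params=True: LiteLLM silently drops parameters the target provider does not support, so enabling reasoning_effort cannot break requests to models without a thinking mode. The equivalent standalone call (model id is a placeholder):

import asyncio

import litellm

async def main() -> None:
    response = await litellm.acompletion(
        model="openai/o3-mini",  # placeholder model id
        messages=[{"role": "user", "content": "hello"}],
        reasoning_effort="high",
        drop_params=True,  # unsupported params are dropped instead of raising
    )
    print(response.choices[0].message.content)

asyncio.run(main())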

View File

@@ -31,6 +31,7 @@ class OpenAICodexProvider(LLMProvider):
model: str | None = None,
max_tokens: int = 4096,
temperature: float = 0.7,
reasoning_effort: str | None = None,
) -> LLMResponse:
model = model or self.default_model
system_prompt, input_items = _convert_messages(messages)
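
This excerpt only shows the signature gaining the parameter; how OpenAICodexProvider forwards it falls outside the hunk. If the provider talks to the OpenAI Responses API (plausible given _convert_messages producing a system prompt plus input items), the conventional mapping is the nested reasoning object. A sketch under that assumption:

from typing import Any

from openai import AsyncOpenAI

async def respond(client: AsyncOpenAI, model: str, input_items: list[Any],
                  reasoning_effort: str | None) -> Any:
    # Assumption: effort maps to the Responses API's nested `reasoning` parameter.
    kwargs: dict[str, Any] = {"model": model, "input": input_items}
    if reasoning_effort:
        kwargs["reasoning"] = {"effort": reasoning_effort}
    return await client.responses.create(**kwargs)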