refactor: move generation settings to provider level, eliminate parameter passthrough
This commit is contained in:
@@ -52,9 +52,6 @@ class AgentLoop:
|
||||
workspace: Path,
|
||||
model: str | None = None,
|
||||
max_iterations: int = 40,
|
||||
temperature: float = 0.1,
|
||||
max_tokens: int = 4096,
|
||||
reasoning_effort: str | None = None,
|
||||
context_window_tokens: int = 65_536,
|
||||
brave_api_key: str | None = None,
|
||||
web_proxy: str | None = None,
|
||||
@@ -72,9 +69,6 @@ class AgentLoop:
|
||||
self.workspace = workspace
|
||||
self.model = model or provider.get_default_model()
|
||||
self.max_iterations = max_iterations
|
||||
self.temperature = temperature
|
||||
self.max_tokens = max_tokens
|
||||
self.reasoning_effort = reasoning_effort
|
||||
self.context_window_tokens = context_window_tokens
|
||||
self.brave_api_key = brave_api_key
|
||||
self.web_proxy = web_proxy
|
||||
@@ -90,9 +84,6 @@ class AgentLoop:
|
||||
workspace=workspace,
|
||||
bus=bus,
|
||||
model=self.model,
|
||||
temperature=self.temperature,
|
||||
max_tokens=self.max_tokens,
|
||||
reasoning_effort=reasoning_effort,
|
||||
brave_api_key=brave_api_key,
|
||||
web_proxy=web_proxy,
|
||||
exec_config=self.exec_config,
|
||||
@@ -114,9 +105,6 @@ class AgentLoop:
|
||||
context_window_tokens=context_window_tokens,
|
||||
build_messages=self.context.build_messages,
|
||||
get_tool_definitions=self.tools.get_definitions,
|
||||
temperature=self.temperature,
|
||||
max_tokens=self.max_tokens,
|
||||
reasoning_effort=self.reasoning_effort,
|
||||
)
|
||||
self._register_default_tools()
|
||||
|
||||
@@ -205,9 +193,6 @@ class AgentLoop:
|
||||
messages=messages,
|
||||
tools=tool_defs,
|
||||
model=self.model,
|
||||
temperature=self.temperature,
|
||||
max_tokens=self.max_tokens,
|
||||
reasoning_effort=self.reasoning_effort,
|
||||
)
|
||||
|
||||
if response.has_tool_calls:
|
||||
|
||||
@@ -57,7 +57,6 @@ def _normalize_save_memory_args(args: Any) -> dict[str, Any] | None:
|
||||
return args[0] if args and isinstance(args[0], dict) else None
|
||||
return args if isinstance(args, dict) else None
|
||||
|
||||
|
||||
class MemoryStore:
|
||||
"""Two-layer memory: MEMORY.md (long-term facts) + HISTORY.md (grep-searchable log)."""
|
||||
|
||||
@@ -99,9 +98,6 @@ class MemoryStore:
|
||||
messages: list[dict],
|
||||
provider: LLMProvider,
|
||||
model: str,
|
||||
temperature: float | None = None,
|
||||
max_tokens: int | None = None,
|
||||
reasoning_effort: str | None = None,
|
||||
) -> bool:
|
||||
"""Consolidate the provided message chunk into MEMORY.md + HISTORY.md."""
|
||||
if not messages:
|
||||
@@ -124,9 +120,6 @@ class MemoryStore:
|
||||
],
|
||||
tools=_SAVE_MEMORY_TOOL,
|
||||
model=model,
|
||||
temperature=temperature,
|
||||
max_tokens=max_tokens,
|
||||
reasoning_effort=reasoning_effort,
|
||||
)
|
||||
|
||||
if not response.has_tool_calls:
|
||||
@@ -166,9 +159,6 @@ class MemoryConsolidator:
|
||||
context_window_tokens: int,
|
||||
build_messages: Callable[..., list[dict[str, Any]]],
|
||||
get_tool_definitions: Callable[[], list[dict[str, Any]]],
|
||||
temperature: float | None = None,
|
||||
max_tokens: int | None = None,
|
||||
reasoning_effort: str | None = None,
|
||||
):
|
||||
self.store = MemoryStore(workspace)
|
||||
self.provider = provider
|
||||
@@ -177,9 +167,6 @@ class MemoryConsolidator:
|
||||
self.context_window_tokens = context_window_tokens
|
||||
self._build_messages = build_messages
|
||||
self._get_tool_definitions = get_tool_definitions
|
||||
self._temperature = temperature
|
||||
self._max_tokens = max_tokens
|
||||
self._reasoning_effort = reasoning_effort
|
||||
self._locks: weakref.WeakValueDictionary[str, asyncio.Lock] = weakref.WeakValueDictionary()
|
||||
|
||||
def get_lock(self, session_key: str) -> asyncio.Lock:
|
||||
@@ -188,14 +175,7 @@ class MemoryConsolidator:
|
||||
|
||||
async def consolidate_messages(self, messages: list[dict[str, object]]) -> bool:
|
||||
"""Archive a selected message chunk into persistent memory."""
|
||||
return await self.store.consolidate(
|
||||
messages,
|
||||
self.provider,
|
||||
self.model,
|
||||
temperature=self._temperature,
|
||||
max_tokens=self._max_tokens,
|
||||
reasoning_effort=self._reasoning_effort,
|
||||
)
|
||||
return await self.store.consolidate(messages, self.provider, self.model)
|
||||
|
||||
def pick_consolidation_boundary(
|
||||
self,
|
||||
|
||||
@@ -28,9 +28,6 @@ class SubagentManager:
|
||||
workspace: Path,
|
||||
bus: MessageBus,
|
||||
model: str | None = None,
|
||||
temperature: float = 0.7,
|
||||
max_tokens: int = 4096,
|
||||
reasoning_effort: str | None = None,
|
||||
brave_api_key: str | None = None,
|
||||
web_proxy: str | None = None,
|
||||
exec_config: "ExecToolConfig | None" = None,
|
||||
@@ -41,9 +38,6 @@ class SubagentManager:
|
||||
self.workspace = workspace
|
||||
self.bus = bus
|
||||
self.model = model or provider.get_default_model()
|
||||
self.temperature = temperature
|
||||
self.max_tokens = max_tokens
|
||||
self.reasoning_effort = reasoning_effort
|
||||
self.brave_api_key = brave_api_key
|
||||
self.web_proxy = web_proxy
|
||||
self.exec_config = exec_config or ExecToolConfig()
|
||||
@@ -128,9 +122,6 @@ class SubagentManager:
|
||||
messages=messages,
|
||||
tools=tools.get_definitions(),
|
||||
model=self.model,
|
||||
temperature=self.temperature,
|
||||
max_tokens=self.max_tokens,
|
||||
reasoning_effort=self.reasoning_effort,
|
||||
)
|
||||
|
||||
if response.has_tool_calls:
|
||||
|
||||
@@ -215,6 +215,7 @@ def onboard():
|
||||
|
||||
def _make_provider(config: Config):
|
||||
"""Create the appropriate LLM provider from config."""
|
||||
from nanobot.providers.base import GenerationSettings
|
||||
from nanobot.providers.openai_codex_provider import OpenAICodexProvider
|
||||
from nanobot.providers.azure_openai_provider import AzureOpenAIProvider
|
||||
|
||||
@@ -224,46 +225,50 @@ def _make_provider(config: Config):
|
||||
|
||||
# OpenAI Codex (OAuth)
|
||||
if provider_name == "openai_codex" or model.startswith("openai-codex/"):
|
||||
return OpenAICodexProvider(default_model=model)
|
||||
|
||||
provider = OpenAICodexProvider(default_model=model)
|
||||
# Custom: direct OpenAI-compatible endpoint, bypasses LiteLLM
|
||||
from nanobot.providers.custom_provider import CustomProvider
|
||||
if provider_name == "custom":
|
||||
return CustomProvider(
|
||||
elif provider_name == "custom":
|
||||
from nanobot.providers.custom_provider import CustomProvider
|
||||
provider = CustomProvider(
|
||||
api_key=p.api_key if p else "no-key",
|
||||
api_base=config.get_api_base(model) or "http://localhost:8000/v1",
|
||||
default_model=model,
|
||||
)
|
||||
|
||||
# Azure OpenAI: direct Azure OpenAI endpoint with deployment name
|
||||
if provider_name == "azure_openai":
|
||||
elif provider_name == "azure_openai":
|
||||
if not p or not p.api_key or not p.api_base:
|
||||
console.print("[red]Error: Azure OpenAI requires api_key and api_base.[/red]")
|
||||
console.print("Set them in ~/.nanobot/config.json under providers.azure_openai section")
|
||||
console.print("Use the model field to specify the deployment name.")
|
||||
raise typer.Exit(1)
|
||||
|
||||
return AzureOpenAIProvider(
|
||||
provider = AzureOpenAIProvider(
|
||||
api_key=p.api_key,
|
||||
api_base=p.api_base,
|
||||
default_model=model,
|
||||
)
|
||||
else:
|
||||
from nanobot.providers.litellm_provider import LiteLLMProvider
|
||||
from nanobot.providers.registry import find_by_name
|
||||
spec = find_by_name(provider_name)
|
||||
if not model.startswith("bedrock/") and not (p and p.api_key) and not (spec and (spec.is_oauth or spec.is_local)):
|
||||
console.print("[red]Error: No API key configured.[/red]")
|
||||
console.print("Set one in ~/.nanobot/config.json under providers section")
|
||||
raise typer.Exit(1)
|
||||
provider = LiteLLMProvider(
|
||||
api_key=p.api_key if p else None,
|
||||
api_base=config.get_api_base(model),
|
||||
default_model=model,
|
||||
extra_headers=p.extra_headers if p else None,
|
||||
provider_name=provider_name,
|
||||
)
|
||||
|
||||
from nanobot.providers.litellm_provider import LiteLLMProvider
|
||||
from nanobot.providers.registry import find_by_name
|
||||
spec = find_by_name(provider_name)
|
||||
if not model.startswith("bedrock/") and not (p and p.api_key) and not (spec and (spec.is_oauth or spec.is_local)):
|
||||
console.print("[red]Error: No API key configured.[/red]")
|
||||
console.print("Set one in ~/.nanobot/config.json under providers section")
|
||||
raise typer.Exit(1)
|
||||
|
||||
return LiteLLMProvider(
|
||||
api_key=p.api_key if p else None,
|
||||
api_base=config.get_api_base(model),
|
||||
default_model=model,
|
||||
extra_headers=p.extra_headers if p else None,
|
||||
provider_name=provider_name,
|
||||
defaults = config.agents.defaults
|
||||
provider.generation = GenerationSettings(
|
||||
temperature=defaults.temperature,
|
||||
max_tokens=defaults.max_tokens,
|
||||
reasoning_effort=defaults.reasoning_effort,
|
||||
)
|
||||
return provider
|
||||
|
||||
|
||||
def _load_runtime_config(config: str | None = None, workspace: str | None = None) -> Config:
|
||||
@@ -341,10 +346,7 @@ def gateway(
|
||||
provider=provider,
|
||||
workspace=config.workspace_path,
|
||||
model=config.agents.defaults.model,
|
||||
temperature=config.agents.defaults.temperature,
|
||||
max_tokens=config.agents.defaults.max_tokens,
|
||||
max_iterations=config.agents.defaults.max_tool_iterations,
|
||||
reasoning_effort=config.agents.defaults.reasoning_effort,
|
||||
context_window_tokens=config.agents.defaults.context_window_tokens,
|
||||
brave_api_key=config.tools.web.search.api_key or None,
|
||||
web_proxy=config.tools.web.proxy or None,
|
||||
@@ -527,10 +529,7 @@ def agent(
|
||||
provider=provider,
|
||||
workspace=config.workspace_path,
|
||||
model=config.agents.defaults.model,
|
||||
temperature=config.agents.defaults.temperature,
|
||||
max_tokens=config.agents.defaults.max_tokens,
|
||||
max_iterations=config.agents.defaults.max_tool_iterations,
|
||||
reasoning_effort=config.agents.defaults.reasoning_effort,
|
||||
context_window_tokens=config.agents.defaults.context_window_tokens,
|
||||
brave_api_key=config.tools.web.search.api_key or None,
|
||||
web_proxy=config.tools.web.proxy or None,
|
||||
|
||||
@@ -32,6 +32,21 @@ class LLMResponse:
|
||||
return len(self.tool_calls) > 0
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class GenerationSettings:
|
||||
"""Default generation parameters for LLM calls.
|
||||
|
||||
Stored on the provider so every call site inherits the same defaults
|
||||
without having to pass temperature / max_tokens / reasoning_effort
|
||||
through every layer. Individual call sites can still override by
|
||||
passing explicit keyword arguments to chat() / chat_with_retry().
|
||||
"""
|
||||
|
||||
temperature: float = 0.7
|
||||
max_tokens: int = 4096
|
||||
reasoning_effort: str | None = None
|
||||
|
||||
|
||||
class LLMProvider(ABC):
|
||||
"""
|
||||
Abstract base class for LLM providers.
|
||||
@@ -56,9 +71,12 @@ class LLMProvider(ABC):
|
||||
"temporarily unavailable",
|
||||
)
|
||||
|
||||
_SENTINEL = object()
|
||||
|
||||
def __init__(self, api_key: str | None = None, api_base: str | None = None):
|
||||
self.api_key = api_key
|
||||
self.api_base = api_base
|
||||
self.generation: GenerationSettings = GenerationSettings()
|
||||
|
||||
@staticmethod
|
||||
def _sanitize_empty_content(messages: list[dict[str, Any]]) -> list[dict[str, Any]]:
|
||||
@@ -155,11 +173,23 @@ class LLMProvider(ABC):
|
||||
messages: list[dict[str, Any]],
|
||||
tools: list[dict[str, Any]] | None = None,
|
||||
model: str | None = None,
|
||||
max_tokens: int = 4096,
|
||||
temperature: float = 0.7,
|
||||
reasoning_effort: str | None = None,
|
||||
max_tokens: object = _SENTINEL,
|
||||
temperature: object = _SENTINEL,
|
||||
reasoning_effort: object = _SENTINEL,
|
||||
) -> LLMResponse:
|
||||
"""Call chat() with retry on transient provider failures."""
|
||||
"""Call chat() with retry on transient provider failures.
|
||||
|
||||
Parameters default to ``self.generation`` when not explicitly passed,
|
||||
so callers no longer need to thread temperature / max_tokens /
|
||||
reasoning_effort through every layer.
|
||||
"""
|
||||
if max_tokens is self._SENTINEL:
|
||||
max_tokens = self.generation.max_tokens
|
||||
if temperature is self._SENTINEL:
|
||||
temperature = self.generation.temperature
|
||||
if reasoning_effort is self._SENTINEL:
|
||||
reasoning_effort = self.generation.reasoning_effort
|
||||
|
||||
for attempt, delay in enumerate(self._CHAT_RETRY_DELAYS, start=1):
|
||||
try:
|
||||
response = await self.chat(
|
||||
|
||||
Reference in New Issue
Block a user