fix: raise tool result history limit to 16k and force save_memory in consolidation

This commit is contained in:
Re-bin
2026-03-12 03:08:53 +00:00
parent d313765442
commit 0a0017ff45
6 changed files with 18 additions and 6 deletions

View File

@@ -120,6 +120,7 @@ class MemoryStore:
],
tools=_SAVE_MEMORY_TOOL,
model=model,
tool_choice="required",
)
if not response.has_tool_calls:

View File

@@ -88,6 +88,7 @@ class AzureOpenAIProvider(LLMProvider):
max_tokens: int = 4096,
temperature: float = 0.7,
reasoning_effort: str | None = None,
tool_choice: str | dict[str, Any] | None = None,
) -> dict[str, Any]:
"""Prepare the request payload with Azure OpenAI 2024-10-21 compliance."""
payload: dict[str, Any] = {
@@ -106,7 +107,7 @@ class AzureOpenAIProvider(LLMProvider):
if tools:
payload["tools"] = tools
payload["tool_choice"] = "auto"
payload["tool_choice"] = tool_choice or "auto"
return payload
@@ -118,6 +119,7 @@ class AzureOpenAIProvider(LLMProvider):
max_tokens: int = 4096,
temperature: float = 0.7,
reasoning_effort: str | None = None,
tool_choice: str | dict[str, Any] | None = None,
) -> LLMResponse:
"""
Send a chat completion request to Azure OpenAI.
@@ -137,7 +139,8 @@ class AzureOpenAIProvider(LLMProvider):
url = self._build_chat_url(deployment_name)
headers = self._build_headers()
payload = self._prepare_request_payload(
deployment_name, messages, tools, max_tokens, temperature, reasoning_effort
deployment_name, messages, tools, max_tokens, temperature, reasoning_effort,
tool_choice=tool_choice,
)
try:

View File

@@ -166,6 +166,7 @@ class LLMProvider(ABC):
max_tokens: int = 4096,
temperature: float = 0.7,
reasoning_effort: str | None = None,
tool_choice: str | dict[str, Any] | None = None,
) -> LLMResponse:
"""
Send a chat completion request.
@@ -176,6 +177,7 @@ class LLMProvider(ABC):
model: Model identifier (provider-specific).
max_tokens: Maximum tokens in response.
temperature: Sampling temperature.
tool_choice: Tool selection strategy ("auto", "required", or specific tool dict).
Returns:
LLMResponse with content and/or tool calls.
@@ -195,6 +197,7 @@ class LLMProvider(ABC):
max_tokens: object = _SENTINEL,
temperature: object = _SENTINEL,
reasoning_effort: object = _SENTINEL,
tool_choice: str | dict[str, Any] | None = None,
) -> LLMResponse:
"""Call chat() with retry on transient provider failures.
@@ -218,6 +221,7 @@ class LLMProvider(ABC):
max_tokens=max_tokens,
temperature=temperature,
reasoning_effort=reasoning_effort,
tool_choice=tool_choice,
)
except asyncio.CancelledError:
raise
@@ -250,6 +254,7 @@ class LLMProvider(ABC):
max_tokens=max_tokens,
temperature=temperature,
reasoning_effort=reasoning_effort,
tool_choice=tool_choice,
)
except asyncio.CancelledError:
raise

View File

@@ -25,7 +25,8 @@ class CustomProvider(LLMProvider):
async def chat(self, messages: list[dict[str, Any]], tools: list[dict[str, Any]] | None = None,
model: str | None = None, max_tokens: int = 4096, temperature: float = 0.7,
reasoning_effort: str | None = None) -> LLMResponse:
reasoning_effort: str | None = None,
tool_choice: str | dict[str, Any] | None = None) -> LLMResponse:
kwargs: dict[str, Any] = {
"model": model or self.default_model,
"messages": self._sanitize_empty_content(messages),
@@ -35,7 +36,7 @@ class CustomProvider(LLMProvider):
if reasoning_effort:
kwargs["reasoning_effort"] = reasoning_effort
if tools:
kwargs.update(tools=tools, tool_choice="auto")
kwargs.update(tools=tools, tool_choice=tool_choice or "auto")
try:
return self._parse(await self._client.chat.completions.create(**kwargs))
except Exception as e:

View File

@@ -214,6 +214,7 @@ class LiteLLMProvider(LLMProvider):
max_tokens: int = 4096,
temperature: float = 0.7,
reasoning_effort: str | None = None,
tool_choice: str | dict[str, Any] | None = None,
) -> LLMResponse:
"""
Send a chat completion request via LiteLLM.
@@ -267,7 +268,7 @@ class LiteLLMProvider(LLMProvider):
if tools:
kwargs["tools"] = tools
kwargs["tool_choice"] = "auto"
kwargs["tool_choice"] = tool_choice or "auto"
try:
response = await acompletion(**kwargs)

View File

@@ -32,6 +32,7 @@ class OpenAICodexProvider(LLMProvider):
max_tokens: int = 4096,
temperature: float = 0.7,
reasoning_effort: str | None = None,
tool_choice: str | dict[str, Any] | None = None,
) -> LLMResponse:
model = model or self.default_model
system_prompt, input_items = _convert_messages(messages)
@@ -48,7 +49,7 @@ class OpenAICodexProvider(LLMProvider):
"text": {"verbosity": "medium"},
"include": ["reasoning.encrypted_content"],
"prompt_cache_key": _prompt_cache_key(messages),
"tool_choice": "auto",
"tool_choice": tool_choice or "auto",
"parallel_tool_calls": True,
}