fix: raise tool result history limit to 16k and force save_memory in consolidation

This commit is contained in:
Re-bin
2026-03-12 03:08:53 +00:00
parent d313765442
commit 0a0017ff45
6 changed files with 18 additions and 6 deletions

View File

@@ -120,6 +120,7 @@ class MemoryStore:
], ],
tools=_SAVE_MEMORY_TOOL, tools=_SAVE_MEMORY_TOOL,
model=model, model=model,
tool_choice="required",
) )
if not response.has_tool_calls: if not response.has_tool_calls:

View File

@@ -88,6 +88,7 @@ class AzureOpenAIProvider(LLMProvider):
max_tokens: int = 4096, max_tokens: int = 4096,
temperature: float = 0.7, temperature: float = 0.7,
reasoning_effort: str | None = None, reasoning_effort: str | None = None,
tool_choice: str | dict[str, Any] | None = None,
) -> dict[str, Any]: ) -> dict[str, Any]:
"""Prepare the request payload with Azure OpenAI 2024-10-21 compliance.""" """Prepare the request payload with Azure OpenAI 2024-10-21 compliance."""
payload: dict[str, Any] = { payload: dict[str, Any] = {
@@ -106,7 +107,7 @@ class AzureOpenAIProvider(LLMProvider):
if tools: if tools:
payload["tools"] = tools payload["tools"] = tools
payload["tool_choice"] = "auto" payload["tool_choice"] = tool_choice or "auto"
return payload return payload
@@ -118,6 +119,7 @@ class AzureOpenAIProvider(LLMProvider):
max_tokens: int = 4096, max_tokens: int = 4096,
temperature: float = 0.7, temperature: float = 0.7,
reasoning_effort: str | None = None, reasoning_effort: str | None = None,
tool_choice: str | dict[str, Any] | None = None,
) -> LLMResponse: ) -> LLMResponse:
""" """
Send a chat completion request to Azure OpenAI. Send a chat completion request to Azure OpenAI.
@@ -137,7 +139,8 @@ class AzureOpenAIProvider(LLMProvider):
url = self._build_chat_url(deployment_name) url = self._build_chat_url(deployment_name)
headers = self._build_headers() headers = self._build_headers()
payload = self._prepare_request_payload( payload = self._prepare_request_payload(
deployment_name, messages, tools, max_tokens, temperature, reasoning_effort deployment_name, messages, tools, max_tokens, temperature, reasoning_effort,
tool_choice=tool_choice,
) )
try: try:

View File

@@ -166,6 +166,7 @@ class LLMProvider(ABC):
max_tokens: int = 4096, max_tokens: int = 4096,
temperature: float = 0.7, temperature: float = 0.7,
reasoning_effort: str | None = None, reasoning_effort: str | None = None,
tool_choice: str | dict[str, Any] | None = None,
) -> LLMResponse: ) -> LLMResponse:
""" """
Send a chat completion request. Send a chat completion request.
@@ -176,6 +177,7 @@ class LLMProvider(ABC):
model: Model identifier (provider-specific). model: Model identifier (provider-specific).
max_tokens: Maximum tokens in response. max_tokens: Maximum tokens in response.
temperature: Sampling temperature. temperature: Sampling temperature.
tool_choice: Tool selection strategy ("auto", "required", or specific tool dict).
Returns: Returns:
LLMResponse with content and/or tool calls. LLMResponse with content and/or tool calls.
@@ -195,6 +197,7 @@ class LLMProvider(ABC):
max_tokens: object = _SENTINEL, max_tokens: object = _SENTINEL,
temperature: object = _SENTINEL, temperature: object = _SENTINEL,
reasoning_effort: object = _SENTINEL, reasoning_effort: object = _SENTINEL,
tool_choice: str | dict[str, Any] | None = None,
) -> LLMResponse: ) -> LLMResponse:
"""Call chat() with retry on transient provider failures. """Call chat() with retry on transient provider failures.
@@ -218,6 +221,7 @@ class LLMProvider(ABC):
max_tokens=max_tokens, max_tokens=max_tokens,
temperature=temperature, temperature=temperature,
reasoning_effort=reasoning_effort, reasoning_effort=reasoning_effort,
tool_choice=tool_choice,
) )
except asyncio.CancelledError: except asyncio.CancelledError:
raise raise
@@ -250,6 +254,7 @@ class LLMProvider(ABC):
max_tokens=max_tokens, max_tokens=max_tokens,
temperature=temperature, temperature=temperature,
reasoning_effort=reasoning_effort, reasoning_effort=reasoning_effort,
tool_choice=tool_choice,
) )
except asyncio.CancelledError: except asyncio.CancelledError:
raise raise

View File

@@ -25,7 +25,8 @@ class CustomProvider(LLMProvider):
async def chat(self, messages: list[dict[str, Any]], tools: list[dict[str, Any]] | None = None, async def chat(self, messages: list[dict[str, Any]], tools: list[dict[str, Any]] | None = None,
model: str | None = None, max_tokens: int = 4096, temperature: float = 0.7, model: str | None = None, max_tokens: int = 4096, temperature: float = 0.7,
reasoning_effort: str | None = None) -> LLMResponse: reasoning_effort: str | None = None,
tool_choice: str | dict[str, Any] | None = None) -> LLMResponse:
kwargs: dict[str, Any] = { kwargs: dict[str, Any] = {
"model": model or self.default_model, "model": model or self.default_model,
"messages": self._sanitize_empty_content(messages), "messages": self._sanitize_empty_content(messages),
@@ -35,7 +36,7 @@ class CustomProvider(LLMProvider):
if reasoning_effort: if reasoning_effort:
kwargs["reasoning_effort"] = reasoning_effort kwargs["reasoning_effort"] = reasoning_effort
if tools: if tools:
kwargs.update(tools=tools, tool_choice="auto") kwargs.update(tools=tools, tool_choice=tool_choice or "auto")
try: try:
return self._parse(await self._client.chat.completions.create(**kwargs)) return self._parse(await self._client.chat.completions.create(**kwargs))
except Exception as e: except Exception as e:

View File

@@ -214,6 +214,7 @@ class LiteLLMProvider(LLMProvider):
max_tokens: int = 4096, max_tokens: int = 4096,
temperature: float = 0.7, temperature: float = 0.7,
reasoning_effort: str | None = None, reasoning_effort: str | None = None,
tool_choice: str | dict[str, Any] | None = None,
) -> LLMResponse: ) -> LLMResponse:
""" """
Send a chat completion request via LiteLLM. Send a chat completion request via LiteLLM.
@@ -267,7 +268,7 @@ class LiteLLMProvider(LLMProvider):
if tools: if tools:
kwargs["tools"] = tools kwargs["tools"] = tools
kwargs["tool_choice"] = "auto" kwargs["tool_choice"] = tool_choice or "auto"
try: try:
response = await acompletion(**kwargs) response = await acompletion(**kwargs)

View File

@@ -32,6 +32,7 @@ class OpenAICodexProvider(LLMProvider):
max_tokens: int = 4096, max_tokens: int = 4096,
temperature: float = 0.7, temperature: float = 0.7,
reasoning_effort: str | None = None, reasoning_effort: str | None = None,
tool_choice: str | dict[str, Any] | None = None,
) -> LLMResponse: ) -> LLMResponse:
model = model or self.default_model model = model or self.default_model
system_prompt, input_items = _convert_messages(messages) system_prompt, input_items = _convert_messages(messages)
@@ -48,7 +49,7 @@ class OpenAICodexProvider(LLMProvider):
"text": {"verbosity": "medium"}, "text": {"verbosity": "medium"},
"include": ["reasoning.encrypted_content"], "include": ["reasoning.encrypted_content"],
"prompt_cache_key": _prompt_cache_key(messages), "prompt_cache_key": _prompt_cache_key(messages),
"tool_choice": "auto", "tool_choice": tool_choice or "auto",
"parallel_tool_calls": True, "parallel_tool_calls": True,
} }