fix: raise tool result history limit to 16k and force save_memory in consolidation
This commit is contained in:
@@ -120,6 +120,7 @@ class MemoryStore:
|
||||
],
|
||||
tools=_SAVE_MEMORY_TOOL,
|
||||
model=model,
|
||||
tool_choice="required",
|
||||
)
|
||||
|
||||
if not response.has_tool_calls:
|
||||
|
||||
@@ -88,6 +88,7 @@ class AzureOpenAIProvider(LLMProvider):
|
||||
max_tokens: int = 4096,
|
||||
temperature: float = 0.7,
|
||||
reasoning_effort: str | None = None,
|
||||
tool_choice: str | dict[str, Any] | None = None,
|
||||
) -> dict[str, Any]:
|
||||
"""Prepare the request payload with Azure OpenAI 2024-10-21 compliance."""
|
||||
payload: dict[str, Any] = {
|
||||
@@ -106,7 +107,7 @@ class AzureOpenAIProvider(LLMProvider):
|
||||
|
||||
if tools:
|
||||
payload["tools"] = tools
|
||||
payload["tool_choice"] = "auto"
|
||||
payload["tool_choice"] = tool_choice or "auto"
|
||||
|
||||
return payload
|
||||
|
||||
@@ -118,6 +119,7 @@ class AzureOpenAIProvider(LLMProvider):
|
||||
max_tokens: int = 4096,
|
||||
temperature: float = 0.7,
|
||||
reasoning_effort: str | None = None,
|
||||
tool_choice: str | dict[str, Any] | None = None,
|
||||
) -> LLMResponse:
|
||||
"""
|
||||
Send a chat completion request to Azure OpenAI.
|
||||
@@ -137,7 +139,8 @@ class AzureOpenAIProvider(LLMProvider):
|
||||
url = self._build_chat_url(deployment_name)
|
||||
headers = self._build_headers()
|
||||
payload = self._prepare_request_payload(
|
||||
deployment_name, messages, tools, max_tokens, temperature, reasoning_effort
|
||||
deployment_name, messages, tools, max_tokens, temperature, reasoning_effort,
|
||||
tool_choice=tool_choice,
|
||||
)
|
||||
|
||||
try:
|
||||
|
||||
@@ -166,6 +166,7 @@ class LLMProvider(ABC):
|
||||
max_tokens: int = 4096,
|
||||
temperature: float = 0.7,
|
||||
reasoning_effort: str | None = None,
|
||||
tool_choice: str | dict[str, Any] | None = None,
|
||||
) -> LLMResponse:
|
||||
"""
|
||||
Send a chat completion request.
|
||||
@@ -176,6 +177,7 @@ class LLMProvider(ABC):
|
||||
model: Model identifier (provider-specific).
|
||||
max_tokens: Maximum tokens in response.
|
||||
temperature: Sampling temperature.
|
||||
tool_choice: Tool selection strategy ("auto", "required", or specific tool dict).
|
||||
|
||||
Returns:
|
||||
LLMResponse with content and/or tool calls.
|
||||
@@ -195,6 +197,7 @@ class LLMProvider(ABC):
|
||||
max_tokens: object = _SENTINEL,
|
||||
temperature: object = _SENTINEL,
|
||||
reasoning_effort: object = _SENTINEL,
|
||||
tool_choice: str | dict[str, Any] | None = None,
|
||||
) -> LLMResponse:
|
||||
"""Call chat() with retry on transient provider failures.
|
||||
|
||||
@@ -218,6 +221,7 @@ class LLMProvider(ABC):
|
||||
max_tokens=max_tokens,
|
||||
temperature=temperature,
|
||||
reasoning_effort=reasoning_effort,
|
||||
tool_choice=tool_choice,
|
||||
)
|
||||
except asyncio.CancelledError:
|
||||
raise
|
||||
@@ -250,6 +254,7 @@ class LLMProvider(ABC):
|
||||
max_tokens=max_tokens,
|
||||
temperature=temperature,
|
||||
reasoning_effort=reasoning_effort,
|
||||
tool_choice=tool_choice,
|
||||
)
|
||||
except asyncio.CancelledError:
|
||||
raise
|
||||
|
||||
@@ -25,7 +25,8 @@ class CustomProvider(LLMProvider):
|
||||
|
||||
async def chat(self, messages: list[dict[str, Any]], tools: list[dict[str, Any]] | None = None,
|
||||
model: str | None = None, max_tokens: int = 4096, temperature: float = 0.7,
|
||||
reasoning_effort: str | None = None) -> LLMResponse:
|
||||
reasoning_effort: str | None = None,
|
||||
tool_choice: str | dict[str, Any] | None = None) -> LLMResponse:
|
||||
kwargs: dict[str, Any] = {
|
||||
"model": model or self.default_model,
|
||||
"messages": self._sanitize_empty_content(messages),
|
||||
@@ -35,7 +36,7 @@ class CustomProvider(LLMProvider):
|
||||
if reasoning_effort:
|
||||
kwargs["reasoning_effort"] = reasoning_effort
|
||||
if tools:
|
||||
kwargs.update(tools=tools, tool_choice="auto")
|
||||
kwargs.update(tools=tools, tool_choice=tool_choice or "auto")
|
||||
try:
|
||||
return self._parse(await self._client.chat.completions.create(**kwargs))
|
||||
except Exception as e:
|
||||
|
||||
@@ -214,6 +214,7 @@ class LiteLLMProvider(LLMProvider):
|
||||
max_tokens: int = 4096,
|
||||
temperature: float = 0.7,
|
||||
reasoning_effort: str | None = None,
|
||||
tool_choice: str | dict[str, Any] | None = None,
|
||||
) -> LLMResponse:
|
||||
"""
|
||||
Send a chat completion request via LiteLLM.
|
||||
@@ -267,7 +268,7 @@ class LiteLLMProvider(LLMProvider):
|
||||
|
||||
if tools:
|
||||
kwargs["tools"] = tools
|
||||
kwargs["tool_choice"] = "auto"
|
||||
kwargs["tool_choice"] = tool_choice or "auto"
|
||||
|
||||
try:
|
||||
response = await acompletion(**kwargs)
|
||||
|
||||
@@ -32,6 +32,7 @@ class OpenAICodexProvider(LLMProvider):
|
||||
max_tokens: int = 4096,
|
||||
temperature: float = 0.7,
|
||||
reasoning_effort: str | None = None,
|
||||
tool_choice: str | dict[str, Any] | None = None,
|
||||
) -> LLMResponse:
|
||||
model = model or self.default_model
|
||||
system_prompt, input_items = _convert_messages(messages)
|
||||
@@ -48,7 +49,7 @@ class OpenAICodexProvider(LLMProvider):
|
||||
"text": {"verbosity": "medium"},
|
||||
"include": ["reasoning.encrypted_content"],
|
||||
"prompt_cache_key": _prompt_cache_key(messages),
|
||||
"tool_choice": "auto",
|
||||
"tool_choice": tool_choice or "auto",
|
||||
"parallel_tool_calls": True,
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user