Merge remote-tracking branch 'origin/main'
# Conflicts:
#   nanobot/agent/context.py
#   nanobot/agent/loop.py
#   nanobot/agent/tools/web.py
#   nanobot/channels/telegram.py
#   nanobot/cli/commands.py
#   tests/test_commands.py
#   tests/test_config_migration.py
#   tests/test_telegram_channel.py
This commit is contained in:
@@ -51,6 +51,12 @@ class CustomProvider(LLMProvider):
|
||||
try:
|
||||
return self._parse(await self._client.chat.completions.create(**kwargs))
|
||||
except Exception as e:
|
||||
# JSONDecodeError.doc / APIError.response.text may carry the raw body
|
||||
# (e.g. "unsupported model: xxx") which is far more useful than the
|
||||
# generic "Expecting value …" message. Truncate to avoid huge HTML pages.
|
||||
body = getattr(e, "doc", None) or getattr(getattr(e, "response", None), "text", None)
|
||||
if body and body.strip():
|
||||
return LLMResponse(content=f"Error: {body.strip()[:500]}", finish_reason="error")
|
||||
return LLMResponse(content=f"Error: {e}", finish_reason="error")
|
||||
|
||||
def _parse(self, response: Any) -> LLMResponse:
|
||||
|
||||
@@ -128,24 +128,40 @@ class LiteLLMProvider(LLMProvider):
|
||||
messages: list[dict[str, Any]],
|
||||
tools: list[dict[str, Any]] | None,
|
||||
) -> tuple[list[dict[str, Any]], list[dict[str, Any]] | None]:
|
||||
"""Return copies of messages and tools with cache_control injected."""
|
||||
new_messages = []
|
||||
for msg in messages:
|
||||
if msg.get("role") == "system":
|
||||
content = msg["content"]
|
||||
if isinstance(content, str):
|
||||
new_content = [{"type": "text", "text": content, "cache_control": {"type": "ephemeral"}}]
|
||||
else:
|
||||
new_content = list(content)
|
||||
new_content[-1] = {**new_content[-1], "cache_control": {"type": "ephemeral"}}
|
||||
new_messages.append({**msg, "content": new_content})
|
||||
else:
|
||||
new_messages.append(msg)
|
||||
"""Return copies of messages and tools with cache_control injected.
|
||||
|
||||
Two breakpoints are placed:
|
||||
1. System message — caches the static system prompt
|
||||
2. Second-to-last message — caches the conversation history prefix
|
||||
This maximises cache hits across multi-turn conversations.
|
||||
"""
|
||||
cache_marker = {"type": "ephemeral"}
|
||||
new_messages = list(messages)
|
||||
|
||||
def _mark(msg: dict[str, Any]) -> dict[str, Any]:
    """Return a copy of *msg* whose content carries the cache marker.

    String content is wrapped in a single text part tagged with
    ``cache_control``. For a non-empty list, the list is copied and its
    last part is replaced by a tagged copy. Any other content (missing,
    empty list, other types) returns *msg* itself, unchanged.
    """
    body = msg.get("content")

    if isinstance(body, str):
        tagged_parts = [
            {"type": "text", "text": body, "cache_control": cache_marker},
        ]
        return {**msg, "content": tagged_parts}

    if isinstance(body, list) and body:
        # Build a new list so the caller's content parts are not modified.
        *leading, final = body
        tagged_final = {**final, "cache_control": cache_marker}
        return {**msg, "content": [*leading, tagged_final]}

    return msg
|
||||
|
||||
# Breakpoint 1: system message
|
||||
if new_messages and new_messages[0].get("role") == "system":
|
||||
new_messages[0] = _mark(new_messages[0])
|
||||
|
||||
# Breakpoint 2: second-to-last message (caches conversation history prefix)
|
||||
if len(new_messages) >= 3:
|
||||
new_messages[-2] = _mark(new_messages[-2])
|
||||
|
||||
new_tools = tools
|
||||
if tools:
|
||||
new_tools = list(tools)
|
||||
new_tools[-1] = {**new_tools[-1], "cache_control": {"type": "ephemeral"}}
|
||||
new_tools[-1] = {**new_tools[-1], "cache_control": cache_marker}
|
||||
|
||||
return new_messages, new_tools
|
||||
|
||||
|
||||
Reference in New Issue
Block a user