feat: add vLLM/local LLM support

- Add vllm provider configuration to the config schema
- Auto-detect vLLM endpoints and use the hosted_vllm/ prefix for LiteLLM
- Pass api_base directly to acompletion for custom endpoints
- Add vLLM status display to the CLI status command
- Add vLLM setup documentation to the README
Author: ZhihaoZhang97
Date:   2026-02-02 11:23:04 +11:00
Parent: 959c4dadf8
Commit: 2b19dcf9fd
4 changed files with 61 additions and 2 deletions
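
Taken together, the hosted_vllm/ prefixing and the api_base pass-through added in the diff below amount to a plain LiteLLM call against vLLM's OpenAI-compatible server. A minimal sketch of that call shape, assuming a local vLLM server on its default port; the endpoint URL and model name are placeholders for illustration, not values from this commit:

    import asyncio
    import litellm

    async def main() -> None:
        # Placeholder endpoint and model name: vLLM's OpenAI-compatible server
        # listens on port 8000 by default, and any served model name works here.
        response = await litellm.acompletion(
            model="hosted_vllm/Qwen/Qwen2.5-7B-Instruct",  # hosted_vllm/ prefix routes LiteLLM to a vLLM server
            api_base="http://localhost:8000/v1",           # forwarded as-is, as the provider does below
            api_key="EMPTY",  # vLLM ignores the key unless started with --api-key
            messages=[{"role": "user", "content": "Hello from a local model"}],
            temperature=0.7,
        )
        print(response.choices[0].message.content)

    asyncio.run(main())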


@@ -32,11 +32,17 @@ class LiteLLMProvider(LLMProvider):
            (api_base and "openrouter" in api_base)
        )
        # Track if using custom endpoint (vLLM, etc.)
        self.is_vllm = bool(api_base) and not self.is_openrouter
        # Configure LiteLLM based on provider
        if api_key:
            if self.is_openrouter:
                # OpenRouter mode - set key
                os.environ["OPENROUTER_API_KEY"] = api_key
            elif self.is_vllm:
                # vLLM/custom endpoint - uses OpenAI-compatible API
                os.environ["OPENAI_API_KEY"] = api_key
            elif "anthropic" in default_model:
                os.environ.setdefault("ANTHROPIC_API_KEY", api_key)
            elif "openai" in default_model or "gpt" in default_model:
@@ -75,6 +81,11 @@ class LiteLLMProvider(LLMProvider):
        if self.is_openrouter and not model.startswith("openrouter/"):
            model = f"openrouter/{model}"
        # For vLLM, use hosted_vllm/ prefix per LiteLLM docs
        # Convert openai/ prefix to hosted_vllm/ if user specified it
        if self.is_vllm:
            model = f"hosted_vllm/{model}"
        kwargs: dict[str, Any] = {
            "model": model,
            "messages": messages,
@@ -82,6 +93,10 @@ class LiteLLMProvider(LLMProvider):
"temperature": temperature,
}
# Pass api_base directly for custom endpoints (vLLM, etc.)
if self.api_base:
kwargs["api_base"] = self.api_base
if tools:
kwargs["tools"] = tools
kwargs["tool_choice"] = "auto"