feat(provider): add OpenVINO Model Server provider (#2193)
add OpenVINO Model Server provider
This commit is contained in:
76
README.md
76
README.md
@@ -803,6 +803,7 @@ Config file: `~/.nanobot/config.json`
|
|||||||
| `moonshot` | LLM (Moonshot/Kimi) | [platform.moonshot.cn](https://platform.moonshot.cn) |
|
| `moonshot` | LLM (Moonshot/Kimi) | [platform.moonshot.cn](https://platform.moonshot.cn) |
|
||||||
| `zhipu` | LLM (Zhipu GLM) | [open.bigmodel.cn](https://open.bigmodel.cn) |
|
| `zhipu` | LLM (Zhipu GLM) | [open.bigmodel.cn](https://open.bigmodel.cn) |
|
||||||
| `ollama` | LLM (local, Ollama) | — |
|
| `ollama` | LLM (local, Ollama) | — |
|
||||||
|
| `ovms` | LLM (local, OpenVINO Model Server) | [docs.openvino.ai](https://docs.openvino.ai/2026/model-server/ovms_docs_llm_quickstart.html) |
|
||||||
| `vllm` | LLM (local, any OpenAI-compatible server) | — |
|
| `vllm` | LLM (local, any OpenAI-compatible server) | — |
|
||||||
| `openai_codex` | LLM (Codex, OAuth) | `nanobot provider login openai-codex` |
|
| `openai_codex` | LLM (Codex, OAuth) | `nanobot provider login openai-codex` |
|
||||||
| `github_copilot` | LLM (GitHub Copilot, OAuth) | `nanobot provider login github-copilot` |
|
| `github_copilot` | LLM (GitHub Copilot, OAuth) | `nanobot provider login github-copilot` |
|
||||||
@@ -938,6 +939,81 @@ ollama run llama3.2
|
|||||||
|
|
||||||
</details>
|
</details>
|
||||||
|
|
||||||
|
<details>
|
||||||
|
<summary><b>OpenVINO Model Server (local / OpenAI-compatible)</b></summary>
|
||||||
|
|
||||||
|
Run LLMs locally on Intel GPUs using [OpenVINO Model Server](https://docs.openvino.ai/2026/model-server/ovms_docs_llm_quickstart.html). OVMS exposes an OpenAI-compatible API at `/v3`.
|
||||||
|
|
||||||
|
> Requires Docker and an Intel GPU with driver access (`/dev/dri`).
|
||||||
|
|
||||||
|
**1. Pull the model** (example):
|
||||||
|
|
||||||
|
```bash
|
||||||
|
mkdir -p ov/models && cd ov
|
||||||
|
|
||||||
|
docker run -d \
|
||||||
|
--rm \
|
||||||
|
--user $(id -u):$(id -g) \
|
||||||
|
-v $(pwd)/models:/models \
|
||||||
|
openvino/model_server:latest-gpu \
|
||||||
|
--pull \
|
||||||
|
--model_name openai/gpt-oss-20b \
|
||||||
|
--model_repository_path /models \
|
||||||
|
--source_model OpenVINO/gpt-oss-20b-int4-ov \
|
||||||
|
--task text_generation \
|
||||||
|
--tool_parser gptoss \
|
||||||
|
--reasoning_parser gptoss \
|
||||||
|
--enable_prefix_caching true \
|
||||||
|
--target_device GPU
|
||||||
|
```
|
||||||
|
|
||||||
|
> This downloads the model weights. The container runs detached (`-d`) and is removed when it exits (`--rm`), so wait until it no longer appears in `docker ps` before proceeding.
|
||||||
|
|
||||||
|
**2. Start the server** (example):
|
||||||
|
|
||||||
|
```bash
|
||||||
|
docker run -d \
|
||||||
|
--rm \
|
||||||
|
--name ovms \
|
||||||
|
--user $(id -u):$(id -g) \
|
||||||
|
-p 8000:8000 \
|
||||||
|
-v $(pwd)/models:/models \
|
||||||
|
--device /dev/dri \
|
||||||
|
--group-add=$(stat -c "%g" /dev/dri/render* | head -n 1) \
|
||||||
|
openvino/model_server:latest-gpu \
|
||||||
|
--rest_port 8000 \
|
||||||
|
--model_name openai/gpt-oss-20b \
|
||||||
|
--model_repository_path /models \
|
||||||
|
--source_model OpenVINO/gpt-oss-20b-int4-ov \
|
||||||
|
--task text_generation \
|
||||||
|
--tool_parser gptoss \
|
||||||
|
--reasoning_parser gptoss \
|
||||||
|
--enable_prefix_caching true \
|
||||||
|
--target_device GPU
|
||||||
|
```
|
||||||
|
|
||||||
|
**3. Add to config** (partial — merge into `~/.nanobot/config.json`):
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"providers": {
|
||||||
|
"ovms": {
|
||||||
|
"apiBase": "http://localhost:8000/v3"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"agents": {
|
||||||
|
"defaults": {
|
||||||
|
"provider": "ovms",
|
||||||
|
"model": "openai/gpt-oss-20b"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
> OVMS is a local server — no API key is required. It supports tool calling (`--tool_parser gptoss`), reasoning (`--reasoning_parser gptoss`), and streaming.
|
||||||
|
> See the [official OVMS docs](https://docs.openvino.ai/2026/model-server/ovms_docs_llm_quickstart.html) for more details.
|
||||||
|
</details>
|
||||||
|
|
||||||
<details>
|
<details>
|
||||||
<summary><b>vLLM (local / OpenAI-compatible)</b></summary>
|
<summary><b>vLLM (local / OpenAI-compatible)</b></summary>
|
||||||
|
|
||||||
|
|||||||
@@ -409,6 +409,14 @@ def _make_provider(config: Config):
|
|||||||
api_base=p.api_base,
|
api_base=p.api_base,
|
||||||
default_model=model,
|
default_model=model,
|
||||||
)
|
)
|
||||||
|
# OpenVINO Model Server: direct OpenAI-compatible endpoint at /v3
|
||||||
|
elif provider_name == "ovms":
|
||||||
|
from nanobot.providers.custom_provider import CustomProvider
|
||||||
|
provider = CustomProvider(
|
||||||
|
api_key=p.api_key if p else "no-key",
|
||||||
|
api_base=config.get_api_base(model) or "http://localhost:8000/v3",
|
||||||
|
default_model=model,
|
||||||
|
)
|
||||||
else:
|
else:
|
||||||
from nanobot.providers.litellm_provider import LiteLLMProvider
|
from nanobot.providers.litellm_provider import LiteLLMProvider
|
||||||
from nanobot.providers.registry import find_by_name
|
from nanobot.providers.registry import find_by_name
|
||||||
|
|||||||
@@ -70,6 +70,7 @@ class ProvidersConfig(Base):
|
|||||||
dashscope: ProviderConfig = Field(default_factory=ProviderConfig)
|
dashscope: ProviderConfig = Field(default_factory=ProviderConfig)
|
||||||
vllm: ProviderConfig = Field(default_factory=ProviderConfig)
|
vllm: ProviderConfig = Field(default_factory=ProviderConfig)
|
||||||
ollama: ProviderConfig = Field(default_factory=ProviderConfig) # Ollama local models
|
ollama: ProviderConfig = Field(default_factory=ProviderConfig) # Ollama local models
|
||||||
|
ovms: ProviderConfig = Field(default_factory=ProviderConfig) # OpenVINO Model Server (OVMS)
|
||||||
gemini: ProviderConfig = Field(default_factory=ProviderConfig)
|
gemini: ProviderConfig = Field(default_factory=ProviderConfig)
|
||||||
moonshot: ProviderConfig = Field(default_factory=ProviderConfig)
|
moonshot: ProviderConfig = Field(default_factory=ProviderConfig)
|
||||||
minimax: ProviderConfig = Field(default_factory=ProviderConfig)
|
minimax: ProviderConfig = Field(default_factory=ProviderConfig)
|
||||||
|
|||||||
@@ -452,6 +452,17 @@ PROVIDERS: tuple[ProviderSpec, ...] = (
|
|||||||
strip_model_prefix=False,
|
strip_model_prefix=False,
|
||||||
model_overrides=(),
|
model_overrides=(),
|
||||||
),
|
),
|
||||||
|
# === OpenVINO Model Server (direct, local, OpenAI-compatible at /v3) ===
|
||||||
|
ProviderSpec(
|
||||||
|
name="ovms",
|
||||||
|
keywords=("openvino", "ovms"),
|
||||||
|
env_key="",
|
||||||
|
display_name="OpenVINO Model Server",
|
||||||
|
litellm_prefix="",
|
||||||
|
is_direct=True,
|
||||||
|
is_local=True,
|
||||||
|
default_api_base="http://localhost:8000/v3",
|
||||||
|
),
|
||||||
# === Auxiliary (not a primary LLM provider) ============================
|
# === Auxiliary (not a primary LLM provider) ============================
|
||||||
# Groq: mainly used for Whisper voice transcription, also usable for LLM.
|
# Groq: mainly used for Whisper voice transcription, also usable for LLM.
|
||||||
# Needs "groq/" prefix for LiteLLM routing. Placed last — it rarely wins fallback.
|
# Needs "groq/" prefix for LiteLLM routing. Placed last — it rarely wins fallback.
|
||||||
|
|||||||
Reference in New Issue
Block a user