From e87bb0a82da311dfc09134762a1264a4aa7d975f Mon Sep 17 00:00:00 2001
From: Xubin Ren <xubinrencs@gmail.com>
Date: Sat, 21 Mar 2026 06:21:26 +0000
Subject: [PATCH] fix(mcp): preserve schema semantics during normalization

Only normalize nullable MCP tool schemas for OpenAI-compatible providers so optional params still work without collapsing unrelated unions. Also teach local validation to honor nullable flags and add regression coverage for nullable and non-nullable schemas.

Made-with: Cursor
---
 nanobot/agent/tools/base.py   |   4 +-
 nanobot/agent/tools/mcp.py    | 150 +++++++++++++---------------------
 tests/test_mcp_tool.py        |  63 ++++++++++++++
 tests/test_tool_validation.py |  12 +++
 4 files changed, 135 insertions(+), 94 deletions(-)

diff --git a/nanobot/agent/tools/base.py b/nanobot/agent/tools/base.py
index af0e920..4017f7c 100644
--- a/nanobot/agent/tools/base.py
+++ b/nanobot/agent/tools/base.py
@@ -146,7 +146,9 @@ class Tool(ABC):
 
     def _validate(self, val: Any, schema: dict[str, Any], path: str) -> list[str]:
         raw_type = schema.get("type")
-        nullable = isinstance(raw_type, list) and "null" in raw_type
+        nullable = (isinstance(raw_type, list) and "null" in raw_type) or schema.get(
+            "nullable", False
+        )
         t, label = self._resolve_type(raw_type), path or "parameter"
         if nullable and val is None:
             return []
diff --git a/nanobot/agent/tools/mcp.py b/nanobot/agent/tools/mcp.py
index b64bc05..c1c3e79 100644
--- a/nanobot/agent/tools/mcp.py
+++ b/nanobot/agent/tools/mcp.py
@@ -11,103 +11,67 @@ from nanobot.agent.tools.base import Tool
 from nanobot.agent.tools.registry import ToolRegistry
 
 
-def _normalize_schema_for_openai(schema: dict[str, Any]) -> dict[str, Any]:
-    """Normalize JSON Schema for OpenAI-compatible providers.
-    
-    OpenAI's API (and many compatible providers) only supports a subset of JSON Schema:
-    - Top-level type must be 'object'
-    - No oneOf/anyOf/allOf/enum/not at the top level
-    - Properties should have simple types
-    """
+def _extract_nullable_branch(options: Any) -> tuple[dict[str, Any], bool] | None:
+    """Return the single non-null branch for nullable unions."""
+    if not isinstance(options, list):
+        return None
+
+    non_null: list[dict[str, Any]] = []
+    saw_null = False
+    for option in options:
+        if not isinstance(option, dict):
+            return None
+        if option.get("type") == "null":
+            saw_null = True
+            continue
+        non_null.append(option)
+
+    if saw_null and len(non_null) == 1:
+        return non_null[0], True
+    return None
+
+
+def _normalize_schema_for_openai(schema: Any) -> dict[str, Any]:
+    """Normalize only nullable JSON Schema patterns for tool definitions."""
     if not isinstance(schema, dict):
         return {"type": "object", "properties": {}}
-    
-    # If schema has oneOf/anyOf/allOf at top level, try to extract the first option
-    for key in ["oneOf", "anyOf", "allOf"]:
-        if key in schema:
-            options = schema[key]
-            if isinstance(options, list) and len(options) > 0:
-                # Use the first option as the base schema
-                first_option = options[0]
-                if isinstance(first_option, dict):
-                    # Merge with other schema properties, preferring the first option
-                    normalized = dict(schema)
-                    del normalized[key]
-                    normalized.update(first_option)
-                    return _normalize_schema_for_openai(normalized)
-    
-    # Ensure top-level type is object
-    if schema.get("type") != "object":
-        # If no type specified or different type, default to object
-        schema = {"type": "object", **{k: v for k, v in schema.items() if k != "type"}}
-    
-    # Clean up unsupported properties at top level
-    unsupported = ["enum", "not", "const"]
-    for key in unsupported:
-        schema.pop(key, None)
-    
-    # Ensure properties and required exist
-    if "properties" not in schema:
-        schema["properties"] = {}
-    if "required" not in schema:
-        schema["required"] = []
-    
-    # Recursively normalize nested property schemas
-    if "properties" in schema and isinstance(schema["properties"], dict):
-        for prop_name, prop_schema in schema["properties"].items():
-            if isinstance(prop_schema, dict):
-                schema["properties"][prop_name] = _normalize_property_schema(prop_schema)
-    
-    return schema
 
+    normalized = dict(schema)
 
-def _normalize_property_schema(schema: dict[str, Any]) -> dict[str, Any]:
-    """Normalize a property schema for OpenAI compatibility."""
-    if not isinstance(schema, dict):
-        return {"type": "string"}
-    
-    # Handle oneOf/anyOf in properties
-    for key in ["oneOf", "anyOf"]:
-        if key in schema:
-            options = schema[key]
-            if isinstance(options, list) and len(options) > 0:
-                first_option = options[0]
-                if isinstance(first_option, dict):
-                    # Replace the complex schema with the first option
-                    result = {k: v for k, v in schema.items() if k not in [key, "allOf", "not"]}
-                    result.update(first_option)
-                    return _normalize_property_schema(result)
-    
-    # Handle allOf by merging all subschemas
-    if "allOf" in schema:
-        subschemas = schema["allOf"]
-        if isinstance(subschemas, list):
-            merged = {}
-            for sub in subschemas:
-                if isinstance(sub, dict):
-                    merged.update(sub)
-            # Remove allOf and merge with other properties
-            result = {k: v for k, v in schema.items() if k != "allOf"}
-            result.update(merged)
-            return _normalize_property_schema(result)
-    
-    # Ensure type is simple
-    if "type" not in schema:
-        # Try to infer type from other properties
-        if "enum" in schema:
-            schema["type"] = "string"
-        elif "properties" in schema:
-            schema["type"] = "object"
-        elif "items" in schema:
-            schema["type"] = "array"
-        else:
-            schema["type"] = "string"
-    
-    # Clean up not/const
-    schema.pop("not", None)
-    schema.pop("const", None)
-    
-    return schema
+    raw_type = normalized.get("type")
+    if isinstance(raw_type, list):
+        non_null = [item for item in raw_type if item != "null"]
+        if "null" in raw_type and len(non_null) == 1:
+            normalized["type"] = non_null[0]
+            normalized["nullable"] = True
+
+    for key in ("oneOf", "anyOf"):
+        nullable_branch = _extract_nullable_branch(normalized.get(key))
+        if nullable_branch is not None:
+            branch, _ = nullable_branch
+            merged = {k: v for k, v in normalized.items() if k != key}
+            merged.update(branch)
+            normalized = merged
+            normalized["nullable"] = True
+            break
+
+    if "properties" in normalized and isinstance(normalized["properties"], dict):
+        normalized["properties"] = {
+            name: _normalize_schema_for_openai(prop)
+            if isinstance(prop, dict)
+            else prop
+            for name, prop in normalized["properties"].items()
+        }
+
+    if "items" in normalized and isinstance(normalized["items"], dict):
+        normalized["items"] = _normalize_schema_for_openai(normalized["items"])
+
+    if normalized.get("type") != "object":
+        return normalized
+
+    normalized.setdefault("properties", {})
+    normalized.setdefault("required", [])
+    return normalized
 
 
 class MCPToolWrapper(Tool):
diff --git a/tests/test_mcp_tool.py b/tests/test_mcp_tool.py
index d014f58..28666f0 100644
--- a/tests/test_mcp_tool.py
+++ b/tests/test_mcp_tool.py
@@ -84,6 +84,69 @@ def _make_wrapper(session: object, *, timeout: float = 0.1) -> MCPToolWrapper:
     return MCPToolWrapper(session, "test", tool_def, tool_timeout=timeout)
 
 
+def test_wrapper_preserves_non_nullable_unions() -> None:
+    tool_def = SimpleNamespace(
+        name="demo",
+        description="demo tool",
+        inputSchema={
+            "type": "object",
+            "properties": {
+                "value": {
+                    "anyOf": [{"type": "string"}, {"type": "integer"}],
+                }
+            },
+        },
+    )
+
+    wrapper = MCPToolWrapper(SimpleNamespace(call_tool=None), "test", tool_def)
+
+    assert wrapper.parameters["properties"]["value"]["anyOf"] == [
+        {"type": "string"},
+        {"type": "integer"},
+    ]
+
+
+def test_wrapper_normalizes_nullable_property_type_union() -> None:
+    tool_def = SimpleNamespace(
+        name="demo",
+        description="demo tool",
+        inputSchema={
+            "type": "object",
+            "properties": {
+                "name": {"type": ["string", "null"]},
+            },
+        },
+    )
+
+    wrapper = MCPToolWrapper(SimpleNamespace(call_tool=None), "test", tool_def)
+
+    assert wrapper.parameters["properties"]["name"] == {"type": "string", "nullable": True}
+
+
+def test_wrapper_normalizes_nullable_property_anyof() -> None:
+    tool_def = SimpleNamespace(
+        name="demo",
+        description="demo tool",
+        inputSchema={
+            "type": "object",
+            "properties": {
+                "name": {
+                    "anyOf": [{"type": "string"}, {"type": "null"}],
+                    "description": "optional name",
+                },
+            },
+        },
+    )
+
+    wrapper = MCPToolWrapper(SimpleNamespace(call_tool=None), "test", tool_def)
+
+    assert wrapper.parameters["properties"]["name"] == {
+        "type": "string",
+        "description": "optional name",
+        "nullable": True,
+    }
+
+
 @pytest.mark.asyncio
 async def test_execute_returns_text_blocks() -> None:
     async def call_tool(_name: str, arguments: dict) -> object:
diff --git a/tests/test_tool_validation.py b/tests/test_tool_validation.py
index e817f37..a95418f 100644
--- a/tests/test_tool_validation.py
+++ b/tests/test_tool_validation.py
@@ -455,6 +455,18 @@ def test_validate_nullable_param_accepts_none() -> None:
     assert errors == []
 
 
+def test_validate_nullable_flag_accepts_none() -> None:
+    """OpenAI-normalized nullable params should still accept None locally."""
+    tool = CastTestTool(
+        {
+            "type": "object",
+            "properties": {"name": {"type": "string", "nullable": True}},
+        }
+    )
+    errors = tool.validate_params({"name": None})
+    assert errors == []
+
+
 def test_cast_nullable_param_no_crash() -> None:
     """cast_params should not crash on nullable type (the original bug)."""
     tool = CastTestTool(