From b42eb1ccbcdf7bc3e926e2780f64290ec7ebf338 Mon Sep 17 00:00:00 2001
From: Ben Browning
Date: Wed, 14 May 2025 17:16:33 -0400
Subject: [PATCH] fix: Responses API: handle type=None in streaming tool calls (#2166)

# What does this PR do?

In the Responses API, we convert incoming response requests to chat
completion requests. When streaming the resulting chunks of those chat
completion requests, inference providers that use OpenAI clients will
often return a `type=None` value in the tool call parts of the response.
This causes issues when we try to dump and load that response into our
pydantic model, because `type` cannot be `None` in the Responses API
model we load these chunks into.

So, strip the "type" field, if present, from those chat completion tool
call results before dumping and loading them as our typed pydantic
models, which then applies our default value for that field.

## Test Plan

This was found via manual testing of the Responses API with codex, where
I was getting errors in some tool call situations. I added a unit test
that simulates this scenario, and verified the fix with both that test
and further manual codex testing.

Signed-off-by: Ben Browning
---
 .../agents/meta_reference/openai_responses.py |  8 ++-
 .../meta_reference/test_openai_responses.py   | 70 +++++++++++++++++++
 2 files changed, 76 insertions(+), 2 deletions(-)

diff --git a/llama_stack/providers/inline/agents/meta_reference/openai_responses.py b/llama_stack/providers/inline/agents/meta_reference/openai_responses.py
index 3ead083ef..6d9d06109 100644
--- a/llama_stack/providers/inline/agents/meta_reference/openai_responses.py
+++ b/llama_stack/providers/inline/agents/meta_reference/openai_responses.py
@@ -7,7 +7,7 @@
 import json
 import uuid
 from collections.abc import AsyncIterator
-from typing import cast
+from typing import Any, cast
 
 from openai.types.chat import ChatCompletionToolParam
 from pydantic import BaseModel
@@ -264,7 +264,11 @@ class OpenAIResponsesImpl:
                             if response_tool_call:
                                 response_tool_call.function.arguments += tool_call.function.arguments
                             else:
-                                response_tool_call = OpenAIChatCompletionToolCall(**tool_call.model_dump())
+                                tool_call_dict: dict[str, Any] = tool_call.model_dump()
+                                # Ensure we don't have any empty type field in the tool call dict.
+                                # The OpenAI client used by providers often returns a type=None here.
+                                tool_call_dict.pop("type", None)
+                                response_tool_call = OpenAIChatCompletionToolCall(**tool_call_dict)
                             chat_response_tool_calls[tool_call.index] = response_tool_call
 
             # Convert the dict of tool calls by index to a list of tool calls to pass back in our response
diff --git a/tests/unit/providers/agents/meta_reference/test_openai_responses.py b/tests/unit/providers/agents/meta_reference/test_openai_responses.py
index 3fe68cb9d..ed5f13a58 100644
--- a/tests/unit/providers/agents/meta_reference/test_openai_responses.py
+++ b/tests/unit/providers/agents/meta_reference/test_openai_responses.py
@@ -7,10 +7,18 @@
 from unittest.mock import AsyncMock, patch
 
 import pytest
+from openai.types.chat.chat_completion_chunk import (
+    ChatCompletionChunk,
+    Choice,
+    ChoiceDelta,
+    ChoiceDeltaToolCall,
+    ChoiceDeltaToolCallFunction,
+)
 
 from llama_stack.apis.agents.openai_responses import (
     OpenAIResponseInputItemList,
     OpenAIResponseInputMessageContentText,
+    OpenAIResponseInputToolFunction,
     OpenAIResponseInputToolWebSearch,
     OpenAIResponseMessage,
     OpenAIResponseObject,
@@ -167,6 +175,68 @@ async def test_create_openai_response_with_string_input_with_tools(openai_respon
     assert result.output[1].content[0].text == "Dublin"
 
 
+@pytest.mark.asyncio
+async def test_create_openai_response_with_tool_call_type_none(openai_responses_impl, mock_inference_api):
+    """Test creating an OpenAI response when a streamed tool call arrives with type=None."""
+    # Setup
+    input_text = "How hot is it in San Francisco today?"
+    model = "meta-llama/Llama-3.1-8B-Instruct"
+
+    async def fake_stream():
+        yield ChatCompletionChunk(
+            id="123",
+            choices=[
+                Choice(
+                    index=0,
+                    delta=ChoiceDelta(
+                        tool_calls=[
+                            ChoiceDeltaToolCall(
+                                index=0,
+                                id="tc_123",
+                                function=ChoiceDeltaToolCallFunction(name="get_weather", arguments="{}"),
+                                type=None,
+                            )
+                        ]
+                    ),
+                ),
+            ],
+            created=1,
+            model=model,
+            object="chat.completion.chunk",
+        )
+
+    mock_inference_api.openai_chat_completion.return_value = fake_stream()
+
+    # Execute
+    result = await openai_responses_impl.create_openai_response(
+        input=input_text,
+        model=model,
+        stream=True,
+        temperature=0.1,
+        tools=[
+            OpenAIResponseInputToolFunction(
+                name="get_weather",
+                description="Get current temperature for a given location.",
+                parameters={
+                    "location": "string",
+                },
+            )
+        ],
+    )
+
+    # Verify
+    first_call = mock_inference_api.openai_chat_completion.call_args_list[0]
+    assert first_call.kwargs["messages"][0].content == input_text
+    assert first_call.kwargs["tools"] is not None
+    assert first_call.kwargs["temperature"] == 0.1
+
+    # Check that the type=None tool call survived the round-trip into the response output
+    chunks = [chunk async for chunk in result]
+    assert len(chunks) > 0
+    assert chunks[0].response.output[0].type == "function_call"
+    assert chunks[0].response.output[0].name == "get_weather"
+
+
 @pytest.mark.asyncio
 async def test_create_openai_response_with_multiple_messages(openai_responses_impl, mock_inference_api):
     """Test creating an OpenAI response with multiple messages."""
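
Below is a minimal, self-contained sketch (not part of the patch) of the failure mode this change
addresses and why dropping the key works: pydantic rejects an explicit `type=None` for a field
declared as `Literal["function"]` with a default, but applies the default when the key is simply
absent. The `ToolCall`/`ToolCallFunction` models here are simplified stand-ins for the real
Llama Stack models; their field sets are assumptions made for illustration only.

```python
# Illustrative sketch only -- simplified stand-ins for the Llama Stack models;
# field names and defaults are assumptions for demonstration purposes.
from typing import Literal

from pydantic import BaseModel, ValidationError


class ToolCallFunction(BaseModel):
    name: str | None = None
    arguments: str | None = None


class ToolCall(BaseModel):
    index: int | None = None
    id: str | None = None
    # The Responses-side model requires "function"; None is not allowed.
    type: Literal["function"] = "function"
    function: ToolCallFunction | None = None


# What an OpenAI client streaming delta can look like after model_dump():
# the "type" key is present but explicitly None.
delta = {
    "index": 0,
    "id": "tc_123",
    "type": None,
    "function": {"name": "get_weather", "arguments": "{}"},
}

try:
    ToolCall(**delta)
except ValidationError:
    print("explicit type=None is rejected")

# The fix: drop the key so pydantic falls back to the field default.
delta.pop("type", None)
print(ToolCall(**delta).type)  # -> "function"
```

The added unit test exercises the same path end to end, streaming a chunk whose tool call delta
carries `type=None` and asserting that a `function_call` output item still comes back.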