Mirror of https://github.com/meta-llama/llama-stack.git, synced 2025-12-03 09:53:45 +00:00
fix!: Enhance response API support to not fail with tool calling (#3385)
Some checks failed
Python Package Build Test / build (3.12) (push) Failing after 8s
Test External Providers Installed via Module / test-external-providers-from-module (venv) (push) Has been skipped
SqlStore Integration Tests / test-postgres (3.13) (push) Failing after 3s
SqlStore Integration Tests / test-postgres (3.12) (push) Failing after 5s
Integration Auth Tests / test-matrix (oauth2_token) (push) Failing after 6s
Python Package Build Test / build (3.13) (push) Failing after 6s
Integration Tests (Replay) / Integration Tests (, , , client=, ) (push) Failing after 10s
Unit Tests / unit-tests (3.13) (push) Failing after 14s
Unit Tests / unit-tests (3.12) (push) Failing after 19s
Test External API and Providers / test-external (venv) (push) Failing after 1m3s
Vector IO Integration Tests / test-matrix (push) Failing after 1m6s
API Conformance Tests / check-schema-compatibility (push) Successful in 1m17s
UI Tests / ui-tests (22) (push) Successful in 1m18s
Pre-commit / pre-commit (push) Successful in 3m5s
# What does this PR do?

Introduces two main fixes to enhance the stability of the Responses API when dealing with tool-calling responses and structured outputs.

### Changes Made

1. Added OpenAIResponseOutputMessageMCPCall and OpenAIResponseOutputMessageMCPListTools to OpenAIResponseInput. https://github.com/llamastack/llama-stack/pull/3810 was merged and did the same in a different way, but this PR does it in a way that keeps OpenAIResponseOutput and the objects allowed in OpenAIResponseInput in sync (see the validation sketch below).
2. Added protection for the case where self.ctx.response_format does not have a `type` attribute.

BREAKING CHANGE: OpenAIResponseInput now uses the OpenAIResponseOutput union type. This is semantically equivalent: all previously accepted types are still supported via the OpenAIResponseOutput union. This improves type consistency and maintainability.
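To illustrate the compatibility claim above, here is a minimal sketch (not part of the PR) that validates an output-message object against the new input union. It assumes these names are importable from `llama_stack.apis.agents.openai_responses`, as in the test imports further down; the field values are invented.

```python
# Hedged illustration, not from the PR: shows that an output message type
# (an MCP call) validates as OpenAIResponseInput once OpenAIResponseOutput
# is part of the input union. Field values are made up for the example.
from pydantic import TypeAdapter

from llama_stack.apis.agents.openai_responses import (
    OpenAIResponseInput,
    OpenAIResponseOutputMessageMCPCall,
)

mcp_call = OpenAIResponseOutputMessageMCPCall(
    id="mcp_example",
    type="mcp_call",
    server_label="example_server",
    name="lookup",
    arguments="{}",
    output="ok",
)

# Output objects like this can appear in the stored input of multi-turn
# conversations; with the union change they validate as inputs cleanly.
validated = TypeAdapter(OpenAIResponseInput).validate_python(mcp_call.model_dump())
print(type(validated).__name__)  # expected: OpenAIResponseOutputMessageMCPCall
```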
parent f18b5eb537, commit 63422e5b36
10 changed files with 84 additions and 79 deletions
@@ -6735,14 +6735,9 @@ components:
         Error details for failed OpenAI response requests.
     OpenAIResponseInput:
       oneOf:
-        - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
-        - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall'
-        - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall'
+        - $ref: '#/components/schemas/OpenAIResponseOutput'
         - $ref: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput'
-        - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest'
         - $ref: '#/components/schemas/OpenAIResponseMCPApprovalResponse'
-        - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall'
-        - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools'
         - $ref: '#/components/schemas/OpenAIResponseMessage'
     OpenAIResponseInputToolFileSearch:
       type: object
docs/static/deprecated-llama-stack-spec.html (vendored, 17 changed lines)

@@ -8526,29 +8526,14 @@
       "OpenAIResponseInput": {
         "oneOf": [
           {
-            "$ref": "#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall"
-          },
-          {
-            "$ref": "#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall"
-          },
-          {
-            "$ref": "#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall"
+            "$ref": "#/components/schemas/OpenAIResponseOutput"
           },
           {
             "$ref": "#/components/schemas/OpenAIResponseInputFunctionToolCallOutput"
           },
-          {
-            "$ref": "#/components/schemas/OpenAIResponseMCPApprovalRequest"
-          },
           {
             "$ref": "#/components/schemas/OpenAIResponseMCPApprovalResponse"
           },
-          {
-            "$ref": "#/components/schemas/OpenAIResponseOutputMessageMCPCall"
-          },
-          {
-            "$ref": "#/components/schemas/OpenAIResponseOutputMessageMCPListTools"
-          },
           {
             "$ref": "#/components/schemas/OpenAIResponseMessage"
           }
docs/static/deprecated-llama-stack-spec.yaml (vendored, 7 changed lines)

@@ -6369,14 +6369,9 @@ components:
         Error details for failed OpenAI response requests.
     OpenAIResponseInput:
      oneOf:
-        - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
-        - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall'
-        - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall'
+        - $ref: '#/components/schemas/OpenAIResponseOutput'
         - $ref: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput'
-        - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest'
         - $ref: '#/components/schemas/OpenAIResponseMCPApprovalResponse'
-        - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall'
-        - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools'
         - $ref: '#/components/schemas/OpenAIResponseMessage'
     "OpenAIResponseInputFunctionToolCallOutput":
       type: object
docs/static/llama-stack-spec.html (vendored, 17 changed lines)

@@ -7305,29 +7305,14 @@
       "OpenAIResponseInput": {
         "oneOf": [
           {
-            "$ref": "#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall"
-          },
-          {
-            "$ref": "#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall"
-          },
-          {
-            "$ref": "#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall"
+            "$ref": "#/components/schemas/OpenAIResponseOutput"
           },
           {
             "$ref": "#/components/schemas/OpenAIResponseInputFunctionToolCallOutput"
           },
-          {
-            "$ref": "#/components/schemas/OpenAIResponseMCPApprovalRequest"
-          },
           {
             "$ref": "#/components/schemas/OpenAIResponseMCPApprovalResponse"
           },
-          {
-            "$ref": "#/components/schemas/OpenAIResponseOutputMessageMCPCall"
-          },
-          {
-            "$ref": "#/components/schemas/OpenAIResponseOutputMessageMCPListTools"
-          },
           {
             "$ref": "#/components/schemas/OpenAIResponseMessage"
           }
docs/static/llama-stack-spec.yaml (vendored, 7 changed lines)

@@ -5522,14 +5522,9 @@ components:
         Error details for failed OpenAI response requests.
     OpenAIResponseInput:
      oneOf:
-        - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
-        - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall'
-        - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall'
+        - $ref: '#/components/schemas/OpenAIResponseOutput'
         - $ref: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput'
-        - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest'
         - $ref: '#/components/schemas/OpenAIResponseMCPApprovalResponse'
-        - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall'
-        - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools'
         - $ref: '#/components/schemas/OpenAIResponseMessage'
     OpenAIResponseInputToolFileSearch:
       type: object
docs/static/stainless-llama-stack-spec.html (vendored, 17 changed lines)

@@ -8977,29 +8977,14 @@
       "OpenAIResponseInput": {
         "oneOf": [
           {
-            "$ref": "#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall"
-          },
-          {
-            "$ref": "#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall"
-          },
-          {
-            "$ref": "#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall"
+            "$ref": "#/components/schemas/OpenAIResponseOutput"
           },
           {
             "$ref": "#/components/schemas/OpenAIResponseInputFunctionToolCallOutput"
           },
-          {
-            "$ref": "#/components/schemas/OpenAIResponseMCPApprovalRequest"
-          },
           {
             "$ref": "#/components/schemas/OpenAIResponseMCPApprovalResponse"
           },
-          {
-            "$ref": "#/components/schemas/OpenAIResponseOutputMessageMCPCall"
-          },
-          {
-            "$ref": "#/components/schemas/OpenAIResponseOutputMessageMCPListTools"
-          },
           {
             "$ref": "#/components/schemas/OpenAIResponseMessage"
           }
docs/static/stainless-llama-stack-spec.yaml (vendored, 7 changed lines)

@@ -6735,14 +6735,9 @@ components:
         Error details for failed OpenAI response requests.
     OpenAIResponseInput:
      oneOf:
-        - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
-        - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall'
-        - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall'
+        - $ref: '#/components/schemas/OpenAIResponseOutput'
         - $ref: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput'
-        - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest'
         - $ref: '#/components/schemas/OpenAIResponseMCPApprovalResponse'
-        - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall'
-        - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools'
         - $ref: '#/components/schemas/OpenAIResponseMessage'
     OpenAIResponseInputToolFileSearch:
       type: object
@@ -1254,14 +1254,9 @@ class OpenAIResponseInputFunctionToolCallOutput(BaseModel):

 OpenAIResponseInput = Annotated[
     # Responses API allows output messages to be passed in as input
-    OpenAIResponseOutputMessageWebSearchToolCall
-    | OpenAIResponseOutputMessageFileSearchToolCall
-    | OpenAIResponseOutputMessageFunctionToolCall
+    OpenAIResponseOutput
     | OpenAIResponseInputFunctionToolCallOutput
-    | OpenAIResponseMCPApprovalRequest
     | OpenAIResponseMCPApprovalResponse
-    | OpenAIResponseOutputMessageMCPCall
-    | OpenAIResponseOutputMessageMCPListTools
     | OpenAIResponseMessage,
     Field(union_mode="left_to_right"),
 ]
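For context on the `Field(union_mode="left_to_right")` annotation retained above: with this mode Pydantic tries the union members in declaration order and uses the first one that validates. The following standalone sketch (generic names, not taken from this repo) shows that behaviour.

```python
# Standalone Pydantic example of union_mode="left_to_right" (generic names,
# not from llama-stack): members are tried in order; the first success wins.
from typing import Annotated, Literal

from pydantic import BaseModel, Field, TypeAdapter


class FunctionCall(BaseModel):
    type: Literal["function_call"] = "function_call"
    name: str


class Message(BaseModel):
    type: Literal["message"] = "message"
    content: str


ExampleInput = Annotated[FunctionCall | Message, Field(union_mode="left_to_right")]

# FunctionCall is tried first and fails (wrong literal, missing name), so Message is used.
parsed = TypeAdapter(ExampleInput).validate_python({"type": "message", "content": "hi"})
print(type(parsed).__name__)  # Message
```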
@@ -217,7 +217,9 @@ class StreamingResponseOrchestrator:
         while True:
             # Text is the default response format for chat completion so don't need to pass it
             # (some providers don't support non-empty response_format when tools are present)
-            response_format = None if self.ctx.response_format.type == "text" else self.ctx.response_format
+            response_format = (
+                None if getattr(self.ctx.response_format, "type", None) == "text" else self.ctx.response_format
+            )
             logger.debug(f"calling openai_chat_completion with tools: {self.ctx.chat_tools}")

             params = OpenAIChatCompletionRequestWithExtraBody(
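As a quick illustration of why the guard above matters, here is a minimal, self-contained sketch (the `_Ctx` class is a stand-in invented for this example, not the real orchestrator context): direct attribute access raises when `response_format` is None, while `getattr` with a default falls through safely.

```python
# Minimal illustration with an invented _Ctx stand-in (not the real context object):
# shows the failure mode the getattr guard avoids when response_format is unset.
class _Ctx:
    response_format = None  # e.g. no response format was provided for this request


ctx = _Ctx()

# Old form: ctx.response_format.type == "text"  ->  AttributeError on None.
# Guarded form, as in the diff above:
response_format = (
    None if getattr(ctx.response_format, "type", None) == "text" else ctx.response_format
)
assert response_format is None  # passes through None instead of raising
```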
@@ -24,6 +24,7 @@ from llama_stack.apis.agents.openai_responses import (
     OpenAIResponseInputToolWebSearch,
     OpenAIResponseMessage,
     OpenAIResponseOutputMessageContentOutputText,
+    OpenAIResponseOutputMessageFunctionToolCall,
     OpenAIResponseOutputMessageMCPCall,
     OpenAIResponseOutputMessageWebSearchToolCall,
     OpenAIResponseText,
@@ -1169,3 +1170,75 @@ async def test_create_openai_response_with_invalid_text_format(openai_responses_
         model=model,
         text=OpenAIResponseText(format={"type": "invalid"}),
     )
+
+
+async def test_create_openai_response_with_output_types_as_input(
+    openai_responses_impl, mock_inference_api, mock_responses_store
+):
+    """Test that response outputs can be used as inputs in multi-turn conversations.
+
+    Before adding OpenAIResponseOutput types to OpenAIResponseInput,
+    creating a _OpenAIResponseObjectWithInputAndMessages with some output types
+    in the input field would fail with a Pydantic ValidationError.
+
+    This test simulates storing a response where the input contains output message
+    types (MCP calls, function calls), which happens in multi-turn conversations.
+    """
+    model = "meta-llama/Llama-3.1-8B-Instruct"
+
+    # Mock the inference response
+    mock_inference_api.openai_chat_completion.return_value = fake_stream()
+
+    # Create a response with store=True to trigger the storage path
+    result = await openai_responses_impl.create_openai_response(
+        input="What's the weather?",
+        model=model,
+        stream=True,
+        temperature=0.1,
+        store=True,
+    )
+
+    # Consume the stream
+    _ = [chunk async for chunk in result]
+
+    # Verify store was called
+    assert mock_responses_store.store_response_object.called
+
+    # Get the stored data
+    store_call_args = mock_responses_store.store_response_object.call_args
+    stored_response = store_call_args.kwargs["response_object"]
+
+    # Now simulate a multi-turn conversation where outputs become inputs
+    input_with_output_types = [
+        OpenAIResponseMessage(role="user", content="What's the weather?", name=None),
+        # These output types need to be valid OpenAIResponseInput
+        OpenAIResponseOutputMessageFunctionToolCall(
+            call_id="call_123",
+            name="get_weather",
+            arguments='{"city": "Tokyo"}',
+            type="function_call",
+        ),
+        OpenAIResponseOutputMessageMCPCall(
+            id="mcp_456",
+            type="mcp_call",
+            server_label="weather_server",
+            name="get_temperature",
+            arguments='{"location": "Tokyo"}',
+            output="25°C",
+        ),
+    ]
+
+    # This simulates storing a response in a multi-turn conversation
+    # where previous outputs are included in the input.
+    stored_with_outputs = _OpenAIResponseObjectWithInputAndMessages(
+        id=stored_response.id,
+        created_at=stored_response.created_at,
+        model=stored_response.model,
+        status=stored_response.status,
+        output=stored_response.output,
+        input=input_with_output_types,  # This will trigger Pydantic validation
+        messages=None,
+    )
+
+    assert stored_with_outputs.input == input_with_output_types
+    assert len(stored_with_outputs.input) == 3