fix: annotations list and web_search_preview in Responses (#2520)

# What does this PR do?


These are a couple of fixes to get an example LangChain app working with
our OpenAI Responses API implementation.

The Responses API spec requires an annotations array in
`output[*].content[*].annotations` and we were not providing one. So,
this adds that as an empty list, even though we don't do anything to
populate it yet. This prevents an error from client libraries like
LangChain that expect this field to always exist, even if an empty list.

The other fix is `web_search_preview` is a valid name for the web search
tool in the Responses API, but we only responded to `web_search` or
`web_search_preview_2025_03_11`.


## Test Plan


The existing Responses unit tests were expanded to test these cases,
via:

```
pytest -sv tests/unit/providers/agents/meta_reference/test_openai_responses.py
```

The existing test_openai_responses.py integration tests still pass with
this change, tested as below with Fireworks:

```
uv run llama stack run llama_stack/templates/starter/run.yaml

LLAMA_STACK_CONFIG=http://localhost:8321 \
uv run pytest -sv tests/integration/agents/test_openai_responses.py \
  --text-model accounts/fireworks/models/llama4-scout-instruct-basic
```

Lastly, this example LangChain app now works with Llama Stack (tested
with Ollama in the starter template in this case). This LangChain code
is using the example snippets for using Responses API at
https://python.langchain.com/docs/integrations/chat/openai/#responses-api

```python
from langchain_openai import ChatOpenAI

llm = ChatOpenAI(
    base_url="http://localhost:8321/v1/openai/v1",
    api_key="fake",
    model="ollama/meta-llama/Llama-3.2-3B-Instruct",
)

tool = {"type": "web_search_preview"}
llm_with_tools = llm.bind_tools([tool])

response = llm_with_tools.invoke("What was a positive news story from today?")

print(response.content)
```

Signed-off-by: Ben Browning <bbrownin@redhat.com>
This commit is contained in:
Ben Browning 2025-06-25 22:29:33 -04:00 committed by GitHub
parent 1d3f27fe5b
commit 2d9fd041eb
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 355 additions and 36 deletions

View file

@ -7390,6 +7390,147 @@
], ],
"title": "AgentTurnResponseTurnStartPayload" "title": "AgentTurnResponseTurnStartPayload"
}, },
"OpenAIResponseAnnotationCitation": {
"type": "object",
"properties": {
"type": {
"type": "string",
"const": "url_citation",
"default": "url_citation"
},
"end_index": {
"type": "integer"
},
"start_index": {
"type": "integer"
},
"title": {
"type": "string"
},
"url": {
"type": "string"
}
},
"additionalProperties": false,
"required": [
"type",
"end_index",
"start_index",
"title",
"url"
],
"title": "OpenAIResponseAnnotationCitation"
},
"OpenAIResponseAnnotationContainerFileCitation": {
"type": "object",
"properties": {
"type": {
"type": "string",
"const": "container_file_citation",
"default": "container_file_citation"
},
"container_id": {
"type": "string"
},
"end_index": {
"type": "integer"
},
"file_id": {
"type": "string"
},
"filename": {
"type": "string"
},
"start_index": {
"type": "integer"
}
},
"additionalProperties": false,
"required": [
"type",
"container_id",
"end_index",
"file_id",
"filename",
"start_index"
],
"title": "OpenAIResponseAnnotationContainerFileCitation"
},
"OpenAIResponseAnnotationFileCitation": {
"type": "object",
"properties": {
"type": {
"type": "string",
"const": "file_citation",
"default": "file_citation"
},
"file_id": {
"type": "string"
},
"filename": {
"type": "string"
},
"index": {
"type": "integer"
}
},
"additionalProperties": false,
"required": [
"type",
"file_id",
"filename",
"index"
],
"title": "OpenAIResponseAnnotationFileCitation"
},
"OpenAIResponseAnnotationFilePath": {
"type": "object",
"properties": {
"type": {
"type": "string",
"const": "file_path",
"default": "file_path"
},
"file_id": {
"type": "string"
},
"index": {
"type": "integer"
}
},
"additionalProperties": false,
"required": [
"type",
"file_id",
"index"
],
"title": "OpenAIResponseAnnotationFilePath"
},
"OpenAIResponseAnnotations": {
"oneOf": [
{
"$ref": "#/components/schemas/OpenAIResponseAnnotationFileCitation"
},
{
"$ref": "#/components/schemas/OpenAIResponseAnnotationCitation"
},
{
"$ref": "#/components/schemas/OpenAIResponseAnnotationContainerFileCitation"
},
{
"$ref": "#/components/schemas/OpenAIResponseAnnotationFilePath"
}
],
"discriminator": {
"propertyName": "type",
"mapping": {
"file_citation": "#/components/schemas/OpenAIResponseAnnotationFileCitation",
"url_citation": "#/components/schemas/OpenAIResponseAnnotationCitation",
"container_file_citation": "#/components/schemas/OpenAIResponseAnnotationContainerFileCitation",
"file_path": "#/components/schemas/OpenAIResponseAnnotationFilePath"
}
}
},
"OpenAIResponseInput": { "OpenAIResponseInput": {
"oneOf": [ "oneOf": [
{ {
@ -7764,6 +7905,10 @@
"type": "string", "type": "string",
"const": "web_search" "const": "web_search"
}, },
{
"type": "string",
"const": "web_search_preview"
},
{ {
"type": "string", "type": "string",
"const": "web_search_preview_2025_03_11" "const": "web_search_preview_2025_03_11"
@ -7855,12 +8000,19 @@
"type": "string", "type": "string",
"const": "output_text", "const": "output_text",
"default": "output_text" "default": "output_text"
},
"annotations": {
"type": "array",
"items": {
"$ref": "#/components/schemas/OpenAIResponseAnnotations"
}
} }
}, },
"additionalProperties": false, "additionalProperties": false,
"required": [ "required": [
"text", "text",
"type" "type",
"annotations"
], ],
"title": "OpenAIResponseOutputMessageContentOutputText" "title": "OpenAIResponseOutputMessageContentOutputText"
}, },

View file

@ -5263,6 +5263,106 @@ components:
- event_type - event_type
- turn_id - turn_id
title: AgentTurnResponseTurnStartPayload title: AgentTurnResponseTurnStartPayload
OpenAIResponseAnnotationCitation:
type: object
properties:
type:
type: string
const: url_citation
default: url_citation
end_index:
type: integer
start_index:
type: integer
title:
type: string
url:
type: string
additionalProperties: false
required:
- type
- end_index
- start_index
- title
- url
title: OpenAIResponseAnnotationCitation
"OpenAIResponseAnnotationContainerFileCitation":
type: object
properties:
type:
type: string
const: container_file_citation
default: container_file_citation
container_id:
type: string
end_index:
type: integer
file_id:
type: string
filename:
type: string
start_index:
type: integer
additionalProperties: false
required:
- type
- container_id
- end_index
- file_id
- filename
- start_index
title: >-
OpenAIResponseAnnotationContainerFileCitation
OpenAIResponseAnnotationFileCitation:
type: object
properties:
type:
type: string
const: file_citation
default: file_citation
file_id:
type: string
filename:
type: string
index:
type: integer
additionalProperties: false
required:
- type
- file_id
- filename
- index
title: OpenAIResponseAnnotationFileCitation
OpenAIResponseAnnotationFilePath:
type: object
properties:
type:
type: string
const: file_path
default: file_path
file_id:
type: string
index:
type: integer
additionalProperties: false
required:
- type
- file_id
- index
title: OpenAIResponseAnnotationFilePath
OpenAIResponseAnnotations:
oneOf:
- $ref: '#/components/schemas/OpenAIResponseAnnotationFileCitation'
- $ref: '#/components/schemas/OpenAIResponseAnnotationCitation'
- $ref: '#/components/schemas/OpenAIResponseAnnotationContainerFileCitation'
- $ref: '#/components/schemas/OpenAIResponseAnnotationFilePath'
discriminator:
propertyName: type
mapping:
file_citation: '#/components/schemas/OpenAIResponseAnnotationFileCitation'
url_citation: '#/components/schemas/OpenAIResponseAnnotationCitation'
container_file_citation: '#/components/schemas/OpenAIResponseAnnotationContainerFileCitation'
file_path: '#/components/schemas/OpenAIResponseAnnotationFilePath'
OpenAIResponseInput: OpenAIResponseInput:
oneOf: oneOf:
- $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
@ -5488,6 +5588,8 @@ components:
oneOf: oneOf:
- type: string - type: string
const: web_search const: web_search
- type: string
const: web_search_preview
- type: string - type: string
const: web_search_preview_2025_03_11 const: web_search_preview_2025_03_11
default: web_search default: web_search
@ -5547,10 +5649,15 @@ components:
type: string type: string
const: output_text const: output_text
default: output_text default: output_text
annotations:
type: array
items:
$ref: '#/components/schemas/OpenAIResponseAnnotations'
additionalProperties: false additionalProperties: false
required: required:
- text - text
- type - type
- annotations
title: >- title: >-
OpenAIResponseOutputMessageContentOutputText OpenAIResponseOutputMessageContentOutputText
"OpenAIResponseOutputMessageFileSearchToolCall": "OpenAIResponseOutputMessageFileSearchToolCall":

View file

@ -44,10 +44,55 @@ OpenAIResponseInputMessageContent = Annotated[
register_schema(OpenAIResponseInputMessageContent, name="OpenAIResponseInputMessageContent") register_schema(OpenAIResponseInputMessageContent, name="OpenAIResponseInputMessageContent")
@json_schema_type
class OpenAIResponseAnnotationFileCitation(BaseModel):
    """A citation to a file used to generate a model response.

    Mirrors the `file_citation` annotation shape of the OpenAI Responses API.
    """

    # Discriminator for the OpenAIResponseAnnotations union; always "file_citation".
    type: Literal["file_citation"] = "file_citation"
    # ID of the referenced file.
    file_id: str
    # Name of the referenced file.
    filename: str
    # Index of the annotation within the containing content part.
    index: int
@json_schema_type
class OpenAIResponseAnnotationCitation(BaseModel):
    """A citation for a web resource used to generate a model response.

    Mirrors the `url_citation` annotation shape of the OpenAI Responses API.
    """

    # Discriminator for the OpenAIResponseAnnotations union; always "url_citation".
    type: Literal["url_citation"] = "url_citation"
    # End position of the cited span in the output text (character offset).
    end_index: int
    # Start position of the cited span in the output text (character offset).
    start_index: int
    # Title of the cited web resource.
    title: str
    # URL of the cited web resource.
    url: str
@json_schema_type
class OpenAIResponseAnnotationContainerFileCitation(BaseModel):
    """A citation to a file within a container (e.g. code interpreter output).

    Mirrors the `container_file_citation` annotation shape of the OpenAI
    Responses API.
    """

    # Discriminator for the OpenAIResponseAnnotations union;
    # always "container_file_citation".
    type: Literal["container_file_citation"] = "container_file_citation"
    # ID of the container holding the cited file.
    container_id: str
    # End position of the cited span in the output text (character offset).
    end_index: int
    # ID of the cited file within the container.
    file_id: str
    # Name of the cited file.
    filename: str
    # Start position of the cited span in the output text (character offset).
    start_index: int
@json_schema_type
class OpenAIResponseAnnotationFilePath(BaseModel):
    """A reference to a file path produced during response generation.

    Mirrors the `file_path` annotation shape of the OpenAI Responses API.
    """

    # Discriminator for the OpenAIResponseAnnotations union; always "file_path".
    type: Literal["file_path"] = "file_path"
    # ID of the referenced file.
    file_id: str
    # Index of the annotation within the containing content part.
    index: int
# Discriminated union of all annotation variants that may appear in
# output[*].content[*].annotations. Pydantic selects the concrete model
# by the "type" field (see Field(discriminator="type") below).
OpenAIResponseAnnotations = Annotated[
    OpenAIResponseAnnotationFileCitation
    | OpenAIResponseAnnotationCitation
    | OpenAIResponseAnnotationContainerFileCitation
    | OpenAIResponseAnnotationFilePath,
    Field(discriminator="type"),
]
# Register the union so it is emitted as a named schema
# ("OpenAIResponseAnnotations") in the generated OpenAPI spec.
register_schema(OpenAIResponseAnnotations, name="OpenAIResponseAnnotations")
@json_schema_type @json_schema_type
class OpenAIResponseOutputMessageContentOutputText(BaseModel): class OpenAIResponseOutputMessageContentOutputText(BaseModel):
text: str text: str
type: Literal["output_text"] = "output_text" type: Literal["output_text"] = "output_text"
annotations: list[OpenAIResponseAnnotations] = Field(default_factory=list)
OpenAIResponseOutputMessageContent = Annotated[ OpenAIResponseOutputMessageContent = Annotated[
@ -384,9 +429,16 @@ OpenAIResponseInput = Annotated[
register_schema(OpenAIResponseInput, name="OpenAIResponseInput") register_schema(OpenAIResponseInput, name="OpenAIResponseInput")
# Must match type Literals of OpenAIResponseInputToolWebSearch below
WebSearchToolTypes = ["web_search", "web_search_preview", "web_search_preview_2025_03_11"]
@json_schema_type @json_schema_type
class OpenAIResponseInputToolWebSearch(BaseModel): class OpenAIResponseInputToolWebSearch(BaseModel):
type: Literal["web_search"] | Literal["web_search_preview_2025_03_11"] = "web_search" # Must match values of WebSearchToolTypes above
type: Literal["web_search"] | Literal["web_search_preview"] | Literal["web_search_preview_2025_03_11"] = (
"web_search"
)
# TODO: actually use search_context_size somewhere... # TODO: actually use search_context_size somewhere...
search_context_size: str | None = Field(default="medium", pattern="^low|medium|high$") search_context_size: str | None = Field(default="medium", pattern="^low|medium|high$")
# TODO: add user_location # TODO: add user_location

View file

@ -42,6 +42,7 @@ from llama_stack.apis.agents.openai_responses import (
OpenAIResponseOutputMessageWebSearchToolCall, OpenAIResponseOutputMessageWebSearchToolCall,
OpenAIResponseText, OpenAIResponseText,
OpenAIResponseTextFormat, OpenAIResponseTextFormat,
WebSearchToolTypes,
) )
from llama_stack.apis.common.content_types import TextContentItem from llama_stack.apis.common.content_types import TextContentItem
from llama_stack.apis.inference.inference import ( from llama_stack.apis.inference.inference import (
@ -609,7 +610,7 @@ class OpenAIResponsesImpl:
# TODO: Handle other tool types # TODO: Handle other tool types
if input_tool.type == "function": if input_tool.type == "function":
chat_tools.append(ChatCompletionToolParam(type="function", function=input_tool.model_dump())) chat_tools.append(ChatCompletionToolParam(type="function", function=input_tool.model_dump()))
elif input_tool.type == "web_search": elif input_tool.type in WebSearchToolTypes:
tool_name = "web_search" tool_name = "web_search"
tool = await self.tool_groups_api.get_tool(tool_name) tool = await self.tool_groups_api.get_tool(tool_name)
if not tool: if not tool:

View file

@ -27,6 +27,7 @@ from llama_stack.apis.agents.openai_responses import (
OpenAIResponseOutputMessageWebSearchToolCall, OpenAIResponseOutputMessageWebSearchToolCall,
OpenAIResponseText, OpenAIResponseText,
OpenAIResponseTextFormat, OpenAIResponseTextFormat,
WebSearchToolTypes,
) )
from llama_stack.apis.inference.inference import ( from llama_stack.apis.inference.inference import (
OpenAIAssistantMessageParam, OpenAIAssistantMessageParam,
@ -161,11 +162,6 @@ async def test_create_openai_response_with_string_input_with_tools(openai_respon
input_text = "What is the capital of Ireland?" input_text = "What is the capital of Ireland?"
model = "meta-llama/Llama-3.1-8B-Instruct" model = "meta-llama/Llama-3.1-8B-Instruct"
mock_inference_api.openai_chat_completion.side_effect = [
fake_stream("tool_call_completion.yaml"),
fake_stream(),
]
openai_responses_impl.tool_groups_api.get_tool.return_value = Tool( openai_responses_impl.tool_groups_api.get_tool.return_value = Tool(
identifier="web_search", identifier="web_search",
provider_id="client", provider_id="client",
@ -182,39 +178,50 @@ async def test_create_openai_response_with_string_input_with_tools(openai_respon
) )
# Execute # Execute
result = await openai_responses_impl.create_openai_response( for tool_name in WebSearchToolTypes:
input=input_text, # Reset mock states as we loop through each tool type
model=model, mock_inference_api.openai_chat_completion.side_effect = [
temperature=0.1, fake_stream("tool_call_completion.yaml"),
tools=[ fake_stream(),
OpenAIResponseInputToolWebSearch( ]
name="web_search", openai_responses_impl.tool_groups_api.get_tool.reset_mock()
) openai_responses_impl.tool_runtime_api.invoke_tool.reset_mock()
], openai_responses_impl.responses_store.store_response_object.reset_mock()
)
# Verify result = await openai_responses_impl.create_openai_response(
first_call = mock_inference_api.openai_chat_completion.call_args_list[0] input=input_text,
assert first_call.kwargs["messages"][0].content == "What is the capital of Ireland?" model=model,
assert first_call.kwargs["tools"] is not None temperature=0.1,
assert first_call.kwargs["temperature"] == 0.1 tools=[
OpenAIResponseInputToolWebSearch(
name=tool_name,
)
],
)
second_call = mock_inference_api.openai_chat_completion.call_args_list[1] # Verify
assert second_call.kwargs["messages"][-1].content == "Dublin" first_call = mock_inference_api.openai_chat_completion.call_args_list[0]
assert second_call.kwargs["temperature"] == 0.1 assert first_call.kwargs["messages"][0].content == "What is the capital of Ireland?"
assert first_call.kwargs["tools"] is not None
assert first_call.kwargs["temperature"] == 0.1
openai_responses_impl.tool_groups_api.get_tool.assert_called_once_with("web_search") second_call = mock_inference_api.openai_chat_completion.call_args_list[1]
openai_responses_impl.tool_runtime_api.invoke_tool.assert_called_once_with( assert second_call.kwargs["messages"][-1].content == "Dublin"
tool_name="web_search", assert second_call.kwargs["temperature"] == 0.1
kwargs={"query": "What is the capital of Ireland?"},
)
openai_responses_impl.responses_store.store_response_object.assert_called_once() openai_responses_impl.tool_groups_api.get_tool.assert_called_once_with("web_search")
openai_responses_impl.tool_runtime_api.invoke_tool.assert_called_once_with(
tool_name="web_search",
kwargs={"query": "What is the capital of Ireland?"},
)
# Check that we got the content from our mocked tool execution result openai_responses_impl.responses_store.store_response_object.assert_called_once()
assert len(result.output) >= 1
assert isinstance(result.output[1], OpenAIResponseMessage) # Check that we got the content from our mocked tool execution result
assert result.output[1].content[0].text == "Dublin" assert len(result.output) >= 1
assert isinstance(result.output[1], OpenAIResponseMessage)
assert result.output[1].content[0].text == "Dublin"
assert result.output[1].content[0].annotations == []
@pytest.mark.asyncio @pytest.mark.asyncio