From 2d9fd041eb7c5d8d163d1f97e6e9942b6a366af4 Mon Sep 17 00:00:00 2001
From: Ben Browning
Date: Wed, 25 Jun 2025 22:29:33 -0400
Subject: [PATCH] fix: annotations list and web_search_preview in Responses
 (#2520)

# What does this PR do?

These are a couple of fixes to get an example LangChain app working with our
OpenAI Responses API implementation.

The Responses API spec requires an annotations array in
`output[*].content[*].annotations`, but we were not providing one. This change
adds it as an empty list, even though we don't populate it yet, which prevents
errors from client libraries like LangChain that expect the field to always
exist, even if it's an empty list.

The other fix: `web_search_preview` is a valid name for the web search tool in
the Responses API, but we previously only accepted `web_search` or
`web_search_preview_2025_03_11`.

## Test Plan

The existing Responses unit tests were expanded to cover these cases, via:

```
pytest -sv tests/unit/providers/agents/meta_reference/test_openai_responses.py
```

The existing test_openai_responses.py integration tests still pass with this
change, tested as below with Fireworks:

```
uv run llama stack run llama_stack/templates/starter/run.yaml

LLAMA_STACK_CONFIG=http://localhost:8321 \
uv run pytest -sv tests/integration/agents/test_openai_responses.py \
  --text-model accounts/fireworks/models/llama4-scout-instruct-basic
```

Lastly, this example LangChain app now works with Llama Stack (tested here
with Ollama in the starter template). The LangChain code below uses the
example snippets for the Responses API at
https://python.langchain.com/docs/integrations/chat/openai/#responses-api

```python
from langchain_openai import ChatOpenAI

llm = ChatOpenAI(
    base_url="http://localhost:8321/v1/openai/v1",
    api_key="fake",
    model="ollama/meta-llama/Llama-3.2-3B-Instruct",
)

tool = {"type": "web_search_preview"}
llm_with_tools = llm.bind_tools([tool])

response = llm_with_tools.invoke("What was a positive news story from today?")

print(response.content)
```

Signed-off-by: Ben Browning
---
 docs/_static/llama-stack-spec.html            | 154 +++++++++++++++++-
 docs/_static/llama-stack-spec.yaml            | 107 ++++++++++++
 llama_stack/apis/agents/openai_responses.py   |  54 +++++-
 .../agents/meta_reference/openai_responses.py |   3 +-
 .../meta_reference/test_openai_responses.py   |  73 +++++----
 5 files changed, 355 insertions(+), 36 deletions(-)

diff --git a/docs/_static/llama-stack-spec.html b/docs/_static/llama-stack-spec.html
index 801e8dc33..f9e4bb38e 100644
--- a/docs/_static/llama-stack-spec.html
+++ b/docs/_static/llama-stack-spec.html
@@ -7390,6 +7390,147 @@
         ],
         "title": "AgentTurnResponseTurnStartPayload"
       },
+      "OpenAIResponseAnnotationCitation": {
+        "type": "object",
+        "properties": {
+          "type": {
+            "type": "string",
+            "const": "url_citation",
+            "default": "url_citation"
+          },
+          "end_index": {
+            "type": "integer"
+          },
+          "start_index": {
+            "type": "integer"
+          },
+          "title": {
+            "type": "string"
+          },
+          "url": {
+            "type": "string"
+          }
+        },
+        "additionalProperties": false,
+        "required": [
+          "type",
+          "end_index",
+          "start_index",
+          "title",
+          "url"
+        ],
+        "title": "OpenAIResponseAnnotationCitation"
+      },
+      "OpenAIResponseAnnotationContainerFileCitation": {
+        "type": "object",
+        "properties": {
+          "type": {
+            "type": "string",
+            "const": "container_file_citation",
+            "default": "container_file_citation"
+          },
+          "container_id": {
+            "type": "string"
+          },
+          "end_index": {
+            "type": "integer"
+          },
+          "file_id": {
+            "type": "string"
+          },
+          "filename": {
+            "type": "string"
+          },
"start_index": { + "type": "integer" + } + }, + "additionalProperties": false, + "required": [ + "type", + "container_id", + "end_index", + "file_id", + "filename", + "start_index" + ], + "title": "OpenAIResponseAnnotationContainerFileCitation" + }, + "OpenAIResponseAnnotationFileCitation": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "file_citation", + "default": "file_citation" + }, + "file_id": { + "type": "string" + }, + "filename": { + "type": "string" + }, + "index": { + "type": "integer" + } + }, + "additionalProperties": false, + "required": [ + "type", + "file_id", + "filename", + "index" + ], + "title": "OpenAIResponseAnnotationFileCitation" + }, + "OpenAIResponseAnnotationFilePath": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "file_path", + "default": "file_path" + }, + "file_id": { + "type": "string" + }, + "index": { + "type": "integer" + } + }, + "additionalProperties": false, + "required": [ + "type", + "file_id", + "index" + ], + "title": "OpenAIResponseAnnotationFilePath" + }, + "OpenAIResponseAnnotations": { + "oneOf": [ + { + "$ref": "#/components/schemas/OpenAIResponseAnnotationFileCitation" + }, + { + "$ref": "#/components/schemas/OpenAIResponseAnnotationCitation" + }, + { + "$ref": "#/components/schemas/OpenAIResponseAnnotationContainerFileCitation" + }, + { + "$ref": "#/components/schemas/OpenAIResponseAnnotationFilePath" + } + ], + "discriminator": { + "propertyName": "type", + "mapping": { + "file_citation": "#/components/schemas/OpenAIResponseAnnotationFileCitation", + "url_citation": "#/components/schemas/OpenAIResponseAnnotationCitation", + "container_file_citation": "#/components/schemas/OpenAIResponseAnnotationContainerFileCitation", + "file_path": "#/components/schemas/OpenAIResponseAnnotationFilePath" + } + } + }, "OpenAIResponseInput": { "oneOf": [ { @@ -7764,6 +7905,10 @@ "type": "string", "const": "web_search" }, + { + "type": "string", + "const": "web_search_preview" + }, { "type": "string", "const": "web_search_preview_2025_03_11" @@ -7855,12 +8000,19 @@ "type": "string", "const": "output_text", "default": "output_text" + }, + "annotations": { + "type": "array", + "items": { + "$ref": "#/components/schemas/OpenAIResponseAnnotations" + } } }, "additionalProperties": false, "required": [ "text", - "type" + "type", + "annotations" ], "title": "OpenAIResponseOutputMessageContentOutputText" }, diff --git a/docs/_static/llama-stack-spec.yaml b/docs/_static/llama-stack-spec.yaml index b736cd904..9175c97fc 100644 --- a/docs/_static/llama-stack-spec.yaml +++ b/docs/_static/llama-stack-spec.yaml @@ -5263,6 +5263,106 @@ components: - event_type - turn_id title: AgentTurnResponseTurnStartPayload + OpenAIResponseAnnotationCitation: + type: object + properties: + type: + type: string + const: url_citation + default: url_citation + end_index: + type: integer + start_index: + type: integer + title: + type: string + url: + type: string + additionalProperties: false + required: + - type + - end_index + - start_index + - title + - url + title: OpenAIResponseAnnotationCitation + "OpenAIResponseAnnotationContainerFileCitation": + type: object + properties: + type: + type: string + const: container_file_citation + default: container_file_citation + container_id: + type: string + end_index: + type: integer + file_id: + type: string + filename: + type: string + start_index: + type: integer + additionalProperties: false + required: + - type + - container_id + - end_index + - file_id + - filename + - 
+        - start_index
+      title: >-
+        OpenAIResponseAnnotationContainerFileCitation
+    OpenAIResponseAnnotationFileCitation:
+      type: object
+      properties:
+        type:
+          type: string
+          const: file_citation
+          default: file_citation
+        file_id:
+          type: string
+        filename:
+          type: string
+        index:
+          type: integer
+      additionalProperties: false
+      required:
+        - type
+        - file_id
+        - filename
+        - index
+      title: OpenAIResponseAnnotationFileCitation
+    OpenAIResponseAnnotationFilePath:
+      type: object
+      properties:
+        type:
+          type: string
+          const: file_path
+          default: file_path
+        file_id:
+          type: string
+        index:
+          type: integer
+      additionalProperties: false
+      required:
+        - type
+        - file_id
+        - index
+      title: OpenAIResponseAnnotationFilePath
+    OpenAIResponseAnnotations:
+      oneOf:
+        - $ref: '#/components/schemas/OpenAIResponseAnnotationFileCitation'
+        - $ref: '#/components/schemas/OpenAIResponseAnnotationCitation'
+        - $ref: '#/components/schemas/OpenAIResponseAnnotationContainerFileCitation'
+        - $ref: '#/components/schemas/OpenAIResponseAnnotationFilePath'
+      discriminator:
+        propertyName: type
+        mapping:
+          file_citation: '#/components/schemas/OpenAIResponseAnnotationFileCitation'
+          url_citation: '#/components/schemas/OpenAIResponseAnnotationCitation'
+          container_file_citation: '#/components/schemas/OpenAIResponseAnnotationContainerFileCitation'
+          file_path: '#/components/schemas/OpenAIResponseAnnotationFilePath'
     OpenAIResponseInput:
       oneOf:
         - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
@@ -5488,6 +5588,8 @@ components:
           oneOf:
             - type: string
              const: web_search
+            - type: string
+              const: web_search_preview
             - type: string
               const: web_search_preview_2025_03_11
           default: web_search
@@ -5547,10 +5649,15 @@ components:
           type: string
           const: output_text
           default: output_text
+        annotations:
+          type: array
+          items:
+            $ref: '#/components/schemas/OpenAIResponseAnnotations'
       additionalProperties: false
       required:
         - text
         - type
+        - annotations
       title: >-
         OpenAIResponseOutputMessageContentOutputText
     "OpenAIResponseOutputMessageFileSearchToolCall":
diff --git a/llama_stack/apis/agents/openai_responses.py b/llama_stack/apis/agents/openai_responses.py
index addb72f14..27b85e2d6 100644
--- a/llama_stack/apis/agents/openai_responses.py
+++ b/llama_stack/apis/agents/openai_responses.py
@@ -44,10 +44,55 @@ OpenAIResponseInputMessageContent = Annotated[
 register_schema(OpenAIResponseInputMessageContent, name="OpenAIResponseInputMessageContent")
 
 
+@json_schema_type
+class OpenAIResponseAnnotationFileCitation(BaseModel):
+    type: Literal["file_citation"] = "file_citation"
+    file_id: str
+    filename: str
+    index: int
+
+
+@json_schema_type
+class OpenAIResponseAnnotationCitation(BaseModel):
+    type: Literal["url_citation"] = "url_citation"
+    end_index: int
+    start_index: int
+    title: str
+    url: str
+
+
+@json_schema_type
+class OpenAIResponseAnnotationContainerFileCitation(BaseModel):
+    type: Literal["container_file_citation"] = "container_file_citation"
+    container_id: str
+    end_index: int
+    file_id: str
+    filename: str
+    start_index: int
+
+
+@json_schema_type
+class OpenAIResponseAnnotationFilePath(BaseModel):
+    type: Literal["file_path"] = "file_path"
+    file_id: str
+    index: int
+
+
+OpenAIResponseAnnotations = Annotated[
+    OpenAIResponseAnnotationFileCitation
+    | OpenAIResponseAnnotationCitation
+    | OpenAIResponseAnnotationContainerFileCitation
+    | OpenAIResponseAnnotationFilePath,
+    Field(discriminator="type"),
+]
+register_schema(OpenAIResponseAnnotations, name="OpenAIResponseAnnotations")
+
+
 @json_schema_type
 class OpenAIResponseOutputMessageContentOutputText(BaseModel):
     text: str
     type: Literal["output_text"] = "output_text"
+    annotations: list[OpenAIResponseAnnotations] = Field(default_factory=list)
 
 
 OpenAIResponseOutputMessageContent = Annotated[
@@ -384,9 +429,16 @@ OpenAIResponseInput = Annotated[
 register_schema(OpenAIResponseInput, name="OpenAIResponseInput")
 
 
+# Must match type Literals of OpenAIResponseInputToolWebSearch below
+WebSearchToolTypes = ["web_search", "web_search_preview", "web_search_preview_2025_03_11"]
+
+
 @json_schema_type
 class OpenAIResponseInputToolWebSearch(BaseModel):
-    type: Literal["web_search"] | Literal["web_search_preview_2025_03_11"] = "web_search"
+    # Must match values of WebSearchToolTypes above
+    type: Literal["web_search"] | Literal["web_search_preview"] | Literal["web_search_preview_2025_03_11"] = (
+        "web_search"
+    )
     # TODO: actually use search_context_size somewhere...
     search_context_size: str | None = Field(default="medium", pattern="^low|medium|high$")
     # TODO: add user_location
diff --git a/llama_stack/providers/inline/agents/meta_reference/openai_responses.py b/llama_stack/providers/inline/agents/meta_reference/openai_responses.py
index 4465a32fe..cf3293ed0 100644
--- a/llama_stack/providers/inline/agents/meta_reference/openai_responses.py
+++ b/llama_stack/providers/inline/agents/meta_reference/openai_responses.py
@@ -42,6 +42,7 @@ from llama_stack.apis.agents.openai_responses import (
     OpenAIResponseOutputMessageWebSearchToolCall,
     OpenAIResponseText,
     OpenAIResponseTextFormat,
+    WebSearchToolTypes,
 )
 from llama_stack.apis.common.content_types import TextContentItem
 from llama_stack.apis.inference.inference import (
@@ -609,7 +610,7 @@ class OpenAIResponsesImpl:
             # TODO: Handle other tool types
             if input_tool.type == "function":
                 chat_tools.append(ChatCompletionToolParam(type="function", function=input_tool.model_dump()))
-            elif input_tool.type == "web_search":
+            elif input_tool.type in WebSearchToolTypes:
                 tool_name = "web_search"
                 tool = await self.tool_groups_api.get_tool(tool_name)
                 if not tool:
diff --git a/tests/unit/providers/agents/meta_reference/test_openai_responses.py b/tests/unit/providers/agents/meta_reference/test_openai_responses.py
index a3d798083..7772dd2cc 100644
--- a/tests/unit/providers/agents/meta_reference/test_openai_responses.py
+++ b/tests/unit/providers/agents/meta_reference/test_openai_responses.py
@@ -27,6 +27,7 @@ from llama_stack.apis.agents.openai_responses import (
     OpenAIResponseOutputMessageWebSearchToolCall,
     OpenAIResponseText,
     OpenAIResponseTextFormat,
+    WebSearchToolTypes,
 )
 from llama_stack.apis.inference.inference import (
     OpenAIAssistantMessageParam,
@@ -161,11 +162,6 @@ async def test_create_openai_response_with_string_input_with_tools(openai_respon
     input_text = "What is the capital of Ireland?"
model = "meta-llama/Llama-3.1-8B-Instruct" - mock_inference_api.openai_chat_completion.side_effect = [ - fake_stream("tool_call_completion.yaml"), - fake_stream(), - ] - openai_responses_impl.tool_groups_api.get_tool.return_value = Tool( identifier="web_search", provider_id="client", @@ -182,39 +178,50 @@ async def test_create_openai_response_with_string_input_with_tools(openai_respon ) # Execute - result = await openai_responses_impl.create_openai_response( - input=input_text, - model=model, - temperature=0.1, - tools=[ - OpenAIResponseInputToolWebSearch( - name="web_search", - ) - ], - ) + for tool_name in WebSearchToolTypes: + # Reset mock states as we loop through each tool type + mock_inference_api.openai_chat_completion.side_effect = [ + fake_stream("tool_call_completion.yaml"), + fake_stream(), + ] + openai_responses_impl.tool_groups_api.get_tool.reset_mock() + openai_responses_impl.tool_runtime_api.invoke_tool.reset_mock() + openai_responses_impl.responses_store.store_response_object.reset_mock() - # Verify - first_call = mock_inference_api.openai_chat_completion.call_args_list[0] - assert first_call.kwargs["messages"][0].content == "What is the capital of Ireland?" - assert first_call.kwargs["tools"] is not None - assert first_call.kwargs["temperature"] == 0.1 + result = await openai_responses_impl.create_openai_response( + input=input_text, + model=model, + temperature=0.1, + tools=[ + OpenAIResponseInputToolWebSearch( + name=tool_name, + ) + ], + ) - second_call = mock_inference_api.openai_chat_completion.call_args_list[1] - assert second_call.kwargs["messages"][-1].content == "Dublin" - assert second_call.kwargs["temperature"] == 0.1 + # Verify + first_call = mock_inference_api.openai_chat_completion.call_args_list[0] + assert first_call.kwargs["messages"][0].content == "What is the capital of Ireland?" + assert first_call.kwargs["tools"] is not None + assert first_call.kwargs["temperature"] == 0.1 - openai_responses_impl.tool_groups_api.get_tool.assert_called_once_with("web_search") - openai_responses_impl.tool_runtime_api.invoke_tool.assert_called_once_with( - tool_name="web_search", - kwargs={"query": "What is the capital of Ireland?"}, - ) + second_call = mock_inference_api.openai_chat_completion.call_args_list[1] + assert second_call.kwargs["messages"][-1].content == "Dublin" + assert second_call.kwargs["temperature"] == 0.1 - openai_responses_impl.responses_store.store_response_object.assert_called_once() + openai_responses_impl.tool_groups_api.get_tool.assert_called_once_with("web_search") + openai_responses_impl.tool_runtime_api.invoke_tool.assert_called_once_with( + tool_name="web_search", + kwargs={"query": "What is the capital of Ireland?"}, + ) - # Check that we got the content from our mocked tool execution result - assert len(result.output) >= 1 - assert isinstance(result.output[1], OpenAIResponseMessage) - assert result.output[1].content[0].text == "Dublin" + openai_responses_impl.responses_store.store_response_object.assert_called_once() + + # Check that we got the content from our mocked tool execution result + assert len(result.output) >= 1 + assert isinstance(result.output[1], OpenAIResponseMessage) + assert result.output[1].content[0].text == "Dublin" + assert result.output[1].content[0].annotations == [] @pytest.mark.asyncio