fix: annotations list and web_search_preview in Responses (#2520)

# What does this PR do?


These are a couple of fixes to get an example LangChain app working with
our OpenAI Responses API implementation.

The Responses API spec requires an annotations array in
`output[*].content[*].annotations` and we were not providing one. So,
this adds that as an empty list, even though we don't do anything to
populate it yet. This prevents an error from client libraries like
LangChain that expect this field to always exist, even if an empty list.

The other fix is `web_search_preview` is a valid name for the web search
tool in the Responses API, but we only responded to `web_search` or
`web_search_preview_2025_03_11`.


## Test Plan


The existing Responses unit tests were expanded to test these cases,
via:

```
pytest -sv tests/unit/providers/agents/meta_reference/test_openai_responses.py
```

The existing test_openai_responses.py integration tests still pass with
this change, tested as below with Fireworks:

```
uv run llama stack run llama_stack/templates/starter/run.yaml

LLAMA_STACK_CONFIG=http://localhost:8321 \
uv run pytest -sv tests/integration/agents/test_openai_responses.py \
  --text-model accounts/fireworks/models/llama4-scout-instruct-basic
```

Lastly, this example LangChain app now works with Llama Stack (tested
with Ollama in the starter template in this case). This LangChain code
is using the example snippets for using Responses API at
https://python.langchain.com/docs/integrations/chat/openai/#responses-api

```python
from langchain_openai import ChatOpenAI

llm = ChatOpenAI(
    base_url="http://localhost:8321/v1/openai/v1",
    api_key="fake",
    model="ollama/meta-llama/Llama-3.2-3B-Instruct",
)

tool = {"type": "web_search_preview"}
llm_with_tools = llm.bind_tools([tool])

response = llm_with_tools.invoke("What was a positive news story from today?")

print(response.content)
```

Signed-off-by: Ben Browning <bbrownin@redhat.com>
This commit is contained in:
Ben Browning 2025-06-25 22:29:33 -04:00 committed by GitHub
parent 1d3f27fe5b
commit 2d9fd041eb
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 355 additions and 36 deletions

View file

@ -7390,6 +7390,147 @@
], ],
"title": "AgentTurnResponseTurnStartPayload" "title": "AgentTurnResponseTurnStartPayload"
}, },
"OpenAIResponseAnnotationCitation": {
"type": "object",
"properties": {
"type": {
"type": "string",
"const": "url_citation",
"default": "url_citation"
},
"end_index": {
"type": "integer"
},
"start_index": {
"type": "integer"
},
"title": {
"type": "string"
},
"url": {
"type": "string"
}
},
"additionalProperties": false,
"required": [
"type",
"end_index",
"start_index",
"title",
"url"
],
"title": "OpenAIResponseAnnotationCitation"
},
"OpenAIResponseAnnotationContainerFileCitation": {
"type": "object",
"properties": {
"type": {
"type": "string",
"const": "container_file_citation",
"default": "container_file_citation"
},
"container_id": {
"type": "string"
},
"end_index": {
"type": "integer"
},
"file_id": {
"type": "string"
},
"filename": {
"type": "string"
},
"start_index": {
"type": "integer"
}
},
"additionalProperties": false,
"required": [
"type",
"container_id",
"end_index",
"file_id",
"filename",
"start_index"
],
"title": "OpenAIResponseAnnotationContainerFileCitation"
},
"OpenAIResponseAnnotationFileCitation": {
"type": "object",
"properties": {
"type": {
"type": "string",
"const": "file_citation",
"default": "file_citation"
},
"file_id": {
"type": "string"
},
"filename": {
"type": "string"
},
"index": {
"type": "integer"
}
},
"additionalProperties": false,
"required": [
"type",
"file_id",
"filename",
"index"
],
"title": "OpenAIResponseAnnotationFileCitation"
},
"OpenAIResponseAnnotationFilePath": {
"type": "object",
"properties": {
"type": {
"type": "string",
"const": "file_path",
"default": "file_path"
},
"file_id": {
"type": "string"
},
"index": {
"type": "integer"
}
},
"additionalProperties": false,
"required": [
"type",
"file_id",
"index"
],
"title": "OpenAIResponseAnnotationFilePath"
},
"OpenAIResponseAnnotations": {
"oneOf": [
{
"$ref": "#/components/schemas/OpenAIResponseAnnotationFileCitation"
},
{
"$ref": "#/components/schemas/OpenAIResponseAnnotationCitation"
},
{
"$ref": "#/components/schemas/OpenAIResponseAnnotationContainerFileCitation"
},
{
"$ref": "#/components/schemas/OpenAIResponseAnnotationFilePath"
}
],
"discriminator": {
"propertyName": "type",
"mapping": {
"file_citation": "#/components/schemas/OpenAIResponseAnnotationFileCitation",
"url_citation": "#/components/schemas/OpenAIResponseAnnotationCitation",
"container_file_citation": "#/components/schemas/OpenAIResponseAnnotationContainerFileCitation",
"file_path": "#/components/schemas/OpenAIResponseAnnotationFilePath"
}
}
},
"OpenAIResponseInput": { "OpenAIResponseInput": {
"oneOf": [ "oneOf": [
{ {
@ -7764,6 +7905,10 @@
"type": "string", "type": "string",
"const": "web_search" "const": "web_search"
}, },
{
"type": "string",
"const": "web_search_preview"
},
{ {
"type": "string", "type": "string",
"const": "web_search_preview_2025_03_11" "const": "web_search_preview_2025_03_11"
@ -7855,12 +8000,19 @@
"type": "string", "type": "string",
"const": "output_text", "const": "output_text",
"default": "output_text" "default": "output_text"
},
"annotations": {
"type": "array",
"items": {
"$ref": "#/components/schemas/OpenAIResponseAnnotations"
}
} }
}, },
"additionalProperties": false, "additionalProperties": false,
"required": [ "required": [
"text", "text",
"type" "type",
"annotations"
], ],
"title": "OpenAIResponseOutputMessageContentOutputText" "title": "OpenAIResponseOutputMessageContentOutputText"
}, },

View file

@ -5263,6 +5263,106 @@ components:
- event_type - event_type
- turn_id - turn_id
title: AgentTurnResponseTurnStartPayload title: AgentTurnResponseTurnStartPayload
OpenAIResponseAnnotationCitation:
type: object
properties:
type:
type: string
const: url_citation
default: url_citation
end_index:
type: integer
start_index:
type: integer
title:
type: string
url:
type: string
additionalProperties: false
required:
- type
- end_index
- start_index
- title
- url
title: OpenAIResponseAnnotationCitation
"OpenAIResponseAnnotationContainerFileCitation":
type: object
properties:
type:
type: string
const: container_file_citation
default: container_file_citation
container_id:
type: string
end_index:
type: integer
file_id:
type: string
filename:
type: string
start_index:
type: integer
additionalProperties: false
required:
- type
- container_id
- end_index
- file_id
- filename
- start_index
title: >-
OpenAIResponseAnnotationContainerFileCitation
OpenAIResponseAnnotationFileCitation:
type: object
properties:
type:
type: string
const: file_citation
default: file_citation
file_id:
type: string
filename:
type: string
index:
type: integer
additionalProperties: false
required:
- type
- file_id
- filename
- index
title: OpenAIResponseAnnotationFileCitation
OpenAIResponseAnnotationFilePath:
type: object
properties:
type:
type: string
const: file_path
default: file_path
file_id:
type: string
index:
type: integer
additionalProperties: false
required:
- type
- file_id
- index
title: OpenAIResponseAnnotationFilePath
OpenAIResponseAnnotations:
oneOf:
- $ref: '#/components/schemas/OpenAIResponseAnnotationFileCitation'
- $ref: '#/components/schemas/OpenAIResponseAnnotationCitation'
- $ref: '#/components/schemas/OpenAIResponseAnnotationContainerFileCitation'
- $ref: '#/components/schemas/OpenAIResponseAnnotationFilePath'
discriminator:
propertyName: type
mapping:
file_citation: '#/components/schemas/OpenAIResponseAnnotationFileCitation'
url_citation: '#/components/schemas/OpenAIResponseAnnotationCitation'
container_file_citation: '#/components/schemas/OpenAIResponseAnnotationContainerFileCitation'
file_path: '#/components/schemas/OpenAIResponseAnnotationFilePath'
OpenAIResponseInput: OpenAIResponseInput:
oneOf: oneOf:
- $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
@ -5488,6 +5588,8 @@ components:
oneOf: oneOf:
- type: string - type: string
const: web_search const: web_search
- type: string
const: web_search_preview
- type: string - type: string
const: web_search_preview_2025_03_11 const: web_search_preview_2025_03_11
default: web_search default: web_search
@ -5547,10 +5649,15 @@ components:
type: string type: string
const: output_text const: output_text
default: output_text default: output_text
annotations:
type: array
items:
$ref: '#/components/schemas/OpenAIResponseAnnotations'
additionalProperties: false additionalProperties: false
required: required:
- text - text
- type - type
- annotations
title: >- title: >-
OpenAIResponseOutputMessageContentOutputText OpenAIResponseOutputMessageContentOutputText
"OpenAIResponseOutputMessageFileSearchToolCall": "OpenAIResponseOutputMessageFileSearchToolCall":

View file

@ -44,10 +44,55 @@ OpenAIResponseInputMessageContent = Annotated[
register_schema(OpenAIResponseInputMessageContent, name="OpenAIResponseInputMessageContent") register_schema(OpenAIResponseInputMessageContent, name="OpenAIResponseInputMessageContent")
@json_schema_type
class OpenAIResponseAnnotationFileCitation(BaseModel):
    """A citation to a file used to generate a model response.

    Mirrors the `file_citation` annotation shape of the OpenAI Responses API.
    """

    # Discriminator for the OpenAIResponseAnnotations union; always "file_citation".
    type: Literal["file_citation"] = "file_citation"
    # ID of the referenced file.
    file_id: str
    # Name of the referenced file.
    filename: str
    # Index of the annotation within the containing content part.
    index: int
@json_schema_type
class OpenAIResponseAnnotationCitation(BaseModel):
    """A citation for a web resource used to generate a model response.

    Mirrors the `url_citation` annotation shape of the OpenAI Responses API.
    """

    # Discriminator for the OpenAIResponseAnnotations union; always "url_citation".
    type: Literal["url_citation"] = "url_citation"
    # End position of the cited span in the output text (character offset).
    end_index: int
    # Start position of the cited span in the output text (character offset).
    start_index: int
    # Title of the cited web resource.
    title: str
    # URL of the cited web resource.
    url: str
@json_schema_type
class OpenAIResponseAnnotationContainerFileCitation(BaseModel):
    """A citation to a file within a container (e.g. code interpreter output).

    Mirrors the `container_file_citation` annotation shape of the OpenAI
    Responses API.
    """

    # Discriminator for the OpenAIResponseAnnotations union;
    # always "container_file_citation".
    type: Literal["container_file_citation"] = "container_file_citation"
    # ID of the container holding the cited file.
    container_id: str
    # End position of the cited span in the output text (character offset).
    end_index: int
    # ID of the cited file within the container.
    file_id: str
    # Name of the cited file.
    filename: str
    # Start position of the cited span in the output text (character offset).
    start_index: int
@json_schema_type
class OpenAIResponseAnnotationFilePath(BaseModel):
    """A reference to a file path produced during response generation.

    Mirrors the `file_path` annotation shape of the OpenAI Responses API.
    """

    # Discriminator for the OpenAIResponseAnnotations union; always "file_path".
    type: Literal["file_path"] = "file_path"
    # ID of the referenced file.
    file_id: str
    # Index of the annotation within the containing content part.
    index: int
# Discriminated union of all annotation variants that may appear in
# output[*].content[*].annotations. Pydantic selects the concrete model
# by the "type" field (see Field(discriminator="type") below).
OpenAIResponseAnnotations = Annotated[
    OpenAIResponseAnnotationFileCitation
    | OpenAIResponseAnnotationCitation
    | OpenAIResponseAnnotationContainerFileCitation
    | OpenAIResponseAnnotationFilePath,
    Field(discriminator="type"),
]
# Register the union so it is emitted as a named schema
# ("OpenAIResponseAnnotations") in the generated OpenAPI spec.
register_schema(OpenAIResponseAnnotations, name="OpenAIResponseAnnotations")
@json_schema_type @json_schema_type
class OpenAIResponseOutputMessageContentOutputText(BaseModel): class OpenAIResponseOutputMessageContentOutputText(BaseModel):
text: str text: str
type: Literal["output_text"] = "output_text" type: Literal["output_text"] = "output_text"
annotations: list[OpenAIResponseAnnotations] = Field(default_factory=list)
OpenAIResponseOutputMessageContent = Annotated[ OpenAIResponseOutputMessageContent = Annotated[
@ -384,9 +429,16 @@ OpenAIResponseInput = Annotated[
register_schema(OpenAIResponseInput, name="OpenAIResponseInput") register_schema(OpenAIResponseInput, name="OpenAIResponseInput")
# Must match type Literals of OpenAIResponseInputToolWebSearch below
WebSearchToolTypes = ["web_search", "web_search_preview", "web_search_preview_2025_03_11"]
@json_schema_type @json_schema_type
class OpenAIResponseInputToolWebSearch(BaseModel): class OpenAIResponseInputToolWebSearch(BaseModel):
type: Literal["web_search"] | Literal["web_search_preview_2025_03_11"] = "web_search" # Must match values of WebSearchToolTypes above
type: Literal["web_search"] | Literal["web_search_preview"] | Literal["web_search_preview_2025_03_11"] = (
"web_search"
)
# TODO: actually use search_context_size somewhere... # TODO: actually use search_context_size somewhere...
search_context_size: str | None = Field(default="medium", pattern="^low|medium|high$") search_context_size: str | None = Field(default="medium", pattern="^low|medium|high$")
# TODO: add user_location # TODO: add user_location

View file

@ -42,6 +42,7 @@ from llama_stack.apis.agents.openai_responses import (
OpenAIResponseOutputMessageWebSearchToolCall, OpenAIResponseOutputMessageWebSearchToolCall,
OpenAIResponseText, OpenAIResponseText,
OpenAIResponseTextFormat, OpenAIResponseTextFormat,
WebSearchToolTypes,
) )
from llama_stack.apis.common.content_types import TextContentItem from llama_stack.apis.common.content_types import TextContentItem
from llama_stack.apis.inference.inference import ( from llama_stack.apis.inference.inference import (
@ -609,7 +610,7 @@ class OpenAIResponsesImpl:
# TODO: Handle other tool types # TODO: Handle other tool types
if input_tool.type == "function": if input_tool.type == "function":
chat_tools.append(ChatCompletionToolParam(type="function", function=input_tool.model_dump())) chat_tools.append(ChatCompletionToolParam(type="function", function=input_tool.model_dump()))
elif input_tool.type == "web_search": elif input_tool.type in WebSearchToolTypes:
tool_name = "web_search" tool_name = "web_search"
tool = await self.tool_groups_api.get_tool(tool_name) tool = await self.tool_groups_api.get_tool(tool_name)
if not tool: if not tool:

View file

@ -27,6 +27,7 @@ from llama_stack.apis.agents.openai_responses import (
OpenAIResponseOutputMessageWebSearchToolCall, OpenAIResponseOutputMessageWebSearchToolCall,
OpenAIResponseText, OpenAIResponseText,
OpenAIResponseTextFormat, OpenAIResponseTextFormat,
WebSearchToolTypes,
) )
from llama_stack.apis.inference.inference import ( from llama_stack.apis.inference.inference import (
OpenAIAssistantMessageParam, OpenAIAssistantMessageParam,
@ -161,11 +162,6 @@ async def test_create_openai_response_with_string_input_with_tools(openai_respon
input_text = "What is the capital of Ireland?" input_text = "What is the capital of Ireland?"
model = "meta-llama/Llama-3.1-8B-Instruct" model = "meta-llama/Llama-3.1-8B-Instruct"
mock_inference_api.openai_chat_completion.side_effect = [
fake_stream("tool_call_completion.yaml"),
fake_stream(),
]
openai_responses_impl.tool_groups_api.get_tool.return_value = Tool( openai_responses_impl.tool_groups_api.get_tool.return_value = Tool(
identifier="web_search", identifier="web_search",
provider_id="client", provider_id="client",
@ -182,39 +178,50 @@ async def test_create_openai_response_with_string_input_with_tools(openai_respon
) )
# Execute # Execute
result = await openai_responses_impl.create_openai_response( for tool_name in WebSearchToolTypes:
input=input_text, # Reset mock states as we loop through each tool type
model=model, mock_inference_api.openai_chat_completion.side_effect = [
temperature=0.1, fake_stream("tool_call_completion.yaml"),
tools=[ fake_stream(),
OpenAIResponseInputToolWebSearch( ]
name="web_search", openai_responses_impl.tool_groups_api.get_tool.reset_mock()
) openai_responses_impl.tool_runtime_api.invoke_tool.reset_mock()
], openai_responses_impl.responses_store.store_response_object.reset_mock()
)
# Verify result = await openai_responses_impl.create_openai_response(
first_call = mock_inference_api.openai_chat_completion.call_args_list[0] input=input_text,
assert first_call.kwargs["messages"][0].content == "What is the capital of Ireland?" model=model,
assert first_call.kwargs["tools"] is not None temperature=0.1,
assert first_call.kwargs["temperature"] == 0.1 tools=[
OpenAIResponseInputToolWebSearch(
name=tool_name,
)
],
)
second_call = mock_inference_api.openai_chat_completion.call_args_list[1] # Verify
assert second_call.kwargs["messages"][-1].content == "Dublin" first_call = mock_inference_api.openai_chat_completion.call_args_list[0]
assert second_call.kwargs["temperature"] == 0.1 assert first_call.kwargs["messages"][0].content == "What is the capital of Ireland?"
assert first_call.kwargs["tools"] is not None
assert first_call.kwargs["temperature"] == 0.1
openai_responses_impl.tool_groups_api.get_tool.assert_called_once_with("web_search") second_call = mock_inference_api.openai_chat_completion.call_args_list[1]
openai_responses_impl.tool_runtime_api.invoke_tool.assert_called_once_with( assert second_call.kwargs["messages"][-1].content == "Dublin"
tool_name="web_search", assert second_call.kwargs["temperature"] == 0.1
kwargs={"query": "What is the capital of Ireland?"},
)
openai_responses_impl.responses_store.store_response_object.assert_called_once() openai_responses_impl.tool_groups_api.get_tool.assert_called_once_with("web_search")
openai_responses_impl.tool_runtime_api.invoke_tool.assert_called_once_with(
tool_name="web_search",
kwargs={"query": "What is the capital of Ireland?"},
)
# Check that we got the content from our mocked tool execution result openai_responses_impl.responses_store.store_response_object.assert_called_once()
assert len(result.output) >= 1
assert isinstance(result.output[1], OpenAIResponseMessage) # Check that we got the content from our mocked tool execution result
assert result.output[1].content[0].text == "Dublin" assert len(result.output) >= 1
assert isinstance(result.output[1], OpenAIResponseMessage)
assert result.output[1].content[0].text == "Dublin"
assert result.output[1].content[0].annotations == []
@pytest.mark.asyncio @pytest.mark.asyncio