diff --git a/docs/_static/llama-stack-spec.html b/docs/_static/llama-stack-spec.html
index 801e8dc33..f9e4bb38e 100644
--- a/docs/_static/llama-stack-spec.html
+++ b/docs/_static/llama-stack-spec.html
@@ -7390,6 +7390,147 @@
],
"title": "AgentTurnResponseTurnStartPayload"
},
+ "OpenAIResponseAnnotationCitation": {
+ "type": "object",
+ "properties": {
+ "type": {
+ "type": "string",
+ "const": "url_citation",
+ "default": "url_citation"
+ },
+ "end_index": {
+ "type": "integer"
+ },
+ "start_index": {
+ "type": "integer"
+ },
+ "title": {
+ "type": "string"
+ },
+ "url": {
+ "type": "string"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "type",
+ "end_index",
+ "start_index",
+ "title",
+ "url"
+ ],
+ "title": "OpenAIResponseAnnotationCitation"
+ },
+ "OpenAIResponseAnnotationContainerFileCitation": {
+ "type": "object",
+ "properties": {
+ "type": {
+ "type": "string",
+ "const": "container_file_citation",
+ "default": "container_file_citation"
+ },
+ "container_id": {
+ "type": "string"
+ },
+ "end_index": {
+ "type": "integer"
+ },
+ "file_id": {
+ "type": "string"
+ },
+ "filename": {
+ "type": "string"
+ },
+ "start_index": {
+ "type": "integer"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "type",
+ "container_id",
+ "end_index",
+ "file_id",
+ "filename",
+ "start_index"
+ ],
+ "title": "OpenAIResponseAnnotationContainerFileCitation"
+ },
+ "OpenAIResponseAnnotationFileCitation": {
+ "type": "object",
+ "properties": {
+ "type": {
+ "type": "string",
+ "const": "file_citation",
+ "default": "file_citation"
+ },
+ "file_id": {
+ "type": "string"
+ },
+ "filename": {
+ "type": "string"
+ },
+ "index": {
+ "type": "integer"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "type",
+ "file_id",
+ "filename",
+ "index"
+ ],
+ "title": "OpenAIResponseAnnotationFileCitation"
+ },
+ "OpenAIResponseAnnotationFilePath": {
+ "type": "object",
+ "properties": {
+ "type": {
+ "type": "string",
+ "const": "file_path",
+ "default": "file_path"
+ },
+ "file_id": {
+ "type": "string"
+ },
+ "index": {
+ "type": "integer"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "type",
+ "file_id",
+ "index"
+ ],
+ "title": "OpenAIResponseAnnotationFilePath"
+ },
+ "OpenAIResponseAnnotations": {
+ "oneOf": [
+ {
+ "$ref": "#/components/schemas/OpenAIResponseAnnotationFileCitation"
+ },
+ {
+ "$ref": "#/components/schemas/OpenAIResponseAnnotationCitation"
+ },
+ {
+ "$ref": "#/components/schemas/OpenAIResponseAnnotationContainerFileCitation"
+ },
+ {
+ "$ref": "#/components/schemas/OpenAIResponseAnnotationFilePath"
+ }
+ ],
+ "discriminator": {
+ "propertyName": "type",
+ "mapping": {
+ "file_citation": "#/components/schemas/OpenAIResponseAnnotationFileCitation",
+ "url_citation": "#/components/schemas/OpenAIResponseAnnotationCitation",
+ "container_file_citation": "#/components/schemas/OpenAIResponseAnnotationContainerFileCitation",
+ "file_path": "#/components/schemas/OpenAIResponseAnnotationFilePath"
+ }
+ }
+ },
"OpenAIResponseInput": {
"oneOf": [
{
@@ -7764,6 +7905,10 @@
"type": "string",
"const": "web_search"
},
+ {
+ "type": "string",
+ "const": "web_search_preview"
+ },
{
"type": "string",
"const": "web_search_preview_2025_03_11"
@@ -7855,12 +8000,19 @@
"type": "string",
"const": "output_text",
"default": "output_text"
+ },
+ "annotations": {
+ "type": "array",
+ "items": {
+ "$ref": "#/components/schemas/OpenAIResponseAnnotations"
+ }
}
},
"additionalProperties": false,
"required": [
"text",
- "type"
+ "type",
+ "annotations"
],
"title": "OpenAIResponseOutputMessageContentOutputText"
},
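
Note: for reference, this is the JSON shape the new annotation schemas describe. A minimal Python sketch follows; every field value below is invented sample data, not taken from the spec:

```python
import json

# Illustrative only: an "output_text" content part carrying one
# "url_citation" annotation, shaped to satisfy the schemas added above.
# The text, indices, title, and URL are invented sample values.
content_part = {
    "type": "output_text",
    "text": "Dublin is the capital of Ireland.",
    "annotations": [
        {
            # "type" is the discriminator; "url_citation" selects
            # OpenAIResponseAnnotationCitation, whose required fields follow.
            "type": "url_citation",
            "start_index": 0,
            "end_index": 33,
            "title": "Ireland - Example Encyclopedia",
            "url": "https://example.com/ireland",
        }
    ],
}
print(json.dumps(content_part, indent=2))
```
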
diff --git a/docs/_static/llama-stack-spec.yaml b/docs/_static/llama-stack-spec.yaml
index b736cd904..9175c97fc 100644
--- a/docs/_static/llama-stack-spec.yaml
+++ b/docs/_static/llama-stack-spec.yaml
@@ -5263,6 +5263,106 @@ components:
- event_type
- turn_id
title: AgentTurnResponseTurnStartPayload
+ OpenAIResponseAnnotationCitation:
+ type: object
+ properties:
+ type:
+ type: string
+ const: url_citation
+ default: url_citation
+ end_index:
+ type: integer
+ start_index:
+ type: integer
+ title:
+ type: string
+ url:
+ type: string
+ additionalProperties: false
+ required:
+ - type
+ - end_index
+ - start_index
+ - title
+ - url
+ title: OpenAIResponseAnnotationCitation
+ "OpenAIResponseAnnotationContainerFileCitation":
+ type: object
+ properties:
+ type:
+ type: string
+ const: container_file_citation
+ default: container_file_citation
+ container_id:
+ type: string
+ end_index:
+ type: integer
+ file_id:
+ type: string
+ filename:
+ type: string
+ start_index:
+ type: integer
+ additionalProperties: false
+ required:
+ - type
+ - container_id
+ - end_index
+ - file_id
+ - filename
+ - start_index
+ title: >-
+ OpenAIResponseAnnotationContainerFileCitation
+ OpenAIResponseAnnotationFileCitation:
+ type: object
+ properties:
+ type:
+ type: string
+ const: file_citation
+ default: file_citation
+ file_id:
+ type: string
+ filename:
+ type: string
+ index:
+ type: integer
+ additionalProperties: false
+ required:
+ - type
+ - file_id
+ - filename
+ - index
+ title: OpenAIResponseAnnotationFileCitation
+ OpenAIResponseAnnotationFilePath:
+ type: object
+ properties:
+ type:
+ type: string
+ const: file_path
+ default: file_path
+ file_id:
+ type: string
+ index:
+ type: integer
+ additionalProperties: false
+ required:
+ - type
+ - file_id
+ - index
+ title: OpenAIResponseAnnotationFilePath
+ OpenAIResponseAnnotations:
+ oneOf:
+ - $ref: '#/components/schemas/OpenAIResponseAnnotationFileCitation'
+ - $ref: '#/components/schemas/OpenAIResponseAnnotationCitation'
+ - $ref: '#/components/schemas/OpenAIResponseAnnotationContainerFileCitation'
+ - $ref: '#/components/schemas/OpenAIResponseAnnotationFilePath'
+ discriminator:
+ propertyName: type
+ mapping:
+ file_citation: '#/components/schemas/OpenAIResponseAnnotationFileCitation'
+ url_citation: '#/components/schemas/OpenAIResponseAnnotationCitation'
+ container_file_citation: '#/components/schemas/OpenAIResponseAnnotationContainerFileCitation'
+ file_path: '#/components/schemas/OpenAIResponseAnnotationFilePath'
OpenAIResponseInput:
oneOf:
- $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
@@ -5488,6 +5588,8 @@ components:
oneOf:
- type: string
const: web_search
+ - type: string
+ const: web_search_preview
- type: string
const: web_search_preview_2025_03_11
default: web_search
@@ -5547,10 +5649,15 @@ components:
type: string
const: output_text
default: output_text
+ annotations:
+ type: array
+ items:
+ $ref: '#/components/schemas/OpenAIResponseAnnotations'
additionalProperties: false
required:
- text
- type
+ - annotations
title: >-
OpenAIResponseOutputMessageContentOutputText
"OpenAIResponseOutputMessageFileSearchToolCall":
diff --git a/llama_stack/apis/agents/openai_responses.py b/llama_stack/apis/agents/openai_responses.py
index addb72f14..27b85e2d6 100644
--- a/llama_stack/apis/agents/openai_responses.py
+++ b/llama_stack/apis/agents/openai_responses.py
@@ -44,10 +44,55 @@ OpenAIResponseInputMessageContent = Annotated[
register_schema(OpenAIResponseInputMessageContent, name="OpenAIResponseInputMessageContent")
+@json_schema_type
+class OpenAIResponseAnnotationFileCitation(BaseModel):
+ type: Literal["file_citation"] = "file_citation"
+ file_id: str
+ filename: str
+ index: int
+
+
+@json_schema_type
+class OpenAIResponseAnnotationCitation(BaseModel):
+ type: Literal["url_citation"] = "url_citation"
+ end_index: int
+ start_index: int
+ title: str
+ url: str
+
+
+@json_schema_type
+class OpenAIResponseAnnotationContainerFileCitation(BaseModel):
+ type: Literal["container_file_citation"] = "container_file_citation"
+ container_id: str
+ end_index: int
+ file_id: str
+ filename: str
+ start_index: int
+
+
+@json_schema_type
+class OpenAIResponseAnnotationFilePath(BaseModel):
+ type: Literal["file_path"] = "file_path"
+ file_id: str
+ index: int
+
+
+OpenAIResponseAnnotations = Annotated[
+ OpenAIResponseAnnotationFileCitation
+ | OpenAIResponseAnnotationCitation
+ | OpenAIResponseAnnotationContainerFileCitation
+ | OpenAIResponseAnnotationFilePath,
+ Field(discriminator="type"),
+]
+register_schema(OpenAIResponseAnnotations, name="OpenAIResponseAnnotations")
+
+
@json_schema_type
class OpenAIResponseOutputMessageContentOutputText(BaseModel):
text: str
type: Literal["output_text"] = "output_text"
+ annotations: list[OpenAIResponseAnnotations] = Field(default_factory=list)
OpenAIResponseOutputMessageContent = Annotated[
@@ -384,9 +429,16 @@ OpenAIResponseInput = Annotated[
register_schema(OpenAIResponseInput, name="OpenAIResponseInput")
+# Must match the type Literal values of OpenAIResponseInputToolWebSearch below
+WebSearchToolTypes = ["web_search", "web_search_preview", "web_search_preview_2025_03_11"]
+
+
@json_schema_type
class OpenAIResponseInputToolWebSearch(BaseModel):
- type: Literal["web_search"] | Literal["web_search_preview_2025_03_11"] = "web_search"
+ # Must match values of WebSearchToolTypes above
+ type: Literal["web_search"] | Literal["web_search_preview"] | Literal["web_search_preview_2025_03_11"] = (
+ "web_search"
+ )
# TODO: actually use search_context_size somewhere...
search_context_size: str | None = Field(default="medium", pattern="^low|medium|high$")
# TODO: add user_location
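
A quick sketch of how the new Pydantic models behave, assuming pydantic v2 and an installed llama_stack (the file IDs and filename are made-up sample values): annotations default to an empty list, and the discriminated union dispatches on `type`.

```python
from pydantic import TypeAdapter

from llama_stack.apis.agents.openai_responses import (
    OpenAIResponseAnnotationFileCitation,
    OpenAIResponseAnnotations,
    OpenAIResponseOutputMessageContentOutputText,
)

# annotations defaults to an empty list via default_factory, so existing
# callers that never pass annotations keep working unchanged.
plain = OpenAIResponseOutputMessageContentOutputText(text="Dublin")
assert plain.annotations == []

# The discriminated union dispatches on "type" when validating raw data.
# file_id and filename here are invented sample values.
raw = {"type": "file_citation", "file_id": "file-abc123", "filename": "notes.txt", "index": 0}
parsed = TypeAdapter(OpenAIResponseAnnotations).validate_python(raw)
assert isinstance(parsed, OpenAIResponseAnnotationFileCitation)
```
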
diff --git a/llama_stack/providers/inline/agents/meta_reference/openai_responses.py b/llama_stack/providers/inline/agents/meta_reference/openai_responses.py
index 4465a32fe..cf3293ed0 100644
--- a/llama_stack/providers/inline/agents/meta_reference/openai_responses.py
+++ b/llama_stack/providers/inline/agents/meta_reference/openai_responses.py
@@ -42,6 +42,7 @@ from llama_stack.apis.agents.openai_responses import (
OpenAIResponseOutputMessageWebSearchToolCall,
OpenAIResponseText,
OpenAIResponseTextFormat,
+ WebSearchToolTypes,
)
from llama_stack.apis.common.content_types import TextContentItem
from llama_stack.apis.inference.inference import (
@@ -609,7 +610,7 @@ class OpenAIResponsesImpl:
# TODO: Handle other tool types
if input_tool.type == "function":
chat_tools.append(ChatCompletionToolParam(type="function", function=input_tool.model_dump()))
- elif input_tool.type == "web_search":
+ elif input_tool.type in WebSearchToolTypes:
tool_name = "web_search"
tool = await self.tool_groups_api.get_tool(tool_name)
if not tool:
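
The dispatch change above treats all three web-search aliases identically. A standalone sketch of that mapping (`resolve_tool_name` is a hypothetical helper for illustration; the provider inlines the membership check):

```python
# Mirrors the dispatch above: any accepted web-search alias resolves to the
# same underlying "web_search" tool.
WebSearchToolTypes = ["web_search", "web_search_preview", "web_search_preview_2025_03_11"]


def resolve_tool_name(input_tool_type: str) -> str | None:
    if input_tool_type in WebSearchToolTypes:
        return "web_search"
    return None  # other tool types are handled elsewhere


for alias in WebSearchToolTypes:
    assert resolve_tool_name(alias) == "web_search"
```
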
diff --git a/tests/unit/providers/agents/meta_reference/test_openai_responses.py b/tests/unit/providers/agents/meta_reference/test_openai_responses.py
index a3d798083..7772dd2cc 100644
--- a/tests/unit/providers/agents/meta_reference/test_openai_responses.py
+++ b/tests/unit/providers/agents/meta_reference/test_openai_responses.py
@@ -27,6 +27,7 @@ from llama_stack.apis.agents.openai_responses import (
OpenAIResponseOutputMessageWebSearchToolCall,
OpenAIResponseText,
OpenAIResponseTextFormat,
+ WebSearchToolTypes,
)
from llama_stack.apis.inference.inference import (
OpenAIAssistantMessageParam,
@@ -161,11 +162,6 @@ async def test_create_openai_response_with_string_input_with_tools(openai_respon
input_text = "What is the capital of Ireland?"
model = "meta-llama/Llama-3.1-8B-Instruct"
- mock_inference_api.openai_chat_completion.side_effect = [
- fake_stream("tool_call_completion.yaml"),
- fake_stream(),
- ]
-
openai_responses_impl.tool_groups_api.get_tool.return_value = Tool(
identifier="web_search",
provider_id="client",
@@ -182,39 +178,50 @@ async def test_create_openai_response_with_string_input_with_tools(openai_respon
)
# Execute
- result = await openai_responses_impl.create_openai_response(
- input=input_text,
- model=model,
- temperature=0.1,
- tools=[
- OpenAIResponseInputToolWebSearch(
- name="web_search",
- )
- ],
- )
+ for tool_name in WebSearchToolTypes:
+ # Reset mock states as we loop through each tool type
+ mock_inference_api.openai_chat_completion.side_effect = [
+ fake_stream("tool_call_completion.yaml"),
+ fake_stream(),
+ ]
+ openai_responses_impl.tool_groups_api.get_tool.reset_mock()
+ openai_responses_impl.tool_runtime_api.invoke_tool.reset_mock()
+ openai_responses_impl.responses_store.store_response_object.reset_mock()
- # Verify
- first_call = mock_inference_api.openai_chat_completion.call_args_list[0]
- assert first_call.kwargs["messages"][0].content == "What is the capital of Ireland?"
- assert first_call.kwargs["tools"] is not None
- assert first_call.kwargs["temperature"] == 0.1
+ result = await openai_responses_impl.create_openai_response(
+ input=input_text,
+ model=model,
+ temperature=0.1,
+ tools=[
+ OpenAIResponseInputToolWebSearch(
+                    type=tool_name,
+ )
+ ],
+ )
- second_call = mock_inference_api.openai_chat_completion.call_args_list[1]
- assert second_call.kwargs["messages"][-1].content == "Dublin"
- assert second_call.kwargs["temperature"] == 0.1
+ # Verify
+ first_call = mock_inference_api.openai_chat_completion.call_args_list[0]
+ assert first_call.kwargs["messages"][0].content == "What is the capital of Ireland?"
+ assert first_call.kwargs["tools"] is not None
+ assert first_call.kwargs["temperature"] == 0.1
- openai_responses_impl.tool_groups_api.get_tool.assert_called_once_with("web_search")
- openai_responses_impl.tool_runtime_api.invoke_tool.assert_called_once_with(
- tool_name="web_search",
- kwargs={"query": "What is the capital of Ireland?"},
- )
+ second_call = mock_inference_api.openai_chat_completion.call_args_list[1]
+ assert second_call.kwargs["messages"][-1].content == "Dublin"
+ assert second_call.kwargs["temperature"] == 0.1
- openai_responses_impl.responses_store.store_response_object.assert_called_once()
+ openai_responses_impl.tool_groups_api.get_tool.assert_called_once_with("web_search")
+ openai_responses_impl.tool_runtime_api.invoke_tool.assert_called_once_with(
+ tool_name="web_search",
+ kwargs={"query": "What is the capital of Ireland?"},
+ )
- # Check that we got the content from our mocked tool execution result
- assert len(result.output) >= 1
- assert isinstance(result.output[1], OpenAIResponseMessage)
- assert result.output[1].content[0].text == "Dublin"
+ openai_responses_impl.responses_store.store_response_object.assert_called_once()
+
+ # Check that we got the content from our mocked tool execution result
+        assert len(result.output) >= 2
+ assert isinstance(result.output[1], OpenAIResponseMessage)
+ assert result.output[1].content[0].text == "Dublin"
+ assert result.output[1].content[0].annotations == []
@pytest.mark.asyncio