diff --git a/docs/_static/llama-stack-spec.html b/docs/_static/llama-stack-spec.html index 801e8dc33..f9e4bb38e 100644 --- a/docs/_static/llama-stack-spec.html +++ b/docs/_static/llama-stack-spec.html @@ -7390,6 +7390,147 @@ ], "title": "AgentTurnResponseTurnStartPayload" }, + "OpenAIResponseAnnotationCitation": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "url_citation", + "default": "url_citation" + }, + "end_index": { + "type": "integer" + }, + "start_index": { + "type": "integer" + }, + "title": { + "type": "string" + }, + "url": { + "type": "string" + } + }, + "additionalProperties": false, + "required": [ + "type", + "end_index", + "start_index", + "title", + "url" + ], + "title": "OpenAIResponseAnnotationCitation" + }, + "OpenAIResponseAnnotationContainerFileCitation": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "container_file_citation", + "default": "container_file_citation" + }, + "container_id": { + "type": "string" + }, + "end_index": { + "type": "integer" + }, + "file_id": { + "type": "string" + }, + "filename": { + "type": "string" + }, + "start_index": { + "type": "integer" + } + }, + "additionalProperties": false, + "required": [ + "type", + "container_id", + "end_index", + "file_id", + "filename", + "start_index" + ], + "title": "OpenAIResponseAnnotationContainerFileCitation" + }, + "OpenAIResponseAnnotationFileCitation": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "file_citation", + "default": "file_citation" + }, + "file_id": { + "type": "string" + }, + "filename": { + "type": "string" + }, + "index": { + "type": "integer" + } + }, + "additionalProperties": false, + "required": [ + "type", + "file_id", + "filename", + "index" + ], + "title": "OpenAIResponseAnnotationFileCitation" + }, + "OpenAIResponseAnnotationFilePath": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "file_path", + "default": 
"file_path" + }, + "file_id": { + "type": "string" + }, + "index": { + "type": "integer" + } + }, + "additionalProperties": false, + "required": [ + "type", + "file_id", + "index" + ], + "title": "OpenAIResponseAnnotationFilePath" + }, + "OpenAIResponseAnnotations": { + "oneOf": [ + { + "$ref": "#/components/schemas/OpenAIResponseAnnotationFileCitation" + }, + { + "$ref": "#/components/schemas/OpenAIResponseAnnotationCitation" + }, + { + "$ref": "#/components/schemas/OpenAIResponseAnnotationContainerFileCitation" + }, + { + "$ref": "#/components/schemas/OpenAIResponseAnnotationFilePath" + } + ], + "discriminator": { + "propertyName": "type", + "mapping": { + "file_citation": "#/components/schemas/OpenAIResponseAnnotationFileCitation", + "url_citation": "#/components/schemas/OpenAIResponseAnnotationCitation", + "container_file_citation": "#/components/schemas/OpenAIResponseAnnotationContainerFileCitation", + "file_path": "#/components/schemas/OpenAIResponseAnnotationFilePath" + } + } + }, "OpenAIResponseInput": { "oneOf": [ { @@ -7764,6 +7905,10 @@ "type": "string", "const": "web_search" }, + { + "type": "string", + "const": "web_search_preview" + }, { "type": "string", "const": "web_search_preview_2025_03_11" @@ -7855,12 +8000,19 @@ "type": "string", "const": "output_text", "default": "output_text" + }, + "annotations": { + "type": "array", + "items": { + "$ref": "#/components/schemas/OpenAIResponseAnnotations" + } } }, "additionalProperties": false, "required": [ "text", - "type" + "type", + "annotations" ], "title": "OpenAIResponseOutputMessageContentOutputText" }, diff --git a/docs/_static/llama-stack-spec.yaml b/docs/_static/llama-stack-spec.yaml index b736cd904..9175c97fc 100644 --- a/docs/_static/llama-stack-spec.yaml +++ b/docs/_static/llama-stack-spec.yaml @@ -5263,6 +5263,106 @@ components: - event_type - turn_id title: AgentTurnResponseTurnStartPayload + OpenAIResponseAnnotationCitation: + type: object + properties: + type: + type: string + const: 
url_citation + default: url_citation + end_index: + type: integer + start_index: + type: integer + title: + type: string + url: + type: string + additionalProperties: false + required: + - type + - end_index + - start_index + - title + - url + title: OpenAIResponseAnnotationCitation + "OpenAIResponseAnnotationContainerFileCitation": + type: object + properties: + type: + type: string + const: container_file_citation + default: container_file_citation + container_id: + type: string + end_index: + type: integer + file_id: + type: string + filename: + type: string + start_index: + type: integer + additionalProperties: false + required: + - type + - container_id + - end_index + - file_id + - filename + - start_index + title: >- + OpenAIResponseAnnotationContainerFileCitation + OpenAIResponseAnnotationFileCitation: + type: object + properties: + type: + type: string + const: file_citation + default: file_citation + file_id: + type: string + filename: + type: string + index: + type: integer + additionalProperties: false + required: + - type + - file_id + - filename + - index + title: OpenAIResponseAnnotationFileCitation + OpenAIResponseAnnotationFilePath: + type: object + properties: + type: + type: string + const: file_path + default: file_path + file_id: + type: string + index: + type: integer + additionalProperties: false + required: + - type + - file_id + - index + title: OpenAIResponseAnnotationFilePath + OpenAIResponseAnnotations: + oneOf: + - $ref: '#/components/schemas/OpenAIResponseAnnotationFileCitation' + - $ref: '#/components/schemas/OpenAIResponseAnnotationCitation' + - $ref: '#/components/schemas/OpenAIResponseAnnotationContainerFileCitation' + - $ref: '#/components/schemas/OpenAIResponseAnnotationFilePath' + discriminator: + propertyName: type + mapping: + file_citation: '#/components/schemas/OpenAIResponseAnnotationFileCitation' + url_citation: '#/components/schemas/OpenAIResponseAnnotationCitation' + container_file_citation: 
'#/components/schemas/OpenAIResponseAnnotationContainerFileCitation' + file_path: '#/components/schemas/OpenAIResponseAnnotationFilePath' OpenAIResponseInput: oneOf: - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' @@ -5488,6 +5588,8 @@ components: oneOf: - type: string const: web_search + - type: string + const: web_search_preview - type: string const: web_search_preview_2025_03_11 default: web_search @@ -5547,10 +5649,15 @@ components: type: string const: output_text default: output_text + annotations: + type: array + items: + $ref: '#/components/schemas/OpenAIResponseAnnotations' additionalProperties: false required: - text - type + - annotations title: >- OpenAIResponseOutputMessageContentOutputText "OpenAIResponseOutputMessageFileSearchToolCall": diff --git a/llama_stack/apis/agents/openai_responses.py b/llama_stack/apis/agents/openai_responses.py index addb72f14..27b85e2d6 100644 --- a/llama_stack/apis/agents/openai_responses.py +++ b/llama_stack/apis/agents/openai_responses.py @@ -44,10 +44,55 @@ OpenAIResponseInputMessageContent = Annotated[ register_schema(OpenAIResponseInputMessageContent, name="OpenAIResponseInputMessageContent") +@json_schema_type +class OpenAIResponseAnnotationFileCitation(BaseModel): + type: Literal["file_citation"] = "file_citation" + file_id: str + filename: str + index: int + + +@json_schema_type +class OpenAIResponseAnnotationCitation(BaseModel): + type: Literal["url_citation"] = "url_citation" + end_index: int + start_index: int + title: str + url: str + + +@json_schema_type +class OpenAIResponseAnnotationContainerFileCitation(BaseModel): + type: Literal["container_file_citation"] = "container_file_citation" + container_id: str + end_index: int + file_id: str + filename: str + start_index: int + + +@json_schema_type +class OpenAIResponseAnnotationFilePath(BaseModel): + type: Literal["file_path"] = "file_path" + file_id: str + index: int + + +OpenAIResponseAnnotations = Annotated[ + 
OpenAIResponseAnnotationFileCitation + | OpenAIResponseAnnotationCitation + | OpenAIResponseAnnotationContainerFileCitation + | OpenAIResponseAnnotationFilePath, + Field(discriminator="type"), +] +register_schema(OpenAIResponseAnnotations, name="OpenAIResponseAnnotations") + + @json_schema_type class OpenAIResponseOutputMessageContentOutputText(BaseModel): text: str type: Literal["output_text"] = "output_text" + annotations: list[OpenAIResponseAnnotations] = Field(default_factory=list) OpenAIResponseOutputMessageContent = Annotated[ @@ -384,9 +429,16 @@ OpenAIResponseInput = Annotated[ register_schema(OpenAIResponseInput, name="OpenAIResponseInput") +# Must match type Literals of OpenAIResponseInputToolWebSearch below +WebSearchToolTypes = ["web_search", "web_search_preview", "web_search_preview_2025_03_11"] + + @json_schema_type class OpenAIResponseInputToolWebSearch(BaseModel): - type: Literal["web_search"] | Literal["web_search_preview_2025_03_11"] = "web_search" + # Must match values of WebSearchToolTypes above + type: Literal["web_search"] | Literal["web_search_preview"] | Literal["web_search_preview_2025_03_11"] = ( + "web_search" + ) # TODO: actually use search_context_size somewhere... 
- search_context_size: str | None = Field(default="medium", pattern="^low|medium|high$") + search_context_size: str | None = Field(default="medium", pattern="^(low|medium|high)$") # TODO: add user_location diff --git a/llama_stack/providers/inline/agents/meta_reference/openai_responses.py b/llama_stack/providers/inline/agents/meta_reference/openai_responses.py index 4465a32fe..cf3293ed0 100644 --- a/llama_stack/providers/inline/agents/meta_reference/openai_responses.py +++ b/llama_stack/providers/inline/agents/meta_reference/openai_responses.py @@ -42,6 +42,7 @@ from llama_stack.apis.agents.openai_responses import ( OpenAIResponseOutputMessageWebSearchToolCall, OpenAIResponseText, OpenAIResponseTextFormat, + WebSearchToolTypes, ) from llama_stack.apis.common.content_types import TextContentItem from llama_stack.apis.inference.inference import ( @@ -609,7 +610,7 @@ class OpenAIResponsesImpl: # TODO: Handle other tool types if input_tool.type == "function": chat_tools.append(ChatCompletionToolParam(type="function", function=input_tool.model_dump())) - elif input_tool.type == "web_search": + elif input_tool.type in WebSearchToolTypes: tool_name = "web_search" tool = await self.tool_groups_api.get_tool(tool_name) if not tool: diff --git a/tests/unit/providers/agents/meta_reference/test_openai_responses.py b/tests/unit/providers/agents/meta_reference/test_openai_responses.py index a3d798083..7772dd2cc 100644 --- a/tests/unit/providers/agents/meta_reference/test_openai_responses.py +++ b/tests/unit/providers/agents/meta_reference/test_openai_responses.py @@ -27,6 +27,7 @@ from llama_stack.apis.agents.openai_responses import ( OpenAIResponseOutputMessageWebSearchToolCall, OpenAIResponseText, OpenAIResponseTextFormat, + WebSearchToolTypes, ) from llama_stack.apis.inference.inference import ( OpenAIAssistantMessageParam, @@ -161,11 +162,6 @@ async def test_create_openai_response_with_string_input_with_tools(openai_respon input_text = "What is the capital of Ireland?" 
model = "meta-llama/Llama-3.1-8B-Instruct" - mock_inference_api.openai_chat_completion.side_effect = [ - fake_stream("tool_call_completion.yaml"), - fake_stream(), - ] - openai_responses_impl.tool_groups_api.get_tool.return_value = Tool( identifier="web_search", provider_id="client", @@ -182,39 +178,50 @@ async def test_create_openai_response_with_string_input_with_tools(openai_respon ) # Execute - result = await openai_responses_impl.create_openai_response( - input=input_text, - model=model, - temperature=0.1, - tools=[ - OpenAIResponseInputToolWebSearch( - name="web_search", - ) - ], - ) + for tool_name in WebSearchToolTypes: + # Reset mock states as we loop through each tool type + mock_inference_api.openai_chat_completion.side_effect = [ + fake_stream("tool_call_completion.yaml"), + fake_stream(), + ] + openai_responses_impl.tool_groups_api.get_tool.reset_mock() + openai_responses_impl.tool_runtime_api.invoke_tool.reset_mock() + openai_responses_impl.responses_store.store_response_object.reset_mock() - # Verify - first_call = mock_inference_api.openai_chat_completion.call_args_list[0] - assert first_call.kwargs["messages"][0].content == "What is the capital of Ireland?" - assert first_call.kwargs["tools"] is not None - assert first_call.kwargs["temperature"] == 0.1 + result = await openai_responses_impl.create_openai_response( + input=input_text, + model=model, + temperature=0.1, + tools=[ + OpenAIResponseInputToolWebSearch( + type=tool_name, + ) + ], + ) - second_call = mock_inference_api.openai_chat_completion.call_args_list[1] - assert second_call.kwargs["messages"][-1].content == "Dublin" - assert second_call.kwargs["temperature"] == 0.1 + # Verify + first_call = mock_inference_api.openai_chat_completion.call_args_list[0] + assert first_call.kwargs["messages"][0].content == "What is the capital of Ireland?" 
+ assert first_call.kwargs["tools"] is not None + assert first_call.kwargs["temperature"] == 0.1 - openai_responses_impl.tool_groups_api.get_tool.assert_called_once_with("web_search") - openai_responses_impl.tool_runtime_api.invoke_tool.assert_called_once_with( - tool_name="web_search", - kwargs={"query": "What is the capital of Ireland?"}, - ) + second_call = mock_inference_api.openai_chat_completion.call_args_list[1] + assert second_call.kwargs["messages"][-1].content == "Dublin" + assert second_call.kwargs["temperature"] == 0.1 - openai_responses_impl.responses_store.store_response_object.assert_called_once() + openai_responses_impl.tool_groups_api.get_tool.assert_called_once_with("web_search") + openai_responses_impl.tool_runtime_api.invoke_tool.assert_called_once_with( + tool_name="web_search", + kwargs={"query": "What is the capital of Ireland?"}, + ) - # Check that we got the content from our mocked tool execution result - assert len(result.output) >= 1 - assert isinstance(result.output[1], OpenAIResponseMessage) - assert result.output[1].content[0].text == "Dublin" + openai_responses_impl.responses_store.store_response_object.assert_called_once() + + # Check that we got the content from our mocked tool execution result + assert len(result.output) >= 2 + assert isinstance(result.output[1], OpenAIResponseMessage) + assert result.output[1].content[0].text == "Dublin" + assert result.output[1].content[0].annotations == [] @pytest.mark.asyncio