Mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-12-03 01:48:05 +00:00)

Restore responses unit tests

commit f97b66f3ec (parent fcf649b97a)
8 changed files with 2244 additions and 0 deletions
@@ -0,0 +1,23 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

import os

import yaml

from llama_stack.apis.inference import (
    OpenAIChatCompletion,
)

FIXTURES_DIR = os.path.dirname(os.path.abspath(__file__))


def load_chat_completion_fixture(filename: str) -> OpenAIChatCompletion:
    fixture_path = os.path.join(FIXTURES_DIR, filename)

    with open(fixture_path) as f:
        data = yaml.safe_load(f)
    return OpenAIChatCompletion(**data)
@@ -0,0 +1,9 @@
id: chat-completion-123
choices:
  - message:
      content: "Dublin"
      role: assistant
    finish_reason: stop
    index: 0
created: 1234567890
model: meta-llama/Llama-3.1-8B-Instruct
@@ -0,0 +1,14 @@
id: chat-completion-123
choices:
  - message:
      tool_calls:
        - id: tool_call_123
          type: function
          function:
            name: web_search
            arguments: '{"query":"What is the capital of Ireland?"}'
      role: assistant
    finish_reason: stop
    index: 0
created: 1234567890
model: meta-llama/Llama-3.1-8B-Instruct
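The two YAML documents above are fixtures for the loader added at the top of this commit. A minimal usage sketch; the fixture filenames below are hypothetical, since the diff view does not show what the fixture files are named:

# Hypothetical filenames -- the diff does not show the actual fixture file names.
simple = load_chat_completion_fixture("simple_chat_completion.yaml")
assert simple.choices[0].message.content == "Dublin"

with_tool_call = load_chat_completion_fixture("tool_call_completion.yaml")
assert with_tool_call.choices[0].message.tool_calls[0].function.name == "web_search"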
tests/unit/providers/agents/meta_reference/test_openai_responses.py (new file, 1244 lines)
File diff suppressed because it is too large.
@@ -0,0 +1,249 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.


import pytest

from llama_stack.apis.agents.openai_responses import (
    OpenAIResponseMessage,
    OpenAIResponseObject,
    OpenAIResponseObjectStreamResponseCompleted,
    OpenAIResponseObjectStreamResponseOutputItemDone,
    OpenAIResponseOutputMessageContentOutputText,
)
from llama_stack.apis.common.errors import (
    ConversationNotFoundError,
    InvalidConversationIdError,
)
from llama_stack.apis.conversations.conversations import (
    ConversationItemList,
)

# Import existing fixtures from the main responses test file
pytest_plugins = ["tests.unit.providers.agents.meta_reference.test_openai_responses"]

from llama_stack.providers.inline.agents.meta_reference.responses.openai_responses import (
    OpenAIResponsesImpl,
)


@pytest.fixture
def responses_impl_with_conversations(
    mock_inference_api,
    mock_tool_groups_api,
    mock_tool_runtime_api,
    mock_responses_store,
    mock_vector_io_api,
    mock_conversations_api,
    mock_safety_api,
):
    """Create OpenAIResponsesImpl instance with conversations API."""
    return OpenAIResponsesImpl(
        inference_api=mock_inference_api,
        tool_groups_api=mock_tool_groups_api,
        tool_runtime_api=mock_tool_runtime_api,
        responses_store=mock_responses_store,
        vector_io_api=mock_vector_io_api,
        conversations_api=mock_conversations_api,
        safety_api=mock_safety_api,
    )


class TestConversationValidation:
    """Test conversation ID validation logic."""

    async def test_nonexistent_conversation_raises_error(
        self, responses_impl_with_conversations, mock_conversations_api
    ):
        """Test that ConversationNotFoundError is raised for non-existent conversation."""
        conv_id = "conv_nonexistent"

        # Mock conversation not found
        mock_conversations_api.list_items.side_effect = ConversationNotFoundError("conv_nonexistent")

        with pytest.raises(ConversationNotFoundError):
            await responses_impl_with_conversations.create_openai_response(
                input="Hello", model="test-model", conversation=conv_id, stream=False
            )


class TestMessageSyncing:
    """Test message syncing to conversations."""

    async def test_sync_response_to_conversation_simple(
        self, responses_impl_with_conversations, mock_conversations_api
    ):
        """Test syncing simple response to conversation."""
        conv_id = "conv_test123"
        input_text = "What are the 5 Ds of dodgeball?"

        # Output items (what the model generated)
        output_items = [
            OpenAIResponseMessage(
                id="msg_response",
                content=[
                    OpenAIResponseOutputMessageContentOutputText(
                        text="The 5 Ds are: Dodge, Duck, Dip, Dive, and Dodge.", type="output_text", annotations=[]
                    )
                ],
                role="assistant",
                status="completed",
                type="message",
            )
        ]

        await responses_impl_with_conversations._sync_response_to_conversation(conv_id, input_text, output_items)

        # should call add_items with user input and assistant response
        mock_conversations_api.add_items.assert_called_once()
        call_args = mock_conversations_api.add_items.call_args

        assert call_args[0][0] == conv_id  # conversation_id
        items = call_args[0][1]  # conversation_items

        assert len(items) == 2
        # User message
        assert items[0].type == "message"
        assert items[0].role == "user"
        assert items[0].content[0].type == "input_text"
        assert items[0].content[0].text == input_text

        # Assistant message
        assert items[1].type == "message"
        assert items[1].role == "assistant"

    async def test_sync_response_to_conversation_api_error(
        self, responses_impl_with_conversations, mock_conversations_api
    ):
        mock_conversations_api.add_items.side_effect = Exception("API Error")
        output_items = []

        # matching the behavior of OpenAI here
        with pytest.raises(Exception, match="API Error"):
            await responses_impl_with_conversations._sync_response_to_conversation(
                "conv_test123", "Hello", output_items
            )

    async def test_sync_with_list_input(self, responses_impl_with_conversations, mock_conversations_api):
        """Test syncing with list of input messages."""
        conv_id = "conv_test123"
        input_messages = [
            OpenAIResponseMessage(role="user", content=[{"type": "input_text", "text": "First message"}]),
        ]
        output_items = [
            OpenAIResponseMessage(
                id="msg_response",
                content=[OpenAIResponseOutputMessageContentOutputText(text="Response", type="output_text")],
                role="assistant",
                status="completed",
                type="message",
            )
        ]

        await responses_impl_with_conversations._sync_response_to_conversation(conv_id, input_messages, output_items)

        mock_conversations_api.add_items.assert_called_once()
        call_args = mock_conversations_api.add_items.call_args

        items = call_args[0][1]
        # Should have input message + output message
        assert len(items) == 2


class TestIntegrationWorkflow:
    """Integration tests for the full conversation workflow."""

    async def test_create_response_with_valid_conversation(
        self, responses_impl_with_conversations, mock_conversations_api
    ):
        """Test creating a response with a valid conversation parameter."""
        mock_conversations_api.list_items.return_value = ConversationItemList(
            data=[], first_id=None, has_more=False, last_id=None, object="list"
        )

        async def mock_streaming_response(*args, **kwargs):
            message_item = OpenAIResponseMessage(
                id="msg_response",
                content=[
                    OpenAIResponseOutputMessageContentOutputText(
                        text="Test response", type="output_text", annotations=[]
                    )
                ],
                role="assistant",
                status="completed",
                type="message",
            )

            # Emit output_item.done event first (needed for conversation sync)
            yield OpenAIResponseObjectStreamResponseOutputItemDone(
                response_id="resp_test123",
                item=message_item,
                output_index=0,
                sequence_number=1,
                type="response.output_item.done",
            )

            # Then emit response.completed
            mock_response = OpenAIResponseObject(
                id="resp_test123",
                created_at=1234567890,
                model="test-model",
                object="response",
                output=[message_item],
                status="completed",
            )

            yield OpenAIResponseObjectStreamResponseCompleted(response=mock_response, type="response.completed")

        responses_impl_with_conversations._create_streaming_response = mock_streaming_response

        input_text = "Hello, how are you?"
        conversation_id = "conv_test123"

        response = await responses_impl_with_conversations.create_openai_response(
            input=input_text, model="test-model", conversation=conversation_id, stream=False
        )

        assert response is not None
        assert response.id == "resp_test123"

        # Note: conversation sync happens inside _create_streaming_response,
        # which we're mocking here, so we can't test it in this unit test.
        # The sync logic is tested separately in TestMessageSyncing.

    async def test_create_response_with_invalid_conversation_id(self, responses_impl_with_conversations):
        """Test creating a response with an invalid conversation ID."""
        with pytest.raises(InvalidConversationIdError) as exc_info:
            await responses_impl_with_conversations.create_openai_response(
                input="Hello", model="test-model", conversation="invalid_id", stream=False
            )

        assert "Expected an ID that begins with 'conv_'" in str(exc_info.value)

    async def test_create_response_with_nonexistent_conversation(
        self, responses_impl_with_conversations, mock_conversations_api
    ):
        """Test creating a response with a non-existent conversation."""
        mock_conversations_api.list_items.side_effect = ConversationNotFoundError("conv_nonexistent")

        with pytest.raises(ConversationNotFoundError) as exc_info:
            await responses_impl_with_conversations.create_openai_response(
                input="Hello", model="test-model", conversation="conv_nonexistent", stream=False
            )

        assert "not found" in str(exc_info.value)

    async def test_conversation_and_previous_response_id(
        self, responses_impl_with_conversations, mock_conversations_api, mock_responses_store
    ):
        with pytest.raises(ValueError) as exc_info:
            await responses_impl_with_conversations.create_openai_response(
                input="test", model="test", conversation="conv_123", previous_response_id="resp_123"
            )

        assert "Mutually exclusive parameters" in str(exc_info.value)
        assert "previous_response_id" in str(exc_info.value)
        assert "conversation" in str(exc_info.value)
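The mock_* fixtures consumed by responses_impl_with_conversations are not defined in this file; they are pulled in from the suppressed test_openai_responses.py via pytest_plugins. A minimal sketch of what such a shared fixture is assumed to look like (the real definitions may differ):

# Assumed shape of the shared fixtures, mirroring the AsyncMock-based mocks used elsewhere
# in this commit; the actual definitions live in the suppressed test_openai_responses.py.
from unittest.mock import AsyncMock

import pytest


@pytest.fixture
def mock_conversations_api():
    return AsyncMock()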
@@ -0,0 +1,367 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.


import pytest

from llama_stack.apis.agents.openai_responses import (
    OpenAIResponseAnnotationFileCitation,
    OpenAIResponseInputFunctionToolCallOutput,
    OpenAIResponseInputMessageContentImage,
    OpenAIResponseInputMessageContentText,
    OpenAIResponseInputToolFunction,
    OpenAIResponseInputToolWebSearch,
    OpenAIResponseMessage,
    OpenAIResponseOutputMessageContentOutputText,
    OpenAIResponseOutputMessageFunctionToolCall,
    OpenAIResponseText,
    OpenAIResponseTextFormat,
)
from llama_stack.apis.inference import (
    OpenAIAssistantMessageParam,
    OpenAIChatCompletionContentPartImageParam,
    OpenAIChatCompletionContentPartTextParam,
    OpenAIChatCompletionToolCall,
    OpenAIChatCompletionToolCallFunction,
    OpenAIChoice,
    OpenAIDeveloperMessageParam,
    OpenAIResponseFormatJSONObject,
    OpenAIResponseFormatJSONSchema,
    OpenAIResponseFormatText,
    OpenAISystemMessageParam,
    OpenAIToolMessageParam,
    OpenAIUserMessageParam,
)
from llama_stack.providers.inline.agents.meta_reference.responses.utils import (
    _extract_citations_from_text,
    convert_chat_choice_to_response_message,
    convert_response_content_to_chat_content,
    convert_response_input_to_chat_messages,
    convert_response_text_to_chat_response_format,
    get_message_type_by_role,
    is_function_tool_call,
)


class TestConvertChatChoiceToResponseMessage:
    async def test_convert_string_content(self):
        choice = OpenAIChoice(
            message=OpenAIAssistantMessageParam(content="Test message"),
            finish_reason="stop",
            index=0,
        )

        result = await convert_chat_choice_to_response_message(choice)

        assert result.role == "assistant"
        assert result.status == "completed"
        assert len(result.content) == 1
        assert isinstance(result.content[0], OpenAIResponseOutputMessageContentOutputText)
        assert result.content[0].text == "Test message"

    async def test_convert_text_param_content(self):
        choice = OpenAIChoice(
            message=OpenAIAssistantMessageParam(
                content=[OpenAIChatCompletionContentPartTextParam(text="Test text param")]
            ),
            finish_reason="stop",
            index=0,
        )

        with pytest.raises(ValueError) as exc_info:
            await convert_chat_choice_to_response_message(choice)

        assert "does not yet support output content type" in str(exc_info.value)


class TestConvertResponseContentToChatContent:
    async def test_convert_string_content(self):
        result = await convert_response_content_to_chat_content("Simple string")
        assert result == "Simple string"

    async def test_convert_text_content_parts(self):
        content = [
            OpenAIResponseInputMessageContentText(text="First part"),
            OpenAIResponseOutputMessageContentOutputText(text="Second part"),
        ]

        result = await convert_response_content_to_chat_content(content)

        assert len(result) == 2
        assert isinstance(result[0], OpenAIChatCompletionContentPartTextParam)
        assert result[0].text == "First part"
        assert isinstance(result[1], OpenAIChatCompletionContentPartTextParam)
        assert result[1].text == "Second part"

    async def test_convert_image_content(self):
        content = [OpenAIResponseInputMessageContentImage(image_url="https://example.com/image.jpg", detail="high")]

        result = await convert_response_content_to_chat_content(content)

        assert len(result) == 1
        assert isinstance(result[0], OpenAIChatCompletionContentPartImageParam)
        assert result[0].image_url.url == "https://example.com/image.jpg"
        assert result[0].image_url.detail == "high"


class TestConvertResponseInputToChatMessages:
    async def test_convert_string_input(self):
        result = await convert_response_input_to_chat_messages("User message")

        assert len(result) == 1
        assert isinstance(result[0], OpenAIUserMessageParam)
        assert result[0].content == "User message"

    async def test_convert_function_tool_call_output(self):
        input_items = [
            OpenAIResponseOutputMessageFunctionToolCall(
                call_id="call_123",
                name="test_function",
                arguments='{"param": "value"}',
            ),
            OpenAIResponseInputFunctionToolCallOutput(
                output="Tool output",
                call_id="call_123",
            ),
        ]

        result = await convert_response_input_to_chat_messages(input_items)

        assert len(result) == 2
        assert isinstance(result[0], OpenAIAssistantMessageParam)
        assert result[0].tool_calls[0].id == "call_123"
        assert result[0].tool_calls[0].function.name == "test_function"
        assert result[0].tool_calls[0].function.arguments == '{"param": "value"}'
        assert isinstance(result[1], OpenAIToolMessageParam)
        assert result[1].content == "Tool output"
        assert result[1].tool_call_id == "call_123"

    async def test_convert_function_tool_call(self):
        input_items = [
            OpenAIResponseOutputMessageFunctionToolCall(
                call_id="call_456",
                name="test_function",
                arguments='{"param": "value"}',
            )
        ]

        result = await convert_response_input_to_chat_messages(input_items)

        assert len(result) == 1
        assert isinstance(result[0], OpenAIAssistantMessageParam)
        assert len(result[0].tool_calls) == 1
        assert result[0].tool_calls[0].id == "call_456"
        assert result[0].tool_calls[0].function.name == "test_function"
        assert result[0].tool_calls[0].function.arguments == '{"param": "value"}'

    async def test_convert_function_call_ordering(self):
        input_items = [
            OpenAIResponseOutputMessageFunctionToolCall(
                call_id="call_123",
                name="test_function_a",
                arguments='{"param": "value"}',
            ),
            OpenAIResponseOutputMessageFunctionToolCall(
                call_id="call_456",
                name="test_function_b",
                arguments='{"param": "value"}',
            ),
            OpenAIResponseInputFunctionToolCallOutput(
                output="AAA",
                call_id="call_123",
            ),
            OpenAIResponseInputFunctionToolCallOutput(
                output="BBB",
                call_id="call_456",
            ),
        ]

        result = await convert_response_input_to_chat_messages(input_items)
        assert len(result) == 4
        assert isinstance(result[0], OpenAIAssistantMessageParam)
        assert len(result[0].tool_calls) == 1
        assert result[0].tool_calls[0].id == "call_123"
        assert result[0].tool_calls[0].function.name == "test_function_a"
        assert result[0].tool_calls[0].function.arguments == '{"param": "value"}'
        assert isinstance(result[1], OpenAIToolMessageParam)
        assert result[1].content == "AAA"
        assert result[1].tool_call_id == "call_123"
        assert isinstance(result[2], OpenAIAssistantMessageParam)
        assert len(result[2].tool_calls) == 1
        assert result[2].tool_calls[0].id == "call_456"
        assert result[2].tool_calls[0].function.name == "test_function_b"
        assert result[2].tool_calls[0].function.arguments == '{"param": "value"}'
        assert isinstance(result[3], OpenAIToolMessageParam)
        assert result[3].content == "BBB"
        assert result[3].tool_call_id == "call_456"

    async def test_convert_response_message(self):
        input_items = [
            OpenAIResponseMessage(
                role="user",
                content=[OpenAIResponseInputMessageContentText(text="User text")],
            )
        ]

        result = await convert_response_input_to_chat_messages(input_items)

        assert len(result) == 1
        assert isinstance(result[0], OpenAIUserMessageParam)
        # Content should be converted to chat content format
        assert len(result[0].content) == 1
        assert result[0].content[0].text == "User text"


class TestConvertResponseTextToChatResponseFormat:
    async def test_convert_text_format(self):
        text = OpenAIResponseText(format=OpenAIResponseTextFormat(type="text"))
        result = await convert_response_text_to_chat_response_format(text)

        assert isinstance(result, OpenAIResponseFormatText)
        assert result.type == "text"

    async def test_convert_json_object_format(self):
        text = OpenAIResponseText(format={"type": "json_object"})
        result = await convert_response_text_to_chat_response_format(text)

        assert isinstance(result, OpenAIResponseFormatJSONObject)

    async def test_convert_json_schema_format(self):
        schema_def = {"type": "object", "properties": {"test": {"type": "string"}}}
        text = OpenAIResponseText(
            format={
                "type": "json_schema",
                "name": "test_schema",
                "schema": schema_def,
            }
        )
        result = await convert_response_text_to_chat_response_format(text)

        assert isinstance(result, OpenAIResponseFormatJSONSchema)
        assert result.json_schema["name"] == "test_schema"
        assert result.json_schema["schema"] == schema_def

    async def test_default_text_format(self):
        text = OpenAIResponseText()
        result = await convert_response_text_to_chat_response_format(text)

        assert isinstance(result, OpenAIResponseFormatText)
        assert result.type == "text"


class TestGetMessageTypeByRole:
    async def test_user_role(self):
        result = await get_message_type_by_role("user")
        assert result == OpenAIUserMessageParam

    async def test_system_role(self):
        result = await get_message_type_by_role("system")
        assert result == OpenAISystemMessageParam

    async def test_assistant_role(self):
        result = await get_message_type_by_role("assistant")
        assert result == OpenAIAssistantMessageParam

    async def test_developer_role(self):
        result = await get_message_type_by_role("developer")
        assert result == OpenAIDeveloperMessageParam

    async def test_unknown_role(self):
        result = await get_message_type_by_role("unknown")
        assert result is None


class TestIsFunctionToolCall:
    def test_is_function_tool_call_true(self):
        tool_call = OpenAIChatCompletionToolCall(
            index=0,
            id="call_123",
            function=OpenAIChatCompletionToolCallFunction(
                name="test_function",
                arguments="{}",
            ),
        )
        tools = [
            OpenAIResponseInputToolFunction(
                type="function", name="test_function", parameters={"type": "object", "properties": {}}
            ),
            OpenAIResponseInputToolWebSearch(type="web_search"),
        ]

        result = is_function_tool_call(tool_call, tools)
        assert result is True

    def test_is_function_tool_call_false_different_name(self):
        tool_call = OpenAIChatCompletionToolCall(
            index=0,
            id="call_123",
            function=OpenAIChatCompletionToolCallFunction(
                name="other_function",
                arguments="{}",
            ),
        )
        tools = [
            OpenAIResponseInputToolFunction(
                type="function", name="test_function", parameters={"type": "object", "properties": {}}
            ),
        ]

        result = is_function_tool_call(tool_call, tools)
        assert result is False

    def test_is_function_tool_call_false_no_function(self):
        tool_call = OpenAIChatCompletionToolCall(
            index=0,
            id="call_123",
            function=None,
        )
        tools = [
            OpenAIResponseInputToolFunction(
                type="function", name="test_function", parameters={"type": "object", "properties": {}}
            ),
        ]

        result = is_function_tool_call(tool_call, tools)
        assert result is False

    def test_is_function_tool_call_false_wrong_type(self):
        tool_call = OpenAIChatCompletionToolCall(
            index=0,
            id="call_123",
            function=OpenAIChatCompletionToolCallFunction(
                name="web_search",
                arguments="{}",
            ),
        )
        tools = [
            OpenAIResponseInputToolWebSearch(type="web_search"),
        ]

        result = is_function_tool_call(tool_call, tools)
        assert result is False


class TestExtractCitationsFromText:
    def test_extract_citations_and_annotations(self):
        text = "Start [not-a-file]. New source <|file-abc123|>. "
        text += "Other source <|file-def456|>? Repeat source <|file-abc123|>! No citation."
        file_mapping = {"file-abc123": "doc1.pdf", "file-def456": "doc2.txt"}

        annotations, cleaned_text = _extract_citations_from_text(text, file_mapping)

        expected_annotations = [
            OpenAIResponseAnnotationFileCitation(file_id="file-abc123", filename="doc1.pdf", index=30),
            OpenAIResponseAnnotationFileCitation(file_id="file-def456", filename="doc2.txt", index=44),
            OpenAIResponseAnnotationFileCitation(file_id="file-abc123", filename="doc1.pdf", index=59),
        ]
        expected_clean_text = "Start [not-a-file]. New source. Other source? Repeat source! No citation."

        assert cleaned_text == expected_clean_text
        assert annotations == expected_annotations
        # OpenAI cites at the end of the sentence
        assert cleaned_text[expected_annotations[0].index] == "."
        assert cleaned_text[expected_annotations[1].index] == "?"
        assert cleaned_text[expected_annotations[2].index] == "!"
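The citation test above pins down the observable contract of _extract_citations_from_text: inline <|file-...|> markers are stripped along with the whitespace before them, and each annotation index points at the character that follows the removed marker in the cleaned text. A rough sketch consistent with those assertions (not the provider's actual implementation; the marker pattern and the skipping of unknown file IDs are assumptions):

import re

_CITATION = re.compile(r"\s*<\|(file-[A-Za-z0-9_-]+)\|>")


def extract_citations_sketch(text: str, file_mapping: dict[str, str]):
    annotations, parts, last_end = [], [], 0
    for match in _CITATION.finditer(text):
        parts.append(text[last_end : match.start()])
        last_end = match.end()
        file_id = match.group(1)
        if file_id in file_mapping:
            # Index of the next character in the cleaned text (the sentence-ending punctuation).
            index = sum(len(p) for p in parts)
            annotations.append((file_id, file_mapping[file_id], index))
    parts.append(text[last_end:])
    return annotations, "".join(parts)

With the test's input this yields indices 30, 44, and 59, matching the expected annotations above.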
@@ -0,0 +1,183 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.


from llama_stack.apis.agents.openai_responses import (
    MCPListToolsTool,
    OpenAIResponseInputToolFileSearch,
    OpenAIResponseInputToolFunction,
    OpenAIResponseInputToolMCP,
    OpenAIResponseInputToolWebSearch,
    OpenAIResponseObject,
    OpenAIResponseOutputMessageMCPListTools,
    OpenAIResponseToolMCP,
)
from llama_stack.providers.inline.agents.meta_reference.responses.types import ToolContext


class TestToolContext:
    def test_no_tools(self):
        tools = []
        context = ToolContext(tools)
        previous_response = OpenAIResponseObject(created_at=1234, id="test", model="mymodel", output=[], status="")
        context.recover_tools_from_previous_response(previous_response)

        assert len(context.tools_to_process) == 0
        assert len(context.previous_tools) == 0
        assert len(context.previous_tool_listings) == 0

    def test_no_previous_tools(self):
        tools = [
            OpenAIResponseInputToolFileSearch(vector_store_ids=["fake"]),
            OpenAIResponseInputToolMCP(server_label="label", server_url="url"),
        ]
        context = ToolContext(tools)
        previous_response = OpenAIResponseObject(created_at=1234, id="test", model="mymodel", output=[], status="")
        context.recover_tools_from_previous_response(previous_response)

        assert len(context.tools_to_process) == 2
        assert len(context.previous_tools) == 0
        assert len(context.previous_tool_listings) == 0

    def test_reusable_server(self):
        tools = [
            OpenAIResponseInputToolFileSearch(vector_store_ids=["fake"]),
            OpenAIResponseInputToolMCP(server_label="alabel", server_url="aurl"),
        ]
        context = ToolContext(tools)
        output = [
            OpenAIResponseOutputMessageMCPListTools(
                id="test", server_label="alabel", tools=[MCPListToolsTool(name="test_tool", input_schema={})]
            )
        ]
        previous_response = OpenAIResponseObject(created_at=1234, id="test", model="fake", output=output, status="")
        previous_response.tools = [
            OpenAIResponseInputToolFileSearch(vector_store_ids=["fake"]),
            OpenAIResponseToolMCP(server_label="alabel"),
        ]
        context.recover_tools_from_previous_response(previous_response)

        assert len(context.tools_to_process) == 1
        assert context.tools_to_process[0].type == "file_search"
        assert len(context.previous_tools) == 1
        assert context.previous_tools["test_tool"].server_label == "alabel"
        assert context.previous_tools["test_tool"].server_url == "aurl"
        assert len(context.previous_tool_listings) == 1
        assert len(context.previous_tool_listings[0].tools) == 1
        assert context.previous_tool_listings[0].server_label == "alabel"

    def test_multiple_reusable_servers(self):
        tools = [
            OpenAIResponseInputToolFunction(name="fake", parameters=None),
            OpenAIResponseInputToolMCP(server_label="anotherlabel", server_url="anotherurl"),
            OpenAIResponseInputToolWebSearch(),
            OpenAIResponseInputToolMCP(server_label="alabel", server_url="aurl"),
        ]
        context = ToolContext(tools)
        output = [
            OpenAIResponseOutputMessageMCPListTools(
                id="test1", server_label="alabel", tools=[MCPListToolsTool(name="test_tool", input_schema={})]
            ),
            OpenAIResponseOutputMessageMCPListTools(
                id="test2",
                server_label="anotherlabel",
                tools=[MCPListToolsTool(name="some_other_tool", input_schema={})],
            ),
        ]
        previous_response = OpenAIResponseObject(created_at=1234, id="test", model="fake", output=output, status="")
        previous_response.tools = [
            OpenAIResponseInputToolFunction(name="fake", parameters=None),
            OpenAIResponseToolMCP(server_label="anotherlabel", server_url="anotherurl"),
            OpenAIResponseInputToolWebSearch(type="web_search"),
            OpenAIResponseToolMCP(server_label="alabel", server_url="aurl"),
        ]
        context.recover_tools_from_previous_response(previous_response)

        assert len(context.tools_to_process) == 2
        assert context.tools_to_process[0].type == "function"
        assert context.tools_to_process[1].type == "web_search"
        assert len(context.previous_tools) == 2
        assert context.previous_tools["test_tool"].server_label == "alabel"
        assert context.previous_tools["test_tool"].server_url == "aurl"
        assert context.previous_tools["some_other_tool"].server_label == "anotherlabel"
        assert context.previous_tools["some_other_tool"].server_url == "anotherurl"
        assert len(context.previous_tool_listings) == 2
        assert len(context.previous_tool_listings[0].tools) == 1
        assert context.previous_tool_listings[0].server_label == "alabel"
        assert len(context.previous_tool_listings[1].tools) == 1
        assert context.previous_tool_listings[1].server_label == "anotherlabel"

    def test_multiple_servers_only_one_reusable(self):
        tools = [
            OpenAIResponseInputToolFunction(name="fake", parameters=None),
            OpenAIResponseInputToolMCP(server_label="anotherlabel", server_url="anotherurl"),
            OpenAIResponseInputToolWebSearch(type="web_search"),
            OpenAIResponseInputToolMCP(server_label="alabel", server_url="aurl"),
        ]
        context = ToolContext(tools)
        output = [
            OpenAIResponseOutputMessageMCPListTools(
                id="test2",
                server_label="anotherlabel",
                tools=[MCPListToolsTool(name="some_other_tool", input_schema={})],
            )
        ]
        previous_response = OpenAIResponseObject(created_at=1234, id="test", model="fake", output=output, status="")
        previous_response.tools = [
            OpenAIResponseInputToolFunction(name="fake", parameters=None),
            OpenAIResponseToolMCP(server_label="anotherlabel", server_url="anotherurl"),
            OpenAIResponseInputToolWebSearch(type="web_search"),
        ]
        context.recover_tools_from_previous_response(previous_response)

        assert len(context.tools_to_process) == 3
        assert context.tools_to_process[0].type == "function"
        assert context.tools_to_process[1].type == "web_search"
        assert context.tools_to_process[2].type == "mcp"
        assert len(context.previous_tools) == 1
        assert context.previous_tools["some_other_tool"].server_label == "anotherlabel"
        assert context.previous_tools["some_other_tool"].server_url == "anotherurl"
        assert len(context.previous_tool_listings) == 1
        assert len(context.previous_tool_listings[0].tools) == 1
        assert context.previous_tool_listings[0].server_label == "anotherlabel"

    def test_mismatched_allowed_tools(self):
        tools = [
            OpenAIResponseInputToolFunction(name="fake", parameters=None),
            OpenAIResponseInputToolMCP(server_label="anotherlabel", server_url="anotherurl"),
            OpenAIResponseInputToolWebSearch(type="web_search"),
            OpenAIResponseInputToolMCP(server_label="alabel", server_url="aurl", allowed_tools=["test_tool_2"]),
        ]
        context = ToolContext(tools)
        output = [
            OpenAIResponseOutputMessageMCPListTools(
                id="test1", server_label="alabel", tools=[MCPListToolsTool(name="test_tool_1", input_schema={})]
            ),
            OpenAIResponseOutputMessageMCPListTools(
                id="test2",
                server_label="anotherlabel",
                tools=[MCPListToolsTool(name="some_other_tool", input_schema={})],
            ),
        ]
        previous_response = OpenAIResponseObject(created_at=1234, id="test", model="fake", output=output, status="")
        previous_response.tools = [
            OpenAIResponseInputToolFunction(name="fake", parameters=None),
            OpenAIResponseToolMCP(server_label="anotherlabel", server_url="anotherurl"),
            OpenAIResponseInputToolWebSearch(type="web_search"),
            OpenAIResponseToolMCP(server_label="alabel", server_url="aurl"),
        ]
        context.recover_tools_from_previous_response(previous_response)

        assert len(context.tools_to_process) == 3
        assert context.tools_to_process[0].type == "function"
        assert context.tools_to_process[1].type == "web_search"
        assert context.tools_to_process[2].type == "mcp"
        assert len(context.previous_tools) == 1
        assert context.previous_tools["some_other_tool"].server_label == "anotherlabel"
        assert context.previous_tools["some_other_tool"].server_url == "anotherurl"
        assert len(context.previous_tool_listings) == 1
        assert len(context.previous_tool_listings[0].tools) == 1
        assert context.previous_tool_listings[0].server_label == "anotherlabel"
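Read together, these cases constrain how ToolContext.recover_tools_from_previous_response must behave: an MCP tool is reused only when the previous response declared the same server (with matching allowed_tools) and produced an mcp_list_tools output for it, recovered listings keep the order of the previous response's output, and everything else stays in tools_to_process. A standalone sketch of that rule, written only from the assertions above; it is not the actual ToolContext code, and the type strings it checks are assumptions:

def recover_sketch(current_tools, previous_response):
    # Assumption: MCP tool definitions carry type == "mcp" and listings carry type == "mcp_list_tools".
    previous_defs = {
        t.server_label: t
        for t in (getattr(previous_response, "tools", None) or [])
        if getattr(t, "type", None) == "mcp"
    }
    listings_by_label = {
        o.server_label: o for o in previous_response.output if getattr(o, "type", None) == "mcp_list_tools"
    }

    tools_to_process, reused = [], {}
    for tool in current_tools:
        prev = previous_defs.get(getattr(tool, "server_label", None))
        if (
            getattr(tool, "type", None) == "mcp"
            and prev is not None
            and tool.server_label in listings_by_label
            and getattr(tool, "allowed_tools", None) == getattr(prev, "allowed_tools", None)
        ):
            reused[tool.server_label] = tool
        else:
            tools_to_process.append(tool)

    # Recovered listings keep the order they appeared in the previous response's output.
    previous_tool_listings = [
        o
        for o in previous_response.output
        if getattr(o, "type", None) == "mcp_list_tools" and o.server_label in reused
    ]
    # Map each previously listed tool name to the (current) MCP server definition that provides it.
    previous_tools = {
        t.name: reused[listing.server_label] for listing in previous_tool_listings for t in listing.tools
    }
    return tools_to_process, previous_tools, previous_tool_listings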
@@ -0,0 +1,155 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from unittest.mock import AsyncMock

import pytest

from llama_stack.apis.agents.agents import ResponseGuardrailSpec
from llama_stack.apis.safety import ModerationObject, ModerationObjectResults
from llama_stack.providers.inline.agents.meta_reference.responses.openai_responses import (
    OpenAIResponsesImpl,
)
from llama_stack.providers.inline.agents.meta_reference.responses.utils import (
    extract_guardrail_ids,
    run_guardrails,
)


@pytest.fixture
def mock_apis():
    """Create mock APIs for testing."""
    return {
        "inference_api": AsyncMock(),
        "tool_groups_api": AsyncMock(),
        "tool_runtime_api": AsyncMock(),
        "responses_store": AsyncMock(),
        "vector_io_api": AsyncMock(),
        "conversations_api": AsyncMock(),
        "safety_api": AsyncMock(),
    }


@pytest.fixture
def responses_impl(mock_apis):
    """Create OpenAIResponsesImpl instance with mocked dependencies."""
    return OpenAIResponsesImpl(**mock_apis)


def test_extract_guardrail_ids_from_strings(responses_impl):
    """Test extraction from simple string guardrail IDs."""
    guardrails = ["llama-guard", "content-filter", "nsfw-detector"]
    result = extract_guardrail_ids(guardrails)
    assert result == ["llama-guard", "content-filter", "nsfw-detector"]


def test_extract_guardrail_ids_from_objects(responses_impl):
    """Test extraction from ResponseGuardrailSpec objects."""
    guardrails = [
        ResponseGuardrailSpec(type="llama-guard"),
        ResponseGuardrailSpec(type="content-filter"),
    ]
    result = extract_guardrail_ids(guardrails)
    assert result == ["llama-guard", "content-filter"]


def test_extract_guardrail_ids_mixed_formats(responses_impl):
    """Test extraction from mixed string and object formats."""
    guardrails = [
        "llama-guard",
        ResponseGuardrailSpec(type="content-filter"),
        "nsfw-detector",
    ]
    result = extract_guardrail_ids(guardrails)
    assert result == ["llama-guard", "content-filter", "nsfw-detector"]


def test_extract_guardrail_ids_none_input(responses_impl):
    """Test extraction with None input."""
    result = extract_guardrail_ids(None)
    assert result == []


def test_extract_guardrail_ids_empty_list(responses_impl):
    """Test extraction with empty list."""
    result = extract_guardrail_ids([])
    assert result == []


def test_extract_guardrail_ids_unknown_format(responses_impl):
    """Test extraction with unknown guardrail format raises ValueError."""
    # Create an object that's neither string nor ResponseGuardrailSpec
    unknown_object = {"invalid": "format"}  # Plain dict, not ResponseGuardrailSpec
    guardrails = ["valid-guardrail", unknown_object, "another-guardrail"]
    with pytest.raises(ValueError, match="Unknown guardrail format.*expected str or ResponseGuardrailSpec"):
        extract_guardrail_ids(guardrails)


@pytest.fixture
def mock_safety_api():
    """Create mock safety API for guardrails testing."""
    safety_api = AsyncMock()
    # Mock the routing table and shields list for guardrails lookup
    safety_api.routing_table = AsyncMock()
    shield = AsyncMock()
    shield.identifier = "llama-guard"
    shield.provider_resource_id = "llama-guard-model"
    safety_api.routing_table.list_shields.return_value = AsyncMock(data=[shield])
    return safety_api


async def test_run_guardrails_no_violation(mock_safety_api):
    """Test guardrails validation with no violations."""
    text = "Hello world"
    guardrail_ids = ["llama-guard"]

    # Mock moderation to return non-flagged content
    unflagged_result = ModerationObjectResults(flagged=False, categories={"violence": False})
    mock_moderation_object = ModerationObject(id="test-mod-id", model="llama-guard-model", results=[unflagged_result])
    mock_safety_api.run_moderation.return_value = mock_moderation_object

    result = await run_guardrails(mock_safety_api, text, guardrail_ids)

    assert result is None
    # Verify run_moderation was called with the correct model
    mock_safety_api.run_moderation.assert_called_once()
    call_args = mock_safety_api.run_moderation.call_args
    assert call_args[1]["model"] == "llama-guard-model"


async def test_run_guardrails_with_violation(mock_safety_api):
    """Test guardrails validation with safety violation."""
    text = "Harmful content"
    guardrail_ids = ["llama-guard"]

    # Mock moderation to return flagged content
    flagged_result = ModerationObjectResults(
        flagged=True,
        categories={"violence": True},
        user_message="Content flagged by moderation",
        metadata={"violation_type": ["S1"]},
    )
    mock_moderation_object = ModerationObject(id="test-mod-id", model="llama-guard-model", results=[flagged_result])
    mock_safety_api.run_moderation.return_value = mock_moderation_object

    result = await run_guardrails(mock_safety_api, text, guardrail_ids)

    assert result == "Content flagged by moderation (flagged for: violence) (violation type: S1)"


async def test_run_guardrails_empty_inputs(mock_safety_api):
    """Test guardrails validation with empty inputs."""
    # Test empty guardrail_ids
    result = await run_guardrails(mock_safety_api, "test", [])
    assert result is None

    # Test empty text
    result = await run_guardrails(mock_safety_api, "", ["llama-guard"])
    assert result is None

    # Test both empty
    result = await run_guardrails(mock_safety_api, "", [])
    assert result is None
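These tests fix the externally visible behaviour of run_guardrails: empty text or an empty guardrail list short-circuits to None, guardrail IDs are resolved to shields through the safety API's routing table, moderation runs against the shield's provider_resource_id, and a flagged result is summarised as a human-readable string. A sketch of that contract derived only from the assertions above, not from the real implementation (the handling of guardrail IDs with no matching shield is an assumption):

async def run_guardrails_sketch(safety_api, text: str, guardrail_ids: list[str]) -> str | None:
    if not text or not guardrail_ids:
        return None

    shields = (await safety_api.routing_table.list_shields()).data
    shields_by_id = {shield.identifier: shield for shield in shields}

    for guardrail_id in guardrail_ids:
        shield = shields_by_id.get(guardrail_id)
        if shield is None:
            continue  # Assumption: IDs with no matching shield are skipped.
        moderation = await safety_api.run_moderation(input=text, model=shield.provider_resource_id)
        for result in moderation.results:
            if result.flagged:
                flagged_for = ", ".join(name for name, hit in (result.categories or {}).items() if hit)
                violation_types = ", ".join((result.metadata or {}).get("violation_type", []))
                return f"{result.user_message} (flagged for: {flagged_for}) (violation type: {violation_types})"
    return None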