From 89c49eb003cfc7b70babf85bab81bc3ebc81b63b Mon Sep 17 00:00:00 2001 From: Ondrej Metelka Date: Mon, 21 Jul 2025 15:43:32 +0200 Subject: [PATCH] feat: Allow application/yaml as mime_type (#2575) # What does this PR do? Allow application/yaml as mime_type for documents. ## Test Plan Added unit tests. --- .../agents/meta_reference/agent_instance.py | 11 +- .../agent/test_get_raw_document_text.py | 187 ++++++++++++++++++ 2 files changed, 197 insertions(+), 1 deletion(-) create mode 100644 tests/unit/providers/agent/test_get_raw_document_text.py diff --git a/llama_stack/providers/inline/agents/meta_reference/agent_instance.py b/llama_stack/providers/inline/agents/meta_reference/agent_instance.py index 4d2b9f8bf..3c34c71fb 100644 --- a/llama_stack/providers/inline/agents/meta_reference/agent_instance.py +++ b/llama_stack/providers/inline/agents/meta_reference/agent_instance.py @@ -10,6 +10,7 @@ import re import secrets import string import uuid +import warnings from collections.abc import AsyncGenerator from datetime import UTC, datetime @@ -911,8 +912,16 @@ async def load_data_from_url(url: str) -> str: async def get_raw_document_text(document: Document) -> str: - if not document.mime_type.startswith("text/"): + # Handle deprecated text/yaml mime type with warning + if document.mime_type == "text/yaml": + warnings.warn( + "The 'text/yaml' MIME type is deprecated. Please use 'application/yaml' instead.", + DeprecationWarning, + stacklevel=2, + ) + elif not (document.mime_type.startswith("text/") or document.mime_type == "application/yaml"): raise ValueError(f"Unexpected document mime type: {document.mime_type}") + if isinstance(document.content, URL): return await load_data_from_url(document.content.uri) elif isinstance(document.content, str): diff --git a/tests/unit/providers/agent/test_get_raw_document_text.py b/tests/unit/providers/agent/test_get_raw_document_text.py new file mode 100644 index 000000000..ddc886293 --- /dev/null +++ b/tests/unit/providers/agent/test_get_raw_document_text.py @@ -0,0 +1,187 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +import warnings +from unittest.mock import AsyncMock, MagicMock, patch + +import pytest + +from llama_stack.apis.agents import Document +from llama_stack.apis.common.content_types import URL, TextContentItem +from llama_stack.providers.inline.agents.meta_reference.agent_instance import get_raw_document_text + + +@pytest.mark.asyncio +async def test_get_raw_document_text_supports_text_mime_types(): + """Test that the function accepts text/* mime types.""" + document = Document(content="Sample text content", mime_type="text/plain") + + result = await get_raw_document_text(document) + assert result == "Sample text content" + + +@pytest.mark.asyncio +async def test_get_raw_document_text_supports_yaml_mime_type(): + """Test that the function accepts application/yaml mime type.""" + yaml_content = """ + name: test + version: 1.0 + items: + - item1 + - item2 + """ + + document = Document(content=yaml_content, mime_type="application/yaml") + + result = await get_raw_document_text(document) + assert result == yaml_content + + +@pytest.mark.asyncio +async def test_get_raw_document_text_supports_deprecated_text_yaml_with_warning(): + """Test that the function accepts text/yaml but emits a deprecation warning.""" + yaml_content = """ + name: test + version: 1.0 + items: + - item1 + - item2 + """ + + document = Document(content=yaml_content, mime_type="text/yaml") + + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + result = await get_raw_document_text(document) + + # Check that result is correct + assert result == yaml_content + + # Check that exactly one warning was issued + assert len(w) == 1 + assert issubclass(w[0].category, DeprecationWarning) + assert "text/yaml" in str(w[0].message) + assert "application/yaml" in str(w[0].message) + assert "deprecated" in str(w[0].message).lower() + + +@pytest.mark.asyncio +async def test_get_raw_document_text_deprecated_text_yaml_with_url(): + """Test that text/yaml works with URL content and emits warning.""" + yaml_content = "name: test\nversion: 1.0" + + with patch("llama_stack.providers.inline.agents.meta_reference.agent_instance.load_data_from_url") as mock_load: + mock_load.return_value = yaml_content + + document = Document(content=URL(uri="https://example.com/config.yaml"), mime_type="text/yaml") + + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + result = await get_raw_document_text(document) + + # Check that result is correct + assert result == yaml_content + mock_load.assert_called_once_with("https://example.com/config.yaml") + + # Check that deprecation warning was issued + assert len(w) == 1 + assert issubclass(w[0].category, DeprecationWarning) + assert "text/yaml" in str(w[0].message) + + +@pytest.mark.asyncio +async def test_get_raw_document_text_deprecated_text_yaml_with_text_content_item(): + """Test that text/yaml works with TextContentItem and emits warning.""" + yaml_content = "key: value\nlist:\n - item1\n - item2" + + document = Document(content=TextContentItem(text=yaml_content), mime_type="text/yaml") + + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + result = await get_raw_document_text(document) + + # Check that result is correct + assert result == yaml_content + + # Check that deprecation warning was issued + assert len(w) == 1 + assert issubclass(w[0].category, DeprecationWarning) + assert "text/yaml" in str(w[0].message) + + +@pytest.mark.asyncio +async def test_get_raw_document_text_rejects_unsupported_mime_types(): + """Test that the function rejects unsupported mime types.""" + document = Document( + content="Some content", + mime_type="application/json", # Not supported + ) + + with pytest.raises(ValueError, match="Unexpected document mime type: application/json"): + await get_raw_document_text(document) + + +@pytest.mark.asyncio +async def test_get_raw_document_text_with_url_content(): + """Test that the function handles URL content correctly.""" + mock_response = AsyncMock() + mock_response.text = "Content from URL" + + with patch("llama_stack.providers.inline.agents.meta_reference.agent_instance.load_data_from_url") as mock_load: + mock_load.return_value = "Content from URL" + + document = Document(content=URL(uri="https://example.com/test.txt"), mime_type="text/plain") + + result = await get_raw_document_text(document) + assert result == "Content from URL" + mock_load.assert_called_once_with("https://example.com/test.txt") + + +@pytest.mark.asyncio +async def test_get_raw_document_text_with_yaml_url(): + """Test that the function handles YAML URLs correctly.""" + yaml_content = "name: test\nversion: 1.0" + + with patch("llama_stack.providers.inline.agents.meta_reference.agent_instance.load_data_from_url") as mock_load: + mock_load.return_value = yaml_content + + document = Document(content=URL(uri="https://example.com/config.yaml"), mime_type="application/yaml") + + result = await get_raw_document_text(document) + assert result == yaml_content + mock_load.assert_called_once_with("https://example.com/config.yaml") + + +@pytest.mark.asyncio +async def test_get_raw_document_text_with_text_content_item(): + """Test that the function handles TextContentItem correctly.""" + document = Document(content=TextContentItem(text="Text content item"), mime_type="text/plain") + + result = await get_raw_document_text(document) + assert result == "Text content item" + + +@pytest.mark.asyncio +async def test_get_raw_document_text_with_yaml_text_content_item(): + """Test that the function handles YAML TextContentItem correctly.""" + yaml_content = "key: value\nlist:\n - item1\n - item2" + + document = Document(content=TextContentItem(text=yaml_content), mime_type="application/yaml") + + result = await get_raw_document_text(document) + assert result == yaml_content + + +@pytest.mark.asyncio +async def test_get_raw_document_text_rejects_unexpected_content_type(): + """Test that the function rejects unexpected document content types.""" + # Create a mock document that bypasses Pydantic validation + mock_document = MagicMock(spec=Document) + mock_document.mime_type = "text/plain" + mock_document.content = 123 # Unexpected content type (not str, URL, or TextContentItem) + + with pytest.raises(ValueError, match="Unexpected document content type: "): + await get_raw_document_text(mock_document)