mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-07-22 20:43:59 +00:00
feat: Allow application/yaml as mime_type (#2575)
# What does this PR do?
Allow application/yaml as mime_type for documents.

## Test Plan
Added unit tests.
This commit is contained in:
parent
b2c7543af7
commit
89c49eb003
2 changed files with 197 additions and 1 deletions
|
@ -10,6 +10,7 @@ import re
|
||||||
import secrets
|
import secrets
|
||||||
import string
|
import string
|
||||||
import uuid
|
import uuid
|
||||||
|
import warnings
|
||||||
from collections.abc import AsyncGenerator
|
from collections.abc import AsyncGenerator
|
||||||
from datetime import UTC, datetime
|
from datetime import UTC, datetime
|
||||||
|
|
||||||
|
@ -911,8 +912,16 @@ async def load_data_from_url(url: str) -> str:
|
||||||
|
|
||||||
|
|
||||||
async def get_raw_document_text(document: Document) -> str:
|
async def get_raw_document_text(document: Document) -> str:
|
||||||
if not document.mime_type.startswith("text/"):
|
# Handle deprecated text/yaml mime type with warning
|
||||||
|
if document.mime_type == "text/yaml":
|
||||||
|
warnings.warn(
|
||||||
|
"The 'text/yaml' MIME type is deprecated. Please use 'application/yaml' instead.",
|
||||||
|
DeprecationWarning,
|
||||||
|
stacklevel=2,
|
||||||
|
)
|
||||||
|
elif not (document.mime_type.startswith("text/") or document.mime_type == "application/yaml"):
|
||||||
raise ValueError(f"Unexpected document mime type: {document.mime_type}")
|
raise ValueError(f"Unexpected document mime type: {document.mime_type}")
|
||||||
|
|
||||||
if isinstance(document.content, URL):
|
if isinstance(document.content, URL):
|
||||||
return await load_data_from_url(document.content.uri)
|
return await load_data_from_url(document.content.uri)
|
||||||
elif isinstance(document.content, str):
|
elif isinstance(document.content, str):
|
||||||
|
|
187
tests/unit/providers/agent/test_get_raw_document_text.py
Normal file
187
tests/unit/providers/agent/test_get_raw_document_text.py
Normal file
|
@ -0,0 +1,187 @@
|
||||||
|
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||||
|
# All rights reserved.
|
||||||
|
#
|
||||||
|
# This source code is licensed under the terms described in the LICENSE file in
|
||||||
|
# the root directory of this source tree.
|
||||||
|
|
||||||
|
import warnings
|
||||||
|
from unittest.mock import AsyncMock, MagicMock, patch
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from llama_stack.apis.agents import Document
|
||||||
|
from llama_stack.apis.common.content_types import URL, TextContentItem
|
||||||
|
from llama_stack.providers.inline.agents.meta_reference.agent_instance import get_raw_document_text
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
async def test_get_raw_document_text_supports_text_mime_types():
    """A plain-string document with a text/* MIME type comes back unchanged."""
    expected = "Sample text content"
    doc = Document(content=expected, mime_type="text/plain")

    assert await get_raw_document_text(doc) == expected
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
async def test_get_raw_document_text_supports_yaml_mime_type():
    """A string document tagged application/yaml is accepted and returned as-is."""
    yaml_content = """
name: test
version: 1.0
items:
- item1
- item2
"""
    doc = Document(content=yaml_content, mime_type="application/yaml")

    extracted = await get_raw_document_text(doc)

    assert extracted == yaml_content
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
async def test_get_raw_document_text_supports_deprecated_text_yaml_with_warning():
    """text/yaml is still accepted, but exactly one DeprecationWarning is raised."""
    yaml_content = """
name: test
version: 1.0
items:
- item1
- item2
"""
    doc = Document(content=yaml_content, mime_type="text/yaml")

    with warnings.catch_warnings(record=True) as caught:
        # Record every warning regardless of filters configured elsewhere.
        warnings.simplefilter("always")
        extracted = await get_raw_document_text(doc)

    # The content itself must pass through untouched.
    assert extracted == yaml_content

    # One warning only, of the right category, naming both MIME types.
    assert len(caught) == 1
    recorded = caught[0]
    message = str(recorded.message)
    assert issubclass(recorded.category, DeprecationWarning)
    assert "text/yaml" in message
    assert "application/yaml" in message
    assert "deprecated" in message.lower()
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
async def test_get_raw_document_text_deprecated_text_yaml_with_url():
    """URL-backed text/yaml documents are loaded and still trigger the deprecation warning."""
    yaml_content = "name: test\nversion: 1.0"
    doc = Document(content=URL(uri="https://example.com/config.yaml"), mime_type="text/yaml")

    # Replace the URL loader with a mock that returns the canned YAML.
    with patch("llama_stack.providers.inline.agents.meta_reference.agent_instance.load_data_from_url") as mock_load:
        mock_load.return_value = yaml_content

        with warnings.catch_warnings(record=True) as caught:
            warnings.simplefilter("always")
            extracted = await get_raw_document_text(doc)

    # The loader's payload is returned and the loader saw the document URI.
    assert extracted == yaml_content
    mock_load.assert_called_once_with("https://example.com/config.yaml")

    # A single DeprecationWarning mentioning the old MIME type was recorded.
    assert len(caught) == 1
    recorded = caught[0]
    assert issubclass(recorded.category, DeprecationWarning)
    assert "text/yaml" in str(recorded.message)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
async def test_get_raw_document_text_deprecated_text_yaml_with_text_content_item():
    """A TextContentItem tagged text/yaml yields its text plus a deprecation warning."""
    yaml_content = "key: value\nlist:\n - item1\n - item2"
    doc = Document(content=TextContentItem(text=yaml_content), mime_type="text/yaml")

    with warnings.catch_warnings(record=True) as caught:
        # Record every warning regardless of filters configured elsewhere.
        warnings.simplefilter("always")
        extracted = await get_raw_document_text(doc)

    # The wrapped text is returned unchanged.
    assert extracted == yaml_content

    # A single DeprecationWarning mentioning the old MIME type was recorded.
    assert len(caught) == 1
    recorded = caught[0]
    assert issubclass(recorded.category, DeprecationWarning)
    assert "text/yaml" in str(recorded.message)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
async def test_get_raw_document_text_rejects_unsupported_mime_types():
    """A MIME type outside text/* and application/yaml raises ValueError."""
    # application/json is not one of the accepted MIME types.
    doc = Document(content="Some content", mime_type="application/json")

    expected_error = "Unexpected document mime type: application/json"
    with pytest.raises(ValueError, match=expected_error):
        await get_raw_document_text(doc)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
async def test_get_raw_document_text_with_url_content():
    """URL-backed text/plain documents are resolved through load_data_from_url.

    The loader is patched, so the test relies only on the mock's return value
    rather than a real fetch. It checks that the loader's payload is returned
    and that the loader was called once with the document's URI.
    """
    # An unused AsyncMock "response" object was set up here but never
    # consumed; only the patched loader matters, so it has been removed.
    with patch("llama_stack.providers.inline.agents.meta_reference.agent_instance.load_data_from_url") as mock_load:
        mock_load.return_value = "Content from URL"

        document = Document(content=URL(uri="https://example.com/test.txt"), mime_type="text/plain")

        result = await get_raw_document_text(document)

        assert result == "Content from URL"
        mock_load.assert_called_once_with("https://example.com/test.txt")
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
async def test_get_raw_document_text_with_yaml_url():
    """URL-backed application/yaml documents are fetched via the (patched) loader."""
    yaml_content = "name: test\nversion: 1.0"
    doc = Document(content=URL(uri="https://example.com/config.yaml"), mime_type="application/yaml")

    # Replace the URL loader with a mock that returns the canned YAML.
    with patch("llama_stack.providers.inline.agents.meta_reference.agent_instance.load_data_from_url") as mock_load:
        mock_load.return_value = yaml_content
        extracted = await get_raw_document_text(doc)

    assert extracted == yaml_content
    mock_load.assert_called_once_with("https://example.com/config.yaml")
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
async def test_get_raw_document_text_with_text_content_item():
    """A text/plain TextContentItem yields the text it wraps."""
    expected = "Text content item"
    doc = Document(content=TextContentItem(text=expected), mime_type="text/plain")

    assert await get_raw_document_text(doc) == expected
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
async def test_get_raw_document_text_with_yaml_text_content_item():
    """An application/yaml TextContentItem yields the YAML text it wraps."""
    yaml_content = "key: value\nlist:\n - item1\n - item2"
    doc = Document(content=TextContentItem(text=yaml_content), mime_type="application/yaml")

    extracted = await get_raw_document_text(doc)

    assert extracted == yaml_content
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
async def test_get_raw_document_text_rejects_unexpected_content_type():
    """Content that is not a str, URL, or TextContentItem raises ValueError."""
    # A spec'd MagicMock stands in for Document so that an invalid content
    # value can be set without going through model validation.
    bogus_doc = MagicMock(spec=Document)
    bogus_doc.mime_type = "text/plain"
    bogus_doc.content = 123  # an int is none of the supported content kinds

    expected_error = "Unexpected document content type: <class 'int'>"
    with pytest.raises(ValueError, match=expected_error):
        await get_raw_document_text(bogus_doc)
|
Loading…
Add table
Add a link
Reference in a new issue