From 56269245c2877f8c1b5c66c16c89c48316a59e73 Mon Sep 17 00:00:00 2001 From: Christian Zaccaria <73656840+ChristianZaccaria@users.noreply.github.com> Date: Mon, 21 Jul 2025 10:40:00 +0100 Subject: [PATCH 1/4] fix: Add permissions for pull request creation in coverage-badge workflow (#2832) # What does this PR do? The workflow that automatically creates a PR to update the Coverage Badge fails as the `GITHUB_TOKEN` doesn't have write permissions. As opposed to providing write permissions to the token, we can provide the permissions for just this workflow with this PR. --- .github/workflows/coverage-badge.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/coverage-badge.yml b/.github/workflows/coverage-badge.yml index 6b2f133dd..54bde1749 100644 --- a/.github/workflows/coverage-badge.yml +++ b/.github/workflows/coverage-badge.yml @@ -15,6 +15,9 @@ on: jobs: unit-tests: + permissions: + contents: write # for peter-evans/create-pull-request to create branch + pull-requests: write # for peter-evans/create-pull-request to create a PR runs-on: ubuntu-latest steps: - name: Checkout repository From ecd28f0085413d83d771738ccd197eb2fa3c6a6a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Han?= Date: Mon, 21 Jul 2025 11:47:17 +0200 Subject: [PATCH 2/4] chore: add contribution guideline around PRs (#2811) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit More contributing guidelines. Signed-off-by: Sébastien Han --- CONTRIBUTING.md | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 75b29213c..8d866328b 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -10,8 +10,13 @@ If in doubt, please open a [discussion](https://github.com/meta-llama/llama-stac **I'd like to contribute!** -All issues are actionable (please report if they are not.) Pick one and start working on it. Thank you. 
-If you need help or guidance, comment on the issue. Issues that are extra friendly to new contributors are tagged with "contributor friendly". +If you are new to the project, start by looking at the issues tagged with "good first issue". If you're interested +leave a comment on the issue and a triager will assign it to you. + +Please avoid picking up too many issues at once. This helps you stay focused and ensures that others in the community also have opportunities to contribute. +- Try to work on only 1–2 issues at a time, especially if you’re still getting familiar with the codebase. +- Before taking an issue, check if it’s already assigned or being actively discussed. +- If you’re blocked or can’t continue with an issue, feel free to unassign yourself or leave a comment so others can step in. **I have a bug!** @@ -41,6 +46,15 @@ If you need help or guidance, comment on the issue. Issues that are extra friend 4. Make sure your code lints using `pre-commit`. 5. If you haven't already, complete the Contributor License Agreement ("CLA"). 6. Ensure your pull request follows the [conventional commits format](https://www.conventionalcommits.org/en/v1.0.0/). +7. Ensure your pull request follows the [coding style](#coding-style). + + +Please keep pull requests (PRs) small and focused. If you have a large set of changes, consider splitting them into logically grouped, smaller PRs to facilitate review and testing. + +> [!TIP] +> As a general guideline: +> - Experienced contributors should try to keep no more than 5 open PRs at a time. +> - New contributors are encouraged to have only one open PR at a time until they’re familiar with the codebase and process. ## Contributor License Agreement ("CLA") In order to accept your pull request, we need you to submit a CLA. You only need @@ -140,7 +154,9 @@ uv sync * Don't use unicode characters in the codebase. ASCII-only is preferred for compatibility or readability reasons. 
* Providers configuration class should be Pydantic Field class. It should have a `description` field - that describes the configuration. These descriptions will be used to generate the provider documentation. + that describes the configuration. These descriptions will be used to generate the provider + documentation. +* When possible, use keyword arguments only when calling functions. ## Common Tasks From b2c7543af72d80103fac4e1e8ff41302e59bfe57 Mon Sep 17 00:00:00 2001 From: Mustafa Elbehery Date: Mon, 21 Jul 2025 14:03:40 +0200 Subject: [PATCH 3/4] fix(vectordb): VectorDBInput has no provider_id (#2830) # What does this PR do? This PR adds a `provider_id` field to the `VectorDBInput` class. fixes https://github.com/meta-llama/llama-stack/issues/2819 Signed-off-by: Mustafa Elbehery --- llama_stack/apis/vector_dbs/vector_dbs.py | 1 + 1 file changed, 1 insertion(+) diff --git a/llama_stack/apis/vector_dbs/vector_dbs.py b/llama_stack/apis/vector_dbs/vector_dbs.py index 0d160737a..325e21bab 100644 --- a/llama_stack/apis/vector_dbs/vector_dbs.py +++ b/llama_stack/apis/vector_dbs/vector_dbs.py @@ -34,6 +34,7 @@ class VectorDBInput(BaseModel): vector_db_id: str embedding_model: str embedding_dimension: int + provider_id: str | None = None provider_vector_db_id: str | None = None From 89c49eb003cfc7b70babf85bab81bc3ebc81b63b Mon Sep 17 00:00:00 2001 From: Ondrej Metelka Date: Mon, 21 Jul 2025 15:43:32 +0200 Subject: [PATCH 4/4] feat: Allow application/yaml as mime_type (#2575) # What does this PR do? Allow application/yaml as mime_type for documents. ## Test Plan Added unit tests. 
--- .../agents/meta_reference/agent_instance.py | 11 +- .../agent/test_get_raw_document_text.py | 187 ++++++++++++++++++ 2 files changed, 197 insertions(+), 1 deletion(-) create mode 100644 tests/unit/providers/agent/test_get_raw_document_text.py diff --git a/llama_stack/providers/inline/agents/meta_reference/agent_instance.py b/llama_stack/providers/inline/agents/meta_reference/agent_instance.py index 4d2b9f8bf..3c34c71fb 100644 --- a/llama_stack/providers/inline/agents/meta_reference/agent_instance.py +++ b/llama_stack/providers/inline/agents/meta_reference/agent_instance.py @@ -10,6 +10,7 @@ import re import secrets import string import uuid +import warnings from collections.abc import AsyncGenerator from datetime import UTC, datetime @@ -911,8 +912,16 @@ async def load_data_from_url(url: str) -> str: async def get_raw_document_text(document: Document) -> str: - if not document.mime_type.startswith("text/"): + # Handle deprecated text/yaml mime type with warning + if document.mime_type == "text/yaml": + warnings.warn( + "The 'text/yaml' MIME type is deprecated. Please use 'application/yaml' instead.", + DeprecationWarning, + stacklevel=2, + ) + elif not (document.mime_type.startswith("text/") or document.mime_type == "application/yaml"): raise ValueError(f"Unexpected document mime type: {document.mime_type}") + if isinstance(document.content, URL): return await load_data_from_url(document.content.uri) elif isinstance(document.content, str): diff --git a/tests/unit/providers/agent/test_get_raw_document_text.py b/tests/unit/providers/agent/test_get_raw_document_text.py new file mode 100644 index 000000000..ddc886293 --- /dev/null +++ b/tests/unit/providers/agent/test_get_raw_document_text.py @@ -0,0 +1,187 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
+ +import warnings +from unittest.mock import AsyncMock, MagicMock, patch + +import pytest + +from llama_stack.apis.agents import Document +from llama_stack.apis.common.content_types import URL, TextContentItem +from llama_stack.providers.inline.agents.meta_reference.agent_instance import get_raw_document_text + + +@pytest.mark.asyncio +async def test_get_raw_document_text_supports_text_mime_types(): + """Test that the function accepts text/* mime types.""" + document = Document(content="Sample text content", mime_type="text/plain") + + result = await get_raw_document_text(document) + assert result == "Sample text content" + + +@pytest.mark.asyncio +async def test_get_raw_document_text_supports_yaml_mime_type(): + """Test that the function accepts application/yaml mime type.""" + yaml_content = """ + name: test + version: 1.0 + items: + - item1 + - item2 + """ + + document = Document(content=yaml_content, mime_type="application/yaml") + + result = await get_raw_document_text(document) + assert result == yaml_content + + +@pytest.mark.asyncio +async def test_get_raw_document_text_supports_deprecated_text_yaml_with_warning(): + """Test that the function accepts text/yaml but emits a deprecation warning.""" + yaml_content = """ + name: test + version: 1.0 + items: + - item1 + - item2 + """ + + document = Document(content=yaml_content, mime_type="text/yaml") + + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + result = await get_raw_document_text(document) + + # Check that result is correct + assert result == yaml_content + + # Check that exactly one warning was issued + assert len(w) == 1 + assert issubclass(w[0].category, DeprecationWarning) + assert "text/yaml" in str(w[0].message) + assert "application/yaml" in str(w[0].message) + assert "deprecated" in str(w[0].message).lower() + + +@pytest.mark.asyncio +async def test_get_raw_document_text_deprecated_text_yaml_with_url(): + """Test that text/yaml works with URL content and emits 
warning.""" + yaml_content = "name: test\nversion: 1.0" + + with patch("llama_stack.providers.inline.agents.meta_reference.agent_instance.load_data_from_url") as mock_load: + mock_load.return_value = yaml_content + + document = Document(content=URL(uri="https://example.com/config.yaml"), mime_type="text/yaml") + + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + result = await get_raw_document_text(document) + + # Check that result is correct + assert result == yaml_content + mock_load.assert_called_once_with("https://example.com/config.yaml") + + # Check that deprecation warning was issued + assert len(w) == 1 + assert issubclass(w[0].category, DeprecationWarning) + assert "text/yaml" in str(w[0].message) + + +@pytest.mark.asyncio +async def test_get_raw_document_text_deprecated_text_yaml_with_text_content_item(): + """Test that text/yaml works with TextContentItem and emits warning.""" + yaml_content = "key: value\nlist:\n - item1\n - item2" + + document = Document(content=TextContentItem(text=yaml_content), mime_type="text/yaml") + + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + result = await get_raw_document_text(document) + + # Check that result is correct + assert result == yaml_content + + # Check that deprecation warning was issued + assert len(w) == 1 + assert issubclass(w[0].category, DeprecationWarning) + assert "text/yaml" in str(w[0].message) + + +@pytest.mark.asyncio +async def test_get_raw_document_text_rejects_unsupported_mime_types(): + """Test that the function rejects unsupported mime types.""" + document = Document( + content="Some content", + mime_type="application/json", # Not supported + ) + + with pytest.raises(ValueError, match="Unexpected document mime type: application/json"): + await get_raw_document_text(document) + + +@pytest.mark.asyncio +async def test_get_raw_document_text_with_url_content(): + """Test that the function handles URL content correctly.""" + 
mock_response = AsyncMock() + mock_response.text = "Content from URL" + + with patch("llama_stack.providers.inline.agents.meta_reference.agent_instance.load_data_from_url") as mock_load: + mock_load.return_value = "Content from URL" + + document = Document(content=URL(uri="https://example.com/test.txt"), mime_type="text/plain") + + result = await get_raw_document_text(document) + assert result == "Content from URL" + mock_load.assert_called_once_with("https://example.com/test.txt") + + +@pytest.mark.asyncio +async def test_get_raw_document_text_with_yaml_url(): + """Test that the function handles YAML URLs correctly.""" + yaml_content = "name: test\nversion: 1.0" + + with patch("llama_stack.providers.inline.agents.meta_reference.agent_instance.load_data_from_url") as mock_load: + mock_load.return_value = yaml_content + + document = Document(content=URL(uri="https://example.com/config.yaml"), mime_type="application/yaml") + + result = await get_raw_document_text(document) + assert result == yaml_content + mock_load.assert_called_once_with("https://example.com/config.yaml") + + +@pytest.mark.asyncio +async def test_get_raw_document_text_with_text_content_item(): + """Test that the function handles TextContentItem correctly.""" + document = Document(content=TextContentItem(text="Text content item"), mime_type="text/plain") + + result = await get_raw_document_text(document) + assert result == "Text content item" + + +@pytest.mark.asyncio +async def test_get_raw_document_text_with_yaml_text_content_item(): + """Test that the function handles YAML TextContentItem correctly.""" + yaml_content = "key: value\nlist:\n - item1\n - item2" + + document = Document(content=TextContentItem(text=yaml_content), mime_type="application/yaml") + + result = await get_raw_document_text(document) + assert result == yaml_content + + +@pytest.mark.asyncio +async def test_get_raw_document_text_rejects_unexpected_content_type(): + """Test that the function rejects unexpected document content 
types.""" + # Create a mock document that bypasses Pydantic validation + mock_document = MagicMock(spec=Document) + mock_document.mime_type = "text/plain" + mock_document.content = 123 # Unexpected content type (not str, URL, or TextContentItem) + + with pytest.raises(ValueError, match="Unexpected document content type: "): + await get_raw_document_text(mock_document)