Merge branch 'main' into feature/dpo-training

Nehanth Narendrula 2025-07-29 14:57:22 -04:00 committed by GitHub
commit b68b818539
265 changed files with 10254 additions and 7796 deletions

View file

@@ -5,17 +5,20 @@
# the root directory of this source tree.
from io import BytesIO
from unittest.mock import patch
import pytest
from openai import OpenAI
from llama_stack.distribution.datatypes import User
from llama_stack.distribution.library_client import LlamaStackAsLibraryClient
def test_openai_client_basic_operations(openai_client, client_with_models):
def test_openai_client_basic_operations(compat_client, client_with_models):
"""Test basic file operations through OpenAI client."""
if isinstance(client_with_models, LlamaStackAsLibraryClient):
pytest.skip("OpenAI files are not supported when testing with library client yet.")
client = openai_client
if isinstance(client_with_models, LlamaStackAsLibraryClient) and isinstance(compat_client, OpenAI):
pytest.skip("OpenAI files are not supported when testing with LlamaStackAsLibraryClient")
client = compat_client
test_content = b"files test content"
@@ -41,7 +44,12 @@ def test_openai_client_basic_operations(openai_client, client_with_models):
# Retrieve file content - OpenAI client returns httpx Response object
content_response = client.files.content(uploaded_file.id)
# The response is an httpx Response object with .content attribute containing bytes
content = content_response.content
if isinstance(content_response, str):
# Llama Stack Client returns a str
# TODO: fix Llama Stack Client
content = bytes(content_response, "utf-8")
else:
content = content_response.content
assert content == test_content
# Delete file
@@ -55,3 +63,218 @@ def test_openai_client_basic_operations(openai_client, client_with_models):
except Exception:
pass
raise e
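The str-versus-httpx-Response branch added above reappears in each of the new authentication tests below. A small helper could consolidate it; this is a hypothetical sketch (the _read_file_content name is not part of this diff):

def _read_file_content(content_response) -> bytes:
    # The OpenAI client returns an httpx Response whose .content is bytes,
    # while the Llama Stack client currently returns a str (see TODO above).
    if isinstance(content_response, str):
        return content_response.encode("utf-8")
    return content_response.content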
@patch("llama_stack.providers.utils.sqlstore.authorized_sqlstore.get_authenticated_user")
def test_files_authentication_isolation(mock_get_authenticated_user, compat_client, client_with_models):
"""Test that users can only access their own files."""
if isinstance(client_with_models, LlamaStackAsLibraryClient) and isinstance(compat_client, OpenAI):
pytest.skip("OpenAI files are not supported when testing with LlamaStackAsLibraryClient")
if not isinstance(client_with_models, LlamaStackAsLibraryClient):
pytest.skip("Authentication tests require LlamaStackAsLibraryClient (library mode)")
client = compat_client
# Create two test users
user1 = User("user1", {"roles": ["user"], "teams": ["team-a"]})
user2 = User("user2", {"roles": ["user"], "teams": ["team-b"]})
# User 1 uploads a file
mock_get_authenticated_user.return_value = user1
test_content_1 = b"User 1's private file content"
with BytesIO(test_content_1) as file_buffer:
file_buffer.name = "user1_file.txt"
user1_file = client.files.create(file=file_buffer, purpose="assistants")
# User 2 uploads a file
mock_get_authenticated_user.return_value = user2
test_content_2 = b"User 2's private file content"
with BytesIO(test_content_2) as file_buffer:
file_buffer.name = "user2_file.txt"
user2_file = client.files.create(file=file_buffer, purpose="assistants")
try:
# User 1 can see their own file
mock_get_authenticated_user.return_value = user1
user1_files = client.files.list()
user1_file_ids = [f.id for f in user1_files.data]
assert user1_file.id in user1_file_ids
assert user2_file.id not in user1_file_ids # Cannot see user2's file
# User 2 can see their own file
mock_get_authenticated_user.return_value = user2
user2_files = client.files.list()
user2_file_ids = [f.id for f in user2_files.data]
assert user2_file.id in user2_file_ids
assert user1_file.id not in user2_file_ids # Cannot see user1's file
# User 1 can retrieve their own file
mock_get_authenticated_user.return_value = user1
retrieved_file = client.files.retrieve(user1_file.id)
assert retrieved_file.id == user1_file.id
# User 1 cannot retrieve user2's file
mock_get_authenticated_user.return_value = user1
with pytest.raises(ValueError, match="not found"):
client.files.retrieve(user2_file.id)
# User 1 can access their file content
mock_get_authenticated_user.return_value = user1
content_response = client.files.content(user1_file.id)
if isinstance(content_response, str):
content = bytes(content_response, "utf-8")
else:
content = content_response.content
assert content == test_content_1
# User 1 cannot access user2's file content
mock_get_authenticated_user.return_value = user1
with pytest.raises(ValueError, match="not found"):
client.files.content(user2_file.id)
# User 1 can delete their own file
mock_get_authenticated_user.return_value = user1
delete_response = client.files.delete(user1_file.id)
assert delete_response.deleted is True
# User 1 cannot delete user2's file
mock_get_authenticated_user.return_value = user1
with pytest.raises(ValueError, match="not found"):
client.files.delete(user2_file.id)
# User 2 can still access their file after user1's file is deleted
mock_get_authenticated_user.return_value = user2
retrieved_file = client.files.retrieve(user2_file.id)
assert retrieved_file.id == user2_file.id
# Cleanup user2's file
mock_get_authenticated_user.return_value = user2
client.files.delete(user2_file.id)
except Exception as e:
# Cleanup in case of failure
try:
mock_get_authenticated_user.return_value = user1
client.files.delete(user1_file.id)
except Exception:
pass
try:
mock_get_authenticated_user.return_value = user2
client.files.delete(user2_file.id)
except Exception:
pass
raise e
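These authentication tests switch mock_get_authenticated_user.return_value repeatedly to impersonate different users. A tiny context manager could make those switches self-describing; a hedged sketch (as_user is an invented helper, not in this diff):

from contextlib import contextmanager

@contextmanager
def as_user(mock_get_authenticated_user, user):
    # Temporarily make the patched get_authenticated_user return `user`,
    # restoring the previous return value afterwards.
    previous = mock_get_authenticated_user.return_value
    mock_get_authenticated_user.return_value = user
    try:
        yield
    finally:
        mock_get_authenticated_user.return_value = previous

# Usage inside a test body:
#     with as_user(mock_get_authenticated_user, user1):
#         assert user1_file.id in [f.id for f in client.files.list().data]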
@patch("llama_stack.providers.utils.sqlstore.authorized_sqlstore.get_authenticated_user")
def test_files_authentication_shared_attributes(mock_get_authenticated_user, compat_client, client_with_models):
"""Test access control with users having identical attributes."""
if isinstance(client_with_models, LlamaStackAsLibraryClient) and isinstance(compat_client, OpenAI):
pytest.skip("OpenAI files are not supported when testing with LlamaStackAsLibraryClient")
if not isinstance(client_with_models, LlamaStackAsLibraryClient):
pytest.skip("Authentication tests require LlamaStackAsLibraryClient (library mode)")
client = compat_client
# Create users with identical attributes (required for default policy)
user_a = User("user-a", {"roles": ["user"], "teams": ["shared-team"]})
user_b = User("user-b", {"roles": ["user"], "teams": ["shared-team"]})
# User A uploads a file
mock_get_authenticated_user.return_value = user_a
test_content = b"Shared attributes file content"
with BytesIO(test_content) as file_buffer:
file_buffer.name = "shared_attributes_file.txt"
shared_file = client.files.create(file=file_buffer, purpose="assistants")
try:
# User B with identical attributes can access the file
mock_get_authenticated_user.return_value = user_b
files_list = client.files.list()
file_ids = [f.id for f in files_list.data]
# User B should be able to see the file due to identical attributes
assert shared_file.id in file_ids
# User B can retrieve file info
retrieved_file = client.files.retrieve(shared_file.id)
assert retrieved_file.id == shared_file.id
# User B can access file content
content_response = client.files.content(shared_file.id)
if isinstance(content_response, str):
content = bytes(content_response, "utf-8")
else:
content = content_response.content
assert content == test_content
# Cleanup
mock_get_authenticated_user.return_value = user_a
client.files.delete(shared_file.id)
except Exception as e:
# Cleanup in case of failure
try:
mock_get_authenticated_user.return_value = user_a
client.files.delete(shared_file.id)
except Exception:
pass
try:
mock_get_authenticated_user.return_value = user_b
client.files.delete(shared_file.id)
except Exception:
pass
raise e
@patch("llama_stack.providers.utils.sqlstore.authorized_sqlstore.get_authenticated_user")
def test_files_authentication_anonymous_access(mock_get_authenticated_user, compat_client, client_with_models):
"""Test anonymous user behavior when no authentication is present."""
if isinstance(client_with_models, LlamaStackAsLibraryClient) and isinstance(compat_client, OpenAI):
pytest.skip("OpenAI files are not supported when testing with LlamaStackAsLibraryClient")
if not isinstance(client_with_models, LlamaStackAsLibraryClient):
pytest.skip("Authentication tests require LlamaStackAsLibraryClient (library mode)")
client = compat_client
# Simulate anonymous user (no authentication)
mock_get_authenticated_user.return_value = None
test_content = b"Anonymous file content"
with BytesIO(test_content) as file_buffer:
file_buffer.name = "anonymous_file.txt"
anonymous_file = client.files.create(file=file_buffer, purpose="assistants")
try:
# Anonymous user should be able to access their own uploaded file
files_list = client.files.list()
file_ids = [f.id for f in files_list.data]
assert anonymous_file.id in file_ids
# Can retrieve file info
retrieved_file = client.files.retrieve(anonymous_file.id)
assert retrieved_file.id == anonymous_file.id
# Can access file content
content_response = client.files.content(anonymous_file.id)
if isinstance(content_response, str):
content = bytes(content_response, "utf-8")
else:
content = content_response.content
assert content == test_content
# Can delete the file
delete_response = client.files.delete(anonymous_file.id)
assert delete_response.deleted is True
except Exception as e:
# Cleanup in case of failure
try:
client.files.delete(anonymous_file.id)
except Exception:
pass
raise e

View file

@@ -257,6 +257,11 @@ def openai_client(client_with_models):
return OpenAI(base_url=base_url, api_key="fake")
@pytest.fixture(params=["openai_client", "llama_stack_client"])
def compat_client(request):
return request.getfixturevalue(request.param)
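The new compat_client fixture runs any test that requests it once per underlying client, by resolving another fixture by name. A minimal standalone illustration of the request.getfixturevalue pattern, with placeholder fixtures standing in for the real ones in this file:

import pytest

@pytest.fixture
def openai_client():
    return "openai"  # placeholder; the real fixture builds an OpenAI client

@pytest.fixture
def llama_stack_client():
    return "llama-stack"  # placeholder; the real fixture builds a LlamaStackClient

@pytest.fixture(params=["openai_client", "llama_stack_client"])
def compat_client(request):
    # request.param is the name of another fixture; resolve it so the test
    # body receives whichever client this parametrization selected.
    return request.getfixturevalue(request.param)

def test_runs_once_per_client(compat_client):
    assert compat_client in ("openai", "llama-stack")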
@pytest.fixture(scope="session", autouse=True)
def cleanup_server_process(request):
"""Cleanup server process at the end of the test session."""

View file

@@ -5,8 +5,14 @@
# the root directory of this source tree.
import base64
import os
import tempfile
import pytest
from openai import OpenAI
from reportlab.lib.pagesizes import letter
from reportlab.pdfgen import canvas
from llama_stack.distribution.library_client import LlamaStackAsLibraryClient
@@ -82,6 +88,14 @@ def skip_if_provider_isnt_vllm(client_with_models, model_id):
pytest.skip(f"Model {model_id} hosted by {provider.provider_type} doesn't support vllm extra_body parameters.")
def skip_if_provider_isnt_openai(client_with_models, model_id):
provider = provider_from_model(client_with_models, model_id)
if provider.provider_type != "remote::openai":
pytest.skip(
f"Model {model_id} hosted by {provider.provider_type} doesn't support chat completion calls with base64 encoded files."
)
@pytest.fixture
def openai_client(client_with_models):
base_url = f"{client_with_models.base_url}/v1/openai/v1"
@@ -179,9 +193,7 @@ def test_openai_completion_prompt_logprobs(llama_stack_client, client_with_model
model=text_model_id,
prompt=prompt,
stream=False,
extra_body={
"prompt_logprobs": prompt_logprobs,
},
prompt_logprobs=prompt_logprobs,
)
assert len(response.choices) > 0
choice = response.choices[0]
@@ -196,9 +208,7 @@ def test_openai_completion_guided_choice(llama_stack_client, client_with_models,
model=text_model_id,
prompt=prompt,
stream=False,
extra_body={
"guided_choice": ["joy", "sadness"],
},
guided_choice=["joy", "sadness"],
)
assert len(response.choices) > 0
choice = response.choices[0]
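These hunks pass prompt_logprobs and guided_choice as direct keyword arguments now that the Llama Stack client accepts them, instead of wrapping them in extra_body. For comparison, a plain OpenAI client would still need extra_body to forward such vLLM-specific parameters; a sketch with placeholder base_url, model, and prompt values:

from openai import OpenAI

# Placeholder endpoint and model; any vLLM-compatible server would do.
client = OpenAI(base_url="http://localhost:8000/v1", api_key="fake")

response = client.completions.create(
    model="my-text-model",
    prompt="I am feeling really sad today.",
    stream=False,
    # The OpenAI SDK forwards non-standard parameters only via extra_body.
    extra_body={"guided_choice": ["joy", "sadness"]},
)
print(response.choices[0].text)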
@@ -422,3 +432,45 @@ def test_inference_store_tool_calls(compat_client, client_with_models, text_mode
# failed tool call parses show up as a message with content, so ensure
# that the retrieve response content matches the original request
assert retrieved_response.choices[0].message.content == content
def test_openai_chat_completion_non_streaming_with_file(openai_client, client_with_models, text_model_id):
skip_if_provider_isnt_openai(client_with_models, text_model_id)
# Generate temporary PDF with "Hello World" text
with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as temp_pdf:
c = canvas.Canvas(temp_pdf.name, pagesize=letter)
c.drawString(100, 750, "Hello World")
c.save()
# Read the PDF and encode it to base64
with open(temp_pdf.name, "rb") as pdf_file:
pdf_base64 = base64.b64encode(pdf_file.read()).decode("utf-8")
# Clean up temporary file
os.unlink(temp_pdf.name)
response = openai_client.chat.completions.create(
model=text_model_id,
messages=[
{
"role": "user",
"content": "Describe what you see in this PDF file.",
},
{
"role": "user",
"content": [
{
"type": "file",
"file": {
"filename": "my-temp-hello-world-pdf",
"file_data": f"data:application/pdf;base64,{pdf_base64}",
},
}
],
},
],
stream=False,
)
message_content = response.choices[0].message.content.lower().strip()
assert "hello world" in message_content

View file

@@ -41,9 +41,8 @@ sys.stdout.reconfigure(line_buffering=True)
# How to run this test:
#
# pytest llama_stack/providers/tests/post_training/test_post_training.py
# -m "torchtune_post_training_huggingface_datasetio"
# -v -s --tb=short --disable-warnings
# LLAMA_STACK_CONFIG=ci-tests uv run --dev pytest tests/integration/post_training/test_post_training.py
#
# SFT test
@@ -117,6 +116,7 @@ class TestPostTraining:
break
logger.info(f"Current status: {status}")
assert status.status in ["scheduled", "in_progress", "completed"]
if status.status == "completed":
break

View file

@@ -47,6 +47,9 @@ def setup_telemetry_data(llama_stack_client, text_model_id):
if len(traces) < 4:
pytest.fail(f"Failed to create sufficient telemetry data after 30s. Got {len(traces)} traces.")
# Wait for 5 seconds to ensure traces have completed logging
time.sleep(5)
yield

View file

@@ -22,16 +22,14 @@ logger = logging.getLogger(__name__)
def skip_if_provider_doesnt_support_openai_vector_stores(client_with_models):
vector_io_providers = [p for p in client_with_models.providers.list() if p.api == "vector_io"]
for p in vector_io_providers:
if p.provider_type in ["inline::faiss", "inline::sqlite-vec", "inline::milvus"]:
return
pytest.skip("OpenAI vector stores are not supported by any provider")
def skip_if_provider_doesnt_support_openai_vector_store_files_api(client_with_models):
vector_io_providers = [p for p in client_with_models.providers.list() if p.api == "vector_io"]
for p in vector_io_providers:
if p.provider_type in ["inline::faiss", "inline::sqlite-vec", "inline::milvus", "remote::pgvector"]:
if p.provider_type in [
"inline::faiss",
"inline::sqlite-vec",
"inline::milvus",
"inline::chromadb",
"remote::pgvector",
"remote::chromadb",
]:
return
pytest.skip("OpenAI vector stores are not supported by any provider")
@@ -452,7 +450,6 @@ def test_openai_vector_store_search_with_max_num_results(
def test_openai_vector_store_attach_file(compat_client_with_empty_stores, client_with_models):
"""Test OpenAI vector store attach file."""
skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)
skip_if_provider_doesnt_support_openai_vector_store_files_api(client_with_models)
if isinstance(compat_client_with_empty_stores, LlamaStackClient):
pytest.skip("Vector Store Files attach is not yet supported with LlamaStackClient")
@@ -504,7 +501,6 @@ def test_openai_vector_store_attach_file(compat_client_with_empty_stores, client
def test_openai_vector_store_attach_files_on_creation(compat_client_with_empty_stores, client_with_models):
"""Test OpenAI vector store attach files on creation."""
skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)
skip_if_provider_doesnt_support_openai_vector_store_files_api(client_with_models)
if isinstance(compat_client_with_empty_stores, LlamaStackClient):
pytest.skip("Vector Store Files attach is not yet supported with LlamaStackClient")
@@ -561,7 +557,6 @@ def test_openai_vector_store_attach_files_on_creation(compat_client_with_empty_s
def test_openai_vector_store_list_files(compat_client_with_empty_stores, client_with_models):
"""Test OpenAI vector store list files."""
skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)
skip_if_provider_doesnt_support_openai_vector_store_files_api(client_with_models)
if isinstance(compat_client_with_empty_stores, LlamaStackClient):
pytest.skip("Vector Store Files list is not yet supported with LlamaStackClient")
@@ -635,7 +630,6 @@ def test_openai_vector_store_list_files_invalid_vector_store(compat_client_with_
def test_openai_vector_store_retrieve_file_contents(compat_client_with_empty_stores, client_with_models):
"""Test OpenAI vector store retrieve file contents."""
skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)
skip_if_provider_doesnt_support_openai_vector_store_files_api(client_with_models)
if isinstance(compat_client_with_empty_stores, LlamaStackClient):
pytest.skip("Vector Store Files retrieve contents is not yet supported with LlamaStackClient")
@@ -677,7 +671,6 @@ def test_openai_vector_store_retrieve_file_contents(compat_client_with_empty_sto
def test_openai_vector_store_delete_file(compat_client_with_empty_stores, client_with_models):
"""Test OpenAI vector store delete file."""
skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)
skip_if_provider_doesnt_support_openai_vector_store_files_api(client_with_models)
if isinstance(compat_client_with_empty_stores, LlamaStackClient):
pytest.skip("Vector Store Files list is not yet supported with LlamaStackClient")
@@ -730,12 +723,9 @@ def test_openai_vector_store_delete_file(compat_client_with_empty_stores, client
assert updated_vector_store.file_counts.in_progress == 0
# TODO: Remove this xfail once we have a way to remove embeddings from vector store
@pytest.mark.xfail(reason="Vector Store Files delete doesn't remove embeddings from vector store", strict=True)
def test_openai_vector_store_delete_file_removes_from_vector_store(compat_client_with_empty_stores, client_with_models):
"""Test OpenAI vector store delete file removes from vector store."""
skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)
skip_if_provider_doesnt_support_openai_vector_store_files_api(client_with_models)
if isinstance(compat_client_with_empty_stores, LlamaStackClient):
pytest.skip("Vector Store Files attach is not yet supported with LlamaStackClient")
@@ -777,7 +767,6 @@ def test_openai_vector_store_delete_file_removes_from_vector_store(compat_client
def test_openai_vector_store_update_file(compat_client_with_empty_stores, client_with_models):
"""Test OpenAI vector store update file."""
skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)
skip_if_provider_doesnt_support_openai_vector_store_files_api(client_with_models)
if isinstance(compat_client_with_empty_stores, LlamaStackClient):
pytest.skip("Vector Store Files update is not yet supported with LlamaStackClient")
@@ -826,7 +815,6 @@ def test_create_vector_store_files_duplicate_vector_store_name(compat_client_wit
This test confirms that client.vector_stores.create() creates a unique ID
"""
skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)
skip_if_provider_doesnt_support_openai_vector_store_files_api(client_with_models)
if isinstance(compat_client_with_empty_stores, LlamaStackClient):
pytest.skip("Vector Store Files create is not yet supported with LlamaStackClient")