Merge remote-tracking branch 'origin/main' into storage_fix

Ashwin Bharambe 2025-11-12 10:17:56 -08:00
commit 08024d44f2
89 changed files with 4786 additions and 3941 deletions


@@ -516,3 +516,169 @@ def test_response_with_instructions(openai_client, client_with_models, text_mode
# Verify instructions from previous response were not carried over to the next response
assert response_with_instructions2.instructions == instructions2
@pytest.mark.skip(reason="Tool calling is not reliable.")
def test_max_tool_calls_with_function_tools(openai_client, client_with_models, text_model_id):
"""Test handling of max_tool_calls with function tools in responses."""
if isinstance(client_with_models, LlamaStackAsLibraryClient):
pytest.skip("OpenAI responses are not supported when testing with library client yet.")
client = openai_client
max_tool_calls = 1
tools = [
{
"type": "function",
"name": "get_weather",
"description": "Get weather information for a specified location",
"parameters": {
"type": "object",
"properties": {
"location": {
"type": "string",
"description": "The city name (e.g., 'New York', 'London')",
},
},
},
},
{
"type": "function",
"name": "get_time",
"description": "Get current time for a specified location",
"parameters": {
"type": "object",
"properties": {
"location": {
"type": "string",
"description": "The city name (e.g., 'New York', 'London')",
},
},
},
},
]
# First create a response that triggers function tools
response = client.responses.create(
model=text_model_id,
input="Can you tell me the weather in Paris and the current time?",
tools=tools,
stream=False,
max_tool_calls=max_tool_calls,
)
# Verify we got two function calls and that max_tool_calls does not affect function tools
assert len(response.output) == 2
assert response.output[0].type == "function_call"
assert response.output[0].name == "get_weather"
assert response.output[0].status == "completed"
assert response.output[1].type == "function_call"
assert response.output[1].name == "get_time"
assert response.output[1].status == "completed"
# Verify we have a valid max_tool_calls field
assert response.max_tool_calls == max_tool_calls
def test_max_tool_calls_invalid(openai_client, client_with_models, text_model_id):
"""Test handling of invalid max_tool_calls in responses."""
if isinstance(client_with_models, LlamaStackAsLibraryClient):
pytest.skip("OpenAI responses are not supported when testing with library client yet.")
client = openai_client
input = "Search for today's top technology news."
invalid_max_tool_calls = 0
tools = [
{"type": "web_search"},
]
# Create a response with an invalid max_tool_calls value i.e. 0
# Handle ValueError from LLS and BadRequestError from OpenAI client
with pytest.raises((ValueError, BadRequestError)) as excinfo:
client.responses.create(
model=text_model_id,
input=input,
tools=tools,
stream=False,
max_tool_calls=invalid_max_tool_calls,
)
error_message = str(excinfo.value)
assert f"Invalid max_tool_calls={invalid_max_tool_calls}; should be >= 1" in error_message, (
f"Expected error message about invalid max_tool_calls, got: {error_message}"
)
def test_max_tool_calls_with_builtin_tools(openai_client, client_with_models, text_model_id):
"""Test handling of max_tool_calls with built-in tools in responses."""
if isinstance(client_with_models, LlamaStackAsLibraryClient):
pytest.skip("OpenAI responses are not supported when testing with library client yet.")
client = openai_client
input = "Search for today's top technology and a positive news story. You MUST make exactly two separate web search calls."
max_tool_calls = [1, 5]
tools = [
{"type": "web_search"},
]
# First create a response that triggers web_search tools without max_tool_calls
response = client.responses.create(
model=text_model_id,
input=input,
tools=tools,
stream=False,
)
# Verify we got two web search calls followed by a message
assert len(response.output) == 3
assert response.output[0].type == "web_search_call"
assert response.output[0].status == "completed"
assert response.output[1].type == "web_search_call"
assert response.output[1].status == "completed"
assert response.output[2].type == "message"
assert response.output[2].status == "completed"
assert response.output[2].role == "assistant"
# Next create a response that triggers web_search tools with max_tool_calls set to 1
response_2 = client.responses.create(
model=text_model_id,
input=input,
tools=tools,
stream=False,
max_tool_calls=max_tool_calls[0],
)
# Verify we got one web search tool call followed by a message
assert len(response_2.output) == 2
assert response_2.output[0].type == "web_search_call"
assert response_2.output[0].status == "completed"
assert response_2.output[1].type == "message"
assert response_2.output[1].status == "completed"
assert response_2.output[1].role == "assistant"
# Verify we have a valid max_tool_calls field
assert response_2.max_tool_calls == max_tool_calls[0]
# Finally create a response that triggers web_search tools with max_tool_calls set to 5
response_3 = client.responses.create(
model=text_model_id,
input=input,
tools=tools,
stream=False,
max_tool_calls=max_tool_calls[1],
)
# Verify we got two web search calls followed by a message
assert len(response_3.output) == 3
assert response_3.output[0].type == "web_search_call"
assert response_3.output[0].status == "completed"
assert response_3.output[1].type == "web_search_call"
assert response_3.output[1].status == "completed"
assert response_3.output[2].type == "message"
assert response_3.output[2].status == "completed"
assert response_3.output[2].role == "assistant"
# Verify we have a valid max_tool_calls field
assert response_3.max_tool_calls == max_tool_calls[1]


@@ -334,7 +334,13 @@ def require_server(llama_stack_client):
@pytest.fixture(scope="session")
def openai_client(llama_stack_client, require_server):
base_url = f"{llama_stack_client.base_url}/v1"
return OpenAI(base_url=base_url, api_key="fake")
client = OpenAI(base_url=base_url, api_key="fake", max_retries=0, timeout=30.0)
yield client
# Cleanup: close HTTP connections
try:
client.close()
except Exception:
pass
@pytest.fixture(params=["openai_client", "client_with_models"])


@@ -54,6 +54,7 @@ def skip_if_model_doesnt_support_openai_completion(client_with_models, model_id)
# {"error":{"message":"Unknown request URL: GET /openai/v1/completions. Please check the URL for typos,
# or see the docs at https://console.groq.com/docs/","type":"invalid_request_error","code":"unknown_url"}}
"remote::groq",
"remote::oci",
"remote::gemini", # https://generativelanguage.googleapis.com/v1beta/openai/completions -> 404
"remote::anthropic", # at least claude-3-{5,7}-{haiku,sonnet}-* / claude-{sonnet,opus}-4-* are not supported
"remote::azure", # {'error': {'code': 'OperationNotSupported', 'message': 'The completion operation


@@ -138,6 +138,7 @@ def skip_if_model_doesnt_support_openai_embeddings(client, model_id):
"remote::runpod",
"remote::sambanova",
"remote::tgi",
"remote::oci",
):
pytest.skip(f"Model {model_id} hosted by {provider.provider_type} doesn't support OpenAI embeddings.")


@@ -2,6 +2,10 @@
This directory contains recorded inference API responses used for deterministic testing without requiring live API access.
For more information, see the
[docs](https://llamastack.github.io/docs/contributing/testing/record-replay).
This README provides more technical information.
## Structure
- `responses/` - JSON files containing request/response pairs for inference operations
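For a quick local look at one of these recordings, a minimal sketch (the file name below is a placeholder; only the fact that these are plain JSON files is assumed here, and the exact schema is described in the record-replay docs linked above):

import json
from pathlib import Path

# Pick any recording under responses/; this path is illustrative only.
recording = json.loads(Path("responses/example.json").read_text())
# Print the top-level keys of the recorded request/response pair.
print(sorted(recording))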


@@ -115,7 +115,15 @@ def openai_client(base_url, api_key, provider):
client = LlamaStackAsLibraryClient(config, skip_logger_removal=True)
return client
return OpenAI(
client = OpenAI(
base_url=base_url,
api_key=api_key,
max_retries=0,
timeout=30.0,
)
yield client
# Cleanup: close HTTP connections
try:
client.close()
except Exception:
pass


@@ -65,8 +65,14 @@ class TestConversationResponses:
conversation_items = openai_client.conversations.items.list(conversation.id)
assert len(conversation_items.data) >= 4 # 2 user + 2 assistant messages
@pytest.mark.timeout(60, method="thread")
def test_conversation_context_loading(self, openai_client, text_model_id):
"""Test that conversation context is properly loaded for responses."""
"""Test that conversation context is properly loaded for responses.
Note: 60s timeout added due to CI-specific deadlock in pytest/OpenAI client/httpx
after running 25+ tests. Hangs before first HTTP request is made. Works fine locally.
Investigation needed: connection pool exhaustion or event loop state issue.
"""
conversation = openai_client.conversations.create(
items=[
{"type": "message", "role": "user", "content": "My name is Alice. I like to eat apples."},


@@ -11,6 +11,7 @@ import pytest
from llama_stack_client import BadRequestError
from openai import BadRequestError as OpenAIBadRequestError
from llama_stack.apis.files import ExpiresAfter
from llama_stack.apis.vector_io import Chunk
from llama_stack.core.library_client import LlamaStackAsLibraryClient
from llama_stack.log import get_logger
@@ -907,16 +908,16 @@ def test_openai_vector_store_retrieve_file_contents(
)
assert file_contents is not None
assert len(file_contents.content) == 1
content = file_contents.content[0]
assert file_contents.object == "vector_store.file_content.page"
assert len(file_contents.data) == 1
content = file_contents.data[0]
# llama-stack-client returns a model, openai-python is a badboy and returns a dict
if not isinstance(content, dict):
content = content.model_dump()
assert content["type"] == "text"
assert content["text"] == test_content.decode("utf-8")
assert file_contents.filename == file_name
assert file_contents.attributes == attributes
assert file_contents.has_more is False
@vector_provider_wrapper
@@ -1483,14 +1484,12 @@ def test_openai_vector_store_file_batch_retrieve_contents(
)
assert file_contents is not None
assert file_contents.filename == file_data[i][0]
assert len(file_contents.content) > 0
assert file_contents.object == "vector_store.file_content.page"
assert len(file_contents.data) > 0
# Verify the content matches what we uploaded
content_text = (
file_contents.content[0].text
if hasattr(file_contents.content[0], "text")
else file_contents.content[0]["text"]
file_contents.data[0].text if hasattr(file_contents.data[0], "text") else file_contents.data[0]["text"]
)
assert file_data[i][1].decode("utf-8") in content_text
@@ -1606,3 +1605,97 @@ def test_openai_vector_store_embedding_config_from_metadata(
assert "metadata_config_store" in store_names
assert "consistent_config_store" in store_names
@vector_provider_wrapper
def test_openai_vector_store_file_contents_with_extra_query(
compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension, vector_io_provider_id
):
"""Test that vector store file contents endpoint supports extra_query parameter."""
skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)
compat_client = compat_client_with_empty_stores
# Create a vector store
vector_store = compat_client.vector_stores.create(
name="test_extra_query_store",
extra_body={
"embedding_model": embedding_model_id,
"provider_id": vector_io_provider_id,
},
)
# Create and attach a file
test_content = b"This is test content for extra_query validation."
with BytesIO(test_content) as file_buffer:
file_buffer.name = "test_extra_query.txt"
file = compat_client.files.create(
file=file_buffer,
purpose="assistants",
expires_after=ExpiresAfter(anchor="created_at", seconds=86400),
)
file_attach_response = compat_client.vector_stores.files.create(
vector_store_id=vector_store.id,
file_id=file.id,
extra_body={"embedding_model": embedding_model_id},
)
assert file_attach_response.status == "completed"
# Wait for processing
time.sleep(2)
# Test that extra_query parameter is accepted and processed
content_with_extra_query = compat_client.vector_stores.files.content(
vector_store_id=vector_store.id,
file_id=file.id,
extra_query={"include_embeddings": True, "include_metadata": True},
)
# Test without extra_query for comparison
content_without_extra_query = compat_client.vector_stores.files.content(
vector_store_id=vector_store.id,
file_id=file.id,
)
# Validate that both calls succeed
assert content_with_extra_query is not None
assert content_without_extra_query is not None
assert len(content_with_extra_query.data) > 0
assert len(content_without_extra_query.data) > 0
# Validate that extra_query parameter is processed correctly
# Both should have the embedding/metadata fields available (may be None based on flags)
first_chunk_with_flags = content_with_extra_query.data[0]
first_chunk_without_flags = content_without_extra_query.data[0]
# The key validation: extra_query fields are present in the response
# Handle both dict and object responses (different clients may return different formats)
def has_field(obj, field):
if isinstance(obj, dict):
return field in obj
else:
return hasattr(obj, field)
# Validate that all expected fields are present in both responses
expected_fields = ["embedding", "chunk_metadata", "metadata", "text"]
for field in expected_fields:
assert has_field(first_chunk_with_flags, field), f"Field '{field}' missing from response with extra_query"
assert has_field(first_chunk_without_flags, field), f"Field '{field}' missing from response without extra_query"
# Validate content is the same
def get_field(obj, field):
if isinstance(obj, dict):
return obj[field]
else:
return getattr(obj, field)
assert get_field(first_chunk_with_flags, "text") == test_content.decode("utf-8")
assert get_field(first_chunk_without_flags, "text") == test_content.decode("utf-8")
with_flags_embedding = get_field(first_chunk_with_flags, "embedding")
without_flags_embedding = get_field(first_chunk_without_flags, "embedding")
# Validate that embeddings are included when requested and excluded when not requested
assert with_flags_embedding is not None, "Embeddings should be included when include_embeddings=True"
assert len(with_flags_embedding) > 0, "Embedding should be a non-empty list"
assert without_flags_embedding is None, "Embeddings should not be included when include_embeddings is not requested"
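The dict-versus-model branching above (in both the file-contents assertions and the has_field/get_field helpers) reflects the client difference noted in the earlier comment: openai-python returns plain dicts while llama-stack-client returns model objects. A minimal sketch of a single normalizing accessor, assuming the model objects expose Pydantic's model_dump(); the helper name is illustrative and not part of the test suite:

from typing import Any

def chunk_as_dict(chunk: Any) -> dict:
    # openai-python returns plain dicts; llama-stack-client returns
    # Pydantic-style models, which model_dump() converts to dicts.
    return chunk if isinstance(chunk, dict) else chunk.model_dump()

# Usage: chunk_as_dict(file_contents.data[0])["text"]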