Merge branch 'main' into add-batches

2025-12-21 07:58:40 +00:00 · 2025-08-13 07:33:41 -04:00 · 2025-08-13 07:33:41 -04:00 · 95a3ecdffc
commit 95a3ecdffc
parent 04a73c89ef 6358d0a478
67 changed files with 1158 additions and 424 deletions
--- a/tests/common/mcp.py
+++ b/tests/common/mcp.py
@ -16,13 +16,10 @@ MCP_TOOLGROUP_ID = "mcp::localmcp"

 def default_tools():
    """Default tools for backward compatibility."""
-    from mcp import types
    from mcp.server.fastmcp import Context

-    async def greet_everyone(
-        url: str, ctx: Context
-    ) -> list[types.TextContent | types.ImageContent | types.EmbeddedResource]:
-        return [types.TextContent(type="text", text="Hello, world!")]
+    async def greet_everyone(url: str, ctx: Context) -> str:
+        return "Hello, world!"

    async def get_boiling_point(liquid_name: str, celsius: bool = True) -> int:
        """
@ -45,7 +42,6 @@ def default_tools():

 def dependency_tools():
    """Tools with natural dependencies for multi-turn testing."""
-    from mcp import types
    from mcp.server.fastmcp import Context

    async def get_user_id(username: str, ctx: Context) -> str:
@ -106,7 +102,7 @@ def dependency_tools():
        else:
            access = "no"

-        return [types.TextContent(type="text", text=access)]
+        return access

    async def get_experiment_id(experiment_name: str, ctx: Context) -> str:
        """
@ -245,7 +241,6 @@ def make_mcp_server(required_auth_token: str | None = None, tools: dict[str, Cal
    try:
        yield {"server_url": server_url}
    finally:
-        print("Telling SSE server to exit")
        server_instance.should_exit = True
        time.sleep(0.5)

@ -269,4 +264,3 @@ def make_mcp_server(required_auth_token: str | None = None, tools: dict[str, Cal

        AppStatus.should_exit = False
        AppStatus.should_exit_event = None
-        print("SSE server exited")
--- a/tests/external/llama-stack-api-weather/pyproject.toml
+++ b/tests/external/llama-stack-api-weather/pyproject.toml
@ -3,7 +3,7 @@ name = "llama-stack-api-weather"
 version = "0.1.0"
 description = "Weather API for Llama Stack"
 readme = "README.md"
-requires-python = ">=3.10"
+requires-python = ">=3.12"
 dependencies = ["llama-stack", "pydantic"]

 [build-system]
--- a/tests/external/llama-stack-provider-kaze/pyproject.toml
+++ b/tests/external/llama-stack-provider-kaze/pyproject.toml
@ -3,7 +3,7 @@ name = "llama-stack-provider-kaze"
 version = "0.1.0"
 description = "Kaze weather provider for Llama Stack"
 readme = "README.md"
-requires-python = ">=3.10"
+requires-python = ">=3.12"
 dependencies = ["llama-stack", "pydantic", "aiohttp"]

 [build-system]
--- a/tests/integration/fixtures/common.py
+++ b/tests/integration/fixtures/common.py
@ -270,7 +270,7 @@ def openai_client(client_with_models):

@pytest.fixture(params=["openai_client", "client_with_models"])
 def compat_client(request, client_with_models):
-    if isinstance(client_with_models, LlamaStackAsLibraryClient):
+    if request.param == "openai_client" and isinstance(client_with_models, LlamaStackAsLibraryClient):
        # OpenAI client expects a server, so unless we also rewrite OpenAI client's requests
        # to go via the Stack library client (which itself rewrites requests to be served inline),
        # we cannot use the OpenAI client together with the library client.
--- a/tests/integration/inference/test_openai_completion.py
+++ b/tests/integration/inference/test_openai_completion.py
@ -34,6 +34,7 @@ def skip_if_model_doesnt_support_openai_completion(client_with_models, model_id)
        "remote::runpod",
        "remote::sambanova",
        "remote::tgi",
+        "remote::vertexai",
    ):
        pytest.skip(f"Model {model_id} hosted by {provider.provider_type} doesn't support OpenAI completions.")

--- a/tests/integration/inference/test_text_inference.py
+++ b/tests/integration/inference/test_text_inference.py
@ -29,6 +29,7 @@ def skip_if_model_doesnt_support_completion(client_with_models, model_id):
            "remote::openai",
            "remote::anthropic",
            "remote::gemini",
+            "remote::vertexai",
            "remote::groq",
            "remote::sambanova",
        )
--- a/tests/integration/non_ci/responses/fixtures/test_cases/responses.yaml
+++ b/tests/integration/non_ci/responses/fixtures/test_cases/responses.yaml
@ -137,7 +137,7 @@ test_response_multi_turn_tool_execution:
        server_url: "<FILLED_BY_TEST_RUNNER>"
      output: "yes"
    - case_id: "experiment_results_lookup"
-      input: "I need to get the results for the 'boiling_point' experiment. First, get the experiment ID for 'boiling_point', then use that ID to get the experiment results. Tell me what you found."
+      input: "I need to get the results for the 'boiling_point' experiment. First, get the experiment ID for 'boiling_point', then use that ID to get the experiment results. Tell me the boiling point in Celsius."
      tools:
      - type: mcp
        server_label: "localmcp"
@ -149,7 +149,7 @@ test_response_multi_turn_tool_execution_streaming:
  test_params:
    case:
    - case_id: "user_permissions_workflow"
-      input: "Help me with this security check: First, get the user ID for 'charlie', then get the permissions for that user ID, and finally check if that user can access 'secret_file.txt'. Stream your progress as you work through each step."
+      input: "Help me with this security check: First, get the user ID for 'charlie', then get the permissions for that user ID, and finally check if that user can access 'secret_file.txt'. Stream your progress as you work through each step. Return only one tool call per step. Summarize the final result with a single 'yes' or 'no' response."
      tools:
      - type: mcp
        server_label: "localmcp"
@ -157,7 +157,7 @@ test_response_multi_turn_tool_execution_streaming:
      stream: true
      output: "no"
    - case_id: "experiment_analysis_streaming"
-      input: "I need a complete analysis: First, get the experiment ID for 'chemical_reaction', then get the results for that experiment, and tell me if the yield was above 80%. Please stream your analysis process."
+      input: "I need a complete analysis: First, get the experiment ID for 'chemical_reaction', then get the results for that experiment, and tell me if the yield was above 80%. Return only one tool call per step.  Please stream your analysis process."
      tools:
      - type: mcp
        server_label: "localmcp"
--- a/tests/integration/non_ci/responses/test_responses.py
+++ b/tests/integration/non_ci/responses/test_responses.py
@ -363,6 +363,9 @@ def test_response_non_streaming_file_search_empty_vector_store(request, compat_c
    ids=case_id_generator,
 )
 def test_response_non_streaming_mcp_tool(request, compat_client, text_model_id, case):
+    if not isinstance(compat_client, LlamaStackAsLibraryClient):
+        pytest.skip("in-process MCP server is only supported in library client")
+
    with make_mcp_server() as mcp_server_info:
        tools = case["tools"]
        for tool in tools:
@ -485,8 +488,11 @@ def test_response_non_streaming_multi_turn_image(request, compat_client, text_mo
    responses_test_cases["test_response_multi_turn_tool_execution"]["test_params"]["case"],
    ids=case_id_generator,
 )
-def test_response_non_streaming_multi_turn_tool_execution(request, compat_client, text_model_id, case):
+def test_response_non_streaming_multi_turn_tool_execution(compat_client, text_model_id, case):
    """Test multi-turn tool execution where multiple MCP tool calls are performed in sequence."""
+    if not isinstance(compat_client, LlamaStackAsLibraryClient):
+        pytest.skip("in-process MCP server is only supported in library client")
+
    with make_mcp_server(tools=dependency_tools()) as mcp_server_info:
        tools = case["tools"]
        # Replace the placeholder URL with the actual server URL
@ -541,8 +547,11 @@ def test_response_non_streaming_multi_turn_tool_execution(request, compat_client
    responses_test_cases["test_response_multi_turn_tool_execution_streaming"]["test_params"]["case"],
    ids=case_id_generator,
 )
-async def test_response_streaming_multi_turn_tool_execution(request, compat_client, text_model_id, case):
+def test_response_streaming_multi_turn_tool_execution(compat_client, text_model_id, case):
    """Test streaming multi-turn tool execution where multiple MCP tool calls are performed in sequence."""
+    if not isinstance(compat_client, LlamaStackAsLibraryClient):
+        pytest.skip("in-process MCP server is only supported in library client")
+
    with make_mcp_server(tools=dependency_tools()) as mcp_server_info:
        tools = case["tools"]
        # Replace the placeholder URL with the actual server URL
@ -634,7 +643,7 @@ async def test_response_streaming_multi_turn_tool_execution(request, compat_clie
        },
    ],
 )
-def test_response_text_format(request, compat_client, text_model_id, text_format):
+def test_response_text_format(compat_client, text_model_id, text_format):
    if isinstance(compat_client, LlamaStackAsLibraryClient):
        pytest.skip("Responses API text format is not yet supported in library client.")

@ -653,7 +662,7 @@ def test_response_text_format(request, compat_client, text_model_id, text_format


@pytest.fixture
-def vector_store_with_filtered_files(request, compat_client, text_model_id, tmp_path_factory):
+def vector_store_with_filtered_files(compat_client, text_model_id, tmp_path_factory):
    """Create a vector store with multiple files that have different attributes for filtering tests."""
    if isinstance(compat_client, LlamaStackAsLibraryClient):
        pytest.skip("Responses API file search is not yet supported in library client.")
--- a/tests/integration/vector_io/test_openai_vector_stores.py
+++ b/tests/integration/vector_io/test_openai_vector_stores.py
@ -9,10 +9,11 @@ import time
 from io import BytesIO

 import pytest
-from llama_stack_client import BadRequestError, LlamaStackClient
+from llama_stack_client import BadRequestError
 from openai import BadRequestError as OpenAIBadRequestError

 from llama_stack.apis.vector_io import Chunk
+from llama_stack.core.library_client import LlamaStackAsLibraryClient

 logger = logging.getLogger(__name__)

@ -475,9 +476,6 @@ def test_openai_vector_store_attach_file(compat_client_with_empty_stores, client
    """Test OpenAI vector store attach file."""
    skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)

-    if isinstance(compat_client_with_empty_stores, LlamaStackClient):
-        pytest.skip("Vector Store Files attach is not yet supported with LlamaStackClient")
-
    compat_client = compat_client_with_empty_stores

    # Create a vector store
@ -526,9 +524,6 @@ def test_openai_vector_store_attach_files_on_creation(compat_client_with_empty_s
    """Test OpenAI vector store attach files on creation."""
    skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)

-    if isinstance(compat_client_with_empty_stores, LlamaStackClient):
-        pytest.skip("Vector Store Files attach is not yet supported with LlamaStackClient")
-
    compat_client = compat_client_with_empty_stores

    # Create some files and attach them to the vector store
@ -582,9 +577,6 @@ def test_openai_vector_store_list_files(compat_client_with_empty_stores, client_
    """Test OpenAI vector store list files."""
    skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)

-    if isinstance(compat_client_with_empty_stores, LlamaStackClient):
-        pytest.skip("Vector Store Files list is not yet supported with LlamaStackClient")
-
    compat_client = compat_client_with_empty_stores

    # Create a vector store
@ -597,16 +589,20 @@ def test_openai_vector_store_list_files(compat_client_with_empty_stores, client_
            file_buffer.name = f"openai_test_{i}.txt"
            file = compat_client.files.create(file=file_buffer, purpose="assistants")

-        compat_client.vector_stores.files.create(
+        response = compat_client.vector_stores.files.create(
            vector_store_id=vector_store.id,
            file_id=file.id,
        )
+        assert response is not None
+        assert response.status == "completed", (
+            f"Failed to attach file {file.id} to vector store {vector_store.id}: {response=}"
+        )
        file_ids.append(file.id)

    files_list = compat_client.vector_stores.files.list(vector_store_id=vector_store.id)
    assert files_list
    assert files_list.object == "list"
-    assert files_list.data
+    assert files_list.data is not None
    assert not files_list.has_more
    assert len(files_list.data) == 3
    assert set(file_ids) == {file.id for file in files_list.data}
@ -642,12 +638,13 @@ def test_openai_vector_store_list_files_invalid_vector_store(compat_client_with_
    """Test OpenAI vector store list files with invalid vector store ID."""
    skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)

-    if isinstance(compat_client_with_empty_stores, LlamaStackClient):
-        pytest.skip("Vector Store Files list is not yet supported with LlamaStackClient")
-
    compat_client = compat_client_with_empty_stores
+    if isinstance(compat_client, LlamaStackAsLibraryClient):
+        errors = ValueError
+    else:
+        errors = (BadRequestError, OpenAIBadRequestError)

-    with pytest.raises((BadRequestError, OpenAIBadRequestError)):
+    with pytest.raises(errors):
        compat_client.vector_stores.files.list(vector_store_id="abc123")


@ -655,9 +652,6 @@ def test_openai_vector_store_retrieve_file_contents(compat_client_with_empty_sto
    """Test OpenAI vector store retrieve file contents."""
    skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)

-    if isinstance(compat_client_with_empty_stores, LlamaStackClient):
-        pytest.skip("Vector Store Files retrieve contents is not yet supported with LlamaStackClient")
-
    compat_client = compat_client_with_empty_stores

    # Create a vector store
@ -685,9 +679,15 @@ def test_openai_vector_store_retrieve_file_contents(compat_client_with_empty_sto
        file_id=file.id,
    )

-    assert file_contents
-    assert file_contents.content[0]["type"] == "text"
-    assert file_contents.content[0]["text"] == test_content.decode("utf-8")
+    assert file_contents is not None
+    assert len(file_contents.content) == 1
+    content = file_contents.content[0]
+
+    # llama-stack-client returns a model object, whereas openai-python returns a plain dict; normalize to a dict
+    if not isinstance(content, dict):
+        content = content.model_dump()
+    assert content["type"] == "text"
+    assert content["text"] == test_content.decode("utf-8")
    assert file_contents.filename == file_name
    assert file_contents.attributes == attributes

@ -696,9 +696,6 @@ def test_openai_vector_store_delete_file(compat_client_with_empty_stores, client
    """Test OpenAI vector store delete file."""
    skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)

-    if isinstance(compat_client_with_empty_stores, LlamaStackClient):
-        pytest.skip("Vector Store Files list is not yet supported with LlamaStackClient")
-
    compat_client = compat_client_with_empty_stores

    # Create a vector store
@ -751,9 +748,6 @@ def test_openai_vector_store_delete_file_removes_from_vector_store(compat_client
    """Test OpenAI vector store delete file removes from vector store."""
    skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)

-    if isinstance(compat_client_with_empty_stores, LlamaStackClient):
-        pytest.skip("Vector Store Files attach is not yet supported with LlamaStackClient")
-
    compat_client = compat_client_with_empty_stores

    # Create a vector store
@ -792,9 +786,6 @@ def test_openai_vector_store_update_file(compat_client_with_empty_stores, client
    """Test OpenAI vector store update file."""
    skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)

-    if isinstance(compat_client_with_empty_stores, LlamaStackClient):
-        pytest.skip("Vector Store Files update is not yet supported with LlamaStackClient")
-
    compat_client = compat_client_with_empty_stores

    # Create a vector store
@ -840,9 +831,6 @@ def test_create_vector_store_files_duplicate_vector_store_name(compat_client_wit
    """
    skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)

-    if isinstance(compat_client_with_empty_stores, LlamaStackClient):
-        pytest.skip("Vector Store Files create is not yet supported with LlamaStackClient")
-
    compat_client = compat_client_with_empty_stores

    # Create a vector store with files