feat(tests): make inference_recorder into api_recorder (include tool_invoke)

Ashwin Bharambe 2025-10-04 11:53:44 -07:00
parent b96640eca3
commit 9205731cd6
19 changed files with 849 additions and 666 deletions


@@ -6,6 +6,7 @@
 import inspect
 import itertools
 import os
+import tempfile
 import textwrap
 import time
 from pathlib import Path
@@ -14,6 +15,7 @@ import pytest
 from dotenv import load_dotenv
 from llama_stack.log import get_logger
+from llama_stack.testing.api_recorder import patch_httpx_for_test_id

 from .suites import SETUP_DEFINITIONS, SUITE_DEFINITIONS
@@ -35,6 +37,10 @@ def pytest_sessionstart(session):
     if "LLAMA_STACK_TEST_INFERENCE_MODE" not in os.environ:
         os.environ["LLAMA_STACK_TEST_INFERENCE_MODE"] = "replay"
+    if "SQLITE_STORE_DIR" not in os.environ:
+        os.environ["SQLITE_STORE_DIR"] = tempfile.mkdtemp()
+
+    # Set test stack config type for api_recorder test isolation
     stack_config = session.config.getoption("--stack-config", default=None)
     if stack_config and stack_config.startswith("server:"):
         os.environ["LLAMA_STACK_TEST_STACK_CONFIG_TYPE"] = "server"
@@ -43,8 +49,6 @@ def pytest_sessionstart(session):
         os.environ["LLAMA_STACK_TEST_STACK_CONFIG_TYPE"] = "library_client"
         logger.info(f"Test stack config type: library_client (stack_config={stack_config})")

-    from llama_stack.testing.inference_recorder import patch_httpx_for_test_id
-    patch_httpx_for_test_id()
@@ -55,7 +59,7 @@ def _track_test_context(request):
     This fixture runs for every test and stores the test's nodeid in a contextvar
    that the recording system can access to determine which subdirectory to use.
     """
-    from llama_stack.testing.inference_recorder import _test_context
+    from llama_stack.testing.api_recorder import _test_context

     # Store the test nodeid (e.g., "tests/integration/responses/test_basic.py::test_foo[params]")
     token = _test_context.set(request.node.nodeid)
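For context, the recorder resolves where to store recordings through this contextvar rather than global state, so each test sees its own subdirectory. A minimal sketch of the pattern; the helper below is illustrative, not the actual api_recorder code:

from contextvars import ContextVar

# Holds the current test's nodeid; None outside of a test run.
_test_context: ContextVar[str | None] = ContextVar("_test_context", default=None)

def recording_subdir() -> str:
    # Illustrative: map a nodeid like
    # "tests/integration/responses/test_basic.py::test_foo[params]"
    # to a filesystem-friendly subdirectory name.
    nodeid = _test_context.get()
    if nodeid is None:
        return "default"
    return nodeid.replace("/", "_").replace("::", "__")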
@@ -121,9 +125,13 @@ def pytest_configure(config):
         # Apply defaults if not provided explicitly
         for dest, value in setup_obj.defaults.items():
             current = getattr(config.option, dest, None)
-            if not current:
+            if current is None:
                 setattr(config.option, dest, value)

+    # Apply global fallback for embedding_dimension if still not set
+    if getattr(config.option, "embedding_dimension", None) is None:
+        config.option.embedding_dimension = 384


 def pytest_addoption(parser):
     parser.addoption(
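The switch to "is None" matters because "if not current" also treats explicitly-set falsy values (0, empty string, False) as unset and clobbers them with setup defaults:

current = 0              # user passed an explicit (falsy) value
if not current:          # True: the old check would wrongly apply the default
    pass
if current is None:      # False: the new check preserves the user's value
    pass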
@@ -161,8 +169,8 @@ def pytest_addoption(parser):
     parser.addoption(
         "--embedding-dimension",
         type=int,
-        default=384,
-        help="Output dimensionality of the embedding model to use for testing. Default: 384",
+        default=None,
+        help="Output dimensionality of the embedding model to use for testing. Default: 384 (or setup-specific)",
     )
     parser.addoption(
@@ -236,7 +244,9 @@ def pytest_generate_tests(metafunc):
             continue
         params.append(fixture_name)
-        val = metafunc.config.getoption(option)
+        # Use getattr on config.option to see values set by pytest_configure fallbacks
+        dest = option.lstrip("-").replace("-", "_")
+        val = getattr(metafunc.config.option, dest, None)
         values = [v.strip() for v in str(val).split(",")] if val else [None]
         param_values[fixture_name] = values
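pytest stores each option on config.option under its "dest" name, so the dashed flag has to be converted before getattr can find values set by the pytest_configure fallbacks. The derivation in isolation:

option = "--embedding-dimension"
dest = option.lstrip("-").replace("-", "_")   # lstrip removes all leading dashes
assert dest == "embedding_dimension"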


@@ -183,6 +183,12 @@ def llama_stack_client(request):
     # would be forced to use llama_stack_client, which is not what we want.
     print("\ninstantiating llama_stack_client")
     start_time = time.time()
+
+    # Patch httpx to inject test ID for server-mode test isolation
+    from llama_stack.testing.api_recorder import patch_httpx_for_test_id
+
+    patch_httpx_for_test_id()
+
     client = instantiate_llama_stack_client(request.session)
     print(f"llama_stack_client instantiated in {time.time() - start_time:.3f}s")
     return client
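Applying patch_httpx_for_test_id() in the client fixture means the patch is in place just before any client is built, in both server and library-client modes. A rough sketch of the idea, assuming the patch forwards the current test id on each outgoing request; the header name and wrapper below are hypothetical, not the actual api_recorder implementation:

import httpx

from llama_stack.testing.api_recorder import _test_context

_original_send = httpx.Client.send

def _send_with_test_id(self, request, **kwargs):
    # Hypothetical: tag each outgoing request with the current test's nodeid
    # so a server-side recorder can isolate recordings per test.
    nodeid = _test_context.get()
    if nodeid is not None:
        request.headers["X-LlamaStack-Test-Id"] = nodeid  # header name is illustrative
    return _original_send(self, request, **kwargs)

def patch_httpx_for_test_id_sketch():
    httpx.Client.send = _send_with_test_id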


@@ -7,7 +7,7 @@
 import time

-def new_vector_store(openai_client, name):
+def new_vector_store(openai_client, name, embedding_model, embedding_dimension):
     """Create a new vector store, cleaning up any existing one with the same name."""
     # Ensure we don't reuse an existing vector store
     vector_stores = openai_client.vector_stores.list()
@@ -16,7 +16,21 @@ def new_vector_store(openai_client, name):
         openai_client.vector_stores.delete(vector_store_id=vector_store.id)

     # Create a new vector store
-    vector_store = openai_client.vector_stores.create(name=name)
+    # OpenAI SDK client uses extra_body for non-standard parameters
+    from openai import OpenAI
+
+    if isinstance(openai_client, OpenAI):
+        # OpenAI SDK client - use extra_body
+        vector_store = openai_client.vector_stores.create(
+            name=name,
+            extra_body={"embedding_model": embedding_model, "embedding_dimension": embedding_dimension},
+        )
+    else:
+        # LlamaStack client - direct parameter
+        vector_store = openai_client.vector_stores.create(
+            name=name, embedding_model=embedding_model, embedding_dimension=embedding_dimension
+        )
     return vector_store
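The isinstance dispatch exists because the OpenAI SDK rejects unknown keyword arguments: server-specific parameters have to travel through extra_body, the SDK's standard escape hatch, while the LlamaStack client accepts them directly. Typical usage of the updated helper; the endpoint and model name here are illustrative:

from openai import OpenAI

client = OpenAI(base_url="http://localhost:8321/v1/openai/v1", api_key="none")
store = new_vector_store(
    client,
    "my_test_store",
    embedding_model="openai/text-embedding-3-small",
    embedding_dimension=1536,
)
print(store.id)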


@@ -16,6 +16,7 @@ import pytest
 from llama_stack_client import APIStatusError

+@pytest.mark.xfail(reason="Shields are not yet implemented inside responses")
 def test_shields_via_extra_body(compat_client, text_model_id):
     """Test that shields parameter is received by the server and raises NotImplementedError."""


@@ -47,12 +47,14 @@ def test_response_text_format(compat_client, text_model_id, text_format):
 @pytest.fixture
-def vector_store_with_filtered_files(compat_client, text_model_id, tmp_path_factory):
-    """Create a vector store with multiple files that have different attributes for filtering tests."""
+def vector_store_with_filtered_files(compat_client, embedding_model_id, embedding_dimension, tmp_path_factory):
+    # """Create a vector store with multiple files that have different attributes for filtering tests."""
     if isinstance(compat_client, LlamaStackAsLibraryClient):
-        pytest.skip("Responses API file search is not yet supported in library client.")
+        pytest.skip("upload_file() is not yet supported in library client somehow?")

-    vector_store = new_vector_store(compat_client, "test_vector_store_with_filters")
+    vector_store = new_vector_store(
+        compat_client, "test_vector_store_with_filters", embedding_model_id, embedding_dimension
+    )
     tmp_path = tmp_path_factory.mktemp("filter_test_files")

     # Create multiple files with different attributes


@@ -46,11 +46,13 @@ def test_response_non_streaming_web_search(compat_client, text_model_id, case):
 @pytest.mark.parametrize("case", file_search_test_cases)
-def test_response_non_streaming_file_search(compat_client, text_model_id, tmp_path, case):
+def test_response_non_streaming_file_search(
+    compat_client, text_model_id, embedding_model_id, embedding_dimension, tmp_path, case
+):
     if isinstance(compat_client, LlamaStackAsLibraryClient):
         pytest.skip("Responses API file search is not yet supported in library client.")

-    vector_store = new_vector_store(compat_client, "test_vector_store")
+    vector_store = new_vector_store(compat_client, "test_vector_store", embedding_model_id, embedding_dimension)

     if case.file_content:
         file_name = "test_response_non_streaming_file_search.txt"
@@ -101,11 +103,13 @@ def test_response_non_streaming_file_search(compat_client, text_model_id, tmp_path, case):
     assert case.expected.lower() in response.output_text.lower().strip()

-def test_response_non_streaming_file_search_empty_vector_store(compat_client, text_model_id):
+def test_response_non_streaming_file_search_empty_vector_store(
+    compat_client, text_model_id, embedding_model_id, embedding_dimension
+):
     if isinstance(compat_client, LlamaStackAsLibraryClient):
         pytest.skip("Responses API file search is not yet supported in library client.")

-    vector_store = new_vector_store(compat_client, "test_vector_store")
+    vector_store = new_vector_store(compat_client, "test_vector_store", embedding_model_id, embedding_dimension)

     # Create the response request, which should query our vector store
     response = compat_client.responses.create(
@@ -127,12 +131,14 @@ def test_response_non_streaming_file_search_empty_vector_store(compat_client, text_model_id):
     assert response.output_text

-def test_response_sequential_file_search(compat_client, text_model_id, tmp_path):
+def test_response_sequential_file_search(
+    compat_client, text_model_id, embedding_model_id, embedding_dimension, tmp_path
+):
     """Test file search with sequential responses using previous_response_id."""
     if isinstance(compat_client, LlamaStackAsLibraryClient):
         pytest.skip("Responses API file search is not yet supported in library client.")

-    vector_store = new_vector_store(compat_client, "test_vector_store")
+    vector_store = new_vector_store(compat_client, "test_vector_store", embedding_model_id, embedding_dimension)

     # Create a test file with content
     file_content = "The Llama 4 Maverick model has 128 experts in its mixture of experts architecture."
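The sequential test relies on previous_response_id, the standard Responses API mechanism for chaining turns. A condensed sketch of the flow it exercises; the inputs are illustrative:

first = compat_client.responses.create(
    model=text_model_id,
    input="How many experts does the Llama 4 Maverick model have?",
    tools=[{"type": "file_search", "vector_store_ids": [vector_store.id]}],
)
followup = compat_client.responses.create(
    model=text_model_id,
    input="Summarize the previous answer in one sentence.",
    previous_response_id=first.id,
)
assert followup.output_text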


@@ -39,7 +39,7 @@ class Setup(BaseModel):
     name: str
     description: str
-    defaults: dict[str, str] = Field(default_factory=dict)
+    defaults: dict[str, str | int] = Field(default_factory=dict)
     env: dict[str, str] = Field(default_factory=dict)
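Widening defaults to dict[str, str | int] lets a setup carry numeric options such as embedding_dimension without string round-tripping. For instance, with illustrative values:

setup = Setup(
    name="example",
    description="illustrative setup",
    defaults={"text_model": "openai/gpt-4o", "embedding_dimension": 1536},
)
assert isinstance(setup.defaults["embedding_dimension"], int)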
@@ -88,6 +88,7 @@ SETUP_DEFINITIONS: dict[str, Setup] = {
         defaults={
             "text_model": "openai/gpt-4o",
             "embedding_model": "openai/text-embedding-3-small",
+            "embedding_dimension": 1536,
         },
     ),
     "tgi": Setup(