chore(misc): make tests and starter faster (#3042)
Some checks failed
Integration Tests (Replay) / discover-tests (push) Successful in 3s
Test External Providers Installed via Module / test-external-providers-from-module (venv) (push) Has been skipped
Integration Auth Tests / test-matrix (oauth2_token) (push) Failing after 9s
Python Package Build Test / build (3.12) (push) Failing after 4s
Vector IO Integration Tests / test-matrix (3.12, inline::milvus) (push) Failing after 12s
Test Llama Stack Build / generate-matrix (push) Successful in 11s
Test Llama Stack Build / build-ubi9-container-distribution (push) Failing after 12s
Vector IO Integration Tests / test-matrix (3.12, inline::faiss) (push) Failing after 14s
SqlStore Integration Tests / test-postgres (3.12) (push) Failing after 22s
Test External API and Providers / test-external (venv) (push) Failing after 14s
Integration Tests (Replay) / Integration Tests (, , , client=, vision=) (push) Failing after 12s
Vector IO Integration Tests / test-matrix (3.12, remote::pgvector) (push) Failing after 15s
SqlStore Integration Tests / test-postgres (3.13) (push) Failing after 22s
Test Llama Stack Build / build-custom-container-distribution (push) Failing after 14s
Unit Tests / unit-tests (3.13) (push) Failing after 14s
Test Llama Stack Build / build-single-provider (push) Failing after 13s
Vector IO Integration Tests / test-matrix (3.12, remote::chromadb) (push) Failing after 18s
Unit Tests / unit-tests (3.12) (push) Failing after 16s
Vector IO Integration Tests / test-matrix (3.12, remote::qdrant) (push) Failing after 18s
Vector IO Integration Tests / test-matrix (3.13, remote::weaviate) (push) Failing after 10s
Vector IO Integration Tests / test-matrix (3.13, inline::faiss) (push) Failing after 11s
Vector IO Integration Tests / test-matrix (3.12, remote::weaviate) (push) Failing after 16s
Vector IO Integration Tests / test-matrix (3.13, remote::qdrant) (push) Failing after 18s
Test Llama Stack Build / build (push) Failing after 12s
Vector IO Integration Tests / test-matrix (3.13, remote::chromadb) (push) Failing after 18s
Vector IO Integration Tests / test-matrix (3.13, remote::pgvector) (push) Failing after 20s
Vector IO Integration Tests / test-matrix (3.13, inline::sqlite-vec) (push) Failing after 16s
Python Package Build Test / build (3.13) (push) Failing after 53s
Vector IO Integration Tests / test-matrix (3.13, inline::milvus) (push) Failing after 59s
Vector IO Integration Tests / test-matrix (3.12, inline::sqlite-vec) (push) Failing after 1m1s
Update ReadTheDocs / update-readthedocs (push) Failing after 1m6s
Pre-commit / pre-commit (push) Successful in 1m53s

A bunch of miscellaneous cleanup focusing on tests, which ended up
speeding up the starter distro substantially.

- Pulled llama stack client init for tests into `pytest_sessionstart` so
it does not clobber per-test output (see the sketch below)
- Profiling that initialization showed where we were doing lots of heavy
imports for the starter distro, so those imports are now lazy (illustrated below)
- The starter distro now starts 20+ seconds faster on my Mac
- A few other small refactors for `compat_client`
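
For reference, a minimal sketch of the session-start pattern, condensed from the `conftest.py` diff below: the client is instantiated once in `pytest_sessionstart` and cached on the session object, so all of its noisy initialization logging happens before any test one-liners are printed.

```python
import time

import pytest


def pytest_sessionstart(session):
    # instantiate_llama_stack_client is defined in fixtures/common.py (see diff below)
    start_time = time.time()
    session._llama_stack_client = instantiate_llama_stack_client(session)
    print(f"llama_stack_client instantiated in {time.time() - start_time:.3f}s")


@pytest.fixture(scope="session")
def llama_stack_client(request):
    # fixtures just read the cached client back off the session
    client = request.session._llama_stack_client
    assert client is not None, "llama_stack_client not found in session cache"
    return client
```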
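
The lazy-import hunks themselves fall in the suppressed parts of this diff, so the following is only an illustration of the pattern (`heavy_ml_library` and the function name are hypothetical, not from this PR): a heavy import moves from module scope into the function that needs it, so its cost is paid on first use instead of at every server startup.

```python
# Before: imported at module load, so every server start pays the import cost
# from heavy_ml_library import EmbeddingModel  # hypothetical heavy dependency


def get_embedding_model(config):
    # After: imported only when this code path is actually exercised
    from heavy_ml_library import EmbeddingModel  # hypothetical heavy dependency

    return EmbeddingModel(config)
```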
Ashwin Bharambe 2025-08-05 14:55:05 -07:00 committed by GitHub
parent e12524af85
commit 7f834339ba
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
45 changed files with 2897 additions and 1688 deletions

View file

@@ -9,12 +9,6 @@ from openai import BadRequestError, OpenAI
from llama_stack.core.library_client import LlamaStackAsLibraryClient
@pytest.fixture
def openai_client(client_with_models):
base_url = f"{client_with_models.base_url}/v1/openai/v1"
return OpenAI(base_url=base_url, api_key="bar")
@pytest.mark.parametrize(
"stream",
[
@@ -41,15 +35,14 @@ def openai_client(client_with_models):
],
],
)
def test_responses_store(openai_client, client_with_models, text_model_id, stream, tools):
if isinstance(client_with_models, LlamaStackAsLibraryClient):
pytest.skip("OpenAI responses are not supported when testing with library client yet.")
def test_responses_store(compat_client, text_model_id, stream, tools):
if not isinstance(compat_client, OpenAI):
pytest.skip("OpenAI client is required until responses.delete() exists in llama-stack-client")
client = openai_client
message = "What's the weather in Tokyo?" + (
" YOU MUST USE THE get_weather function to get the weather." if tools else ""
)
response = client.responses.create(
response = compat_client.responses.create(
model=text_model_id,
input=[
{
@@ -78,14 +71,8 @@ def test_responses_store(openai_client, client_with_models, text_model_id, strea
if output_type == "message":
content = response.output[0].content[0].text
# list responses - use the underlying HTTP client for endpoints not in SDK
list_response = client._client.get("/responses")
assert list_response.status_code == 200
data = list_response.json()["data"]
assert response_id in [r["id"] for r in data]
# test retrieve response
retrieved_response = client.responses.retrieve(response_id)
retrieved_response = compat_client.responses.retrieve(response_id)
assert retrieved_response.id == response_id
assert retrieved_response.model == text_model_id
assert retrieved_response.output[0].type == output_type, retrieved_response
@@ -93,23 +80,19 @@ def test_responses_store(openai_client, client_with_models, text_model_id, strea
assert retrieved_response.output[0].content[0].text == content
# Delete the response
delete_response = client.responses.delete(response_id)
delete_response = compat_client.responses.delete(response_id)
assert delete_response is None
with pytest.raises(BadRequestError):
client.responses.retrieve(response_id)
compat_client.responses.retrieve(response_id)
def test_list_response_input_items(openai_client, client_with_models, text_model_id):
def test_list_response_input_items(compat_client, text_model_id):
"""Test the new list_openai_response_input_items endpoint."""
if isinstance(client_with_models, LlamaStackAsLibraryClient):
pytest.skip("OpenAI responses are not supported when testing with library client yet.")
client = openai_client
message = "What is the capital of France?"
# Create a response first
response = client.responses.create(
response = compat_client.responses.create(
model=text_model_id,
input=[
{
@@ -123,7 +106,7 @@ def test_list_response_input_items(openai_client, client_with_models, text_model
response_id = response.id
# Test the new list input items endpoint
input_items_response = client.responses.input_items.list(response_id=response_id)
input_items_response = compat_client.responses.input_items.list(response_id=response_id)
# Verify the structure follows OpenAI API spec
assert input_items_response.object == "list"

View file

@@ -9,12 +9,15 @@ import os
import platform
import textwrap
import time
import warnings
import pytest
from dotenv import load_dotenv
from llama_stack.log import get_logger
from .fixtures.common import instantiate_llama_stack_client
logger = get_logger(__name__, category="tests")
@@ -27,6 +30,20 @@ def pytest_runtest_makereport(item, call):
item.was_xfail = getattr(report, "wasxfail", False)
def pytest_sessionstart(session):
# stop macOS from complaining about duplicate OpenMP libraries
os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"
# pull client instantiation to session start so all the complex logs during initialization
# don't clobber the test one-liner outputs
print("instantiating llama_stack_client")
start_time = time.time()
with warnings.catch_warnings():
warnings.filterwarnings("ignore", category=DeprecationWarning)
session._llama_stack_client = instantiate_llama_stack_client(session)
print(f"llama_stack_client instantiated in {time.time() - start_time:.3f}s")
def pytest_runtest_teardown(item):
# Check if the test actually ran and passed or failed, but was not skipped or an expected failure (xfail)
outcome = getattr(item, "execution_outcome", None)

View file

@@ -82,8 +82,7 @@ def wait_for_server_ready(base_url: str, timeout: int = 30, process: subprocess.
return False
@pytest.fixture(scope="session")
def provider_data():
def get_provider_data():
# TODO: this needs to be generalized so each provider can have a sample provider data just
# like sample run config on which we can do replace_env_vars()
keymap = {
@@ -178,8 +177,14 @@ def skip_if_no_model(request):
@pytest.fixture(scope="session")
def llama_stack_client(request, provider_data):
config = request.config.getoption("--stack-config")
def llama_stack_client(request):
client = request.session._llama_stack_client
assert client is not None, "llama_stack_client not found in session cache"
return client
def instantiate_llama_stack_client(session):
config = session.config.getoption("--stack-config")
if not config:
config = get_env_or_fail("LLAMA_STACK_CONFIG")
@@ -212,13 +217,13 @@ def llama_stack_client(request, provider_data):
print(f"Server is ready at {base_url}")
# Store process for potential cleanup (pytest will handle termination at session end)
request.session._llama_stack_server_process = server_process
session._llama_stack_server_process = server_process
else:
print(f"Port {port} is already in use, assuming server is already running...")
return LlamaStackClient(
base_url=base_url,
provider_data=provider_data,
provider_data=get_provider_data(),
timeout=int(os.environ.get("LLAMA_STACK_CLIENT_TIMEOUT", "30")),
)
@@ -228,7 +233,7 @@ def llama_stack_client(request, provider_data):
if parsed_url.scheme and parsed_url.netloc:
return LlamaStackClient(
base_url=config,
provider_data=provider_data,
provider_data=get_provider_data(),
)
except Exception:
# If URL parsing fails, treat as non-URL config
@@ -243,7 +248,7 @@ def llama_stack_client(request, provider_data):
client = LlamaStackAsLibraryClient(
config,
provider_data=provider_data,
provider_data=get_provider_data(),
skip_logger_removal=True,
)
if not client.initialize():
@@ -258,8 +263,17 @@ def openai_client(client_with_models):
return OpenAI(base_url=base_url, api_key="fake")
@pytest.fixture(params=["openai_client", "llama_stack_client"])
def compat_client(request):
@pytest.fixture(params=["openai_client", "client_with_models"])
def compat_client(request, client_with_models):
if isinstance(client_with_models, LlamaStackAsLibraryClient):
# OpenAI client expects a server, so unless we also rewrite OpenAI client's requests
# to go via the Stack library client (which itself rewrites requests to be served inline),
# we cannot do this.
#
# This means when we are using Stack as a library, we will test only via the Llama Stack client.
# When we are using a server setup, we can exercise both OpenAI and Llama Stack clients.
pytest.skip("(OpenAI) Compat client cannot be used with Stack library client")
return request.getfixturevalue(request.param)

View file

@@ -6,9 +6,6 @@
import pytest
from openai import OpenAI
from llama_stack.core.library_client import LlamaStackAsLibraryClient
from ..test_cases.test_case import TestCase
@@ -59,9 +56,6 @@ def skip_if_model_doesnt_support_suffix(client_with_models, model_id):
def skip_if_model_doesnt_support_openai_chat_completion(client_with_models, model_id):
if isinstance(client_with_models, LlamaStackAsLibraryClient):
pytest.skip("OpenAI chat completions are not supported when testing with library client yet.")
provider = provider_from_model(client_with_models, model_id)
if provider.provider_type in (
"inline::meta-reference",
@@ -90,17 +84,6 @@ def skip_if_provider_isnt_openai(client_with_models, model_id):
)
@pytest.fixture
def openai_client(client_with_models):
base_url = f"{client_with_models.base_url}/v1/openai/v1"
return OpenAI(base_url=base_url, api_key="bar")
@pytest.fixture(params=["openai_client", "llama_stack_client"])
def compat_client(request):
return request.getfixturevalue(request.param)
@pytest.mark.parametrize(
"test_case",
[

View file

@@ -14,7 +14,7 @@
"models": [
{
"model": "nomic-embed-text:latest",
"modified_at": "2025-08-04T15:54:50.584357-07:00",
"modified_at": "2025-08-05T14:04:07.946926-07:00",
"digest": "0a109f422b47e3a30ba2b10eca18548e944e8a23073ee3f3e947efcf3c45e59f",
"size": 274302450,
"details": {
@@ -28,9 +28,41 @@
"quantization_level": "F16"
}
},
{
"model": "llama3.2-vision:11b",
"modified_at": "2025-07-30T18:45:02.517873-07:00",
"digest": "6f2f9757ae97e8a3f8ea33d6adb2b11d93d9a35bef277cd2c0b1b5af8e8d0b1e",
"size": 7816589186,
"details": {
"parent_model": "",
"format": "gguf",
"family": "mllama",
"families": [
"mllama"
],
"parameter_size": "10.7B",
"quantization_level": "Q4_K_M"
}
},
{
"model": "llama3.2-vision:latest",
"modified_at": "2025-07-29T20:18:47.920468-07:00",
"digest": "6f2f9757ae97e8a3f8ea33d6adb2b11d93d9a35bef277cd2c0b1b5af8e8d0b1e",
"size": 7816589186,
"details": {
"parent_model": "",
"format": "gguf",
"family": "mllama",
"families": [
"mllama"
],
"parameter_size": "10.7B",
"quantization_level": "Q4_K_M"
}
},
{
"model": "llama-guard3:1b",
"modified_at": "2025-08-01T15:46:28.963517-07:00",
"modified_at": "2025-07-25T14:39:44.978630-07:00",
"digest": "494147e06bf99e10dbe67b63a07ac81c162f18ef3341aa3390007ac828571b3b",
"size": 1600181919,
"details": {
@@ -46,7 +78,7 @@
},
{
"model": "all-minilm:l6-v2",
"modified_at": "2025-07-29T15:07:06.295748-07:00",
"modified_at": "2025-07-24T15:15:11.129290-07:00",
"digest": "1b226e2802dbb772b5fc32a58f103ca1804ef7501331012de126ab22f67475ef",
"size": 45960996,
"details": {
@@ -61,26 +93,10 @@
}
},
{
"model": "all-minilm:latest",
"modified_at": "2025-06-04T12:06:43.990073-07:00",
"digest": "1b226e2802dbb772b5fc32a58f103ca1804ef7501331012de126ab22f67475ef",
"size": 45960996,
"details": {
"parent_model": "",
"format": "gguf",
"family": "bert",
"families": [
"bert"
],
"parameter_size": "23M",
"quantization_level": "F16"
}
},
{
"model": "llama3.1:8b-instruct-fp16",
"modified_at": "2025-02-14T15:23:24.865395-08:00",
"digest": "4aacac4194543ff7f70dab3f2ebc169c132d5319bb36f7a7e99c4ff525ebcc09",
"size": 16068910253,
"model": "llama3.2:1b",
"modified_at": "2025-07-17T22:02:24.953208-07:00",
"digest": "baf6a787fdffd633537aa2eb51cfd54cb93ff08e28040095462bb63daf552878",
"size": 1321098329,
"details": {
"parent_model": "",
"format": "gguf",
@@ -88,13 +104,45 @@
"families": [
"llama"
],
"parameter_size": "8.0B",
"parameter_size": "1.2B",
"quantization_level": "Q8_0"
}
},
{
"model": "all-minilm:latest",
"modified_at": "2025-06-03T16:50:10.946583-07:00",
"digest": "1b226e2802dbb772b5fc32a58f103ca1804ef7501331012de126ab22f67475ef",
"size": 45960996,
"details": {
"parent_model": "",
"format": "gguf",
"family": "bert",
"families": [
"bert"
],
"parameter_size": "23M",
"quantization_level": "F16"
}
},
{
"model": "llama3.2:3b",
"modified_at": "2025-05-01T11:15:23.797447-07:00",
"digest": "a80c4f17acd55265feec403c7aef86be0c25983ab279d83f3bcd3abbcb5b8b72",
"size": 2019393189,
"details": {
"parent_model": "",
"format": "gguf",
"family": "llama",
"families": [
"llama"
],
"parameter_size": "3.2B",
"quantization_level": "Q4_K_M"
}
},
{
"model": "llama3.2:3b-instruct-fp16",
"modified_at": "2025-01-21T13:46:43.514008-08:00",
"modified_at": "2025-04-30T15:33:48.939665-07:00",
"digest": "195a8c01d91ec3cb1e0aad4624a51f2602c51fa7d96110f8ab5a20c84081804d",
"size": 6433703586,
"details": {

File diff suppressed because it is too large

View file

@@ -16,9 +16,9 @@
"model": "llama3.2:3b-instruct-fp16",
"name": "llama3.2:3b-instruct-fp16",
"digest": "195a8c01d91ec3cb1e0aad4624a51f2602c51fa7d96110f8ab5a20c84081804d",
"expires_at": "2025-08-04T16:00:57.955349-07:00",
"size": 8581748736,
"size_vram": 8581748736,
"expires_at": "2025-08-05T14:12:18.480323-07:00",
"size": 7919570944,
"size_vram": 7919570944,
"details": {
"parent_model": "",
"format": "gguf",
@@ -29,6 +29,24 @@
"parameter_size": "3.2B",
"quantization_level": "F16"
}
},
{
"model": "all-minilm:l6-v2",
"name": "all-minilm:l6-v2",
"digest": "1b226e2802dbb772b5fc32a58f103ca1804ef7501331012de126ab22f67475ef",
"expires_at": "2025-08-05T14:10:20.883978-07:00",
"size": 590204928,
"size_vram": 590204928,
"details": {
"parent_model": "",
"format": "gguf",
"family": "bert",
"families": [
"bert"
],
"parameter_size": "23M",
"quantization_level": "F16"
}
}
]
}

View file

@@ -0,0 +1,421 @@
{
"request": {
"method": "POST",
"url": "http://localhost:11434/api/embeddings",
"headers": {},
"body": {
"model": "all-minilm:l6-v2",
"input": [
"Python programming language"
]
},
"endpoint": "/api/embeddings",
"model": "all-minilm:l6-v2"
},
"response": {
"body": {
"__type__": "ollama._types.EmbedResponse",
"__data__": {
"model": "all-minilm:l6-v2",
"created_at": null,
"done": null,
"done_reason": null,
"total_duration": 105895041,
"load_duration": 91634666,
"prompt_eval_count": 3,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"embeddings": [
[
-0.063880146,
0.013411989,
-0.054502595,
0.01193493,
-0.074262686,
-0.13344447,
0.04294062,
0.045387108,
-0.06949706,
-0.035939943,
0.01200873,
0.0068830596,
0.08886977,
0.0026030506,
0.032482542,
-0.007821568,
-0.05044649,
0.006662123,
0.027794942,
-0.12791364,
0.00062353734,
0.045270294,
-0.03605076,
0.044243146,
0.0129354475,
-0.0092799105,
0.011904844,
0.026060482,
0.020055141,
-0.03368774,
-0.028043076,
0.087557025,
0.059002083,
0.053893365,
0.02027196,
0.06840361,
-0.03180594,
-0.087597735,
-0.11277839,
0.022651086,
-0.09037903,
-0.0033202847,
-0.040132593,
-0.034084503,
-0.032953303,
0.02925268,
-0.03903928,
0.04551951,
-0.0331016,
-0.006518362,
-0.09629851,
-0.011739161,
-0.052575007,
-0.064773224,
0.031043475,
-0.012586444,
0.09737276,
0.005224713,
-0.035071153,
-0.1404299,
-0.06678175,
0.03654573,
-0.039277818,
0.07014256,
-0.0010227569,
-0.026846789,
-0.0175696,
0.03044068,
0.06403526,
-0.031643596,
-0.14598879,
-0.045400888,
-0.018469285,
0.06689445,
0.030553635,
-0.12255281,
0.061046645,
-0.05678168,
-0.005118667,
-0.0087622,
0.006514719,
-0.016424034,
-0.033650044,
0.08491301,
-0.00029260007,
-0.07339515,
0.038627055,
0.15695965,
0.010035773,
0.025318887,
-0.0021428047,
-0.04613549,
0.06244243,
-0.019905778,
-0.05471386,
0.09796629,
0.0384793,
-0.072424814,
-0.038704097,
0.07158691,
0.007360897,
-0.05120446,
0.0313513,
-0.032230332,
0.039326303,
-0.009643992,
0.069905065,
-0.052026685,
0.049440835,
-0.04272916,
-0.0037707465,
-0.04155246,
-0.0561972,
-0.03340213,
0.05105359,
0.038616214,
-0.0029470131,
0.08188407,
-0.0035886324,
0.04530431,
0.0068888925,
0.016499842,
0.016347302,
0.007283021,
-0.021663606,
-0.0046215886,
-0.007931065,
-4.1536508e-33,
-0.045777988,
-0.050903402,
-0.038634304,
0.0100991195,
0.070007294,
-0.025182785,
0.1050647,
-0.0049731904,
-0.064141616,
-0.047639705,
0.012718577,
0.05198462,
-0.016051587,
0.08170543,
0.024008816,
-0.020879291,
0.045706064,
0.091577366,
0.02512945,
0.019055998,
0.048144504,
0.097951256,
0.034154113,
0.03543114,
0.011410896,
-0.043446988,
-0.0041784984,
-0.05564714,
0.01147717,
0.0071039577,
-0.06426582,
-0.020623188,
-0.0045247558,
-0.012943628,
0.02658834,
-0.012385487,
0.008399212,
-0.06824828,
0.04683057,
-0.04165085,
-0.025662417,
-0.0038799767,
0.05007075,
-0.008117481,
-0.023308154,
0.023914568,
0.0015741173,
0.046142872,
-0.06898886,
0.041611847,
0.0045286645,
-0.047628563,
0.054236773,
0.06972688,
-0.016889753,
0.04806098,
0.012714234,
0.0022186628,
-0.006355918,
-0.031550523,
0.023726372,
0.06859327,
0.077228814,
-0.01227583,
0.03901903,
0.034360897,
0.03032876,
0.058690928,
0.08030179,
0.06976231,
-0.09047136,
0.02376998,
-0.008751518,
0.038334776,
-0.02751323,
0.023137644,
0.027101006,
-0.08135271,
-0.010334998,
0.04730408,
-0.02033998,
-0.026008504,
-0.017415512,
-0.0035714875,
-0.018727385,
-0.037389226,
0.041064497,
0.05317889,
-0.0055602547,
-0.058561854,
-0.072036326,
-0.075019896,
0.04825644,
0.011348427,
-0.02259257,
1.3515749e-33,
0.006240622,
0.031606406,
-0.036119435,
-0.0016494404,
-0.08255665,
-0.06069396,
0.059934463,
0.014492232,
0.059514895,
0.027053975,
-0.011601325,
-0.057609312,
0.10365583,
-0.002784741,
0.07693759,
0.019432511,
-0.052210074,
0.015158053,
-0.0012768542,
0.027789148,
-0.115292676,
0.047323048,
-0.07599195,
-0.074344486,
-0.029194841,
-0.020079462,
-0.034749795,
-0.05769437,
-0.0301632,
0.04749987,
0.012206333,
0.011497502,
-0.051970575,
0.05972769,
0.03281016,
0.0013676677,
0.057720944,
-0.041179247,
-0.02150875,
-0.0067487382,
0.1419711,
0.05795878,
0.010094941,
0.09603845,
0.014521089,
0.02133803,
-0.07551916,
0.07887724,
-0.04273237,
-0.06601746,
-0.038729392,
-0.008161129,
0.015012324,
-0.049418066,
-0.037083283,
-0.02378242,
0.03743137,
0.008194503,
-0.086978436,
-0.05960285,
-0.07732487,
-0.056507926,
0.029065313,
0.0073954053,
-0.077878684,
0.0026059505,
-0.10405392,
-0.04738624,
-0.015872862,
-0.11591199,
0.09724705,
0.0049243565,
-0.010273523,
0.0066429917,
-0.060295314,
0.02550513,
-0.052950058,
-0.0038489713,
-0.050250847,
0.07679287,
0.046089787,
0.007386997,
0.0046740095,
0.07385862,
-0.07792065,
0.0013675193,
0.013730894,
0.05658653,
0.021934126,
0.007195913,
0.0076705213,
0.10221154,
0.060060997,
0.036779005,
-0.037765697,
-1.187368e-08,
-0.00885571,
0.01760442,
0.062224448,
0.032051455,
-0.011581793,
0.051908698,
-0.011685676,
-0.06391574,
-0.029866237,
0.03258576,
0.0055078953,
-0.012040446,
-0.054406017,
-0.056690563,
-0.030638037,
0.14276367,
0.028526368,
-0.028743364,
0.019917691,
0.025652615,
0.073813364,
-0.0066998666,
0.0061508445,
0.09610696,
-0.08799916,
-0.0089272335,
0.03823298,
0.04832936,
0.018829934,
-0.10534708,
0.048226915,
-0.02225069,
0.020491786,
0.014641141,
0.030794447,
-0.029119467,
0.008283775,
-0.04506887,
0.0025344177,
0.021756247,
-0.008108281,
0.00904927,
-0.013340866,
-0.014037631,
0.06845187,
0.045173325,
-0.034587316,
-0.07275669,
-0.004159724,
-0.058231864,
-0.033032075,
0.0040235794,
-0.019985583,
-0.020122562,
0.055365406,
0.10250875,
-0.10799118,
-0.013780294,
-0.009652406,
0.015592658,
-0.031221472,
0.1329332,
0.15243866,
-0.022426173
]
]
}
},
"is_streaming": false
}
}

View file

@@ -0,0 +1,674 @@
{
"request": {
"method": "POST",
"url": "http://localhost:11434/v1/chat/completions",
"headers": {},
"body": {
"model": "llama3.2:3b-instruct-fp16",
"messages": [
{
"role": "user",
"content": "Hello, world!"
}
],
"stream": true
},
"endpoint": "/chat/completions",
"model": "llama3.2:3b-instruct-fp16"
},
"response": {
"body": [
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl-698",
"choices": [
{
"delta": {
"content": "Hello",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1754422171,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl-698",
"choices": [
{
"delta": {
"content": "!",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1754422171,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl-698",
"choices": [
{
"delta": {
"content": " It",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1754422171,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl-698",
"choices": [
{
"delta": {
"content": "'s",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1754422171,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl-698",
"choices": [
{
"delta": {
"content": " nice",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1754422171,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl-698",
"choices": [
{
"delta": {
"content": " to",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1754422171,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl-698",
"choices": [
{
"delta": {
"content": " meet",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1754422171,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl-698",
"choices": [
{
"delta": {
"content": " you",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1754422171,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl-698",
"choices": [
{
"delta": {
"content": ".",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1754422171,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl-698",
"choices": [
{
"delta": {
"content": " Is",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1754422171,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl-698",
"choices": [
{
"delta": {
"content": " there",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1754422171,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl-698",
"choices": [
{
"delta": {
"content": " something",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1754422171,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl-698",
"choices": [
{
"delta": {
"content": " I",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1754422171,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl-698",
"choices": [
{
"delta": {
"content": " can",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1754422171,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl-698",
"choices": [
{
"delta": {
"content": " help",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1754422172,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl-698",
"choices": [
{
"delta": {
"content": " you",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1754422172,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl-698",
"choices": [
{
"delta": {
"content": " with",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1754422172,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl-698",
"choices": [
{
"delta": {
"content": " or",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1754422172,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl-698",
"choices": [
{
"delta": {
"content": " would",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1754422172,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl-698",
"choices": [
{
"delta": {
"content": " you",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1754422172,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl-698",
"choices": [
{
"delta": {
"content": " like",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1754422172,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl-698",
"choices": [
{
"delta": {
"content": " to",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1754422172,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl-698",
"choices": [
{
"delta": {
"content": " chat",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1754422172,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl-698",
"choices": [
{
"delta": {
"content": "?",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1754422172,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl-698",
"choices": [
{
"delta": {
"content": "",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": "stop",
"index": 0,
"logprobs": null
}
],
"created": 1754422172,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
}
],
"is_streaming": true
}
}

View file

@@ -0,0 +1,56 @@
{
"request": {
"method": "POST",
"url": "http://localhost:11434/v1/chat/completions",
"headers": {},
"body": {
"model": "llama3.2:3b-instruct-fp16",
"messages": [
{
"role": "user",
"content": "Hello, world!"
}
],
"stream": false
},
"endpoint": "/chat/completions",
"model": "llama3.2:3b-instruct-fp16"
},
"response": {
"body": {
"__type__": "openai.types.chat.chat_completion.ChatCompletion",
"__data__": {
"id": "chatcmpl-796",
"choices": [
{
"finish_reason": "stop",
"index": 0,
"logprobs": null,
"message": {
"content": "Hello! It's nice to meet you. Is there something I can help you with, or would you like to chat?",
"refusal": null,
"role": "assistant",
"annotations": null,
"audio": null,
"function_call": null,
"tool_calls": null
}
}
],
"created": 1754422173,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": {
"completion_tokens": 26,
"prompt_tokens": 29,
"total_tokens": 55,
"completion_tokens_details": null,
"prompt_tokens_details": null
}
}
},
"is_streaming": false
}
}

View file

@@ -11,10 +11,8 @@ from io import BytesIO
import pytest
from llama_stack_client import BadRequestError, LlamaStackClient
from openai import BadRequestError as OpenAIBadRequestError
from openai import OpenAI
from llama_stack.apis.vector_io import Chunk
from llama_stack.core.library_client import LlamaStackAsLibraryClient
logger = logging.getLogger(__name__)
@@ -69,19 +67,6 @@ def skip_if_provider_doesnt_support_openai_vector_stores_search(client_with_mode
)
@pytest.fixture
def openai_client(client_with_models):
base_url = f"{client_with_models.base_url}/v1/openai/v1"
return OpenAI(base_url=base_url, api_key="fake")
@pytest.fixture(params=["openai_client", "llama_stack_client"])
def compat_client(request, client_with_models):
if request.param == "openai_client" and isinstance(client_with_models, LlamaStackAsLibraryClient):
pytest.skip("OpenAI client tests not supported with library client")
return request.getfixturevalue(request.param)
@pytest.fixture(scope="session")
def sample_chunks():
return [