chore(misc): make tests and starter faster (#3042)
Some checks failed
Integration Tests (Replay) / discover-tests (push) Successful in 3s
Test External Providers Installed via Module / test-external-providers-from-module (venv) (push) Has been skipped
Integration Auth Tests / test-matrix (oauth2_token) (push) Failing after 9s
Python Package Build Test / build (3.12) (push) Failing after 4s
Vector IO Integration Tests / test-matrix (3.12, inline::milvus) (push) Failing after 12s
Test Llama Stack Build / generate-matrix (push) Successful in 11s
Test Llama Stack Build / build-ubi9-container-distribution (push) Failing after 12s
Vector IO Integration Tests / test-matrix (3.12, inline::faiss) (push) Failing after 14s
SqlStore Integration Tests / test-postgres (3.12) (push) Failing after 22s
Test External API and Providers / test-external (venv) (push) Failing after 14s
Integration Tests (Replay) / Integration Tests (, , , client=, vision=) (push) Failing after 12s
Vector IO Integration Tests / test-matrix (3.12, remote::pgvector) (push) Failing after 15s
SqlStore Integration Tests / test-postgres (3.13) (push) Failing after 22s
Test Llama Stack Build / build-custom-container-distribution (push) Failing after 14s
Unit Tests / unit-tests (3.13) (push) Failing after 14s
Test Llama Stack Build / build-single-provider (push) Failing after 13s
Vector IO Integration Tests / test-matrix (3.12, remote::chromadb) (push) Failing after 18s
Unit Tests / unit-tests (3.12) (push) Failing after 16s
Vector IO Integration Tests / test-matrix (3.12, remote::qdrant) (push) Failing after 18s
Vector IO Integration Tests / test-matrix (3.13, remote::weaviate) (push) Failing after 10s
Vector IO Integration Tests / test-matrix (3.13, inline::faiss) (push) Failing after 11s
Vector IO Integration Tests / test-matrix (3.12, remote::weaviate) (push) Failing after 16s
Vector IO Integration Tests / test-matrix (3.13, remote::qdrant) (push) Failing after 18s
Test Llama Stack Build / build (push) Failing after 12s
Vector IO Integration Tests / test-matrix (3.13, remote::chromadb) (push) Failing after 18s
Vector IO Integration Tests / test-matrix (3.13, remote::pgvector) (push) Failing after 20s
Vector IO Integration Tests / test-matrix (3.13, inline::sqlite-vec) (push) Failing after 16s
Python Package Build Test / build (3.13) (push) Failing after 53s
Vector IO Integration Tests / test-matrix (3.13, inline::milvus) (push) Failing after 59s
Vector IO Integration Tests / test-matrix (3.12, inline::sqlite-vec) (push) Failing after 1m1s
Update ReadTheDocs / update-readthedocs (push) Failing after 1m6s
Pre-commit / pre-commit (push) Successful in 1m53s

A bunch of miscellaneous cleanup focusing on tests, which ended up
speeding up the starter distro substantially.

- Pulled llama stack client init for tests into `pytest_sessionstart` so
it does not clobber per-test output (see the sketch below)
- Profiling that initialization showed where we were doing lots of heavy
imports for the starter distro, so those imports are now lazy (illustrated below)
- The starter distro now starts 20+ seconds faster on my Mac
- A few other small refactors for `compat_client`
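
For reference, a minimal sketch of the session-start pattern, condensed from the `conftest.py` diff below: the client is instantiated once in `pytest_sessionstart` and cached on the session object, so all of its noisy initialization logging happens before any test one-liners are printed.

```python
import time

import pytest


def pytest_sessionstart(session):
    # instantiate_llama_stack_client is defined in fixtures/common.py (see diff below)
    start_time = time.time()
    session._llama_stack_client = instantiate_llama_stack_client(session)
    print(f"llama_stack_client instantiated in {time.time() - start_time:.3f}s")


@pytest.fixture(scope="session")
def llama_stack_client(request):
    # fixtures just read the cached client back off the session
    client = request.session._llama_stack_client
    assert client is not None, "llama_stack_client not found in session cache"
    return client
```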
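
The lazy-import hunks themselves fall in the suppressed parts of this diff, so the following is only an illustration of the pattern (`heavy_ml_library` and the function name are hypothetical, not from this PR): a heavy import moves from module scope into the function that needs it, so its cost is paid on first use instead of at every server startup.

```python
# Before: imported at module load, so every server start pays the import cost
# from heavy_ml_library import EmbeddingModel  # hypothetical heavy dependency


def get_embedding_model(config):
    # After: imported only when this code path is actually exercised
    from heavy_ml_library import EmbeddingModel  # hypothetical heavy dependency

    return EmbeddingModel(config)
```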
Ashwin Bharambe 2025-08-05 14:55:05 -07:00 committed by GitHub
parent e12524af85
commit 7f834339ba
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
45 changed files with 2897 additions and 1688 deletions

View file

@@ -9,12 +9,6 @@ from openai import BadRequestError, OpenAI
from llama_stack.core.library_client import LlamaStackAsLibraryClient
@pytest.fixture
def openai_client(client_with_models):
base_url = f"{client_with_models.base_url}/v1/openai/v1"
return OpenAI(base_url=base_url, api_key="bar")
@pytest.mark.parametrize(
"stream",
[
@@ -41,15 +35,14 @@ def openai_client(client_with_models):
],
],
)
def test_responses_store(openai_client, client_with_models, text_model_id, stream, tools):
if isinstance(client_with_models, LlamaStackAsLibraryClient):
pytest.skip("OpenAI responses are not supported when testing with library client yet.")
def test_responses_store(compat_client, text_model_id, stream, tools):
if not isinstance(compat_client, OpenAI):
pytest.skip("OpenAI client is required until responses.delete() exists in llama-stack-client")
client = openai_client
message = "What's the weather in Tokyo?" + (
" YOU MUST USE THE get_weather function to get the weather." if tools else ""
)
response = client.responses.create(
response = compat_client.responses.create(
model=text_model_id,
input=[
{
@@ -78,14 +71,8 @@ def test_responses_store(openai_client, client_with_models, text_model_id, strea
if output_type == "message":
content = response.output[0].content[0].text
# list responses - use the underlying HTTP client for endpoints not in SDK
list_response = client._client.get("/responses")
assert list_response.status_code == 200
data = list_response.json()["data"]
assert response_id in [r["id"] for r in data]
# test retrieve response
retrieved_response = client.responses.retrieve(response_id)
retrieved_response = compat_client.responses.retrieve(response_id)
assert retrieved_response.id == response_id
assert retrieved_response.model == text_model_id
assert retrieved_response.output[0].type == output_type, retrieved_response
@@ -93,23 +80,19 @@ def test_responses_store(openai_client, client_with_models, text_model_id, strea
assert retrieved_response.output[0].content[0].text == content
# Delete the response
delete_response = client.responses.delete(response_id)
delete_response = compat_client.responses.delete(response_id)
assert delete_response is None
with pytest.raises(BadRequestError):
client.responses.retrieve(response_id)
compat_client.responses.retrieve(response_id)
def test_list_response_input_items(openai_client, client_with_models, text_model_id):
def test_list_response_input_items(compat_client, text_model_id):
"""Test the new list_openai_response_input_items endpoint."""
if isinstance(client_with_models, LlamaStackAsLibraryClient):
pytest.skip("OpenAI responses are not supported when testing with library client yet.")
client = openai_client
message = "What is the capital of France?"
# Create a response first
response = client.responses.create(
response = compat_client.responses.create(
model=text_model_id,
input=[
{
@@ -123,7 +106,7 @@ def test_list_response_input_items(openai_client, client_with_models, text_model
response_id = response.id
# Test the new list input items endpoint
input_items_response = client.responses.input_items.list(response_id=response_id)
input_items_response = compat_client.responses.input_items.list(response_id=response_id)
# Verify the structure follows OpenAI API spec
assert input_items_response.object == "list"

View file

@@ -9,12 +9,15 @@ import os
import platform
import textwrap
import time
import warnings
import pytest
from dotenv import load_dotenv
from llama_stack.log import get_logger
from .fixtures.common import instantiate_llama_stack_client
logger = get_logger(__name__, category="tests")
@@ -27,6 +30,20 @@ def pytest_runtest_makereport(item, call):
item.was_xfail = getattr(report, "wasxfail", False)
def pytest_sessionstart(session):
# stop macOS from complaining about duplicate OpenMP libraries
os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"
# pull client instantiation to session start so all the complex logs during initialization
# don't clobber the test one-liner outputs
print("instantiating llama_stack_client")
start_time = time.time()
with warnings.catch_warnings():
warnings.filterwarnings("ignore", category=DeprecationWarning)
session._llama_stack_client = instantiate_llama_stack_client(session)
print(f"llama_stack_client instantiated in {time.time() - start_time:.3f}s")
def pytest_runtest_teardown(item):
# Check if the test actually ran and passed or failed, but was not skipped or an expected failure (xfail)
outcome = getattr(item, "execution_outcome", None)

View file

@@ -82,8 +82,7 @@ def wait_for_server_ready(base_url: str, timeout: int = 30, process: subprocess.
return False
@pytest.fixture(scope="session")
def provider_data():
def get_provider_data():
# TODO: this needs to be generalized so each provider can have a sample provider data just
# like sample run config on which we can do replace_env_vars()
keymap = {
@@ -178,8 +177,14 @@ def skip_if_no_model(request):
@pytest.fixture(scope="session")
def llama_stack_client(request, provider_data):
config = request.config.getoption("--stack-config")
def llama_stack_client(request):
client = request.session._llama_stack_client
assert client is not None, "llama_stack_client not found in session cache"
return client
def instantiate_llama_stack_client(session):
config = session.config.getoption("--stack-config")
if not config:
config = get_env_or_fail("LLAMA_STACK_CONFIG")
@@ -212,13 +217,13 @@ def llama_stack_client(request, provider_data):
print(f"Server is ready at {base_url}")
# Store process for potential cleanup (pytest will handle termination at session end)
request.session._llama_stack_server_process = server_process
session._llama_stack_server_process = server_process
else:
print(f"Port {port} is already in use, assuming server is already running...")
return LlamaStackClient(
base_url=base_url,
provider_data=provider_data,
provider_data=get_provider_data(),
timeout=int(os.environ.get("LLAMA_STACK_CLIENT_TIMEOUT", "30")),
)
@@ -228,7 +233,7 @@ def llama_stack_client(request, provider_data):
if parsed_url.scheme and parsed_url.netloc:
return LlamaStackClient(
base_url=config,
provider_data=provider_data,
provider_data=get_provider_data(),
)
except Exception:
# If URL parsing fails, treat as non-URL config
@@ -243,7 +248,7 @@ def llama_stack_client(request, provider_data):
client = LlamaStackAsLibraryClient(
config,
provider_data=provider_data,
provider_data=get_provider_data(),
skip_logger_removal=True,
)
if not client.initialize():
@@ -258,8 +263,17 @@ def openai_client(client_with_models):
return OpenAI(base_url=base_url, api_key="fake")
@pytest.fixture(params=["openai_client", "llama_stack_client"])
def compat_client(request):
@pytest.fixture(params=["openai_client", "client_with_models"])
def compat_client(request, client_with_models):
if isinstance(client_with_models, LlamaStackAsLibraryClient):
# OpenAI client expects a server, so unless we also rewrite OpenAI client's requests
# to go via the Stack library client (which itself rewrites requests to be served inline),
# we cannot do this.
#
# This means when we are using Stack as a library, we will test only via the Llama Stack client.
# When we are using a server setup, we can exercise both OpenAI and Llama Stack clients.
pytest.skip("(OpenAI) Compat client cannot be used with Stack library client")
return request.getfixturevalue(request.param)

View file

@@ -6,9 +6,6 @@
import pytest
from openai import OpenAI
from llama_stack.core.library_client import LlamaStackAsLibraryClient
from ..test_cases.test_case import TestCase
@@ -59,9 +56,6 @@ def skip_if_model_doesnt_support_suffix(client_with_models, model_id):
def skip_if_model_doesnt_support_openai_chat_completion(client_with_models, model_id):
if isinstance(client_with_models, LlamaStackAsLibraryClient):
pytest.skip("OpenAI chat completions are not supported when testing with library client yet.")
provider = provider_from_model(client_with_models, model_id)
if provider.provider_type in (
"inline::meta-reference",
@@ -90,17 +84,6 @@ def skip_if_provider_isnt_openai(client_with_models, model_id):
)
@pytest.fixture
def openai_client(client_with_models):
base_url = f"{client_with_models.base_url}/v1/openai/v1"
return OpenAI(base_url=base_url, api_key="bar")
@pytest.fixture(params=["openai_client", "llama_stack_client"])
def compat_client(request):
return request.getfixturevalue(request.param)
@pytest.mark.parametrize(
"test_case",
[

View file

@@ -14,7 +14,7 @@
"models": [
{
"model": "nomic-embed-text:latest",
"modified_at": "2025-08-04T15:54:50.584357-07:00",
"modified_at": "2025-08-05T14:04:07.946926-07:00",
"digest": "0a109f422b47e3a30ba2b10eca18548e944e8a23073ee3f3e947efcf3c45e59f",
"size": 274302450,
"details": {
@@ -28,9 +28,41 @@
"quantization_level": "F16"
}
},
{
"model": "llama3.2-vision:11b",
"modified_at": "2025-07-30T18:45:02.517873-07:00",
"digest": "6f2f9757ae97e8a3f8ea33d6adb2b11d93d9a35bef277cd2c0b1b5af8e8d0b1e",
"size": 7816589186,
"details": {
"parent_model": "",
"format": "gguf",
"family": "mllama",
"families": [
"mllama"
],
"parameter_size": "10.7B",
"quantization_level": "Q4_K_M"
}
},
{
"model": "llama3.2-vision:latest",
"modified_at": "2025-07-29T20:18:47.920468-07:00",
"digest": "6f2f9757ae97e8a3f8ea33d6adb2b11d93d9a35bef277cd2c0b1b5af8e8d0b1e",
"size": 7816589186,
"details": {
"parent_model": "",
"format": "gguf",
"family": "mllama",
"families": [
"mllama"
],
"parameter_size": "10.7B",
"quantization_level": "Q4_K_M"
}
},
{
"model": "llama-guard3:1b",
"modified_at": "2025-08-01T15:46:28.963517-07:00",
"modified_at": "2025-07-25T14:39:44.978630-07:00",
"digest": "494147e06bf99e10dbe67b63a07ac81c162f18ef3341aa3390007ac828571b3b",
"size": 1600181919,
"details": {
@@ -46,7 +78,7 @@
},
{
"model": "all-minilm:l6-v2",
"modified_at": "2025-07-29T15:07:06.295748-07:00",
"modified_at": "2025-07-24T15:15:11.129290-07:00",
"digest": "1b226e2802dbb772b5fc32a58f103ca1804ef7501331012de126ab22f67475ef",
"size": 45960996,
"details": {
@@ -61,26 +93,10 @@
}
},
{
"model": "all-minilm:latest",
"modified_at": "2025-06-04T12:06:43.990073-07:00",
"digest": "1b226e2802dbb772b5fc32a58f103ca1804ef7501331012de126ab22f67475ef",
"size": 45960996,
"details": {
"parent_model": "",
"format": "gguf",
"family": "bert",
"families": [
"bert"
],
"parameter_size": "23M",
"quantization_level": "F16"
}
},
{
"model": "llama3.1:8b-instruct-fp16",
"modified_at": "2025-02-14T15:23:24.865395-08:00",
"digest": "4aacac4194543ff7f70dab3f2ebc169c132d5319bb36f7a7e99c4ff525ebcc09",
"size": 16068910253,
"model": "llama3.2:1b",
"modified_at": "2025-07-17T22:02:24.953208-07:00",
"digest": "baf6a787fdffd633537aa2eb51cfd54cb93ff08e28040095462bb63daf552878",
"size": 1321098329,
"details": {
"parent_model": "",
"format": "gguf",
@@ -88,13 +104,45 @@
"families": [
"llama"
],
"parameter_size": "8.0B",
"parameter_size": "1.2B",
"quantization_level": "Q8_0"
}
},
{
"model": "all-minilm:latest",
"modified_at": "2025-06-03T16:50:10.946583-07:00",
"digest": "1b226e2802dbb772b5fc32a58f103ca1804ef7501331012de126ab22f67475ef",
"size": 45960996,
"details": {
"parent_model": "",
"format": "gguf",
"family": "bert",
"families": [
"bert"
],
"parameter_size": "23M",
"quantization_level": "F16"
}
},
{
"model": "llama3.2:3b",
"modified_at": "2025-05-01T11:15:23.797447-07:00",
"digest": "a80c4f17acd55265feec403c7aef86be0c25983ab279d83f3bcd3abbcb5b8b72",
"size": 2019393189,
"details": {
"parent_model": "",
"format": "gguf",
"family": "llama",
"families": [
"llama"
],
"parameter_size": "3.2B",
"quantization_level": "Q4_K_M"
}
},
{
"model": "llama3.2:3b-instruct-fp16",
"modified_at": "2025-01-21T13:46:43.514008-08:00",
"modified_at": "2025-04-30T15:33:48.939665-07:00",
"digest": "195a8c01d91ec3cb1e0aad4624a51f2602c51fa7d96110f8ab5a20c84081804d",
"size": 6433703586,
"details": {

File diff suppressed because it is too large

View file

@@ -16,9 +16,9 @@
"model": "llama3.2:3b-instruct-fp16",
"name": "llama3.2:3b-instruct-fp16",
"digest": "195a8c01d91ec3cb1e0aad4624a51f2602c51fa7d96110f8ab5a20c84081804d",
"expires_at": "2025-08-04T16:00:57.955349-07:00",
"size": 8581748736,
"size_vram": 8581748736,
"expires_at": "2025-08-05T14:12:18.480323-07:00",
"size": 7919570944,
"size_vram": 7919570944,
"details": {
"parent_model": "",
"format": "gguf",
@@ -29,6 +29,24 @@
"parameter_size": "3.2B",
"quantization_level": "F16"
}
},
{
"model": "all-minilm:l6-v2",
"name": "all-minilm:l6-v2",
"digest": "1b226e2802dbb772b5fc32a58f103ca1804ef7501331012de126ab22f67475ef",
"expires_at": "2025-08-05T14:10:20.883978-07:00",
"size": 590204928,
"size_vram": 590204928,
"details": {
"parent_model": "",
"format": "gguf",
"family": "bert",
"families": [
"bert"
],
"parameter_size": "23M",
"quantization_level": "F16"
}
}
]
}

View file

@@ -0,0 +1,421 @@
{
"request": {
"method": "POST",
"url": "http://localhost:11434/api/embeddings",
"headers": {},
"body": {
"model": "all-minilm:l6-v2",
"input": [
"Python programming language"
]
},
"endpoint": "/api/embeddings",
"model": "all-minilm:l6-v2"
},
"response": {
"body": {
"__type__": "ollama._types.EmbedResponse",
"__data__": {
"model": "all-minilm:l6-v2",
"created_at": null,
"done": null,
"done_reason": null,
"total_duration": 105895041,
"load_duration": 91634666,
"prompt_eval_count": 3,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"embeddings": [
[
-0.063880146,
0.013411989,
-0.054502595,
0.01193493,
-0.074262686,
-0.13344447,
0.04294062,
0.045387108,
-0.06949706,
-0.035939943,
0.01200873,
0.0068830596,
0.08886977,
0.0026030506,
0.032482542,
-0.007821568,
-0.05044649,
0.006662123,
0.027794942,
-0.12791364,
0.00062353734,
0.045270294,
-0.03605076,
0.044243146,
0.0129354475,
-0.0092799105,
0.011904844,
0.026060482,
0.020055141,
-0.03368774,
-0.028043076,
0.087557025,
0.059002083,
0.053893365,
0.02027196,
0.06840361,
-0.03180594,
-0.087597735,
-0.11277839,
0.022651086,
-0.09037903,
-0.0033202847,
-0.040132593,
-0.034084503,
-0.032953303,
0.02925268,
-0.03903928,
0.04551951,
-0.0331016,
-0.006518362,
-0.09629851,
-0.011739161,
-0.052575007,
-0.064773224,
0.031043475,
-0.012586444,
0.09737276,
0.005224713,
-0.035071153,
-0.1404299,
-0.06678175,
0.03654573,
-0.039277818,
0.07014256,
-0.0010227569,
-0.026846789,
-0.0175696,
0.03044068,
0.06403526,
-0.031643596,
-0.14598879,
-0.045400888,
-0.018469285,
0.06689445,
0.030553635,
-0.12255281,
0.061046645,
-0.05678168,
-0.005118667,
-0.0087622,
0.006514719,
-0.016424034,
-0.033650044,
0.08491301,
-0.00029260007,
-0.07339515,
0.038627055,
0.15695965,
0.010035773,
0.025318887,
-0.0021428047,
-0.04613549,
0.06244243,
-0.019905778,
-0.05471386,
0.09796629,
0.0384793,
-0.072424814,
-0.038704097,
0.07158691,
0.007360897,
-0.05120446,
0.0313513,
-0.032230332,
0.039326303,
-0.009643992,
0.069905065,
-0.052026685,
0.049440835,
-0.04272916,
-0.0037707465,
-0.04155246,
-0.0561972,
-0.03340213,
0.05105359,
0.038616214,
-0.0029470131,
0.08188407,
-0.0035886324,
0.04530431,
0.0068888925,
0.016499842,
0.016347302,
0.007283021,
-0.021663606,
-0.0046215886,
-0.007931065,
-4.1536508e-33,
-0.045777988,
-0.050903402,
-0.038634304,
0.0100991195,
0.070007294,
-0.025182785,
0.1050647,
-0.0049731904,
-0.064141616,
-0.047639705,
0.012718577,
0.05198462,
-0.016051587,
0.08170543,
0.024008816,
-0.020879291,
0.045706064,
0.091577366,
0.02512945,
0.019055998,
0.048144504,
0.097951256,
0.034154113,
0.03543114,
0.011410896,
-0.043446988,
-0.0041784984,
-0.05564714,
0.01147717,
0.0071039577,
-0.06426582,
-0.020623188,
-0.0045247558,
-0.012943628,
0.02658834,
-0.012385487,
0.008399212,
-0.06824828,
0.04683057,
-0.04165085,
-0.025662417,
-0.0038799767,
0.05007075,
-0.008117481,
-0.023308154,
0.023914568,
0.0015741173,
0.046142872,
-0.06898886,
0.041611847,
0.0045286645,
-0.047628563,
0.054236773,
0.06972688,
-0.016889753,
0.04806098,
0.012714234,
0.0022186628,
-0.006355918,
-0.031550523,
0.023726372,
0.06859327,
0.077228814,
-0.01227583,
0.03901903,
0.034360897,
0.03032876,
0.058690928,
0.08030179,
0.06976231,
-0.09047136,
0.02376998,
-0.008751518,
0.038334776,
-0.02751323,
0.023137644,
0.027101006,
-0.08135271,
-0.010334998,
0.04730408,
-0.02033998,
-0.026008504,
-0.017415512,
-0.0035714875,
-0.018727385,
-0.037389226,
0.041064497,
0.05317889,
-0.0055602547,
-0.058561854,
-0.072036326,
-0.075019896,
0.04825644,
0.011348427,
-0.02259257,
1.3515749e-33,
0.006240622,
0.031606406,
-0.036119435,
-0.0016494404,
-0.08255665,
-0.06069396,
0.059934463,
0.014492232,
0.059514895,
0.027053975,
-0.011601325,
-0.057609312,
0.10365583,
-0.002784741,
0.07693759,
0.019432511,
-0.052210074,
0.015158053,
-0.0012768542,
0.027789148,
-0.115292676,
0.047323048,
-0.07599195,
-0.074344486,
-0.029194841,
-0.020079462,
-0.034749795,
-0.05769437,
-0.0301632,
0.04749987,
0.012206333,
0.011497502,
-0.051970575,
0.05972769,
0.03281016,
0.0013676677,
0.057720944,
-0.041179247,
-0.02150875,
-0.0067487382,
0.1419711,
0.05795878,
0.010094941,
0.09603845,
0.014521089,
0.02133803,
-0.07551916,
0.07887724,
-0.04273237,
-0.06601746,
-0.038729392,
-0.008161129,
0.015012324,
-0.049418066,
-0.037083283,
-0.02378242,
0.03743137,
0.008194503,
-0.086978436,
-0.05960285,
-0.07732487,
-0.056507926,
0.029065313,
0.0073954053,
-0.077878684,
0.0026059505,
-0.10405392,
-0.04738624,
-0.015872862,
-0.11591199,
0.09724705,
0.0049243565,
-0.010273523,
0.0066429917,
-0.060295314,
0.02550513,
-0.052950058,
-0.0038489713,
-0.050250847,
0.07679287,
0.046089787,
0.007386997,
0.0046740095,
0.07385862,
-0.07792065,
0.0013675193,
0.013730894,
0.05658653,
0.021934126,
0.007195913,
0.0076705213,
0.10221154,
0.060060997,
0.036779005,
-0.037765697,
-1.187368e-08,
-0.00885571,
0.01760442,
0.062224448,
0.032051455,
-0.011581793,
0.051908698,
-0.011685676,
-0.06391574,
-0.029866237,
0.03258576,
0.0055078953,
-0.012040446,
-0.054406017,
-0.056690563,
-0.030638037,
0.14276367,
0.028526368,
-0.028743364,
0.019917691,
0.025652615,
0.073813364,
-0.0066998666,
0.0061508445,
0.09610696,
-0.08799916,
-0.0089272335,
0.03823298,
0.04832936,
0.018829934,
-0.10534708,
0.048226915,
-0.02225069,
0.020491786,
0.014641141,
0.030794447,
-0.029119467,
0.008283775,
-0.04506887,
0.0025344177,
0.021756247,
-0.008108281,
0.00904927,
-0.013340866,
-0.014037631,
0.06845187,
0.045173325,
-0.034587316,
-0.07275669,
-0.004159724,
-0.058231864,
-0.033032075,
0.0040235794,
-0.019985583,
-0.020122562,
0.055365406,
0.10250875,
-0.10799118,
-0.013780294,
-0.009652406,
0.015592658,
-0.031221472,
0.1329332,
0.15243866,
-0.022426173
]
]
}
},
"is_streaming": false
}
}

View file

@@ -0,0 +1,674 @@
{
"request": {
"method": "POST",
"url": "http://localhost:11434/v1/chat/completions",
"headers": {},
"body": {
"model": "llama3.2:3b-instruct-fp16",
"messages": [
{
"role": "user",
"content": "Hello, world!"
}
],
"stream": true
},
"endpoint": "/chat/completions",
"model": "llama3.2:3b-instruct-fp16"
},
"response": {
"body": [
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl-698",
"choices": [
{
"delta": {
"content": "Hello",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1754422171,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl-698",
"choices": [
{
"delta": {
"content": "!",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1754422171,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl-698",
"choices": [
{
"delta": {
"content": " It",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1754422171,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl-698",
"choices": [
{
"delta": {
"content": "'s",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1754422171,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl-698",
"choices": [
{
"delta": {
"content": " nice",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1754422171,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl-698",
"choices": [
{
"delta": {
"content": " to",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1754422171,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl-698",
"choices": [
{
"delta": {
"content": " meet",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1754422171,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl-698",
"choices": [
{
"delta": {
"content": " you",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1754422171,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl-698",
"choices": [
{
"delta": {
"content": ".",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1754422171,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl-698",
"choices": [
{
"delta": {
"content": " Is",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1754422171,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl-698",
"choices": [
{
"delta": {
"content": " there",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1754422171,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl-698",
"choices": [
{
"delta": {
"content": " something",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1754422171,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl-698",
"choices": [
{
"delta": {
"content": " I",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1754422171,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl-698",
"choices": [
{
"delta": {
"content": " can",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1754422171,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl-698",
"choices": [
{
"delta": {
"content": " help",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1754422172,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl-698",
"choices": [
{
"delta": {
"content": " you",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1754422172,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl-698",
"choices": [
{
"delta": {
"content": " with",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1754422172,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl-698",
"choices": [
{
"delta": {
"content": " or",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1754422172,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl-698",
"choices": [
{
"delta": {
"content": " would",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1754422172,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl-698",
"choices": [
{
"delta": {
"content": " you",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1754422172,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl-698",
"choices": [
{
"delta": {
"content": " like",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1754422172,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl-698",
"choices": [
{
"delta": {
"content": " to",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1754422172,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl-698",
"choices": [
{
"delta": {
"content": " chat",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1754422172,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl-698",
"choices": [
{
"delta": {
"content": "?",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1754422172,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl-698",
"choices": [
{
"delta": {
"content": "",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": "stop",
"index": 0,
"logprobs": null
}
],
"created": 1754422172,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
}
],
"is_streaming": true
}
}

View file

@@ -0,0 +1,56 @@
{
"request": {
"method": "POST",
"url": "http://localhost:11434/v1/chat/completions",
"headers": {},
"body": {
"model": "llama3.2:3b-instruct-fp16",
"messages": [
{
"role": "user",
"content": "Hello, world!"
}
],
"stream": false
},
"endpoint": "/chat/completions",
"model": "llama3.2:3b-instruct-fp16"
},
"response": {
"body": {
"__type__": "openai.types.chat.chat_completion.ChatCompletion",
"__data__": {
"id": "chatcmpl-796",
"choices": [
{
"finish_reason": "stop",
"index": 0,
"logprobs": null,
"message": {
"content": "Hello! It's nice to meet you. Is there something I can help you with, or would you like to chat?",
"refusal": null,
"role": "assistant",
"annotations": null,
"audio": null,
"function_call": null,
"tool_calls": null
}
}
],
"created": 1754422173,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": {
"completion_tokens": 26,
"prompt_tokens": 29,
"total_tokens": 55,
"completion_tokens_details": null,
"prompt_tokens_details": null
}
}
},
"is_streaming": false
}
}

View file

@@ -11,10 +11,8 @@ from io import BytesIO
import pytest
from llama_stack_client import BadRequestError, LlamaStackClient
from openai import BadRequestError as OpenAIBadRequestError
from openai import OpenAI
from llama_stack.apis.vector_io import Chunk
from llama_stack.core.library_client import LlamaStackAsLibraryClient
logger = logging.getLogger(__name__)
@@ -69,19 +67,6 @@ def skip_if_provider_doesnt_support_openai_vector_stores_search(client_with_mode
)
@pytest.fixture
def openai_client(client_with_models):
base_url = f"{client_with_models.base_url}/v1/openai/v1"
return OpenAI(base_url=base_url, api_key="fake")
@pytest.fixture(params=["openai_client", "llama_stack_client"])
def compat_client(request, client_with_models):
if request.param == "openai_client" and isinstance(client_with_models, LlamaStackAsLibraryClient):
pytest.skip("OpenAI client tests not supported with library client")
return request.getfixturevalue(request.param)
@pytest.fixture(scope="session")
def sample_chunks():
return [