fix: update tests for OpenAI-style models endpoint (#4053)

The llama-stack-client now uses `/v1/openai/v1/models`, which returns
OpenAI-compatible model objects with `id` and `custom_metadata` fields
instead of the Resource-style `identifier` field. Updated api_recorder
to handle the new endpoint and modified tests to read model metadata
from `custom_metadata`. Deleted stale model recordings so they can be
re-recorded.

**NOTE: CI will be red on this one since it is dependent on
https://github.com/llamastack/llama-stack-client-python/pull/291/files
landing. I verified locally that it is green.**
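
For reviewers, a minimal before/after sketch of the access-pattern change, assuming the client returns OpenAI-style model objects as described above; the base URL and variable names are illustrative, not part of this commit:

```python
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")  # assumed local server
models = client.models.list()

# Before: Resource-style objects exposed `identifier` and `model_type` directly.
# llm_ids = [m.identifier for m in models if m.model_type == "llm"]

# After: OpenAI-compatible objects expose `id`; Llama Stack-specific fields such
# as `model_type` now live under `custom_metadata`, which may be None.
llm_ids = [
    m.id
    for m in models
    if m.custom_metadata and m.custom_metadata.get("model_type") == "llm"
]
```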
Ashwin Bharambe authored on 2025-11-03 17:30:08 -08:00; committed by GitHub
parent 4a5ef65286
commit cb40da210f
27 changed files with 852 additions and 6707 deletions
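
The api_recorder changes later in this diff all repeat one idea: the new path joins the set of model-list endpoints that get special handling (no per-test isolation key, digest-named recordings, merged replay, and synchronous iterator collection). A hedged sketch of that shared membership check; `MODEL_LIST_ENDPOINTS` and `is_model_list_endpoint` are hypothetical names for illustration, not helpers added by this commit, which inlines the tuple at each call site:

```python
# Hypothetical helper equivalent to the repeated tuple checks in the hunks below.
MODEL_LIST_ENDPOINTS = ("/api/tags", "/v1/models", "/v1/openai/v1/models")

def is_model_list_endpoint(path: str) -> bool:
    # Model-list responses are shared infrastructure: they are not keyed by
    # test_id, their recordings are digest-named, and replay merges all
    # recordings that share the same request hash.
    return path in MODEL_LIST_ENDPOINTS
```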


@@ -12,7 +12,7 @@ from llama_stack.core.ui.modules.api import llama_stack_api
def models():
# Models Section
st.header("Models")
-models_info = {m.identifier: m.to_dict() for m in llama_stack_api.client.models.list()}
+models_info = {m.id: m.model_dump() for m in llama_stack_api.client.models.list()}
selected_model = st.selectbox("Select a model", list(models_info.keys()))
st.json(models_info[selected_model])


@@ -12,7 +12,11 @@ from llama_stack.core.ui.modules.api import llama_stack_api
with st.sidebar:
st.header("Configuration")
available_models = llama_stack_api.client.models.list()
-available_models = [model.identifier for model in available_models if model.model_type == "llm"]
+available_models = [
+    model.id
+    for model in available_models
+    if model.custom_metadata and model.custom_metadata.get("model_type") == "llm"
+]
selected_model = st.selectbox(
"Choose a model",
available_models,


@@ -156,7 +156,7 @@ def normalize_inference_request(method: str, url: str, headers: dict[str, Any],
}
# Include test_id for isolation, except for shared infrastructure endpoints
-if parsed.path not in ("/api/tags", "/v1/models"):
+if parsed.path not in ("/api/tags", "/v1/models", "/v1/openai/v1/models"):
normalized["test_id"] = test_id
normalized_json = json.dumps(normalized, sort_keys=True)
@@ -430,7 +430,7 @@ class ResponseStorage:
# For model-list endpoints, include digest in filename to distinguish different model sets
endpoint = request.get("endpoint")
if endpoint in ("/api/tags", "/v1/models"):
if endpoint in ("/api/tags", "/v1/models", "/v1/openai/v1/models"):
digest = _model_identifiers_digest(endpoint, response)
response_file = f"models-{request_hash}-{digest}.json"
@@ -554,13 +554,14 @@ def _model_identifiers_digest(endpoint: str, response: dict[str, Any]) -> str:
Supported endpoints:
- '/api/tags' (Ollama): response body has 'models': [ { name/model/digest/id/... }, ... ]
- '/v1/models' (OpenAI): response body is: [ { id: ... }, ... ]
+- '/v1/openai/v1/models' (OpenAI): response body is: [ { id: ... }, ... ]
Returns a list of unique identifiers or None if structure doesn't match.
"""
if "models" in response["body"]:
# ollama
items = response["body"]["models"]
else:
-# openai
+# openai or openai-style endpoints
items = response["body"]
idents = [m.model if endpoint == "/api/tags" else m.id for m in items]
return sorted(set(idents))
@@ -581,7 +582,7 @@ def _combine_model_list_responses(endpoint: str, records: list[dict[str, Any]])
seen: dict[str, dict[str, Any]] = {}
for rec in records:
body = rec["response"]["body"]
if endpoint == "/v1/models":
if endpoint in ("/v1/models", "/v1/openai/v1/models"):
for m in body:
key = m.id
seen[key] = m
@@ -665,7 +666,7 @@ async def _patched_inference_method(original_method, self, client_type, endpoint
logger.info(f" Test context: {get_test_context()}")
if mode == APIRecordingMode.LIVE or storage is None:
if endpoint == "/v1/models":
if endpoint in ("/v1/models", "/v1/openai/v1/models"):
return original_method(self, *args, **kwargs)
else:
return await original_method(self, *args, **kwargs)
@@ -699,7 +700,7 @@ async def _patched_inference_method(original_method, self, client_type, endpoint
recording = None
if mode == APIRecordingMode.REPLAY or mode == APIRecordingMode.RECORD_IF_MISSING:
# Special handling for model-list endpoints: merge all recordings with this hash
if endpoint in ("/api/tags", "/v1/models"):
if endpoint in ("/api/tags", "/v1/models", "/v1/openai/v1/models"):
records = storage._model_list_responses(request_hash)
recording = _combine_model_list_responses(endpoint, records)
else:
@@ -739,13 +740,13 @@ async def _patched_inference_method(original_method, self, client_type, endpoint
)
if mode == APIRecordingMode.RECORD or (mode == APIRecordingMode.RECORD_IF_MISSING and not recording):
if endpoint == "/v1/models":
if endpoint in ("/v1/models", "/v1/openai/v1/models"):
response = original_method(self, *args, **kwargs)
else:
response = await original_method(self, *args, **kwargs)
# we want to store the result of the iterator, not the iterator itself
if endpoint == "/v1/models":
if endpoint in ("/v1/models", "/v1/openai/v1/models"):
response = [m async for m in response]
request_data = {