Mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-12-03 09:53:45 +00:00)
fix: update tests for OpenAI-style models endpoint (#4053)
The llama-stack-client now uses `/v1/openai/v1/models`, which returns OpenAI-compatible model objects with `id` and `custom_metadata` fields instead of the Resource-style `identifier` field. Updated api_recorder to handle the new endpoint and modified tests to access model metadata appropriately. Deleted stale model recordings so they can be re-recorded. **NOTE: CI will be red on this one since it depends on https://github.com/llamastack/llama-stack-client-python/pull/291/files landing. I verified locally that it is green.**
This commit is contained in:
parent 4a5ef65286
commit cb40da210f

27 changed files with 852 additions and 6707 deletions
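For orientation, a condensed sketch of the access-pattern change the hunks below apply in the UI and tests; the exact attribute surface of the returned objects is assumed from the description above:

```python
# Sketch only: assumes the client now returns OpenAI-style model objects
# with `id` and `custom_metadata`, as described in the commit message.
models = llama_stack_api.client.models.list()

# Old Resource-style access (removed in this change):
#   names = [m.identifier for m in models]
#   info = {m.identifier: m.to_dict() for m in models}

# New OpenAI-style access:
names = [m.id for m in models]
info = {m.id: m.model_dump() for m in models}

# Extra fields such as model_type now live under custom_metadata:
llms = [m.id for m in models if m.custom_metadata and m.custom_metadata.get("model_type") == "llm"]
```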
@@ -12,7 +12,7 @@ from llama_stack.core.ui.modules.api import llama_stack_api
 def models():
     # Models Section
     st.header("Models")
-    models_info = {m.identifier: m.to_dict() for m in llama_stack_api.client.models.list()}
+    models_info = {m.id: m.model_dump() for m in llama_stack_api.client.models.list()}

     selected_model = st.selectbox("Select a model", list(models_info.keys()))
     st.json(models_info[selected_model])

@@ -12,7 +12,11 @@ from llama_stack.core.ui.modules.api import llama_stack_api
 with st.sidebar:
     st.header("Configuration")
     available_models = llama_stack_api.client.models.list()
-    available_models = [model.identifier for model in available_models if model.model_type == "llm"]
+    available_models = [
+        model.id
+        for model in available_models
+        if model.custom_metadata and model.custom_metadata.get("model_type") == "llm"
+    ]
     selected_model = st.selectbox(
         "Choose a model",
         available_models,

@@ -156,7 +156,7 @@ def normalize_inference_request(method: str, url: str, headers: dict[str, Any],
     }

     # Include test_id for isolation, except for shared infrastructure endpoints
-    if parsed.path not in ("/api/tags", "/v1/models"):
+    if parsed.path not in ("/api/tags", "/v1/models", "/v1/openai/v1/models"):
         normalized["test_id"] = test_id

     normalized_json = json.dumps(normalized, sort_keys=True)

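The comment in this hunk carries the key behavior: model-list calls are shared infrastructure, so their normalized request (and therefore their hash) must not vary per test. A rough, self-contained sketch of that effect; the `normalize` helper and its dict shape are illustrative, not the real `normalize_inference_request` signature:

```python
import json

def normalize(path: str, test_id: str) -> str:
    # Illustrative only: mirrors the conditional in the hunk above; the real
    # normalize_inference_request also folds in method, headers, and body.
    normalized = {"path": path}
    if path not in ("/api/tags", "/v1/models", "/v1/openai/v1/models"):
        normalized["test_id"] = test_id
    return json.dumps(normalized, sort_keys=True)

# Model-list requests hash identically across tests, so one recording is shared...
assert normalize("/v1/openai/v1/models", "test_a") == normalize("/v1/openai/v1/models", "test_b")
# ...while inference requests stay isolated per test.
assert normalize("/v1/chat/completions", "test_a") != normalize("/v1/chat/completions", "test_b")
```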
@@ -430,7 +430,7 @@ class ResponseStorage:

         # For model-list endpoints, include digest in filename to distinguish different model sets
         endpoint = request.get("endpoint")
-        if endpoint in ("/api/tags", "/v1/models"):
+        if endpoint in ("/api/tags", "/v1/models", "/v1/openai/v1/models"):
             digest = _model_identifiers_digest(endpoint, response)
             response_file = f"models-{request_hash}-{digest}.json"

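Because different runs can see different model sets for the same request hash, the digest of the model identifiers is folded into the recording filename. A minimal sketch of that naming scheme, assuming a short content hash over the sorted identifiers (the real digest lives in `_model_identifiers_digest`):

```python
import hashlib

def model_list_filename(request_hash: str, identifiers: list[str]) -> str:
    # Assumption: a truncated sha256 over the sorted, de-duplicated identifiers;
    # the actual digest computation may differ.
    digest = hashlib.sha256("|".join(sorted(set(identifiers))).encode()).hexdigest()[:8]
    return f"models-{request_hash}-{digest}.json"

# Two recordings with different model sets land in different files,
# even though the request hash (which omits test_id) is identical.
print(model_list_filename("abc123", ["llama-3.3-70b", "llama-guard-3-8b"]))
print(model_list_filename("abc123", ["llama-3.3-70b"]))
```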
@@ -554,13 +554,14 @@ def _model_identifiers_digest(endpoint: str, response: dict[str, Any]) -> str:
     Supported endpoints:
     - '/api/tags' (Ollama): response body has 'models': [ { name/model/digest/id/... }, ... ]
     - '/v1/models' (OpenAI): response body is: [ { id: ... }, ... ]
+    - '/v1/openai/v1/models' (OpenAI): response body is: [ { id: ... }, ... ]
     Returns a list of unique identifiers or None if structure doesn't match.
     """
     if "models" in response["body"]:
         # ollama
         items = response["body"]["models"]
     else:
-        # openai
+        # openai or openai-style endpoints
         items = response["body"]
     idents = [m.model if endpoint == "/api/tags" else m.id for m in items]
     return sorted(set(idents))

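The docstring above distinguishes the two body shapes: Ollama nests models under 'models', while both OpenAI-style paths return a flat list keyed by `id`. A simplified, dict-based illustration (the real code receives client objects with `.id` / `.model` attributes):

```python
# Simplified stand-ins for the two body shapes named in the docstring.
ollama_body = {"models": [{"model": "llama3.2:3b"}, {"model": "all-minilm:l6-v2"}]}
openai_style_body = [{"id": "meta-llama/Llama-3.3-70B-Instruct"}, {"id": "all-MiniLM-L6-v2"}]

def digest_idents(endpoint: str, body) -> list[str]:
    # Mirrors the branch in the hunk above, but over plain dicts.
    items = body["models"] if isinstance(body, dict) and "models" in body else body
    return sorted({m["model"] if endpoint == "/api/tags" else m["id"] for m in items})

print(digest_idents("/api/tags", ollama_body))
print(digest_idents("/v1/openai/v1/models", openai_style_body))
```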
@@ -581,7 +582,7 @@ def _combine_model_list_responses(endpoint: str, records: list[dict[str, Any]])
     seen: dict[str, dict[str, Any]] = {}
     for rec in records:
         body = rec["response"]["body"]
-        if endpoint == "/v1/models":
+        if endpoint in ("/v1/models", "/v1/openai/v1/models"):
             for m in body:
                 key = m.id
                 seen[key] = m

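Replay can find several recordings for the same model-list hash (for example from different runs or providers), so they are merged and de-duplicated by `id`. A simplified sketch with plain dicts standing in for the recorded model objects:

```python
def combine(endpoint: str, records: list[dict]) -> list[dict]:
    # Sketch of the de-duplication in _combine_model_list_responses;
    # ordering by id here is an illustrative choice.
    seen: dict[str, dict] = {}
    for rec in records:
        body = rec["response"]["body"]
        if endpoint in ("/v1/models", "/v1/openai/v1/models"):
            for m in body:
                seen[m["id"]] = m
    return sorted(seen.values(), key=lambda m: m["id"])

runs = [
    {"response": {"body": [{"id": "model-a"}, {"id": "model-b"}]}},
    {"response": {"body": [{"id": "model-b"}, {"id": "model-c"}]}},
]
print([m["id"] for m in combine("/v1/openai/v1/models", runs)])  # ['model-a', 'model-b', 'model-c']
```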
@@ -665,7 +666,7 @@ async def _patched_inference_method(original_method, self, client_type, endpoint
         logger.info(f" Test context: {get_test_context()}")

     if mode == APIRecordingMode.LIVE or storage is None:
-        if endpoint == "/v1/models":
+        if endpoint in ("/v1/models", "/v1/openai/v1/models"):
             return original_method(self, *args, **kwargs)
         else:
             return await original_method(self, *args, **kwargs)

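The model-list branch deliberately skips `await`: assuming an AsyncOpenAI-style client whose `models.list()` returns an async-iterable paginator rather than a plain awaitable, the result is consumed by iteration, and the final hunk below materializes it with an async comprehension. A minimal sketch of that pattern under that assumption:

```python
# Sketch, assuming an AsyncOpenAI-style client where models.list()
# hands back an async-iterable paginator.
async def record_model_list(client):
    paginator = client.models.list()         # not awaited, mirroring the LIVE branch
    return [m async for m in paginator]      # materialized, mirroring the RECORD branch
```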
@@ -699,7 +700,7 @@ async def _patched_inference_method(original_method, self, client_type, endpoint
     recording = None
     if mode == APIRecordingMode.REPLAY or mode == APIRecordingMode.RECORD_IF_MISSING:
         # Special handling for model-list endpoints: merge all recordings with this hash
-        if endpoint in ("/api/tags", "/v1/models"):
+        if endpoint in ("/api/tags", "/v1/models", "/v1/openai/v1/models"):
             records = storage._model_list_responses(request_hash)
             recording = _combine_model_list_responses(endpoint, records)
         else:

@@ -739,13 +740,13 @@ async def _patched_inference_method(original_method, self, client_type, endpoint
         )

     if mode == APIRecordingMode.RECORD or (mode == APIRecordingMode.RECORD_IF_MISSING and not recording):
-        if endpoint == "/v1/models":
+        if endpoint in ("/v1/models", "/v1/openai/v1/models"):
             response = original_method(self, *args, **kwargs)
         else:
             response = await original_method(self, *args, **kwargs)

         # we want to store the result of the iterator, not the iterator itself
-        if endpoint == "/v1/models":
+        if endpoint in ("/v1/models", "/v1/openai/v1/models"):
             response = [m async for m in response]

         request_data = {