Merge branch 'main' into fix_client_sdk_inference

Xi Yan 2025-01-15 15:55:51 -08:00 committed by GitHub
commit f9f3303365
3 changed files with 4 additions and 2 deletions

llama_stack/providers/tests/inference/test_embeddings.py

@@ -6,7 +6,8 @@
 import pytest
-from llama_stack.apis.inference import EmbeddingsResponse, ModelType
+from llama_stack.apis.inference import EmbeddingsResponse
+from llama_stack.apis.models import ModelType
 # How to run this test:
 # pytest -v -s llama_stack/providers/tests/inference/test_embeddings.py
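
For context, this hunk tracks ModelType moving from llama_stack.apis.inference to llama_stack.apis.models. A minimal sketch of how the two imports pair up after the move; the helper functions below are hypothetical and not part of the commit:

# Minimal sketch, assuming the post-change import locations shown in the
# hunk above. The helpers are illustrative only, not code from the commit.
from llama_stack.apis.inference import EmbeddingsResponse
from llama_stack.apis.models import ModelType


def is_embedding_model(model) -> bool:
    # Embedding tests typically gate on the model type before calling
    # the embeddings API; ModelType.embedding is one of the enum values.
    return model.model_type == ModelType.embedding


def check_response(response) -> None:
    # Embeddings calls should deserialize to EmbeddingsResponse, whose
    # .embeddings field holds one vector per input.
    assert isinstance(response, EmbeddingsResponse)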


@@ -143,7 +143,7 @@ class TestVisionModelInference:
         assert len(grouped[ChatCompletionResponseEventType.complete]) == 1
         content = "".join(
-            chunk.event.delta
+            chunk.event.delta.text
            for chunk in grouped[ChatCompletionResponseEventType.progress]
         )
         for expected_string in expected_strings:
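
The fix itself is small: progress-event deltas are now structured objects rather than plain strings, so the test reads text from delta.text. A self-contained sketch of the accumulation pattern, with stand-in dataclasses in place of the real streaming chunk types:

# Self-contained sketch of the accumulation pattern the test uses. The
# dataclasses below are stand-ins for the real streaming response types:
# each progress event carries a delta object whose .text field holds the
# newly generated text.
from dataclasses import dataclass


@dataclass
class Delta:
    text: str


@dataclass
class Event:
    delta: Delta


@dataclass
class Chunk:
    event: Event


progress_chunks = [
    Chunk(Event(Delta("The image shows "))),
    Chunk(Event(Delta("a boat."))),
]

# Before this change, "".join(chunk.event.delta for ...) worked because
# delta was a plain str; now delta is structured, so .text is required.
content = "".join(chunk.event.delta.text for chunk in progress_chunks)
assert content == "The image shows a boat."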


@@ -39,6 +39,7 @@ def text_model_id(llama_stack_client):
         for model in llama_stack_client.models.list().data
         if model.identifier.startswith("meta-llama") and "405" not in model.identifier
     ]
+    print(available_models)
     assert len(available_models) > 0
     return available_models[0]
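
Read as a whole, the fixture now prints the candidate list before asserting, which makes an empty-list failure easier to diagnose. A hedged reconstruction of the full fixture; the decorator and the comprehension's model.identifier element are inferred from the hunk header and surrounding lines, not shown in the diff:

# Hedged reconstruction of the client-SDK fixture after this commit.
# Only the lines shown in the hunk are confirmed; the decorator and the
# comprehension element (model.identifier) are assumptions.
import pytest


@pytest.fixture
def text_model_id(llama_stack_client):
    available_models = [
        model.identifier
        for model in llama_stack_client.models.list().data
        if model.identifier.startswith("meta-llama") and "405" not in model.identifier
    ]
    print(available_models)  # added by this commit: shows candidates when the assert fails
    assert len(available_models) > 0
    return available_models[0]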