forked from phoenix-oss/llama-stack-mirror
# What does this PR do?

The previous image URLs were sometimes blocked by Cloudflare, causing test failures for some users. This update replaces them with a GitHub-hosted image (`dog.png`) from the `llama-stack` repository, ensuring more reliable access during testing.

Signed-off-by: Sébastien Han <seb@redhat.com>

[//]: # (If resolving an issue, uncomment and update the line below)
[//]: # (Closes #[issue-number])

## Test Plan

```
$ ollama run llama3.2-vision:latest --keep-alive 2m &
$ uv run pytest -v -s -k "ollama" --inference-model=llama3.2-vision:latest llama_stack/providers/tests/inference/test_vision_inference.py
/Users/leseb/Documents/AI/llama-stack/.venv/lib/python3.13/site-packages/pytest_asyncio/plugin.py:207: PytestDeprecationWarning: The configuration option "asyncio_default_fixture_loop_scope" is unset.
The event loop scope for asynchronous fixtures will default to the fixture caching scope. Future versions of pytest-asyncio will default the loop scope for asynchronous fixtures to function scope. Set the default fixture loop scope explicitly in order to avoid unexpected behavior in the future. Valid fixture loop scopes are: "function", "class", "module", "package", "session"

  warnings.warn(PytestDeprecationWarning(_DEFAULT_FIXTURE_LOOP_SCOPE_UNSET))
============================================ test session starts =============================================
platform darwin -- Python 3.13.1, pytest-8.3.4, pluggy-1.5.0 -- /Users/leseb/Documents/AI/llama-stack/.venv/bin/python3
cachedir: .pytest_cache
metadata: {'Python': '3.13.1', 'Platform': 'macOS-15.3-arm64-arm-64bit-Mach-O', 'Packages': {'pytest': '8.3.4', 'pluggy': '1.5.0'}, 'Plugins': {'html': '4.1.1', 'metadata': '3.1.1', 'asyncio': '0.25.3', 'anyio': '4.8.0', 'nbval': '0.11.0'}}
rootdir: /Users/leseb/Documents/AI/llama-stack
configfile: pyproject.toml
plugins: html-4.1.1, metadata-3.1.1, asyncio-0.25.3, anyio-4.8.0, nbval-0.11.0
asyncio: mode=Mode.STRICT, asyncio_default_fixture_loop_scope=None
collected 39 items / 36 deselected / 3 selected

llama_stack/providers/tests/inference/test_vision_inference.py::TestVisionModelInference::test_vision_chat_completion_non_streaming[-ollama-image0-expected_strings0] PASSED
llama_stack/providers/tests/inference/test_vision_inference.py::TestVisionModelInference::test_vision_chat_completion_non_streaming[-ollama-image1-expected_strings1] PASSED
llama_stack/providers/tests/inference/test_vision_inference.py::TestVisionModelInference::test_vision_chat_completion_streaming[-ollama] PASSED

========================== 3 passed, 36 deselected, 2 warnings in 62.23s (0:01:02) ==========================
```

[//]: # (## Documentation)
[//]: # (- [ ] Added a Changelog entry if the change is significant)

Signed-off-by: Sébastien Han <seb@redhat.com>
119 lines
4.2 KiB
Python
119 lines
4.2 KiB
Python
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
# All rights reserved.
|
|
#
|
|
# This source code is licensed under the terms described in the LICENSE file in
|
|
# the root directory of this source tree.
|
|
|
|
import base64
|
|
from pathlib import Path
|
|
|
|
import pytest
|
|
|
|
from llama_stack.apis.common.content_types import URL, ImageContentItem, TextContentItem
|
|
from llama_stack.apis.inference import (
|
|
ChatCompletionResponse,
|
|
ChatCompletionResponseEventType,
|
|
ChatCompletionResponseStreamChunk,
|
|
SamplingParams,
|
|
UserMessage,
|
|
)
|
|
|
|
from .utils import group_chunks
|
|
|
|
# Directory that contains this test module and its image fixture.
THIS_DIR = Path(__file__).parent

# Read the local pasta photo once at import time and keep it as base64 text;
# the tests below pass it inline as image data.
with (THIS_DIR / "pasta.jpeg").open("rb") as f:
    PASTA_IMAGE = str(base64.b64encode(f.read()), "utf-8")
|
|
|
|
|
|
class TestVisionModelInference:
    """Chat-completion tests that ask a vision model to describe an image.

    Each test sends an image together with a short text instruction through
    ``inference_impl.chat_completion`` and asserts that the generated text
    mentions the expected subject of the picture.
    """

    # Single definition of the remote test image so that the streaming and
    # non-streaming tests cannot drift apart when the URL changes.
    _DOG_IMAGE_URI = "https://raw.githubusercontent.com/meta-llama/llama-stack/main/tests/client-sdk/inference/dog.png"

    @staticmethod
    def _assert_mentions_all(text, expected_strings):
        """Assert that every expected string occurs in ``text``, ignoring case.

        The text is free-form model output, so its capitalisation is not
        fixed (a sentence may well start with "Puppy"). Comparing lower-cased
        strings keeps the check about content rather than casing.

        Args:
            text: The generated text to inspect.
            expected_strings: Keywords that must all appear in ``text``.
        """
        lowered = text.lower()
        for expected_string in expected_strings:
            assert expected_string.lower() in lowered, (
                f"expected {expected_string!r} in model output: {text!r}"
            )

    @pytest.mark.asyncio
    @pytest.mark.parametrize(
        "image, expected_strings",
        [
            # Image passed inline as base64 data.
            (
                ImageContentItem(image=dict(data=PASTA_IMAGE)),
                ["spaghetti"],
            ),
            # Image passed by URL.
            (
                ImageContentItem(image=dict(url=URL(uri=_DOG_IMAGE_URI))),
                ["puppy"],
            ),
        ],
    )
    async def test_vision_chat_completion_non_streaming(
        self, inference_model, inference_stack, image, expected_strings
    ):
        """Request a non-streaming description and check type, role and keywords."""
        inference_impl, _ = inference_stack
        response = await inference_impl.chat_completion(
            model_id=inference_model,
            messages=[
                # NOTE(review): this system-style prompt is sent with the user
                # role -- confirm that this is intentional.
                UserMessage(content="You are a helpful assistant."),
                UserMessage(
                    content=[
                        image,
                        TextContentItem(text="Describe this image in two sentences."),
                    ]
                ),
            ],
            stream=False,
            sampling_params=SamplingParams(max_tokens=100),
        )

        assert isinstance(response, ChatCompletionResponse)
        assert response.completion_message.role == "assistant"
        assert isinstance(response.completion_message.content, str)
        self._assert_mentions_all(response.completion_message.content, expected_strings)

    @pytest.mark.asyncio
    async def test_vision_chat_completion_streaming(self, inference_model, inference_stack):
        """Request a streamed description and check chunk structure and keywords."""
        inference_impl, _ = inference_stack

        images = [
            ImageContentItem(image=dict(url=URL(uri=self._DOG_IMAGE_URI))),
        ]
        expected_strings_to_check = [
            ["puppy"],
        ]
        for image, expected_strings in zip(images, expected_strings_to_check):
            # Drain the async stream into a list so it can be inspected as a whole.
            response = [
                r
                async for r in await inference_impl.chat_completion(
                    model_id=inference_model,
                    messages=[
                        # NOTE(review): this system-style prompt is sent with the
                        # user role -- confirm that this is intentional.
                        UserMessage(content="You are a helpful assistant."),
                        UserMessage(
                            content=[
                                image,
                                TextContentItem(text="Describe this image in two sentences."),
                            ]
                        ),
                    ],
                    stream=True,
                    sampling_params=SamplingParams(max_tokens=100),
                )
            ]

            assert len(response) > 0
            assert all(isinstance(chunk, ChatCompletionResponseStreamChunk) for chunk in response)

            # The grouped result is indexed by event type below: exactly one
            # start, at least one progress and exactly one complete event.
            grouped = group_chunks(response)
            assert len(grouped[ChatCompletionResponseEventType.start]) == 1
            assert len(grouped[ChatCompletionResponseEventType.progress]) > 0
            assert len(grouped[ChatCompletionResponseEventType.complete]) == 1

            # Reassemble the generated text from the progress deltas.
            content = "".join(chunk.event.delta.text for chunk in grouped[ChatCompletionResponseEventType.progress])
            self._assert_mentions_all(content, expected_strings)
|