Enable vision models for Together, Fireworks, Meta-Reference, and Ollama (#376)

* Enable vision models for Together and Fireworks

* Works with the ollama 0.4.0 pre-release and its vision model support

* Localize media for meta_reference inference (sketched below)

* Fix
Ashwin Bharambe 2024-11-05 16:22:33 -08:00 committed by GitHub
parent db30809141
commit cde9bc1388
11 changed files with 465 additions and 81 deletions
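
The "localize media" bullet above refers to the meta-reference provider needing image bytes available locally before running inference. The following is a minimal sketch of that idea, not the actual provider code: it assumes remote image URLs are fetched and wrapped as PIL images before prompt encoding, and the helper name localize_image_media and the use of httpx are illustrative assumptions.

import io

import httpx
from PIL import Image as PIL_Image


# Illustrative sketch only: fetch remote image URLs so a local (meta-reference)
# model can read pixel data. The helper name and the httpx dependency are
# assumptions, not the actual llama-stack implementation.
async def localize_image_media(image) -> PIL_Image.Image:
    if isinstance(image, PIL_Image.Image):
        return image  # already local, nothing to fetch
    # Otherwise treat it as a URL-like object with a `.uri` attribute (assumed).
    async with httpx.AsyncClient() as client:
        resp = await client.get(image.uri)
        resp.raise_for_status()
        return PIL_Image.open(io.BytesIO(resp.content))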


@@ -19,12 +19,11 @@ def pytest_addoption(parser):
def pytest_configure(config):
config.addinivalue_line(
"markers", "llama_8b: mark test to run only with the given model"
)
config.addinivalue_line(
"markers", "llama_3b: mark test to run only with the given model"
)
for model in ["llama_8b", "llama_3b", "llama_vision"]:
config.addinivalue_line(
"markers", f"{model}: mark test to run only with the given model"
)
for fixture_name in INFERENCE_FIXTURES:
config.addinivalue_line(
"markers",
@@ -37,6 +36,14 @@ MODEL_PARAMS = [
pytest.param("Llama3.2-3B-Instruct", marks=pytest.mark.llama_3b, id="llama_3b"),
]
VISION_MODEL_PARAMS = [
pytest.param(
"Llama3.2-11B-Vision-Instruct",
marks=pytest.mark.llama_vision,
id="llama_vision",
),
]
def pytest_generate_tests(metafunc):
if "inference_model" in metafunc.fixturenames:
@@ -44,7 +51,11 @@ def pytest_generate_tests(metafunc):
if model:
params = [pytest.param(model, id="")]
else:
params = MODEL_PARAMS
cls_name = metafunc.cls.__name__
if "Vision" in cls_name:
params = VISION_MODEL_PARAMS
else:
params = MODEL_PARAMS
metafunc.parametrize(
"inference_model",

(new binary image file added, 438 KiB; contents not shown)


@@ -4,7 +4,6 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
import itertools
import pytest
@@ -15,6 +14,9 @@ from llama_stack.apis.inference import * # noqa: F403
from llama_stack.distribution.datatypes import * # noqa: F403
from .utils import group_chunks
# How to run this test:
#
# pytest -v -s llama_stack/providers/tests/inference/test_inference.py
@@ -22,15 +24,6 @@ from llama_stack.distribution.datatypes import * # noqa: F403
# --env FIREWORKS_API_KEY=<your_api_key>
def group_chunks(response):
return {
event_type: list(group)
for event_type, group in itertools.groupby(
response, key=lambda chunk: chunk.event.event_type
)
}
def get_expected_stop_reason(model: str):
return StopReason.end_of_message if "Llama3.1" in model else StopReason.end_of_turn


@@ -0,0 +1,128 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from pathlib import Path
import pytest
from PIL import Image as PIL_Image
from llama_models.llama3.api.datatypes import * # noqa: F403
from llama_stack.apis.inference import * # noqa: F403
from .utils import group_chunks
THIS_DIR = Path(__file__).parent
class TestVisionModelInference:
@pytest.mark.asyncio
async def test_vision_chat_completion_non_streaming(
self, inference_model, inference_stack
):
inference_impl, _ = inference_stack
provider = inference_impl.routing_table.get_provider_impl(inference_model)
if provider.__provider_spec__.provider_type not in (
"meta-reference",
"remote::together",
"remote::fireworks",
"remote::ollama",
):
pytest.skip(
"Other inference providers don't support vision chat completion() yet"
)
images = [
ImageMedia(image=PIL_Image.open(THIS_DIR / "pasta.jpeg")),
ImageMedia(
image=URL(
uri="https://www.healthypawspetinsurance.com/Images/V3/DogAndPuppyInsurance/Dog_CTA_Desktop_HeroImage.jpg"
)
),
]
# These are a bit hit-and-miss, need to be careful
expected_strings_to_check = [
["spaghetti"],
["puppy"],
]
for image, expected_strings in zip(images, expected_strings_to_check):
response = await inference_impl.chat_completion(
model=inference_model,
messages=[
SystemMessage(content="You are a helpful assistant."),
UserMessage(
content=[image, "Describe this image in two sentences."]
),
],
stream=False,
)
assert isinstance(response, ChatCompletionResponse)
assert response.completion_message.role == "assistant"
assert isinstance(response.completion_message.content, str)
for expected_string in expected_strings:
assert expected_string in response.completion_message.content
@pytest.mark.asyncio
async def test_vision_chat_completion_streaming(
self, inference_model, inference_stack
):
inference_impl, _ = inference_stack
provider = inference_impl.routing_table.get_provider_impl(inference_model)
if provider.__provider_spec__.provider_type not in (
"meta-reference",
"remote::together",
"remote::fireworks",
"remote::ollama",
):
pytest.skip(
"Other inference providers don't support vision chat completion() yet"
)
images = [
ImageMedia(
image=URL(
uri="https://www.healthypawspetinsurance.com/Images/V3/DogAndPuppyInsurance/Dog_CTA_Desktop_HeroImage.jpg"
)
),
]
expected_strings_to_check = [
["puppy"],
]
for image, expected_strings in zip(images, expected_strings_to_check):
response = [
r
async for r in await inference_impl.chat_completion(
model=inference_model,
messages=[
SystemMessage(content="You are a helpful assistant."),
UserMessage(
content=[image, "Describe this image in two sentences."]
),
],
stream=True,
)
]
assert len(response) > 0
assert all(
isinstance(chunk, ChatCompletionResponseStreamChunk)
for chunk in response
)
grouped = group_chunks(response)
assert len(grouped[ChatCompletionResponseEventType.start]) == 1
assert len(grouped[ChatCompletionResponseEventType.progress]) > 0
assert len(grouped[ChatCompletionResponseEventType.complete]) == 1
content = "".join(
chunk.event.delta
for chunk in grouped[ChatCompletionResponseEventType.progress]
)
for expected_string in expected_strings:
assert expected_string in content


@@ -0,0 +1,16 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
import itertools
def group_chunks(response):
return {
event_type: list(group)
for event_type, group in itertools.groupby(
response, key=lambda chunk: chunk.event.event_type
)
}