mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-10-16 06:53:47 +00:00
Merge branch 'main' into eval_task_register
This commit is contained in:
commit
1b7e19d5d0
201 changed files with 1635 additions and 807 deletions
|
@ -11,7 +11,7 @@ import pytest_asyncio
|
|||
|
||||
from llama_stack.distribution.datatypes import Api, Provider
|
||||
|
||||
from llama_stack.providers.impls.meta_reference.agents import (
|
||||
from llama_stack.providers.inline.meta_reference.agents import (
|
||||
MetaReferenceAgentsImplConfig,
|
||||
)
|
||||
|
||||
|
|
|
@ -19,12 +19,11 @@ def pytest_addoption(parser):
|
|||
|
||||
|
||||
def pytest_configure(config):
|
||||
config.addinivalue_line(
|
||||
"markers", "llama_8b: mark test to run only with the given model"
|
||||
)
|
||||
config.addinivalue_line(
|
||||
"markers", "llama_3b: mark test to run only with the given model"
|
||||
)
|
||||
for model in ["llama_8b", "llama_3b", "llama_vision"]:
|
||||
config.addinivalue_line(
|
||||
"markers", f"{model}: mark test to run only with the given model"
|
||||
)
|
||||
|
||||
for fixture_name in INFERENCE_FIXTURES:
|
||||
config.addinivalue_line(
|
||||
"markers",
|
||||
|
@ -37,6 +36,14 @@ MODEL_PARAMS = [
|
|||
pytest.param("Llama3.2-3B-Instruct", marks=pytest.mark.llama_3b, id="llama_3b"),
|
||||
]
|
||||
|
||||
VISION_MODEL_PARAMS = [
|
||||
pytest.param(
|
||||
"Llama3.2-11B-Vision-Instruct",
|
||||
marks=pytest.mark.llama_vision,
|
||||
id="llama_vision",
|
||||
),
|
||||
]
|
||||
|
||||
|
||||
def pytest_generate_tests(metafunc):
|
||||
if "inference_model" in metafunc.fixturenames:
|
||||
|
@ -44,7 +51,11 @@ def pytest_generate_tests(metafunc):
|
|||
if model:
|
||||
params = [pytest.param(model, id="")]
|
||||
else:
|
||||
params = MODEL_PARAMS
|
||||
cls_name = metafunc.cls.__name__
|
||||
if "Vision" in cls_name:
|
||||
params = VISION_MODEL_PARAMS
|
||||
else:
|
||||
params = MODEL_PARAMS
|
||||
|
||||
metafunc.parametrize(
|
||||
"inference_model",
|
||||
|
|
|
@ -10,14 +10,16 @@ import pytest
|
|||
import pytest_asyncio
|
||||
|
||||
from llama_stack.distribution.datatypes import Api, Provider
|
||||
|
||||
from llama_stack.providers.adapters.inference.fireworks import FireworksImplConfig
|
||||
from llama_stack.providers.adapters.inference.ollama import OllamaImplConfig
|
||||
from llama_stack.providers.adapters.inference.together import TogetherImplConfig
|
||||
from llama_stack.providers.impls.meta_reference.inference import (
|
||||
from llama_stack.providers.inline.meta_reference.inference import (
|
||||
MetaReferenceInferenceConfig,
|
||||
)
|
||||
|
||||
from llama_stack.providers.remote.inference.fireworks import FireworksImplConfig
|
||||
from llama_stack.providers.remote.inference.ollama import OllamaImplConfig
|
||||
from llama_stack.providers.remote.inference.together import TogetherImplConfig
|
||||
from llama_stack.providers.remote.inference.vllm import VLLMInferenceAdapterConfig
|
||||
from llama_stack.providers.tests.resolver import resolve_impls_for_test_v2
|
||||
|
||||
from ..conftest import ProviderFixture, remote_stack_fixture
|
||||
from ..env import get_env_or_fail
|
||||
|
||||
|
@ -78,6 +80,21 @@ def inference_ollama(inference_model) -> ProviderFixture:
|
|||
)
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
def inference_vllm_remote() -> ProviderFixture:
    """Session-scoped fixture describing a single remote vLLM inference provider.

    The adapter URL is obtained from the ``VLLM_URL`` environment variable
    through ``get_env_or_fail``.
    """
    adapter_config = VLLMInferenceAdapterConfig(url=get_env_or_fail("VLLM_URL"))
    vllm_provider = Provider(
        provider_id="remote::vllm",
        provider_type="remote::vllm",
        config=adapter_config.model_dump(),
    )
    return ProviderFixture(providers=[vllm_provider])
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
|
||||
def inference_fireworks() -> ProviderFixture:
|
||||
return ProviderFixture(
|
||||
|
@ -109,7 +126,14 @@ def inference_together() -> ProviderFixture:
|
|||
)
|
||||
|
||||
|
||||
INFERENCE_FIXTURES = ["meta_reference", "ollama", "fireworks", "together", "remote"]
|
||||
INFERENCE_FIXTURES = [
|
||||
"meta_reference",
|
||||
"ollama",
|
||||
"fireworks",
|
||||
"together",
|
||||
"vllm_remote",
|
||||
"remote",
|
||||
]
|
||||
|
||||
|
||||
@pytest_asyncio.fixture(scope="session")
|
||||
|
|
BIN
llama_stack/providers/tests/inference/pasta.jpeg
Normal file
BIN
llama_stack/providers/tests/inference/pasta.jpeg
Normal file
Binary file not shown.
After Width: | Height: | Size: 438 KiB |
|
@ -4,7 +4,6 @@
|
|||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
import itertools
|
||||
|
||||
import pytest
|
||||
|
||||
|
@ -15,6 +14,9 @@ from llama_stack.apis.inference import * # noqa: F403
|
|||
|
||||
from llama_stack.distribution.datatypes import * # noqa: F403
|
||||
|
||||
from .utils import group_chunks
|
||||
|
||||
|
||||
# How to run this test:
|
||||
#
|
||||
# pytest -v -s llama_stack/providers/tests/inference/test_inference.py
|
||||
|
@ -22,15 +24,6 @@ from llama_stack.distribution.datatypes import * # noqa: F403
|
|||
# --env FIREWORKS_API_KEY=<your_api_key>
|
||||
|
||||
|
||||
def group_chunks(response):
|
||||
return {
|
||||
event_type: list(group)
|
||||
for event_type, group in itertools.groupby(
|
||||
response, key=lambda chunk: chunk.event.event_type
|
||||
)
|
||||
}
|
||||
|
||||
|
||||
def get_expected_stop_reason(model: str):
    """Return the stop reason the tests expect for ``model``.

    Names containing ``"Llama3.1"`` map to ``StopReason.end_of_message``;
    every other name maps to ``StopReason.end_of_turn``.
    """
    if "Llama3.1" not in model:
        return StopReason.end_of_turn
    return StopReason.end_of_message
|
||||
|
||||
|
|
128
llama_stack/providers/tests/inference/test_vision_inference.py
Normal file
128
llama_stack/providers/tests/inference/test_vision_inference.py
Normal file
|
@ -0,0 +1,128 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
from PIL import Image as PIL_Image
|
||||
|
||||
|
||||
from llama_models.llama3.api.datatypes import * # noqa: F403
|
||||
from llama_stack.apis.inference import * # noqa: F403
|
||||
|
||||
from .utils import group_chunks
|
||||
|
||||
THIS_DIR = Path(__file__).parent
|
||||
|
||||
|
||||
class TestVisionModelInference:
    """Chat-completion tests that send an image together with a text prompt.

    Both tests are skipped for providers whose type is not listed in
    ``_VISION_PROVIDER_TYPES``.
    """

    # Provider types these tests are run against; anything else is skipped.
    _VISION_PROVIDER_TYPES = (
        "meta-reference",
        "remote::together",
        "remote::fireworks",
        "remote::ollama",
    )
    _PROMPT = "Describe this image in two sentences."
    _PUPPY_IMAGE_URI = "https://www.healthypawspetinsurance.com/Images/V3/DogAndPuppyInsurance/Dog_CTA_Desktop_HeroImage.jpg"

    @classmethod
    def _skip_unless_vision_supported(cls, inference_impl, inference_model):
        """Skip the current test when the routed provider is not in the allow-list."""
        provider = inference_impl.routing_table.get_provider_impl(inference_model)
        if provider.__provider_spec__.provider_type not in cls._VISION_PROVIDER_TYPES:
            pytest.skip(
                "Other inference providers don't support vision chat completion() yet"
            )

    @classmethod
    def _messages_for(cls, image):
        """Build the system + user message pair asking for a description of ``image``."""
        return [
            SystemMessage(content="You are a helpful assistant."),
            UserMessage(content=[image, cls._PROMPT]),
        ]

    @staticmethod
    def _assert_mentions(content, expected_strings):
        """Assert every expected keyword occurs in ``content``, ignoring case.

        Model output is free-form text, so a keyword may be capitalised (for
        example at the start of a sentence); comparing lower-cased text keeps
        the check from failing on casing alone.
        """
        lowered = content.lower()
        for expected_string in expected_strings:
            assert (
                expected_string.lower() in lowered
            ), f"expected {expected_string!r} in model output: {content!r}"

    @pytest.mark.asyncio
    async def test_vision_chat_completion_non_streaming(
        self, inference_model, inference_stack
    ):
        """Non-streaming completion should describe a local and a remote image."""
        inference_impl, _ = inference_stack
        self._skip_unless_vision_supported(inference_impl, inference_model)

        images = [
            ImageMedia(image=PIL_Image.open(THIS_DIR / "pasta.jpeg")),
            ImageMedia(image=URL(uri=self._PUPPY_IMAGE_URI)),
        ]

        # These are a bit hit-and-miss, need to be careful
        expected_strings_to_check = [
            ["spaghetti"],
            ["puppy"],
        ]
        for image, expected_strings in zip(images, expected_strings_to_check):
            response = await inference_impl.chat_completion(
                model=inference_model,
                messages=self._messages_for(image),
                stream=False,
            )

            assert isinstance(response, ChatCompletionResponse)
            assert response.completion_message.role == "assistant"
            assert isinstance(response.completion_message.content, str)
            self._assert_mentions(
                response.completion_message.content, expected_strings
            )

    @pytest.mark.asyncio
    async def test_vision_chat_completion_streaming(
        self, inference_model, inference_stack
    ):
        """Streaming completion should emit start/progress/complete chunks describing the image."""
        inference_impl, _ = inference_stack
        self._skip_unless_vision_supported(inference_impl, inference_model)

        images = [
            ImageMedia(image=URL(uri=self._PUPPY_IMAGE_URI)),
        ]
        expected_strings_to_check = [
            ["puppy"],
        ]
        for image, expected_strings in zip(images, expected_strings_to_check):
            response = [
                r
                async for r in await inference_impl.chat_completion(
                    model=inference_model,
                    messages=self._messages_for(image),
                    stream=True,
                )
            ]

            assert len(response) > 0
            assert all(
                isinstance(chunk, ChatCompletionResponseStreamChunk)
                for chunk in response
            )
            grouped = group_chunks(response)
            assert len(grouped[ChatCompletionResponseEventType.start]) == 1
            assert len(grouped[ChatCompletionResponseEventType.progress]) > 0
            assert len(grouped[ChatCompletionResponseEventType.complete]) == 1

            # Reassemble the full text from the progress deltas before checking keywords.
            content = "".join(
                chunk.event.delta
                for chunk in grouped[ChatCompletionResponseEventType.progress]
            )
            self._assert_mentions(content, expected_strings)
|
16
llama_stack/providers/tests/inference/utils.py
Normal file
16
llama_stack/providers/tests/inference/utils.py
Normal file
|
@ -0,0 +1,16 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
import itertools
|
||||
|
||||
|
||||
def group_chunks(response):
    """Group streamed chunks by their event type.

    Args:
        response: Iterable of chunks, each exposing ``chunk.event.event_type``.

    Returns:
        A dict mapping each event type to the list of chunks carrying it, in
        their original order. Keys appear in first-seen order.
    """
    grouped = {}
    # groupby() only merges *adjacent* items with equal keys, so an event type
    # that reappears later produces a second run. Extend the existing list
    # rather than overwrite it, so no chunks are dropped.
    for event_type, run in itertools.groupby(
        response, key=lambda chunk: chunk.event.event_type
    ):
        grouped.setdefault(event_type, []).extend(run)
    return grouped
|
|
@ -5,16 +5,18 @@
|
|||
# the root directory of this source tree.
|
||||
|
||||
import os
|
||||
import tempfile
|
||||
|
||||
import pytest
|
||||
import pytest_asyncio
|
||||
|
||||
from llama_stack.distribution.datatypes import Api, Provider
|
||||
from llama_stack.providers.adapters.memory.pgvector import PGVectorConfig
|
||||
from llama_stack.providers.adapters.memory.weaviate import WeaviateConfig
|
||||
from llama_stack.providers.impls.meta_reference.memory import FaissImplConfig
|
||||
from llama_stack.providers.inline.meta_reference.memory import FaissImplConfig
|
||||
from llama_stack.providers.remote.memory.pgvector import PGVectorConfig
|
||||
from llama_stack.providers.remote.memory.weaviate import WeaviateConfig
|
||||
|
||||
from llama_stack.providers.tests.resolver import resolve_impls_for_test_v2
|
||||
from llama_stack.providers.utils.kvstore import SqliteKVStoreConfig
|
||||
from ..conftest import ProviderFixture, remote_stack_fixture
|
||||
from ..env import get_env_or_fail
|
||||
|
||||
|
@ -26,12 +28,15 @@ def memory_remote() -> ProviderFixture:
|
|||
|
||||
@pytest.fixture(scope="session")
def memory_meta_reference() -> ProviderFixture:
    """Session-scoped fixture for the meta-reference memory provider.

    The provider's kvstore is configured with a SQLite database path that
    points at a freshly created temporary ``.db`` file.
    """
    # delete=False keeps the file on disk once the handle is closed; only the
    # path is needed here, so close the handle right away instead of leaking it.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".db") as temp_file:
        db_path = temp_file.name
    return ProviderFixture(
        providers=[
            Provider(
                provider_id="meta-reference",
                provider_type="meta-reference",
                config=FaissImplConfig(
                    kvstore=SqliteKVStoreConfig(db_path=db_path).model_dump(),
                ).model_dump(),
            )
        ],
    )
|
||||
|
|
|
@ -8,11 +8,11 @@ import pytest
|
|||
import pytest_asyncio
|
||||
|
||||
from llama_stack.distribution.datatypes import Api, Provider
|
||||
from llama_stack.providers.adapters.safety.together import TogetherSafetyConfig
|
||||
from llama_stack.providers.impls.meta_reference.safety import (
|
||||
from llama_stack.providers.inline.meta_reference.safety import (
|
||||
LlamaGuardShieldConfig,
|
||||
SafetyConfig,
|
||||
)
|
||||
from llama_stack.providers.remote.safety.together import TogetherSafetyConfig
|
||||
|
||||
from llama_stack.providers.tests.resolver import resolve_impls_for_test_v2
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue