Merge branch 'main' into eval_task_register

Xi Yan 2024-11-06 15:05:46 -08:00
commit 1b7e19d5d0
201 changed files with 1635 additions and 807 deletions

View file

@@ -11,7 +11,7 @@ import pytest_asyncio

 from llama_stack.distribution.datatypes import Api, Provider
-from llama_stack.providers.impls.meta_reference.agents import (
+from llama_stack.providers.inline.meta_reference.agents import (
     MetaReferenceAgentsImplConfig,
 )
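
Note: the recurring change across this merge is a provider-path reorganization. Inline implementations move from llama_stack.providers.impls.* to llama_stack.providers.inline.*, and remote adapters move from llama_stack.providers.adapters.&lt;api&gt;.&lt;name&gt; to llama_stack.providers.remote.&lt;api&gt;.&lt;name&gt;. A minimal before/after sketch using the import above:

# before this merge
from llama_stack.providers.impls.meta_reference.agents import MetaReferenceAgentsImplConfig
# after this merge
from llama_stack.providers.inline.meta_reference.agents import MetaReferenceAgentsImplConfig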

View file

@@ -19,12 +19,11 @@ def pytest_addoption(parser):

 def pytest_configure(config):
-    config.addinivalue_line(
-        "markers", "llama_8b: mark test to run only with the given model"
-    )
-    config.addinivalue_line(
-        "markers", "llama_3b: mark test to run only with the given model"
-    )
+    for model in ["llama_8b", "llama_3b", "llama_vision"]:
+        config.addinivalue_line(
+            "markers", f"{model}: mark test to run only with the given model"
+        )
     for fixture_name in INFERENCE_FIXTURES:
         config.addinivalue_line(
             "markers",
@@ -37,6 +36,14 @@ MODEL_PARAMS = [
     pytest.param("Llama3.2-3B-Instruct", marks=pytest.mark.llama_3b, id="llama_3b"),
 ]

+VISION_MODEL_PARAMS = [
+    pytest.param(
+        "Llama3.2-11B-Vision-Instruct",
+        marks=pytest.mark.llama_vision,
+        id="llama_vision",
+    ),
+]
+

 def pytest_generate_tests(metafunc):
     if "inference_model" in metafunc.fixturenames:
@@ -44,7 +51,11 @@ def pytest_generate_tests(metafunc):
         if model:
            params = [pytest.param(model, id="")]
         else:
-            params = MODEL_PARAMS
+            cls_name = metafunc.cls.__name__
+            if "Vision" in cls_name:
+                params = VISION_MODEL_PARAMS
+            else:
+                params = MODEL_PARAMS

         metafunc.parametrize(
             "inference_model",

View file

@@ -10,14 +10,16 @@ import pytest
 import pytest_asyncio

 from llama_stack.distribution.datatypes import Api, Provider
-from llama_stack.providers.adapters.inference.fireworks import FireworksImplConfig
-from llama_stack.providers.adapters.inference.ollama import OllamaImplConfig
-from llama_stack.providers.adapters.inference.together import TogetherImplConfig
-from llama_stack.providers.impls.meta_reference.inference import (
+from llama_stack.providers.inline.meta_reference.inference import (
     MetaReferenceInferenceConfig,
 )
+from llama_stack.providers.remote.inference.fireworks import FireworksImplConfig
+from llama_stack.providers.remote.inference.ollama import OllamaImplConfig
+from llama_stack.providers.remote.inference.together import TogetherImplConfig
+from llama_stack.providers.remote.inference.vllm import VLLMInferenceAdapterConfig
 from llama_stack.providers.tests.resolver import resolve_impls_for_test_v2

 from ..conftest import ProviderFixture, remote_stack_fixture
 from ..env import get_env_or_fail
@@ -78,6 +80,21 @@ def inference_ollama(inference_model) -> ProviderFixture:
     )


+@pytest.fixture(scope="session")
+def inference_vllm_remote() -> ProviderFixture:
+    return ProviderFixture(
+        providers=[
+            Provider(
+                provider_id="remote::vllm",
+                provider_type="remote::vllm",
+                config=VLLMInferenceAdapterConfig(
+                    url=get_env_or_fail("VLLM_URL"),
+                ).model_dump(),
+            )
+        ],
+    )
+
+
 @pytest.fixture(scope="session")
 def inference_fireworks() -> ProviderFixture:
     return ProviderFixture(
@@ -109,7 +126,14 @@ def inference_together() -> ProviderFixture:
     )


-INFERENCE_FIXTURES = ["meta_reference", "ollama", "fireworks", "together", "remote"]
+INFERENCE_FIXTURES = [
+    "meta_reference",
+    "ollama",
+    "fireworks",
+    "together",
+    "vllm_remote",
+    "remote",
+]


 @pytest_asyncio.fixture(scope="session")
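
One plausible way to exercise the new vllm_remote fixture (the -k selector is a guess at how the suite names fixture combinations; get_env_or_fail aborts setup if VLLM_URL is unset, so the URL must be exported first):

# VLLM_URL=http://localhost:8000 pytest -v -s \
#     llama_stack/providers/tests/inference/test_inference.py \
#     -k vllm_remote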

Binary file not shown (new image, 438 KiB; likely the pasta.jpeg asset used by test_vision_inference.py below).

View file

@@ -4,7 +4,6 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

-import itertools
 import pytest
@@ -15,6 +14,9 @@ from llama_stack.apis.inference import * # noqa: F403
 from llama_stack.distribution.datatypes import * # noqa: F403

+from .utils import group_chunks
+
 # How to run this test:
 #
 # pytest -v -s llama_stack/providers/tests/inference/test_inference.py
@@ -22,15 +24,6 @@ from llama_stack.distribution.datatypes import * # noqa: F403
 # --env FIREWORKS_API_KEY=<your_api_key>

-def group_chunks(response):
-    return {
-        event_type: list(group)
-        for event_type, group in itertools.groupby(
-            response, key=lambda chunk: chunk.event.event_type
-        )
-    }
-

 def get_expected_stop_reason(model: str):
     return StopReason.end_of_message if "Llama3.1" in model else StopReason.end_of_turn

View file

@@ -0,0 +1,128 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from pathlib import Path
+
+import pytest
+from PIL import Image as PIL_Image
+
+from llama_models.llama3.api.datatypes import * # noqa: F403
+from llama_stack.apis.inference import * # noqa: F403
+
+from .utils import group_chunks
+
+THIS_DIR = Path(__file__).parent
+
+
+class TestVisionModelInference:
+    @pytest.mark.asyncio
+    async def test_vision_chat_completion_non_streaming(
+        self, inference_model, inference_stack
+    ):
+        inference_impl, _ = inference_stack
+
+        provider = inference_impl.routing_table.get_provider_impl(inference_model)
+        if provider.__provider_spec__.provider_type not in (
+            "meta-reference",
+            "remote::together",
+            "remote::fireworks",
+            "remote::ollama",
+        ):
+            pytest.skip(
+                "Other inference providers don't support vision chat completion() yet"
+            )
+
+        images = [
+            ImageMedia(image=PIL_Image.open(THIS_DIR / "pasta.jpeg")),
+            ImageMedia(
+                image=URL(
+                    uri="https://www.healthypawspetinsurance.com/Images/V3/DogAndPuppyInsurance/Dog_CTA_Desktop_HeroImage.jpg"
+                )
+            ),
+        ]
+
+        # These are a bit hit-and-miss, need to be careful
+        expected_strings_to_check = [
+            ["spaghetti"],
+            ["puppy"],
+        ]
+        for image, expected_strings in zip(images, expected_strings_to_check):
+            response = await inference_impl.chat_completion(
+                model=inference_model,
+                messages=[
+                    SystemMessage(content="You are a helpful assistant."),
+                    UserMessage(
+                        content=[image, "Describe this image in two sentences."]
+                    ),
+                ],
+                stream=False,
+            )
+
+            assert isinstance(response, ChatCompletionResponse)
+            assert response.completion_message.role == "assistant"
+            assert isinstance(response.completion_message.content, str)
+            for expected_string in expected_strings:
+                assert expected_string in response.completion_message.content
+
+    @pytest.mark.asyncio
+    async def test_vision_chat_completion_streaming(
+        self, inference_model, inference_stack
+    ):
+        inference_impl, _ = inference_stack
+
+        provider = inference_impl.routing_table.get_provider_impl(inference_model)
+        if provider.__provider_spec__.provider_type not in (
+            "meta-reference",
+            "remote::together",
+            "remote::fireworks",
+            "remote::ollama",
+        ):
+            pytest.skip(
+                "Other inference providers don't support vision chat completion() yet"
+            )
+
+        images = [
+            ImageMedia(
+                image=URL(
+                    uri="https://www.healthypawspetinsurance.com/Images/V3/DogAndPuppyInsurance/Dog_CTA_Desktop_HeroImage.jpg"
+                )
+            ),
+        ]
+        expected_strings_to_check = [
+            ["puppy"],
+        ]
+        for image, expected_strings in zip(images, expected_strings_to_check):
+            response = [
+                r
+                async for r in await inference_impl.chat_completion(
+                    model=inference_model,
+                    messages=[
+                        SystemMessage(content="You are a helpful assistant."),
+                        UserMessage(
+                            content=[image, "Describe this image in two sentences."]
+                        ),
+                    ],
+                    stream=True,
+                )
+            ]
+
+            assert len(response) > 0
+            assert all(
+                isinstance(chunk, ChatCompletionResponseStreamChunk)
+                for chunk in response
+            )
+            grouped = group_chunks(response)
+            assert len(grouped[ChatCompletionResponseEventType.start]) == 1
+            assert len(grouped[ChatCompletionResponseEventType.progress]) > 0
+            assert len(grouped[ChatCompletionResponseEventType.complete]) == 1
+
+            content = "".join(
+                chunk.event.delta
+                for chunk in grouped[ChatCompletionResponseEventType.progress]
+            )
+            for expected_string in expected_strings:
+                assert expected_string in content
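
A plausible invocation for this new file, mirroring the run comment in test_inference.py (the FIREWORKS_API_KEY flag applies only when the fireworks fixture is selected):

# pytest -v -s llama_stack/providers/tests/inference/test_vision_inference.py \
#     -m llama_vision --env FIREWORKS_API_KEY=<your_api_key>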

View file

@@ -0,0 +1,16 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+import itertools
+
+
+def group_chunks(response):
+    return {
+        event_type: list(group)
+        for event_type, group in itertools.groupby(
+            response, key=lambda chunk: chunk.event.event_type
+        )
+    }
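
A minimal sketch of what group_chunks returns; SimpleNamespace stands in for the real stream-chunk type and the string event types are placeholders:

from types import SimpleNamespace

def _chunk(event_type):
    return SimpleNamespace(event=SimpleNamespace(event_type=event_type))

stream = [_chunk("start"), _chunk("progress"), _chunk("progress"), _chunk("complete")]
grouped = group_chunks(stream)
assert [len(grouped[t]) for t in ("start", "progress", "complete")] == [1, 2, 1]

Since itertools.groupby only merges consecutive items, this grouping assumes a well-ordered stream (start, then progress chunks, then complete), which is exactly what the streaming tests assert.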

View file

@@ -5,16 +5,18 @@
 # the root directory of this source tree.

 import os
+import tempfile

 import pytest
 import pytest_asyncio

 from llama_stack.distribution.datatypes import Api, Provider
-from llama_stack.providers.adapters.memory.pgvector import PGVectorConfig
-from llama_stack.providers.adapters.memory.weaviate import WeaviateConfig
-from llama_stack.providers.impls.meta_reference.memory import FaissImplConfig
+from llama_stack.providers.inline.meta_reference.memory import FaissImplConfig
+from llama_stack.providers.remote.memory.pgvector import PGVectorConfig
+from llama_stack.providers.remote.memory.weaviate import WeaviateConfig
 from llama_stack.providers.tests.resolver import resolve_impls_for_test_v2
+from llama_stack.providers.utils.kvstore import SqliteKVStoreConfig

 from ..conftest import ProviderFixture, remote_stack_fixture
 from ..env import get_env_or_fail
@@ -26,12 +28,15 @@ def memory_remote() -> ProviderFixture:

 @pytest.fixture(scope="session")
 def memory_meta_reference() -> ProviderFixture:
+    temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".db")
     return ProviderFixture(
         providers=[
             Provider(
                 provider_id="meta-reference",
                 provider_type="meta-reference",
-                config=FaissImplConfig().model_dump(),
+                config=FaissImplConfig(
+                    kvstore=SqliteKVStoreConfig(db_path=temp_file.name).model_dump(),
+                ).model_dump(),
             )
         ],
     )
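
Why the temporary file: FaissImplConfig now carries a kvstore config, and the fixture only needs a stable sqlite path for the session. A small sketch of the pattern (the printed path is illustrative):

import tempfile

# delete=False keeps the path valid after the handle is closed; sqlite then
# creates/reopens the database at that path for the whole test session.
temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".db")
print(temp_file.name)  # e.g. /tmp/tmpab12cd34.db, passed as db_path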

View file

@@ -8,11 +8,11 @@ import pytest
 import pytest_asyncio

 from llama_stack.distribution.datatypes import Api, Provider
-from llama_stack.providers.adapters.safety.together import TogetherSafetyConfig
-from llama_stack.providers.impls.meta_reference.safety import (
+from llama_stack.providers.inline.meta_reference.safety import (
     LlamaGuardShieldConfig,
     SafetyConfig,
 )
+from llama_stack.providers.remote.safety.together import TogetherSafetyConfig
 from llama_stack.providers.tests.resolver import resolve_impls_for_test_v2