Merge branch 'main' into add-nim-completion-api

2025-12-20 11:18:41 +00:00 · 2024-12-11 13:07:23 -05:00 · 2024-12-11 13:07:23 -05:00 · df3c239573
commit df3c239573
parent 6d41a93188 a4bcfb8bba
199 changed files with 7739 additions and 814 deletions
--- a/llama_stack/providers/tests/inference/fixtures.py
+++ b/llama_stack/providers/tests/inference/fixtures.py
@ -17,6 +17,7 @@ from llama_stack.providers.inline.inference.meta_reference import (
 )
 from llama_stack.providers.remote.inference.bedrock import BedrockConfig

+from llama_stack.providers.remote.inference.cerebras import CerebrasImplConfig
 from llama_stack.providers.remote.inference.fireworks import FireworksImplConfig
 from llama_stack.providers.remote.inference.nvidia import NVIDIAConfig
 from llama_stack.providers.remote.inference.ollama import OllamaImplConfig
@ -64,6 +65,21 @@ def inference_meta_reference(inference_model) -> ProviderFixture:
    )


+@pytest.fixture(scope="session")
+def inference_cerebras() -> ProviderFixture:
+    return ProviderFixture(
+        providers=[
+            Provider(
+                provider_id="cerebras",
+                provider_type="remote::cerebras",
+                config=CerebrasImplConfig(
+                    api_key=get_env_or_fail("CEREBRAS_API_KEY"),
+                ).model_dump(),
+            )
+        ],
+    )
+
+
@pytest.fixture(scope="session")
 def inference_ollama(inference_model) -> ProviderFixture:
    inference_model = (
@ -206,6 +222,7 @@ INFERENCE_FIXTURES = [
    "vllm_remote",
    "remote",
    "bedrock",
+    "cerebras",
    "nvidia",
    "tgi",
 ]
--- a/llama_stack/providers/tests/inference/test_text_inference.py
+++ b/llama_stack/providers/tests/inference/test_text_inference.py
@ -95,6 +95,7 @@ class TestInference:
            "remote::together",
            "remote::fireworks",
            "remote::nvidia",
+            "remote::cerebras",
        ):
            pytest.skip("Other inference providers don't support completion() yet")

@ -139,6 +140,8 @@ class TestInference:
            "remote::together",
            "remote::fireworks",
            "remote::nvidia",
+            "remote::vllm",
+            "remote::cerebras",
        ):
            pytest.skip(
                "Other inference providers don't support structured output in completions yet"
@ -198,6 +201,7 @@ class TestInference:
            "remote::fireworks",
            "remote::tgi",
            "remote::together",
+            "remote::vllm",
            "remote::nvidia",
        ):
            pytest.skip("Other inference providers don't support structured output yet")
@ -211,7 +215,15 @@ class TestInference:
        response = await inference_impl.chat_completion(
            model_id=inference_model,
            messages=[
-                SystemMessage(content="You are a helpful assistant."),
+                # we include context about Michael Jordan in the prompt so that the test is
+                # focused on the funtionality of the model and not on the information embedded
+                # in the model. Llama 3.2 3B Instruct tends to think MJ played for 14 seasons.
+                SystemMessage(
+                    content=(
+                        "You are a helpful assistant.\n\n"
+                        "Michael Jordan was born in 1963. He played basketball for the Chicago Bulls for 15 seasons."
+                    )
+                ),
                UserMessage(content="Please give me information about Michael Jordan."),
            ],
            stream=False,