Mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-12-17 17:22:38 +00:00)
Merge branch 'main' into aidand-391-guided-decoding-vllm_3
Commit 44bb23ebc8
177 changed files with 5725 additions and 594 deletions
@@ -94,6 +94,7 @@ class TestInference:
             "remote::tgi",
             "remote::together",
             "remote::fireworks",
+            "remote::cerebras",
         ):
             pytest.skip("Other inference providers don't support completion() yet")
@@ -140,6 +141,8 @@ class TestInference:
             "remote::together",
             "remote::fireworks",
+            "remote::vllm",
+            "remote::cerebras",
         ):
             pytest.skip(
                 "Other inference providers don't support structured output in completions yet"
             )
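
Both hunks above extend a skip list inside a provider-type guard whose opening `if` sits outside the visible context. Below is a minimal, self-contained sketch of that gating pattern, assuming a hypothetical `provider_type` fixture (the real suite derives the provider type from the configured inference stack, and the visible hunks start mid-list, so the full set of allowed providers may be longer):

    import pytest

    # Providers assumed to support plain completion(); anything else is skipped.
    # Only the providers visible in the hunks above are listed here.
    COMPLETION_PROVIDERS = {
        "remote::tgi",
        "remote::together",
        "remote::fireworks",
        "remote::cerebras",
    }


    @pytest.fixture
    def provider_type() -> str:
        # Hypothetical stand-in; the real fixture resolves the active provider.
        return "remote::vllm"


    def test_completion_gating(provider_type):
        if provider_type not in COMPLETION_PROVIDERS:
            pytest.skip("Other inference providers don't support completion() yet")
        # ... the actual completion() assertions would run here ...

Running with `pytest -rs` reports the skip reason for providers outside the set.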
@@ -213,7 +216,15 @@ class TestInference:
         response = await inference_impl.chat_completion(
             model_id=inference_model,
             messages=[
-                SystemMessage(content="You are a helpful assistant."),
+                # we include context about Michael Jordan in the prompt so that the test is
+                # focused on the functionality of the model and not on the information embedded
+                # in the model. Llama 3.2 3B Instruct tends to think MJ played for 14 seasons.
+                SystemMessage(
+                    content=(
+                        "You are a helpful assistant.\n\n"
+                        "Michael Jordan was born in 1963. He played basketball for the Chicago Bulls for 15 seasons."
+                    )
+                ),
                 UserMessage(content="Please give me information about Michael Jordan."),
             ],
             stream=False,
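
The last hunk grounds the test by moving the relevant facts into the system prompt instead of relying on what the model memorized during training. Below is a standalone sketch of that prompt-construction idea, using plain dicts rather than llama-stack's SystemMessage/UserMessage types (the call site, model id, and any assertions are outside this hunk and are not reproduced here):

    # Standalone sketch: compose a grounded system prompt so the test checks
    # instruction following rather than memorized knowledge. Message shapes
    # here are illustrative, not the llama-stack types used in the diff.
    FACTS = (
        "Michael Jordan was born in 1963. "
        "He played basketball for the Chicago Bulls for 15 seasons."
    )


    def build_messages(question: str) -> list[dict]:
        system_prompt = "You are a helpful assistant.\n\n" + FACTS
        return [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": question},
        ]


    if __name__ == "__main__":
        for message in build_messages("Please give me information about Michael Jordan."):
            print(f"{message['role']}: {message['content'][:60]}...")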