forked from phoenix-oss/llama-stack-mirror
Merge branch 'main' into eval_api_final
This commit is contained in:
commit bc0cd07008
79 changed files with 3257 additions and 2358 deletions
@@ -99,6 +99,33 @@ def test_text_completion_streaming(client_with_models, text_model_id, test_case)
    assert len(content_str) > 10


@pytest.mark.parametrize(
    "test_case",
    [
        "inference:completion:stop_sequence",
    ],
)
def test_text_completion_stop_sequence(client_with_models, text_model_id, inference_provider_type, test_case):
    skip_if_model_doesnt_support_completion(client_with_models, text_model_id)
    # This is only supported/tested for remote vLLM: https://github.com/meta-llama/llama-stack/issues/1771
    if inference_provider_type != "remote::vllm":
        pytest.xfail(f"{inference_provider_type} doesn't support 'stop' parameter yet")
    tc = TestCase(test_case)

    response = client_with_models.inference.completion(
        content=tc["content"],
        stream=True,
        model_id=text_model_id,
        sampling_params={
            "max_tokens": 50,
            "stop": ["1963"],
        },
    )
    streamed_content = [chunk.delta for chunk in response]
    content_str = "".join(streamed_content).lower().strip()
    assert "1963" not in content_str


@pytest.mark.parametrize(
    "test_case",
    [
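The same check could also be written against a non-streaming completion. Below is a minimal sketch, not part of the diff, reusing the call shape from the test above and assuming the non-streaming response exposes the generated text as a `.content` string:

    # Sketch only: non-streaming variant of the stop-sequence check.
    # Assumes the non-streaming completion response carries the text in `.content`.
    response = client_with_models.inference.completion(
        content=tc["content"],
        stream=False,
        model_id=text_model_id,
        sampling_params={
            "max_tokens": 50,
            "stop": ["1963"],
        },
    )
    assert "1963" not in response.content.lower()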
@@ -10,6 +10,11 @@
            "expected": "1963"
        }
    },
    "stop_sequence": {
        "data": {
            "content": "Return the exact same sentence and don't add additional words): Michael Jordan was born in the year of 1963"
        }
    },
    "streaming": {
        "data": {
            "content": "Roses are red,"