feat: D69478008 [llama-stack] turning tests into data-driven (#1180)
# What does this PR do?

We have several places running tests for different purposes:

- oss llama stack
  - provider tests
  - e2e tests
- provider llama stack
  - unit tests
  - e2e tests

It would be nice if they could *share the same set of test data*, so that the spec and the implementation stay consistent. That is what this diff does: it isolates test data from test code, so the same data can be reused in different places by writing different test code against it.

## Test Plan

**Set up an Ollama local server.**

**Run a provider test:**

```
conda activate stack
OLLAMA_URL="http://localhost:8321" \
  pytest -v -s -k "ollama" --inference-model="llama3.2:3b-instruct-fp16" \
  llama_stack/providers/tests/inference/test_text_inference.py::TestInference::test_completion_structured_output
```

(`test_structured_output` should also work.)

**Run an e2e test:**

```
conda activate sherpa
with-proxy pip install llama-stack
export INFERENCE_MODEL=llama3.2:3b-instruct-fp16
export LLAMA_STACK_PORT=8322
with-proxy llama stack build --template ollama
with-proxy llama stack run --env OLLAMA_URL=http://localhost:8321 ollama
```

Then run the test client:

```
LLAMA_STACK_PORT=8322 LLAMA_STACK_BASE_URL="http://localhost:8322" \
  pytest -v -s --inference-model="llama3.2:3b-instruct-fp16" \
  tests/client-sdk/inference/test_text_inference.py::test_text_completion_structured_output
```

(`test_text_chat_completion_structured_output` should also work.)

## Notes

- This PR was automatically generated by oss_sync
- Please refer to D69478008 for more details.
parent 1166afdf76
commit 2cbe9395b0

8 changed files with 123 additions and 47 deletions
```diff
@@ -7,6 +7,8 @@
 import pytest
 from pydantic import BaseModel
 
+from llama_stack.providers.tests.test_cases.test_case import TestCase
+
 PROVIDER_TOOL_PROMPT_FORMAT = {
     "remote::ollama": "json",
     "remote::together": "json",
```
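The `TestCase` class imported here lives in `llama_stack/providers/tests/test_cases/test_case.py`, which this excerpt does not show. Below is a minimal sketch of what it plausibly does, inferred from how the tests in this diff index into it; the file naming convention and loading logic are assumptions, not the actual implementation:

```python
import json
from pathlib import Path


class TestCase:
    """Sketch of a data lookup keyed by a case id such as "completion-01".

    The real class is in llama_stack/providers/tests/test_cases/test_case.py;
    the data-file layout assumed here is for illustration only.
    """

    # Hypothetical location of the shared test-data files.
    _DATA_DIR = Path(__file__).parent

    def __init__(self, name: str):
        # Assumed convention: "completion-01" -> file "completion.json", entry "01".
        kind, case_id = name.rsplit("-", 1)
        with open(self._DATA_DIR / f"{kind}.json") as f:
            self.data = json.load(f)[case_id]

    def __getitem__(self, key: str):
        # Tests read fields such as tc["user_input"], tc["messages"], tc["expected"].
        return self.data[key]
```

The point of the indirection is that any test suite able to import (or replicate) this lookup reads the same data.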
```diff
@@ -120,16 +122,16 @@ def test_completion_log_probs_streaming(llama_stack_client, text_model_id, infer
         assert not chunk.logprobs, "Logprobs should be empty"
 
 
-def test_text_completion_structured_output(llama_stack_client, text_model_id, inference_provider_type):
-    user_input = """
-    Michael Jordan was born in 1963. He played basketball for the Chicago Bulls. He retired in 2003.
-    """
-
+@pytest.mark.parametrize("test_case", ["completion-01"])
+def test_text_completion_structured_output(llama_stack_client, text_model_id, inference_provider_type, test_case):
     class AnswerFormat(BaseModel):
         name: str
         year_born: str
         year_retired: str
 
+    tc = TestCase(test_case)
+
+    user_input = tc["user_input"]
     response = llama_stack_client.inference.completion(
         model_id=text_model_id,
         content=user_input,
@@ -143,9 +145,10 @@ def test_text_completion_structured_output(llama_stack_client, text_model_id, in
         },
     )
     answer = AnswerFormat.model_validate_json(response.content)
-    assert answer.name == "Michael Jordan"
-    assert answer.year_born == "1963"
-    assert answer.year_retired == "2003"
+    expected = tc["expected"]
+    assert answer.name == expected["name"]
+    assert answer.year_born == expected["year_born"]
+    assert answer.year_retired == expected["year_retired"]
 
 
 @pytest.mark.parametrize(
```
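The data files themselves are among the other changed files and do not appear in this excerpt. Judging from the literals the hunks above delete, the `completion-01` entry presumably carries something like the following; the module layout is invented for illustration:

```python
# Hypothetical contents backing TestCase("completion-01"); the values are taken
# from the inline literals this diff removes from the test body.
COMPLETION_01 = {
    "user_input": "Michael Jordan was born in 1963. He played basketball for the Chicago Bulls. He retired in 2003.",
    "expected": {
        "name": "Michael Jordan",
        "year_born": "1963",
        "year_retired": "2003",
    },
}
```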
```diff
@@ -247,6 +250,7 @@ def test_text_chat_completion_with_tool_calling_and_streaming(
     assert tool_invocation_content == "[get_weather, {'location': 'San Francisco, CA'}]"
 
 
+@pytest.mark.parametrize("test_case", ["chat_completion-01"])
 def test_text_chat_completion_with_tool_choice_required(
     llama_stack_client, text_model_id, get_weather_tool_definition, provider_tool_format, inference_provider_type
 ):
```
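One mechanical note on the decorator being added here: `pytest.mark.parametrize` folds each case id into the test node id, so individual data-driven cases can be selected with `-k`, much like the Test Plan's pytest invocations. A standalone toy illustration of that pattern (not this repo's code):

```python
import pytest

# Toy stand-in for the shared test-data registry.
_CASES = {"chat_completion-01": {"expected": {"first_name": "Michael"}}}


# Collected as test_lookup[chat_completion-01]; selectable with
#   pytest -k "chat_completion-01"
@pytest.mark.parametrize("test_case", ["chat_completion-01"])
def test_lookup(test_case):
    assert "expected" in _CASES[test_case]
```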
```diff
@@ -281,25 +285,18 @@ def test_text_chat_completion_with_tool_choice_none(
     assert tool_invocation_content == ""
 
 
-def test_text_chat_completion_structured_output(llama_stack_client, text_model_id, inference_provider_type):
+def test_text_chat_completion_structured_output(llama_stack_client, text_model_id, inference_provider_type, test_case):
     class AnswerFormat(BaseModel):
         first_name: str
         last_name: str
         year_of_birth: int
         num_seasons_in_nba: int
 
+    tc = TestCase(test_case)
+
     response = llama_stack_client.inference.chat_completion(
         model_id=text_model_id,
-        messages=[
-            {
-                "role": "system",
-                "content": "You are a helpful assistant. Michael Jordan was born in 1963. He played basketball for the Chicago Bulls for 15 seasons.",
-            },
-            {
-                "role": "user",
-                "content": "Please give me information about Michael Jordan.",
-            },
-        ],
+        messages=tc["messages"],
         response_format={
             "type": "json_schema",
             "json_schema": AnswerFormat.model_json_schema(),
```
```diff
@@ -307,10 +304,11 @@ def test_text_chat_completion_structured_output(llama_stack_client, text_model_i
         stream=False,
     )
     answer = AnswerFormat.model_validate_json(response.completion_message.content)
-    assert answer.first_name == "Michael"
-    assert answer.last_name == "Jordan"
-    assert answer.year_of_birth == 1963
-    assert answer.num_seasons_in_nba == 15
+    expected = tc["expected"]
+    assert answer.first_name == expected["first_name"]
+    assert answer.last_name == expected["last_name"]
+    assert answer.year_of_birth == expected["year_of_birth"]
+    assert answer.num_seasons_in_nba == expected["num_seasons_in_nba"]
 
 
 @pytest.mark.parametrize(
```
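Likewise for the chat case: combining the messages deleted in the earlier hunk with the expected values deleted here, the `chat_completion-01` entry presumably looks roughly like this (the module layout is again invented):

```python
# Hypothetical contents backing TestCase("chat_completion-01"); both the
# messages and the expected values come from literals this diff removes.
CHAT_COMPLETION_01 = {
    "messages": [
        {
            "role": "system",
            "content": "You are a helpful assistant. Michael Jordan was born in 1963. "
            "He played basketball for the Chicago Bulls for 15 seasons.",
        },
        {"role": "user", "content": "Please give me information about Michael Jordan."},
    ],
    "expected": {
        "first_name": "Michael",
        "last_name": "Jordan",
        "year_of_birth": 1963,
        "num_seasons_in_nba": 15,
    },
}
```

Once the literals live in shared data like this, the provider tests and the client-sdk e2e tests from the Test Plan can assert against the same entries, which is exactly the spec/implementation consistency the PR description is after.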