Fix and add a test

2025-12-13 13:02:38 +00:00 · 2024-10-21 22:02:37 -07:00 · 2024-10-21 22:02:37 -07:00 · cd84dee3e9
commit cd84dee3e9
parent 40ba22f4c8
5 changed files with 76 additions and 62 deletions
--- a/llama_stack/providers/tests/inference/test_inference.py
+++ b/llama_stack/providers/tests/inference/test_inference.py
@ -10,6 +10,8 @@ import os
 import pytest
 import pytest_asyncio

+from pydantic import BaseModel
+
 from llama_models.llama3.api.datatypes import *  # noqa: F403
 from llama_stack.apis.inference import *  # noqa: F403

@ -85,24 +87,11 @@ async def inference_settings(request):
    }


-from pydantic import BaseModel
-
-
-class AnswerFormat(BaseModel):
-    first_name: str
-    last_name: str
-    year_of_birth: int
-    num_seasons_in_nba: int
-
-
@pytest.fixture
 def sample_messages():
-    question = "Please give me information about Michael Jordan."
-    # question_with_schema = f"{question}{AnswerFormat.schema_json()}"
    return [
        SystemMessage(content="You are a helpful assistant."),
-        # UserMessage(content="What's the weather like today?"),
-        UserMessage(content=question),
+        UserMessage(content="What's the weather like today?"),
    ]


@ -183,23 +172,55 @@ async def test_completion(inference_settings):

@pytest.mark.asyncio
 async def test_chat_completion_non_streaming(inference_settings, sample_messages):
-    print(AnswerFormat.schema_json())
-    print(AnswerFormat.schema())
    inference_impl = inference_settings["impl"]
    response = await inference_impl.chat_completion(
        messages=sample_messages,
        stream=False,
+        **inference_settings["common_params"],
+    )
+
+    assert isinstance(response, ChatCompletionResponse)
+    assert response.completion_message.role == "assistant"
+    assert isinstance(response.completion_message.content, str)
+    assert len(response.completion_message.content) > 0
+
+
+@pytest.mark.asyncio
+async def test_structured_output(inference_settings):
+    inference_impl = inference_settings["impl"]
+    params = inference_settings["common_params"]
+
+    provider = inference_impl.routing_table.get_provider_impl(params["model"])
+    if provider.__provider_id__ != "meta-reference":
+        pytest.skip("Other inference providers don't support structured output yet")
+
+    class AnswerFormat(BaseModel):
+        first_name: str
+        last_name: str
+        year_of_birth: int
+        num_seasons_in_nba: int
+
+    response = await inference_impl.chat_completion(
+        messages=[
+            SystemMessage(content="You are a helpful assistant."),
+            UserMessage(content="Please give me information about Michael Jordan."),
+        ],
+        stream=False,
        response_format=JsonResponseFormat(
            schema=AnswerFormat.schema(),
        ),
        **inference_settings["common_params"],
    )

-    print(response)
    assert isinstance(response, ChatCompletionResponse)
    assert response.completion_message.role == "assistant"
    assert isinstance(response.completion_message.content, str)
-    assert len(response.completion_message.content) > 0
+
+    answer = AnswerFormat.parse_raw(response.completion_message.content)
+    assert answer.first_name == "Michael"
+    assert answer.last_name == "Jordan"
+    assert answer.year_of_birth == 1963
+    assert answer.num_seasons_in_nba == 15


@pytest.mark.asyncio