"fbcode 96eeb10decbe0d9de55882d2e35e170417fe62a7(D69478008)"

Haiping Zhao 2025-02-19 19:02:46 -08:00
parent e9b8259cf9
commit a1f77d1353
8 changed files with 122 additions and 46 deletions


@@ -4,6 +4,7 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
+import os
 
 from typing import Any, Dict
 
 from pydantic import BaseModel
@@ -12,7 +13,7 @@ DEFAULT_OLLAMA_URL = "http://localhost:11434"
 
 
 class OllamaImplConfig(BaseModel):
-    url: str = DEFAULT_OLLAMA_URL
+    url: str = os.getenv("OLLAMA_URL", DEFAULT_OLLAMA_URL)
 
     @classmethod
     def sample_run_config(cls, url: str = "${env.OLLAMA_URL:http://localhost:11434}", **kwargs) -> Dict[str, Any]:
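The effect of the new default: the Ollama URL can now be overridden through the environment. A self-contained sketch of the behavior (re-declaring the class locally rather than importing it; `os.getenv` runs once, when the class body executes, so the variable must be set before that point):

    import os

    from pydantic import BaseModel

    DEFAULT_OLLAMA_URL = "http://localhost:11434"

    # Hypothetical override: must be set before the class body runs,
    # because the default is captured at definition time.
    os.environ["OLLAMA_URL"] = "http://gpu-box:11434"


    class OllamaImplConfig(BaseModel):
        url: str = os.getenv("OLLAMA_URL", DEFAULT_OLLAMA_URL)


    assert OllamaImplConfig().url == "http://gpu-box:11434"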


@@ -87,3 +87,6 @@ pytest llama_stack/providers/tests/ --config=ci_test_config.yaml
 
 Currently, we support test config on inference, agents and memory api tests.
 Example format of test config can be found in ci_test_config.yaml.
+
+## Test Data
+We encourage providers to use our test data for internal development testing, to make it easier to stay consistent with the tests we provide. Each test case may define its own data format; please refer to our test source code for details on how these fields are used in the tests.
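Once a test is parametrized with a test-case id (see the hunks below), a single data-driven case can be selected with pytest's standard `-k` filter; the exact test-file path here is assumed from the imports later in this diff:

    pytest llama_stack/providers/tests/inference/test_text_inference.py -k "completion-01"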


@@ -30,6 +30,7 @@ from llama_stack.models.llama.datatypes import (
     ToolParamDefinition,
     ToolPromptFormat,
 )
+from llama_stack.providers.tests.test_cases.test_case import TestCase
 
 from .utils import group_chunks
@@ -178,8 +179,9 @@ class TestInference:
             else:  # no token, no logprobs
                 assert not chunk.logprobs, "Logprobs should be empty"
 
+    @pytest.mark.parametrize("test_case", ["completion-01"])
     @pytest.mark.asyncio(loop_scope="session")
-    async def test_completion_structured_output(self, inference_model, inference_stack):
+    async def test_completion_structured_output(self, inference_model, inference_stack, test_case):
         inference_impl, _ = inference_stack
 
         class Output(BaseModel):
@@ -187,7 +189,9 @@ class TestInference:
             year_born: str
             year_retired: str
 
-        user_input = "Michael Jordan was born in 1963. He played basketball for the Chicago Bulls. He retired in 2003."
+        tc = TestCase(test_case)
+        user_input = tc["user_input"]
+
         response = await inference_impl.completion(
             model_id=inference_model,
             content=user_input,
@@ -203,9 +207,10 @@ class TestInference:
         assert isinstance(response.content, str)
 
         answer = Output.model_validate_json(response.content)
-        assert answer.name == "Michael Jordan"
-        assert answer.year_born == "1963"
-        assert answer.year_retired == "2003"
+        expected = tc["expected"]
+        assert answer.name == expected["name"]
+        assert answer.year_born == expected["year_born"]
+        assert answer.year_retired == expected["year_retired"]
 
     @pytest.mark.asyncio(loop_scope="session")
     async def test_chat_completion_non_streaming(
@@ -224,8 +229,9 @@ class TestInference:
         assert isinstance(response.completion_message.content, str)
         assert len(response.completion_message.content) > 0
 
+    @pytest.mark.parametrize("test_case", ["chat_completion-01"])
     @pytest.mark.asyncio(loop_scope="session")
-    async def test_structured_output(self, inference_model, inference_stack, common_params):
+    async def test_structured_output(self, inference_model, inference_stack, common_params, test_case):
         inference_impl, _ = inference_stack
 
         class AnswerFormat(BaseModel):
@@ -234,20 +240,12 @@ class TestInference:
             year_of_birth: int
             num_seasons_in_nba: int
 
+        tc = TestCase(test_case)
+        messages = [SystemMessage.parse_obj(tc["messages"][0]), UserMessage.parse_obj(tc["messages"][1])]
+
         response = await inference_impl.chat_completion(
             model_id=inference_model,
-            messages=[
-                # we include context about Michael Jordan in the prompt so that the test is
-                # focused on the funtionality of the model and not on the information embedded
-                # in the model. Llama 3.2 3B Instruct tends to think MJ played for 14 seasons.
-                SystemMessage(
-                    content=(
-                        "You are a helpful assistant.\n\n"
-                        "Michael Jordan was born in 1963. He played basketball for the Chicago Bulls for 15 seasons."
-                    )
-                ),
-                UserMessage(content="Please give me information about Michael Jordan."),
-            ],
+            messages=messages,
             stream=False,
             response_format=JsonSchemaResponseFormat(
                 json_schema=AnswerFormat.model_json_schema(),
@@ -260,10 +258,11 @@ class TestInference:
         assert isinstance(response.completion_message.content, str)
 
         answer = AnswerFormat.model_validate_json(response.completion_message.content)
-        assert answer.first_name == "Michael"
-        assert answer.last_name == "Jordan"
-        assert answer.year_of_birth == 1963
-        assert answer.num_seasons_in_nba == 15
+        expected = tc["expected"]
+        assert answer.first_name == expected["first_name"]
+        assert answer.last_name == expected["last_name"]
+        assert answer.year_of_birth == expected["year_of_birth"]
+        assert answer.num_seasons_in_nba == expected["num_seasons_in_nba"]
 
         response = await inference_impl.chat_completion(
             model_id=inference_model,
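Taken together, the pattern these hunks introduce is: a `parametrize` marker carries a test-case id, `TestCase` (added below) resolves the id to JSON data, and the assertions compare against that file's `expected` block. A condensed sketch of the shape, with the inference call stubbed out (`test_sketch` and the stubbed reply are hypothetical, not the actual test):

    import pytest
    from pydantic import BaseModel

    from llama_stack.providers.tests.test_cases.test_case import TestCase


    class Output(BaseModel):
        name: str
        year_born: str
        year_retired: str


    @pytest.mark.parametrize("test_case", ["completion-01"])
    def test_sketch(test_case):
        tc = TestCase(test_case)        # "completion-01" -> key "01" in completion.json
        user_input = tc["user_input"]   # prompt defined in the JSON file
        # ... run the completion on user_input and parse the model's JSON reply ...
        answer = Output(name="Michael Jordan", year_born="1963", year_retired="2003")  # stubbed reply
        expected = tc["expected"]
        assert answer.name == expected["name"]
        assert answer.year_born == expected["year_born"]
        assert answer.year_retired == expected["year_retired"]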


@@ -0,0 +1,5 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.


@@ -0,0 +1,24 @@
+{
+    "01": {
+        "name": "structured output",
+        "data": {
+            "notes": "We include context about Michael Jordan in the prompt so that the test is focused on the functionality of the model and not on the information embedded in the model. Llama 3.2 3B Instruct tends to think MJ played for 14 seasons.",
+            "messages": [
+                {
+                    "role": "system",
+                    "content": "You are a helpful assistant. Michael Jordan was born in 1963. He played basketball for the Chicago Bulls for 15 seasons."
+                },
+                {
+                    "role": "user",
+                    "content": "Please give me information about Michael Jordan."
+                }
+            ],
+            "expected": {
+                "first_name": "Michael",
+                "last_name": "Jordan",
+                "year_of_birth": 1963,
+                "num_seasons_in_nba": 15
+            }
+        }
+    }
+}


@@ -0,0 +1,13 @@
+{
+    "01": {
+        "name": "structured output",
+        "data": {
+            "user_input": "Michael Jordan was born in 1963. He played basketball for the Chicago Bulls. He retired in 2003.",
+            "expected": {
+                "name": "Michael Jordan",
+                "year_born": "1963",
+                "year_retired": "2003"
+            }
+        }
+    }
+}


@@ -0,0 +1,32 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+import json
+import pathlib
+
+
+class TestCase:
+    _apis = ["chat_completion", "completion"]
+    _jsonblob = {}
+
+    def __init__(self, name):
+        # loading all test cases
+        if self._jsonblob == {}:
+            for api in self._apis:
+                with open(pathlib.Path(__file__).parent / f"{api}.json", "r") as f:
+                    TestCase._jsonblob.update({f"{api}-{k}": v for k, v in json.load(f).items()})
+
+        # loading this test case
+        tc = self._jsonblob.get(name)
+        if tc is None:
+            raise ValueError(f"Test case {name} not found")
+
+        # these are the only fields we need
+        self.name = tc.get("name")
+        self.data = tc.get("data")
+
+    def __getitem__(self, key):
+        return self.data[key]
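`TestCase` flattens both JSON files into one class-level dictionary keyed as `"<api>-<key>"`, so `"completion-01"` and `"chat_completion-01"` address entries across files. A small usage sketch against the files added above (assuming the package is importable):

    from llama_stack.providers.tests.test_cases.test_case import TestCase

    tc = TestCase("chat_completion-01")
    print(tc.name)                                # structured output
    print(tc["messages"][1]["content"])           # Please give me information about Michael Jordan.
    print(tc["expected"]["num_seasons_in_nba"])   # 15

    try:
        TestCase("no_such-id")                    # hypothetical unknown id
    except ValueError as e:
        print(e)                                  # Test case no_such-id not found

Because `_jsonblob` is a class attribute filled on first construction, the JSON files are parsed once per test session and shared by every subsequent `TestCase`.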


@@ -7,6 +7,9 @@
 import pytest
 from pydantic import BaseModel
 
+from llama_stack.providers.tests.test_cases.test_case import TestCase
+
+
 PROVIDER_TOOL_PROMPT_FORMAT = {
     "remote::ollama": "json",
     "remote::together": "json",
@@ -120,16 +123,16 @@ def test_completion_log_probs_streaming(llama_stack_client, text_model_id, infer
             assert not chunk.logprobs, "Logprobs should be empty"
 
 
-def test_text_completion_structured_output(llama_stack_client, text_model_id, inference_provider_type):
-    user_input = """
-    Michael Jordan was born in 1963. He played basketball for the Chicago Bulls. He retired in 2003.
-    """
-
+@pytest.mark.parametrize("test_case", ["completion-01"])
+def test_text_completion_structured_output(llama_stack_client, text_model_id, inference_provider_type, test_case):
     class AnswerFormat(BaseModel):
         name: str
         year_born: str
         year_retired: str
 
+    tc = TestCase(test_case)
+    user_input = tc["user_input"]
+
     response = llama_stack_client.inference.completion(
         model_id=text_model_id,
         content=user_input,
@@ -143,9 +146,10 @@ def test_text_completion_structured_output(llama_stack_client, text_model_id, in
         },
     )
     answer = AnswerFormat.model_validate_json(response.content)
-    assert answer.name == "Michael Jordan"
-    assert answer.year_born == "1963"
-    assert answer.year_retired == "2003"
+    expected = tc["expected"]
+    assert answer.name == expected["name"]
+    assert answer.year_born == expected["year_born"]
+    assert answer.year_retired == expected["year_retired"]
 
 
 @pytest.mark.parametrize(
@@ -247,6 +251,7 @@ def test_text_chat_completion_with_tool_calling_and_streaming(
     assert tool_invocation_content == "[get_weather, {'location': 'San Francisco, CA'}]"
 
 
+@pytest.mark.parametrize("test_case", ["chat_completion-01"])
 def test_text_chat_completion_with_tool_choice_required(
     llama_stack_client, text_model_id, get_weather_tool_definition, provider_tool_format, inference_provider_type
 ):
@@ -281,25 +286,18 @@ def test_text_chat_completion_with_tool_choice_none(
     assert tool_invocation_content == ""
 
 
-def test_text_chat_completion_structured_output(llama_stack_client, text_model_id, inference_provider_type):
+def test_text_chat_completion_structured_output(llama_stack_client, text_model_id, inference_provider_type, test_case):
     class AnswerFormat(BaseModel):
         first_name: str
         last_name: str
         year_of_birth: int
         num_seasons_in_nba: int
 
+    tc = TestCase(test_case)
+
     response = llama_stack_client.inference.chat_completion(
         model_id=text_model_id,
-        messages=[
-            {
-                "role": "system",
-                "content": "You are a helpful assistant. Michael Jordan was born in 1963. He played basketball for the Chicago Bulls for 15 seasons.",
-            },
-            {
-                "role": "user",
-                "content": "Please give me information about Michael Jordan.",
-            },
-        ],
+        messages=tc["messages"],
         response_format={
             "type": "json_schema",
             "json_schema": AnswerFormat.model_json_schema(),
@@ -307,10 +305,11 @@ def test_text_chat_completion_structured_output(llama_stack_client, text_model_i
         stream=False,
     )
     answer = AnswerFormat.model_validate_json(response.completion_message.content)
-    assert answer.first_name == "Michael"
-    assert answer.last_name == "Jordan"
-    assert answer.year_of_birth == 1963
-    assert answer.num_seasons_in_nba == 15
+    expected = tc["expected"]
+    assert answer.first_name == expected["first_name"]
+    assert answer.last_name == expected["last_name"]
+    assert answer.year_of_birth == expected["year_of_birth"]
+    assert answer.num_seasons_in_nba == expected["num_seasons_in_nba"]
 
 
 @pytest.mark.parametrize(
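One difference worth noting between the two suites: the provider test rehydrates the JSON messages into typed models (`SystemMessage.parse_obj(...)` in the hunks above), while this client SDK test hands `tc["messages"]` to `chat_completion` as plain dicts. A sketch of the two shapes (the `SystemMessage` import path is assumed from the datatypes import earlier in the diff):

    raw = {"role": "system", "content": "You are a helpful assistant."}

    # Client SDK path: the dict is sent as-is.
    messages = [raw]

    # Provider path: validate into a typed message first.
    from llama_stack.models.llama.datatypes import SystemMessage  # import path assumed
    typed = SystemMessage.parse_obj(raw)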