mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-06-28 02:53:30 +00:00
feat: Structured output for Responses API (#2324)
# What does this PR do? This adds the missing `text` parameter to the Responses API that is how users control structured outputs. All we do with that parameter is map it to the corresponding chat completion response_format. ## Test Plan The new unit tests exercise the various permutations allowed for this property, while a couple of new verification tests actually use it for real to verify the model outputs are following the format as expected. Unit tests: `python -m pytest -s -v tests/unit/providers/agents/meta_reference/test_openai_responses.py` Verification tests: ``` llama stack run llama_stack/templates/together/run.yaml pytest -s -vv 'tests/verifications/openai_api/test_responses.py' \ --base-url=http://localhost:8321/v1/openai/v1 \ --model meta-llama/Llama-4-Scout-17B-16E-Instruct ``` Note that the verification tests can only be run with a real Llama Stack server (as opposed to using the library client via `--provider=stack:together`) because the Llama Stack python client is not yet updated to accept this text field. Signed-off-by: Ben Browning <bbrownin@redhat.com>
This commit is contained in:
parent
c70ca8344f
commit
8bee2954be
8 changed files with 323 additions and 2 deletions
|
@ -546,3 +546,39 @@ async def test_response_streaming_multi_turn_tool_execution(
|
|||
assert expected_output.lower() in final_response.output_text.lower(), (
|
||||
f"Expected '{expected_output}' to appear in response: {final_response.output_text}"
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"text_format",
|
||||
# Not testing json_object because most providers don't actually support it.
|
||||
[
|
||||
{"type": "text"},
|
||||
{
|
||||
"type": "json_schema",
|
||||
"name": "capitals",
|
||||
"description": "A schema for the capital of each country",
|
||||
"schema": {"type": "object", "properties": {"capital": {"type": "string"}}},
|
||||
"strict": True,
|
||||
},
|
||||
],
|
||||
)
|
||||
def test_response_text_format(request, openai_client, model, provider, verification_config, text_format):
|
||||
if isinstance(openai_client, LlamaStackAsLibraryClient):
|
||||
pytest.skip("Responses API text format is not yet supported in library client.")
|
||||
|
||||
test_name_base = get_base_test_name(request)
|
||||
if should_skip_test(verification_config, provider, model, test_name_base):
|
||||
pytest.skip(f"Skipping {test_name_base} for model {model} on provider {provider} based on config.")
|
||||
|
||||
stream = False
|
||||
response = openai_client.responses.create(
|
||||
model=model,
|
||||
input="What is the capital of France?",
|
||||
stream=stream,
|
||||
text={"format": text_format},
|
||||
)
|
||||
# by_alias=True is needed because otherwise Pydantic renames our "schema" field
|
||||
assert response.text.format.model_dump(exclude_none=True, by_alias=True) == text_format
|
||||
assert "paris" in response.output_text.lower()
|
||||
if text_format["type"] == "json_schema":
|
||||
assert "paris" in json.loads(response.output_text)["capital"].lower()
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue