feat: OpenAI Responses API (#1989)
# What does this PR do?

This provides an initial [OpenAI Responses API](https://platform.openai.com/docs/api-reference/responses) implementation. The API is not yet complete; this is more of a proof of concept showing how we can store responses in our key-value stores and use them to support Responses API concepts like `previous_response_id`.

## Test Plan

I've added a new `tests/integration/openai_responses/test_openai_responses.py` as part of test-driven development for this new API. I'm only testing this locally with the remote-vllm provider for now, but it should work with any of our inference providers, since the only API it requires from the inference provider is the `openai_chat_completion` endpoint.

```
VLLM_URL="http://localhost:8000/v1" \
INFERENCE_MODEL="meta-llama/Llama-3.2-3B-Instruct" \
llama stack build --template remote-vllm --image-type venv --run
```

```
LLAMA_STACK_CONFIG="http://localhost:8321" \
python -m pytest -v \
  tests/integration/openai_responses/test_openai_responses.py \
  --text-model "meta-llama/Llama-3.2-3B-Instruct"
```

---------

Signed-off-by: Ben Browning <bbrownin@redhat.com>
Co-authored-by: Ashwin Bharambe <ashwin.bharambe@gmail.com>
Commit 8dfce2f596 (parent 79851d93aa)
21 changed files with 1766 additions and 59 deletions
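For reviewers, here is a minimal sketch of the surface this PR exercises, using the stock OpenAI Python client pointed at a running Llama Stack server. The base URL and model id come from the test plan above and are examples, not requirements.

```python
# Minimal sketch of the new Responses API surface, via the standard OpenAI
# client pointed at a Llama Stack server (base URL/model are example values).
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8321/v1/openai/v1", api_key="fake")

response = client.responses.create(
    model="meta-llama/Llama-3.2-3B-Instruct",
    input="Which planet do humans live on?",
    stream=False,
)
print(response.output_text)

# Responses are persisted in the stack's key-value store, so a follow-up
# request can chain on stored context via previous_response_id.
follow_up = client.responses.create(
    model="meta-llama/Llama-3.2-3B-Instruct",
    input="Repeat your previous response in all caps.",
    previous_response_id=response.id,
)
print(follow_up.output_text)
```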
@@ -14,6 +14,7 @@ from pathlib import Path
import pytest
import yaml
from llama_stack_client import LlamaStackClient
from openai import OpenAI

from llama_stack import LlamaStackAsLibraryClient
from llama_stack.apis.datatypes import Api
@@ -207,3 +208,9 @@ def llama_stack_client(request, provider_data, text_model_id):
        raise RuntimeError("Initialization failed")

    return client


@pytest.fixture(scope="session")
def openai_client(client_with_models):
    base_url = f"{client_with_models.base_url}/v1/openai/v1"
    return OpenAI(base_url=base_url, api_key="fake")
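A hypothetical test using this session-scoped `openai_client` fixture might look like the following sketch; the model id is only an example value.

```python
# Hypothetical usage of the openai_client fixture added above; the model id
# is an example, not something this PR prescribes.
def test_openai_client_smoke(openai_client):
    response = openai_client.responses.create(
        model="meta-llama/Llama-3.2-3B-Instruct",
        input="Which planet do humans live on?",
        stream=False,
    )
    assert "earth" in response.output_text.lower()
```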
tests/integration/test_cases/openai/responses.json (new file, 37 lines)
@@ -0,0 +1,37 @@
{
    "non_streaming_01": {
        "data": {
            "question": "Which planet do humans live on?",
            "expected": "Earth"
        }
    },
    "non_streaming_02": {
        "data": {
            "question": "Which planet has rings around it with a name starting with letter S?",
            "expected": "Saturn"
        }
    },
    "streaming_01": {
        "data": {
            "question": "What's the name of the Sun in latin?",
            "expected": "Sol"
        }
    },
    "streaming_02": {
        "data": {
            "question": "What is the name of the US capital?",
            "expected": "Washington"
        }
    },
    "tools_web_search_01": {
        "data": {
            "input": "How many experts does the Llama 4 Maverick model have?",
            "tools": [
                {
                    "type": "web_search"
                }
            ],
            "expected": "128"
        }
    }
}
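A quick standalone way to read one of these cases (bypassing the `TestCase` helper that registers them in the next hunk):

```python
# Standalone sketch: read one case from the new JSON fixture directly.
import json
from pathlib import Path

cases = json.loads(Path("tests/integration/test_cases/openai/responses.json").read_text())
case = cases["non_streaming_01"]["data"]
print(case["question"])  # "Which planet do humans live on?"
print(case["expected"])  # "Earth"
```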
@@ -12,6 +12,7 @@ class TestCase:
    _apis = [
        "inference/chat_completion",
        "inference/completion",
        "openai/responses",
    ]
    _jsonblob = {}
(The following identical hunk is applied in three separate verification provider config files; the file names are not preserved in this excerpt.)

@@ -13,3 +13,5 @@ test_exclusions:
    - test_chat_non_streaming_image
    - test_chat_streaming_image
    - test_chat_multi_turn_multiple_images
    - test_response_non_streaming_image
    - test_response_non_streaming_multi_turn_image
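For reference, a small standalone illustration of how these exclusions are consumed by `should_skip_test()` (added to `fixtures/fixtures.py` later in this commit); the provider and model names below are placeholders.

```python
# Illustration of the exclusion lookup; "example-provider" and "example-model"
# are placeholders, and the function body mirrors the helper in this commit.
verification_config = {
    "providers": {
        "example-provider": {
            "models": ["example-model"],
            "test_exclusions": {
                "example-model": [
                    "test_response_non_streaming_image",
                    "test_response_non_streaming_multi_turn_image",
                ],
            },
        },
    },
}

def should_skip_test(verification_config, provider, model, test_name_base):
    provider_config = verification_config.get("providers", {}).get(provider)
    if not provider_config:
        return False
    exclusions = provider_config.get("test_exclusions", {}).get(model, [])
    return test_name_base in exclusions

print(should_skip_test(verification_config, "example-provider", "example-model",
                       "test_response_non_streaming_image"))  # True
print(should_skip_test(verification_config, "example-provider", "example-model",
                       "test_response_streaming_basic"))      # False
```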
@@ -16,7 +16,7 @@ Description:

Configuration:
- Provider details (models, display names) are loaded from `tests/verifications/config.yaml`.
- Provider details (models, display names) are loaded from `tests/verifications/conf/*.yaml`.
- Test cases are defined in YAML files within `tests/verifications/openai_api/fixtures/test_cases/`.
- Test results are stored in `tests/verifications/test_results/`.
@@ -1,10 +1,15 @@
# This is a temporary run file because model names used by the verification tests
# are not quite consistent with various pre-existing distributions.
#
version: '2'
image_name: openai-api-verification
apis:
- agents
- inference
- telemetry
- tool_runtime
- vector_io
- safety
providers:
  inference:
  - provider_id: together
|
|||
provider_type: remote::fireworks
|
||||
config:
|
||||
url: https://api.fireworks.ai/inference/v1
|
||||
api_key: ${env.FIREWORKS_API_KEY}
|
||||
api_key: ${env.FIREWORKS_API_KEY:}
|
||||
- provider_id: groq
|
||||
provider_type: remote::groq
|
||||
config:
|
||||
url: https://api.groq.com
|
||||
api_key: ${env.GROQ_API_KEY}
|
||||
api_key: ${env.GROQ_API_KEY:}
|
||||
- provider_id: openai
|
||||
provider_type: remote::openai
|
||||
config:
|
||||
|
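The `api_key` lines above move to the `${env.VAR:default}` form with an empty default, so the run file can load without every provider's key being exported. A rough illustration of that substitution rule follows; this is not Llama Stack's actual resolver, just a sketch of the assumed behavior.

```python
# Illustrative resolver for the ${env.VAR:default} syntax used in run.yaml;
# not the stack's real implementation.
import os
import re

def resolve_env(value: str) -> str:
    def repl(match: re.Match) -> str:
        name, _, default = match.group(1).partition(":")
        return os.environ.get(name, default)
    return re.sub(r"\$\{env\.([^}]*)\}", repl, value)

print(resolve_env("${env.FIREWORKS_API_KEY:}"))              # "" when unset
print(resolve_env("${env.TELEMETRY_SINKS:console,sqlite}"))  # "console,sqlite" when unset
```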
@@ -45,6 +50,19 @@ providers:
      service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
      sinks: ${env.TELEMETRY_SINKS:console,sqlite}
      sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/openai/trace_store.db}
  safety:
  - provider_id: llama-guard
    provider_type: inline::llama-guard
    config:
      excluded_categories: []
  agents:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config:
      persistence_store:
        type: sqlite
        namespace: null
        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/openai}/agents_store.db
  tool_runtime:
  - provider_id: brave-search
    provider_type: remote::brave-search
tests/verifications/openai_api/conftest.py (new file, 35 lines)
@@ -0,0 +1,35 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from tests.verifications.openai_api.fixtures.fixtures import _load_all_verification_configs


def pytest_generate_tests(metafunc):
    """Dynamically parametrize tests based on the selected provider and config."""
    if "model" in metafunc.fixturenames:
        provider = metafunc.config.getoption("provider")
        if not provider:
            print("Warning: --provider not specified. Skipping model parametrization.")
            metafunc.parametrize("model", [])
            return

        try:
            config_data = _load_all_verification_configs()
        except (FileNotFoundError, IOError) as e:
            print(f"ERROR loading verification configs: {e}")
            config_data = {"providers": {}}

        provider_config = config_data.get("providers", {}).get(provider)
        if provider_config:
            models = provider_config.get("models", [])
            if models:
                metafunc.parametrize("model", models)
            else:
                print(f"Warning: No models found for provider '{provider}' in config.")
                metafunc.parametrize("model", [])  # Parametrize empty if no models found
        else:
            print(f"Warning: Provider '{provider}' not found in config. No models parametrized.")
            metafunc.parametrize("model", [])  # Parametrize empty if provider not found
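A hypothetical test showing the effect of `pytest_generate_tests` above: with `--provider <name>`, any test that takes a `model` argument runs once per model configured for that provider.

```python
# Hypothetical test; it simply receives one parametrized model per run.
def test_model_parametrization(model, provider):
    assert isinstance(model, str) and len(model) > 0
    print(f"running against {provider} / {model}")
```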
@@ -5,14 +5,16 @@
# the root directory of this source tree.

import os
import re
from pathlib import Path

import pytest
import yaml
from openai import OpenAI

# --- Helper Functions ---


# --- Helper Function to Load Config ---
def _load_all_verification_configs():
    """Load and aggregate verification configs from the conf/ directory."""
    # Note: Path is relative to *this* file (fixtures.py)
@@ -44,7 +46,30 @@ def _load_all_verification_configs():
    return {"providers": all_provider_configs}


# --- End Helper Function ---
def case_id_generator(case):
    """Generate a test ID from the case's 'case_id' field, or use a default."""
    case_id = case.get("case_id")
    if isinstance(case_id, (str, int)):
        return re.sub(r"\W|^(?=\d)", "_", str(case_id))
    return None


def should_skip_test(verification_config, provider, model, test_name_base):
    """Check if a test should be skipped based on config exclusions."""
    provider_config = verification_config.get("providers", {}).get(provider)
    if not provider_config:
        return False  # No config for provider, don't skip

    exclusions = provider_config.get("test_exclusions", {}).get(model, [])
    return test_name_base in exclusions


# Helper to get the base test name from the request object
def get_base_test_name(request):
    return request.node.originalname


# --- End Helper Functions ---


@pytest.fixture(scope="session")
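As a concrete illustration of the sanitization done by `case_id_generator` above (the input strings are just examples):

```python
import re

def case_id_generator(case):
    # Same regex as the helper above: non-word characters, or the position
    # before a leading digit, become underscores to produce a clean pytest id.
    case_id = case.get("case_id")
    if isinstance(case_id, (str, int)):
        return re.sub(r"\W|^(?=\d)", "_", str(case_id))
    return None

print(case_id_generator({"case_id": "llama 4-maverick"}))  # llama_4_maverick
print(case_id_generator({"case_id": 42}))                  # _42
```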
@@ -0,0 +1,65 @@
test_response_basic:
  test_name: test_response_basic
  test_params:
    case:
    - case_id: "earth"
      input: "Which planet do humans live on?"
      output: "earth"
    - case_id: "saturn"
      input: "Which planet has rings around it with a name starting with letter S?"
      output: "saturn"

test_response_multi_turn:
  test_name: test_response_multi_turn
  test_params:
    case:
    - case_id: "earth"
      turns:
      - input: "Which planet do humans live on?"
        output: "earth"
      - input: "What is the name of the planet from your previous response?"
        output: "earth"

test_response_web_search:
  test_name: test_response_web_search
  test_params:
    case:
    - case_id: "llama_experts"
      input: "How many experts does the Llama 4 Maverick model have?"
      tools:
      - type: web_search
        search_context_size: "low"
      output: "128"

test_response_image:
  test_name: test_response_image
  test_params:
    case:
    - case_id: "llama_image"
      input:
      - role: user
        content:
        - type: input_text
          text: "Identify the type of animal in this image."
        - type: input_image
          image_url: "https://upload.wikimedia.org/wikipedia/commons/f/f7/Llamas%2C_Vernagt-Stausee%2C_Italy.jpg"
      output: "llama"

test_response_multi_turn_image:
  test_name: test_response_multi_turn_image
  test_params:
    case:
    - case_id: "llama_image_search"
      turns:
      - input:
        - role: user
          content:
          - type: input_text
            text: "What type of animal is in this image? Please respond with a single word that starts with the letter 'L'."
          - type: input_image
            image_url: "https://upload.wikimedia.org/wikipedia/commons/f/f7/Llamas%2C_Vernagt-Stausee%2C_Italy.jpg"
        output: "llama"
      - input: "Search the web using the search tool for the animal from the previous response. Your search query should be a single phrase that includes the animal's name and the words 'maverick' and 'scout'."
        tools:
        - type: web_search
        output: "model"
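The tests in `test_responses.py` below access these cases as `responses_test_cases["test_response_basic"]["test_params"]["case"]`. A minimal sketch of the equivalent lookup; the file path is an assumption based on the README hunk earlier, and the internals of `load_test_cases` are not shown in this diff.

```python
# Minimal sketch: parse the YAML above and index it the way test_responses.py
# does. The path is assumed, not confirmed by this excerpt.
from pathlib import Path

import yaml

cases_path = Path("tests/verifications/openai_api/fixtures/test_cases/responses.yaml")
responses_test_cases = yaml.safe_load(cases_path.read_text())

basic_cases = responses_test_cases["test_response_basic"]["test_params"]["case"]
print([c["case_id"] for c in basic_cases])  # ['earth', 'saturn']
```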
@@ -7,7 +7,6 @@
import base64
import copy
import json
import re
from pathlib import Path
from typing import Any

@@ -16,7 +15,9 @@ from openai import APIError
from pydantic import BaseModel

from tests.verifications.openai_api.fixtures.fixtures import (
    _load_all_verification_configs,
    case_id_generator,
    get_base_test_name,
    should_skip_test,
)
from tests.verifications.openai_api.fixtures.load import load_test_cases
(This hunk removes the helper block below from the chat-completion test module; the same helpers are re-added in `conftest.py` and `fixtures/fixtures.py` earlier in this commit.)

@@ -25,57 +26,6 @@ chat_completion_test_cases = load_test_cases("chat_completion")
THIS_DIR = Path(__file__).parent


def case_id_generator(case):
    """Generate a test ID from the case's 'case_id' field, or use a default."""
    case_id = case.get("case_id")
    if isinstance(case_id, (str, int)):
        return re.sub(r"\W|^(?=\d)", "_", str(case_id))
    return None


def pytest_generate_tests(metafunc):
    """Dynamically parametrize tests based on the selected provider and config."""
    if "model" in metafunc.fixturenames:
        provider = metafunc.config.getoption("provider")
        if not provider:
            print("Warning: --provider not specified. Skipping model parametrization.")
            metafunc.parametrize("model", [])
            return

        try:
            config_data = _load_all_verification_configs()
        except (FileNotFoundError, IOError) as e:
            print(f"ERROR loading verification configs: {e}")
            config_data = {"providers": {}}

        provider_config = config_data.get("providers", {}).get(provider)
        if provider_config:
            models = provider_config.get("models", [])
            if models:
                metafunc.parametrize("model", models)
            else:
                print(f"Warning: No models found for provider '{provider}' in config.")
                metafunc.parametrize("model", [])  # Parametrize empty if no models found
        else:
            print(f"Warning: Provider '{provider}' not found in config. No models parametrized.")
            metafunc.parametrize("model", [])  # Parametrize empty if provider not found


def should_skip_test(verification_config, provider, model, test_name_base):
    """Check if a test should be skipped based on config exclusions."""
    provider_config = verification_config.get("providers", {}).get(provider)
    if not provider_config:
        return False  # No config for provider, don't skip

    exclusions = provider_config.get("test_exclusions", {}).get(model, [])
    return test_name_base in exclusions


# Helper to get the base test name from the request object
def get_base_test_name(request):
    return request.node.originalname


@pytest.fixture
def multi_image_data():
    files = [
tests/verifications/openai_api/test_responses.py (new file, 166 lines)
@@ -0,0 +1,166 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.


import pytest

from tests.verifications.openai_api.fixtures.fixtures import (
    case_id_generator,
    get_base_test_name,
    should_skip_test,
)
from tests.verifications.openai_api.fixtures.load import load_test_cases

responses_test_cases = load_test_cases("responses")


@pytest.mark.parametrize(
    "case",
    responses_test_cases["test_response_basic"]["test_params"]["case"],
    ids=case_id_generator,
)
def test_response_non_streaming_basic(request, openai_client, model, provider, verification_config, case):
    test_name_base = get_base_test_name(request)
    if should_skip_test(verification_config, provider, model, test_name_base):
        pytest.skip(f"Skipping {test_name_base} for model {model} on provider {provider} based on config.")

    response = openai_client.responses.create(
        model=model,
        input=case["input"],
        stream=False,
    )
    output_text = response.output_text.lower().strip()
    assert len(output_text) > 0
    assert case["output"].lower() in output_text

    retrieved_response = openai_client.responses.retrieve(response_id=response.id)
    assert retrieved_response.output_text == response.output_text

    next_response = openai_client.responses.create(
        model=model, input="Repeat your previous response in all caps.", previous_response_id=response.id
    )
    next_output_text = next_response.output_text.strip()
    assert case["output"].upper() in next_output_text


@pytest.mark.parametrize(
    "case",
    responses_test_cases["test_response_basic"]["test_params"]["case"],
    ids=case_id_generator,
)
def test_response_streaming_basic(request, openai_client, model, provider, verification_config, case):
    test_name_base = get_base_test_name(request)
    if should_skip_test(verification_config, provider, model, test_name_base):
        pytest.skip(f"Skipping {test_name_base} for model {model} on provider {provider} based on config.")

    response = openai_client.responses.create(
        model=model,
        input=case["input"],
        stream=True,
    )
    streamed_content = []
    response_id = ""
    for chunk in response:
        if chunk.type == "response.completed":
            response_id = chunk.response.id
            streamed_content.append(chunk.response.output_text.strip())

    assert len(streamed_content) > 0
    assert case["output"].lower() in "".join(streamed_content).lower()

    retrieved_response = openai_client.responses.retrieve(response_id=response_id)
    assert retrieved_response.output_text == "".join(streamed_content)


@pytest.mark.parametrize(
    "case",
    responses_test_cases["test_response_multi_turn"]["test_params"]["case"],
    ids=case_id_generator,
)
def test_response_non_streaming_multi_turn(request, openai_client, model, provider, verification_config, case):
    test_name_base = get_base_test_name(request)
    if should_skip_test(verification_config, provider, model, test_name_base):
        pytest.skip(f"Skipping {test_name_base} for model {model} on provider {provider} based on config.")

    previous_response_id = None
    for turn in case["turns"]:
        response = openai_client.responses.create(
            model=model,
            input=turn["input"],
            previous_response_id=previous_response_id,
            tools=turn["tools"] if "tools" in turn else None,
        )
        previous_response_id = response.id
        output_text = response.output_text.lower()
        assert turn["output"].lower() in output_text


@pytest.mark.parametrize(
    "case",
    responses_test_cases["test_response_web_search"]["test_params"]["case"],
    ids=case_id_generator,
)
def test_response_non_streaming_web_search(request, openai_client, model, provider, verification_config, case):
    test_name_base = get_base_test_name(request)
    if should_skip_test(verification_config, provider, model, test_name_base):
        pytest.skip(f"Skipping {test_name_base} for model {model} on provider {provider} based on config.")

    response = openai_client.responses.create(
        model=model,
        input=case["input"],
        tools=case["tools"],
        stream=False,
    )
    assert len(response.output) > 1
    assert response.output[0].type == "web_search_call"
    assert response.output[0].status == "completed"
    assert response.output[1].type == "message"
    assert response.output[1].status == "completed"
    assert response.output[1].role == "assistant"
    assert len(response.output[1].content) > 0
    assert case["output"].lower() in response.output_text.lower().strip()


@pytest.mark.parametrize(
    "case",
    responses_test_cases["test_response_image"]["test_params"]["case"],
    ids=case_id_generator,
)
def test_response_non_streaming_image(request, openai_client, model, provider, verification_config, case):
    test_name_base = get_base_test_name(request)
    if should_skip_test(verification_config, provider, model, test_name_base):
        pytest.skip(f"Skipping {test_name_base} for model {model} on provider {provider} based on config.")

    response = openai_client.responses.create(
        model=model,
        input=case["input"],
        stream=False,
    )
    output_text = response.output_text.lower()
    assert case["output"].lower() in output_text


@pytest.mark.parametrize(
    "case",
    responses_test_cases["test_response_multi_turn_image"]["test_params"]["case"],
    ids=case_id_generator,
)
def test_response_non_streaming_multi_turn_image(request, openai_client, model, provider, verification_config, case):
    test_name_base = get_base_test_name(request)
    if should_skip_test(verification_config, provider, model, test_name_base):
        pytest.skip(f"Skipping {test_name_base} for model {model} on provider {provider} based on config.")

    previous_response_id = None
    for turn in case["turns"]:
        response = openai_client.responses.create(
            model=model,
            input=turn["input"],
            previous_response_id=previous_response_id,
            tools=turn["tools"] if "tools" in turn else None,
        )
        previous_response_id = response.id
        output_text = response.output_text.lower()
        assert turn["output"].lower() in output_text
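Finally, a small sketch of driving the new verification tests from Python rather than the shell; this mirrors the usual CLI invocation, and `"openai"` is only an example value for the suite's existing `--provider` option.

```python
# Programmatic equivalent of the CLI run; provider value is an example.
import sys

import pytest

exit_code = pytest.main([
    "tests/verifications/openai_api/test_responses.py",
    "--provider", "openai",
    "-v",
])
sys.exit(exit_code)
```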