Mirror of https://github.com/meta-llama/llama-stack.git

Commit 85ef55391d: Merge branch 'main' into out-of-token-budget-fix

61 changed files with 1322 additions and 1598 deletions
tests/README.md (new file, 9 lines)

@@ -0,0 +1,9 @@
# Llama Stack Tests

Llama Stack has multiple layers of testing to ensure continuous functionality and prevent regressions in the codebase.

| Testing Type | Details |
|--------------|---------|
| Unit | [unit/README.md](unit/README.md) |
| Integration | [integration/README.md](integration/README.md) |
| Verification | [verifications/README.md](verifications/README.md) |
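Each layer has its own entry point. As a quick orientation, here is a minimal sketch of how the unit and integration layers are typically invoked, based on the READMEs referenced above; the `together` template name is only an illustrative assumption, and the verification suite documents its own flags in its README.

```bash
# Unit tests (see tests/unit/README.md)
source .venv/bin/activate
./scripts/unit-tests.sh

# Integration tests against a stack config (see tests/integration/README.md);
# "together" is just an example template name.
pytest tests/integration --stack-config=together
```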
@@ -53,9 +53,6 @@ providers:
    config:
      api_key: ${env.TAVILY_SEARCH_API_KEY:}
      max_results: 3
-  - provider_id: code-interpreter
-    provider_type: inline::code-interpreter
-    config: {}
  - provider_id: rag-runtime
    provider_type: inline::rag-runtime
    config: {}
@@ -90,8 +87,6 @@ tool_groups:
  provider_id: tavily-search
- toolgroup_id: builtin::rag
  provider_id: rag-runtime
-- toolgroup_id: builtin::code_interpreter
-  provider_id: code-interpreter
- toolgroup_id: builtin::wolfram_alpha
  provider_id: wolfram-alpha
server:
@@ -11,7 +11,7 @@ pytest --help
Here are the most important options:
- `--stack-config`: specify the stack config to use. You have three ways to point to a stack:
  - a URL which points to a Llama Stack distribution server
-  - a template (e.g., `fireworks`, `together`) or a path to a run.yaml file
+  - a template (e.g., `fireworks`, `together`) or a path to a `run.yaml` file
  - a comma-separated list of api=provider pairs, e.g. `inference=fireworks,safety=llama-guard,agents=meta-reference`. This is most useful for testing a single API surface.
- `--env`: set environment variables, e.g. `--env KEY=value`. This is a utility option to set environment variables required by various providers.
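For example, a run against the `together` template might look like the sketch below; the target test directory and the environment variable are illustrative assumptions, so adjust them to your provider setup.

```bash
# Run the inference integration tests against the "together" template,
# passing a provider-specific environment variable via --env.
pytest -s -v tests/integration/inference \
  --stack-config=together \
  --env TOGETHER_API_KEY=<your-key>
```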
@@ -28,7 +28,6 @@ if no model is specified.

Experimental, under development, options:
- `--record-responses`: record new API responses instead of using cached ones
-- `--report`: path where the test report should be written, e.g. --report=/path/to/report.md


## Examples
@@ -15,8 +15,6 @@ from dotenv import load_dotenv

from llama_stack.log import get_logger

-from .report import Report

logger = get_logger(__name__, category="tests")

@@ -60,9 +58,6 @@ def pytest_configure(config):
        os.environ["DISABLE_CODE_SANDBOX"] = "1"
        logger.info("Setting DISABLE_CODE_SANDBOX=1 for macOS")

-    if config.getoption("--report"):
-        config.pluginmanager.register(Report(config))


def pytest_addoption(parser):
    parser.addoption(
@@ -1,54 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from llama_stack.providers.datatypes import Api

INFERENCE_API_CAPA_TEST_MAP = {
    "chat_completion": {
        "streaming": [
            "test_text_chat_completion_streaming",
            "test_image_chat_completion_streaming",
        ],
        "non_streaming": [
            "test_image_chat_completion_non_streaming",
            "test_text_chat_completion_non_streaming",
        ],
        "tool_calling": [
            "test_text_chat_completion_with_tool_calling_and_streaming",
            "test_text_chat_completion_with_tool_calling_and_non_streaming",
        ],
        "log_probs": [
            "test_completion_log_probs_non_streaming",
            "test_completion_log_probs_streaming",
        ],
    },
    "completion": {
        "streaming": ["test_text_completion_streaming"],
        "non_streaming": ["test_text_completion_non_streaming"],
        "structured_output": ["test_text_completion_structured_output"],
    },
}

VECTORIO_API_TEST_MAP = {
    "retrieve": {
        "": ["test_vector_db_retrieve"],
    }
}

AGENTS_API_TEST_MAP = {
    "create_agent_turn": {
        "rag": ["test_rag_agent"],
        "custom_tool": ["test_custom_tool"],
        "code_execution": ["test_code_interpreter_for_attachments"],
    }
}

API_MAPS = {
    Api.inference: INFERENCE_API_CAPA_TEST_MAP,
    Api.vector_io: VECTORIO_API_TEST_MAP,
    Api.agents: AGENTS_API_TEST_MAP,
}
@@ -1,216 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.


from collections import defaultdict

import pytest
from pytest import CollectReport
from termcolor import cprint

from llama_stack.models.llama.sku_list import (
    all_registered_models,
    llama3_1_instruct_models,
    llama3_2_instruct_models,
    llama3_3_instruct_models,
    llama3_instruct_models,
    safety_models,
)
from llama_stack.models.llama.sku_types import CoreModelId
from llama_stack.providers.datatypes import Api

from .metadata import API_MAPS


def featured_models():
    models = [
        *llama3_instruct_models(),
        *llama3_1_instruct_models(),
        *llama3_2_instruct_models(),
        *llama3_3_instruct_models(),
        *safety_models(),
    ]
    return {model.huggingface_repo: model for model in models if not model.variant}


SUPPORTED_MODELS = {
    "ollama": {
        CoreModelId.llama3_1_8b_instruct.value,
        CoreModelId.llama3_1_8b_instruct.value,
        CoreModelId.llama3_1_70b_instruct.value,
        CoreModelId.llama3_1_70b_instruct.value,
        CoreModelId.llama3_1_405b_instruct.value,
        CoreModelId.llama3_1_405b_instruct.value,
        CoreModelId.llama3_2_1b_instruct.value,
        CoreModelId.llama3_2_1b_instruct.value,
        CoreModelId.llama3_2_3b_instruct.value,
        CoreModelId.llama3_2_3b_instruct.value,
        CoreModelId.llama3_2_11b_vision_instruct.value,
        CoreModelId.llama3_2_11b_vision_instruct.value,
        CoreModelId.llama3_2_90b_vision_instruct.value,
        CoreModelId.llama3_2_90b_vision_instruct.value,
        CoreModelId.llama3_3_70b_instruct.value,
        CoreModelId.llama_guard_3_8b.value,
        CoreModelId.llama_guard_3_1b.value,
    },
    "tgi": {model.core_model_id.value for model in all_registered_models() if model.huggingface_repo},
    "vllm": {model.core_model_id.value for model in all_registered_models() if model.huggingface_repo},
}


class Report:
    def __init__(self, config):
        self.distro_name = None
        self.config = config

        stack_config = self.config.getoption("--stack-config")
        if stack_config:
            is_url = stack_config.startswith("http") or "//" in stack_config
            is_yaml = stack_config.endswith(".yaml")
            if not is_url and not is_yaml:
                self.distro_name = stack_config

        self.report_data = defaultdict(dict)
        # test function -> test nodeid
        self.test_data = dict()
        self.test_name_to_nodeid = defaultdict(list)
        self.vision_model_id = None
        self.text_model_id = None
        self.client = None

    @pytest.hookimpl(tryfirst=True)
    def pytest_runtest_logreport(self, report):
        # This hook is called in several phases, including setup, call and teardown
        # The test is considered failed / error if any of the outcomes is not "Passed"
        outcome = self._process_outcome(report)
        if report.nodeid not in self.test_data:
            self.test_data[report.nodeid] = outcome
        elif self.test_data[report.nodeid] != outcome and outcome != "Passed":
            self.test_data[report.nodeid] = outcome

    def pytest_sessionfinish(self, session):
        if not self.client:
            return

        report = []
        report.append(f"# Report for {self.distro_name} distribution")
        report.append("\n## Supported Models")

        header = f"| Model Descriptor | {self.distro_name} |"
        dividor = "|:---|:---|"

        report.append(header)
        report.append(dividor)

        rows = []
        if self.distro_name in SUPPORTED_MODELS:
            for model in all_registered_models():
                if ("Instruct" not in model.core_model_id.value and "Guard" not in model.core_model_id.value) or (
                    model.variant
                ):
                    continue
                row = f"| {model.core_model_id.value} |"
                if model.core_model_id.value in SUPPORTED_MODELS[self.distro_name]:
                    row += " ✅ |"
                else:
                    row += " ❌ |"
                rows.append(row)
        else:
            supported_models = {m.identifier for m in self.client.models.list()}
            for hf_name, model in featured_models().items():
                row = f"| {model.core_model_id.value} |"
                if hf_name in supported_models:
                    row += " ✅ |"
                else:
                    row += " ❌ |"
                rows.append(row)
        report.extend(rows)

        report.append("\n## Inference")
        test_table = [
            "| Model | API | Capability | Test | Status |",
            "|:----- |:-----|:-----|:-----|:-----|",
        ]
        for api, capa_map in API_MAPS[Api.inference].items():
            for capa, tests in capa_map.items():
                for test_name in tests:
                    model_id = self.text_model_id if "text" in test_name else self.vision_model_id
                    test_nodeids = self.test_name_to_nodeid[test_name]
                    if not test_nodeids:
                        continue

                    # There might be more than one parametrizations for the same test function. We take
                    # the result of the first one for now. Ideally we should mark the test as failed if
                    # any of the parametrizations failed.
                    test_table.append(
                        f"| {model_id} | /{api} | {capa} | {test_name} | {self._print_result_icon(self.test_data[test_nodeids[0]])} |"
                    )

        report.extend(test_table)

        name_map = {Api.vector_io: "Vector IO", Api.agents: "Agents"}
        providers = self.client.providers.list()
        for api_group in [Api.vector_io, Api.agents]:
            api_capitalized = name_map[api_group]
            report.append(f"\n## {api_capitalized}")
            test_table = [
                "| Provider | API | Capability | Test | Status |",
                "|:-----|:-----|:-----|:-----|:-----|",
            ]
            provider = [p for p in providers if p.api == str(api_group.name)]
            provider_str = ",".join(provider) if provider else ""
            for api, capa_map in API_MAPS[api_group].items():
                for capa, tests in capa_map.items():
                    for test_name in tests:
                        test_nodeids = self.test_name_to_nodeid[test_name]
                        if not test_nodeids:
                            continue
                        test_table.append(
                            f"| {provider_str} | /{api} | {capa} | {test_name} | {self._print_result_icon(self.test_data[test_nodeids[0]])} |"
                        )
            report.extend(test_table)

        output_file = self.output_path
        text = "\n".join(report) + "\n"
        output_file.write_text(text)
        cprint(f"\nReport generated: {output_file.absolute()}", "green")

    def pytest_runtest_makereport(self, item, call):
        func_name = getattr(item, "originalname", item.name)
        self.test_name_to_nodeid[func_name].append(item.nodeid)

        # Get values from fixtures for report output
        if model_id := item.funcargs.get("text_model_id"):
            text_model = model_id.split("/")[1]
            self.text_model_id = self.text_model_id or text_model
        elif model_id := item.funcargs.get("vision_model_id"):
            vision_model = model_id.split("/")[1]
            self.vision_model_id = self.vision_model_id or vision_model

        if not self.client:
            self.client = item.funcargs.get("llama_stack_client")

    def _print_result_icon(self, result):
        if result == "Passed":
            return "✅"
        elif result == "Failed" or result == "Error":
            return "❌"
        else:
            # result == "Skipped":
            return "⏭️"

    def _process_outcome(self, report: CollectReport):
        if self._is_error(report):
            return "Error"
        if hasattr(report, "wasxfail"):
            if report.outcome in ["passed", "failed"]:
                return "XPassed"
            if report.outcome == "skipped":
                return "XFailed"
        return report.outcome.capitalize()

    def _is_error(self, report: CollectReport):
        return report.when in ["setup", "teardown", "collect"] and report.outcome == "failed"
tests/unit/README.md (new file, 21 lines)

@@ -0,0 +1,21 @@
# Llama Stack Unit Tests

You can run the unit tests by running:

```bash
source .venv/bin/activate
./scripts/unit-tests.sh [PYTEST_ARGS]
```

Any additional arguments are passed to pytest. For example, you can specify a test directory, a specific test file, or any pytest flags (e.g., `-vvv` for verbosity). If no test directory is specified, it defaults to "tests/unit", e.g.:

```bash
./scripts/unit-tests.sh tests/unit/registry/test_registry.py -vvv
```

If you'd like to run for a non-default version of Python (currently 3.10), pass the `PYTHON_VERSION` variable as follows:

```
source .venv/bin/activate
PYTHON_VERSION=3.13 ./scripts/unit-tests.sh
```
@@ -0,0 +1,23 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

import os

import yaml

from llama_stack.apis.inference.inference import (
    OpenAIChatCompletion,
)

FIXTURES_DIR = os.path.dirname(os.path.abspath(__file__))


def load_chat_completion_fixture(filename: str) -> OpenAIChatCompletion:
    fixture_path = os.path.join(FIXTURES_DIR, filename)

    with open(fixture_path) as f:
        data = yaml.safe_load(f)
        return OpenAIChatCompletion(**data)
@@ -0,0 +1,9 @@
id: chat-completion-123
choices:
  - message:
      content: "Dublin"
      role: assistant
    finish_reason: stop
    index: 0
created: 1234567890
model: meta-llama/Llama-3.1-8B-Instruct
@@ -0,0 +1,14 @@
id: chat-completion-123
choices:
  - message:
      tool_calls:
        - id: tool_call_123
          type: function
          function:
            name: web_search
            arguments: '{"query":"What is the capital of Ireland?"}'
      role: assistant
    finish_reason: stop
    index: 0
created: 1234567890
model: meta-llama/Llama-3.1-8B-Instruct
@@ -4,27 +4,32 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

-from unittest.mock import AsyncMock
+from unittest.mock import AsyncMock, patch

import pytest

from llama_stack.apis.agents.openai_responses import (
    OpenAIResponseInputItemList,
    OpenAIResponseInputMessageContentText,
    OpenAIResponseInputToolWebSearch,
-    OpenAIResponseOutputMessage,
+    OpenAIResponseMessage,
    OpenAIResponseObject,
    OpenAIResponseOutputMessageContentOutputText,
    OpenAIResponseOutputMessageWebSearchToolCall,
)
from llama_stack.apis.inference.inference import (
    OpenAIAssistantMessageParam,
    OpenAIChatCompletion,
    OpenAIChatCompletionToolCall,
    OpenAIChatCompletionToolCallFunction,
    OpenAIChoice,
    OpenAIChatCompletionContentPartTextParam,
    OpenAIDeveloperMessageParam,
    OpenAIUserMessageParam,
)
from llama_stack.apis.tools.tools import Tool, ToolGroups, ToolInvocationResult, ToolParameter, ToolRuntime
from llama_stack.providers.inline.agents.meta_reference.openai_responses import (
    OpenAIResponsePreviousResponseWithInputItems,
    OpenAIResponsesImpl,
)
from llama_stack.providers.utils.kvstore import KVStore
from tests.unit.providers.agents.meta_reference.fixtures import load_chat_completion_fixture


@pytest.fixture
@@ -65,21 +70,11 @@ def openai_responses_impl(mock_kvstore, mock_inference_api, mock_tool_groups_api
async def test_create_openai_response_with_string_input(openai_responses_impl, mock_inference_api):
    """Test creating an OpenAI response with a simple string input."""
    # Setup
-    input_text = "Hello, world!"
+    input_text = "What is the capital of Ireland?"
    model = "meta-llama/Llama-3.1-8B-Instruct"

-    mock_chat_completion = OpenAIChatCompletion(
-        id="chat-completion-123",
-        choices=[
-            OpenAIChoice(
-                message=OpenAIAssistantMessageParam(content="Hello! How can I help you?"),
-                finish_reason="stop",
-                index=0,
-            )
-        ],
-        created=1234567890,
-        model=model,
-    )
+    # Load the chat completion fixture
+    mock_chat_completion = load_chat_completion_fixture("simple_chat_completion.yaml")
    mock_inference_api.openai_chat_completion.return_value = mock_chat_completion

    # Execute
@@ -92,7 +87,7 @@ async def test_create_openai_response_with_string_input(openai_responses_impl, m
    # Verify
    mock_inference_api.openai_chat_completion.assert_called_once_with(
        model=model,
-        messages=[OpenAIUserMessageParam(role="user", content="Hello, world!", name=None)],
+        messages=[OpenAIUserMessageParam(role="user", content="What is the capital of Ireland?", name=None)],
        tools=None,
        stream=False,
        temperature=0.1,
@@ -100,55 +95,25 @@ async def test_create_openai_response_with_string_input(openai_responses_impl, m
    openai_responses_impl.persistence_store.set.assert_called_once()
    assert result.model == model
    assert len(result.output) == 1
-    assert isinstance(result.output[0], OpenAIResponseOutputMessage)
-    assert result.output[0].content[0].text == "Hello! How can I help you?"
+    assert isinstance(result.output[0], OpenAIResponseMessage)
+    assert result.output[0].content[0].text == "Dublin"


@pytest.mark.asyncio
async def test_create_openai_response_with_string_input_with_tools(openai_responses_impl, mock_inference_api):
    """Test creating an OpenAI response with a simple string input and tools."""
    # Setup
-    input_text = "What was the score of todays game?"
+    input_text = "What is the capital of Ireland?"
    model = "meta-llama/Llama-3.1-8B-Instruct"

-    mock_chat_completions = [
-        OpenAIChatCompletion(
-            id="chat-completion-123",
-            choices=[
-                OpenAIChoice(
-                    message=OpenAIAssistantMessageParam(
-                        tool_calls=[
-                            OpenAIChatCompletionToolCall(
-                                id="tool_call_123",
-                                type="function",
-                                function=OpenAIChatCompletionToolCallFunction(
-                                    name="web_search", arguments='{"query":"What was the score of todays game?"}'
-                                ),
-                            )
-                        ],
-                    ),
-                    finish_reason="stop",
-                    index=0,
-                )
-            ],
-            created=1234567890,
-            model=model,
-        ),
-        OpenAIChatCompletion(
-            id="chat-completion-123",
-            choices=[
-                OpenAIChoice(
-                    message=OpenAIAssistantMessageParam(content="The score of todays game was 10-12"),
-                    finish_reason="stop",
-                    index=0,
-                )
-            ],
-            created=1234567890,
-            model=model,
-        ),
-    ]
+    # Load the chat completion fixtures
+    tool_call_completion = load_chat_completion_fixture("tool_call_completion.yaml")
+    tool_response_completion = load_chat_completion_fixture("simple_chat_completion.yaml")

-    mock_inference_api.openai_chat_completion.side_effect = mock_chat_completions
+    mock_inference_api.openai_chat_completion.side_effect = [
+        tool_call_completion,
+        tool_response_completion,
+    ]

    openai_responses_impl.tool_groups_api.get_tool.return_value = Tool(
        identifier="web_search",
@@ -163,7 +128,7 @@ async def test_create_openai_response_with_string_input_with_tools(openai_respon

    openai_responses_impl.tool_runtime_api.invoke_tool.return_value = ToolInvocationResult(
        status="completed",
-        content="The score of todays game was 10-12",
+        content="Dublin",
    )

    # Execute
@@ -180,23 +145,172 @@ async def test_create_openai_response_with_string_input_with_tools(openai_respon

    # Verify
    first_call = mock_inference_api.openai_chat_completion.call_args_list[0]
-    assert first_call.kwargs["messages"][0].content == "What was the score of todays game?"
+    assert first_call.kwargs["messages"][0].content == "What is the capital of Ireland?"
    assert first_call.kwargs["tools"] is not None
    assert first_call.kwargs["temperature"] == 0.1

    second_call = mock_inference_api.openai_chat_completion.call_args_list[1]
-    assert second_call.kwargs["messages"][-1].content == "The score of todays game was 10-12"
+    assert second_call.kwargs["messages"][-1].content == "Dublin"
    assert second_call.kwargs["temperature"] == 0.1

    openai_responses_impl.tool_groups_api.get_tool.assert_called_once_with("web_search")
    openai_responses_impl.tool_runtime_api.invoke_tool.assert_called_once_with(
        tool_name="web_search",
-        kwargs={"query": "What was the score of todays game?"},
+        kwargs={"query": "What is the capital of Ireland?"},
    )

    openai_responses_impl.persistence_store.set.assert_called_once()

    # Check that we got the content from our mocked tool execution result
    assert len(result.output) >= 1
-    assert isinstance(result.output[1], OpenAIResponseOutputMessage)
-    assert result.output[1].content[0].text == "The score of todays game was 10-12"
+    assert isinstance(result.output[1], OpenAIResponseMessage)
+    assert result.output[1].content[0].text == "Dublin"


@pytest.mark.asyncio
async def test_create_openai_response_with_multiple_messages(openai_responses_impl, mock_inference_api):
    """Test creating an OpenAI response with multiple messages."""
    # Setup
    input_messages = [
        OpenAIResponseMessage(role="developer", content="You are a helpful assistant", name=None),
        OpenAIResponseMessage(role="user", content="Name some towns in Ireland", name=None),
        OpenAIResponseMessage(
            role="assistant",
            content=[
                OpenAIResponseInputMessageContentText(text="Galway, Longford, Sligo"),
                OpenAIResponseInputMessageContentText(text="Dublin"),
            ],
            name=None,
        ),
        OpenAIResponseMessage(role="user", content="Which is the largest town in Ireland?", name=None),
    ]
    model = "meta-llama/Llama-3.1-8B-Instruct"

    mock_inference_api.openai_chat_completion.return_value = load_chat_completion_fixture("simple_chat_completion.yaml")

    # Execute
    await openai_responses_impl.create_openai_response(
        input=input_messages,
        model=model,
        temperature=0.1,
    )

    # Verify that the correct messages were sent to the inference API, i.e.
    # all of the response messages were converted to chat completion message objects
    inference_messages = mock_inference_api.openai_chat_completion.call_args_list[0].kwargs["messages"]
    for i, m in enumerate(input_messages):
        if isinstance(m.content, str):
            assert inference_messages[i].content == m.content
        else:
            assert inference_messages[i].content[0].text == m.content[0].text
            assert isinstance(inference_messages[i].content[0], OpenAIChatCompletionContentPartTextParam)
        assert inference_messages[i].role == m.role
        if m.role == "user":
            assert isinstance(inference_messages[i], OpenAIUserMessageParam)
        elif m.role == "assistant":
            assert isinstance(inference_messages[i], OpenAIAssistantMessageParam)
        else:
            assert isinstance(inference_messages[i], OpenAIDeveloperMessageParam)


@pytest.mark.asyncio
async def test_prepend_previous_response_none(openai_responses_impl):
    """Test prepending no previous response to a new response."""

    input = await openai_responses_impl._prepend_previous_response("fake_input", None)
    assert input == "fake_input"


@pytest.mark.asyncio
@patch.object(OpenAIResponsesImpl, "_get_previous_response_with_input")
async def test_prepend_previous_response_basic(get_previous_response_with_input, openai_responses_impl):
    """Test prepending a basic previous response to a new response."""

    input_item_message = OpenAIResponseMessage(
        id="123",
        content=[OpenAIResponseInputMessageContentText(text="fake_previous_input")],
        role="user",
    )
    input_items = OpenAIResponseInputItemList(data=[input_item_message])
    response_output_message = OpenAIResponseMessage(
        id="123",
        content=[OpenAIResponseOutputMessageContentOutputText(text="fake_response")],
        status="completed",
        role="assistant",
    )
    response = OpenAIResponseObject(
        created_at=1,
        id="resp_123",
        model="fake_model",
        output=[response_output_message],
        status="completed",
    )
    previous_response = OpenAIResponsePreviousResponseWithInputItems(
        input_items=input_items,
        response=response,
    )
    get_previous_response_with_input.return_value = previous_response

    input = await openai_responses_impl._prepend_previous_response("fake_input", "resp_123")

    assert len(input) == 3
    # Check for previous input
    assert isinstance(input[0], OpenAIResponseMessage)
    assert input[0].content[0].text == "fake_previous_input"
    # Check for previous output
    assert isinstance(input[1], OpenAIResponseMessage)
    assert input[1].content[0].text == "fake_response"
    # Check for new input
    assert isinstance(input[2], OpenAIResponseMessage)
    assert input[2].content == "fake_input"


@pytest.mark.asyncio
@patch.object(OpenAIResponsesImpl, "_get_previous_response_with_input")
async def test_prepend_previous_response_web_search(get_previous_response_with_input, openai_responses_impl):
    """Test prepending a web search previous response to a new response."""

    input_item_message = OpenAIResponseMessage(
        id="123",
        content=[OpenAIResponseInputMessageContentText(text="fake_previous_input")],
        role="user",
    )
    input_items = OpenAIResponseInputItemList(data=[input_item_message])
    output_web_search = OpenAIResponseOutputMessageWebSearchToolCall(
        id="ws_123",
        status="completed",
    )
    output_message = OpenAIResponseMessage(
        id="123",
        content=[OpenAIResponseOutputMessageContentOutputText(text="fake_web_search_response")],
        status="completed",
        role="assistant",
    )
    response = OpenAIResponseObject(
        created_at=1,
        id="resp_123",
        model="fake_model",
        output=[output_web_search, output_message],
        status="completed",
    )
    previous_response = OpenAIResponsePreviousResponseWithInputItems(
        input_items=input_items,
        response=response,
    )
    get_previous_response_with_input.return_value = previous_response

    input_messages = [OpenAIResponseMessage(content="fake_input", role="user")]
    input = await openai_responses_impl._prepend_previous_response(input_messages, "resp_123")

    assert len(input) == 4
    # Check for previous input
    assert isinstance(input[0], OpenAIResponseMessage)
    assert input[0].content[0].text == "fake_previous_input"
    # Check for previous output web search tool call
    assert isinstance(input[1], OpenAIResponseOutputMessageWebSearchToolCall)
    # Check for previous output web search response
    assert isinstance(input[2], OpenAIResponseMessage)
    assert input[2].content[0].text == "fake_web_search_response"
    # Check for new input
    assert isinstance(input[3], OpenAIResponseMessage)
    assert input[3].content == "fake_input"
tests/unit/rag/test_rag_query.py (new file, 19 lines)

@@ -0,0 +1,19 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from unittest.mock import MagicMock

import pytest

from llama_stack.providers.inline.tool_runtime.rag.memory import MemoryToolRuntimeImpl


class TestRagQuery:
    @pytest.mark.asyncio
    async def test_query_raises_on_empty_vector_db_ids(self):
        rag_tool = MemoryToolRuntimeImpl(config=MagicMock(), vector_io_api=MagicMock(), inference_api=MagicMock())
        with pytest.raises(ValueError):
            await rag_tool.query(content=MagicMock(), vector_db_ids=[])
@@ -4,7 +4,7 @@ Llama Stack Verifications provide standardized test suites to ensure API compati

## Overview

This framework allows you to run the same set of verification tests against different LLM providers' OpenAI-compatible endpoints (Fireworks, Together, Groq, Cerebras, etc., and OpenAI itself) to ensure they meet the expected behavior and interface standards.

## Features
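As a rough sketch of the Overview above: the verification suite lives under `tests/verifications` and is pytest-based, but the provider- and model-selection flags are not shown in this excerpt, so treat the exact invocation as an assumption and consult `tests/verifications/README.md`.

```bash
# Hypothetical invocation of the verification tests; provider/model
# selection flags are deliberately omitted here (see the README).
pytest tests/verifications
```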
@@ -74,9 +74,6 @@ providers:
    config:
      api_key: ${env.TAVILY_SEARCH_API_KEY:}
      max_results: 3
-  - provider_id: code-interpreter
-    provider_type: inline::code-interpreter
-    config: {}
  - provider_id: rag-runtime
    provider_type: inline::rag-runtime
    config: {}
@@ -156,8 +153,6 @@ tool_groups:
  provider_id: tavily-search
- toolgroup_id: builtin::rag
  provider_id: rag-runtime
-- toolgroup_id: builtin::code_interpreter
-  provider_id: code-interpreter
- toolgroup_id: builtin::wolfram_alpha
  provider_id: wolfram-alpha
server:
@@ -31,6 +31,26 @@ test_response_web_search:
            search_context_size: "low"
        output: "128"

test_response_custom_tool:
  test_name: test_response_custom_tool
  test_params:
    case:
      - case_id: "sf_weather"
        input: "What's the weather like in San Francisco?"
        tools:
          - type: function
            name: get_weather
            description: Get current temperature for a given location.
            parameters:
              additionalProperties: false
              properties:
                location:
                  description: "City and country e.g. Bogot\xE1, Colombia"
                  type: string
              required:
                - location
              type: object

test_response_image:
  test_name: test_response_image
  test_params:
@@ -59,7 +79,7 @@ test_response_multi_turn_image:
              - type: input_image
                image_url: "https://upload.wikimedia.org/wikipedia/commons/f/f7/Llamas%2C_Vernagt-Stausee%2C_Italy.jpg"
            output: "llama"
-          - input: "Search the web using the search tool for the animal from the previous response. Your search query should be a single phrase that includes the animal's name and the words 'maverick' and 'scout'."
+          - input: "Search the web using the search tool for the animal from the previous response. Your search query should be a single phrase that includes the animal's name and the words 'maverick', 'scout' and 'llm'"
            tools:
              - type: web_search
            output: "model"
@@ -124,6 +124,28 @@ def test_response_non_streaming_web_search(request, openai_client, model, provid
    assert case["output"].lower() in response.output_text.lower().strip()


@pytest.mark.parametrize(
    "case",
    responses_test_cases["test_response_custom_tool"]["test_params"]["case"],
    ids=case_id_generator,
)
def test_response_non_streaming_custom_tool(request, openai_client, model, provider, verification_config, case):
    test_name_base = get_base_test_name(request)
    if should_skip_test(verification_config, provider, model, test_name_base):
        pytest.skip(f"Skipping {test_name_base} for model {model} on provider {provider} based on config.")

    response = openai_client.responses.create(
        model=model,
        input=case["input"],
        tools=case["tools"],
        stream=False,
    )
    assert len(response.output) == 1
    assert response.output[0].type == "function_call"
    assert response.output[0].status == "completed"
    assert response.output[0].name == "get_weather"


@pytest.mark.parametrize(
    "case",
    responses_test_cases["test_response_image"]["test_params"]["case"],