Merge branch 'main' into out-of-token-budget-fix

Ilya Kolchinsky 2025-05-14 12:46:24 +02:00 committed by GitHub
commit 85ef55391d
61 changed files with 1322 additions and 1598 deletions

9
tests/README.md Normal file
View file

@@ -0,0 +1,9 @@
# Llama Stack Tests
Llama Stack has multiple layers of testing to ensure continuous functionality and prevent regressions in the codebase. The table below links to each layer's README; a rough sketch of the typical entry points follows.
| Testing Type | Details |
|--------------|---------|
| Unit | [unit/README.md](unit/README.md) |
| Integration | [integration/README.md](integration/README.md) |
| Verification | [verifications/README.md](verifications/README.md) |
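A rough sketch of how each layer is typically invoked (see the linked READMEs for the authoritative commands and options; the paths below are assumptions):
```bash
# Unit tests (unit/README.md)
source .venv/bin/activate
./scripts/unit-tests.sh

# Integration tests (integration/README.md); --stack-config selects the stack under test
pytest -s -v tests/integration --stack-config=<template, run.yaml path, or server URL>
```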

View file

@@ -53,9 +53,6 @@ providers:
config:
api_key: ${env.TAVILY_SEARCH_API_KEY:}
max_results: 3
- provider_id: code-interpreter
provider_type: inline::code-interpreter
config: {}
- provider_id: rag-runtime
provider_type: inline::rag-runtime
config: {}
@@ -90,8 +87,6 @@ tool_groups:
provider_id: tavily-search
- toolgroup_id: builtin::rag
provider_id: rag-runtime
- toolgroup_id: builtin::code_interpreter
provider_id: code-interpreter
- toolgroup_id: builtin::wolfram_alpha
provider_id: wolfram-alpha
server:

View file

@@ -11,7 +11,7 @@ pytest --help
Here are the most important options:
- `--stack-config`: specify the stack config to use. You have three ways to point to a stack:
- a URL which points to a Llama Stack distribution server
- a template (e.g., `fireworks`, `together`) or a path to a run.yaml file
- a template (e.g., `fireworks`, `together`) or a path to a `run.yaml` file
- a comma-separated list of `api=provider` pairs, e.g. `inference=fireworks,safety=llama-guard,agents=meta-reference`. This is most useful for testing a single API surface.
- `--env`: set environment variables, e.g. `--env KEY=value`. This is a convenience option for setting environment variables required by various providers (examples below).
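For instance, the same tests can target each kind of stack like so (a sketch; the test path, template name, and API key variable are placeholders, not prescribed values):
```bash
# Against a running Llama Stack distribution server (hypothetical URL)
pytest -s -v tests/integration/inference --stack-config=http://localhost:8321

# Against a template by name, with a provider key passed via --env
pytest -s -v tests/integration/inference --stack-config=together --env TOGETHER_API_KEY=<your-key>

# Against an explicit set of api=provider pairs
pytest -s -v tests/integration/inference \
  --stack-config=inference=fireworks,safety=llama-guard,agents=meta-reference
```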
@@ -28,7 +28,6 @@ if no model is specified.
Experimental options (under development):
- `--record-responses`: record new API responses instead of using cached ones
- `--report`: path where the test report should be written, e.g. `--report=/path/to/report.md`
## Examples

View file

@@ -15,8 +15,6 @@ from dotenv import load_dotenv
from llama_stack.log import get_logger
from .report import Report
logger = get_logger(__name__, category="tests")
@@ -60,9 +58,6 @@ def pytest_configure(config):
os.environ["DISABLE_CODE_SANDBOX"] = "1"
logger.info("Setting DISABLE_CODE_SANDBOX=1 for macOS")
if config.getoption("--report"):
config.pluginmanager.register(Report(config))
def pytest_addoption(parser):
parser.addoption(

View file

@@ -1,54 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from llama_stack.providers.datatypes import Api
INFERENCE_API_CAPA_TEST_MAP = {
"chat_completion": {
"streaming": [
"test_text_chat_completion_streaming",
"test_image_chat_completion_streaming",
],
"non_streaming": [
"test_image_chat_completion_non_streaming",
"test_text_chat_completion_non_streaming",
],
"tool_calling": [
"test_text_chat_completion_with_tool_calling_and_streaming",
"test_text_chat_completion_with_tool_calling_and_non_streaming",
],
"log_probs": [
"test_completion_log_probs_non_streaming",
"test_completion_log_probs_streaming",
],
},
"completion": {
"streaming": ["test_text_completion_streaming"],
"non_streaming": ["test_text_completion_non_streaming"],
"structured_output": ["test_text_completion_structured_output"],
},
}
VECTORIO_API_TEST_MAP = {
"retrieve": {
"": ["test_vector_db_retrieve"],
}
}
AGENTS_API_TEST_MAP = {
"create_agent_turn": {
"rag": ["test_rag_agent"],
"custom_tool": ["test_custom_tool"],
"code_execution": ["test_code_interpreter_for_attachments"],
}
}
API_MAPS = {
Api.inference: INFERENCE_API_CAPA_TEST_MAP,
Api.vector_io: VECTORIO_API_TEST_MAP,
Api.agents: AGENTS_API_TEST_MAP,
}

View file

@@ -1,216 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from collections import defaultdict
import pytest
from pytest import CollectReport
from termcolor import cprint
from llama_stack.models.llama.sku_list import (
all_registered_models,
llama3_1_instruct_models,
llama3_2_instruct_models,
llama3_3_instruct_models,
llama3_instruct_models,
safety_models,
)
from llama_stack.models.llama.sku_types import CoreModelId
from llama_stack.providers.datatypes import Api
from .metadata import API_MAPS
def featured_models():
models = [
*llama3_instruct_models(),
*llama3_1_instruct_models(),
*llama3_2_instruct_models(),
*llama3_3_instruct_models(),
*safety_models(),
]
return {model.huggingface_repo: model for model in models if not model.variant}
SUPPORTED_MODELS = {
"ollama": {
CoreModelId.llama3_1_8b_instruct.value,
CoreModelId.llama3_1_8b_instruct.value,
CoreModelId.llama3_1_70b_instruct.value,
CoreModelId.llama3_1_70b_instruct.value,
CoreModelId.llama3_1_405b_instruct.value,
CoreModelId.llama3_1_405b_instruct.value,
CoreModelId.llama3_2_1b_instruct.value,
CoreModelId.llama3_2_1b_instruct.value,
CoreModelId.llama3_2_3b_instruct.value,
CoreModelId.llama3_2_3b_instruct.value,
CoreModelId.llama3_2_11b_vision_instruct.value,
CoreModelId.llama3_2_11b_vision_instruct.value,
CoreModelId.llama3_2_90b_vision_instruct.value,
CoreModelId.llama3_2_90b_vision_instruct.value,
CoreModelId.llama3_3_70b_instruct.value,
CoreModelId.llama_guard_3_8b.value,
CoreModelId.llama_guard_3_1b.value,
},
"tgi": {model.core_model_id.value for model in all_registered_models() if model.huggingface_repo},
"vllm": {model.core_model_id.value for model in all_registered_models() if model.huggingface_repo},
}
class Report:
def __init__(self, config):
self.distro_name = None
self.config = config
stack_config = self.config.getoption("--stack-config")
if stack_config:
is_url = stack_config.startswith("http") or "//" in stack_config
is_yaml = stack_config.endswith(".yaml")
if not is_url and not is_yaml:
self.distro_name = stack_config
self.report_data = defaultdict(dict)
# test function -> test nodeid
self.test_data = dict()
self.test_name_to_nodeid = defaultdict(list)
self.vision_model_id = None
self.text_model_id = None
self.client = None
@pytest.hookimpl(tryfirst=True)
def pytest_runtest_logreport(self, report):
# This hook is called in several phases, including setup, call and teardown
# The test is considered failed / error if any of the outcomes is not "Passed"
outcome = self._process_outcome(report)
if report.nodeid not in self.test_data:
self.test_data[report.nodeid] = outcome
elif self.test_data[report.nodeid] != outcome and outcome != "Passed":
self.test_data[report.nodeid] = outcome
def pytest_sessionfinish(self, session):
if not self.client:
return
report = []
report.append(f"# Report for {self.distro_name} distribution")
report.append("\n## Supported Models")
header = f"| Model Descriptor | {self.distro_name} |"
dividor = "|:---|:---|"
report.append(header)
report.append(dividor)
rows = []
if self.distro_name in SUPPORTED_MODELS:
for model in all_registered_models():
if ("Instruct" not in model.core_model_id.value and "Guard" not in model.core_model_id.value) or (
model.variant
):
continue
row = f"| {model.core_model_id.value} |"
if model.core_model_id.value in SUPPORTED_MODELS[self.distro_name]:
row += " ✅ |"
else:
row += " ❌ |"
rows.append(row)
else:
supported_models = {m.identifier for m in self.client.models.list()}
for hf_name, model in featured_models().items():
row = f"| {model.core_model_id.value} |"
if hf_name in supported_models:
row += " ✅ |"
else:
row += " ❌ |"
rows.append(row)
report.extend(rows)
report.append("\n## Inference")
test_table = [
"| Model | API | Capability | Test | Status |",
"|:----- |:-----|:-----|:-----|:-----|",
]
for api, capa_map in API_MAPS[Api.inference].items():
for capa, tests in capa_map.items():
for test_name in tests:
model_id = self.text_model_id if "text" in test_name else self.vision_model_id
test_nodeids = self.test_name_to_nodeid[test_name]
if not test_nodeids:
continue
# There might be more than one parametrization of the same test function. We take
# the result of the first one for now. Ideally we should mark the test as failed if
# any of the parametrizations failed.
test_table.append(
f"| {model_id} | /{api} | {capa} | {test_name} | {self._print_result_icon(self.test_data[test_nodeids[0]])} |"
)
report.extend(test_table)
name_map = {Api.vector_io: "Vector IO", Api.agents: "Agents"}
providers = self.client.providers.list()
for api_group in [Api.vector_io, Api.agents]:
api_capitalized = name_map[api_group]
report.append(f"\n## {api_capitalized}")
test_table = [
"| Provider | API | Capability | Test | Status |",
"|:-----|:-----|:-----|:-----|:-----|",
]
provider = [p for p in providers if p.api == str(api_group.name)]
provider_str = ",".join(provider) if provider else ""
for api, capa_map in API_MAPS[api_group].items():
for capa, tests in capa_map.items():
for test_name in tests:
test_nodeids = self.test_name_to_nodeid[test_name]
if not test_nodeids:
continue
test_table.append(
f"| {provider_str} | /{api} | {capa} | {test_name} | {self._print_result_icon(self.test_data[test_nodeids[0]])} |"
)
report.extend(test_table)
output_file = self.output_path
text = "\n".join(report) + "\n"
output_file.write_text(text)
cprint(f"\nReport generated: {output_file.absolute()}", "green")
def pytest_runtest_makereport(self, item, call):
func_name = getattr(item, "originalname", item.name)
self.test_name_to_nodeid[func_name].append(item.nodeid)
# Get values from fixtures for report output
if model_id := item.funcargs.get("text_model_id"):
text_model = model_id.split("/")[1]
self.text_model_id = self.text_model_id or text_model
elif model_id := item.funcargs.get("vision_model_id"):
vision_model = model_id.split("/")[1]
self.vision_model_id = self.vision_model_id or vision_model
if not self.client:
self.client = item.funcargs.get("llama_stack_client")
def _print_result_icon(self, result):
if result == "Passed":
return ""
elif result == "Failed" or result == "Error":
return ""
else:
# result == "Skipped":
return "⏭️"
def _process_outcome(self, report: CollectReport):
if self._is_error(report):
return "Error"
if hasattr(report, "wasxfail"):
if report.outcome in ["passed", "failed"]:
return "XPassed"
if report.outcome == "skipped":
return "XFailed"
return report.outcome.capitalize()
def _is_error(self, report: CollectReport):
return report.when in ["setup", "teardown", "collect"] and report.outcome == "failed"

21
tests/unit/README.md Normal file
View file

@@ -0,0 +1,21 @@
# Llama Stack Unit Tests
You can run the unit tests by running:
```bash
source .venv/bin/activate
./scripts/unit-tests.sh [PYTEST_ARGS]
```
Any additional arguments are passed to pytest. For example, you can specify a test directory, a specific test file, or any pytest flags (e.g., `-vvv` for verbosity). If no test directory is specified, it defaults to `tests/unit`, for example:
```bash
./scripts/unit-tests.sh tests/unit/registry/test_registry.py -vvv
```
If you'd like to use a non-default version of Python (the default is currently 3.10), pass the `PYTHON_VERSION` variable as follows:
```bash
source .venv/bin/activate
PYTHON_VERSION=3.13 ./scripts/unit-tests.sh
```

View file

@@ -0,0 +1,23 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
import os
import yaml
from llama_stack.apis.inference.inference import (
OpenAIChatCompletion,
)
FIXTURES_DIR = os.path.dirname(os.path.abspath(__file__))
def load_chat_completion_fixture(filename: str) -> OpenAIChatCompletion:
fixture_path = os.path.join(FIXTURES_DIR, filename)
with open(fixture_path) as f:
data = yaml.safe_load(f)
return OpenAIChatCompletion(**data)

View file

@@ -0,0 +1,9 @@
id: chat-completion-123
choices:
- message:
content: "Dublin"
role: assistant
finish_reason: stop
index: 0
created: 1234567890
model: meta-llama/Llama-3.1-8B-Instruct

View file

@@ -0,0 +1,14 @@
id: chat-completion-123
choices:
- message:
tool_calls:
- id: tool_call_123
type: function
function:
name: web_search
arguments: '{"query":"What is the capital of Ireland?"}'
role: assistant
finish_reason: stop
index: 0
created: 1234567890
model: meta-llama/Llama-3.1-8B-Instruct

View file

@@ -4,27 +4,32 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from unittest.mock import AsyncMock
from unittest.mock import AsyncMock, patch
import pytest
from llama_stack.apis.agents.openai_responses import (
OpenAIResponseInputItemList,
OpenAIResponseInputMessageContentText,
OpenAIResponseInputToolWebSearch,
OpenAIResponseOutputMessage,
OpenAIResponseMessage,
OpenAIResponseObject,
OpenAIResponseOutputMessageContentOutputText,
OpenAIResponseOutputMessageWebSearchToolCall,
)
from llama_stack.apis.inference.inference import (
OpenAIAssistantMessageParam,
OpenAIChatCompletion,
OpenAIChatCompletionToolCall,
OpenAIChatCompletionToolCallFunction,
OpenAIChoice,
OpenAIChatCompletionContentPartTextParam,
OpenAIDeveloperMessageParam,
OpenAIUserMessageParam,
)
from llama_stack.apis.tools.tools import Tool, ToolGroups, ToolInvocationResult, ToolParameter, ToolRuntime
from llama_stack.providers.inline.agents.meta_reference.openai_responses import (
OpenAIResponsePreviousResponseWithInputItems,
OpenAIResponsesImpl,
)
from llama_stack.providers.utils.kvstore import KVStore
from tests.unit.providers.agents.meta_reference.fixtures import load_chat_completion_fixture
@pytest.fixture
@@ -65,21 +70,11 @@ def openai_responses_impl(mock_kvstore, mock_inference_api, mock_tool_groups_api
async def test_create_openai_response_with_string_input(openai_responses_impl, mock_inference_api):
"""Test creating an OpenAI response with a simple string input."""
# Setup
input_text = "Hello, world!"
input_text = "What is the capital of Ireland?"
model = "meta-llama/Llama-3.1-8B-Instruct"
mock_chat_completion = OpenAIChatCompletion(
id="chat-completion-123",
choices=[
OpenAIChoice(
message=OpenAIAssistantMessageParam(content="Hello! How can I help you?"),
finish_reason="stop",
index=0,
)
],
created=1234567890,
model=model,
)
# Load the chat completion fixture
mock_chat_completion = load_chat_completion_fixture("simple_chat_completion.yaml")
mock_inference_api.openai_chat_completion.return_value = mock_chat_completion
# Execute
@@ -92,7 +87,7 @@ async def test_create_openai_response_with_string_input(openai_responses_impl, m
# Verify
mock_inference_api.openai_chat_completion.assert_called_once_with(
model=model,
messages=[OpenAIUserMessageParam(role="user", content="Hello, world!", name=None)],
messages=[OpenAIUserMessageParam(role="user", content="What is the capital of Ireland?", name=None)],
tools=None,
stream=False,
temperature=0.1,
@@ -100,55 +95,25 @@ async def test_create_openai_response_with_string_input(openai_responses_impl, m
openai_responses_impl.persistence_store.set.assert_called_once()
assert result.model == model
assert len(result.output) == 1
assert isinstance(result.output[0], OpenAIResponseOutputMessage)
assert result.output[0].content[0].text == "Hello! How can I help you?"
assert isinstance(result.output[0], OpenAIResponseMessage)
assert result.output[0].content[0].text == "Dublin"
@pytest.mark.asyncio
async def test_create_openai_response_with_string_input_with_tools(openai_responses_impl, mock_inference_api):
"""Test creating an OpenAI response with a simple string input and tools."""
# Setup
input_text = "What was the score of todays game?"
input_text = "What is the capital of Ireland?"
model = "meta-llama/Llama-3.1-8B-Instruct"
mock_chat_completions = [
OpenAIChatCompletion(
id="chat-completion-123",
choices=[
OpenAIChoice(
message=OpenAIAssistantMessageParam(
tool_calls=[
OpenAIChatCompletionToolCall(
id="tool_call_123",
type="function",
function=OpenAIChatCompletionToolCallFunction(
name="web_search", arguments='{"query":"What was the score of todays game?"}'
),
)
],
),
finish_reason="stop",
index=0,
)
],
created=1234567890,
model=model,
),
OpenAIChatCompletion(
id="chat-completion-123",
choices=[
OpenAIChoice(
message=OpenAIAssistantMessageParam(content="The score of todays game was 10-12"),
finish_reason="stop",
index=0,
)
],
created=1234567890,
model=model,
),
]
# Load the chat completion fixtures
tool_call_completion = load_chat_completion_fixture("tool_call_completion.yaml")
tool_response_completion = load_chat_completion_fixture("simple_chat_completion.yaml")
mock_inference_api.openai_chat_completion.side_effect = mock_chat_completions
mock_inference_api.openai_chat_completion.side_effect = [
tool_call_completion,
tool_response_completion,
]
openai_responses_impl.tool_groups_api.get_tool.return_value = Tool(
identifier="web_search",
@@ -163,7 +128,7 @@ async def test_create_openai_response_with_string_input_with_tools(openai_respon
openai_responses_impl.tool_runtime_api.invoke_tool.return_value = ToolInvocationResult(
status="completed",
content="The score of todays game was 10-12",
content="Dublin",
)
# Execute
@@ -180,23 +145,172 @@ async def test_create_openai_response_with_string_input_with_tools(openai_respon
# Verify
first_call = mock_inference_api.openai_chat_completion.call_args_list[0]
assert first_call.kwargs["messages"][0].content == "What was the score of todays game?"
assert first_call.kwargs["messages"][0].content == "What is the capital of Ireland?"
assert first_call.kwargs["tools"] is not None
assert first_call.kwargs["temperature"] == 0.1
second_call = mock_inference_api.openai_chat_completion.call_args_list[1]
assert second_call.kwargs["messages"][-1].content == "The score of todays game was 10-12"
assert second_call.kwargs["messages"][-1].content == "Dublin"
assert second_call.kwargs["temperature"] == 0.1
openai_responses_impl.tool_groups_api.get_tool.assert_called_once_with("web_search")
openai_responses_impl.tool_runtime_api.invoke_tool.assert_called_once_with(
tool_name="web_search",
kwargs={"query": "What was the score of todays game?"},
kwargs={"query": "What is the capital of Ireland?"},
)
openai_responses_impl.persistence_store.set.assert_called_once()
# Check that we got the content from our mocked tool execution result
assert len(result.output) >= 1
assert isinstance(result.output[1], OpenAIResponseOutputMessage)
assert result.output[1].content[0].text == "The score of todays game was 10-12"
assert isinstance(result.output[1], OpenAIResponseMessage)
assert result.output[1].content[0].text == "Dublin"
@pytest.mark.asyncio
async def test_create_openai_response_with_multiple_messages(openai_responses_impl, mock_inference_api):
"""Test creating an OpenAI response with multiple messages."""
# Setup
input_messages = [
OpenAIResponseMessage(role="developer", content="You are a helpful assistant", name=None),
OpenAIResponseMessage(role="user", content="Name some towns in Ireland", name=None),
OpenAIResponseMessage(
role="assistant",
content=[
OpenAIResponseInputMessageContentText(text="Galway, Longford, Sligo"),
OpenAIResponseInputMessageContentText(text="Dublin"),
],
name=None,
),
OpenAIResponseMessage(role="user", content="Which is the largest town in Ireland?", name=None),
]
model = "meta-llama/Llama-3.1-8B-Instruct"
mock_inference_api.openai_chat_completion.return_value = load_chat_completion_fixture("simple_chat_completion.yaml")
# Execute
await openai_responses_impl.create_openai_response(
input=input_messages,
model=model,
temperature=0.1,
)
# Verify that the correct messages were sent to the inference API, i.e.,
# all of the response messages were converted to chat completion message objects
inference_messages = mock_inference_api.openai_chat_completion.call_args_list[0].kwargs["messages"]
for i, m in enumerate(input_messages):
if isinstance(m.content, str):
assert inference_messages[i].content == m.content
else:
assert inference_messages[i].content[0].text == m.content[0].text
assert isinstance(inference_messages[i].content[0], OpenAIChatCompletionContentPartTextParam)
assert inference_messages[i].role == m.role
if m.role == "user":
assert isinstance(inference_messages[i], OpenAIUserMessageParam)
elif m.role == "assistant":
assert isinstance(inference_messages[i], OpenAIAssistantMessageParam)
else:
assert isinstance(inference_messages[i], OpenAIDeveloperMessageParam)
@pytest.mark.asyncio
async def test_prepend_previous_response_none(openai_responses_impl):
"""Test prepending no previous response to a new response."""
input = await openai_responses_impl._prepend_previous_response("fake_input", None)
assert input == "fake_input"
@pytest.mark.asyncio
@patch.object(OpenAIResponsesImpl, "_get_previous_response_with_input")
async def test_prepend_previous_response_basic(get_previous_response_with_input, openai_responses_impl):
"""Test prepending a basic previous response to a new response."""
input_item_message = OpenAIResponseMessage(
id="123",
content=[OpenAIResponseInputMessageContentText(text="fake_previous_input")],
role="user",
)
input_items = OpenAIResponseInputItemList(data=[input_item_message])
response_output_message = OpenAIResponseMessage(
id="123",
content=[OpenAIResponseOutputMessageContentOutputText(text="fake_response")],
status="completed",
role="assistant",
)
response = OpenAIResponseObject(
created_at=1,
id="resp_123",
model="fake_model",
output=[response_output_message],
status="completed",
)
previous_response = OpenAIResponsePreviousResponseWithInputItems(
input_items=input_items,
response=response,
)
get_previous_response_with_input.return_value = previous_response
input = await openai_responses_impl._prepend_previous_response("fake_input", "resp_123")
assert len(input) == 3
# Check for previous input
assert isinstance(input[0], OpenAIResponseMessage)
assert input[0].content[0].text == "fake_previous_input"
# Check for previous output
assert isinstance(input[1], OpenAIResponseMessage)
assert input[1].content[0].text == "fake_response"
# Check for new input
assert isinstance(input[2], OpenAIResponseMessage)
assert input[2].content == "fake_input"
@pytest.mark.asyncio
@patch.object(OpenAIResponsesImpl, "_get_previous_response_with_input")
async def test_prepend_previous_response_web_search(get_previous_response_with_input, openai_responses_impl):
"""Test prepending a web search previous response to a new response."""
input_item_message = OpenAIResponseMessage(
id="123",
content=[OpenAIResponseInputMessageContentText(text="fake_previous_input")],
role="user",
)
input_items = OpenAIResponseInputItemList(data=[input_item_message])
output_web_search = OpenAIResponseOutputMessageWebSearchToolCall(
id="ws_123",
status="completed",
)
output_message = OpenAIResponseMessage(
id="123",
content=[OpenAIResponseOutputMessageContentOutputText(text="fake_web_search_response")],
status="completed",
role="assistant",
)
response = OpenAIResponseObject(
created_at=1,
id="resp_123",
model="fake_model",
output=[output_web_search, output_message],
status="completed",
)
previous_response = OpenAIResponsePreviousResponseWithInputItems(
input_items=input_items,
response=response,
)
get_previous_response_with_input.return_value = previous_response
input_messages = [OpenAIResponseMessage(content="fake_input", role="user")]
input = await openai_responses_impl._prepend_previous_response(input_messages, "resp_123")
assert len(input) == 4
# Check for previous input
assert isinstance(input[0], OpenAIResponseMessage)
assert input[0].content[0].text == "fake_previous_input"
# Check for previous output web search tool call
assert isinstance(input[1], OpenAIResponseOutputMessageWebSearchToolCall)
# Check for previous output web search response
assert isinstance(input[2], OpenAIResponseMessage)
assert input[2].content[0].text == "fake_web_search_response"
# Check for new input
assert isinstance(input[3], OpenAIResponseMessage)
assert input[3].content == "fake_input"

View file

@@ -0,0 +1,19 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from unittest.mock import MagicMock
import pytest
from llama_stack.providers.inline.tool_runtime.rag.memory import MemoryToolRuntimeImpl
class TestRagQuery:
@pytest.mark.asyncio
async def test_query_raises_on_empty_vector_db_ids(self):
rag_tool = MemoryToolRuntimeImpl(config=MagicMock(), vector_io_api=MagicMock(), inference_api=MagicMock())
with pytest.raises(ValueError):
await rag_tool.query(content=MagicMock(), vector_db_ids=[])

View file

@@ -4,7 +4,7 @@ Llama Stack Verifications provide standardized test suites to ensure API compati
## Overview
This framework allows you to run the same set of verification tests against different LLM providers' OpenAI-compatible endpoints (Fireworks, Together, Groq, Cerebras, etc., and OpenAI itself) to ensure they meet the expected behavior and interface standards.
This framework allows you to run the same set of verification tests against different LLM providers' OpenAI-compatible endpoints (Fireworks, Together, Groq, Cerebras, etc., and OpenAI itself) to ensure they meet the expected behavior and interface standards.
## Features

View file

@@ -74,9 +74,6 @@ providers:
config:
api_key: ${env.TAVILY_SEARCH_API_KEY:}
max_results: 3
- provider_id: code-interpreter
provider_type: inline::code-interpreter
config: {}
- provider_id: rag-runtime
provider_type: inline::rag-runtime
config: {}
@@ -156,8 +153,6 @@ tool_groups:
provider_id: tavily-search
- toolgroup_id: builtin::rag
provider_id: rag-runtime
- toolgroup_id: builtin::code_interpreter
provider_id: code-interpreter
- toolgroup_id: builtin::wolfram_alpha
provider_id: wolfram-alpha
server:

View file

@@ -31,6 +31,26 @@ test_response_web_search:
search_context_size: "low"
output: "128"
test_response_custom_tool:
test_name: test_response_custom_tool
test_params:
case:
- case_id: "sf_weather"
input: "What's the weather like in San Francisco?"
tools:
- type: function
name: get_weather
description: Get current temperature for a given location.
parameters:
additionalProperties: false
properties:
location:
description: "City and country e.g. Bogot\xE1, Colombia"
type: string
required:
- location
type: object
test_response_image:
test_name: test_response_image
test_params:
@@ -59,7 +79,7 @@ test_response_multi_turn_image:
- type: input_image
image_url: "https://upload.wikimedia.org/wikipedia/commons/f/f7/Llamas%2C_Vernagt-Stausee%2C_Italy.jpg"
output: "llama"
- input: "Search the web using the search tool for the animal from the previous response. Your search query should be a single phrase that includes the animal's name and the words 'maverick' and 'scout'."
- input: "Search the web using the search tool for the animal from the previous response. Your search query should be a single phrase that includes the animal's name and the words 'maverick', 'scout' and 'llm'"
tools:
- type: web_search
output: "model"

View file

@@ -124,6 +124,28 @@ def test_response_non_streaming_web_search(request, openai_client, model, provid
assert case["output"].lower() in response.output_text.lower().strip()
@pytest.mark.parametrize(
"case",
responses_test_cases["test_response_custom_tool"]["test_params"]["case"],
ids=case_id_generator,
)
def test_response_non_streaming_custom_tool(request, openai_client, model, provider, verification_config, case):
test_name_base = get_base_test_name(request)
if should_skip_test(verification_config, provider, model, test_name_base):
pytest.skip(f"Skipping {test_name_base} for model {model} on provider {provider} based on config.")
response = openai_client.responses.create(
model=model,
input=case["input"],
tools=case["tools"],
stream=False,
)
assert len(response.output) == 1
assert response.output[0].type == "function_call"
assert response.output[0].status == "completed"
assert response.output[0].name == "get_weather"
@pytest.mark.parametrize(
"case",
responses_test_cases["test_response_image"]["test_params"]["case"],