Merge branch 'main' into rag-metadata-support

This commit is contained in:
Francisco Arceo 2025-05-13 21:14:02 -06:00 committed by GitHub
commit 227ff4c9b3
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
42 changed files with 1294 additions and 563 deletions

9
tests/README.md Normal file
View file

@ -0,0 +1,9 @@
# Llama Stack Tests
Llama Stack has multiple layers of testing done to ensure continuous functionality and prevent regressions to the codebase.
| Testing Type | Details |
|--------------|---------|
| Unit | [unit/README.md](unit/README.md) |
| Integration | [integration/README.md](integration/README.md) |
| Verification | [verifications/README.md](verifications/README.md) |

View file

@ -53,9 +53,6 @@ providers:
config:
api_key: ${env.TAVILY_SEARCH_API_KEY:}
max_results: 3
- provider_id: code-interpreter
provider_type: inline::code-interpreter
config: {}
- provider_id: rag-runtime
provider_type: inline::rag-runtime
config: {}
@ -90,8 +87,6 @@ tool_groups:
provider_id: tavily-search
- toolgroup_id: builtin::rag
provider_id: rag-runtime
- toolgroup_id: builtin::code_interpreter
provider_id: code-interpreter
- toolgroup_id: builtin::wolfram_alpha
provider_id: wolfram-alpha
server:

View file

@ -11,7 +11,7 @@ pytest --help
Here are the most important options:
- `--stack-config`: specify the stack config to use. You have three ways to point to a stack:
- a URL which points to a Llama Stack distribution server
- a template (e.g., `fireworks`, `together`) or a path to a run.yaml file
- a template (e.g., `fireworks`, `together`) or a path to a `run.yaml` file
- a comma-separated list of api=provider pairs, e.g. `inference=fireworks,safety=llama-guard,agents=meta-reference`. This is most useful for testing a single API surface.
- `--env`: set environment variables, e.g. --env KEY=value. this is a utility option to set environment variables required by various providers.

View file

@ -6,6 +6,7 @@
from collections import defaultdict
from pathlib import Path
import pytest
from pytest import CollectReport
@ -65,6 +66,7 @@ class Report:
def __init__(self, config):
self.distro_name = None
self.config = config
self.output_path = Path(config.getoption("--report")) if config.getoption("--report") else None
stack_config = self.config.getoption("--stack-config")
if stack_config:
@ -161,7 +163,7 @@ class Report:
"|:-----|:-----|:-----|:-----|:-----|",
]
provider = [p for p in providers if p.api == str(api_group.name)]
provider_str = ",".join(provider) if provider else ""
provider_str = ",".join(str(p) for p in provider) if provider else ""
for api, capa_map in API_MAPS[api_group].items():
for capa, tests in capa_map.items():
for test_name in tests:
@ -184,10 +186,12 @@ class Report:
# Get values from fixtures for report output
if model_id := item.funcargs.get("text_model_id"):
text_model = model_id.split("/")[1]
parts = model_id.split("/")
text_model = parts[1] if len(parts) > 1 else model_id
self.text_model_id = self.text_model_id or text_model
elif model_id := item.funcargs.get("vision_model_id"):
vision_model = model_id.split("/")[1]
parts = model_id.split("/")
vision_model = parts[1] if len(parts) > 1 else model_id
self.vision_model_id = self.vision_model_id or vision_model
if not self.client:

21
tests/unit/README.md Normal file
View file

@ -0,0 +1,21 @@
# Llama Stack Unit Tests
You can run the unit tests by running:
```bash
source .venv/bin/activate
./scripts/unit-tests.sh [PYTEST_ARGS]
```
Any additional arguments are passed to pytest. For example, you can specify a test directory, a specific test file, or any pytest flags (e.g., -vvv for verbosity). If no test directory is specified, it defaults to "tests/unit", e.g:
```bash
./scripts/unit-tests.sh tests/unit/registry/test_registry.py -vvv
```
If you'd like to run for a non-default version of Python (currently 3.10), pass `PYTHON_VERSION` variable as follows:
```
source .venv/bin/activate
PYTHON_VERSION=3.13 ./scripts/unit-tests.sh
```

View file

@ -0,0 +1,23 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
import os
import yaml
from llama_stack.apis.inference.inference import (
OpenAIChatCompletion,
)
FIXTURES_DIR = os.path.dirname(os.path.abspath(__file__))
def load_chat_completion_fixture(filename: str) -> OpenAIChatCompletion:
fixture_path = os.path.join(FIXTURES_DIR, filename)
with open(fixture_path) as f:
data = yaml.safe_load(f)
return OpenAIChatCompletion(**data)

View file

@ -0,0 +1,9 @@
id: chat-completion-123
choices:
- message:
content: "Dublin"
role: assistant
finish_reason: stop
index: 0
created: 1234567890
model: meta-llama/Llama-3.1-8B-Instruct

View file

@ -0,0 +1,14 @@
id: chat-completion-123
choices:
- message:
tool_calls:
- id: tool_call_123
type: function
function:
name: web_search
arguments: '{"query":"What is the capital of Ireland?"}'
role: assistant
finish_reason: stop
index: 0
created: 1234567890
model: meta-llama/Llama-3.1-8B-Instruct

View file

@ -4,27 +4,32 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from unittest.mock import AsyncMock
from unittest.mock import AsyncMock, patch
import pytest
from llama_stack.apis.agents.openai_responses import (
OpenAIResponseInputItemList,
OpenAIResponseInputMessageContentText,
OpenAIResponseInputToolWebSearch,
OpenAIResponseOutputMessage,
OpenAIResponseMessage,
OpenAIResponseObject,
OpenAIResponseOutputMessageContentOutputText,
OpenAIResponseOutputMessageWebSearchToolCall,
)
from llama_stack.apis.inference.inference import (
OpenAIAssistantMessageParam,
OpenAIChatCompletion,
OpenAIChatCompletionToolCall,
OpenAIChatCompletionToolCallFunction,
OpenAIChoice,
OpenAIChatCompletionContentPartTextParam,
OpenAIDeveloperMessageParam,
OpenAIUserMessageParam,
)
from llama_stack.apis.tools.tools import Tool, ToolGroups, ToolInvocationResult, ToolParameter, ToolRuntime
from llama_stack.providers.inline.agents.meta_reference.openai_responses import (
OpenAIResponsePreviousResponseWithInputItems,
OpenAIResponsesImpl,
)
from llama_stack.providers.utils.kvstore import KVStore
from tests.unit.providers.agents.meta_reference.fixtures import load_chat_completion_fixture
@pytest.fixture
@ -65,21 +70,11 @@ def openai_responses_impl(mock_kvstore, mock_inference_api, mock_tool_groups_api
async def test_create_openai_response_with_string_input(openai_responses_impl, mock_inference_api):
"""Test creating an OpenAI response with a simple string input."""
# Setup
input_text = "Hello, world!"
input_text = "What is the capital of Ireland?"
model = "meta-llama/Llama-3.1-8B-Instruct"
mock_chat_completion = OpenAIChatCompletion(
id="chat-completion-123",
choices=[
OpenAIChoice(
message=OpenAIAssistantMessageParam(content="Hello! How can I help you?"),
finish_reason="stop",
index=0,
)
],
created=1234567890,
model=model,
)
# Load the chat completion fixture
mock_chat_completion = load_chat_completion_fixture("simple_chat_completion.yaml")
mock_inference_api.openai_chat_completion.return_value = mock_chat_completion
# Execute
@ -92,7 +87,7 @@ async def test_create_openai_response_with_string_input(openai_responses_impl, m
# Verify
mock_inference_api.openai_chat_completion.assert_called_once_with(
model=model,
messages=[OpenAIUserMessageParam(role="user", content="Hello, world!", name=None)],
messages=[OpenAIUserMessageParam(role="user", content="What is the capital of Ireland?", name=None)],
tools=None,
stream=False,
temperature=0.1,
@ -100,55 +95,25 @@ async def test_create_openai_response_with_string_input(openai_responses_impl, m
openai_responses_impl.persistence_store.set.assert_called_once()
assert result.model == model
assert len(result.output) == 1
assert isinstance(result.output[0], OpenAIResponseOutputMessage)
assert result.output[0].content[0].text == "Hello! How can I help you?"
assert isinstance(result.output[0], OpenAIResponseMessage)
assert result.output[0].content[0].text == "Dublin"
@pytest.mark.asyncio
async def test_create_openai_response_with_string_input_with_tools(openai_responses_impl, mock_inference_api):
"""Test creating an OpenAI response with a simple string input and tools."""
# Setup
input_text = "What was the score of todays game?"
input_text = "What is the capital of Ireland?"
model = "meta-llama/Llama-3.1-8B-Instruct"
mock_chat_completions = [
OpenAIChatCompletion(
id="chat-completion-123",
choices=[
OpenAIChoice(
message=OpenAIAssistantMessageParam(
tool_calls=[
OpenAIChatCompletionToolCall(
id="tool_call_123",
type="function",
function=OpenAIChatCompletionToolCallFunction(
name="web_search", arguments='{"query":"What was the score of todays game?"}'
),
)
],
),
finish_reason="stop",
index=0,
)
],
created=1234567890,
model=model,
),
OpenAIChatCompletion(
id="chat-completion-123",
choices=[
OpenAIChoice(
message=OpenAIAssistantMessageParam(content="The score of todays game was 10-12"),
finish_reason="stop",
index=0,
)
],
created=1234567890,
model=model,
),
]
# Load the chat completion fixtures
tool_call_completion = load_chat_completion_fixture("tool_call_completion.yaml")
tool_response_completion = load_chat_completion_fixture("simple_chat_completion.yaml")
mock_inference_api.openai_chat_completion.side_effect = mock_chat_completions
mock_inference_api.openai_chat_completion.side_effect = [
tool_call_completion,
tool_response_completion,
]
openai_responses_impl.tool_groups_api.get_tool.return_value = Tool(
identifier="web_search",
@ -163,7 +128,7 @@ async def test_create_openai_response_with_string_input_with_tools(openai_respon
openai_responses_impl.tool_runtime_api.invoke_tool.return_value = ToolInvocationResult(
status="completed",
content="The score of todays game was 10-12",
content="Dublin",
)
# Execute
@ -180,23 +145,172 @@ async def test_create_openai_response_with_string_input_with_tools(openai_respon
# Verify
first_call = mock_inference_api.openai_chat_completion.call_args_list[0]
assert first_call.kwargs["messages"][0].content == "What was the score of todays game?"
assert first_call.kwargs["messages"][0].content == "What is the capital of Ireland?"
assert first_call.kwargs["tools"] is not None
assert first_call.kwargs["temperature"] == 0.1
second_call = mock_inference_api.openai_chat_completion.call_args_list[1]
assert second_call.kwargs["messages"][-1].content == "The score of todays game was 10-12"
assert second_call.kwargs["messages"][-1].content == "Dublin"
assert second_call.kwargs["temperature"] == 0.1
openai_responses_impl.tool_groups_api.get_tool.assert_called_once_with("web_search")
openai_responses_impl.tool_runtime_api.invoke_tool.assert_called_once_with(
tool_name="web_search",
kwargs={"query": "What was the score of todays game?"},
kwargs={"query": "What is the capital of Ireland?"},
)
openai_responses_impl.persistence_store.set.assert_called_once()
# Check that we got the content from our mocked tool execution result
assert len(result.output) >= 1
assert isinstance(result.output[1], OpenAIResponseOutputMessage)
assert result.output[1].content[0].text == "The score of todays game was 10-12"
assert isinstance(result.output[1], OpenAIResponseMessage)
assert result.output[1].content[0].text == "Dublin"
@pytest.mark.asyncio
async def test_create_openai_response_with_multiple_messages(openai_responses_impl, mock_inference_api):
"""Test creating an OpenAI response with multiple messages."""
# Setup
input_messages = [
OpenAIResponseMessage(role="developer", content="You are a helpful assistant", name=None),
OpenAIResponseMessage(role="user", content="Name some towns in Ireland", name=None),
OpenAIResponseMessage(
role="assistant",
content=[
OpenAIResponseInputMessageContentText(text="Galway, Longford, Sligo"),
OpenAIResponseInputMessageContentText(text="Dublin"),
],
name=None,
),
OpenAIResponseMessage(role="user", content="Which is the largest town in Ireland?", name=None),
]
model = "meta-llama/Llama-3.1-8B-Instruct"
mock_inference_api.openai_chat_completion.return_value = load_chat_completion_fixture("simple_chat_completion.yaml")
# Execute
await openai_responses_impl.create_openai_response(
input=input_messages,
model=model,
temperature=0.1,
)
# Verify the the correct messages were sent to the inference API i.e.
# All of the responses message were convered to the chat completion message objects
inference_messages = mock_inference_api.openai_chat_completion.call_args_list[0].kwargs["messages"]
for i, m in enumerate(input_messages):
if isinstance(m.content, str):
assert inference_messages[i].content == m.content
else:
assert inference_messages[i].content[0].text == m.content[0].text
assert isinstance(inference_messages[i].content[0], OpenAIChatCompletionContentPartTextParam)
assert inference_messages[i].role == m.role
if m.role == "user":
assert isinstance(inference_messages[i], OpenAIUserMessageParam)
elif m.role == "assistant":
assert isinstance(inference_messages[i], OpenAIAssistantMessageParam)
else:
assert isinstance(inference_messages[i], OpenAIDeveloperMessageParam)
@pytest.mark.asyncio
async def test_prepend_previous_response_none(openai_responses_impl):
"""Test prepending no previous response to a new response."""
input = await openai_responses_impl._prepend_previous_response("fake_input", None)
assert input == "fake_input"
@pytest.mark.asyncio
@patch.object(OpenAIResponsesImpl, "_get_previous_response_with_input")
async def test_prepend_previous_response_basic(get_previous_response_with_input, openai_responses_impl):
"""Test prepending a basic previous response to a new response."""
input_item_message = OpenAIResponseMessage(
id="123",
content=[OpenAIResponseInputMessageContentText(text="fake_previous_input")],
role="user",
)
input_items = OpenAIResponseInputItemList(data=[input_item_message])
response_output_message = OpenAIResponseMessage(
id="123",
content=[OpenAIResponseOutputMessageContentOutputText(text="fake_response")],
status="completed",
role="assistant",
)
response = OpenAIResponseObject(
created_at=1,
id="resp_123",
model="fake_model",
output=[response_output_message],
status="completed",
)
previous_response = OpenAIResponsePreviousResponseWithInputItems(
input_items=input_items,
response=response,
)
get_previous_response_with_input.return_value = previous_response
input = await openai_responses_impl._prepend_previous_response("fake_input", "resp_123")
assert len(input) == 3
# Check for previous input
assert isinstance(input[0], OpenAIResponseMessage)
assert input[0].content[0].text == "fake_previous_input"
# Check for previous output
assert isinstance(input[1], OpenAIResponseMessage)
assert input[1].content[0].text == "fake_response"
# Check for new input
assert isinstance(input[2], OpenAIResponseMessage)
assert input[2].content == "fake_input"
@pytest.mark.asyncio
@patch.object(OpenAIResponsesImpl, "_get_previous_response_with_input")
async def test_prepend_previous_response_web_search(get_previous_response_with_input, openai_responses_impl):
"""Test prepending a web search previous response to a new response."""
input_item_message = OpenAIResponseMessage(
id="123",
content=[OpenAIResponseInputMessageContentText(text="fake_previous_input")],
role="user",
)
input_items = OpenAIResponseInputItemList(data=[input_item_message])
output_web_search = OpenAIResponseOutputMessageWebSearchToolCall(
id="ws_123",
status="completed",
)
output_message = OpenAIResponseMessage(
id="123",
content=[OpenAIResponseOutputMessageContentOutputText(text="fake_web_search_response")],
status="completed",
role="assistant",
)
response = OpenAIResponseObject(
created_at=1,
id="resp_123",
model="fake_model",
output=[output_web_search, output_message],
status="completed",
)
previous_response = OpenAIResponsePreviousResponseWithInputItems(
input_items=input_items,
response=response,
)
get_previous_response_with_input.return_value = previous_response
input_messages = [OpenAIResponseMessage(content="fake_input", role="user")]
input = await openai_responses_impl._prepend_previous_response(input_messages, "resp_123")
assert len(input) == 4
# Check for previous input
assert isinstance(input[0], OpenAIResponseMessage)
assert input[0].content[0].text == "fake_previous_input"
# Check for previous output web search tool call
assert isinstance(input[1], OpenAIResponseOutputMessageWebSearchToolCall)
# Check for previous output web search response
assert isinstance(input[2], OpenAIResponseMessage)
assert input[2].content[0].text == "fake_web_search_response"
# Check for new input
assert isinstance(input[3], OpenAIResponseMessage)
assert input[3].content == "fake_input"

View file

@ -4,7 +4,7 @@ Llama Stack Verifications provide standardized test suites to ensure API compati
## Overview
This framework allows you to run the same set of verification tests against different LLM providers' OpenAI-compatible endpoints (Fireworks, Together, Groq, Cerebras, etc., and OpenAI itself) to ensure they meet the expected behavior and interface standards.
This framework allows you to run the same set of verification tests against different LLM providers' OpenAI-compatible endpoints (Fireworks, Together, Groq, Cerebras, etc., and OpenAI itself) to ensure they meet the expected behavior and interface standards.
## Features

View file

@ -74,9 +74,6 @@ providers:
config:
api_key: ${env.TAVILY_SEARCH_API_KEY:}
max_results: 3
- provider_id: code-interpreter
provider_type: inline::code-interpreter
config: {}
- provider_id: rag-runtime
provider_type: inline::rag-runtime
config: {}
@ -156,8 +153,6 @@ tool_groups:
provider_id: tavily-search
- toolgroup_id: builtin::rag
provider_id: rag-runtime
- toolgroup_id: builtin::code_interpreter
provider_id: code-interpreter
- toolgroup_id: builtin::wolfram_alpha
provider_id: wolfram-alpha
server:

View file

@ -31,6 +31,26 @@ test_response_web_search:
search_context_size: "low"
output: "128"
test_response_custom_tool:
test_name: test_response_custom_tool
test_params:
case:
- case_id: "sf_weather"
input: "What's the weather like in San Francisco?"
tools:
- type: function
name: get_weather
description: Get current temperature for a given location.
parameters:
additionalProperties: false
properties:
location:
description: "City and country e.g. Bogot\xE1, Colombia"
type: string
required:
- location
type: object
test_response_image:
test_name: test_response_image
test_params:

View file

@ -124,6 +124,28 @@ def test_response_non_streaming_web_search(request, openai_client, model, provid
assert case["output"].lower() in response.output_text.lower().strip()
@pytest.mark.parametrize(
"case",
responses_test_cases["test_response_custom_tool"]["test_params"]["case"],
ids=case_id_generator,
)
def test_response_non_streaming_custom_tool(request, openai_client, model, provider, verification_config, case):
test_name_base = get_base_test_name(request)
if should_skip_test(verification_config, provider, model, test_name_base):
pytest.skip(f"Skipping {test_name_base} for model {model} on provider {provider} based on config.")
response = openai_client.responses.create(
model=model,
input=case["input"],
tools=case["tools"],
stream=False,
)
assert len(response.output) == 1
assert response.output[0].type == "function_call"
assert response.output[0].status == "completed"
assert response.output[0].name == "get_weather"
@pytest.mark.parametrize(
"case",
responses_test_cases["test_response_image"]["test_params"]["case"],