mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-12-28 01:01:59 +00:00
Merge branch 'main' into feat/litellm_sambanova_usage
This commit is contained in:
commit
b7f16ac7a6
535 changed files with 23539 additions and 8112 deletions
|
|
@ -4,7 +4,7 @@
|
|||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from typing import Any, Dict
|
||||
from typing import Any
|
||||
from uuid import uuid4
|
||||
|
||||
import pytest
|
||||
|
|
@ -37,7 +37,7 @@ def get_boiling_point(liquid_name: str, celcius: bool = True) -> int:
|
|||
return -1
|
||||
|
||||
|
||||
def get_boiling_point_with_metadata(liquid_name: str, celcius: bool = True) -> Dict[str, Any]:
|
||||
def get_boiling_point_with_metadata(liquid_name: str, celcius: bool = True) -> dict[str, Any]:
|
||||
"""
|
||||
Returns the boiling point of a liquid in Celsius or Fahrenheit
|
||||
|
||||
|
|
@ -115,6 +115,70 @@ def test_agent_simple(llama_stack_client_with_mocked_inference, agent_config):
|
|||
assert "I can't" in logs_str
|
||||
|
||||
|
||||
def test_agent_name(llama_stack_client, text_model_id):
|
||||
agent_name = f"test-agent-{uuid4()}"
|
||||
|
||||
try:
|
||||
agent = Agent(
|
||||
llama_stack_client,
|
||||
model=text_model_id,
|
||||
instructions="You are a helpful assistant",
|
||||
name=agent_name,
|
||||
)
|
||||
except TypeError:
|
||||
agent = Agent(
|
||||
llama_stack_client,
|
||||
model=text_model_id,
|
||||
instructions="You are a helpful assistant",
|
||||
)
|
||||
return
|
||||
|
||||
session_id = agent.create_session(f"test-session-{uuid4()}")
|
||||
|
||||
agent.create_turn(
|
||||
messages=[
|
||||
{
|
||||
"role": "user",
|
||||
"content": "Give me a sentence that contains the word: hello",
|
||||
}
|
||||
],
|
||||
session_id=session_id,
|
||||
stream=False,
|
||||
)
|
||||
|
||||
all_spans = []
|
||||
for span in llama_stack_client.telemetry.query_spans(
|
||||
attribute_filters=[
|
||||
{"key": "session_id", "op": "eq", "value": session_id},
|
||||
],
|
||||
attributes_to_return=["input", "output", "agent_name", "agent_id", "session_id"],
|
||||
):
|
||||
all_spans.append(span.attributes)
|
||||
|
||||
agent_name_spans = []
|
||||
for span in llama_stack_client.telemetry.query_spans(
|
||||
attribute_filters=[],
|
||||
attributes_to_return=["agent_name"],
|
||||
):
|
||||
if "agent_name" in span.attributes:
|
||||
agent_name_spans.append(span.attributes)
|
||||
|
||||
agent_logs = []
|
||||
for span in llama_stack_client.telemetry.query_spans(
|
||||
attribute_filters=[
|
||||
{"key": "agent_name", "op": "eq", "value": agent_name},
|
||||
],
|
||||
attributes_to_return=["input", "output", "agent_name"],
|
||||
):
|
||||
if "output" in span.attributes and span.attributes["output"] != "no shields":
|
||||
agent_logs.append(span.attributes)
|
||||
|
||||
assert len(agent_logs) == 1
|
||||
assert agent_logs[0]["agent_name"] == agent_name
|
||||
assert "Give me a sentence that contains the word: hello" in agent_logs[0]["input"]
|
||||
assert "hello" in agent_logs[0]["output"].lower()
|
||||
|
||||
|
||||
def test_tool_config(llama_stack_client_with_mocked_inference, agent_config):
|
||||
common_params = dict(
|
||||
model="meta-llama/Llama-3.2-3B-Instruct",
|
||||
|
|
@ -231,6 +295,7 @@ def test_builtin_tool_code_execution(llama_stack_client_with_mocked_inference, a
|
|||
# This test must be run in an environment where `bwrap` is available. If you are running against a
|
||||
# server, this means the _server_ must have `bwrap` available. If you are using the library client, then
|
||||
# you must have `bwrap` available in the test's environment.
|
||||
@pytest.mark.skip(reason="Code interpreter is currently disabled in the Stack")
|
||||
def test_code_interpreter_for_attachments(llama_stack_client_with_mocked_inference, agent_config):
|
||||
agent_config = {
|
||||
**agent_config,
|
||||
|
|
@ -487,6 +552,7 @@ def test_rag_agent_with_attachments(llama_stack_client_with_mocked_inference, ag
|
|||
assert "lora" in response.output_message.content.lower()
|
||||
|
||||
|
||||
@pytest.mark.skip(reason="Code interpreter is currently disabled in the Stack")
|
||||
def test_rag_and_code_agent(llama_stack_client_with_mocked_inference, agent_config):
|
||||
if "llama-4" in agent_config["model"].lower():
|
||||
pytest.xfail("Not working for llama4")
|
||||
|
|
|
|||
|
|
@ -10,6 +10,7 @@ import platform
|
|||
import textwrap
|
||||
import time
|
||||
|
||||
import pytest
|
||||
from dotenv import load_dotenv
|
||||
|
||||
from llama_stack.log import get_logger
|
||||
|
|
@ -19,10 +20,29 @@ from .report import Report
|
|||
logger = get_logger(__name__, category="tests")
|
||||
|
||||
|
||||
@pytest.hookimpl(hookwrapper=True)
|
||||
def pytest_runtest_makereport(item, call):
|
||||
outcome = yield
|
||||
report = outcome.get_result()
|
||||
if report.when == "call":
|
||||
item.execution_outcome = report.outcome
|
||||
item.was_xfail = getattr(report, "wasxfail", False)
|
||||
|
||||
|
||||
def pytest_runtest_teardown(item):
|
||||
interval_seconds = os.getenv("LLAMA_STACK_TEST_INTERVAL_SECONDS")
|
||||
if interval_seconds:
|
||||
time.sleep(float(interval_seconds))
|
||||
# Check if the test actually ran and passed or failed, but was not skipped or an expected failure (xfail)
|
||||
outcome = getattr(item, "execution_outcome", None)
|
||||
was_xfail = getattr(item, "was_xfail", False)
|
||||
|
||||
name = item.nodeid
|
||||
if not any(x in name for x in ("inference/", "safety/", "agents/")):
|
||||
return
|
||||
|
||||
logger.debug(f"Test '{item.nodeid}' outcome was '{outcome}' (xfail={was_xfail})")
|
||||
if outcome in ("passed", "failed") and not was_xfail:
|
||||
interval_seconds = os.getenv("LLAMA_STACK_TEST_INTERVAL_SECONDS")
|
||||
if interval_seconds:
|
||||
time.sleep(float(interval_seconds))
|
||||
|
||||
|
||||
def pytest_configure(config):
|
||||
|
|
|
|||
|
|
@ -31,6 +31,7 @@ def data_url_from_file(file_path: str) -> str:
|
|||
return data_url
|
||||
|
||||
|
||||
@pytest.mark.skip(reason="flaky. Couldn't find 'llamastack/simpleqa' on the Hugging Face Hub")
|
||||
@pytest.mark.parametrize(
|
||||
"purpose, source, provider_id, limit",
|
||||
[
|
||||
|
|
|
|||
|
|
@ -14,6 +14,7 @@ from pathlib import Path
|
|||
import pytest
|
||||
import yaml
|
||||
from llama_stack_client import LlamaStackClient
|
||||
from openai import OpenAI
|
||||
|
||||
from llama_stack import LlamaStackAsLibraryClient
|
||||
from llama_stack.apis.datatypes import Api
|
||||
|
|
@ -207,3 +208,9 @@ def llama_stack_client(request, provider_data, text_model_id):
|
|||
raise RuntimeError("Initialization failed")
|
||||
|
||||
return client
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
|
||||
def openai_client(client_with_models):
|
||||
base_url = f"{client_with_models.base_url}/v1/openai/v1"
|
||||
return OpenAI(base_url=base_url, api_key="fake")
|
||||
|
|
|
|||
|
|
@ -24,7 +24,7 @@ class RecordableMock:
|
|||
# Load existing cache if available and not recording
|
||||
if self.json_path.exists():
|
||||
try:
|
||||
with open(self.json_path, "r") as f:
|
||||
with open(self.json_path) as f:
|
||||
self.cache = json.load(f)
|
||||
except Exception as e:
|
||||
print(f"Error loading cache from {self.json_path}: {e}")
|
||||
|
|
|
|||
|
|
@ -75,19 +75,24 @@ def openai_client(client_with_models):
|
|||
return OpenAI(base_url=base_url, api_key="bar")
|
||||
|
||||
|
||||
@pytest.fixture(params=["openai_client", "llama_stack_client"])
|
||||
def compat_client(request):
|
||||
return request.getfixturevalue(request.param)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"test_case",
|
||||
[
|
||||
"inference:completion:sanity",
|
||||
],
|
||||
)
|
||||
def test_openai_completion_non_streaming(openai_client, client_with_models, text_model_id, test_case):
|
||||
def test_openai_completion_non_streaming(llama_stack_client, client_with_models, text_model_id, test_case):
|
||||
skip_if_model_doesnt_support_openai_completion(client_with_models, text_model_id)
|
||||
tc = TestCase(test_case)
|
||||
|
||||
# ollama needs more verbose prompting for some reason here...
|
||||
prompt = "Respond to this question and explain your answer. " + tc["content"]
|
||||
response = openai_client.completions.create(
|
||||
response = llama_stack_client.completions.create(
|
||||
model=text_model_id,
|
||||
prompt=prompt,
|
||||
stream=False,
|
||||
|
|
@ -103,13 +108,13 @@ def test_openai_completion_non_streaming(openai_client, client_with_models, text
|
|||
"inference:completion:sanity",
|
||||
],
|
||||
)
|
||||
def test_openai_completion_streaming(openai_client, client_with_models, text_model_id, test_case):
|
||||
def test_openai_completion_streaming(llama_stack_client, client_with_models, text_model_id, test_case):
|
||||
skip_if_model_doesnt_support_openai_completion(client_with_models, text_model_id)
|
||||
tc = TestCase(test_case)
|
||||
|
||||
# ollama needs more verbose prompting for some reason here...
|
||||
prompt = "Respond to this question and explain your answer. " + tc["content"]
|
||||
response = openai_client.completions.create(
|
||||
response = llama_stack_client.completions.create(
|
||||
model=text_model_id,
|
||||
prompt=prompt,
|
||||
stream=True,
|
||||
|
|
@ -127,11 +132,11 @@ def test_openai_completion_streaming(openai_client, client_with_models, text_mod
|
|||
0,
|
||||
],
|
||||
)
|
||||
def test_openai_completion_prompt_logprobs(openai_client, client_with_models, text_model_id, prompt_logprobs):
|
||||
def test_openai_completion_prompt_logprobs(llama_stack_client, client_with_models, text_model_id, prompt_logprobs):
|
||||
skip_if_provider_isnt_vllm(client_with_models, text_model_id)
|
||||
|
||||
prompt = "Hello, world!"
|
||||
response = openai_client.completions.create(
|
||||
response = llama_stack_client.completions.create(
|
||||
model=text_model_id,
|
||||
prompt=prompt,
|
||||
stream=False,
|
||||
|
|
@ -144,11 +149,11 @@ def test_openai_completion_prompt_logprobs(openai_client, client_with_models, te
|
|||
assert len(choice.prompt_logprobs) > 0
|
||||
|
||||
|
||||
def test_openai_completion_guided_choice(openai_client, client_with_models, text_model_id):
|
||||
def test_openai_completion_guided_choice(llama_stack_client, client_with_models, text_model_id):
|
||||
skip_if_provider_isnt_vllm(client_with_models, text_model_id)
|
||||
|
||||
prompt = "I am feeling really sad today."
|
||||
response = openai_client.completions.create(
|
||||
response = llama_stack_client.completions.create(
|
||||
model=text_model_id,
|
||||
prompt=prompt,
|
||||
stream=False,
|
||||
|
|
@ -161,6 +166,9 @@ def test_openai_completion_guided_choice(openai_client, client_with_models, text
|
|||
assert choice.text in ["joy", "sadness"]
|
||||
|
||||
|
||||
# Run the chat-completion tests with both the OpenAI client and the LlamaStack client
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"test_case",
|
||||
[
|
||||
|
|
@ -168,13 +176,13 @@ def test_openai_completion_guided_choice(openai_client, client_with_models, text
|
|||
"inference:chat_completion:non_streaming_02",
|
||||
],
|
||||
)
|
||||
def test_openai_chat_completion_non_streaming(openai_client, client_with_models, text_model_id, test_case):
|
||||
def test_openai_chat_completion_non_streaming(compat_client, client_with_models, text_model_id, test_case):
|
||||
skip_if_model_doesnt_support_openai_chat_completion(client_with_models, text_model_id)
|
||||
tc = TestCase(test_case)
|
||||
question = tc["question"]
|
||||
expected = tc["expected"]
|
||||
|
||||
response = openai_client.chat.completions.create(
|
||||
response = compat_client.chat.completions.create(
|
||||
model=text_model_id,
|
||||
messages=[
|
||||
{
|
||||
|
|
@ -196,13 +204,13 @@ def test_openai_chat_completion_non_streaming(openai_client, client_with_models,
|
|||
"inference:chat_completion:streaming_02",
|
||||
],
|
||||
)
|
||||
def test_openai_chat_completion_streaming(openai_client, client_with_models, text_model_id, test_case):
|
||||
def test_openai_chat_completion_streaming(compat_client, client_with_models, text_model_id, test_case):
|
||||
skip_if_model_doesnt_support_openai_chat_completion(client_with_models, text_model_id)
|
||||
tc = TestCase(test_case)
|
||||
question = tc["question"]
|
||||
expected = tc["expected"]
|
||||
|
||||
response = openai_client.chat.completions.create(
|
||||
response = compat_client.chat.completions.create(
|
||||
model=text_model_id,
|
||||
messages=[{"role": "user", "content": question}],
|
||||
stream=True,
|
||||
|
|
|
|||
|
|
@ -3,7 +3,6 @@
|
|||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
from typing import List
|
||||
|
||||
import pytest
|
||||
|
||||
|
|
@ -77,7 +76,7 @@ class TestPostTraining:
|
|||
async def test_get_training_jobs(self, post_training_stack):
|
||||
post_training_impl = post_training_stack
|
||||
jobs_list = await post_training_impl.get_training_jobs()
|
||||
assert isinstance(jobs_list, List)
|
||||
assert isinstance(jobs_list, list)
|
||||
assert jobs_list[0].job_uuid == "1234"
|
||||
|
||||
@pytest.mark.asyncio
|
||||
|
|
|
|||
5
tests/integration/providers/nvidia/__init__.py
Normal file
5
tests/integration/providers/nvidia/__init__.py
Normal file
|
|
@ -0,0 +1,5 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
14
tests/integration/providers/nvidia/conftest.py
Normal file
14
tests/integration/providers/nvidia/conftest.py
Normal file
|
|
@ -0,0 +1,14 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
import os
|
||||
|
||||
import pytest
|
||||
|
||||
# Skip all tests in this directory when running in GitHub Actions
|
||||
in_github_actions = os.environ.get("GITHUB_ACTIONS") == "true"
|
||||
if in_github_actions:
|
||||
pytest.skip("Skipping NVIDIA tests in GitHub Actions environment", allow_module_level=True)
|
||||
47
tests/integration/providers/nvidia/test_datastore.py
Normal file
47
tests/integration/providers/nvidia/test_datastore.py
Normal file
|
|
@ -0,0 +1,47 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
|
||||
import pytest
|
||||
|
||||
# How to run this test:
|
||||
#
|
||||
# LLAMA_STACK_CONFIG="nvidia" pytest -v tests/integration/providers/nvidia/test_datastore.py
|
||||
|
||||
|
||||
# nvidia provider only
|
||||
@pytest.mark.parametrize(
|
||||
"provider_id",
|
||||
[
|
||||
"nvidia",
|
||||
],
|
||||
)
|
||||
def test_register_and_unregister(llama_stack_client, provider_id):
|
||||
purpose = "eval/messages-answer"
|
||||
source = {
|
||||
"type": "uri",
|
||||
"uri": "hf://datasets/llamastack/simpleqa?split=train",
|
||||
}
|
||||
dataset_id = f"test-dataset-{provider_id}"
|
||||
dataset = llama_stack_client.datasets.register(
|
||||
dataset_id=dataset_id,
|
||||
purpose=purpose,
|
||||
source=source,
|
||||
metadata={"provider_id": provider_id, "format": "json", "description": "Test dataset description"},
|
||||
)
|
||||
assert dataset.identifier is not None
|
||||
assert dataset.provider_id == provider_id
|
||||
assert dataset.identifier == dataset_id
|
||||
|
||||
dataset_list = llama_stack_client.datasets.list()
|
||||
provider_datasets = [d for d in dataset_list if d.provider_id == provider_id]
|
||||
assert any(provider_datasets)
|
||||
assert any(d.identifier == dataset_id for d in provider_datasets)
|
||||
|
||||
llama_stack_client.datasets.unregister(dataset.identifier)
|
||||
dataset_list = llama_stack_client.datasets.list()
|
||||
provider_datasets = [d for d in dataset_list if d.identifier == dataset.identifier]
|
||||
assert not any(provider_datasets)
|
||||
37
tests/integration/test_cases/openai/responses.json
Normal file
37
tests/integration/test_cases/openai/responses.json
Normal file
|
|
@ -0,0 +1,37 @@
|
|||
{
|
||||
"non_streaming_01": {
|
||||
"data": {
|
||||
"question": "Which planet do humans live on?",
|
||||
"expected": "Earth"
|
||||
}
|
||||
},
|
||||
"non_streaming_02": {
|
||||
"data": {
|
||||
"question": "Which planet has rings around it with a name starting with letter S?",
|
||||
"expected": "Saturn"
|
||||
}
|
||||
},
|
||||
"streaming_01": {
|
||||
"data": {
|
||||
"question": "What's the name of the Sun in Latin?",
|
||||
"expected": "Sol"
|
||||
}
|
||||
},
|
||||
"streaming_02": {
|
||||
"data": {
|
||||
"question": "What is the name of the US capital?",
|
||||
"expected": "Washington"
|
||||
}
|
||||
},
|
||||
"tools_web_search_01": {
|
||||
"data": {
|
||||
"input": "How many experts does the Llama 4 Maverick model have?",
|
||||
"tools": [
|
||||
{
|
||||
"type": "web_search"
|
||||
}
|
||||
],
|
||||
"expected": "128"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -12,6 +12,7 @@ class TestCase:
|
|||
_apis = [
|
||||
"inference/chat_completion",
|
||||
"inference/completion",
|
||||
"openai/responses",
|
||||
]
|
||||
_jsonblob = {}
|
||||
|
||||
|
|
@ -19,7 +20,7 @@ class TestCase:
|
|||
# loading all test cases
|
||||
if self._jsonblob == {}:
|
||||
for api in self._apis:
|
||||
with open(pathlib.Path(__file__).parent / f"{api}.json", "r") as f:
|
||||
with open(pathlib.Path(__file__).parent / f"{api}.json") as f:
|
||||
coloned = api.replace("/", ":")
|
||||
try:
|
||||
loaded = json.load(f)
|
||||
|
|
|
|||
|
|
@ -114,7 +114,7 @@ def test_register_and_unregister_toolgroup(llama_stack_client, mcp_server):
|
|||
llama_stack_client.toolgroups.unregister(toolgroup_id=test_toolgroup_id)
|
||||
|
||||
# Verify it is unregistered
|
||||
with pytest.raises(ValueError, match=f"Tool group '{test_toolgroup_id}' not found"):
|
||||
with pytest.raises(Exception, match=f"Tool group '{test_toolgroup_id}' not found"):
|
||||
llama_stack_client.toolgroups.get(toolgroup_id=test_toolgroup_id)
|
||||
|
||||
# Verify tools are also unregistered
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue