Merge branch 'main' into feat/litellm_sambanova_usage

jhpiedrahitao 2025-05-05 11:49:58 -05:00
commit b7f16ac7a6
535 changed files with 23539 additions and 8112 deletions

View file

@@ -4,7 +4,7 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from typing import Any, Dict
from typing import Any
from uuid import uuid4
import pytest
@@ -37,7 +37,7 @@ def get_boiling_point(liquid_name: str, celcius: bool = True) -> int:
return -1
def get_boiling_point_with_metadata(liquid_name: str, celcius: bool = True) -> Dict[str, Any]:
def get_boiling_point_with_metadata(liquid_name: str, celcius: bool = True) -> dict[str, Any]:
"""
Returns the boiling point of a liquid in Celcius or Fahrenheit
@@ -115,6 +115,70 @@ def test_agent_simple(llama_stack_client_with_mocked_inference, agent_config):
assert "I can't" in logs_str
def test_agent_name(llama_stack_client, text_model_id):
agent_name = f"test-agent-{uuid4()}"
try:
agent = Agent(
llama_stack_client,
model=text_model_id,
instructions="You are a helpful assistant",
name=agent_name,
)
except TypeError:
agent = Agent(
llama_stack_client,
model=text_model_id,
instructions="You are a helpful assistant",
)
return
session_id = agent.create_session(f"test-session-{uuid4()}")
agent.create_turn(
messages=[
{
"role": "user",
"content": "Give me a sentence that contains the word: hello",
}
],
session_id=session_id,
stream=False,
)
all_spans = []
for span in llama_stack_client.telemetry.query_spans(
attribute_filters=[
{"key": "session_id", "op": "eq", "value": session_id},
],
attributes_to_return=["input", "output", "agent_name", "agent_id", "session_id"],
):
all_spans.append(span.attributes)
agent_name_spans = []
for span in llama_stack_client.telemetry.query_spans(
attribute_filters=[],
attributes_to_return=["agent_name"],
):
if "agent_name" in span.attributes:
agent_name_spans.append(span.attributes)
agent_logs = []
for span in llama_stack_client.telemetry.query_spans(
attribute_filters=[
{"key": "agent_name", "op": "eq", "value": agent_name},
],
attributes_to_return=["input", "output", "agent_name"],
):
if "output" in span.attributes and span.attributes["output"] != "no shields":
agent_logs.append(span.attributes)
assert len(agent_logs) == 1
assert agent_logs[0]["agent_name"] == agent_name
assert "Give me a sentence that contains the word: hello" in agent_logs[0]["input"]
assert "hello" in agent_logs[0]["output"].lower()
def test_tool_config(llama_stack_client_with_mocked_inference, agent_config):
common_params = dict(
model="meta-llama/Llama-3.2-3B-Instruct",
@@ -231,6 +295,7 @@ def test_builtin_tool_code_execution(llama_stack_client_with_mocked_inference, a
# This test must be run in an environment where `bwrap` is available. If you are running against a
# server, this means the _server_ must have `bwrap` available. If you are using library client, then
# you must have `bwrap` available in test's environment.
@pytest.mark.skip(reason="Code interpreter is currently disabled in the Stack")
def test_code_interpreter_for_attachments(llama_stack_client_with_mocked_inference, agent_config):
agent_config = {
**agent_config,
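The comment above notes that bwrap must be available wherever the code-interpreter test actually runs. A minimal sketch, not part of this change, of how that requirement could instead be expressed as an automatic skip, assuming detection via shutil.which is acceptable (the requires_bwrap marker name is hypothetical):

import shutil
import pytest

# Hypothetical marker: skip code-interpreter tests when bwrap is not on PATH
requires_bwrap = pytest.mark.skipif(
    shutil.which("bwrap") is None,
    reason="bwrap is required for code-interpreter tests",
)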
@@ -487,6 +552,7 @@ def test_rag_agent_with_attachments(llama_stack_client_with_mocked_inference, ag
assert "lora" in response.output_message.content.lower()
@pytest.mark.skip(reason="Code interpreter is currently disabled in the Stack")
def test_rag_and_code_agent(llama_stack_client_with_mocked_inference, agent_config):
if "llama-4" in agent_config["model"].lower():
pytest.xfail("Not working for llama4")

View file

@@ -10,6 +10,7 @@ import platform
import textwrap
import time
import pytest
from dotenv import load_dotenv
from llama_stack.log import get_logger
@@ -19,10 +20,29 @@ from .report import Report
logger = get_logger(__name__, category="tests")
@pytest.hookimpl(hookwrapper=True)
def pytest_runtest_makereport(item, call):
outcome = yield
report = outcome.get_result()
if report.when == "call":
item.execution_outcome = report.outcome
item.was_xfail = getattr(report, "wasxfail", False)
def pytest_runtest_teardown(item):
interval_seconds = os.getenv("LLAMA_STACK_TEST_INTERVAL_SECONDS")
if interval_seconds:
time.sleep(float(interval_seconds))
# Check if the test actually ran and passed or failed, but was not skipped or an expected failure (xfail)
outcome = getattr(item, "execution_outcome", None)
was_xfail = getattr(item, "was_xfail", False)
name = item.nodeid
if not any(x in name for x in ("inference/", "safety/", "agents/")):
return
logger.debug(f"Test '{item.nodeid}' outcome was '{outcome}' (xfail={was_xfail})")
if outcome in ("passed", "failed") and not was_xfail:
interval_seconds = os.getenv("LLAMA_STACK_TEST_INTERVAL_SECONDS")
if interval_seconds:
time.sleep(float(interval_seconds))
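This teardown now only pauses between tests that actually ran (passed or failed, not skipped or xfail) and only for the inference, safety, and agents suites. An assumed invocation that enables the pause, with an illustrative path:

# LLAMA_STACK_TEST_INTERVAL_SECONDS=2 pytest -v tests/integration/inference/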
def pytest_configure(config):

View file

@@ -31,6 +31,7 @@ def data_url_from_file(file_path: str) -> str:
return data_url
@pytest.mark.skip(reason="flaky. Couldn't find 'llamastack/simpleqa' on the Hugging Face Hub")
@pytest.mark.parametrize(
"purpose, source, provider_id, limit",
[

View file

@@ -14,6 +14,7 @@ from pathlib import Path
import pytest
import yaml
from llama_stack_client import LlamaStackClient
from openai import OpenAI
from llama_stack import LlamaStackAsLibraryClient
from llama_stack.apis.datatypes import Api
@@ -207,3 +208,9 @@ def llama_stack_client(request, provider_data, text_model_id):
raise RuntimeError("Initialization failed")
return client
@pytest.fixture(scope="session")
def openai_client(client_with_models):
base_url = f"{client_with_models.base_url}/v1/openai/v1"
return OpenAI(base_url=base_url, api_key="fake")
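A minimal usage sketch for this fixture (the test name below is hypothetical; models.list() is the standard OpenAI SDK call):

def test_openai_endpoint_reachable(openai_client):
    # Smoke check: the OpenAI SDK should reach the Stack's OpenAI-compatible
    # endpoint under /v1/openai/v1 and return the registered models
    models = openai_client.models.list()
    assert models.data is not None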

View file

@@ -24,7 +24,7 @@ class RecordableMock:
# Load existing cache if available and not recording
if self.json_path.exists():
try:
with open(self.json_path, "r") as f:
with open(self.json_path) as f:
self.cache = json.load(f)
except Exception as e:
print(f"Error loading cache from {self.json_path}: {e}")

View file

@@ -75,19 +75,24 @@ def openai_client(client_with_models):
return OpenAI(base_url=base_url, api_key="bar")
@pytest.fixture(params=["openai_client", "llama_stack_client"])
def compat_client(request):
return request.getfixturevalue(request.param)
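Each chat-completion test that accepts compat_client is collected once per fixture name: pytest substitutes request.param ("openai_client" or "llama_stack_client") and request.getfixturevalue resolves that fixture lazily at setup time, so the same test body exercises both clients.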
@pytest.mark.parametrize(
"test_case",
[
"inference:completion:sanity",
],
)
def test_openai_completion_non_streaming(openai_client, client_with_models, text_model_id, test_case):
def test_openai_completion_non_streaming(llama_stack_client, client_with_models, text_model_id, test_case):
skip_if_model_doesnt_support_openai_completion(client_with_models, text_model_id)
tc = TestCase(test_case)
# ollama needs more verbose prompting for some reason here...
prompt = "Respond to this question and explain your answer. " + tc["content"]
response = openai_client.completions.create(
response = llama_stack_client.completions.create(
model=text_model_id,
prompt=prompt,
stream=False,
@@ -103,13 +108,13 @@ def test_openai_completion_non_streaming(openai_client, client_with_models, text
"inference:completion:sanity",
],
)
def test_openai_completion_streaming(openai_client, client_with_models, text_model_id, test_case):
def test_openai_completion_streaming(llama_stack_client, client_with_models, text_model_id, test_case):
skip_if_model_doesnt_support_openai_completion(client_with_models, text_model_id)
tc = TestCase(test_case)
# ollama needs more verbose prompting for some reason here...
prompt = "Respond to this question and explain your answer. " + tc["content"]
response = openai_client.completions.create(
response = llama_stack_client.completions.create(
model=text_model_id,
prompt=prompt,
stream=True,
@@ -127,11 +132,11 @@ def test_openai_completion_streaming(openai_client, client_with_models, text_mod
0,
],
)
def test_openai_completion_prompt_logprobs(openai_client, client_with_models, text_model_id, prompt_logprobs):
def test_openai_completion_prompt_logprobs(llama_stack_client, client_with_models, text_model_id, prompt_logprobs):
skip_if_provider_isnt_vllm(client_with_models, text_model_id)
prompt = "Hello, world!"
response = openai_client.completions.create(
response = llama_stack_client.completions.create(
model=text_model_id,
prompt=prompt,
stream=False,
@@ -144,11 +149,11 @@ def test_openai_completion_prompt_logprobs(openai_client, client_with_models, te
assert len(choice.prompt_logprobs) > 0
def test_openai_completion_guided_choice(openai_client, client_with_models, text_model_id):
def test_openai_completion_guided_choice(llama_stack_client, client_with_models, text_model_id):
skip_if_provider_isnt_vllm(client_with_models, text_model_id)
prompt = "I am feeling really sad today."
response = openai_client.completions.create(
response = llama_stack_client.completions.create(
model=text_model_id,
prompt=prompt,
stream=False,
@@ -161,6 +166,9 @@ def test_openai_completion_guided_choice(openai_client, client_with_models, text
assert choice.text in ["joy", "sadness"]
# Run the chat-completion tests with both the OpenAI client and the LlamaStack client
@pytest.mark.parametrize(
"test_case",
[
@@ -168,13 +176,13 @@ def test_openai_completion_guided_choice(openai_client, client_with_models, text
"inference:chat_completion:non_streaming_02",
],
)
def test_openai_chat_completion_non_streaming(openai_client, client_with_models, text_model_id, test_case):
def test_openai_chat_completion_non_streaming(compat_client, client_with_models, text_model_id, test_case):
skip_if_model_doesnt_support_openai_chat_completion(client_with_models, text_model_id)
tc = TestCase(test_case)
question = tc["question"]
expected = tc["expected"]
response = openai_client.chat.completions.create(
response = compat_client.chat.completions.create(
model=text_model_id,
messages=[
{
@@ -196,13 +204,13 @@ def test_openai_chat_completion_non_streaming(openai_client, client_with_models,
"inference:chat_completion:streaming_02",
],
)
def test_openai_chat_completion_streaming(openai_client, client_with_models, text_model_id, test_case):
def test_openai_chat_completion_streaming(compat_client, client_with_models, text_model_id, test_case):
skip_if_model_doesnt_support_openai_chat_completion(client_with_models, text_model_id)
tc = TestCase(test_case)
question = tc["question"]
expected = tc["expected"]
response = openai_client.chat.completions.create(
response = compat_client.chat.completions.create(
model=text_model_id,
messages=[{"role": "user", "content": question}],
stream=True,

View file

@@ -3,7 +3,6 @@
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from typing import List
import pytest
@@ -77,7 +76,7 @@ class TestPostTraining:
async def test_get_training_jobs(self, post_training_stack):
post_training_impl = post_training_stack
jobs_list = await post_training_impl.get_training_jobs()
assert isinstance(jobs_list, List)
assert isinstance(jobs_list, list)
assert jobs_list[0].job_uuid == "1234"
@pytest.mark.asyncio

View file

@@ -0,0 +1,5 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

View file

@@ -0,0 +1,14 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
import os
import pytest
# Skip all tests in this directory when running in GitHub Actions
in_github_actions = os.environ.get("GITHUB_ACTIONS") == "true"
if in_github_actions:
pytest.skip("Skipping NVIDIA tests in GitHub Actions environment", allow_module_level=True)
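Because pytest.skip(..., allow_module_level=True) raises at collection time, every test under this directory is skipped before it runs whenever GITHUB_ACTIONS is "true", as the comment above states.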

View file

@@ -0,0 +1,47 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
import pytest
# How to run this test:
#
# LLAMA_STACK_CONFIG="nvidia" pytest -v tests/integration/providers/nvidia/test_datastore.py
# nvidia provider only
@pytest.mark.parametrize(
"provider_id",
[
"nvidia",
],
)
def test_register_and_unregister(llama_stack_client, provider_id):
purpose = "eval/messages-answer"
source = {
"type": "uri",
"uri": "hf://datasets/llamastack/simpleqa?split=train",
}
dataset_id = f"test-dataset-{provider_id}"
dataset = llama_stack_client.datasets.register(
dataset_id=dataset_id,
purpose=purpose,
source=source,
metadata={"provider_id": provider_id, "format": "json", "description": "Test dataset description"},
)
assert dataset.identifier is not None
assert dataset.provider_id == provider_id
assert dataset.identifier == dataset_id
dataset_list = llama_stack_client.datasets.list()
provider_datasets = [d for d in dataset_list if d.provider_id == provider_id]
assert any(provider_datasets)
assert any(d.identifier == dataset_id for d in provider_datasets)
llama_stack_client.datasets.unregister(dataset.identifier)
dataset_list = llama_stack_client.datasets.list()
provider_datasets = [d for d in dataset_list if d.identifier == dataset.identifier]
assert not any(provider_datasets)

View file

@@ -0,0 +1,37 @@
{
"non_streaming_01": {
"data": {
"question": "Which planet do humans live on?",
"expected": "Earth"
}
},
"non_streaming_02": {
"data": {
"question": "Which planet has rings around it with a name starting with letter S?",
"expected": "Saturn"
}
},
"streaming_01": {
"data": {
"question": "What's the name of the Sun in latin?",
"expected": "Sol"
}
},
"streaming_02": {
"data": {
"question": "What is the name of the US captial?",
"expected": "Washington"
}
},
"tools_web_search_01": {
"data": {
"input": "How many experts does the Llama 4 Maverick model have?",
"tools": [
{
"type": "web_search"
}
],
"expected": "128"
}
}
}

View file

@@ -12,6 +12,7 @@ class TestCase:
_apis = [
"inference/chat_completion",
"inference/completion",
"openai/responses",
]
_jsonblob = {}
@@ -19,7 +20,7 @@
# loading all test cases
if self._jsonblob == {}:
for api in self._apis:
with open(pathlib.Path(__file__).parent / f"{api}.json", "r") as f:
with open(pathlib.Path(__file__).parent / f"{api}.json") as f:
coloned = api.replace("/", ":")
try:
loaded = json.load(f)

View file

@@ -114,7 +114,7 @@ def test_register_and_unregister_toolgroup(llama_stack_client, mcp_server):
llama_stack_client.toolgroups.unregister(toolgroup_id=test_toolgroup_id)
# Verify it is unregistered
with pytest.raises(ValueError, match=f"Tool group '{test_toolgroup_id}' not found"):
with pytest.raises(Exception, match=f"Tool group '{test_toolgroup_id}' not found"):
llama_stack_client.toolgroups.get(toolgroup_id=test_toolgroup_id)
# Verify tools are also unregistered