From 73868ce9e30ea60d97c151e9d74e578bf8355599 Mon Sep 17 00:00:00 2001 From: Mustafa Elbehery Date: Fri, 18 Jul 2025 01:20:12 +0200 Subject: [PATCH 1/5] =?UTF-8?q?chore(test):=20migrate=20unit=20tests=20fro?= =?UTF-8?q?m=20unittest=20to=20pytest=20for=20server=20en=E2=80=A6=20(#279?= =?UTF-8?q?5)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This PR replaces unittest with pytest. Part of https://github.com/meta-llama/llama-stack/issues/2680 cc @leseb Signed-off-by: Mustafa Elbehery --- tests/unit/server/test_replace_env_vars.py | 135 +++++++++++---------- 1 file changed, 74 insertions(+), 61 deletions(-) diff --git a/tests/unit/server/test_replace_env_vars.py b/tests/unit/server/test_replace_env_vars.py index 432d6aee5..55817044d 100644 --- a/tests/unit/server/test_replace_env_vars.py +++ b/tests/unit/server/test_replace_env_vars.py @@ -5,73 +5,86 @@ # the root directory of this source tree. import os -import unittest + +import pytest from llama_stack.distribution.stack import replace_env_vars -class TestReplaceEnvVars(unittest.TestCase): - def setUp(self): - # Clear any existing environment variables we'll use in tests - for var in ["TEST_VAR", "EMPTY_VAR", "ZERO_VAR"]: - if var in os.environ: - del os.environ[var] +@pytest.fixture +def setup_env_vars(): + # Clear any existing environment variables we'll use in tests + for var in ["TEST_VAR", "EMPTY_VAR", "ZERO_VAR"]: + if var in os.environ: + del os.environ[var] - # Set up test environment variables - os.environ["TEST_VAR"] = "test_value" - os.environ["EMPTY_VAR"] = "" - os.environ["ZERO_VAR"] = "0" + # Set up test environment variables + os.environ["TEST_VAR"] = "test_value" + os.environ["EMPTY_VAR"] = "" + os.environ["ZERO_VAR"] = "0" - def test_simple_replacement(self): - self.assertEqual(replace_env_vars("${env.TEST_VAR}"), "test_value") + yield - def test_default_value_when_not_set(self): - self.assertEqual(replace_env_vars("${env.NOT_SET:=default}"), "default") - - def test_default_value_when_set(self): - self.assertEqual(replace_env_vars("${env.TEST_VAR:=default}"), "test_value") - - def test_default_value_when_empty(self): - self.assertEqual(replace_env_vars("${env.EMPTY_VAR:=default}"), "default") - - def test_none_value_when_empty(self): - self.assertEqual(replace_env_vars("${env.EMPTY_VAR:=}"), None) - - def test_value_when_set(self): - self.assertEqual(replace_env_vars("${env.TEST_VAR:=}"), "test_value") - - def test_empty_var_no_default(self): - self.assertEqual(replace_env_vars("${env.EMPTY_VAR_NO_DEFAULT:+}"), None) - - def test_conditional_value_when_set(self): - self.assertEqual(replace_env_vars("${env.TEST_VAR:+conditional}"), "conditional") - - def test_conditional_value_when_not_set(self): - self.assertEqual(replace_env_vars("${env.NOT_SET:+conditional}"), None) - - def test_conditional_value_when_empty(self): - self.assertEqual(replace_env_vars("${env.EMPTY_VAR:+conditional}"), None) - - def test_conditional_value_with_zero(self): - self.assertEqual(replace_env_vars("${env.ZERO_VAR:+conditional}"), "conditional") - - def test_mixed_syntax(self): - self.assertEqual( - replace_env_vars("${env.TEST_VAR:=default} and ${env.NOT_SET:+conditional}"), "test_value and " - ) - self.assertEqual( - replace_env_vars("${env.NOT_SET:=default} and ${env.TEST_VAR:+conditional}"), "default and conditional" - ) - - def test_nested_structures(self): - data = { - "key1": "${env.TEST_VAR:=default}", - "key2": ["${env.NOT_SET:=default}", "${env.TEST_VAR:+conditional}"], - "key3": {"nested": 
"${env.NOT_SET:+conditional}"}, - } - expected = {"key1": "test_value", "key2": ["default", "conditional"], "key3": {"nested": None}} - self.assertEqual(replace_env_vars(data), expected) + # Cleanup after test + for var in ["TEST_VAR", "EMPTY_VAR", "ZERO_VAR"]: + if var in os.environ: + del os.environ[var] -if __name__ == "__main__": - unittest.main() +def test_simple_replacement(setup_env_vars): + assert replace_env_vars("${env.TEST_VAR}") == "test_value" + + +def test_default_value_when_not_set(setup_env_vars): + assert replace_env_vars("${env.NOT_SET:=default}") == "default" + + +def test_default_value_when_set(setup_env_vars): + assert replace_env_vars("${env.TEST_VAR:=default}") == "test_value" + + +def test_default_value_when_empty(setup_env_vars): + assert replace_env_vars("${env.EMPTY_VAR:=default}") == "default" + + +def test_none_value_when_empty(setup_env_vars): + assert replace_env_vars("${env.EMPTY_VAR:=}") is None + + +def test_value_when_set(setup_env_vars): + assert replace_env_vars("${env.TEST_VAR:=}") == "test_value" + + +def test_empty_var_no_default(setup_env_vars): + assert replace_env_vars("${env.EMPTY_VAR_NO_DEFAULT:+}") is None + + +def test_conditional_value_when_set(setup_env_vars): + assert replace_env_vars("${env.TEST_VAR:+conditional}") == "conditional" + + +def test_conditional_value_when_not_set(setup_env_vars): + assert replace_env_vars("${env.NOT_SET:+conditional}") is None + + +def test_conditional_value_when_empty(setup_env_vars): + assert replace_env_vars("${env.EMPTY_VAR:+conditional}") is None + + +def test_conditional_value_with_zero(setup_env_vars): + assert replace_env_vars("${env.ZERO_VAR:+conditional}") == "conditional" + + +def test_mixed_syntax(setup_env_vars): + assert replace_env_vars("${env.TEST_VAR:=default} and ${env.NOT_SET:+conditional}") == "test_value and " + assert replace_env_vars("${env.NOT_SET:=default} and ${env.TEST_VAR:+conditional}") == "default and conditional" + + +def test_nested_structures(setup_env_vars): + data = { + "key1": "${env.TEST_VAR:=default}", + "key2": ["${env.NOT_SET:=default}", "${env.TEST_VAR:+conditional}"], + "key3": {"nested": "${env.NOT_SET:+conditional}"}, + } + expected = {"key1": "test_value", "key2": ["default", "conditional"], "key3": {"nested": None}} + assert replace_env_vars(data) == expected From 3ae4aeb344b0a9977fc0fe83abb8ff09fb0eefa4 Mon Sep 17 00:00:00 2001 From: ehhuang Date: Thu, 17 Jul 2025 16:20:51 -0700 Subject: [PATCH 2/5] test: add some tests for Telemetry API (#2787) # What does this PR do? ## Test Plan ENABLE_OLLAMA=ollama LLAMA_STACK_CONFIG=starter uv run pytest tests/integration/telemetry --text-model="ollama/llama3.2:3b-instruct-fp16" --- tests/integration/telemetry/test_telemetry.py | 194 +++++++++++++++--- 1 file changed, 168 insertions(+), 26 deletions(-) diff --git a/tests/integration/telemetry/test_telemetry.py b/tests/integration/telemetry/test_telemetry.py index c65f87489..9df03da70 100644 --- a/tests/integration/telemetry/test_telemetry.py +++ b/tests/integration/telemetry/test_telemetry.py @@ -5,41 +5,183 @@ # the root directory of this source tree. 
import time +from datetime import UTC, datetime from uuid import uuid4 import pytest from llama_stack_client import Agent -@pytest.mark.skip(reason="telemetry is not stable") -def test_agent_query_spans(llama_stack_client, text_model_id): +@pytest.fixture(scope="module", autouse=True) +def setup_telemetry_data(llama_stack_client, text_model_id): + """Setup fixture that creates telemetry data before tests run.""" agent = Agent(llama_stack_client, model=text_model_id, instructions="You are a helpful assistant") - session_id = agent.create_session(f"test-session-{uuid4()}") - agent.create_turn( - messages=[ - { - "role": "user", - "content": "Give me a sentence that contains the word: hello", - } - ], - session_id=session_id, - stream=False, + + session_id = agent.create_session(f"test-setup-session-{uuid4()}") + + messages = [ + "What is 2 + 2?", + "Tell me a short joke", + ] + + for msg in messages: + agent.create_turn( + messages=[{"role": "user", "content": msg}], + session_id=session_id, + stream=False, + ) + + for i in range(2): + llama_stack_client.inference.chat_completion( + model_id=text_model_id, messages=[{"role": "user", "content": f"Test trace {i}"}] + ) + + start_time = time.time() + + while time.time() - start_time < 30: + traces = llama_stack_client.telemetry.query_traces(limit=10) + if len(traces) >= 4: + break + time.sleep(1) + + if len(traces) < 4: + pytest.fail(f"Failed to create sufficient telemetry data after 30s. Got {len(traces)} traces.") + + yield + + +def test_query_traces_basic(llama_stack_client): + """Test basic trace querying functionality with proper data validation.""" + all_traces = llama_stack_client.telemetry.query_traces(limit=5) + + assert isinstance(all_traces, list), "Should return a list of traces" + assert len(all_traces) >= 4, "Should have at least 4 traces from setup" + + # Verify trace structure and data quality + first_trace = all_traces[0] + assert hasattr(first_trace, "trace_id"), "Trace should have trace_id" + assert hasattr(first_trace, "start_time"), "Trace should have start_time" + assert hasattr(first_trace, "root_span_id"), "Trace should have root_span_id" + + # Validate trace_id is a valid UUID format + assert isinstance(first_trace.trace_id, str) and len(first_trace.trace_id) > 0, ( + "trace_id should be non-empty string" ) - # Wait for the span to be logged - time.sleep(2) + # Validate start_time format and not in the future + now = datetime.now(UTC) + if isinstance(first_trace.start_time, str): + trace_time = datetime.fromisoformat(first_trace.start_time.replace("Z", "+00:00")) + else: + # start_time is already a datetime object + trace_time = first_trace.start_time + if trace_time.tzinfo is None: + trace_time = trace_time.replace(tzinfo=UTC) - agent_logs = [] + # Ensure trace time is not in the future (but allow any age in the past for persistent test data) + time_diff = (now - trace_time).total_seconds() + assert time_diff >= 0, f"Trace start_time should not be in the future, got {time_diff}s" - for span in llama_stack_client.telemetry.query_spans( - attribute_filters=[ - {"key": "session_id", "op": "eq", "value": session_id}, - ], - attributes_to_return=["input", "output"], - ): - if span.attributes["output"] != "no shields": - agent_logs.append(span.attributes) + # Validate root_span_id exists and is non-empty + assert isinstance(first_trace.root_span_id, str) and len(first_trace.root_span_id) > 0, ( + "root_span_id should be non-empty string" + ) - assert len(agent_logs) == 1 - assert "Give me a sentence that contains the word: 
hello" in agent_logs[0]["input"] - assert "hello" in agent_logs[0]["output"].lower() + # Test querying specific trace by ID + specific_trace = llama_stack_client.telemetry.get_trace(trace_id=first_trace.trace_id) + assert specific_trace.trace_id == first_trace.trace_id, "Retrieved trace should match requested ID" + assert specific_trace.start_time == first_trace.start_time, "Retrieved trace should have same start_time" + assert specific_trace.root_span_id == first_trace.root_span_id, "Retrieved trace should have same root_span_id" + + # Test pagination with proper validation + recent_traces = llama_stack_client.telemetry.query_traces(limit=3, offset=0) + assert len(recent_traces) <= 3, "Should return at most 3 traces when limit=3" + assert len(recent_traces) >= 1, "Should return at least 1 trace" + + # Verify all traces have required fields + for trace in recent_traces: + assert hasattr(trace, "trace_id") and trace.trace_id, "Each trace should have non-empty trace_id" + assert hasattr(trace, "start_time") and trace.start_time, "Each trace should have non-empty start_time" + assert hasattr(trace, "root_span_id") and trace.root_span_id, "Each trace should have non-empty root_span_id" + + +def test_query_spans_basic(llama_stack_client): + """Test basic span querying functionality with proper validation.""" + spans = llama_stack_client.telemetry.query_spans(attribute_filters=[], attributes_to_return=[]) + + assert isinstance(spans, list), "Should return a list of spans" + assert len(spans) >= 1, "Should have at least one span from setup" + + # Verify span structure and data quality + first_span = spans[0] + required_attrs = ["span_id", "name", "trace_id"] + for attr in required_attrs: + assert hasattr(first_span, attr), f"Span should have {attr} attribute" + assert getattr(first_span, attr), f"Span {attr} should not be empty" + + # Validate span data types and content + assert isinstance(first_span.span_id, str) and len(first_span.span_id) > 0, "span_id should be non-empty string" + assert isinstance(first_span.name, str) and len(first_span.name) > 0, "span name should be non-empty string" + assert isinstance(first_span.trace_id, str) and len(first_span.trace_id) > 0, "trace_id should be non-empty string" + + # Verify span belongs to a valid trace (test with traces we know exist) + all_traces = llama_stack_client.telemetry.query_traces(limit=10) + trace_ids = {t.trace_id for t in all_traces} + if first_span.trace_id in trace_ids: + trace = llama_stack_client.telemetry.get_trace(trace_id=first_span.trace_id) + assert trace is not None, "Should be able to retrieve trace for valid trace_id" + assert trace.trace_id == first_span.trace_id, "Trace ID should match span's trace_id" + + # Test with span filtering and validate results + filtered_spans = llama_stack_client.telemetry.query_spans( + attribute_filters=[{"key": "name", "op": "eq", "value": first_span.name}], + attributes_to_return=["name", "span_id"], + ) + assert isinstance(filtered_spans, list), "Should return a list with span name filter" + + # Validate filtered spans if filtering works + if len(filtered_spans) > 0: + for span in filtered_spans: + assert hasattr(span, "name"), "Filtered spans should have name attribute" + assert hasattr(span, "span_id"), "Filtered spans should have span_id attribute" + assert span.name == first_span.name, "Filtered spans should match the filter criteria" + assert isinstance(span.span_id, str) and len(span.span_id) > 0, "Filtered span_id should be valid" + + # Test that all spans have consistent structure 
+ for span in spans: + for attr in required_attrs: + assert hasattr(span, attr) and getattr(span, attr), f"All spans should have non-empty {attr}" + + +def test_telemetry_pagination(llama_stack_client): + """Test pagination in telemetry queries.""" + # Get total count of traces + all_traces = llama_stack_client.telemetry.query_traces(limit=20) + total_count = len(all_traces) + assert total_count >= 4, "Should have at least 4 traces from setup" + + # Test trace pagination + page1 = llama_stack_client.telemetry.query_traces(limit=2, offset=0) + page2 = llama_stack_client.telemetry.query_traces(limit=2, offset=2) + + assert len(page1) == 2, "First page should have exactly 2 traces" + assert len(page2) >= 1, "Second page should have at least 1 trace" + + # Verify no overlap between pages + page1_ids = {t.trace_id for t in page1} + page2_ids = {t.trace_id for t in page2} + assert len(page1_ids.intersection(page2_ids)) == 0, "Pages should contain different traces" + + # Test ordering + ordered_traces = llama_stack_client.telemetry.query_traces(limit=5, order_by=["start_time"]) + assert len(ordered_traces) >= 4, "Should have at least 4 traces for ordering test" + + # Verify ordering by start_time + for i in range(len(ordered_traces) - 1): + current_time = ordered_traces[i].start_time + next_time = ordered_traces[i + 1].start_time + assert current_time <= next_time, f"Traces should be ordered by start_time: {current_time} > {next_time}" + + # Test limit behavior + limited = llama_stack_client.telemetry.query_traces(limit=3) + assert len(limited) == 3, "Should return exactly 3 traces when limit=3" From bd8a3ae3ccd4d22165faf69e00ed0759d50cd372 Mon Sep 17 00:00:00 2001 From: Mustafa Elbehery Date: Fri, 18 Jul 2025 01:31:38 +0200 Subject: [PATCH 3/5] chore(test): migrate unit tests from unittest to pytest for prompt adapter (#2788) This PR replaces unittest with pytest. Part of https://github.com/meta-llama/llama-stack/issues/2680 cc @leseb Co-authored-by: ehhuang --- tests/unit/models/test_prompt_adapter.py | 501 ++++++++++++----------- 1 file changed, 256 insertions(+), 245 deletions(-) diff --git a/tests/unit/models/test_prompt_adapter.py b/tests/unit/models/test_prompt_adapter.py index 0e2780e50..577496cec 100644 --- a/tests/unit/models/test_prompt_adapter.py +++ b/tests/unit/models/test_prompt_adapter.py @@ -4,14 +4,14 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -import asyncio -import unittest +import pytest from llama_stack.apis.inference import ( ChatCompletionRequest, CompletionMessage, StopReason, SystemMessage, + SystemMessageBehavior, ToolCall, ToolConfig, UserMessage, @@ -25,264 +25,275 @@ from llama_stack.models.llama.datatypes import ( from llama_stack.providers.utils.inference.prompt_adapter import ( chat_completion_request_to_messages, chat_completion_request_to_prompt, + interleaved_content_as_str, ) MODEL = "Llama3.1-8B-Instruct" MODEL3_2 = "Llama3.2-3B-Instruct" -class PrepareMessagesTests(unittest.IsolatedAsyncioTestCase): - async def asyncSetUp(self): - asyncio.get_running_loop().set_debug(False) +@pytest.mark.asyncio +async def test_system_default(): + content = "Hello !" 
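+    # With no tools and no system message, the adapter injects the default system
+    # prompt, so we expect exactly [system, user].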
+ request = ChatCompletionRequest( + model=MODEL, + messages=[ + UserMessage(content=content), + ], + ) + messages = chat_completion_request_to_messages(request, MODEL) + assert len(messages) == 2 + assert messages[-1].content == content + assert "Cutting Knowledge Date: December 2023" in interleaved_content_as_str(messages[0].content) - async def test_system_default(self): - content = "Hello !" - request = ChatCompletionRequest( - model=MODEL, - messages=[ - UserMessage(content=content), - ], - ) - messages = chat_completion_request_to_messages(request, MODEL) - self.assertEqual(len(messages), 2) - self.assertEqual(messages[-1].content, content) - self.assertTrue("Cutting Knowledge Date: December 2023" in messages[0].content) - async def test_system_builtin_only(self): - content = "Hello !" - request = ChatCompletionRequest( - model=MODEL, - messages=[ - UserMessage(content=content), - ], - tools=[ - ToolDefinition(tool_name=BuiltinTool.code_interpreter), - ToolDefinition(tool_name=BuiltinTool.brave_search), - ], - ) - messages = chat_completion_request_to_messages(request, MODEL) - self.assertEqual(len(messages), 2) - self.assertEqual(messages[-1].content, content) - self.assertTrue("Cutting Knowledge Date: December 2023" in messages[0].content) - self.assertTrue("Tools: brave_search" in messages[0].content) +@pytest.mark.asyncio +async def test_system_builtin_only(): + content = "Hello !" + request = ChatCompletionRequest( + model=MODEL, + messages=[ + UserMessage(content=content), + ], + tools=[ + ToolDefinition(tool_name=BuiltinTool.code_interpreter), + ToolDefinition(tool_name=BuiltinTool.brave_search), + ], + ) + messages = chat_completion_request_to_messages(request, MODEL) + assert len(messages) == 2 + assert messages[-1].content == content + assert "Cutting Knowledge Date: December 2023" in interleaved_content_as_str(messages[0].content) + assert "Tools: brave_search" in interleaved_content_as_str(messages[0].content) - async def test_system_custom_only(self): - content = "Hello !" - request = ChatCompletionRequest( - model=MODEL, - messages=[ - UserMessage(content=content), - ], - tools=[ - ToolDefinition( - tool_name="custom1", - description="custom1 tool", - parameters={ - "param1": ToolParamDefinition( - param_type="str", - description="param1 description", - required=True, - ), - }, - ) - ], - tool_config=ToolConfig(tool_prompt_format=ToolPromptFormat.json), - ) - messages = chat_completion_request_to_messages(request, MODEL) - self.assertEqual(len(messages), 3) - self.assertTrue("Environment: ipython" in messages[0].content) - self.assertTrue("Return function calls in JSON format" in messages[1].content) - self.assertEqual(messages[-1].content, content) +@pytest.mark.asyncio +async def test_system_custom_only(): + content = "Hello !" + request = ChatCompletionRequest( + model=MODEL, + messages=[ + UserMessage(content=content), + ], + tools=[ + ToolDefinition( + tool_name="custom1", + description="custom1 tool", + parameters={ + "param1": ToolParamDefinition( + param_type="str", + description="param1 description", + required=True, + ), + }, + ) + ], + tool_config=ToolConfig(tool_prompt_format=ToolPromptFormat.json), + ) + messages = chat_completion_request_to_messages(request, MODEL) + assert len(messages) == 3 + assert "Environment: ipython" in interleaved_content_as_str(messages[0].content) - async def test_system_custom_and_builtin(self): - content = "Hello !" 
- request = ChatCompletionRequest( - model=MODEL, - messages=[ - UserMessage(content=content), - ], - tools=[ - ToolDefinition(tool_name=BuiltinTool.code_interpreter), - ToolDefinition(tool_name=BuiltinTool.brave_search), - ToolDefinition( - tool_name="custom1", - description="custom1 tool", - parameters={ - "param1": ToolParamDefinition( - param_type="str", - description="param1 description", - required=True, - ), - }, - ), - ], - ) - messages = chat_completion_request_to_messages(request, MODEL) - self.assertEqual(len(messages), 3) + assert "Return function calls in JSON format" in interleaved_content_as_str(messages[1].content) + assert messages[-1].content == content - self.assertTrue("Environment: ipython" in messages[0].content) - self.assertTrue("Tools: brave_search" in messages[0].content) - self.assertTrue("Return function calls in JSON format" in messages[1].content) - self.assertEqual(messages[-1].content, content) - - async def test_completion_message_encoding(self): - request = ChatCompletionRequest( - model=MODEL3_2, - messages=[ - UserMessage(content="hello"), - CompletionMessage( - content="", - stop_reason=StopReason.end_of_turn, - tool_calls=[ - ToolCall( - tool_name="custom1", - arguments={"param1": "value1"}, - call_id="123", - ) - ], - ), - ], - tools=[ - ToolDefinition( - tool_name="custom1", - description="custom1 tool", - parameters={ - "param1": ToolParamDefinition( - param_type="str", - description="param1 description", - required=True, - ), - }, - ), - ], - tool_config=ToolConfig(tool_prompt_format=ToolPromptFormat.python_list), - ) - prompt = await chat_completion_request_to_prompt(request, request.model) - self.assertIn('[custom1(param1="value1")]', prompt) - - request.model = MODEL - request.tool_config.tool_prompt_format = ToolPromptFormat.json - prompt = await chat_completion_request_to_prompt(request, request.model) - self.assertIn( - '{"type": "function", "name": "custom1", "parameters": {"param1": "value1"}}', - prompt, - ) - - async def test_user_provided_system_message(self): - content = "Hello !" - system_prompt = "You are a pirate" - request = ChatCompletionRequest( - model=MODEL, - messages=[ - SystemMessage(content=system_prompt), - UserMessage(content=content), - ], - tools=[ - ToolDefinition(tool_name=BuiltinTool.code_interpreter), - ], - ) - messages = chat_completion_request_to_messages(request, MODEL) - self.assertEqual(len(messages), 2, messages) - self.assertTrue(messages[0].content.endswith(system_prompt)) - - self.assertEqual(messages[-1].content, content) - - async def test_repalce_system_message_behavior_builtin_tools(self): - content = "Hello !" - system_prompt = "You are a pirate" - request = ChatCompletionRequest( - model=MODEL, - messages=[ - SystemMessage(content=system_prompt), - UserMessage(content=content), - ], - tools=[ - ToolDefinition(tool_name=BuiltinTool.code_interpreter), - ], - tool_config=ToolConfig( - tool_choice="auto", - tool_prompt_format="python_list", - system_message_behavior="replace", +@pytest.mark.asyncio +async def test_system_custom_and_builtin(): + content = "Hello !" 
+ request = ChatCompletionRequest( + model=MODEL, + messages=[ + UserMessage(content=content), + ], + tools=[ + ToolDefinition(tool_name=BuiltinTool.code_interpreter), + ToolDefinition(tool_name=BuiltinTool.brave_search), + ToolDefinition( + tool_name="custom1", + description="custom1 tool", + parameters={ + "param1": ToolParamDefinition( + param_type="str", + description="param1 description", + required=True, + ), + }, ), - ) - messages = chat_completion_request_to_messages(request, MODEL3_2) - self.assertEqual(len(messages), 2, messages) - self.assertTrue(messages[0].content.endswith(system_prompt)) - self.assertIn("Environment: ipython", messages[0].content) - self.assertEqual(messages[-1].content, content) + ], + ) + messages = chat_completion_request_to_messages(request, MODEL) + assert len(messages) == 3 - async def test_repalce_system_message_behavior_custom_tools(self): - content = "Hello !" - system_prompt = "You are a pirate" - request = ChatCompletionRequest( - model=MODEL, - messages=[ - SystemMessage(content=system_prompt), - UserMessage(content=content), - ], - tools=[ - ToolDefinition(tool_name=BuiltinTool.code_interpreter), - ToolDefinition( - tool_name="custom1", - description="custom1 tool", - parameters={ - "param1": ToolParamDefinition( - param_type="str", - description="param1 description", - required=True, - ), - }, - ), - ], - tool_config=ToolConfig( - tool_choice="auto", - tool_prompt_format="python_list", - system_message_behavior="replace", + assert "Environment: ipython" in interleaved_content_as_str(messages[0].content) + assert "Tools: brave_search" in interleaved_content_as_str(messages[0].content) + + assert "Return function calls in JSON format" in interleaved_content_as_str(messages[1].content) + assert messages[-1].content == content + + +@pytest.mark.asyncio +async def test_completion_message_encoding(): + request = ChatCompletionRequest( + model=MODEL3_2, + messages=[ + UserMessage(content="hello"), + CompletionMessage( + content="", + stop_reason=StopReason.end_of_turn, + tool_calls=[ + ToolCall( + tool_name="custom1", + arguments={"param1": "value1"}, + call_id="123", + ) + ], ), - ) - messages = chat_completion_request_to_messages(request, MODEL3_2) - - self.assertEqual(len(messages), 2, messages) - self.assertTrue(messages[0].content.endswith(system_prompt)) - self.assertIn("Environment: ipython", messages[0].content) - self.assertEqual(messages[-1].content, content) - - async def test_replace_system_message_behavior_custom_tools_with_template(self): - content = "Hello !" 
- system_prompt = "You are a pirate {{ function_description }}" - request = ChatCompletionRequest( - model=MODEL, - messages=[ - SystemMessage(content=system_prompt), - UserMessage(content=content), - ], - tools=[ - ToolDefinition(tool_name=BuiltinTool.code_interpreter), - ToolDefinition( - tool_name="custom1", - description="custom1 tool", - parameters={ - "param1": ToolParamDefinition( - param_type="str", - description="param1 description", - required=True, - ), - }, - ), - ], - tool_config=ToolConfig( - tool_choice="auto", - tool_prompt_format="python_list", - system_message_behavior="replace", + ], + tools=[ + ToolDefinition( + tool_name="custom1", + description="custom1 tool", + parameters={ + "param1": ToolParamDefinition( + param_type="str", + description="param1 description", + required=True, + ), + }, ), - ) - messages = chat_completion_request_to_messages(request, MODEL3_2) + ], + tool_config=ToolConfig(tool_prompt_format=ToolPromptFormat.python_list), + ) + prompt = await chat_completion_request_to_prompt(request, request.model) + assert '[custom1(param1="value1")]' in prompt - self.assertEqual(len(messages), 2, messages) - self.assertIn("Environment: ipython", messages[0].content) - self.assertIn("You are a pirate", messages[0].content) - # function description is present in the system prompt - self.assertIn('"name": "custom1"', messages[0].content) - self.assertEqual(messages[-1].content, content) + request.model = MODEL + request.tool_config = ToolConfig(tool_prompt_format=ToolPromptFormat.json) + prompt = await chat_completion_request_to_prompt(request, request.model) + assert '{"type": "function", "name": "custom1", "parameters": {"param1": "value1"}}' in prompt + + +@pytest.mark.asyncio +async def test_user_provided_system_message(): + content = "Hello !" + system_prompt = "You are a pirate" + request = ChatCompletionRequest( + model=MODEL, + messages=[ + SystemMessage(content=system_prompt), + UserMessage(content=content), + ], + tools=[ + ToolDefinition(tool_name=BuiltinTool.code_interpreter), + ], + ) + messages = chat_completion_request_to_messages(request, MODEL) + assert len(messages) == 2 + assert interleaved_content_as_str(messages[0].content).endswith(system_prompt) + + assert messages[-1].content == content + + +@pytest.mark.asyncio +async def test_replace_system_message_behavior_builtin_tools(): + content = "Hello !" + system_prompt = "You are a pirate" + request = ChatCompletionRequest( + model=MODEL, + messages=[ + SystemMessage(content=system_prompt), + UserMessage(content=content), + ], + tools=[ + ToolDefinition(tool_name=BuiltinTool.code_interpreter), + ], + tool_config=ToolConfig( + tool_choice="auto", + tool_prompt_format=ToolPromptFormat.python_list, + system_message_behavior=SystemMessageBehavior.replace, + ), + ) + messages = chat_completion_request_to_messages(request, MODEL3_2) + assert len(messages) == 2 + assert interleaved_content_as_str(messages[0].content).endswith(system_prompt) + assert "Environment: ipython" in interleaved_content_as_str(messages[0].content) + assert messages[-1].content == content + + +@pytest.mark.asyncio +async def test_replace_system_message_behavior_custom_tools(): + content = "Hello !" 
+ system_prompt = "You are a pirate" + request = ChatCompletionRequest( + model=MODEL, + messages=[ + SystemMessage(content=system_prompt), + UserMessage(content=content), + ], + tools=[ + ToolDefinition(tool_name=BuiltinTool.code_interpreter), + ToolDefinition( + tool_name="custom1", + description="custom1 tool", + parameters={ + "param1": ToolParamDefinition( + param_type="str", + description="param1 description", + required=True, + ), + }, + ), + ], + tool_config=ToolConfig( + tool_choice="auto", + tool_prompt_format=ToolPromptFormat.python_list, + system_message_behavior=SystemMessageBehavior.replace, + ), + ) + messages = chat_completion_request_to_messages(request, MODEL3_2) + + assert len(messages) == 2 + assert interleaved_content_as_str(messages[0].content).endswith(system_prompt) + assert "Environment: ipython" in interleaved_content_as_str(messages[0].content) + assert messages[-1].content == content + + +@pytest.mark.asyncio +async def test_replace_system_message_behavior_custom_tools_with_template(): + content = "Hello !" + system_prompt = "You are a pirate {{ function_description }}" + request = ChatCompletionRequest( + model=MODEL, + messages=[ + SystemMessage(content=system_prompt), + UserMessage(content=content), + ], + tools=[ + ToolDefinition(tool_name=BuiltinTool.code_interpreter), + ToolDefinition( + tool_name="custom1", + description="custom1 tool", + parameters={ + "param1": ToolParamDefinition( + param_type="str", + description="param1 description", + required=True, + ), + }, + ), + ], + tool_config=ToolConfig( + tool_choice="auto", + tool_prompt_format=ToolPromptFormat.python_list, + system_message_behavior=SystemMessageBehavior.replace, + ), + ) + messages = chat_completion_request_to_messages(request, MODEL3_2) + + assert len(messages) == 2 + assert "Environment: ipython" in interleaved_content_as_str(messages[0].content) + assert "You are a pirate" in interleaved_content_as_str(messages[0].content) + # function description is present in the system prompt + assert '"name": "custom1"' in interleaved_content_as_str(messages[0].content) + assert messages[-1].content == content From 910b0176800243c6eab3a3ab500f452d664028cb Mon Sep 17 00:00:00 2001 From: Matthew Farrellee Date: Thu, 17 Jul 2025 19:33:30 -0400 Subject: [PATCH 4/5] chore: block asyncio marks in tests (#2744) # What does this PR do? use pre-commit to block addition of new asyncio marks, since we configure pytest with async-mode=auto, see https://github.com/meta-llama/llama-stack/pull/2730 --- .pre-commit-config.yaml | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 3c744c6bc..cf72ecd0e 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -129,6 +129,22 @@ repos: require_serial: true always_run: true files: ^llama_stack/.*$ + - id: forbid-pytest-asyncio + name: Block @pytest.mark.asyncio and @pytest_asyncio.fixture + entry: bash + language: system + types: [python] + pass_filenames: true + args: + - -c + - | + grep -EnH '^[^#]*@pytest\.mark\.asyncio|@pytest_asyncio\.fixture' "$@" && { + echo; + echo "❌ Do not use @pytest.mark.asyncio or @pytest_asyncio.fixture." + echo " pytest is already configured with async-mode=auto." 
+ echo; + exit 1; + } || true ci: autofix_commit_msg: 🎨 [pre-commit.ci] Auto format from pre-commit.com hooks From d64e096c5f8a30f1d8455baca2250e13c73d77c3 Mon Sep 17 00:00:00 2001 From: Ashwin Bharambe Date: Thu, 17 Jul 2025 16:40:35 -0700 Subject: [PATCH 5/5] fix(cli): image name should not default to CONDA_DEFAULT_ENV (#2806) If I am running `uv run llama stack run --image-type venv` it should not be saying to me "Conda detected" because I am pretty clearly telling it I need venv. The root cause is the offending line. --- llama_stack/cli/stack/run.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/llama_stack/cli/stack/run.py b/llama_stack/cli/stack/run.py index 1d6c475f2..f4a119522 100644 --- a/llama_stack/cli/stack/run.py +++ b/llama_stack/cli/stack/run.py @@ -47,8 +47,7 @@ class StackRun(Subcommand): self.parser.add_argument( "--image-name", type=str, - default=os.environ.get("CONDA_DEFAULT_ENV"), - help="Name of the image to run. Defaults to the current environment", + help="Name of the image to run.", ) self.parser.add_argument( "--env",