diff --git a/tests/client-sdk/agents/test_agents.py b/tests/client-sdk/agents/test_agents.py index 9690a8139..bdd2d8a51 100644 --- a/tests/client-sdk/agents/test_agents.py +++ b/tests/client-sdk/agents/test_agents.py @@ -314,87 +314,6 @@ def test_tool_choice(llama_stack_client, agent_config): assert len(tool_execution_steps) >= 1 and tool_execution_steps[0].tool_calls[0].tool_name == "get_boiling_point" -# TODO: fix this flaky test -def xtest_override_system_message_behavior(llama_stack_client, agent_config): - client_tool = get_boiling_point - agent_config = { - **agent_config, - "instructions": "You are a pirate", - "client_tools": [client_tool.get_tool_definition()], - "model": "meta-llama/Llama-3.2-3B-Instruct", - } - - agent = Agent(llama_stack_client, agent_config, client_tools=(client_tool,)) - session_id = agent.create_session(f"test-session-{uuid4()}") - - response = agent.create_turn( - messages=[ - { - "role": "user", - "content": "tell me a joke about bicycles", - }, - ], - session_id=session_id, - ) - - logs = [str(log) for log in EventLogger().log(response) if log is not None] - logs_str = "".join(logs) - # can't tell a joke: "I don't have a function" - assert "function" in logs_str - - # with system message behavior replace - instructions = """ - You are a helpful assistant. You have access to functions, but you should only use them if they are required. - - You are an expert in composing functions. You are given a question and a set of possible functions. - Based on the question, you may or may not need to make one or more function/tool calls to achieve the purpose. - If none of the function can be used, don't return [], instead answer the question directly without using functions. If the given question lacks the parameters required by the function, - also point it out. - - {{ function_description }} - """ - agent_config = { - **agent_config, - "instructions": instructions, - "client_tools": [client_tool.get_tool_definition()], - "tool_config": { - "system_message_behavior": "replace", - }, - } - - agent = Agent(llama_stack_client, agent_config, client_tools=(client_tool,)) - session_id = agent.create_session(f"test-session-{uuid4()}") - - response = agent.create_turn( - messages=[ - { - "role": "user", - "content": "tell me a joke about bicycles", - }, - ], - session_id=session_id, - ) - - logs = [str(log) for log in EventLogger().log(response) if log is not None] - logs_str = "".join(logs) - assert "bicycle" in logs_str - - response = agent.create_turn( - messages=[ - { - "role": "user", - "content": "What is the boiling point of polyjuice?", - }, - ], - session_id=session_id, - ) - - logs = [str(log) for log in EventLogger().log(response) if log is not None] - logs_str = "".join(logs) - assert "-100" in logs_str - assert "get_boiling_point" in logs_str - - @pytest.mark.parametrize("rag_tool_name", ["builtin::rag/knowledge_search", "builtin::rag"]) def test_rag_agent(llama_stack_client, agent_config, rag_tool_name): urls = ["chat.rst", "llama3.rst", "memory_optimizations.rst", "lora_finetune.rst"]