mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-07-23 12:57:11 +00:00
Continues the refactor of tests. Tests from `providers/tests` should be considered deprecated. For this PR, I deleted most of the tests in - inference - safety - agents since much more comprehensive tests exist in `tests/integration/{inference,safety,agents}` already. I moved `test_persistence.py` from agents, but disabled all the tests since that test needs to be properly migrated. ## Test Plan ``` LLAMA_STACK_CONFIG=fireworks pytest -s -v agents --vision-inference-model='' /Users/ashwin/homebrew/Caskroom/miniconda/base/envs/toolchain/lib/python3.10/site-packages/pytest_asyncio/plugin.py:208: PytestDeprecationWarning: The configuration option "asyncio_default_fixture_loop_scope" is unset. The event loop scope for asynchronous fixtures will default to the fixture caching scope. Future versions of pytest-asyncio will default the loop scope for asynchronous fixtures to function scope. Set the default fixture loop scope explicitly in order to avoid unexpected behavior in the future. Valid fixture loop scopes are: "function", "class", "module", "package", "session" warnings.warn(PytestDeprecationWarning(_DEFAULT_FIXTURE_LOOP_SCOPE_UNSET)) ======================================================================================================= test session starts ======================================================================================================== platform darwin -- Python 3.10.16, pytest-8.3.3, pluggy-1.5.0 -- /Users/ashwin/homebrew/Caskroom/miniconda/base/envs/toolchain/bin/python cachedir: .pytest_cache metadata: {'Python': '3.10.16', 'Platform': 'macOS-15.3.1-arm64-arm-64bit', 'Packages': {'pytest': '8.3.3', 'pluggy': '1.5.0'}, 'Plugins': {'asyncio': '0.24.0', 'html': '4.1.1', 'metadata': '3.1.1', 'anyio': '4.8.0', 'nbval': '0.11.0'}} rootdir: /Users/ashwin/local/llama-stack configfile: pyproject.toml plugins: asyncio-0.24.0, html-4.1.1, metadata-3.1.1, anyio-4.8.0, nbval-0.11.0 asyncio: mode=strict, default_loop_scope=None collected 15 items 
agents/test_agents.py::test_agent_simple[txt=8B] PASSED agents/test_agents.py::test_tool_config[txt=8B] PASSED agents/test_agents.py::test_builtin_tool_web_search[txt=8B] PASSED agents/test_agents.py::test_builtin_tool_code_execution[txt=8B] PASSED agents/test_agents.py::test_code_interpreter_for_attachments[txt=8B] PASSED agents/test_agents.py::test_custom_tool[txt=8B] PASSED agents/test_agents.py::test_custom_tool_infinite_loop[txt=8B] PASSED agents/test_agents.py::test_tool_choice[txt=8B] PASSED agents/test_agents.py::test_rag_agent[txt=8B-builtin::rag/knowledge_search] PASSED agents/test_agents.py::test_rag_agent[txt=8B-builtin::rag] PASSED agents/test_agents.py::test_rag_agent_with_attachments[txt=8B] PASSED agents/test_agents.py::test_rag_and_code_agent[txt=8B] PASSED agents/test_agents.py::test_create_turn_response[txt=8B] PASSED agents/test_persistence.py::test_delete_agents_and_sessions SKIPPED (This test needs to be migrated to api / client-sdk world) agents/test_persistence.py::test_get_agent_turns_and_steps SKIPPED (This test needs to be migrated to api / client-sdk world) ```
172 lines
4.4 KiB
JSON
172 lines
4.4 KiB
JSON
{
|
|
"non_streaming_01": {
|
|
"data": {
|
|
"question": "Which planet do humans live on?",
|
|
"expected": "Earth"
|
|
}
|
|
},
|
|
"non_streaming_02": {
|
|
"data": {
|
|
"question": "Which planet has rings around it with a name starting with letter S?",
|
|
"expected": "Saturn"
|
|
}
|
|
},
|
|
"sample_messages": {
|
|
"data": {
|
|
"messages": [
|
|
{
|
|
"role": "system",
|
|
"content": "You are a helpful assistant."
|
|
},
|
|
{
|
|
"role": "user",
|
|
"content": "What's the weather like today?"
|
|
}
|
|
]
|
|
}
|
|
},
|
|
"streaming_01": {
|
|
"data": {
|
|
"question": "What's the name of the Sun in Latin?",
|
|
"expected": "Sol"
|
|
}
|
|
},
|
|
"streaming_02": {
|
|
"data": {
|
|
"question": "What is the name of the US capital?",
|
|
"expected": "Washington"
|
|
}
|
|
},
|
|
"tool_calling": {
|
|
"data": {
|
|
"messages": [
|
|
{"role": "system", "content": "Pretend you are a weather assistant."},
|
|
{"role": "user", "content": "What's the weather like in San Francisco?"}
|
|
],
|
|
"tools": [
|
|
{
|
|
"tool_name": "get_weather",
|
|
"description": "Get the current weather",
|
|
"parameters": {
|
|
"location": {
|
|
"param_type": "string",
|
|
"description": "The city and state, e.g. San Francisco, CA"
|
|
}
|
|
}
|
|
}
|
|
],
|
|
"expected": {
|
|
"location": "San Francisco, CA"
|
|
}
|
|
}
|
|
},
|
|
"sample_messages_tool_calling": {
|
|
"data": {
|
|
"messages": [
|
|
{
|
|
"role": "system",
|
|
"content": "Pretend you are a weather assistant."
|
|
},
|
|
{
|
|
"role": "user",
|
|
"content": "What's the weather like today?"
|
|
},
|
|
{
|
|
"role": "user",
|
|
"content": "What's the weather like in San Francisco?"
|
|
}
|
|
],
|
|
"tools": [
|
|
{
|
|
"tool_name": "get_weather",
|
|
"description": "Get the current weather",
|
|
"parameters": {
|
|
"location": {
|
|
"param_type": "string",
|
|
"description": "The city and state, e.g. San Francisco, CA",
|
|
"required": true
|
|
}
|
|
}
|
|
}
|
|
],
|
|
"expected": {
|
|
"location": "San Francisco"
|
|
}
|
|
}
|
|
},
|
|
"structured_output": {
|
|
"data": {
|
|
"notes": "We include context about Michael Jordan in the prompt so that the test is focused on the functionality of the model and not on the information embedded in the model. Llama 3.2 3B Instruct tends to think MJ played for 14 seasons.",
|
|
"messages": [
|
|
{
|
|
"role": "system",
|
|
"content": "You are a helpful assistant. Michael Jordan was born in 1963. He played basketball for the Chicago Bulls for 15 seasons."
|
|
},
|
|
{
|
|
"role": "user",
|
|
"content": "Please give me information about Michael Jordan."
|
|
}
|
|
],
|
|
"expected": {
|
|
"first_name": "Michael",
|
|
"last_name": "Jordan",
|
|
"year_of_birth": 1963,
|
|
"num_seasons_in_nba": 15,
|
|
"year_for_draft": 1984
|
|
}
|
|
}
|
|
},
|
|
"tool_calling_tools_absent": {
|
|
"data": {
|
|
"messages": [
|
|
{
|
|
"role": "system",
|
|
"content": "You are a helpful assistant."
|
|
},
|
|
{
|
|
"role": "user",
|
|
"content": "What pods are in the namespace openshift-lightspeed?"
|
|
},
|
|
{
|
|
"role": "assistant",
|
|
"content": "",
|
|
"stop_reason": "end_of_turn",
|
|
"tool_calls": [
|
|
{
|
|
"call_id": "1",
|
|
"tool_name": "get_object_namespace_list",
|
|
"arguments": {
|
|
"kind": "pod",
|
|
"namespace": "openshift-lightspeed"
|
|
}
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "tool",
|
|
"call_id": "1",
|
|
"tool_name": "get_object_namespace_list",
|
|
"content": "the objects are pod1, pod2, pod3"
|
|
}
|
|
],
|
|
"tools": [
|
|
{
|
|
"tool_name": "get_object_namespace_list",
|
|
"description": "Get the list of objects in a namespace",
|
|
"parameters": {
|
|
"kind": {
|
|
"param_type": "string",
|
|
"description": "the type of object",
|
|
"required": true
|
|
},
|
|
"namespace": {
|
|
"param_type": "string",
|
|
"description": "the name of the namespace",
|
|
"required": true
|
|
}
|
|
}
|
|
}
|
|
]
|
|
}
|
|
}
|
|
}
|