# Mirror of https://github.com/meta-llama/llama-stack.git
# Synced 2025-07-24 13:19:54 +00:00
# Upstream file as listed by the mirror: 136 lines, 4.6 KiB, YAML
# Single-turn cases: each case pairs one `input` prompt with an `output` string.
# NOTE(review): how `output` is compared against the model response is decided
# by the test harness, which is not visible here — confirm.
test_response_basic:
  test_name: test_response_basic
  test_params:
    case:
      - case_id: "earth"
        input: "Which planet do humans live on?"
        output: "earth"
      - case_id: "saturn"
        input: "Which planet has rings around it with a name starting with letter S?"
        output: "saturn"

# Multi-turn case: `turns` is an ordered list of input/output pairs. The second
# turn's prompt refers back to the previous response.
test_response_multi_turn:
  test_name: test_response_multi_turn
  test_params:
    case:
      - case_id: "earth"
        turns:
          - input: "Which planet do humans live on?"
            output: "earth"
          - input: "What is the name of the planet from your previous response?"
            output: "earth"

# Single-turn case that attaches one `web_search` tool to the request.
test_response_web_search:
  test_name: test_response_web_search
  test_params:
    case:
      - case_id: "llama_experts"
        input: "How many experts does the Llama 4 Maverick model have?"
        tools:
          - type: web_search
            search_context_size: "low"
        # Quoted so the expected value stays a string rather than an integer.
        output: "128"

# Single-turn case that attaches one `mcp` tool to the request.
test_response_mcp_tool:
  test_name: test_response_mcp_tool
  test_params:
    case:
      - case_id: "boiling_point_tool"
        input: "What is the boiling point of polyjuice?"
        tools:
          - type: mcp
            server_label: "localmcp"
            # Placeholder value; per its own text it is filled in by the test runner.
            server_url: "<FILLED_BY_TEST_RUNNER>"
        output: "Hello, world!"

# Single-turn case that attaches one `function` tool (`get_weather`) whose
# `parameters` mapping is a JSON-schema-style object with one required string
# property, `location`.
# Unlike the other stanzas, this case defines no `output` key.
# NOTE(review): presumably the harness checks the emitted tool call instead of
# response text — confirm against the test code.
test_response_custom_tool:
  test_name: test_response_custom_tool
  test_params:
    case:
      - case_id: "sf_weather"
        input: "What's the weather like in San Francisco?"
        tools:
          - type: function
            name: get_weather
            description: Get current temperature for a given location.
            parameters:
              additionalProperties: false
              properties:
                location:
                  # `\xE1` is a YAML double-quoted escape for U+00E1 ("á");
                  # it must stay inside double quotes to be interpreted.
                  description: "City and country e.g. Bogot\xE1, Colombia"
                  type: string
              required:
                - location
              type: object

# Single-turn case whose `input` is a structured message list rather than a
# plain string: one user message carrying a text part and an image part.
test_response_image:
  test_name: test_response_image
  test_params:
    case:
      - case_id: "llama_image"
        input:
          - role: user
            content:
              - type: input_text
                text: "Identify the type of animal in this image."
              - type: input_image
                image_url: "https://upload.wikimedia.org/wikipedia/commons/f/f7/Llamas%2C_Vernagt-Stausee%2C_Italy.jpg"
        output: "llama"

# The models are really poor at tool calling after seeing images :/
# NOTE(review): presumably this is why the follow-up turn below is a plain
# question rather than a tool call — confirm.
#
# Multi-turn case: the first turn sends a structured message (text + image),
# the second turn is a plain string that refers back to the first answer.
test_response_multi_turn_image:
  test_name: test_response_multi_turn_image
  test_params:
    case:
      - case_id: "llama_image_understanding"
        turns:
          - input:
              - role: user
                content:
                  - type: input_text
                    text: "What type of animal is in this image? Please respond with a single word that starts with the letter 'L'."
                  - type: input_image
                    image_url: "https://upload.wikimedia.org/wikipedia/commons/f/f7/Llamas%2C_Vernagt-Stausee%2C_Italy.jpg"
            output: "llama"
          - input: "What country do you find this animal primarily in? What continent?"
            output: "peru"

# Cases whose single prompt asks for two dependent lookups (fetch an ID, then
# use it). Each case attaches one `mcp` tool.
test_response_multi_turn_tool_execution:
  test_name: test_response_multi_turn_tool_execution
  test_params:
    case:
      - case_id: "user_file_access_check"
        input: "I need to check if user 'alice' can access the file 'document.txt'. First, get alice's user ID, then check if that user ID can access the file 'document.txt'. Tell me the final result."
        tools:
          - type: mcp
            server_label: "localmcp"
            # Placeholder value; per its own text it is filled in by the test runner.
            server_url: "<FILLED_BY_TEST_RUNNER>"
        # Must stay quoted: a bare `yes` is a boolean under YAML 1.1 loaders.
        output: "yes"
      - case_id: "experiment_results_lookup"
        input: "I need to get the results for the 'boiling_point' experiment. First, get the experiment ID for 'boiling_point', then use that ID to get the experiment results. Tell me what you found."
        tools:
          - type: mcp
            server_label: "localmcp"
            server_url: "<FILLED_BY_TEST_RUNNER>"
        output: "100°C"

# Cases with chained-lookup prompts that also set `stream: true`. Each case
# attaches one `mcp` tool.
# NOTE(review): `stream` is placed at case level (sibling of `tools` and
# `output`); the original indentation was lost, so confirm against the harness.
test_response_multi_turn_tool_execution_streaming:
  test_name: test_response_multi_turn_tool_execution_streaming
  test_params:
    case:
      - case_id: "user_permissions_workflow"
        input: "Help me with this security check: First, get the user ID for 'charlie', then get the permissions for that user ID, and finally check if that user can access 'secret_file.txt'. Stream your progress as you work through each step."
        tools:
          - type: mcp
            server_label: "localmcp"
            # Placeholder value; per its own text it is filled in by the test runner.
            server_url: "<FILLED_BY_TEST_RUNNER>"
        stream: true
        # Must stay quoted: a bare `no` is a boolean under YAML 1.1 loaders.
        output: "no"
      - case_id: "experiment_analysis_streaming"
        input: "I need a complete analysis: First, get the experiment ID for 'chemical_reaction', then get the results for that experiment, and tell me if the yield was above 80%. Please stream your analysis process."
        tools:
          - type: mcp
            server_label: "localmcp"
            server_url: "<FILLED_BY_TEST_RUNNER>"
        stream: true
        output: "85%"