fixes, update test to be more robust

This commit is contained in:
Ashwin Bharambe 2025-05-27 12:46:03 -07:00
parent 70d3e4bd67
commit cad646478f
2 changed files with 20 additions and 29 deletions

View file

@ -77,11 +77,12 @@ test_response_image:
image_url: "https://upload.wikimedia.org/wikipedia/commons/f/f7/Llamas%2C_Vernagt-Stausee%2C_Italy.jpg"
output: "llama"
# the models are really poor at tool calling after seeing images :/
test_response_multi_turn_image:
test_name: test_response_multi_turn_image
test_params:
case:
- case_id: "llama_image_search"
- case_id: "llama_image_understanding"
turns:
- input:
- role: user
@ -91,7 +92,5 @@ test_response_multi_turn_image:
- type: input_image
image_url: "https://upload.wikimedia.org/wikipedia/commons/f/f7/Llamas%2C_Vernagt-Stausee%2C_Italy.jpg"
output: "llama"
- input: "Search the web using the search tool for the animal from the previous response. Your search query should be a single phrase that includes the animal's name and the words 'maverick', 'scout' and 'llm'"
tools:
- type: web_search
output: "model"
- input: "What country do you find this animal primarily in? What continent?"
output: "peru"