mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-07-06 14:00:42 +00:00
test(verification): more tests, multiturn tool use tests (#1954)
# What does this PR do?
## Test Plan
(myenv) ➜ llama-stack python tests/verifications/generate_report.py
--providers fireworks,together,openai --run-tests
f27f617629/tests/verifications/REPORT.md
This commit is contained in:
parent
86c6f1f112
commit
32e3da7392
6 changed files with 6274 additions and 599 deletions
|
@ -131,3 +131,221 @@ test_tool_calling:
|
|||
type: object
|
||||
type: function
|
||||
output: get_weather_tool_call
|
||||
|
||||
test_chat_multi_turn_tool_calling:
|
||||
test_name: test_chat_multi_turn_tool_calling
|
||||
test_params:
|
||||
case:
|
||||
- case_id: "text_then_weather_tool"
|
||||
input:
|
||||
messages:
|
||||
- - role: user
|
||||
content: "What's the name of the Sun in latin?"
|
||||
- - role: user
|
||||
content: "What's the weather like in San Francisco?"
|
||||
tools:
|
||||
- function:
|
||||
description: Get the current weather
|
||||
name: get_weather
|
||||
parameters:
|
||||
type: object
|
||||
properties:
|
||||
location:
|
||||
description: "The city and state (both required), e.g. San Francisco, CA."
|
||||
type: string
|
||||
required: ["location"]
|
||||
type: function
|
||||
tool_responses:
|
||||
- response: "{'response': '70 degrees and foggy'}"
|
||||
expected:
|
||||
- num_tool_calls: 0
|
||||
answer: ["sol"]
|
||||
- num_tool_calls: 1
|
||||
tool_name: get_weather
|
||||
tool_arguments:
|
||||
location: "San Francisco, CA"
|
||||
- num_tool_calls: 0
|
||||
answer: ["foggy", "70 degrees"]
|
||||
- case_id: "weather_tool_then_text"
|
||||
input:
|
||||
messages:
|
||||
- - role: user
|
||||
content: "What's the weather like in San Francisco?"
|
||||
tools:
|
||||
- function:
|
||||
description: Get the current weather
|
||||
name: get_weather
|
||||
parameters:
|
||||
type: object
|
||||
properties:
|
||||
location:
|
||||
description: "The city and state (both required), e.g. San Francisco, CA."
|
||||
type: string
|
||||
required: ["location"]
|
||||
type: function
|
||||
tool_responses:
|
||||
- response: "{'response': '70 degrees and foggy'}"
|
||||
expected:
|
||||
- num_tool_calls: 1
|
||||
tool_name: get_weather
|
||||
tool_arguments:
|
||||
location: "San Francisco, CA"
|
||||
- num_tool_calls: 0
|
||||
answer: ["foggy", "70 degrees"]
|
||||
- case_id: "add_product_tool"
|
||||
input:
|
||||
messages:
|
||||
- - role: user
|
||||
content: "Please add a new product with name 'Widget', price 19.99, in stock, and tags ['new', 'sale'] and give me the product id."
|
||||
tools:
|
||||
- function:
|
||||
description: Add a new product
|
||||
name: addProduct
|
||||
parameters:
|
||||
type: object
|
||||
properties:
|
||||
name:
|
||||
description: "Name of the product"
|
||||
type: string
|
||||
price:
|
||||
description: "Price of the product"
|
||||
type: number
|
||||
inStock:
|
||||
description: "Availability status of the product."
|
||||
type: boolean
|
||||
tags:
|
||||
description: "List of product tags"
|
||||
type: array
|
||||
items:
|
||||
type: string
|
||||
required: ["name", "price", "inStock"]
|
||||
type: function
|
||||
tool_responses:
|
||||
- response: "{'response': 'Successfully added product with id: 123'}"
|
||||
expected:
|
||||
- num_tool_calls: 1
|
||||
tool_name: addProduct
|
||||
tool_arguments:
|
||||
name: "Widget"
|
||||
price: 19.99
|
||||
inStock: true
|
||||
tags:
|
||||
- "new"
|
||||
- "sale"
|
||||
- num_tool_calls: 0
|
||||
answer: ["123", "product id: 123"]
|
||||
- case_id: "get_then_create_event_tool"
|
||||
input:
|
||||
messages:
|
||||
- - role: system
|
||||
content: "Todays date is 2025-03-01."
|
||||
- role: user
|
||||
content: "Do i have any meetings on March 3rd at 10 am? Yes or no?"
|
||||
- - role: user
|
||||
content: "Alright then, Create an event named 'Team Building', scheduled for that time same time, in the 'Main Conference Room' and add Alice, Bob, Charlie to it. Give me the created event id."
|
||||
tools:
|
||||
- function:
|
||||
description: Create a new event
|
||||
name: create_event
|
||||
parameters:
|
||||
type: object
|
||||
properties:
|
||||
name:
|
||||
description: "Name of the event"
|
||||
type: string
|
||||
date:
|
||||
description: "Date of the event in ISO format"
|
||||
type: string
|
||||
time:
|
||||
description: "Event Time (HH:MM)"
|
||||
type: string
|
||||
location:
|
||||
description: "Location of the event"
|
||||
type: string
|
||||
participants:
|
||||
description: "List of participant names"
|
||||
type: array
|
||||
items:
|
||||
type: string
|
||||
required: ["name", "date", "time", "location", "participants"]
|
||||
type: function
|
||||
- function:
|
||||
description: Get an event by date and time
|
||||
name: get_event
|
||||
parameters:
|
||||
type: object
|
||||
properties:
|
||||
date:
|
||||
description: "Date of the event in ISO format"
|
||||
type: string
|
||||
time:
|
||||
description: "Event Time (HH:MM)"
|
||||
type: string
|
||||
required: ["date", "time"]
|
||||
type: function
|
||||
tool_responses:
|
||||
- response: "{'response': 'No events found for 2025-03-03 at 10:00'}"
|
||||
- response: "{'response': 'Successfully created new event with id: e_123'}"
|
||||
expected:
|
||||
- num_tool_calls: 1
|
||||
tool_name: get_event
|
||||
tool_arguments:
|
||||
date: "2025-03-03"
|
||||
time: "10:00"
|
||||
- num_tool_calls: 0
|
||||
answer: ["no", "no events found", "no meetings"]
|
||||
- num_tool_calls: 1
|
||||
tool_name: create_event
|
||||
tool_arguments:
|
||||
name: "Team Building"
|
||||
date: "2025-03-03"
|
||||
time: "10:00"
|
||||
location: "Main Conference Room"
|
||||
participants:
|
||||
- "Alice"
|
||||
- "Bob"
|
||||
- "Charlie"
|
||||
- num_tool_calls: 0
|
||||
answer: ["e_123", "event id: e_123"]
|
||||
- case_id: "compare_monthly_expense_tool"
|
||||
input:
|
||||
messages:
|
||||
- - role: system
|
||||
content: "Todays date is 2025-03-01."
|
||||
- role: user
|
||||
content: "what was my monthly expense in Jan of this year?"
|
||||
- - role: user
|
||||
content: "Was it less than Feb of last year? Only answer with yes or no."
|
||||
tools:
|
||||
- function:
|
||||
description: Get monthly expense summary
|
||||
name: getMonthlyExpenseSummary
|
||||
parameters:
|
||||
type: object
|
||||
properties:
|
||||
month:
|
||||
description: "Month of the year (1-12)"
|
||||
type: integer
|
||||
year:
|
||||
description: "Year"
|
||||
type: integer
|
||||
required: ["month", "year"]
|
||||
type: function
|
||||
tool_responses:
|
||||
- response: "{'response': 'Total expenses for January 2025: $1000'}"
|
||||
- response: "{'response': 'Total expenses for February 2024: $2000'}"
|
||||
expected:
|
||||
- num_tool_calls: 1
|
||||
tool_name: getMonthlyExpenseSummary
|
||||
tool_arguments:
|
||||
month: 1
|
||||
year: 2025
|
||||
- num_tool_calls: 0
|
||||
answer: ["1000", "$1,000", "1,000"]
|
||||
- num_tool_calls: 1
|
||||
tool_name: getMonthlyExpenseSummary
|
||||
tool_arguments:
|
||||
month: 2
|
||||
year: 2024
|
||||
- num_tool_calls: 0
|
||||
answer: ["yes"]
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue