forked from phoenix-oss/llama-stack-mirror
# What does this PR do? When clients called the OpenAI API with invalid input that wasn't caught by our own Pydantic API validation but was instead only caught by the backend inference provider, that backend inference provider was returning an HTTP 400 error. However, we were wrapping that into an HTTP 500 error, obfuscating the actual issue from calling clients and triggering OpenAI client retry logic. This change adjusts our existing `translate_exception` method in `server.py` to wrap `openai.BadRequestError` as HTTP 400 errors, passing through the string representation of the error message to the calling user so they can see the actual input validation error and correct it. I tried changing this in a few other places, but ultimately `translate_exception` was the only real place to handle this for both streaming and non-streaming requests across all inference providers that use the OpenAI server APIs. This also tightens up our validation a bit for the OpenAI chat completions API, to catch empty `messages` parameters, invalid `tool_choice` parameters, invalid `tools` items, or passing `tool_choice` when `tools` isn't given. Lastly, this extends our OpenAI API chat completions verifications to also check for consistent input validation across providers. Providers behind Llama Stack should automatically pass all the new tests due to the input validation added here, but some of the providers fail this test when not run behind Llama Stack due to differences in how they handle input validation and errors.
(Closes #1951) ## Test Plan To test this, start an OpenAI API verification stack: ``` llama stack run --image-type venv tests/verifications/openai-api-verification-run.yaml ``` Then, run the new verification tests with your provider(s) of choice: ``` python -m pytest -s -v \ tests/verifications/openai_api/test_chat_completion.py \ --provider openai-llama-stack python -m pytest -s -v \ tests/verifications/openai_api/test_chat_completion.py \ --provider together-llama-stack ``` Signed-off-by: Ben Browning <bbrownin@redhat.com>
397 lines
12 KiB
YAML
397 lines
12 KiB
YAML
# Single-turn chat cases: each case has an `input` request body and an
# `output` value listed for it.
# NOTE(review): nesting was reconstructed from a copy whose indentation had
# been flattened - confirm `output` belongs beside `input` in each case.
test_chat_basic:
  test_name: test_chat_basic
  test_params:
    case:
      - case_id: "earth"
        input:
          messages:
            - content: Which planet do humans live on?
              role: user
        output: Earth
      - case_id: "saturn"
        input:
          messages:
            - content: Which planet has rings around it with a name starting with letter S?
              role: user
        output: Saturn
# Invalid-request cases: every case lists `status_code: 400` under
# `output.error`.
# NOTE(review): nesting was reconstructed from a flattened copy; `tool_choice`
# and `tools` are placed inside `input` next to `messages` - confirm.
test_chat_input_validation:
  test_name: test_chat_input_validation
  test_params:
    case:
      # Empty message list.
      - case_id: "messages_missing"
        input:
          messages: []
        output:
          error:
            status_code: 400
      # Message role that is not a recognised value.
      - case_id: "messages_role_invalid"
        input:
          messages:
            - content: Which planet do humans live on?
              role: fake_role
        output:
          error:
            status_code: 400
      # `tool_choice` set to an unrecognised value.
      - case_id: "tool_choice_invalid"
        input:
          messages:
            - content: Which planet do humans live on?
              role: user
          tool_choice: invalid
        output:
          error:
            status_code: 400
      # `tool_choice` supplied without any `tools`.
      - case_id: "tool_choice_no_tools"
        input:
          messages:
            - content: Which planet do humans live on?
              role: user
          tool_choice: required
        output:
          error:
            status_code: 400
      # Tool entry with an unrecognised `type`.
      - case_id: "tools_type_invalid"
        input:
          messages:
            - content: Which planet do humans live on?
              role: user
          tools:
            - type: invalid
        output:
          error:
            status_code: 400
# Image case: one user message whose content is a list of two parts, a text
# part followed by an image_url part.
# NOTE(review): nesting was reconstructed from a flattened copy - confirm.
test_chat_image:
  test_name: test_chat_image
  test_params:
    case:
      - input:
          messages:
            - content:
                - text: What is in this image?
                  type: text
                - image_url:
                    url: https://upload.wikimedia.org/wikipedia/commons/f/f7/Llamas%2C_Vernagt-Stausee%2C_Italy.jpg
                  type: image_url
              role: user
        output: llama
# Structured-output cases: each `input` carries a `response_format` of type
# json_schema; `output` holds a symbolic name (valid_calendar_event,
# valid_math_reasoning).
# NOTE(review): nesting was reconstructed from a flattened copy; the JSON
# schema layout follows standard JSON Schema structure - confirm.
test_chat_structured_output:
  test_name: test_chat_structured_output
  test_params:
    case:
      - case_id: "calendar"
        input:
          messages:
            - content: Extract the event information.
              role: system
            - content: Alice and Bob are going to a science fair on Friday.
              role: user
          response_format:
            json_schema:
              name: calendar_event
              schema:
                properties:
                  date:
                    title: Date
                    type: string
                  name:
                    title: Name
                    type: string
                  participants:
                    items:
                      type: string
                    title: Participants
                    type: array
                required:
                  - name
                  - date
                  - participants
                title: CalendarEvent
                type: object
            type: json_schema
        output: valid_calendar_event
      - case_id: "math"
        input:
          messages:
            - content: You are a helpful math tutor. Guide the user through the solution step by step.
              role: system
            - content: how can I solve 8x + 7 = -23
              role: user
          response_format:
            json_schema:
              name: math_reasoning
              schema:
                # Reusable sub-schema referenced by `steps.items` below.
                $defs:
                  Step:
                    properties:
                      explanation:
                        title: Explanation
                        type: string
                      output:
                        title: Output
                        type: string
                    required:
                      - explanation
                      - output
                    title: Step
                    type: object
                properties:
                  final_answer:
                    title: Final Answer
                    type: string
                  steps:
                    items:
                      $ref: '#/$defs/Step'
                    title: Steps
                    type: array
                required:
                  - steps
                  - final_answer
                title: MathReasoning
                type: object
            type: json_schema
        output: valid_math_reasoning
# Single-turn tool-calling case: a system + user exchange with one function
# tool (get_weather) made available in the request.
# NOTE(review): nesting was reconstructed from a flattened copy; `tools` is
# placed inside `input` and `output` beside it - confirm.
test_tool_calling:
  test_name: test_tool_calling
  test_params:
    case:
      - input:
          messages:
            - content: You are a helpful assistant that can use tools to get information.
              role: system
            - content: What's the weather like in San Francisco?
              role: user
          tools:
            - function:
                description: Get current temperature for a given location.
                name: get_weather
                parameters:
                  additionalProperties: false
                  properties:
                    location:
                      # Double quotes are required so the \xE1 escape is decoded.
                      description: "City and country e.g. Bogot\xE1, Colombia"
                      type: string
                  required:
                    - location
                  type: object
              type: function
        output: get_weather_tool_call
# Multi-turn tool-calling cases. `messages` is a list of turns, and each turn
# is itself a list of messages (the `- -` nesting). `tool_responses` lists
# canned tool results and `expected` lists one entry per step, giving either
# a tool call (name + arguments) or acceptable answer strings.
# NOTE(review): nesting was reconstructed from a flattened copy. `tools` is
# placed inside `input`; `tool_responses` and `expected` are placed at case
# level beside `input` - confirm against the test harness that reads this.
test_chat_multi_turn_tool_calling:
  test_name: test_chat_multi_turn_tool_calling
  test_params:
    case:
      - case_id: "text_then_weather_tool"
        input:
          messages:
            - - role: user
                content: "What's the name of the Sun in latin?"
            - - role: user
                content: "What's the weather like in San Francisco?"
          tools:
            - function:
                description: Get the current weather
                name: get_weather
                parameters:
                  type: object
                  properties:
                    location:
                      description: "The city and state (both required), e.g. San Francisco, CA."
                      type: string
                  required: ["location"]
              type: function
        tool_responses:
          - response: "{'response': '70 degrees and foggy'}"
        expected:
          - num_tool_calls: 0
            answer: ["sol"]
          - num_tool_calls: 1
            tool_name: get_weather
            tool_arguments:
              location: "San Francisco, CA"
          - num_tool_calls: 0
            answer: ["foggy", "70 degrees"]
      - case_id: "weather_tool_then_text"
        input:
          messages:
            - - role: user
                content: "What's the weather like in San Francisco?"
          tools:
            - function:
                description: Get the current weather
                name: get_weather
                parameters:
                  type: object
                  properties:
                    location:
                      description: "The city and state (both required), e.g. San Francisco, CA."
                      type: string
                  required: ["location"]
              type: function
        tool_responses:
          - response: "{'response': '70 degrees and foggy'}"
        expected:
          - num_tool_calls: 1
            tool_name: get_weather
            tool_arguments:
              location: "San Francisco, CA"
          - num_tool_calls: 0
            answer: ["foggy", "70 degrees"]
      - case_id: "add_product_tool"
        input:
          messages:
            - - role: user
                content: "Please add a new product with name 'Widget', price 19.99, in stock, and tags ['new', 'sale'] and give me the product id."
          tools:
            - function:
                description: Add a new product
                name: addProduct
                parameters:
                  type: object
                  properties:
                    name:
                      description: "Name of the product"
                      type: string
                    price:
                      description: "Price of the product"
                      type: number
                    inStock:
                      description: "Availability status of the product."
                      type: boolean
                    tags:
                      description: "List of product tags"
                      type: array
                      items:
                        type: string
                  required: ["name", "price", "inStock"]
              type: function
        tool_responses:
          - response: "{'response': 'Successfully added product with id: 123'}"
        expected:
          - num_tool_calls: 1
            tool_name: addProduct
            tool_arguments:
              name: "Widget"
              price: 19.99
              inStock: true
              tags:
                - "new"
                - "sale"
          - num_tool_calls: 0
            answer: ["123", "product id: 123"]
      - case_id: "get_then_create_event_tool"
        input:
          messages:
            # First turn: system message plus the first user question.
            - - role: system
                content: "Todays date is 2025-03-01."
              - role: user
                content: "Do i have any meetings on March 3rd at 10 am? Yes or no?"
            # Second turn: follow-up user request.
            - - role: user
                content: "Alright then, Create an event named 'Team Building', scheduled for that time same time, in the 'Main Conference Room' and add Alice, Bob, Charlie to it. Give me the created event id."
          tools:
            - function:
                description: Create a new event
                name: create_event
                parameters:
                  type: object
                  properties:
                    name:
                      description: "Name of the event"
                      type: string
                    date:
                      description: "Date of the event in ISO format"
                      type: string
                    time:
                      description: "Event Time (HH:MM)"
                      type: string
                    location:
                      description: "Location of the event"
                      type: string
                    participants:
                      description: "List of participant names"
                      type: array
                      items:
                        type: string
                  required: ["name", "date", "time", "location", "participants"]
              type: function
            - function:
                description: Get an event by date and time
                name: get_event
                parameters:
                  type: object
                  properties:
                    date:
                      description: "Date of the event in ISO format"
                      type: string
                    time:
                      description: "Event Time (HH:MM)"
                      type: string
                  required: ["date", "time"]
              type: function
        tool_responses:
          - response: "{'response': 'No events found for 2025-03-03 at 10:00'}"
          - response: "{'response': 'Successfully created new event with id: e_123'}"
        expected:
          - num_tool_calls: 1
            tool_name: get_event
            tool_arguments:
              # Quoted so the values stay strings rather than date/sexagesimal types.
              date: "2025-03-03"
              time: "10:00"
          - num_tool_calls: 0
            answer: ["no", "no events found", "no meetings"]
          - num_tool_calls: 1
            tool_name: create_event
            tool_arguments:
              name: "Team Building"
              date: "2025-03-03"
              time: "10:00"
              location: "Main Conference Room"
              participants:
                - "Alice"
                - "Bob"
                - "Charlie"
          - num_tool_calls: 0
            answer: ["e_123", "event id: e_123"]
      - case_id: "compare_monthly_expense_tool"
        input:
          messages:
            # First turn: system message plus the first user question.
            - - role: system
                content: "Todays date is 2025-03-01."
              - role: user
                content: "what was my monthly expense in Jan of this year?"
            # Second turn: follow-up comparison question.
            - - role: user
                content: "Was it less than Feb of last year? Only answer with yes or no."
          tools:
            - function:
                description: Get monthly expense summary
                name: getMonthlyExpenseSummary
                parameters:
                  type: object
                  properties:
                    month:
                      description: "Month of the year (1-12)"
                      type: integer
                    year:
                      description: "Year"
                      type: integer
                  required: ["month", "year"]
              type: function
        tool_responses:
          - response: "{'response': 'Total expenses for January 2025: $1000'}"
          - response: "{'response': 'Total expenses for February 2024: $2000'}"
        expected:
          - num_tool_calls: 1
            tool_name: getMonthlyExpenseSummary
            tool_arguments:
              month: 1
              year: 2025
          - num_tool_calls: 0
            answer: ["1000", "$1,000", "1,000"]
          - num_tool_calls: 1
            tool_name: getMonthlyExpenseSummary
            tool_arguments:
              month: 2
              year: 2024
          - num_tool_calls: 0
            answer: ["yes"]