forked from phoenix-oss/llama-stack-mirror
fix: llama4 tool use prompt fix (#2103)
Tests:
LLAMA_STACK_CONFIG=http://localhost:5002 pytest -s -v tests/integration/inference --safety-shield meta-llama/Llama-Guard-3-8B --vision-model meta-llama/Llama-4-Scout-17B-16E-Instruct --text-model meta-llama/Llama-4-Scout-17B-16E-Instruct
LLAMA_STACK_CONFIG=http://localhost:5002 pytest -s -v tests/integration/inference --safety-shield meta-llama/Llama-Guard-3-8B --vision-model Llama-4-Maverick-17B-128E-Instruct --text-model Llama-4-Maverick-17B-128E-Instruct
Co-authored-by: Eric Huang <erichuang@fb.com>
This commit is contained in:
parent
b2b00a216b
commit
664161c462
4 changed files with 9 additions and 203 deletions
|
@ -173,9 +173,7 @@ INCORRECT: [get_events(location="Singapore")] <- If function not in list
|
|||
- Don't repeat tool response verbatim
|
||||
- Don't add supplementary information
|
||||
|
||||
|
||||
Here is a list of functions in JSON format that you can invoke.
|
||||
|
||||
Here is a list of functions in JSON format that you can invoke:
|
||||
[
|
||||
{
|
||||
"name": "get_weather",
|
||||
|
@ -196,10 +194,7 @@ Here is a list of functions in JSON format that you can invoke.
|
|||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
|
||||
You can answer general questions or invoke tools when necessary.
|
||||
In addition to tool calls, you should also augment your responses by using the tool outputs.<|eot|><|header_start|>user<|header_end|>
|
||||
]<|eot|><|header_start|>user<|header_end|>
|
||||
|
||||
What is the weather in SF and Seattle?<|eot|><|header_start|>assistant<|header_end|>
|
||||
|
||||
|
|
|
@ -61,7 +61,6 @@ class PythonListCustomToolGenerator(PromptTemplateGeneratorBase): # noqa: N801
|
|||
- Don't repeat tool response verbatim
|
||||
- Don't add supplementary information
|
||||
|
||||
|
||||
{{ function_description }}
|
||||
""".strip("\n")
|
||||
)
|
||||
|
@ -76,8 +75,7 @@ class PythonListCustomToolGenerator(PromptTemplateGeneratorBase): # noqa: N801
|
|||
def _gen_function_description(self, custom_tools: list[ToolDefinition]) -> PromptTemplate:
|
||||
template_str = textwrap.dedent(
|
||||
"""
|
||||
Here is a list of functions in JSON format that you can invoke.
|
||||
|
||||
Here is a list of functions in JSON format that you can invoke:
|
||||
[
|
||||
{% for t in tools -%}
|
||||
{# manually setting up JSON because jinja sorts keys in unexpected ways -#}
|
||||
|
@ -108,10 +106,6 @@ class PythonListCustomToolGenerator(PromptTemplateGeneratorBase): # noqa: N801
|
|||
{% endif -%}
|
||||
{%- endfor %}
|
||||
]
|
||||
|
||||
You can answer general questions or invoke tools when necessary.
|
||||
In addition to tool calls, you should also augment your responses by using the tool outputs.
|
||||
|
||||
"""
|
||||
)
|
||||
return PromptTemplate(
|
||||
|
|
|
@ -473,18 +473,12 @@ def test_text_chat_completion_tool_calling_tools_not_in_request(
|
|||
[
|
||||
# Tests if the model can handle simple messages like "Hi" or
|
||||
# a message unrelated to one of the tool calls
|
||||
"inference:chat_completion:multi_turn_tool_calling_01",
|
||||
"inference:chat_completion:text_then_tool",
|
||||
# Tests if the model can do full tool call with responses correctly
|
||||
"inference:chat_completion:multi_turn_tool_calling_02",
|
||||
"inference:chat_completion:tool_then_answer",
|
||||
# Tests if model can generate multiple params and
|
||||
# read outputs correctly
|
||||
"inference:chat_completion:multi_turn_tool_calling_03",
|
||||
# Tests if model can do different tool calls in a sequence
|
||||
# and use the information between appropriately
|
||||
"inference:chat_completion:multi_turn_tool_calling_04",
|
||||
# Tests if model can use current date and run multiple tool calls
|
||||
# sequentially and infer using both
|
||||
"inference:chat_completion:multi_turn_tool_calling_05",
|
||||
"inference:chat_completion:array_parameter",
|
||||
],
|
||||
)
|
||||
def test_text_chat_completion_with_multi_turn_tool_calling(client_with_models, text_model_id, test_case):
|
||||
|
|
|
@ -98,7 +98,7 @@
|
|||
}
|
||||
}
|
||||
},
|
||||
"multi_turn_tool_calling_01": {
|
||||
"text_then_tool": {
|
||||
"data": {
|
||||
"messages": [
|
||||
[
|
||||
|
@ -150,7 +150,7 @@
|
|||
]
|
||||
}
|
||||
},
|
||||
"multi_turn_tool_calling_02": {
|
||||
"tool_then_answer": {
|
||||
"data": {
|
||||
"messages": [
|
||||
[
|
||||
|
@ -192,7 +192,7 @@
|
|||
]
|
||||
}
|
||||
},
|
||||
"multi_turn_tool_calling_03": {
|
||||
"array_parameter": {
|
||||
"data": {
|
||||
"messages": [
|
||||
[
|
||||
|
@ -252,183 +252,6 @@
|
|||
]
|
||||
}
|
||||
},
|
||||
"multi_turn_tool_calling_04": {
|
||||
"data": {
|
||||
"messages": [
|
||||
[
|
||||
{
|
||||
"role": "system",
|
||||
"content": "Todays date is 2025-03-01."
|
||||
},
|
||||
{
|
||||
"role": "user",
|
||||
"content": "Do i have any meetings on March 3rd at 10 am ?"
|
||||
}
|
||||
],
|
||||
[
|
||||
{
|
||||
"role": "user",
|
||||
"content": "Alright then, Create an event named 'Team Building', scheduled for that time same time, in the 'Main Conference Room' and add Alice, Bob, Charlie to it. Give me the created event id."
|
||||
}
|
||||
]
|
||||
],
|
||||
"tools": [
|
||||
{
|
||||
"tool_name": "create_event",
|
||||
"description": "Create a new event",
|
||||
"parameters": {
|
||||
"name": {
|
||||
"param_type": "string",
|
||||
"description": "Name of the event"
|
||||
},
|
||||
"date": {
|
||||
"param_type": "string",
|
||||
"description": "Date of the event in ISO format"
|
||||
},
|
||||
"time": {
|
||||
"param_type": "string",
|
||||
"description": "Event Time (HH:MM)"
|
||||
},
|
||||
"location": {
|
||||
"param_type": "string",
|
||||
"description": "Location of the event"
|
||||
},
|
||||
"participants": {
|
||||
"param_type": "list[str]",
|
||||
"description": "List of participant names"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"tool_name": "get_event",
|
||||
"description": "Get an event by date and time",
|
||||
"parameters": {
|
||||
"date": {
|
||||
"param_type": "string",
|
||||
"description": "Date of the event in ISO format"
|
||||
},
|
||||
"time": {
|
||||
"param_type": "string",
|
||||
"description": "Event Time (HH:MM)"
|
||||
}
|
||||
}
|
||||
}
|
||||
],
|
||||
"tool_responses": [
|
||||
{
|
||||
"response": "{'response': 'No events found for 2025-03-03 at 10:00'}"
|
||||
},
|
||||
{
|
||||
"response": "{'response': 'Successfully created new event with id: e_123'}"
|
||||
}
|
||||
],
|
||||
"expected": [
|
||||
{
|
||||
"num_tool_calls": 1,
|
||||
"tool_name": "get_event",
|
||||
"tool_arguments": {
|
||||
"date": "2025-03-03",
|
||||
"time": "10:00"
|
||||
}
|
||||
},
|
||||
{
|
||||
"num_tool_calls": 0,
|
||||
"answer": "no"
|
||||
},
|
||||
{
|
||||
"num_tool_calls": 1,
|
||||
"tool_name": "create_event",
|
||||
"tool_arguments": {
|
||||
"name": "Team Building",
|
||||
"date": "2025-03-03",
|
||||
"time": "10:00",
|
||||
"location": "Main Conference Room",
|
||||
"participants": [
|
||||
"Alice",
|
||||
"Bob",
|
||||
"Charlie"
|
||||
]
|
||||
}
|
||||
},
|
||||
{
|
||||
"num_tool_calls": 0,
|
||||
"answer": "e_123"
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"multi_turn_tool_calling_05": {
|
||||
"data": {
|
||||
"messages": [
|
||||
[
|
||||
{
|
||||
"role": "system",
|
||||
"content": "Todays date is 2025-03-01."
|
||||
},
|
||||
{
|
||||
"role": "user",
|
||||
"content": "what was my monthly expense in Jan of this year?"
|
||||
}
|
||||
],
|
||||
[
|
||||
{
|
||||
"role": "user",
|
||||
"content": "Was it less than Feb of last year? Only answer with yes or no."
|
||||
}
|
||||
]
|
||||
],
|
||||
"tools": [
|
||||
{
|
||||
"tool_name": "getMonthlyExpenseSummary",
|
||||
"description": "Get monthly expense summary",
|
||||
"parameters": {
|
||||
"month": {
|
||||
"param_type": "int",
|
||||
"description": "Month of the year (1-12)"
|
||||
},
|
||||
"year": {
|
||||
"param_type": "int",
|
||||
"description": "Year"
|
||||
}
|
||||
}
|
||||
}
|
||||
],
|
||||
"tool_responses": [
|
||||
{
|
||||
"response": "{'response': 'Total expenses for January 2025: $1000'}"
|
||||
},
|
||||
{
|
||||
"response": "{'response': 'Total expenses for February 2024: $2000'}"
|
||||
}
|
||||
],
|
||||
"expected": [
|
||||
{
|
||||
"num_tool_calls": 1,
|
||||
"tool_name": "getMonthlyExpenseSummary",
|
||||
"tool_arguments": {
|
||||
"month": 1,
|
||||
"year": 2025
|
||||
}
|
||||
},
|
||||
{
|
||||
"num_tool_calls": 0,
|
||||
"answer": "1000"
|
||||
},
|
||||
{
|
||||
"num_tool_calls": 1,
|
||||
"tool_name": "getMonthlyExpenseSummary",
|
||||
"tool_arguments": {
|
||||
"month": 2,
|
||||
"year": 2024
|
||||
}
|
||||
},
|
||||
{
|
||||
"num_tool_calls": 0,
|
||||
"answer": "yes"
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"sample_messages_tool_calling": {
|
||||
"data": {
|
||||
"messages": [
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue