fix: llama4 tool use prompt fix (#2103)

Tests:
LLAMA_STACK_CONFIG=http://localhost:5002 pytest -s -v
tests/integration/inference --safety-shield meta-llama/Llama-Guard-3-8B
--vision-model meta-llama/Llama-4-Scout-17B-16E-Instruct --text-model
meta-llama/Llama-4-Scout-17B-16E-Instruct

LLAMA_STACK_CONFIG=http://localhost:5002 pytest -s -v
tests/integration/inference --safety-shield meta-llama/Llama-Guard-3-8B
--vision-model Llama-4-Maverick-17B-128E-Instruct --text-model
Llama-4-Maverick-17B-128E-Instruct

Co-authored-by: Eric Huang <erichuang@fb.com>
This commit is contained in:
ehhuang 2025-05-06 22:18:31 -07:00 committed by GitHub
parent b2b00a216b
commit 664161c462
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 9 additions and 203 deletions

View file

@@ -473,18 +473,12 @@ def test_text_chat_completion_tool_calling_tools_not_in_request(
[
# Tests if the model can handle simple messages like "Hi" or
# a message unrelated to one of the tool calls
"inference:chat_completion:multi_turn_tool_calling_01",
"inference:chat_completion:text_then_tool",
# Tests if the model can do full tool call with responses correctly
"inference:chat_completion:multi_turn_tool_calling_02",
"inference:chat_completion:tool_then_answer",
# Tests if model can generate multiple params and
# read outputs correctly
"inference:chat_completion:multi_turn_tool_calling_03",
# Tests if model can do different tool calls in a sequence
# and use the information between appropriately
"inference:chat_completion:multi_turn_tool_calling_04",
# Tests if model can use current date and run multiple tool calls
# sequentially and infer using both
"inference:chat_completion:multi_turn_tool_calling_05",
"inference:chat_completion:array_parameter",
],
)
def test_text_chat_completion_with_multi_turn_tool_calling(client_with_models, text_model_id, test_case):