mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-07-23 12:57:11 +00:00
# What does this PR do? When running `tests/integration/inference/test_text_inference.py` on smaller models, such as Llama-3.2-3B-Instruct, I sometimes get test flakes where the model passes "San Francisco" as an argument to my tool call instead of "San Francisco, CA" which is what we expect. So, this expands upon that tool calling parameter's description to explicitly state that both city and state are required. With this change, the tool calling tests that are checking for this "San Francisco, CA" value are always passing for me instead of sometimes failing. ## Test Plan I test this locally via vLLM like: ``` VLLM_URL="http://localhost:8000/v1" \ INFERENCE_MODEL="meta-llama/Llama-3.2-3B-Instruct" \ LLAMA_STACK_CONFIG=remote-vllm \ python -m pytest -v \ tests/integration/inference/test_text_inference.py \ --inference-model "meta-llama/Llama-3.2-3B-Instruct" \ --vision-inference-model "" ``` I don't expect this would negatively impact the parameter generated for this tool call by other models, as we're providing additional guidance but not removing any of the existing guidance. However, I cannot easily confirm that myself. Signed-off-by: Ben Browning <bbrownin@redhat.com>
172 lines
4.5 KiB
JSON
172 lines
4.5 KiB
JSON
{
|
|
"non_streaming_01": {
|
|
"data": {
|
|
"question": "Which planet do humans live on?",
|
|
"expected": "Earth"
|
|
}
|
|
},
|
|
"non_streaming_02": {
|
|
"data": {
|
|
"question": "Which planet has rings around it with a name starting with letter S?",
|
|
"expected": "Saturn"
|
|
}
|
|
},
|
|
"sample_messages": {
|
|
"data": {
|
|
"messages": [
|
|
{
|
|
"role": "system",
|
|
"content": "You are a helpful assistant."
|
|
},
|
|
{
|
|
"role": "user",
|
|
"content": "What's the weather like today?"
|
|
}
|
|
]
|
|
}
|
|
},
|
|
"streaming_01": {
|
|
"data": {
|
|
"question": "What's the name of the Sun in Latin?",
|
|
"expected": "Sol"
|
|
}
|
|
},
|
|
"streaming_02": {
|
|
"data": {
|
|
"question": "What is the name of the US capital?",
|
|
"expected": "Washington"
|
|
}
|
|
},
|
|
"tool_calling": {
|
|
"data": {
|
|
"messages": [
|
|
{"role": "system", "content": "Pretend you are a weather assistant."},
|
|
{"role": "user", "content": "What's the weather like in San Francisco?"}
|
|
],
|
|
"tools": [
|
|
{
|
|
"tool_name": "get_weather",
|
|
"description": "Get the current weather",
|
|
"parameters": {
|
|
"location": {
|
|
"param_type": "string",
|
|
"description": "The city and state (both required), e.g. San Francisco, CA."
|
|
}
|
|
}
|
|
}
|
|
],
|
|
"expected": {
|
|
"location": "San Francisco, CA"
|
|
}
|
|
}
|
|
},
|
|
"sample_messages_tool_calling": {
|
|
"data": {
|
|
"messages": [
|
|
{
|
|
"role": "system",
|
|
"content": "Pretend you are a weather assistant."
|
|
},
|
|
{
|
|
"role": "user",
|
|
"content": "What's the weather like today?"
|
|
},
|
|
{
|
|
"role": "user",
|
|
"content": "What's the weather like in San Francisco?"
|
|
}
|
|
],
|
|
"tools": [
|
|
{
|
|
"tool_name": "get_weather",
|
|
"description": "Get the current weather",
|
|
"parameters": {
|
|
"location": {
|
|
"param_type": "string",
|
|
"description": "The city and state, e.g. San Francisco, CA",
|
|
"required": true
|
|
}
|
|
}
|
|
}
|
|
],
|
|
"expected": {
|
|
"location": "San Francisco"
|
|
}
|
|
}
|
|
},
|
|
"structured_output": {
|
|
"data": {
|
|
"notes": "We include context about Michael Jordan in the prompt so that the test is focused on the functionality of the model and not on the information embedded in the model. Llama 3.2 3B Instruct tends to think MJ played for 14 seasons.",
|
|
"messages": [
|
|
{
|
|
"role": "system",
|
|
"content": "You are a helpful assistant. Michael Jordan was born in 1963. He played basketball for the Chicago Bulls for 15 seasons."
|
|
},
|
|
{
|
|
"role": "user",
|
|
"content": "Please give me information about Michael Jordan."
|
|
}
|
|
],
|
|
"expected": {
|
|
"first_name": "Michael",
|
|
"last_name": "Jordan",
|
|
"year_of_birth": 1963,
|
|
"num_seasons_in_nba": 15,
|
|
"year_for_draft": 1984
|
|
}
|
|
}
|
|
},
|
|
"tool_calling_tools_absent": {
|
|
"data": {
|
|
"messages": [
|
|
{
|
|
"role": "system",
|
|
"content": "You are a helpful assistant."
|
|
},
|
|
{
|
|
"role": "user",
|
|
"content": "What pods are in the namespace openshift-lightspeed?"
|
|
},
|
|
{
|
|
"role": "assistant",
|
|
"content": "",
|
|
"stop_reason": "end_of_turn",
|
|
"tool_calls": [
|
|
{
|
|
"call_id": "1",
|
|
"tool_name": "get_object_namespace_list",
|
|
"arguments": {
|
|
"kind": "pod",
|
|
"namespace": "openshift-lightspeed"
|
|
}
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "tool",
|
|
"call_id": "1",
|
|
"tool_name": "get_object_namespace_list",
|
|
"content": "the objects are pod1, pod2, pod3"
|
|
}
|
|
],
|
|
"tools": [
|
|
{
|
|
"tool_name": "get_object_namespace_list",
|
|
"description": "Get the list of objects in a namespace",
|
|
"parameters": {
|
|
"kind": {
|
|
"param_type": "string",
|
|
"description": "the type of object",
|
|
"required": true
|
|
},
|
|
"namespace": {
|
|
"param_type": "string",
|
|
"description": "the name of the namespace",
|
|
"required": true
|
|
}
|
|
}
|
|
}
|
|
]
|
|
}
|
|
}
|
|
}
|