mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-12-03 09:53:45 +00:00
Pass parallel_tool_calls directly and document intended usage in integration test
Signed-off-by: Anastas Stoyanovsky <astoyano@redhat.com>
This commit is contained in:
parent
91f1b352b4
commit
958d0dc515
8 changed files with 31 additions and 196 deletions
|
|
@ -242,6 +242,7 @@ class StreamingResponseOrchestrator:
|
|||
messages=messages,
|
||||
# Pydantic models are dict-compatible but mypy treats them as distinct types
|
||||
tools=self.ctx.chat_tools, # type: ignore[arg-type]
|
||||
parallel_tool_calls=self.parallel_tool_calls,
|
||||
stream=True,
|
||||
temperature=self.ctx.temperature,
|
||||
response_format=response_format,
|
||||
|
|
|
|||
|
|
@ -516,169 +516,3 @@ def test_response_with_instructions(openai_client, client_with_models, text_mode
|
|||
|
||||
# Verify instructions from previous response was not carried over to the next response
|
||||
assert response_with_instructions2.instructions == instructions2
|
||||
|
||||
|
||||
@pytest.mark.skip(reason="Tool calling is not reliable.")
|
||||
def test_max_tool_calls_with_function_tools(openai_client, client_with_models, text_model_id):
|
||||
"""Test handling of max_tool_calls with function tools in responses."""
|
||||
if isinstance(client_with_models, LlamaStackAsLibraryClient):
|
||||
pytest.skip("OpenAI responses are not supported when testing with library client yet.")
|
||||
|
||||
client = openai_client
|
||||
max_tool_calls = 1
|
||||
|
||||
tools = [
|
||||
{
|
||||
"type": "function",
|
||||
"name": "get_weather",
|
||||
"description": "Get weather information for a specified location",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"location": {
|
||||
"type": "string",
|
||||
"description": "The city name (e.g., 'New York', 'London')",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
"type": "function",
|
||||
"name": "get_time",
|
||||
"description": "Get current time for a specified location",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"location": {
|
||||
"type": "string",
|
||||
"description": "The city name (e.g., 'New York', 'London')",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
]
|
||||
|
||||
# First create a response that triggers function tools
|
||||
response = client.responses.create(
|
||||
model=text_model_id,
|
||||
input="Can you tell me the weather in Paris and the current time?",
|
||||
tools=tools,
|
||||
stream=False,
|
||||
max_tool_calls=max_tool_calls,
|
||||
)
|
||||
|
||||
# Verify we got two function calls and that the max_tool_calls do not affect function tools
|
||||
assert len(response.output) == 2
|
||||
assert response.output[0].type == "function_call"
|
||||
assert response.output[0].name == "get_weather"
|
||||
assert response.output[0].status == "completed"
|
||||
assert response.output[1].type == "function_call"
|
||||
assert response.output[1].name == "get_time"
|
||||
assert response.output[0].status == "completed"
|
||||
|
||||
# Verify we have a valid max_tool_calls field
|
||||
assert response.max_tool_calls == max_tool_calls
|
||||
|
||||
|
||||
def test_max_tool_calls_invalid(openai_client, client_with_models, text_model_id):
|
||||
"""Test handling of invalid max_tool_calls in responses."""
|
||||
if isinstance(client_with_models, LlamaStackAsLibraryClient):
|
||||
pytest.skip("OpenAI responses are not supported when testing with library client yet.")
|
||||
|
||||
client = openai_client
|
||||
|
||||
input = "Search for today's top technology news."
|
||||
invalid_max_tool_calls = 0
|
||||
tools = [
|
||||
{"type": "web_search"},
|
||||
]
|
||||
|
||||
# Create a response with an invalid max_tool_calls value i.e. 0
|
||||
# Handle ValueError from LLS and BadRequestError from OpenAI client
|
||||
with pytest.raises((ValueError, BadRequestError)) as excinfo:
|
||||
client.responses.create(
|
||||
model=text_model_id,
|
||||
input=input,
|
||||
tools=tools,
|
||||
stream=False,
|
||||
max_tool_calls=invalid_max_tool_calls,
|
||||
)
|
||||
|
||||
error_message = str(excinfo.value)
|
||||
assert f"Invalid max_tool_calls={invalid_max_tool_calls}; should be >= 1" in error_message, (
|
||||
f"Expected error message about invalid max_tool_calls, got: {error_message}"
|
||||
)
|
||||
|
||||
|
||||
def test_max_tool_calls_with_builtin_tools(openai_client, client_with_models, text_model_id):
|
||||
"""Test handling of max_tool_calls with built-in tools in responses."""
|
||||
if isinstance(client_with_models, LlamaStackAsLibraryClient):
|
||||
pytest.skip("OpenAI responses are not supported when testing with library client yet.")
|
||||
|
||||
client = openai_client
|
||||
|
||||
input = "Search for today's top technology and a positive news story. You MUST make exactly two separate web search calls."
|
||||
max_tool_calls = [1, 5]
|
||||
tools = [
|
||||
{"type": "web_search"},
|
||||
]
|
||||
|
||||
# First create a response that triggers web_search tools without max_tool_calls
|
||||
response = client.responses.create(
|
||||
model=text_model_id,
|
||||
input=input,
|
||||
tools=tools,
|
||||
stream=False,
|
||||
)
|
||||
|
||||
# Verify we got two web search calls followed by a message
|
||||
assert len(response.output) == 3
|
||||
assert response.output[0].type == "web_search_call"
|
||||
assert response.output[0].status == "completed"
|
||||
assert response.output[1].type == "web_search_call"
|
||||
assert response.output[1].status == "completed"
|
||||
assert response.output[2].type == "message"
|
||||
assert response.output[2].status == "completed"
|
||||
assert response.output[2].role == "assistant"
|
||||
|
||||
# Next create a response that triggers web_search tools with max_tool_calls set to 1
|
||||
response_2 = client.responses.create(
|
||||
model=text_model_id,
|
||||
input=input,
|
||||
tools=tools,
|
||||
stream=False,
|
||||
max_tool_calls=max_tool_calls[0],
|
||||
)
|
||||
|
||||
# Verify we got one web search tool call followed by a message
|
||||
assert len(response_2.output) == 2
|
||||
assert response_2.output[0].type == "web_search_call"
|
||||
assert response_2.output[0].status == "completed"
|
||||
assert response_2.output[1].type == "message"
|
||||
assert response_2.output[1].status == "completed"
|
||||
assert response_2.output[1].role == "assistant"
|
||||
|
||||
# Verify we have a valid max_tool_calls field
|
||||
assert response_2.max_tool_calls == max_tool_calls[0]
|
||||
|
||||
# Finally create a response that triggers web_search tools with max_tool_calls set to 5
|
||||
response_3 = client.responses.create(
|
||||
model=text_model_id,
|
||||
input=input,
|
||||
tools=tools,
|
||||
stream=False,
|
||||
max_tool_calls=max_tool_calls[1],
|
||||
)
|
||||
|
||||
# Verify we got two web search calls followed by a message
|
||||
assert len(response_3.output) == 3
|
||||
assert response_3.output[0].type == "web_search_call"
|
||||
assert response_3.output[0].status == "completed"
|
||||
assert response_3.output[1].type == "web_search_call"
|
||||
assert response_3.output[1].status == "completed"
|
||||
assert response_3.output[2].type == "message"
|
||||
assert response_3.output[2].status == "completed"
|
||||
assert response_3.output[2].role == "assistant"
|
||||
|
||||
# Verify we have a valid max_tool_calls field
|
||||
assert response_3.max_tool_calls == max_tool_calls[1]
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue