Mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-12-03 18:00:36 +00:00)

commit 958d0dc515 (parent 91f1b352b4)

Pass parallel_tool_calls directly and document intended usage in integration test

Signed-off-by: Anastas Stoyanovsky <astoyano@redhat.com>

8 changed files with 31 additions and 196 deletions
```diff
@@ -13,6 +13,6 @@ title: Agents
 
 Agents
 
 APIs for creating and interacting with agentic systems.
 
 This section contains documentation for all available providers for the **agents** API.
```
```diff
@@ -19,14 +19,14 @@ title: Batches
 ## Overview
 
 The Batches API enables efficient processing of multiple requests in a single operation,
 particularly useful for processing large datasets, batch evaluation workflows, and
 cost-effective inference at scale.
 
 The API is designed to allow use of openai client libraries for seamless integration.
 
 This API provides the following extensions:
 - idempotent batch creation
 
 Note: This API is currently under active development and may undergo changes.
 
 This section contains documentation for all available providers for the **batches** API.
```
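As a hedged illustration of the OpenAI-client compatibility described in the hunk above, here is a minimal sketch of driving the Batches API with the stock `openai` package. The base URL/port and the `idempotency_key` extension field name are assumptions for illustration, not confirmed by this diff:

```python
# Sketch: using the stock OpenAI client against a Llama Stack Batches endpoint.
# ASSUMPTIONS: server base URL/port, and that the "idempotent batch creation"
# extension is exposed as an extra "idempotency_key" request-body field.
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8321/v1", api_key="none")

# Upload a .jsonl file of requests, then create a batch that processes it.
batch_file = client.files.create(file=open("requests.jsonl", "rb"), purpose="batch")
batch = client.batches.create(
    input_file_id=batch_file.id,
    endpoint="/v1/chat/completions",
    completion_window="24h",
    extra_body={"idempotency_key": "nightly-eval-2025-12-03"},  # hypothetical extension field
)
print(batch.id, batch.status)
```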
```diff
@@ -13,6 +13,6 @@ title: Eval
 
 Evaluations
 
 Llama Stack Evaluation API for running evaluations on model and agent candidates.
 
 This section contains documentation for all available providers for the **eval** API.
```
```diff
@@ -13,6 +13,6 @@ title: Files
 
 Files
 
 This API is used to upload documents that can be used with other Llama Stack APIs.
 
 This section contains documentation for all available providers for the **files** API.
```
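A minimal sketch of the upload flow the Files hunk describes, again via the OpenAI client. The base URL and the `"assistants"` purpose value are illustrative assumptions:

```python
# Sketch: uploading a document through the OpenAI-compatible Files API.
# ASSUMPTIONS: base URL/port; "assistants" as the purpose value is illustrative.
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8321/v1", api_key="none")

uploaded = client.files.create(file=open("handbook.pdf", "rb"), purpose="assistants")
print(uploaded.id)  # this file ID can then be referenced by other Llama Stack APIs
```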
```diff
@@ -18,11 +18,11 @@ title: Inference
 
 Inference
 
 Llama Stack Inference API for generating completions, chat completions, and embeddings.
 
 This API provides the raw interface to the underlying models. Three kinds of models are supported:
 - LLM models: these models generate "raw" and "chat" (conversational) completions.
 - Embedding models: these models generate embeddings to be used for semantic search.
 - Rerank models: these models reorder the documents based on their relevance to a query.
 
 This section contains documentation for all available providers for the **inference** API.
```
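To make the "three kinds of models" concrete, a sketch of the first two through the OpenAI-compatible surface. The base URL and both model IDs are placeholders; rerank is served through a Llama Stack-specific route not shown here:

```python
# Sketch: LLM and embedding model kinds behind one OpenAI-compatible endpoint.
# ASSUMPTIONS: base URL/port and both model IDs are placeholders.
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8321/v1", api_key="none")

# LLM model: a "chat" (conversational) completion
chat = client.chat.completions.create(
    model="meta-llama/Llama-3.3-70B-Instruct",
    messages=[{"role": "user", "content": "One sentence on batch inference."}],
)
print(chat.choices[0].message.content)

# Embedding model: vectors to be used for semantic search
emb = client.embeddings.create(model="all-MiniLM-L6-v2", input=["semantic search query"])
print(len(emb.data[0].embedding))
```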
```diff
@@ -13,6 +13,6 @@ title: Safety
 
 Safety
 
 OpenAI-compatible Moderations API.
 
 This section contains documentation for all available providers for the **safety** API.
```
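Since the hunk above advertises an OpenAI-compatible Moderations API, a hedged sketch of the corresponding client call follows; the base URL and the `llama-guard` model ID are placeholder assumptions:

```python
# Sketch: OpenAI-compatible Moderations call against a safety provider.
# ASSUMPTIONS: base URL/port and the safety model ID are placeholders.
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8321/v1", api_key="none")

result = client.moderations.create(
    model="llama-guard",  # hypothetical safety model ID
    input="Is this message safe to pass along?",
)
print(result.results[0].flagged)
```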
```diff
@@ -242,6 +242,7 @@ class StreamingResponseOrchestrator:
             messages=messages,
             # Pydantic models are dict-compatible but mypy treats them as distinct types
             tools=self.ctx.chat_tools,  # type: ignore[arg-type]
+            parallel_tool_calls=self.parallel_tool_calls,
+            stream=True,
             temperature=self.ctx.temperature,
             response_format=response_format,
```
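The one-line addition above forwards the caller's `parallel_tool_calls` flag straight to the underlying chat completion. A minimal sketch of how a client might exercise it through the Responses API; the base URL, model ID, and tool definition are illustrative assumptions:

```python
# Sketch: a caller setting parallel_tool_calls on a Responses request, which
# the orchestrator now passes through to the underlying chat completion.
# ASSUMPTIONS: base URL/port, model ID, and tool definition are placeholders.
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8321/v1", api_key="none")

response = client.responses.create(
    model="meta-llama/Llama-3.3-70B-Instruct",
    input="What's the weather in Paris and in London?",
    tools=[
        {
            "type": "function",
            "name": "get_weather",
            "description": "Get weather for a city",
            "parameters": {
                "type": "object",
                "properties": {"location": {"type": "string"}},
            },
        }
    ],
    parallel_tool_calls=False,  # ask the model to emit at most one tool call per turn
)
```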
```diff
@@ -516,169 +516,3 @@ def test_response_with_instructions(openai_client, client_with_models, text_model_id):
     # Verify instructions from previous response was not carried over to the next response
     assert response_with_instructions2.instructions == instructions2
 
-
-@pytest.mark.skip(reason="Tool calling is not reliable.")
-def test_max_tool_calls_with_function_tools(openai_client, client_with_models, text_model_id):
-    """Test handling of max_tool_calls with function tools in responses."""
-    if isinstance(client_with_models, LlamaStackAsLibraryClient):
-        pytest.skip("OpenAI responses are not supported when testing with library client yet.")
-
-    client = openai_client
-    max_tool_calls = 1
-
-    tools = [
-        {
-            "type": "function",
-            "name": "get_weather",
-            "description": "Get weather information for a specified location",
-            "parameters": {
-                "type": "object",
-                "properties": {
-                    "location": {
-                        "type": "string",
-                        "description": "The city name (e.g., 'New York', 'London')",
-                    },
-                },
-            },
-        },
-        {
-            "type": "function",
-            "name": "get_time",
-            "description": "Get current time for a specified location",
-            "parameters": {
-                "type": "object",
-                "properties": {
-                    "location": {
-                        "type": "string",
-                        "description": "The city name (e.g., 'New York', 'London')",
-                    },
-                },
-            },
-        },
-    ]
-
-    # First create a response that triggers function tools
-    response = client.responses.create(
-        model=text_model_id,
-        input="Can you tell me the weather in Paris and the current time?",
-        tools=tools,
-        stream=False,
-        max_tool_calls=max_tool_calls,
-    )
-
-    # Verify we got two function calls and that max_tool_calls does not affect function tools
-    assert len(response.output) == 2
-    assert response.output[0].type == "function_call"
-    assert response.output[0].name == "get_weather"
-    assert response.output[0].status == "completed"
-    assert response.output[1].type == "function_call"
-    assert response.output[1].name == "get_time"
-    assert response.output[1].status == "completed"
-
-    # Verify we have a valid max_tool_calls field
-    assert response.max_tool_calls == max_tool_calls
-
-
-def test_max_tool_calls_invalid(openai_client, client_with_models, text_model_id):
-    """Test handling of invalid max_tool_calls in responses."""
-    if isinstance(client_with_models, LlamaStackAsLibraryClient):
-        pytest.skip("OpenAI responses are not supported when testing with library client yet.")
-
-    client = openai_client
-
-    input = "Search for today's top technology news."
-    invalid_max_tool_calls = 0
-    tools = [
-        {"type": "web_search"},
-    ]
-
-    # Create a response with an invalid max_tool_calls value, i.e. 0
-    # Handle ValueError from LLS and BadRequestError from the OpenAI client
-    with pytest.raises((ValueError, BadRequestError)) as excinfo:
-        client.responses.create(
-            model=text_model_id,
-            input=input,
-            tools=tools,
-            stream=False,
-            max_tool_calls=invalid_max_tool_calls,
-        )
-
-    error_message = str(excinfo.value)
-    assert f"Invalid max_tool_calls={invalid_max_tool_calls}; should be >= 1" in error_message, (
-        f"Expected error message about invalid max_tool_calls, got: {error_message}"
-    )
-
-
-def test_max_tool_calls_with_builtin_tools(openai_client, client_with_models, text_model_id):
-    """Test handling of max_tool_calls with built-in tools in responses."""
-    if isinstance(client_with_models, LlamaStackAsLibraryClient):
-        pytest.skip("OpenAI responses are not supported when testing with library client yet.")
-
-    client = openai_client
-
-    input = "Search for today's top technology and a positive news story. You MUST make exactly two separate web search calls."
-    max_tool_calls = [1, 5]
-    tools = [
-        {"type": "web_search"},
-    ]
-
-    # First create a response that triggers web_search tools without max_tool_calls
-    response = client.responses.create(
-        model=text_model_id,
-        input=input,
-        tools=tools,
-        stream=False,
-    )
-
-    # Verify we got two web search calls followed by a message
-    assert len(response.output) == 3
-    assert response.output[0].type == "web_search_call"
-    assert response.output[0].status == "completed"
-    assert response.output[1].type == "web_search_call"
-    assert response.output[1].status == "completed"
-    assert response.output[2].type == "message"
-    assert response.output[2].status == "completed"
-    assert response.output[2].role == "assistant"
-
-    # Next create a response that triggers web_search tools with max_tool_calls set to 1
-    response_2 = client.responses.create(
-        model=text_model_id,
-        input=input,
-        tools=tools,
-        stream=False,
-        max_tool_calls=max_tool_calls[0],
-    )
-
-    # Verify we got one web search tool call followed by a message
-    assert len(response_2.output) == 2
-    assert response_2.output[0].type == "web_search_call"
-    assert response_2.output[0].status == "completed"
-    assert response_2.output[1].type == "message"
-    assert response_2.output[1].status == "completed"
-    assert response_2.output[1].role == "assistant"
-
-    # Verify we have a valid max_tool_calls field
-    assert response_2.max_tool_calls == max_tool_calls[0]
-
-    # Finally create a response that triggers web_search tools with max_tool_calls set to 5
-    response_3 = client.responses.create(
-        model=text_model_id,
-        input=input,
-        tools=tools,
-        stream=False,
-        max_tool_calls=max_tool_calls[1],
-    )
-
-    # Verify we got two web search calls followed by a message
-    assert len(response_3.output) == 3
-    assert response_3.output[0].type == "web_search_call"
-    assert response_3.output[0].status == "completed"
-    assert response_3.output[1].type == "web_search_call"
-    assert response_3.output[1].status == "completed"
-    assert response_3.output[2].type == "message"
-    assert response_3.output[2].status == "completed"
-    assert response_3.output[2].role == "assistant"
-
-    # Verify we have a valid max_tool_calls field
-    assert response_3.max_tool_calls == max_tool_calls[1]
```
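The additions the commit title refers to (documenting intended `parallel_tool_calls` usage in an integration test) are not visible in this extraction. As a hedged sketch only, reusing this test file's fixtures, such a test could look roughly like the following; the test name, assertions, and behavior are assumptions, not the actual added code:

```python
def test_parallel_tool_calls_passthrough(openai_client, client_with_models, text_model_id):
    """Sketch only: verify parallel_tool_calls is accepted and echoed back."""
    if isinstance(client_with_models, LlamaStackAsLibraryClient):
        pytest.skip("OpenAI responses are not supported when testing with library client yet.")

    response = openai_client.responses.create(
        model=text_model_id,
        input="Say hello.",
        stream=False,
        parallel_tool_calls=False,  # the flag the orchestrator now forwards
    )
    # The Responses object carries the flag back; a hypothetical check:
    assert response.parallel_tool_calls is False
```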