Mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-12-04 02:03:44 +00:00)
Merge branch 'main' into add-mcp-authentication-param
Commit fa2b361f46
43 changed files with 15292 additions and 138 deletions
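
The hunks reproduced below are from the Responses API integration tests. They rename the compat_client fixture to responses_client, drop the client-type skips around the file-search and MCP tests, relax the MCP approval-request argument assertion (celsius defaults to True and may be omitted), and broaden the expected authentication errors to include llama_stack_client.AuthenticationError. Since the target branch adds an MCP authentication parameter, here is a minimal sketch of passing a per-tool authorization value through the Responses API; the client construction, server_label, and server_url values are placeholders, not part of this diff:

import openai  # any OpenAI-compatible client pointed at a Llama Stack server

client = openai.OpenAI(base_url="http://localhost:8321/v1", api_key="none")  # placeholder endpoint
response = client.responses.create(
    model="my-text-model",  # placeholder model id
    input="What is the boiling point of myawesomeliquid?",
    tools=[
        {
            "type": "mcp",
            "server_label": "localmcp",                 # placeholder label
            "server_url": "http://localhost:8000/sse",  # placeholder URL
            "authorization": "test-token",              # per-tool auth value, as exercised in the tests below
        }
    ],
)
print(response.output_text)
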
@@ -9,6 +9,7 @@ import logging # allow-direct-logging
 import os
 
 import httpx
+import llama_stack_client
 import openai
 import pytest
 
@@ -29,8 +30,8 @@ from .streaming_assertions import StreamingValidator
 
 
 @pytest.mark.parametrize("case", web_search_test_cases)
-def test_response_non_streaming_web_search(compat_client, text_model_id, case):
-    response = compat_client.responses.create(
+def test_response_non_streaming_web_search(responses_client, text_model_id, case):
+    response = responses_client.responses.create(
         model=text_model_id,
         input=case.input,
         tools=case.tools,
@@ -48,12 +49,9 @@ def test_response_non_streaming_web_search(compat_client, text_model_id, case):
 
 @pytest.mark.parametrize("case", file_search_test_cases)
 def test_response_non_streaming_file_search(
-    compat_client, text_model_id, embedding_model_id, embedding_dimension, tmp_path, case
+    responses_client, text_model_id, embedding_model_id, embedding_dimension, tmp_path, case
 ):
-    if isinstance(compat_client, LlamaStackAsLibraryClient):
-        pytest.skip("Responses API file search is not yet supported in library client.")
-
-    vector_store = new_vector_store(compat_client, "test_vector_store", embedding_model_id, embedding_dimension)
+    vector_store = new_vector_store(responses_client, "test_vector_store", embedding_model_id, embedding_dimension)
 
     if case.file_content:
         file_name = "test_response_non_streaming_file_search.txt"
@@ -65,16 +63,16 @@ def test_response_non_streaming_file_search(
     else:
         raise ValueError("No file content or path provided for case")
 
-    file_response = upload_file(compat_client, file_name, file_path)
+    file_response = upload_file(responses_client, file_name, file_path)
 
     # Attach our file to the vector store
-    compat_client.vector_stores.files.create(
+    responses_client.vector_stores.files.create(
         vector_store_id=vector_store.id,
         file_id=file_response.id,
     )
 
     # Wait for the file to be attached
-    wait_for_file_attachment(compat_client, vector_store.id, file_response.id)
+    wait_for_file_attachment(responses_client, vector_store.id, file_response.id)
 
     # Update our tools with the right vector store id
     tools = case.tools
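
As context for the file-search tests, the flow is: create a vector store, upload a file and attach it to the store, wait for ingestion, then point a file_search tool at the store. new_vector_store, upload_file, and wait_for_file_attachment are helpers in this test suite; a rough equivalent with a bare OpenAI-compatible client (the direct calls, placeholder names, and the purpose value are assumptions) looks like:

vector_store = client.vector_stores.create(name="test_vector_store")
uploaded = client.files.create(file=open("notes.txt", "rb"), purpose="assistants")
client.vector_stores.files.create(vector_store_id=vector_store.id, file_id=uploaded.id)

response = client.responses.create(
    model=model_id,  # placeholder
    input="How many experts does the Llama 4 Maverick model have?",
    tools=[{"type": "file_search", "vector_store_ids": [vector_store.id]}],
)
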
@@ -83,7 +81,7 @@ def test_response_non_streaming_file_search(
             tool["vector_store_ids"] = [vector_store.id]
 
     # Create the response request, which should query our vector store
-    response = compat_client.responses.create(
+    response = responses_client.responses.create(
         model=text_model_id,
         input=case.input,
         tools=tools,
@@ -105,15 +103,12 @@ def test_response_non_streaming_file_search_empty_vector_store(
 
 
 def test_response_non_streaming_file_search_empty_vector_store(
-    compat_client, text_model_id, embedding_model_id, embedding_dimension
+    responses_client, text_model_id, embedding_model_id, embedding_dimension
 ):
-    if isinstance(compat_client, LlamaStackAsLibraryClient):
-        pytest.skip("Responses API file search is not yet supported in library client.")
-
-    vector_store = new_vector_store(compat_client, "test_vector_store", embedding_model_id, embedding_dimension)
+    vector_store = new_vector_store(responses_client, "test_vector_store", embedding_model_id, embedding_dimension)
 
     # Create the response request, which should query our vector store
-    response = compat_client.responses.create(
+    response = responses_client.responses.create(
         model=text_model_id,
         input="How many experts does the Llama 4 Maverick model have?",
         tools=[{"type": "file_search", "vector_store_ids": [vector_store.id]}],
@@ -133,13 +128,10 @@ def test_response_non_streaming_file_search_empty_vector_store(
 
 
 def test_response_sequential_file_search(
-    compat_client, text_model_id, embedding_model_id, embedding_dimension, tmp_path
+    responses_client, text_model_id, embedding_model_id, embedding_dimension, tmp_path
 ):
     """Test file search with sequential responses using previous_response_id."""
-    if isinstance(compat_client, LlamaStackAsLibraryClient):
-        pytest.skip("Responses API file search is not yet supported in library client.")
-
-    vector_store = new_vector_store(compat_client, "test_vector_store", embedding_model_id, embedding_dimension)
+    vector_store = new_vector_store(responses_client, "test_vector_store", embedding_model_id, embedding_dimension)
 
     # Create a test file with content
     file_content = "The Llama 4 Maverick model has 128 experts in its mixture of experts architecture."
@@ -147,21 +139,21 @@ def test_response_sequential_file_search(
     file_path = tmp_path / file_name
     file_path.write_text(file_content)
 
-    file_response = upload_file(compat_client, file_name, file_path)
+    file_response = upload_file(responses_client, file_name, file_path)
 
     # Attach the file to the vector store
-    compat_client.vector_stores.files.create(
+    responses_client.vector_stores.files.create(
         vector_store_id=vector_store.id,
         file_id=file_response.id,
     )
 
     # Wait for the file to be attached
-    wait_for_file_attachment(compat_client, vector_store.id, file_response.id)
+    wait_for_file_attachment(responses_client, vector_store.id, file_response.id)
 
     tools = [{"type": "file_search", "vector_store_ids": [vector_store.id]}]
 
     # First response request with file search
-    response = compat_client.responses.create(
+    response = responses_client.responses.create(
         model=text_model_id,
         input="How many experts does the Llama 4 Maverick model have?",
         tools=tools,
@@ -178,7 +170,7 @@ def test_response_sequential_file_search(
     assert "128" in response.output_text or "experts" in response.output_text.lower()
 
     # Second response request using previous_response_id
-    response2 = compat_client.responses.create(
+    response2 = responses_client.responses.create(
         model=text_model_id,
         input="Can you tell me more about the architecture?",
         tools=tools,
@@ -199,14 +191,11 @@ def test_response_sequential_file_search(
 
 
 @pytest.mark.parametrize("case", mcp_tool_test_cases)
-def test_response_non_streaming_mcp_tool(compat_client, text_model_id, case, caplog):
-    if not isinstance(compat_client, LlamaStackAsLibraryClient):
-        pytest.skip("in-process MCP server is only supported in library client")
-
+def test_response_non_streaming_mcp_tool(responses_client, text_model_id, case, caplog):
     with make_mcp_server() as mcp_server_info:
         tools = setup_mcp_tools(case.tools, mcp_server_info)
 
-        response = compat_client.responses.create(
+        response = responses_client.responses.create(
             model=text_model_id,
             input=case.input,
             tools=tools,
@@ -243,15 +232,15 @@ def test_response_non_streaming_mcp_tool(compat_client, text_model_id, case, cap
 
         exc_type = (
             AuthenticationRequiredError
-            if isinstance(compat_client, LlamaStackAsLibraryClient)
-            else (httpx.HTTPStatusError, openai.AuthenticationError)
+            if isinstance(responses_client, LlamaStackAsLibraryClient)
+            else (httpx.HTTPStatusError, openai.AuthenticationError, llama_stack_client.AuthenticationError)
         )
         # Suppress expected auth error logs only for the failing auth attempt
         with caplog.at_level(
             logging.CRITICAL, logger="llama_stack.providers.inline.agents.meta_reference.responses.streaming"
         ):
             with pytest.raises(exc_type):
-                compat_client.responses.create(
+                responses_client.responses.create(
                     model=text_model_id,
                     input=case.input,
                     tools=tools,
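
The widened tuple above accounts for the different ways an unauthorized MCP call can surface: the in-process library client raises AuthenticationRequiredError directly, while HTTP-based clients wrap it in their own error types (httpx, the OpenAI SDK, or the llama_stack_client SDK). The assertion pattern, condensed (client, model_id, and tools are placeholders):

http_auth_errors = (httpx.HTTPStatusError, openai.AuthenticationError, llama_stack_client.AuthenticationError)
expected = AuthenticationRequiredError if isinstance(client, LlamaStackAsLibraryClient) else http_auth_errors

with pytest.raises(expected):
    client.responses.create(model=model_id, input=case.input, tools=tools)  # no authorization supplied
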
@@ -262,7 +251,7 @@ def test_response_non_streaming_mcp_tool(compat_client, text_model_id, case, cap
             if tool["type"] == "mcp":
                 tool["authorization"] = "test-token"
 
-        response = compat_client.responses.create(
+        response = responses_client.responses.create(
            model=text_model_id,
            input=case.input,
            tools=tools,
@@ -272,14 +261,11 @@ def test_response_non_streaming_mcp_tool(compat_client, text_model_id, case, cap
 
 
 @pytest.mark.parametrize("case", mcp_tool_test_cases)
-def test_response_sequential_mcp_tool(compat_client, text_model_id, case):
-    if not isinstance(compat_client, LlamaStackAsLibraryClient):
-        pytest.skip("in-process MCP server is only supported in library client")
-
+def test_response_sequential_mcp_tool(responses_client, text_model_id, case):
     with make_mcp_server() as mcp_server_info:
         tools = setup_mcp_tools(case.tools, mcp_server_info)
 
-        response = compat_client.responses.create(
+        response = responses_client.responses.create(
             model=text_model_id,
             input=case.input,
             tools=tools,
@@ -311,7 +297,7 @@ def test_response_sequential_mcp_tool(compat_client, text_model_id, case):
         text_content = message.content[0].text
         assert "boiling point" in text_content.lower()
 
-        response2 = compat_client.responses.create(
+        response2 = responses_client.responses.create(
             model=text_model_id, input=case.input, tools=tools, stream=False, previous_response_id=response.id
         )
 
@@ -323,16 +309,13 @@ def test_response_sequential_mcp_tool(compat_client, text_model_id, case):
 
 @pytest.mark.parametrize("case", mcp_tool_test_cases)
 @pytest.mark.parametrize("approve", [True, False])
-def test_response_mcp_tool_approval(compat_client, text_model_id, case, approve):
-    if not isinstance(compat_client, LlamaStackAsLibraryClient):
-        pytest.skip("in-process MCP server is only supported in library client")
-
+def test_response_mcp_tool_approval(responses_client, text_model_id, case, approve):
     with make_mcp_server() as mcp_server_info:
         tools = setup_mcp_tools(case.tools, mcp_server_info)
         for tool in tools:
             tool["require_approval"] = "always"
 
-        response = compat_client.responses.create(
+        response = responses_client.responses.create(
             model=text_model_id,
             input=case.input,
             tools=tools,
@@ -352,13 +335,13 @@ def test_response_mcp_tool_approval(compat_client, text_model_id, case, approve)
         approval_request = response.output[1]
         assert approval_request.type == "mcp_approval_request"
         assert approval_request.name == "get_boiling_point"
-        assert json.loads(approval_request.arguments) == {
-            "liquid_name": "myawesomeliquid",
-            "celsius": True,
-        }
+        args = json.loads(approval_request.arguments)
+        assert args["liquid_name"] == "myawesomeliquid"
+        # celsius has a default value of True, so it may be omitted or explicitly set
+        assert args.get("celsius", True) is True
 
         # send approval response
-        response = compat_client.responses.create(
+        response = responses_client.responses.create(
             previous_response_id=response.id,
             model=text_model_id,
             input=[{"type": "mcp_approval_response", "approval_request_id": approval_request.id, "approve": approve}],
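
The approval flow exercised above, condensed: mark the MCP tool with require_approval="always", read the mcp_approval_request item from the first response, then answer it with an mcp_approval_response input tied back to that response. A sketch with placeholder names:

tools = [{"type": "mcp", "server_label": "localmcp", "server_url": mcp_url, "require_approval": "always"}]  # placeholders
first = client.responses.create(model=model_id, input="What is the boiling point of myawesomeliquid?", tools=tools)
approval_request = next(item for item in first.output if item.type == "mcp_approval_request")
final = client.responses.create(
    model=model_id,
    previous_response_id=first.id,
    input=[{"type": "mcp_approval_response", "approval_request_id": approval_request.id, "approve": True}],
    tools=tools,
)
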
@@ -398,8 +381,8 @@ def test_response_mcp_tool_approval(compat_client, text_model_id, case, approve)
 
 
 @pytest.mark.parametrize("case", custom_tool_test_cases)
-def test_response_non_streaming_custom_tool(compat_client, text_model_id, case):
-    response = compat_client.responses.create(
+def test_response_non_streaming_custom_tool(responses_client, text_model_id, case):
+    response = responses_client.responses.create(
         model=text_model_id,
         input=case.input,
         tools=case.tools,
@@ -412,8 +395,8 @@ def test_response_non_streaming_custom_tool(compat_client, text_model_id, case):
 
 
 @pytest.mark.parametrize("case", custom_tool_test_cases)
-def test_response_function_call_ordering_1(compat_client, text_model_id, case):
-    response = compat_client.responses.create(
+def test_response_function_call_ordering_1(responses_client, text_model_id, case):
+    response = responses_client.responses.create(
         model=text_model_id,
         input=case.input,
         tools=case.tools,
@@ -437,13 +420,13 @@ def test_response_function_call_ordering_1(compat_client, text_model_id, case):
             "call_id": response.output[0].call_id,
         }
     )
-    response = compat_client.responses.create(
+    response = responses_client.responses.create(
         model=text_model_id, input=inputs, tools=case.tools, stream=False, previous_response_id=response.id
     )
     assert len(response.output) == 1
 
 
-def test_response_function_call_ordering_2(compat_client, text_model_id):
+def test_response_function_call_ordering_2(responses_client, text_model_id):
     tools = [
         {
             "type": "function",
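
For reference, the function-call round trip these ordering tests depend on: the first response returns a function_call output item, the caller appends a function_call_output item carrying the same call_id and the tool result to the input list, and the follow-up request links back via previous_response_id. A sketch with placeholder names and result text:

first = client.responses.create(model=model_id, input=inputs, tools=tools, stream=False)
call = first.output[0]  # expected to be a function_call item
inputs.append(
    {
        "type": "function_call_output",
        "output": "It is raining.",  # placeholder tool result
        "call_id": call.call_id,
    }
)
followup = client.responses.create(
    model=model_id, input=inputs, tools=tools, stream=False, previous_response_id=first.id
)
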
@@ -468,7 +451,7 @@ def test_response_function_call_ordering_2(compat_client, text_model_id):
             "content": "Is the weather better in San Francisco or Los Angeles?",
         }
     ]
-    response = compat_client.responses.create(
+    response = responses_client.responses.create(
         model=text_model_id,
         input=inputs,
         tools=tools,
@@ -489,7 +472,7 @@ def test_response_function_call_ordering_2(compat_client, text_model_id):
                 "call_id": output.call_id,
             }
         )
-    response = compat_client.responses.create(
+    response = responses_client.responses.create(
         model=text_model_id,
         input=inputs,
         tools=tools,
@@ -500,15 +483,12 @@ def test_response_function_call_ordering_2(compat_client, text_model_id):
 
 
 @pytest.mark.parametrize("case", multi_turn_tool_execution_test_cases)
-def test_response_non_streaming_multi_turn_tool_execution(compat_client, text_model_id, case):
+def test_response_non_streaming_multi_turn_tool_execution(responses_client, text_model_id, case):
     """Test multi-turn tool execution where multiple MCP tool calls are performed in sequence."""
-    if not isinstance(compat_client, LlamaStackAsLibraryClient):
-        pytest.skip("in-process MCP server is only supported in library client")
-
     with make_mcp_server(tools=dependency_tools()) as mcp_server_info:
         tools = setup_mcp_tools(case.tools, mcp_server_info)
 
-        response = compat_client.responses.create(
+        response = responses_client.responses.create(
             input=case.input,
             model=text_model_id,
             tools=tools,
@@ -550,15 +530,12 @@ def test_response_non_streaming_multi_turn_tool_execution(compat_client, text_mo
 
 
 @pytest.mark.parametrize("case", multi_turn_tool_execution_streaming_test_cases)
-def test_response_streaming_multi_turn_tool_execution(compat_client, text_model_id, case):
+def test_response_streaming_multi_turn_tool_execution(responses_client, text_model_id, case):
     """Test streaming multi-turn tool execution where multiple MCP tool calls are performed in sequence."""
-    if not isinstance(compat_client, LlamaStackAsLibraryClient):
-        pytest.skip("in-process MCP server is only supported in library client")
-
     with make_mcp_server(tools=dependency_tools()) as mcp_server_info:
         tools = setup_mcp_tools(case.tools, mcp_server_info)
 
-        stream = compat_client.responses.create(
+        stream = responses_client.responses.create(
             input=case.input,
             model=text_model_id,
             tools=tools,