Mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-12-03 09:53:45 +00:00)
feat(tests): enable MCP tests in server mode (#4146)
We would like to run all OpenAI compatibility tests using only the openai client library. This is the most contributor-friendly setup, since tests can be run without updating the client SDKs (which is getting easier, but is still a long pole). This is the first step toward that: the "library client" is no longer used for any of the Responses tests. This seems like a reasonable trade-off, since using an embeddable library client for Responses (or any OpenAI-compatible behavior) appears to be uncommon. To do this, we needed to make the MCP tests, which previously only worked in library client mode, work in server mode.
Parent: 9eb81439d2
Commit: 1e81056a22
29 changed files with 13388 additions and 127 deletions
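For context, a rough sketch of what a server-mode `responses_client` fixture could look like, using only the plain openai client pointed at a running Llama Stack server. The actual fixture is defined in the test suite's conftest and is not part of this excerpt; the fixture name comes from the diff below, while the environment variable and default URL here are illustrative assumptions.

```python
# Hypothetical sketch, not the fixture shipped in this PR.
import os

import pytest
from openai import OpenAI


@pytest.fixture
def responses_client():
    # Point the plain OpenAI client at the stack server's OpenAI-compatible
    # endpoint; no embedded "library client" is needed for Responses tests.
    base_url = os.environ.get("LLAMA_STACK_BASE_URL", "http://localhost:8321/v1")  # assumed env var / port
    return OpenAI(base_url=base_url, api_key="none")
```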
@@ -9,6 +9,7 @@ import logging  # allow-direct-logging
 import os

 import httpx
+import llama_stack_client
 import openai
 import pytest

@@ -29,8 +30,8 @@ from .streaming_assertions import StreamingValidator


 @pytest.mark.parametrize("case", web_search_test_cases)
-def test_response_non_streaming_web_search(compat_client, text_model_id, case):
-    response = compat_client.responses.create(
+def test_response_non_streaming_web_search(responses_client, text_model_id, case):
+    response = responses_client.responses.create(
         model=text_model_id,
         input=case.input,
         tools=case.tools,
@@ -48,12 +49,9 @@ def test_response_non_streaming_web_search(compat_client, text_model_id, case):

 @pytest.mark.parametrize("case", file_search_test_cases)
 def test_response_non_streaming_file_search(
-    compat_client, text_model_id, embedding_model_id, embedding_dimension, tmp_path, case
+    responses_client, text_model_id, embedding_model_id, embedding_dimension, tmp_path, case
 ):
-    if isinstance(compat_client, LlamaStackAsLibraryClient):
-        pytest.skip("Responses API file search is not yet supported in library client.")
-
-    vector_store = new_vector_store(compat_client, "test_vector_store", embedding_model_id, embedding_dimension)
+    vector_store = new_vector_store(responses_client, "test_vector_store", embedding_model_id, embedding_dimension)

     if case.file_content:
         file_name = "test_response_non_streaming_file_search.txt"
@@ -65,16 +63,16 @@ def test_response_non_streaming_file_search(
     else:
         raise ValueError("No file content or path provided for case")

-    file_response = upload_file(compat_client, file_name, file_path)
+    file_response = upload_file(responses_client, file_name, file_path)

     # Attach our file to the vector store
-    compat_client.vector_stores.files.create(
+    responses_client.vector_stores.files.create(
         vector_store_id=vector_store.id,
         file_id=file_response.id,
     )

     # Wait for the file to be attached
-    wait_for_file_attachment(compat_client, vector_store.id, file_response.id)
+    wait_for_file_attachment(responses_client, vector_store.id, file_response.id)

     # Update our tools with the right vector store id
     tools = case.tools
@@ -83,7 +81,7 @@ def test_response_non_streaming_file_search(
             tool["vector_store_ids"] = [vector_store.id]

     # Create the response request, which should query our vector store
-    response = compat_client.responses.create(
+    response = responses_client.responses.create(
         model=text_model_id,
         input=case.input,
         tools=tools,
@@ -105,15 +103,12 @@ def test_response_non_streaming_file_search(


 def test_response_non_streaming_file_search_empty_vector_store(
-    compat_client, text_model_id, embedding_model_id, embedding_dimension
+    responses_client, text_model_id, embedding_model_id, embedding_dimension
 ):
-    if isinstance(compat_client, LlamaStackAsLibraryClient):
-        pytest.skip("Responses API file search is not yet supported in library client.")
-
-    vector_store = new_vector_store(compat_client, "test_vector_store", embedding_model_id, embedding_dimension)
+    vector_store = new_vector_store(responses_client, "test_vector_store", embedding_model_id, embedding_dimension)

     # Create the response request, which should query our vector store
-    response = compat_client.responses.create(
+    response = responses_client.responses.create(
         model=text_model_id,
         input="How many experts does the Llama 4 Maverick model have?",
         tools=[{"type": "file_search", "vector_store_ids": [vector_store.id]}],
@@ -133,13 +128,10 @@ def test_response_non_streaming_file_search_empty_vector_store(


 def test_response_sequential_file_search(
-    compat_client, text_model_id, embedding_model_id, embedding_dimension, tmp_path
+    responses_client, text_model_id, embedding_model_id, embedding_dimension, tmp_path
 ):
     """Test file search with sequential responses using previous_response_id."""
-    if isinstance(compat_client, LlamaStackAsLibraryClient):
-        pytest.skip("Responses API file search is not yet supported in library client.")
-
-    vector_store = new_vector_store(compat_client, "test_vector_store", embedding_model_id, embedding_dimension)
+    vector_store = new_vector_store(responses_client, "test_vector_store", embedding_model_id, embedding_dimension)

     # Create a test file with content
     file_content = "The Llama 4 Maverick model has 128 experts in its mixture of experts architecture."
@@ -147,21 +139,21 @@ def test_response_sequential_file_search(
     file_path = tmp_path / file_name
     file_path.write_text(file_content)

-    file_response = upload_file(compat_client, file_name, file_path)
+    file_response = upload_file(responses_client, file_name, file_path)

     # Attach the file to the vector store
-    compat_client.vector_stores.files.create(
+    responses_client.vector_stores.files.create(
         vector_store_id=vector_store.id,
         file_id=file_response.id,
     )

     # Wait for the file to be attached
-    wait_for_file_attachment(compat_client, vector_store.id, file_response.id)
+    wait_for_file_attachment(responses_client, vector_store.id, file_response.id)

     tools = [{"type": "file_search", "vector_store_ids": [vector_store.id]}]

     # First response request with file search
-    response = compat_client.responses.create(
+    response = responses_client.responses.create(
         model=text_model_id,
         input="How many experts does the Llama 4 Maverick model have?",
         tools=tools,
@@ -178,7 +170,7 @@ def test_response_sequential_file_search(
     assert "128" in response.output_text or "experts" in response.output_text.lower()

     # Second response request using previous_response_id
-    response2 = compat_client.responses.create(
+    response2 = responses_client.responses.create(
         model=text_model_id,
         input="Can you tell me more about the architecture?",
         tools=tools,
@@ -199,14 +191,11 @@ def test_response_sequential_file_search(


 @pytest.mark.parametrize("case", mcp_tool_test_cases)
-def test_response_non_streaming_mcp_tool(compat_client, text_model_id, case, caplog):
-    if not isinstance(compat_client, LlamaStackAsLibraryClient):
-        pytest.skip("in-process MCP server is only supported in library client")
-
+def test_response_non_streaming_mcp_tool(responses_client, text_model_id, case, caplog):
     with make_mcp_server() as mcp_server_info:
         tools = setup_mcp_tools(case.tools, mcp_server_info)

-        response = compat_client.responses.create(
+        response = responses_client.responses.create(
             model=text_model_id,
             input=case.input,
             tools=tools,
@@ -243,15 +232,15 @@ def test_response_non_streaming_mcp_tool(compat_client, text_model_id, case, caplog):

         exc_type = (
             AuthenticationRequiredError
-            if isinstance(compat_client, LlamaStackAsLibraryClient)
-            else (httpx.HTTPStatusError, openai.AuthenticationError)
+            if isinstance(responses_client, LlamaStackAsLibraryClient)
+            else (httpx.HTTPStatusError, openai.AuthenticationError, llama_stack_client.AuthenticationError)
         )
         # Suppress expected auth error logs only for the failing auth attempt
         with caplog.at_level(
             logging.CRITICAL, logger="llama_stack.providers.inline.agents.meta_reference.responses.streaming"
         ):
             with pytest.raises(exc_type):
-                compat_client.responses.create(
+                responses_client.responses.create(
                     model=text_model_id,
                     input=case.input,
                     tools=tools,
@@ -262,7 +251,7 @@ def test_response_non_streaming_mcp_tool(compat_client, text_model_id, case, caplog):
             if tool["type"] == "mcp":
                 tool["headers"] = {"Authorization": "Bearer test-token"}

-        response = compat_client.responses.create(
+        response = responses_client.responses.create(
             model=text_model_id,
             input=case.input,
             tools=tools,
@@ -272,14 +261,11 @@ def test_response_non_streaming_mcp_tool(compat_client, text_model_id, case, caplog):


 @pytest.mark.parametrize("case", mcp_tool_test_cases)
-def test_response_sequential_mcp_tool(compat_client, text_model_id, case):
-    if not isinstance(compat_client, LlamaStackAsLibraryClient):
-        pytest.skip("in-process MCP server is only supported in library client")
-
+def test_response_sequential_mcp_tool(responses_client, text_model_id, case):
     with make_mcp_server() as mcp_server_info:
         tools = setup_mcp_tools(case.tools, mcp_server_info)

-        response = compat_client.responses.create(
+        response = responses_client.responses.create(
             model=text_model_id,
             input=case.input,
             tools=tools,
@@ -311,7 +297,7 @@ def test_response_sequential_mcp_tool(compat_client, text_model_id, case):
         text_content = message.content[0].text
         assert "boiling point" in text_content.lower()

-        response2 = compat_client.responses.create(
+        response2 = responses_client.responses.create(
             model=text_model_id, input=case.input, tools=tools, stream=False, previous_response_id=response.id
         )

@@ -323,16 +309,13 @@ def test_response_sequential_mcp_tool(compat_client, text_model_id, case):

 @pytest.mark.parametrize("case", mcp_tool_test_cases)
 @pytest.mark.parametrize("approve", [True, False])
-def test_response_mcp_tool_approval(compat_client, text_model_id, case, approve):
-    if not isinstance(compat_client, LlamaStackAsLibraryClient):
-        pytest.skip("in-process MCP server is only supported in library client")
-
+def test_response_mcp_tool_approval(responses_client, text_model_id, case, approve):
     with make_mcp_server() as mcp_server_info:
         tools = setup_mcp_tools(case.tools, mcp_server_info)
         for tool in tools:
             tool["require_approval"] = "always"

-        response = compat_client.responses.create(
+        response = responses_client.responses.create(
             model=text_model_id,
             input=case.input,
             tools=tools,
@@ -352,13 +335,13 @@ def test_response_mcp_tool_approval(compat_client, text_model_id, case, approve):
         approval_request = response.output[1]
         assert approval_request.type == "mcp_approval_request"
         assert approval_request.name == "get_boiling_point"
-        assert json.loads(approval_request.arguments) == {
-            "liquid_name": "myawesomeliquid",
-            "celsius": True,
-        }
+        args = json.loads(approval_request.arguments)
+        assert args["liquid_name"] == "myawesomeliquid"
+        # celsius has a default value of True, so it may be omitted or explicitly set
+        assert args.get("celsius", True) is True

         # send approval response
-        response = compat_client.responses.create(
+        response = responses_client.responses.create(
             previous_response_id=response.id,
             model=text_model_id,
             input=[{"type": "mcp_approval_response", "approval_request_id": approval_request.id, "approve": approve}],
@@ -398,8 +381,8 @@ def test_response_mcp_tool_approval(compat_client, text_model_id, case, approve):


 @pytest.mark.parametrize("case", custom_tool_test_cases)
-def test_response_non_streaming_custom_tool(compat_client, text_model_id, case):
-    response = compat_client.responses.create(
+def test_response_non_streaming_custom_tool(responses_client, text_model_id, case):
+    response = responses_client.responses.create(
         model=text_model_id,
         input=case.input,
         tools=case.tools,
@@ -412,8 +395,8 @@ def test_response_non_streaming_custom_tool(compat_client, text_model_id, case):


 @pytest.mark.parametrize("case", custom_tool_test_cases)
-def test_response_function_call_ordering_1(compat_client, text_model_id, case):
-    response = compat_client.responses.create(
+def test_response_function_call_ordering_1(responses_client, text_model_id, case):
+    response = responses_client.responses.create(
         model=text_model_id,
         input=case.input,
         tools=case.tools,
@@ -437,13 +420,13 @@ def test_response_function_call_ordering_1(compat_client, text_model_id, case):
             "call_id": response.output[0].call_id,
         }
     )
-    response = compat_client.responses.create(
+    response = responses_client.responses.create(
         model=text_model_id, input=inputs, tools=case.tools, stream=False, previous_response_id=response.id
     )
     assert len(response.output) == 1


-def test_response_function_call_ordering_2(compat_client, text_model_id):
+def test_response_function_call_ordering_2(responses_client, text_model_id):
     tools = [
         {
             "type": "function",
@@ -468,7 +451,7 @@ def test_response_function_call_ordering_2(compat_client, text_model_id):
             "content": "Is the weather better in San Francisco or Los Angeles?",
         }
     ]
-    response = compat_client.responses.create(
+    response = responses_client.responses.create(
         model=text_model_id,
         input=inputs,
         tools=tools,
@@ -489,7 +472,7 @@ def test_response_function_call_ordering_2(compat_client, text_model_id):
                 "call_id": output.call_id,
             }
         )
-    response = compat_client.responses.create(
+    response = responses_client.responses.create(
         model=text_model_id,
         input=inputs,
         tools=tools,
@@ -500,15 +483,12 @@ def test_response_function_call_ordering_2(compat_client, text_model_id):


 @pytest.mark.parametrize("case", multi_turn_tool_execution_test_cases)
-def test_response_non_streaming_multi_turn_tool_execution(compat_client, text_model_id, case):
+def test_response_non_streaming_multi_turn_tool_execution(responses_client, text_model_id, case):
     """Test multi-turn tool execution where multiple MCP tool calls are performed in sequence."""
-    if not isinstance(compat_client, LlamaStackAsLibraryClient):
-        pytest.skip("in-process MCP server is only supported in library client")
-
     with make_mcp_server(tools=dependency_tools()) as mcp_server_info:
         tools = setup_mcp_tools(case.tools, mcp_server_info)

-        response = compat_client.responses.create(
+        response = responses_client.responses.create(
             input=case.input,
             model=text_model_id,
             tools=tools,
@@ -550,15 +530,12 @@ def test_response_non_streaming_multi_turn_tool_execution(compat_client, text_model_id, case):


 @pytest.mark.parametrize("case", multi_turn_tool_execution_streaming_test_cases)
-def test_response_streaming_multi_turn_tool_execution(compat_client, text_model_id, case):
+def test_response_streaming_multi_turn_tool_execution(responses_client, text_model_id, case):
     """Test streaming multi-turn tool execution where multiple MCP tool calls are performed in sequence."""
-    if not isinstance(compat_client, LlamaStackAsLibraryClient):
-        pytest.skip("in-process MCP server is only supported in library client")
-
    with make_mcp_server(tools=dependency_tools()) as mcp_server_info:
         tools = setup_mcp_tools(case.tools, mcp_server_info)

-        stream = compat_client.responses.create(
+        stream = responses_client.responses.create(
             input=case.input,
             model=text_model_id,
             tools=tools,
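The reason the MCP tests no longer need the library client: `make_mcp_server()` starts an HTTP-reachable MCP server, so a separately running stack server can call it just as the in-process library client could. As a rough illustration of what a helper like `setup_mcp_tools` might do (the real helper and the exact shape of `mcp_server_info` are defined elsewhere in the test suite; the `server_url` key below is an assumption):

```python
# Illustrative sketch only; the field name on mcp_server_info is assumed.
def setup_mcp_tools(tools: list[dict], mcp_server_info: dict) -> list[dict]:
    configured = []
    for tool in tools:
        tool = dict(tool)  # avoid mutating the shared test case definition
        if tool.get("type") == "mcp":
            # Point the tool definition at the just-started MCP server so the
            # stack server (or the library client) can reach it over HTTP.
            tool["server_url"] = mcp_server_info["server_url"]
        configured.append(tool)
    return configured
```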