mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-12-03 09:53:45 +00:00)
feat(tests): enable MCP tests in server mode (#4146)
We would like to run all OpenAI compatibility tests using only the openai-client library. This is friendliest for contributors, since they can run tests without needing to update the client SDKs (which is getting easier, but is still a long pole). This is the first step in enabling that: no longer using the "library client" for any of the Responses tests. This seems like a reasonable trade-off, since using an embeddable library client for Responses (or any OpenAI-compatible) behavior appears to be uncommon. To do this, we needed to enable the MCP tests (which previously only worked in library-client mode) in server mode.
This commit is contained in:
parent
9eb81439d2
commit
1e81056a22
29 changed files with 13388 additions and 127 deletions
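The diff below mechanically renames the test fixture from compat_client to responses_client across the Responses tests. For illustration, such a fixture could be backed by the plain openai-python client pointed at a running stack server. This is a hedged sketch, not the repository's actual fixture; the base URL, port, env var name, and api_key handling are all assumptions:

    import os

    import pytest
    from openai import OpenAI


    @pytest.fixture(scope="session")
    def responses_client():
        # Hypothetical: assumes a Llama Stack server is already running and
        # serving its OpenAI-compatible API at this base URL (8321 is the
        # stack's default port; the env var name here is illustrative).
        base_url = os.environ.get("LLAMA_STACK_BASE_URL", "http://localhost:8321/v1")
        # A local stack server typically ignores the key, so a placeholder works.
        return OpenAI(base_url=base_url, api_key="not-needed")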
@@ -13,8 +13,8 @@ from .streaming_assertions import StreamingValidator

 @pytest.mark.parametrize("case", basic_test_cases)
-def test_response_non_streaming_basic(compat_client, text_model_id, case):
-    response = compat_client.responses.create(
+def test_response_non_streaming_basic(responses_client, text_model_id, case):
+    response = responses_client.responses.create(
         model=text_model_id,
         input=case.input,
         stream=False,
@@ -31,10 +31,10 @@ def test_response_non_streaming_basic(compat_client, text_model_id, case):
         "Total tokens should equal input + output tokens"
     )

-    retrieved_response = compat_client.responses.retrieve(response_id=response.id)
+    retrieved_response = responses_client.responses.retrieve(response_id=response.id)
     assert retrieved_response.output_text == response.output_text

-    next_response = compat_client.responses.create(
+    next_response = responses_client.responses.create(
         model=text_model_id,
         input="Repeat your previous response in all caps.",
         previous_response_id=response.id,
@@ -44,8 +44,8 @@ def test_response_non_streaming_basic(compat_client, text_model_id, case):

 @pytest.mark.parametrize("case", basic_test_cases)
-def test_response_streaming_basic(compat_client, text_model_id, case):
-    response = compat_client.responses.create(
+def test_response_streaming_basic(responses_client, text_model_id, case):
+    response = responses_client.responses.create(
         model=text_model_id,
         input=case.input,
         stream=True,
@@ -98,15 +98,15 @@ def test_response_streaming_basic(compat_client, text_model_id, case):
     validator.assert_response_consistency()

     # Verify stored response matches streamed response
-    retrieved_response = compat_client.responses.retrieve(response_id=response_id)
+    retrieved_response = responses_client.responses.retrieve(response_id=response_id)
     final_event = events[-1]
     assert retrieved_response.output_text == final_event.response.output_text


 @pytest.mark.parametrize("case", basic_test_cases)
-def test_response_streaming_incremental_content(compat_client, text_model_id, case):
+def test_response_streaming_incremental_content(responses_client, text_model_id, case):
     """Test that streaming actually delivers content incrementally, not just at the end."""
-    response = compat_client.responses.create(
+    response = responses_client.responses.create(
         model=text_model_id,
         input=case.input,
         stream=True,
@@ -170,10 +170,10 @@ def test_response_streaming_incremental_content(compat_client, text_model_id, case):

 @pytest.mark.parametrize("case", multi_turn_test_cases)
-def test_response_non_streaming_multi_turn(compat_client, text_model_id, case):
+def test_response_non_streaming_multi_turn(responses_client, text_model_id, case):
     previous_response_id = None
     for turn_input, turn_expected in case.turns:
-        response = compat_client.responses.create(
+        response = responses_client.responses.create(
             model=text_model_id,
             input=turn_input,
             previous_response_id=previous_response_id,
@@ -184,8 +184,8 @@ def test_response_non_streaming_multi_turn(compat_client, text_model_id, case):

 @pytest.mark.parametrize("case", image_test_cases)
-def test_response_non_streaming_image(compat_client, text_model_id, case):
-    response = compat_client.responses.create(
+def test_response_non_streaming_image(responses_client, text_model_id, case):
+    response = responses_client.responses.create(
         model=text_model_id,
         input=case.input,
         stream=False,
@@ -195,10 +195,10 @@ def test_response_non_streaming_image(compat_client, text_model_id, case):

 @pytest.mark.parametrize("case", multi_turn_image_test_cases)
-def test_response_non_streaming_multi_turn_image(compat_client, text_model_id, case):
+def test_response_non_streaming_multi_turn_image(responses_client, text_model_id, case):
     previous_response_id = None
     for turn_input, turn_expected in case.turns:
-        response = compat_client.responses.create(
+        response = responses_client.responses.create(
             model=text_model_id,
             input=turn_input,
             previous_response_id=previous_response_id,
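Enabling the MCP tests in server mode changes where the test MCP server must be reachable from: the stack server process makes the MCP calls, so a URL that only resolves inside the test process no longer works. A minimal sketch of what an MCP tool invocation through the Responses API could look like, following the OpenAI-style "mcp" tool payload; the server label, URL, and input text are illustrative assumptions, not taken from these tests:

    # Hypothetical: in server mode, server_url must be reachable from the
    # Llama Stack server process, not just from the test process.
    tools = [
        {
            "type": "mcp",
            "server_label": "localmcp",                  # illustrative label
            "server_url": "http://localhost:8000/sse",   # assumed server-reachable
            "require_approval": "never",
        }
    ]

    response = responses_client.responses.create(
        model=text_model_id,
        input="Use the available tools to answer.",
        tools=tools,
        stream=False,
    )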