llama-stack-mirror/tests/integration/responses/test_conversation_responses.py
Ashwin Bharambe 1e81056a22
feat(tests): enable MCP tests in server mode (#4146)
We would like to run all OpenAI compatibility tests using only the
openai-client library. This is the most contributor-friendly option, since
contributors can run the tests without needing to update the client SDKs
(which is getting easier, but is still a long pole).

This is the first step in enabling that: stop using the "library client" for
any of the Responses tests. This seems like a reasonable trade-off, since
using an embeddable library client for Responses (or any other
OpenAI-compatible behavior) appears to be uncommon. To do this, we needed to
enable the MCP tests (which previously only worked in library client mode)
in server mode.
2025-11-13 07:23:23 -08:00
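
These tests rely on `openai_client` and `text_model_id` fixtures supplied by the
suite's conftest rather than anything defined in this file. As a rough
illustration only (the base URL, environment variables, and default model below
are assumptions, not the suite's actual wiring), such fixtures could point a
stock OpenAI client at a running Llama Stack server's OpenAI-compatible API:

import os

import pytest
from openai import OpenAI


@pytest.fixture
def openai_client():
    # Assumption: a stack server is already running and serves an
    # OpenAI-compatible API at this base URL; adjust for your deployment.
    base_url = os.environ.get("LLAMA_STACK_BASE_URL", "http://localhost:8321/v1")
    return OpenAI(base_url=base_url, api_key="none")


@pytest.fixture
def text_model_id():
    # Assumption: any text-generation model registered with the running stack.
    return os.environ.get("TEXT_MODEL_ID", "meta-llama/Llama-3.1-8B-Instruct")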


# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

import pytest


@pytest.mark.integration
class TestConversationResponses:
    """Integration tests for the conversation parameter in responses API."""

    def test_conversation_basic_workflow(self, openai_client, text_model_id):
        """Test basic conversation workflow: create conversation, add response, verify sync."""
        conversation = openai_client.conversations.create(metadata={"topic": "test"})
        assert conversation.id.startswith("conv_")

        response = openai_client.responses.create(
            model=text_model_id,
            input=[{"role": "user", "content": "What are the 5 Ds of dodgeball?"}],
            conversation=conversation.id,
        )
        assert response.id.startswith("resp_")
        assert len(response.output_text.strip()) > 0

        # Verify conversation was synced properly
        conversation_items = openai_client.conversations.items.list(conversation.id)
        assert len(conversation_items.data) >= 2
        roles = [item.role for item in conversation_items.data if hasattr(item, "role")]
        assert "user" in roles and "assistant" in roles

    def test_conversation_multi_turn_and_streaming(self, openai_client, text_model_id):
        """Test multi-turn conversations and streaming responses."""
        conversation = openai_client.conversations.create()

        # First turn
        response1 = openai_client.responses.create(
            model=text_model_id,
            input=[{"role": "user", "content": "Say hello"}],
            conversation=conversation.id,
        )

        # Second turn with streaming
        response_stream = openai_client.responses.create(
            model=text_model_id,
            input=[{"role": "user", "content": "Say goodbye"}],
            conversation=conversation.id,
            stream=True,
        )
        final_response = None
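        # Drain the stream; the terminal `response.completed` event carries the
        # fully assembled Response object.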
        for chunk in response_stream:
            if chunk.type == "response.completed":
                final_response = chunk.response
                break
        assert final_response is not None

        assert response1.id != final_response.id
        assert len(response1.output_text.strip()) > 0
        assert len(final_response.output_text.strip()) > 0

        # Verify all turns are in conversation
        conversation_items = openai_client.conversations.items.list(conversation.id)
        assert len(conversation_items.data) >= 4  # 2 user + 2 assistant messages

    @pytest.mark.timeout(60, method="thread")
    def test_conversation_context_loading(self, openai_client, text_model_id):
        """Test that conversation context is properly loaded for responses.

        Note: 60s timeout added due to a CI-specific deadlock in pytest/OpenAI client/httpx
        after running 25+ tests. Hangs before the first HTTP request is made. Works fine locally.
        Investigation needed: connection pool exhaustion or event loop state issue.
        """
        conversation = openai_client.conversations.create(
            items=[
                {"type": "message", "role": "user", "content": "My name is Alice. I like to eat apples."},
                {"type": "message", "role": "assistant", "content": "Hello Alice!"},
            ]
        )

        response = openai_client.responses.create(
            model=text_model_id,
            input=[{"role": "user", "content": "What do I like to eat?"}],
            conversation=conversation.id,
        )

        assert "apple" in response.output_text.lower()

    def test_conversation_error_handling(self, openai_client, text_model_id):
        """Test error handling for invalid and nonexistent conversations."""
        # Invalid conversation ID format
        with pytest.raises(Exception) as exc_info:
            openai_client.responses.create(
                model=text_model_id,
                input=[{"role": "user", "content": "Hello"}],
                conversation="invalid_id",
            )
        assert any(word in str(exc_info.value).lower() for word in ["conv", "invalid", "bad"])

        # Nonexistent conversation ID
        with pytest.raises(Exception) as exc_info:
            openai_client.responses.create(
                model=text_model_id,
                input=[{"role": "user", "content": "Hello"}],
                conversation="conv_nonexistent123",
            )
        assert any(word in str(exc_info.value).lower() for word in ["not found", "404"])
        #
        # response = openai_client.responses.create(
        #     model=text_model_id, input=[{"role": "user", "content": "First response"}]
        # )
        # with pytest.raises(Exception) as exc_info:
        #     openai_client.responses.create(
        #         model=text_model_id,
        #         input=[{"role": "user", "content": "Hello"}],
        #         conversation="conv_test123",
        #         previous_response_id=response.id,
        #     )
        # assert "mutually exclusive" in str(exc_info.value).lower()

    def test_conversation_backward_compatibility(self, openai_client, text_model_id):
        """Test that responses work without conversation parameter (backward compatibility)."""
        response = openai_client.responses.create(
            model=text_model_id, input=[{"role": "user", "content": "Hello world"}]
        )
        assert response.id.startswith("resp_")
        assert len(response.output_text.strip()) > 0

    # Not ready yet: conversation support via the compatibility client.
    # def test_conversation_compat_client(self, responses_client, text_model_id):
    #     """Test conversation parameter works with compatibility client."""
    #     if not hasattr(responses_client, "conversations"):
    #         pytest.skip("responses_client does not support conversations API")
    #
    #     conversation = responses_client.conversations.create()
    #     response = responses_client.responses.create(
    #         model=text_model_id, input="Tell me a joke", conversation=conversation.id
    #     )
    #
    #     assert response is not None
    #     assert len(response.output_text.strip()) > 0
    #
    #     conversation_items = responses_client.conversations.items.list(conversation.id)
    #     assert len(conversation_items.data) >= 2