We would like to run all OpenAI compatibility tests using only the openai-client library. This is friendliest for contributors, since they can run tests without needing to update the client SDKs (which is getting easier, but is still a long pole). This is the first step in enabling that: stop using the "library client" for any of the Responses tests. This seems like a reasonable trade-off, since using an embeddable library client for Responses (or any OpenAI-compatible) behavior appears to be uncommon. To do this, we needed to enable the MCP tests (which previously worked only in library client mode) in server mode.
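To make "openai-client only" concrete: the tests talk to a running server purely through the stock OpenAI SDK. A minimal sketch of how the openai_client fixture used below could be wired up follows; the base URL and API key are illustrative placeholders, not the project's actual conftest:

import pytest
from openai import OpenAI


@pytest.fixture
def openai_client():
    # Hypothetical wiring: point the stock OpenAI client at a locally running,
    # OpenAI-compatible Llama Stack server. Base URL and API key are placeholders.
    return OpenAI(base_url="http://localhost:8321/v1", api_key="none")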
148 lines
6.2 KiB
Python
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

import pytest


@pytest.mark.integration
class TestConversationResponses:
    """Integration tests for the conversation parameter in responses API."""

    def test_conversation_basic_workflow(self, openai_client, text_model_id):
        """Test basic conversation workflow: create conversation, add response, verify sync."""
        conversation = openai_client.conversations.create(metadata={"topic": "test"})
        assert conversation.id.startswith("conv_")

        response = openai_client.responses.create(
            model=text_model_id,
            input=[{"role": "user", "content": "What are the 5 Ds of dodgeball?"}],
            conversation=conversation.id,
        )

        assert response.id.startswith("resp_")
        assert len(response.output_text.strip()) > 0

        # Verify conversation was synced properly
        conversation_items = openai_client.conversations.items.list(conversation.id)
        assert len(conversation_items.data) >= 2

        roles = [item.role for item in conversation_items.data if hasattr(item, "role")]
        assert "user" in roles and "assistant" in roles

    def test_conversation_multi_turn_and_streaming(self, openai_client, text_model_id):
        """Test multi-turn conversations and streaming responses."""
        conversation = openai_client.conversations.create()

        # First turn
        response1 = openai_client.responses.create(
            model=text_model_id,
            input=[{"role": "user", "content": "Say hello"}],
            conversation=conversation.id,
        )

        # Second turn with streaming
        response_stream = openai_client.responses.create(
            model=text_model_id,
            input=[{"role": "user", "content": "Say goodbye"}],
            conversation=conversation.id,
            stream=True,
        )

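        # A streamed response yields typed events; the "response.completed" event
        # carries the full response object.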
        final_response = None
        for chunk in response_stream:
            if chunk.type == "response.completed":
                final_response = chunk.response
                break

        assert final_response is not None
        assert response1.id != final_response.id
        assert len(response1.output_text.strip()) > 0
        assert len(final_response.output_text.strip()) > 0

        # Verify all turns are in conversation
        conversation_items = openai_client.conversations.items.list(conversation.id)
        assert len(conversation_items.data) >= 4  # 2 user + 2 assistant messages

    @pytest.mark.timeout(60, method="thread")
    def test_conversation_context_loading(self, openai_client, text_model_id):
        """Test that conversation context is properly loaded for responses.

        Note: 60s timeout added due to CI-specific deadlock in pytest/OpenAI client/httpx
        after running 25+ tests. Hangs before first HTTP request is made. Works fine locally.
        Investigation needed: connection pool exhaustion or event loop state issue.
        """
        conversation = openai_client.conversations.create(
            items=[
                {"type": "message", "role": "user", "content": "My name is Alice. I like to eat apples."},
                {"type": "message", "role": "assistant", "content": "Hello Alice!"},
            ]
        )

        response = openai_client.responses.create(
            model=text_model_id,
            input=[{"role": "user", "content": "What do I like to eat?"}],
            conversation=conversation.id,
        )

        assert "apple" in response.output_text.lower()

    def test_conversation_error_handling(self, openai_client, text_model_id):
        """Test error handling for invalid and nonexistent conversations."""
        # Invalid conversation ID format
        with pytest.raises(Exception) as exc_info:
            openai_client.responses.create(
                model=text_model_id,
                input=[{"role": "user", "content": "Hello"}],
                conversation="invalid_id",
            )
        assert any(word in str(exc_info.value).lower() for word in ["conv", "invalid", "bad"])

        # Nonexistent conversation ID
        with pytest.raises(Exception) as exc_info:
            openai_client.responses.create(
                model=text_model_id,
                input=[{"role": "user", "content": "Hello"}],
                conversation="conv_nonexistent123",
            )
        assert any(word in str(exc_info.value).lower() for word in ["not found", "404"])

        # Disabled: verifies that conversation and previous_response_id are mutually exclusive.
        # response = openai_client.responses.create(
        #     model=text_model_id, input=[{"role": "user", "content": "First response"}]
        # )
        # with pytest.raises(Exception) as exc_info:
        #     openai_client.responses.create(
        #         model=text_model_id,
        #         input=[{"role": "user", "content": "Hello"}],
        #         conversation="conv_test123",
        #         previous_response_id=response.id,
        #     )
        # assert "mutually exclusive" in str(exc_info.value).lower()

    def test_conversation_backward_compatibility(self, openai_client, text_model_id):
        """Test that responses work without conversation parameter (backward compatibility)."""
        response = openai_client.responses.create(
            model=text_model_id, input=[{"role": "user", "content": "Hello world"}]
        )

        assert response.id.startswith("resp_")
        assert len(response.output_text.strip()) > 0

    # this is not ready yet
    # def test_conversation_compat_client(self, responses_client, text_model_id):
    #     """Test conversation parameter works with compatibility client."""
    #     if not hasattr(responses_client, "conversations"):
    #         pytest.skip("responses_client does not support conversations API")
    #
    #     conversation = responses_client.conversations.create()
    #     response = responses_client.responses.create(
    #         model=text_model_id, input="Tell me a joke", conversation=conversation.id
    #     )
    #
    #     assert response is not None
    #     assert len(response.output_text.strip()) > 0
    #
    #     conversation_items = responses_client.conversations.items.list(conversation.id)
    #     assert len(conversation_items.data) >= 2
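Since the class carries @pytest.mark.integration and relies on the suite's openai_client and text_model_id fixtures, these tests can be selected by marker once a server and model are available; assuming a conventional layout (the path here is illustrative), an invocation might look like:

pytest -m integration tests/integration/responses/test_conversation_responses.py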