Mirror of https://github.com/BerriAI/litellm.git (synced 2025-04-25 10:44:24 +00:00)

Commit b04cf226aa (parent a29587e178): test_openai_o1_pro_response_api_streaming

1 changed file with 122 additions and 1 deletion
@@ -829,7 +829,8 @@ async def test_async_bad_request_bad_param_error():
 
 
 @pytest.mark.asyncio
-async def test_openai_o1_pro_response_api():
+@pytest.mark.parametrize("sync_mode", [True, False])
+async def test_openai_o1_pro_response_api(sync_mode):
     """
     Test that LiteLLM correctly handles an incomplete response from OpenAI's o1-pro model
     due to reaching max_output_tokens limit.
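(Aside: the parametrization added in this hunk is the usual pytest-asyncio pattern of running one async test body once per flag value; the repo already relies on pytest-asyncio via the existing @pytest.mark.asyncio marker. A minimal, self-contained sketch of that pattern, with illustrative stand-in calls that are not part of this commit:

    import asyncio

    import pytest


    def call_sync():
        # Hypothetical stand-in for a synchronous SDK call.
        return "ok"


    async def call_async():
        # Hypothetical stand-in for an asynchronous SDK call.
        await asyncio.sleep(0)
        return "ok"


    @pytest.mark.asyncio
    @pytest.mark.parametrize("sync_mode", [True, False])
    async def test_both_paths(sync_mode):
        # pytest collects two cases, test_both_paths[True] and test_both_paths[False],
        # so a single body can exercise both the sync and the async code path.
        result = call_sync() if sync_mode else await call_async()
        assert result == "ok"

The new streaming test below uses the same flag to choose between litellm.responses and litellm.aresponses.)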
@@ -921,3 +922,123 @@ async def test_openai_o1_pro_response_api():
 
     # Validate that the response is properly identified as incomplete
     validate_responses_api_response(response, final_chunk=True)
+
+
+@pytest.mark.asyncio
+@pytest.mark.parametrize("sync_mode", [True, False])
+async def test_openai_o1_pro_response_api_streaming(sync_mode):
+    """
+    Test that LiteLLM correctly handles an incomplete response from OpenAI's o1-pro model
+    due to reaching max_output_tokens limit in both sync and async streaming modes.
+    """
+    # Mock response from o1-pro
+    mock_response = {
+        "id": "resp_67dc3dd77b388190822443a85252da5a0e13d8bdc0e28d88",
+        "object": "response",
+        "created_at": 1742486999,
+        "status": "incomplete",
+        "error": None,
+        "incomplete_details": {"reason": "max_output_tokens"},
+        "instructions": None,
+        "max_output_tokens": 20,
+        "model": "o1-pro-2025-03-19",
+        "output": [
+            {
+                "type": "reasoning",
+                "id": "rs_67dc3de50f64819097450ed50a33d5f90e13d8bdc0e28d88",
+                "summary": [],
+            }
+        ],
+        "parallel_tool_calls": True,
+        "previous_response_id": None,
+        "reasoning": {"effort": "medium", "generate_summary": None},
+        "store": True,
+        "temperature": 1.0,
+        "text": {"format": {"type": "text"}},
+        "tool_choice": "auto",
+        "tools": [],
+        "top_p": 1.0,
+        "truncation": "disabled",
+        "usage": {
+            "input_tokens": 73,
+            "input_tokens_details": {"cached_tokens": 0},
+            "output_tokens": 20,
+            "output_tokens_details": {"reasoning_tokens": 0},
+            "total_tokens": 93,
+        },
+        "user": None,
+        "metadata": {},
+    }
+
+    class MockResponse:
+        def __init__(self, json_data, status_code):
+            self._json_data = json_data
+            self.status_code = status_code
+            self.text = json.dumps(json_data)
+
+        def json(self):
+            return self._json_data
+
+    with patch(
+        "litellm.llms.custom_httpx.http_handler.AsyncHTTPHandler.post",
+        new_callable=AsyncMock,
+    ) as mock_post:
+        # Configure the mock to return our response
+        mock_post.return_value = MockResponse(mock_response, 200)
+
+        litellm._turn_on_debug()
+        litellm.set_verbose = True
+
+        # Verify the request was made correctly
+        if sync_mode:
+            # For sync mode, we need to patch the sync HTTP handler
+            with patch(
+                "litellm.llms.custom_httpx.http_handler.HTTPHandler.post",
+                return_value=MockResponse(mock_response, 200),
+            ) as mock_sync_post:
+                response = litellm.responses(
+                    model="openai/o1-pro",
+                    input="Write a detailed essay about artificial intelligence and its impact on society",
+                    max_output_tokens=20,
+                    stream=True,
+                )
+
+                # Process the sync stream
+                event_count = 0
+                for event in response:
+                    print(
+                        f"Sync litellm response #{event_count}:",
+                        json.dumps(event, indent=4, default=str),
+                    )
+                    event_count += 1
+
+                # Verify the sync request was made correctly
+                mock_sync_post.assert_called_once()
+                request_body = json.loads(mock_sync_post.call_args.kwargs["data"])
+                assert request_body["model"] == "o1-pro"
+                assert request_body["max_output_tokens"] == 20
+                assert "stream" not in request_body
+        else:
+            # For async mode
+            response = await litellm.aresponses(
+                model="openai/o1-pro",
+                input="Write a detailed essay about artificial intelligence and its impact on society",
+                max_output_tokens=20,
+                stream=True,
+            )
+
+            # Process the async stream
+            event_count = 0
+            async for event in response:
+                print(
+                    f"Async litellm response #{event_count}:",
+                    json.dumps(event, indent=4, default=str),
+                )
+                event_count += 1
+
+            # Verify the async request was made correctly
+            mock_post.assert_called_once()
+            request_body = json.loads(mock_post.call_args.kwargs["data"])
+            assert request_body["model"] == "o1-pro"
+            assert request_body["max_output_tokens"] == 20
+            assert "stream" not in request_body
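A note on the mocked payload above: "status": "incomplete" together with "incomplete_details": {"reason": "max_output_tokens"} is how the mocked Responses API body marks a generation cut off by the token cap, which is what the test expects litellm to surface unchanged. A minimal sketch of reading that flag off the raw dict (the helper below is hypothetical and operates on the payload shape shown in the test, not on litellm's response object):

    def truncation_reason(payload: dict):
        # Return e.g. "max_output_tokens" when the response was cut short, else None.
        if payload.get("status") != "incomplete":
            return None
        return (payload.get("incomplete_details") or {}).get("reason")


    mock_payload = {
        "status": "incomplete",
        "incomplete_details": {"reason": "max_output_tokens"},
    }
    assert truncation_reason(mock_payload) == "max_output_tokens"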