From b04cf226aa2d2fe77468cfbf646e5862d247cd6c Mon Sep 17 00:00:00 2001
From: Ishaan Jaff
Date: Thu, 20 Mar 2025 13:04:49 -0700
Subject: [PATCH] test_openai_o1_pro_response_api_streaming

---
 .../test_openai_responses_api.py | 123 +++++++++++++++++-
 1 file changed, 122 insertions(+), 1 deletion(-)

diff --git a/tests/llm_responses_api_testing/test_openai_responses_api.py b/tests/llm_responses_api_testing/test_openai_responses_api.py
index 709fb316fe..677e13b08a 100644
--- a/tests/llm_responses_api_testing/test_openai_responses_api.py
+++ b/tests/llm_responses_api_testing/test_openai_responses_api.py
@@ -829,7 +829,8 @@ async def test_async_bad_request_bad_param_error():
 
 
 @pytest.mark.asyncio
-async def test_openai_o1_pro_response_api():
+@pytest.mark.parametrize("sync_mode", [True, False])
+async def test_openai_o1_pro_response_api(sync_mode):
     """
     Test that LiteLLM correctly handles an incomplete response from OpenAI's o1-pro model
     due to reaching max_output_tokens limit.
@@ -921,3 +922,123 @@ async def test_openai_o1_pro_response_api():
 
     # Validate that the response is properly identified as incomplete
     validate_responses_api_response(response, final_chunk=True)
+
+
+@pytest.mark.asyncio
+@pytest.mark.parametrize("sync_mode", [True, False])
+async def test_openai_o1_pro_response_api_streaming(sync_mode):
+    """
+    Test that LiteLLM correctly handles an incomplete response from OpenAI's o1-pro model
+    due to reaching max_output_tokens limit in both sync and async streaming modes.
+    """
+    # Mock response from o1-pro
+    mock_response = {
+        "id": "resp_67dc3dd77b388190822443a85252da5a0e13d8bdc0e28d88",
+        "object": "response",
+        "created_at": 1742486999,
+        "status": "incomplete",
+        "error": None,
+        "incomplete_details": {"reason": "max_output_tokens"},
+        "instructions": None,
+        "max_output_tokens": 20,
+        "model": "o1-pro-2025-03-19",
+        "output": [
+            {
+                "type": "reasoning",
+                "id": "rs_67dc3de50f64819097450ed50a33d5f90e13d8bdc0e28d88",
+                "summary": [],
+            }
+        ],
+        "parallel_tool_calls": True,
+        "previous_response_id": None,
+        "reasoning": {"effort": "medium", "generate_summary": None},
+        "store": True,
+        "temperature": 1.0,
+        "text": {"format": {"type": "text"}},
+        "tool_choice": "auto",
+        "tools": [],
+        "top_p": 1.0,
+        "truncation": "disabled",
+        "usage": {
+            "input_tokens": 73,
+            "input_tokens_details": {"cached_tokens": 0},
+            "output_tokens": 20,
+            "output_tokens_details": {"reasoning_tokens": 0},
+            "total_tokens": 93,
+        },
+        "user": None,
+        "metadata": {},
+    }
+
+    class MockResponse:
+        def __init__(self, json_data, status_code):
+            self._json_data = json_data
+            self.status_code = status_code
+            self.text = json.dumps(json_data)
+
+        def json(self):
+            return self._json_data
+
+    with patch(
+        "litellm.llms.custom_httpx.http_handler.AsyncHTTPHandler.post",
+        new_callable=AsyncMock,
+    ) as mock_post:
+        # Configure the mock to return our response
+        mock_post.return_value = MockResponse(mock_response, 200)
+
+        litellm._turn_on_debug()
+        litellm.set_verbose = True
+
+        # Exercise the sync or async streaming path
+        if sync_mode:
+            # For sync mode, we need to patch the sync HTTP handler
+            with patch(
+                "litellm.llms.custom_httpx.http_handler.HTTPHandler.post",
+                return_value=MockResponse(mock_response, 200),
+            ) as mock_sync_post:
+                response = litellm.responses(
+                    model="openai/o1-pro",
+                    input="Write a detailed essay about artificial intelligence and its impact on society",
+                    max_output_tokens=20,
+                    stream=True,
+                )
+
+                # Process the sync stream
+                event_count = 0
+                for event in response:
+                    print(
+                        f"Sync litellm response #{event_count}:",
+                        json.dumps(event, indent=4, default=str),
+                    )
+                    event_count += 1
+
+                # Verify the sync request was made correctly
+                mock_sync_post.assert_called_once()
+                request_body = json.loads(mock_sync_post.call_args.kwargs["data"])
+                assert request_body["model"] == "o1-pro"
+                assert request_body["max_output_tokens"] == 20
+                assert "stream" not in request_body
+        else:
+            # For async mode
+            response = await litellm.aresponses(
+                model="openai/o1-pro",
+                input="Write a detailed essay about artificial intelligence and its impact on society",
+                max_output_tokens=20,
+                stream=True,
+            )
+
+            # Process the async stream
+            event_count = 0
+            async for event in response:
+                print(
+                    f"Async litellm response #{event_count}:",
+                    json.dumps(event, indent=4, default=str),
+                )
+                event_count += 1
+
+            # Verify the async request was made correctly
+            mock_post.assert_called_once()
+            request_body = json.loads(mock_post.call_args.kwargs["data"])
+            assert request_body["model"] == "o1-pro"
+            assert request_body["max_output_tokens"] == 20
+            assert "stream" not in request_body
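
Illustrative usage, not part of the patch: a minimal sketch of how a caller might consume the streaming iterator that the new test exercises, assuming a valid OPENAI_API_KEY in the environment and access to the o1-pro model (the prompt text and function name below are made up for the example).

import json

import litellm


def stream_o1_pro_example():
    # stream=True on o1-pro goes through a fake-stream path: the test above
    # asserts that "stream" is dropped from the upstream request body, i.e.
    # LiteLLM makes one non-streaming call and replays it as streaming events.
    events = litellm.responses(
        model="openai/o1-pro",
        input="Summarize the main ideas behind reinforcement learning",
        max_output_tokens=20,
        stream=True,
    )
    for i, event in enumerate(events):
        print(f"event #{i}:", json.dumps(event, indent=4, default=str))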