From b04cf226aa2d2fe77468cfbf646e5862d247cd6c Mon Sep 17 00:00:00 2001
From: Ishaan Jaff
Date: Thu, 20 Mar 2025 13:04:49 -0700
Subject: [PATCH] test_openai_o1_pro_response_api_streaming

---
 .../test_openai_responses_api.py | 123 +++++++++++++++++-
 1 file changed, 122 insertions(+), 1 deletion(-)

diff --git a/tests/llm_responses_api_testing/test_openai_responses_api.py b/tests/llm_responses_api_testing/test_openai_responses_api.py
index 709fb316fe..677e13b08a 100644
--- a/tests/llm_responses_api_testing/test_openai_responses_api.py
+++ b/tests/llm_responses_api_testing/test_openai_responses_api.py
@@ -829,7 +829,8 @@ async def test_async_bad_request_bad_param_error():
 
 
 @pytest.mark.asyncio
-async def test_openai_o1_pro_response_api():
+@pytest.mark.parametrize("sync_mode", [True, False])
+async def test_openai_o1_pro_response_api(sync_mode):
     """
     Test that LiteLLM correctly handles an incomplete response from OpenAI's o1-pro model
     due to reaching max_output_tokens limit.
@@ -921,3 +922,123 @@ async def test_openai_o1_pro_response_api():
 
     # Validate that the response is properly identified as incomplete
     validate_responses_api_response(response, final_chunk=True)
+
+
+@pytest.mark.asyncio
+@pytest.mark.parametrize("sync_mode", [True, False])
+async def test_openai_o1_pro_response_api_streaming(sync_mode):
+    """
+    Test that LiteLLM correctly handles an incomplete response from OpenAI's o1-pro model
+    due to reaching max_output_tokens limit in both sync and async streaming modes.
+    """
+    # Mock response from o1-pro
+    mock_response = {
+        "id": "resp_67dc3dd77b388190822443a85252da5a0e13d8bdc0e28d88",
+        "object": "response",
+        "created_at": 1742486999,
+        "status": "incomplete",
+        "error": None,
+        "incomplete_details": {"reason": "max_output_tokens"},
+        "instructions": None,
+        "max_output_tokens": 20,
+        "model": "o1-pro-2025-03-19",
+        "output": [
+            {
+                "type": "reasoning",
+                "id": "rs_67dc3de50f64819097450ed50a33d5f90e13d8bdc0e28d88",
+                "summary": [],
+            }
+        ],
+        "parallel_tool_calls": True,
+        "previous_response_id": None,
+        "reasoning": {"effort": "medium", "generate_summary": None},
+        "store": True,
+        "temperature": 1.0,
+        "text": {"format": {"type": "text"}},
+        "tool_choice": "auto",
+        "tools": [],
+        "top_p": 1.0,
+        "truncation": "disabled",
+        "usage": {
+            "input_tokens": 73,
+            "input_tokens_details": {"cached_tokens": 0},
+            "output_tokens": 20,
+            "output_tokens_details": {"reasoning_tokens": 0},
+            "total_tokens": 93,
+        },
+        "user": None,
+        "metadata": {},
+    }
+
+    class MockResponse:
+        def __init__(self, json_data, status_code):
+            self._json_data = json_data
+            self.status_code = status_code
+            self.text = json.dumps(json_data)
+
+        def json(self):
+            return self._json_data
+
+    with patch(
+        "litellm.llms.custom_httpx.http_handler.AsyncHTTPHandler.post",
+        new_callable=AsyncMock,
+    ) as mock_post:
+        # Configure the mock to return our response
+        mock_post.return_value = MockResponse(mock_response, 200)
+
+        litellm._turn_on_debug()
+        litellm.set_verbose = True
+
+        # Exercise the sync or async streaming path
+        if sync_mode:
+            # For sync mode, we need to patch the sync HTTP handler
+            with patch(
+                "litellm.llms.custom_httpx.http_handler.HTTPHandler.post",
+                return_value=MockResponse(mock_response, 200),
+            ) as mock_sync_post:
+                response = litellm.responses(
+                    model="openai/o1-pro",
+                    input="Write a detailed essay about artificial intelligence and its impact on society",
+                    max_output_tokens=20,
+                    stream=True,
+                )
+
+                # Process the sync stream
+                event_count = 0
+                for event in response:
+                    print(
+                        f"Sync litellm response #{event_count}:",
+                        json.dumps(event, indent=4, default=str),
+                    )
+                    event_count += 1
+
+                # Verify the sync request was made correctly
+                mock_sync_post.assert_called_once()
+                request_body = json.loads(mock_sync_post.call_args.kwargs["data"])
+                assert request_body["model"] == "o1-pro"
+                assert request_body["max_output_tokens"] == 20
+                assert "stream" not in request_body
+        else:
+            # For async mode
+            response = await litellm.aresponses(
+                model="openai/o1-pro",
+                input="Write a detailed essay about artificial intelligence and its impact on society",
+                max_output_tokens=20,
+                stream=True,
+            )
+
+            # Process the async stream
+            event_count = 0
+            async for event in response:
+                print(
+                    f"Async litellm response #{event_count}:",
+                    json.dumps(event, indent=4, default=str),
+                )
+                event_count += 1
+
+            # Verify the async request was made correctly
+            mock_post.assert_called_once()
+            request_body = json.loads(mock_post.call_args.kwargs["data"])
+            assert request_body["model"] == "o1-pro"
+            assert request_body["max_output_tokens"] == 20
+            assert "stream" not in request_body
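
Illustrative usage, not part of the patch: a minimal sketch of how a caller might consume the streaming iterator that the new test exercises, assuming a valid OPENAI_API_KEY in the environment and access to the o1-pro model (the prompt text and function name below are made up for the example).

import json

import litellm


def stream_o1_pro_example():
    # stream=True on o1-pro goes through a fake-stream path: the test above
    # asserts that "stream" is dropped from the upstream request body, i.e.
    # LiteLLM makes one non-streaming call and replays it as streaming events.
    events = litellm.responses(
        model="openai/o1-pro",
        input="Summarize the main ideas behind reinforcement learning",
        max_output_tokens=20,
        stream=True,
    )
    for i, event in enumerate(events):
        print(f"event #{i}:", json.dumps(event, indent=4, default=str))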