mirror of
https://github.com/BerriAI/litellm.git
synced 2025-04-24 18:24:20 +00:00
test_responses_api_routing_with_previous_response_id
This commit is contained in:
parent
6783c9e618
commit
e82764158e
1 changed file with 100 additions and 29 deletions
@@ -33,53 +33,124 @@ class TestAzureResponsesAPITest(BaseResponsesAPITest):
@pytest.mark.asyncio
async def test_responses_api_routing_with_previous_response_id():
    """
    Test that when using a previous_response_id, the request is sent to the same model_id
    """
    # Create a mock response that simulates Azure responses API
    mock_response_id = "mock-resp-456"

    mock_response_data = {
        "id": mock_response_id,
        "object": "response",
        "created_at": 1741476542,
        "status": "completed",
        "model": "azure/computer-use-preview",
        "output": [
            {
                "type": "message",
                "id": "msg_123",
                "status": "completed",
                "role": "assistant",
                "content": [
                    {"type": "output_text", "text": "I'm doing well, thank you for asking!", "annotations": []}
                ],
            }
        ],
        "parallel_tool_calls": True,
        "usage": {
            "input_tokens": 10,
            "output_tokens": 20,
            "total_tokens": 30,
            "output_tokens_details": {"reasoning_tokens": 0},
        },
        "text": {"format": {"type": "text"}},
        "error": None,
        "incomplete_details": None,
        "instructions": None,
        "metadata": {},
        "temperature": 1.0,
        "tool_choice": "auto",
        "tools": [],
        "top_p": 1.0,
        "max_output_tokens": None,
        "previous_response_id": None,
        "reasoning": {"effort": None, "summary": None},
        "truncation": "disabled",
        "user": None,
    }

    class MockResponse:
        def __init__(self, json_data, status_code):
            self._json_data = json_data
            self.status_code = status_code
            self.text = json.dumps(json_data)

        def json(self):
            return self._json_data
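
    # MockResponse is a minimal stand-in for the HTTP response object that the
    # patched AsyncHTTPHandler.post would return: just status_code, text, and json().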
    router = litellm.Router(
        model_list=[
            {
                "model_name": "azure-computer-use-preview",
                "litellm_params": {
                    "model": "azure/computer-use-preview",
                    "api_key": "mock-api-key",
                    "api_version": "mock-api-version",
                    "api_base": "https://mock-endpoint.openai.azure.com",
                },
            },
            {
                "model_name": "azure-computer-use-preview",
                "litellm_params": {
                    "model": "azure/computer-use-preview-2",
                    "api_key": "mock-api-key-2",
                    "api_version": "mock-api-version-2",
                    "api_base": "https://mock-endpoint-2.openai.azure.com",
                },
            },
        ],
    )
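    # Both deployments share the same model_name, so the initial request can
    # land on either one; previous_response_id is what should pin follow-ups.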

    MODEL = "azure-computer-use-preview"

    with patch(
        "litellm.llms.custom_httpx.http_handler.AsyncHTTPHandler.post",
        new_callable=AsyncMock,
    ) as mock_post:
        # Configure the mock to return our response
        mock_post.return_value = MockResponse(mock_response_data, 200)

        # Make the initial request
        litellm._turn_on_debug()
        response = await router.aresponses(
            model=MODEL,
            input="Hello, how are you?",
            truncation="auto",
        )

        # Store the model_id and response id from the initial response
        expected_model_id = response._hidden_params["model_id"]
        response_id = response.id
        print("Response ID=", response_id, "came from model_id=", expected_model_id)

        # Make 10 other requests with previous_response_id, assert that they are sent to the same model_id
        for i in range(10):
            # Reset the mock for the next call
            mock_post.reset_mock()

            # Set up the mock to return our response again
            mock_post.return_value = MockResponse(mock_response_data, 200)

            response = await router.aresponses(
                model=MODEL,
                input=f"Follow-up question {i+1}",
                truncation="auto",
                previous_response_id=response_id,
            )

            # Assert the model_id is preserved
            assert response._hidden_params["model_id"] == expected_model_id
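
From the caller's side, the behavior this test locks in looks like the sketch below. This is a hypothetical, minimal usage example, not code from this commit: the model_name and credentials are placeholders, and only APIs the test itself exercises (litellm.Router, aresponses, response.id) are used.

import asyncio
import litellm

async def main():
    router = litellm.Router(
        model_list=[
            {
                "model_name": "my-responses-model",
                "litellm_params": {
                    "model": "azure/computer-use-preview",
                    "api_key": "placeholder-key",
                    "api_version": "placeholder-version",
                    "api_base": "https://placeholder.openai.azure.com",
                },
            },
        ]
    )

    first = await router.aresponses(model="my-responses-model", input="Hello!")
    # Passing first.id as previous_response_id should route this follow-up to
    # the same underlying deployment that served `first`.
    follow_up = await router.aresponses(
        model="my-responses-model",
        input="And a follow-up question.",
        previous_response_id=first.id,
    )
    print(follow_up.id)

asyncio.run(main())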