diff --git a/litellm/proxy/common_request_processing.py b/litellm/proxy/common_request_processing.py
index fcc13509ce..394fbbb968 100644
--- a/litellm/proxy/common_request_processing.py
+++ b/litellm/proxy/common_request_processing.py
@@ -123,6 +123,7 @@ class ProxyBaseLLMRequestProcessing:
         """
         Common request processing logic for both chat completions and responses API endpoints
         """
+
         verbose_proxy_logger.debug(
             "Request received by LiteLLM:\n{}".format(json.dumps(self.data, indent=4)),
         )
diff --git a/litellm/proxy/common_utils/http_parsing_utils.py b/litellm/proxy/common_utils/http_parsing_utils.py
index 7220ccaa65..ca4b5a0588 100644
--- a/litellm/proxy/common_utils/http_parsing_utils.py
+++ b/litellm/proxy/common_utils/http_parsing_utils.py
@@ -81,8 +81,13 @@ async def _read_request_body(request: Optional[Request]) -> Dict:
 def _safe_get_request_parsed_body(request: Optional[Request]) -> Optional[dict]:
     if request is None:
         return None
-    if hasattr(request, "scope") and "parsed_body" in request.scope:
-        return request.scope["parsed_body"]
+    if (
+        hasattr(request, "scope")
+        and "parsed_body" in request.scope
+        and isinstance(request.scope["parsed_body"], tuple)
+    ):
+        accepted_keys, parsed_body = request.scope["parsed_body"]
+        return {key: parsed_body[key] for key in accepted_keys}
     return None
 
 
@@ -93,7 +98,7 @@ def _safe_set_request_parsed_body(
     try:
         if request is None:
             return
-        request.scope["parsed_body"] = parsed_body
+        request.scope["parsed_body"] = (tuple(parsed_body.keys()), parsed_body)
     except Exception as e:
         verbose_proxy_logger.debug(
             "Unexpected error setting request parsed body - {}".format(e)
diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py
index 99a8965f42..d265f3bbca 100644
--- a/litellm/proxy/proxy_server.py
+++ b/litellm/proxy/proxy_server.py
@@ -3308,15 +3308,6 @@ async def model_list(
     tags=["chat/completions"],
     responses={200: {"description": "Successful response"}, **ERROR_RESPONSES},
 )  # azure compatible endpoint
-@backoff.on_exception(
-    backoff.expo,
-    Exception,  # base exception to catch for the backoff
-    max_tries=global_max_parallel_request_retries,  # maximum number of retries
-    max_time=global_max_parallel_request_retry_timeout,  # maximum total time to retry for
-    on_backoff=on_backoff,  # specifying the function to call on backoff
-    giveup=giveup,
-    logger=verbose_proxy_logger,
-)
 async def chat_completion(  # noqa: PLR0915
     request: Request,
     fastapi_response: Response,
diff --git a/tests/litellm/proxy/common_utils/test_http_parsing_utils.py b/tests/litellm/proxy/common_utils/test_http_parsing_utils.py
index 4d9199ff48..38624422c6 100644
--- a/tests/litellm/proxy/common_utils/test_http_parsing_utils.py
+++ b/tests/litellm/proxy/common_utils/test_http_parsing_utils.py
@@ -39,7 +39,7 @@ async def test_request_body_caching():
     result1 = await _read_request_body(mock_request)
     assert result1 == test_data
     assert "parsed_body" in mock_request.scope
-    assert mock_request.scope["parsed_body"] == test_data
+    assert mock_request.scope["parsed_body"] == (("key",), {"key": "value"})
 
     # Verify the body was read once
     mock_request.body.assert_called_once()
@@ -49,7 +49,7 @@
 
     # Second call should use the cached body
     result2 = await _read_request_body(mock_request)
-    assert result2 == test_data
+    assert result2 == {"key": "value"}
 
     # Verify the body was not read again
     mock_request.body.assert_not_called()
@@ -75,7 +75,10 @@ async def test_form_data_parsing():
     # Verify the form data was correctly parsed
     assert result == test_data
     assert "parsed_body" in mock_request.scope
-    assert mock_request.scope["parsed_body"] == test_data
+    assert mock_request.scope["parsed_body"] == (
+        ("name", "message"),
+        {"name": "test_user", "message": "hello world"},
+    )
 
     # Verify form() was called
     mock_request.form.assert_called_once()
@@ -101,7 +104,46 @@ async def test_empty_request_body():
     # Verify an empty dict is returned
     assert result == {}
     assert "parsed_body" in mock_request.scope
-    assert mock_request.scope["parsed_body"] == {}
+    assert mock_request.scope["parsed_body"] == ((), {})
 
     # Verify the body was read
     mock_request.body.assert_called_once()
+
+
+@pytest.mark.asyncio
+async def test_circular_reference_handling():
+    """
+    Test that the cached request body isn't polluted when the returned result is modified.
+    Demonstrates the mutable dictionary reference issue.
+    """
+    # Create a mock request with initial data
+    mock_request = MagicMock()
+    initial_body = {
+        "model": "gpt-4",
+        "messages": [{"role": "user", "content": "Hello"}],
+    }
+
+    mock_request.body = AsyncMock(return_value=orjson.dumps(initial_body))
+    mock_request.headers = {"content-type": "application/json"}
+    mock_request.scope = {}
+
+    # First parse
+    result = await _read_request_body(mock_request)
+
+    # Verify initial parse
+    assert result["model"] == "gpt-4"
+    assert result["messages"] == [{"role": "user", "content": "Hello"}]
+
+    # Modify the result by adding proxy_server_request
+    result["proxy_server_request"] = {
+        "url": "http://0.0.0.0:4000/v1/chat/completions",
+        "method": "POST",
+        "headers": {"content-type": "application/json"},
+        "body": result,  # Creates circular reference
+    }
+
+    # Second parse using the same request - will use the modified cached value
+    result2 = await _read_request_body(mock_request)
+    assert (
+        "proxy_server_request" not in result2
+    )  # Fails without the fix (cache pollution); passes with it
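
For context, here is a minimal standalone sketch of the caching scheme this diff introduces. It is not LiteLLM source: safe_set and safe_get are hypothetical stand-ins for _safe_set_request_parsed_body and _safe_get_request_parsed_body. The cache now stores an (accepted_keys, parsed_body) tuple, so any key a caller adds to the body after parsing, such as proxy_server_request with its circular "body" reference, is filtered out on every subsequent read.

from typing import Optional


def safe_set(scope: dict, parsed_body: dict) -> None:
    # Snapshot the keys present at parse time alongside the (mutable) dict.
    scope["parsed_body"] = (tuple(parsed_body.keys()), parsed_body)


def safe_get(scope: dict) -> Optional[dict]:
    cached = scope.get("parsed_body")
    if not isinstance(cached, tuple):
        return None
    accepted_keys, parsed_body = cached
    # Rebuild from the snapshot: keys added to the cached dict after
    # parse time are invisible to later readers.
    return {key: parsed_body[key] for key in accepted_keys}


scope: dict = {}
body = {"model": "gpt-4"}
safe_set(scope, body)
body["proxy_server_request"] = {"body": body}  # caller pollutes the cached dict
assert safe_get(scope) == {"model": "gpt-4"}  # the extra key is filtered on read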