diff --git a/litellm/proxy/common_request_processing.py b/litellm/proxy/common_request_processing.py
index fcc13509ce..394fbbb968 100644
--- a/litellm/proxy/common_request_processing.py
+++ b/litellm/proxy/common_request_processing.py
@@ -123,6 +123,7 @@ class ProxyBaseLLMRequestProcessing:
         """
         Common request processing logic for both chat completions and responses API endpoints
         """
+
         verbose_proxy_logger.debug(
             "Request received by LiteLLM:\n{}".format(json.dumps(self.data, indent=4)),
         )
diff --git a/litellm/proxy/common_utils/http_parsing_utils.py b/litellm/proxy/common_utils/http_parsing_utils.py
index 7220ccaa65..ca4b5a0588 100644
--- a/litellm/proxy/common_utils/http_parsing_utils.py
+++ b/litellm/proxy/common_utils/http_parsing_utils.py
@@ -81,8 +81,13 @@ async def _read_request_body(request: Optional[Request]) -> Dict:
 def _safe_get_request_parsed_body(request: Optional[Request]) -> Optional[dict]:
     if request is None:
         return None
-    if hasattr(request, "scope") and "parsed_body" in request.scope:
-        return request.scope["parsed_body"]
+    if (
+        hasattr(request, "scope")
+        and "parsed_body" in request.scope
+        and isinstance(request.scope["parsed_body"], tuple)
+    ):
+        accepted_keys, parsed_body = request.scope["parsed_body"]
+        return {key: parsed_body[key] for key in accepted_keys}
     return None
 
 
@@ -93,7 +98,7 @@ def _safe_set_request_parsed_body(
     try:
         if request is None:
             return
-        request.scope["parsed_body"] = parsed_body
+        request.scope["parsed_body"] = (tuple(parsed_body.keys()), parsed_body)
     except Exception as e:
         verbose_proxy_logger.debug(
             "Unexpected error setting request parsed body - {}".format(e)
diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py
index 99a8965f42..d265f3bbca 100644
--- a/litellm/proxy/proxy_server.py
+++ b/litellm/proxy/proxy_server.py
@@ -3308,15 +3308,6 @@ async def model_list(
     tags=["chat/completions"],
     responses={200: {"description": "Successful response"}, **ERROR_RESPONSES},
 )  # azure compatible endpoint
-@backoff.on_exception(
-    backoff.expo,
-    Exception,  # base exception to catch for the backoff
-    max_tries=global_max_parallel_request_retries,  # maximum number of retries
-    max_time=global_max_parallel_request_retry_timeout,  # maximum total time to retry for
-    on_backoff=on_backoff,  # specifying the function to call on backoff
-    giveup=giveup,
-    logger=verbose_proxy_logger,
-)
 async def chat_completion(  # noqa: PLR0915
     request: Request,
     fastapi_response: Response,
diff --git a/tests/litellm/proxy/common_utils/test_http_parsing_utils.py b/tests/litellm/proxy/common_utils/test_http_parsing_utils.py
index 4d9199ff48..38624422c6 100644
--- a/tests/litellm/proxy/common_utils/test_http_parsing_utils.py
+++ b/tests/litellm/proxy/common_utils/test_http_parsing_utils.py
@@ -39,7 +39,7 @@ async def test_request_body_caching():
     result1 = await _read_request_body(mock_request)
     assert result1 == test_data
     assert "parsed_body" in mock_request.scope
-    assert mock_request.scope["parsed_body"] == test_data
+    assert mock_request.scope["parsed_body"] == (("key",), {"key": "value"})
 
     # Verify the body was read once
     mock_request.body.assert_called_once()
@@ -49,7 +49,7 @@
 
     # Second call should use the cached body
     result2 = await _read_request_body(mock_request)
-    assert result2 == test_data
+    assert result2 == {"key": "value"}
 
     # Verify the body was not read again
     mock_request.body.assert_not_called()
@@ -75,7 +75,10 @@ async def test_form_data_parsing():
     # Verify the form data was correctly parsed
     assert result == test_data
     assert "parsed_body" in mock_request.scope
-    assert mock_request.scope["parsed_body"] == test_data
+    assert mock_request.scope["parsed_body"] == (
+        ("name", "message"),
+        {"name": "test_user", "message": "hello world"},
+    )
 
     # Verify form() was called
     mock_request.form.assert_called_once()
@@ -101,7 +104,46 @@ async def test_empty_request_body():
     # Verify an empty dict is returned
     assert result == {}
     assert "parsed_body" in mock_request.scope
-    assert mock_request.scope["parsed_body"] == {}
+    assert mock_request.scope["parsed_body"] == ((), {})
 
     # Verify the body was read
     mock_request.body.assert_called_once()
+
+
+@pytest.mark.asyncio
+async def test_circular_reference_handling():
+    """
+    Test that the cached request body isn't polluted when the returned result is modified.
+    Demonstrates the mutable dictionary reference issue.
+    """
+    # Create a mock request with initial data
+    mock_request = MagicMock()
+    initial_body = {
+        "model": "gpt-4",
+        "messages": [{"role": "user", "content": "Hello"}],
+    }
+
+    mock_request.body = AsyncMock(return_value=orjson.dumps(initial_body))
+    mock_request.headers = {"content-type": "application/json"}
+    mock_request.scope = {}
+
+    # First parse
+    result = await _read_request_body(mock_request)
+
+    # Verify initial parse
+    assert result["model"] == "gpt-4"
+    assert result["messages"] == [{"role": "user", "content": "Hello"}]
+
+    # Modify the result by adding proxy_server_request
+    result["proxy_server_request"] = {
+        "url": "http://0.0.0.0:4000/v1/chat/completions",
+        "method": "POST",
+        "headers": {"content-type": "application/json"},
+        "body": result,  # Creates circular reference
+    }
+
+    # Second parse using the same request - will use the modified cached value
+    result2 = await _read_request_body(mock_request)
+    assert (
+        "proxy_server_request" not in result2
+    )  # Fails without the fix (cache pollution); passes with it
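
For context, here is a minimal standalone sketch of the caching scheme this diff introduces. It is not LiteLLM source: safe_set and safe_get are hypothetical stand-ins for _safe_set_request_parsed_body and _safe_get_request_parsed_body. The cache now stores an (accepted_keys, parsed_body) tuple, so any key a caller adds to the body after parsing, such as proxy_server_request with its circular "body" reference, is filtered out on every subsequent read.

from typing import Optional


def safe_set(scope: dict, parsed_body: dict) -> None:
    # Snapshot the keys present at parse time alongside the (mutable) dict.
    scope["parsed_body"] = (tuple(parsed_body.keys()), parsed_body)


def safe_get(scope: dict) -> Optional[dict]:
    cached = scope.get("parsed_body")
    if not isinstance(cached, tuple):
        return None
    accepted_keys, parsed_body = cached
    # Rebuild from the snapshot: keys added to the cached dict after
    # parse time are invisible to later readers.
    return {key: parsed_body[key] for key in accepted_keys}


scope: dict = {}
body = {"model": "gpt-4"}
safe_set(scope, body)
body["proxy_server_request"] = {"body": body}  # caller pollutes the cached dict
assert safe_get(scope) == {"model": "gpt-4"}  # the extra key is filtered on read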