fix(proxy_server.py): Fix "Circular reference detected" error when max_parallel_requests = 0 (#9671)

* fix(proxy_server.py): remove the non-functional parent backoff/retry decorator on /chat/completions

This retry path triggered the circular reference error
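For reference, the decorator removed in the proxy_server.py hunk below follows the backoff library's on_exception pattern. A minimal sketch of that pattern, with a hypothetical handler and giveup predicate (the real decorator also wired in global retry limits and on_backoff/logger hooks):

import backoff

def giveup(e: Exception) -> bool:
    # Hypothetical predicate: keep retrying only rate-limit-style failures,
    # give up immediately on anything else.
    return "Max parallel request limit reached" not in str(e)

@backoff.on_exception(
    backoff.expo,  # exponential wait between attempts
    Exception,     # base exception to catch for the backoff
    max_tries=3,
    giveup=giveup,
)
async def handler(data: dict) -> dict:
    ...

Each retry re-entered the endpoint and re-read the request body, which is how a polluted body cache kept resurfacing instead of recovering.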

* fix(http_parsing_utils.py): safely return the parsed body - don't allow client functions to mutate the cached request body

Root-cause fix for the circular reference error
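In other words, _read_request_body cached the parsed dict and handed the same object back to callers, so a handler attaching proxy_server_request (whose "body" points back at the dict itself) poisoned the cache with a circular reference. A minimal sketch of the failure and of the tuple-based fix below, with simplified stand-in names for the real helpers:

from typing import Optional

scope: dict = {}  # stand-in for request.scope

def set_cached(parsed: dict) -> None:
    # The fix: snapshot the keys that belong to the original request body.
    scope["parsed_body"] = (tuple(parsed.keys()), parsed)

def get_cached() -> Optional[dict]:
    if isinstance(scope.get("parsed_body"), tuple):
        accepted_keys, parsed = scope["parsed_body"]
        # Rebuild a fresh dict restricted to the original keys, so anything
        # bolted on later (like proxy_server_request) never comes back.
        return {key: parsed[key] for key in accepted_keys}
    return None

body = {"model": "gpt-4"}
set_cached(body)
body["proxy_server_request"] = {"body": body}  # circular reference on the cached object
assert "proxy_server_request" not in get_cached()  # later reads stay clean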

* Revert "fix: Anthropic prompt caching on GCP Vertex AI (#9605)" (#9670)

This reverts commit a8673246dc.

* add type hints for AnthropicMessagesResponse

* define types for the AnthropicMessagesResponse response

* fix response typing

* allow using litellm.messages.acreate and litellm.messages.create
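A sketch of how the new interface is intended to be called. The function name comes from this commit; the keyword arguments mirror Anthropic's /v1/messages request shape, and the model string is purely illustrative:

import asyncio
import litellm

async def main():
    response = await litellm.messages.acreate(
        model="anthropic/claude-3-5-sonnet-20240620",  # illustrative model name
        messages=[{"role": "user", "content": "Hello"}],
        max_tokens=100,  # the Messages API requires an explicit token cap
    )
    print(response)

asyncio.run(main())

litellm.messages.create is the synchronous counterpart.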

* fix anthropic_messages implementation

* add clear type hints to litellm.messages.create functions

* fix anthropic_messages

* working anthropic API tests

* fixes - anthropic messages interface

* use new anthropic interface

* fix code quality check

* docs anthropic messages endpoint

* add namespace_packages = True to mypy

* fix mypy lint errors

* docs anthropic messages interface

* test: fix unit test

* test(test_http_parsing_utils.py): update tests

---------

Co-authored-by: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Krish Dholakia, 2025-03-31 22:06:02 -07:00 (committed by GitHub)
parent 136f1d60b3, commit f2a7edaddc
4 changed files with 55 additions and 16 deletions

common_request_processing.py

@@ -123,6 +123,7 @@ class ProxyBaseLLMRequestProcessing:
        """
        Common request processing logic for both chat completions and responses API endpoints
        """
        verbose_proxy_logger.debug(
            "Request received by LiteLLM:\n{}".format(json.dumps(self.data, indent=4)),
        )
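This debug line is representative of where the bug surfaced: once the request dict is reachable from itself, the standard library's json.dumps refuses to serialize it. A standalone reproduction of the error from the commit title:

import json

data = {"model": "gpt-4"}
data["proxy_server_request"] = {"body": data}  # dict now references itself

try:
    json.dumps(data, indent=4)
except ValueError as err:
    print(err)  # -> Circular reference detected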

http_parsing_utils.py

@@ -81,8 +81,13 @@ async def _read_request_body(request: Optional[Request]) -> Dict:
def _safe_get_request_parsed_body(request: Optional[Request]) -> Optional[dict]:
    if request is None:
        return None
-    if hasattr(request, "scope") and "parsed_body" in request.scope:
-        return request.scope["parsed_body"]
+    if (
+        hasattr(request, "scope")
+        and "parsed_body" in request.scope
+        and isinstance(request.scope["parsed_body"], tuple)
+    ):
+        accepted_keys, parsed_body = request.scope["parsed_body"]
+        return {key: parsed_body[key] for key in accepted_keys}
    return None
@@ -93,7 +98,7 @@ def _safe_set_request_parsed_body(
    try:
        if request is None:
            return
-        request.scope["parsed_body"] = parsed_body
+        request.scope["parsed_body"] = (tuple(parsed_body.keys()), parsed_body)
    except Exception as e:
        verbose_proxy_logger.debug(
            "Unexpected error setting request parsed body - {}".format(e)

proxy_server.py

@@ -3308,15 +3308,6 @@ async def model_list(
    tags=["chat/completions"],
    responses={200: {"description": "Successful response"}, **ERROR_RESPONSES},
) # azure compatible endpoint
-@backoff.on_exception(
-    backoff.expo,
-    Exception,  # base exception to catch for the backoff
-    max_tries=global_max_parallel_request_retries,  # maximum number of retries
-    max_time=global_max_parallel_request_retry_timeout,  # maximum total time to retry for
-    on_backoff=on_backoff,  # specifying the function to call on backoff
-    giveup=giveup,
-    logger=verbose_proxy_logger,
-)
async def chat_completion(  # noqa: PLR0915
    request: Request,
    fastapi_response: Response,

test_http_parsing_utils.py

@@ -39,7 +39,7 @@ async def test_request_body_caching():
    result1 = await _read_request_body(mock_request)
    assert result1 == test_data
    assert "parsed_body" in mock_request.scope
-    assert mock_request.scope["parsed_body"] == test_data
+    assert mock_request.scope["parsed_body"] == (("key",), {"key": "value"})

    # Verify the body was read once
    mock_request.body.assert_called_once()
@@ -49,7 +49,7 @@
    # Second call should use the cached body
    result2 = await _read_request_body(mock_request)
-    assert result2 == test_data
+    assert result2 == {"key": "value"}

    # Verify the body was not read again
    mock_request.body.assert_not_called()
@@ -75,7 +75,10 @@ async def test_form_data_parsing():
    # Verify the form data was correctly parsed
    assert result == test_data
    assert "parsed_body" in mock_request.scope
-    assert mock_request.scope["parsed_body"] == test_data
+    assert mock_request.scope["parsed_body"] == (
+        ("name", "message"),
+        {"name": "test_user", "message": "hello world"},
+    )

    # Verify form() was called
    mock_request.form.assert_called_once()
@@ -101,7 +104,46 @@ async def test_empty_request_body():
    # Verify an empty dict is returned
    assert result == {}
    assert "parsed_body" in mock_request.scope
-    assert mock_request.scope["parsed_body"] == {}
+    assert mock_request.scope["parsed_body"] == ((), {})

    # Verify the body was read
    mock_request.body.assert_called_once()
+
+
+@pytest.mark.asyncio
+async def test_circular_reference_handling():
+    """
+    Test that the cached request body is not modified when the returned result is mutated.
+    Guards against the mutable-dictionary reference issue fixed in this commit.
+    """
+    # Create a mock request with initial data
+    mock_request = MagicMock()
+    initial_body = {
+        "model": "gpt-4",
+        "messages": [{"role": "user", "content": "Hello"}],
+    }
+
+    mock_request.body = AsyncMock(return_value=orjson.dumps(initial_body))
+    mock_request.headers = {"content-type": "application/json"}
+    mock_request.scope = {}
+
+    # First parse
+    result = await _read_request_body(mock_request)
+
+    # Verify the initial parse
+    assert result["model"] == "gpt-4"
+    assert result["messages"] == [{"role": "user", "content": "Hello"}]
+
+    # Mutate the returned dict the way the proxy does
+    result["proxy_server_request"] = {
+        "url": "http://0.0.0.0:4000/v1/chat/completions",
+        "method": "POST",
+        "headers": {"content-type": "application/json"},
+        "body": result,  # creates a circular reference
+    }
+
+    # Second parse on the same request must not see the mutated cached value
+    result2 = await _read_request_body(mock_request)
+
+    assert (
+        "proxy_server_request" not in result2
+    )  # passes with the fix: the cache is no longer polluted