fix(proxy_server.py): Fix "Circular reference detected" error when max_parallel_requests = 0 (#9671)

* fix(proxy_server.py): remove the non-functional parent backoff/retry decorator on /chat/completions

This retry path triggered the circular reference error
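For reference, the decorator removed in the proxy_server.py hunk below follows the backoff library's on_exception pattern. A minimal sketch of that pattern, with a hypothetical handler and giveup predicate (the real decorator also wired in global retry limits and on_backoff/logger hooks):

import backoff

def giveup(e: Exception) -> bool:
    # Hypothetical predicate: keep retrying only rate-limit-style failures,
    # give up immediately on anything else.
    return "Max parallel request limit reached" not in str(e)

@backoff.on_exception(
    backoff.expo,  # exponential wait between attempts
    Exception,     # base exception to catch for the backoff
    max_tries=3,
    giveup=giveup,
)
async def handler(data: dict) -> dict:
    ...

Each retry re-entered the endpoint and re-read the request body, which is how a polluted body cache kept resurfacing instead of recovering.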

* fix(http_parsing_utils.py): safely return the parsed body - don't allow client functions to mutate the cached request body

Root-cause fix for the circular reference error
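In other words, _read_request_body cached the parsed dict and handed the same object back to callers, so a handler attaching proxy_server_request (whose "body" points back at the dict itself) poisoned the cache with a circular reference. A minimal sketch of the failure and of the tuple-based fix below, with simplified stand-in names for the real helpers:

from typing import Optional

scope: dict = {}  # stand-in for request.scope

def set_cached(parsed: dict) -> None:
    # The fix: snapshot the keys that belong to the original request body.
    scope["parsed_body"] = (tuple(parsed.keys()), parsed)

def get_cached() -> Optional[dict]:
    if isinstance(scope.get("parsed_body"), tuple):
        accepted_keys, parsed = scope["parsed_body"]
        # Rebuild a fresh dict restricted to the original keys, so anything
        # bolted on later (like proxy_server_request) never comes back.
        return {key: parsed[key] for key in accepted_keys}
    return None

body = {"model": "gpt-4"}
set_cached(body)
body["proxy_server_request"] = {"body": body}  # circular reference on the cached object
assert "proxy_server_request" not in get_cached()  # later reads stay clean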

* Revert "fix: Anthropic prompt caching on GCP Vertex AI (#9605)" (#9670)

This reverts commit a8673246dc.

* add type hints for AnthropicMessagesResponse

* define types for the AnthropicMessagesResponse response

* fix response typing

* allow using litellm.messages.acreate and litellm.messages.create
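A sketch of how the new interface is intended to be called. The function name comes from this commit; the keyword arguments mirror Anthropic's /v1/messages request shape, and the model string is purely illustrative:

import asyncio
import litellm

async def main():
    response = await litellm.messages.acreate(
        model="anthropic/claude-3-5-sonnet-20240620",  # illustrative model name
        messages=[{"role": "user", "content": "Hello"}],
        max_tokens=100,  # the Messages API requires an explicit token cap
    )
    print(response)

asyncio.run(main())

litellm.messages.create is the synchronous counterpart.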

* fix anthropic_messages implementation

* add clear type hints to litellm.messages.create functions

* fix anthropic_messages

* working anthropic API tests

* fixes - anthropic messages interface

* use new anthropic interface

* fix code quality check

* docs anthropic messages endpoint

* add namespace_packages = True to mypy

* fix mypy lint errors

* docs anthropic messages interface

* test: fix unit test

* test(test_http_parsing_utils.py): update tests

---------

Co-authored-by: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Krish Dholakia, 2025-03-31 22:06:02 -07:00 (committed by GitHub)
parent 136f1d60b3, commit f2a7edaddc
4 changed files with 55 additions and 16 deletions

common_request_processing.py

@@ -123,6 +123,7 @@ class ProxyBaseLLMRequestProcessing:
        """
        Common request processing logic for both chat completions and responses API endpoints
        """
        verbose_proxy_logger.debug(
            "Request received by LiteLLM:\n{}".format(json.dumps(self.data, indent=4)),
        )
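This debug line is representative of where the bug surfaced: once the request dict is reachable from itself, the standard library's json.dumps refuses to serialize it. A standalone reproduction of the error from the commit title:

import json

data = {"model": "gpt-4"}
data["proxy_server_request"] = {"body": data}  # dict now references itself

try:
    json.dumps(data, indent=4)
except ValueError as err:
    print(err)  # -> Circular reference detected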

http_parsing_utils.py

@@ -81,8 +81,13 @@ async def _read_request_body(request: Optional[Request]) -> Dict:
def _safe_get_request_parsed_body(request: Optional[Request]) -> Optional[dict]:
    if request is None:
        return None
-    if hasattr(request, "scope") and "parsed_body" in request.scope:
-        return request.scope["parsed_body"]
+    if (
+        hasattr(request, "scope")
+        and "parsed_body" in request.scope
+        and isinstance(request.scope["parsed_body"], tuple)
+    ):
+        accepted_keys, parsed_body = request.scope["parsed_body"]
+        return {key: parsed_body[key] for key in accepted_keys}
    return None
@@ -93,7 +98,7 @@ def _safe_set_request_parsed_body(
    try:
        if request is None:
            return
-        request.scope["parsed_body"] = parsed_body
+        request.scope["parsed_body"] = (tuple(parsed_body.keys()), parsed_body)
    except Exception as e:
        verbose_proxy_logger.debug(
            "Unexpected error setting request parsed body - {}".format(e)

proxy_server.py

@@ -3308,15 +3308,6 @@ async def model_list(
    tags=["chat/completions"],
    responses={200: {"description": "Successful response"}, **ERROR_RESPONSES},
) # azure compatible endpoint
-@backoff.on_exception(
-    backoff.expo,
-    Exception,  # base exception to catch for the backoff
-    max_tries=global_max_parallel_request_retries,  # maximum number of retries
-    max_time=global_max_parallel_request_retry_timeout,  # maximum total time to retry for
-    on_backoff=on_backoff,  # specifying the function to call on backoff
-    giveup=giveup,
-    logger=verbose_proxy_logger,
-)
async def chat_completion(  # noqa: PLR0915
    request: Request,
    fastapi_response: Response,

test_http_parsing_utils.py

@@ -39,7 +39,7 @@ async def test_request_body_caching():
    result1 = await _read_request_body(mock_request)
    assert result1 == test_data
    assert "parsed_body" in mock_request.scope
-    assert mock_request.scope["parsed_body"] == test_data
+    assert mock_request.scope["parsed_body"] == (("key",), {"key": "value"})

    # Verify the body was read once
    mock_request.body.assert_called_once()
@@ -49,7 +49,7 @@
    # Second call should use the cached body
    result2 = await _read_request_body(mock_request)
-    assert result2 == test_data
+    assert result2 == {"key": "value"}

    # Verify the body was not read again
    mock_request.body.assert_not_called()
@@ -75,7 +75,10 @@ async def test_form_data_parsing():
    # Verify the form data was correctly parsed
    assert result == test_data
    assert "parsed_body" in mock_request.scope
-    assert mock_request.scope["parsed_body"] == test_data
+    assert mock_request.scope["parsed_body"] == (
+        ("name", "message"),
+        {"name": "test_user", "message": "hello world"},
+    )

    # Verify form() was called
    mock_request.form.assert_called_once()
@@ -101,7 +104,46 @@ async def test_empty_request_body():
    # Verify an empty dict is returned
    assert result == {}
    assert "parsed_body" in mock_request.scope
-    assert mock_request.scope["parsed_body"] == {}
+    assert mock_request.scope["parsed_body"] == ((), {})

    # Verify the body was read
    mock_request.body.assert_called_once()
+
+
+@pytest.mark.asyncio
+async def test_circular_reference_handling():
+    """
+    Test that the cached request body is not modified when the returned result is mutated.
+    Guards against the mutable-dictionary reference issue fixed in this commit.
+    """
+    # Create a mock request with initial data
+    mock_request = MagicMock()
+    initial_body = {
+        "model": "gpt-4",
+        "messages": [{"role": "user", "content": "Hello"}],
+    }
+
+    mock_request.body = AsyncMock(return_value=orjson.dumps(initial_body))
+    mock_request.headers = {"content-type": "application/json"}
+    mock_request.scope = {}
+
+    # First parse
+    result = await _read_request_body(mock_request)
+
+    # Verify the initial parse
+    assert result["model"] == "gpt-4"
+    assert result["messages"] == [{"role": "user", "content": "Hello"}]
+
+    # Mutate the returned dict the way the proxy does
+    result["proxy_server_request"] = {
+        "url": "http://0.0.0.0:4000/v1/chat/completions",
+        "method": "POST",
+        "headers": {"content-type": "application/json"},
+        "body": result,  # creates a circular reference
+    }
+
+    # Second parse on the same request must not see the mutated cached value
+    result2 = await _read_request_body(mock_request)
+
+    assert (
+        "proxy_server_request" not in result2
+    )  # passes with the fix: the cache is no longer polluted