Mirror of https://github.com/BerriAI/litellm.git, synced 2025-04-25 02:34:29 +00:00
fix(proxy_server.py): Fix "Circular reference detected" error when max_parallel_requests = 0 (#9671)
* fix(proxy_server.py): remove the non-functional parent backoff/retry on /chat/completion, which caused the circular reference error
* fix(http_parsing_utils.py): safely return the parsed body and don't let client functions mutate the cached request body (the root-cause fix for the circular reference error)
* Revert "fix: Anthropic prompt caching on GCP Vertex AI (#9605)" (#9670). This reverts commit a8673246dc.
* add type hints for AnthropicMessagesResponse
* define types for the response from AnthropicMessagesResponse
* fix response typing
* allow using litellm.messages.acreate and litellm.messages.create (see the usage sketch after this commit message)
* fix anthropic_messages implementation
* add clear type hints to litellm.messages.create functions
* fix anthropic_messages
* working anthropic API tests
* fixes - anthropic messages interface
* use new anthropic interface
* fix code quality check
* docs anthropic messages endpoint
* add namespace_packages = True to mypy
* fix mypy lint errors
* docs anthropic messages interface
* test: fix unit test
* test(test_http_parsing_utils.py): update tests
---------
Co-authored-by: Ishaan Jaff <ishaanjaffer0324@gmail.com>
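The new litellm.messages interface named above can be exercised with a sketch like the following; the model name, max_tokens value, and message shape follow the Anthropic Messages API convention and are assumptions here, not taken from this commit's diff:

import asyncio

import litellm


async def main():
    # litellm.messages.acreate is the async variant this commit enables;
    # the arguments below are illustrative Anthropic-style parameters.
    response = await litellm.messages.acreate(
        model="claude-3-opus-20240229",
        messages=[{"role": "user", "content": "Hello"}],
        max_tokens=100,
    )
    print(response)


asyncio.run(main())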
This commit is contained in: parent 136f1d60b3, commit f2a7edaddc
4 changed files with 55 additions and 16 deletions
@@ -123,6 +123,7 @@ class ProxyBaseLLMRequestProcessing:
         """
         Common request processing logic for both chat completions and responses API endpoints
         """

         verbose_proxy_logger.debug(
             "Request received by LiteLLM:\n{}".format(json.dumps(self.data, indent=4)),
         )
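The debug call above is exactly where a self-referencing request body fails: json.dumps cannot serialize a dict that contains itself. A minimal reproduction of the error named in the commit title (illustrative, not part of the diff):

# json.dumps raises ValueError("Circular reference detected") on a
# self-referencing dict, which is the proxy error this commit fixes.
import json

data = {"model": "gpt-4"}
data["proxy_server_request"] = {"body": data}  # data now contains itself

try:
    json.dumps(data, indent=4)
except ValueError as e:
    print(e)  # Circular reference detected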
http_parsing_utils.py
@@ -81,8 +81,13 @@ async def _read_request_body(request: Optional[Request]) -> Dict:
 def _safe_get_request_parsed_body(request: Optional[Request]) -> Optional[dict]:
     if request is None:
         return None
-    if hasattr(request, "scope") and "parsed_body" in request.scope:
-        return request.scope["parsed_body"]
+    if (
+        hasattr(request, "scope")
+        and "parsed_body" in request.scope
+        and isinstance(request.scope["parsed_body"], tuple)
+    ):
+        accepted_keys, parsed_body = request.scope["parsed_body"]
+        return {key: parsed_body[key] for key in accepted_keys}
     return None
@@ -93,7 +98,7 @@ def _safe_set_request_parsed_body(
     try:
         if request is None:
             return
-        request.scope["parsed_body"] = parsed_body
+        request.scope["parsed_body"] = (tuple(parsed_body.keys()), parsed_body)
     except Exception as e:
         verbose_proxy_logger.debug(
             "Unexpected error setting request parsed body - {}".format(e)
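Together, the two hunks above change the cache format from a bare dict to an (accepted_keys, body) tuple, and every read rebuilds a fresh dict restricted to the keys present at parse time. A caller that mutates the returned dict, for example by attaching a self-referencing proxy_server_request entry, can no longer pollute the cached copy. A standalone sketch of the pattern, with illustrative names that are not litellm's:

from typing import Optional


def cache_parsed_body(scope: dict, parsed_body: dict) -> None:
    # Record the keys the body had at parse time, alongside the body itself.
    scope["parsed_body"] = (tuple(parsed_body.keys()), parsed_body)


def read_parsed_body(scope: dict) -> Optional[dict]:
    cached = scope.get("parsed_body")
    if not isinstance(cached, tuple):
        return None
    accepted_keys, parsed_body = cached
    # Hand back a fresh dict limited to the original keys, so anything a
    # caller adds later never reaches the cache.
    return {key: parsed_body[key] for key in accepted_keys}


scope: dict = {}
cache_parsed_body(scope, {"model": "gpt-4"})
body = read_parsed_body(scope)
assert body is not None
body["proxy_server_request"] = {"body": body}  # caller-side mutation
fresh = read_parsed_body(scope)
assert fresh is not None and "proxy_server_request" not in fresh  # cache intact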
proxy_server.py
@@ -3308,15 +3308,6 @@ async def model_list(
     tags=["chat/completions"],
     responses={200: {"description": "Successful response"}, **ERROR_RESPONSES},
 )  # azure compatible endpoint
-@backoff.on_exception(
-    backoff.expo,
-    Exception,  # base exception to catch for the backoff
-    max_tries=global_max_parallel_request_retries,  # maximum number of retries
-    max_time=global_max_parallel_request_retry_timeout,  # maximum total time to retry for
-    on_backoff=on_backoff,  # specifying the function to call on backoff
-    giveup=giveup,
-    logger=verbose_proxy_logger,
-)
 async def chat_completion(  # noqa: PLR0915
     request: Request,
     fastapi_response: Response,
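The removed decorator retried the entire /chat/completions endpoint on any exception, which the commit message calls non-functional and identifies as a trigger for the circular reference error. For reference, the general shape of a backoff.on_exception decorator (generic values, not litellm's configuration):

import backoff


@backoff.on_exception(
    backoff.expo,  # exponential wait between attempts
    Exception,  # base exception class to retry on
    max_tries=3,  # maximum number of attempts
    max_time=30,  # maximum total seconds spent retrying
)
async def flaky_endpoint() -> str:
    return "ok"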
test_http_parsing_utils.py
@@ -39,7 +39,7 @@ async def test_request_body_caching():
     result1 = await _read_request_body(mock_request)
     assert result1 == test_data
     assert "parsed_body" in mock_request.scope
-    assert mock_request.scope["parsed_body"] == test_data
+    assert mock_request.scope["parsed_body"] == (("key",), {"key": "value"})

     # Verify the body was read once
     mock_request.body.assert_called_once()
@@ -49,7 +49,7 @@ async def test_request_body_caching():
     # Second call should use the cached body
     result2 = await _read_request_body(mock_request)
-    assert result2 == test_data
+    assert result2 == {"key": "value"}

     # Verify the body was not read again
     mock_request.body.assert_not_called()
@@ -75,7 +75,10 @@ async def test_form_data_parsing():
     # Verify the form data was correctly parsed
     assert result == test_data
     assert "parsed_body" in mock_request.scope
-    assert mock_request.scope["parsed_body"] == test_data
+    assert mock_request.scope["parsed_body"] == (
+        ("name", "message"),
+        {"name": "test_user", "message": "hello world"},
+    )

     # Verify form() was called
     mock_request.form.assert_called_once()
@@ -101,7 +104,46 @@ async def test_empty_request_body():
     # Verify an empty dict is returned
     assert result == {}
     assert "parsed_body" in mock_request.scope
-    assert mock_request.scope["parsed_body"] == {}
+    assert mock_request.scope["parsed_body"] == ((), {})

     # Verify the body was read
     mock_request.body.assert_called_once()
+
+
+@pytest.mark.asyncio
+async def test_circular_reference_handling():
+    """
+    Test that the cached request body isn't modified when the returned result is modified.
+    Demonstrates the mutable dictionary reference issue.
+    """
+    # Create a mock request with initial data
+    mock_request = MagicMock()
+    initial_body = {
+        "model": "gpt-4",
+        "messages": [{"role": "user", "content": "Hello"}],
+    }
+
+    mock_request.body = AsyncMock(return_value=orjson.dumps(initial_body))
+    mock_request.headers = {"content-type": "application/json"}
+    mock_request.scope = {}
+
+    # First parse
+    result = await _read_request_body(mock_request)
+
+    # Verify initial parse
+    assert result["model"] == "gpt-4"
+    assert result["messages"] == [{"role": "user", "content": "Hello"}]
+
+    # Modify the result by adding proxy_server_request
+    result["proxy_server_request"] = {
+        "url": "http://0.0.0.0:4000/v1/chat/completions",
+        "method": "POST",
+        "headers": {"content-type": "application/json"},
+        "body": result,  # Creates circular reference
+    }
+
+    # Second parse using the same request - will use the modified cached value
+    result2 = await _read_request_body(mock_request)
+    assert (
+        "proxy_server_request" not in result2
+    )  # This will pass, showing the cache pollution
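To run the new regression test on its own, something like the following should work from the repo root (selecting by test name, since the full file path is not shown on this page):

pytest -q -k test_circular_reference_handling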