(fix proxy perf) use _read_request_body instead of ast.literal_eval to get better performance (#7545)

* fix ast literal eval

* run ci/cd again
This commit is contained in:
Ishaan Jaff 2025-01-03 17:48:32 -08:00 committed by GitHub
parent 81d1826c25
commit df677ab073
2 changed files with 4 additions and 21 deletions

View file

@@ -1,4 +1,3 @@
-import ast
import asyncio
import copy
import inspect
@@ -3339,13 +3338,7 @@ async def chat_completion(  # noqa: PLR0915
data = {}
try:
-body = await request.body()
-body_str = body.decode()
-try:
-    data = ast.literal_eval(body_str)
-except Exception:
-    data = json.loads(body_str)
+data = await _read_request_body(request=request)
verbose_proxy_logger.debug(
    "Request received by LiteLLM:\n{}".format(json.dumps(data, indent=4)),
)
@@ -3612,12 +3605,7 @@ async def completion(  # noqa: PLR0915
global user_temperature, user_request_timeout, user_max_tokens, user_api_base
data = {}
try:
-body = await request.body()
-body_str = body.decode()
-try:
-    data = ast.literal_eval(body_str)
-except Exception:
-    data = json.loads(body_str)
+data = await _read_request_body(request=request)
data["model"] = (
    general_settings.get("completion_model", None)  # server default
@@ -5350,12 +5338,7 @@ async def anthropic_response(  # noqa: PLR0915
litellm.adapters = [{"id": "anthropic", "adapter": anthropic_adapter}]
global user_temperature, user_request_timeout, user_max_tokens, user_api_base
-body = await request.body()
-body_str = body.decode()
-try:
-    request_data: dict = ast.literal_eval(body_str)
-except Exception:
-    request_data = json.loads(body_str)
+request_data = await _read_request_body(request=request)
data: dict = {**request_data, "adapter_id": "anthropic"}
try:
    data["model"] = (

View file

@@ -24,7 +24,7 @@ from litellm import RateLimitError, Timeout, completion, completion_cost, embedd
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler
from litellm.litellm_core_utils.prompt_templates.factory import anthropic_messages_pt
-# litellm.num_retries = 3
+# litellm.num_retries=3
litellm.cache = None
litellm.success_callback = []