(fix proxy perf) use _read_request_body instead of ast.literal_eval to get better performance (#7545)

* fix ast literal eval

* run ci/cd again
This commit is contained in:
Ishaan Jaff 2025-01-03 17:48:32 -08:00 committed by GitHub
parent 81d1826c25
commit df677ab073
2 changed files with 4 additions and 21 deletions

View file

@@ -1,4 +1,3 @@
-import ast
import asyncio
import copy
import inspect
@@ -3339,13 +3338,7 @@ async def chat_completion(  # noqa: PLR0915
data = {}
try:
-body = await request.body()
-body_str = body.decode()
-try:
-    data = ast.literal_eval(body_str)
-except Exception:
-    data = json.loads(body_str)
+data = await _read_request_body(request=request)
verbose_proxy_logger.debug(
    "Request received by LiteLLM:\n{}".format(json.dumps(data, indent=4)),
)
@@ -3612,12 +3605,7 @@ async def completion(  # noqa: PLR0915
global user_temperature, user_request_timeout, user_max_tokens, user_api_base
data = {}
try:
-body = await request.body()
-body_str = body.decode()
-try:
-    data = ast.literal_eval(body_str)
-except Exception:
-    data = json.loads(body_str)
+data = await _read_request_body(request=request)
data["model"] = (
    general_settings.get("completion_model", None)  # server default
@@ -5350,12 +5338,7 @@ async def anthropic_response(  # noqa: PLR0915
litellm.adapters = [{"id": "anthropic", "adapter": anthropic_adapter}]
global user_temperature, user_request_timeout, user_max_tokens, user_api_base
-body = await request.body()
-body_str = body.decode()
-try:
-    request_data: dict = ast.literal_eval(body_str)
-except Exception:
-    request_data = json.loads(body_str)
+request_data = await _read_request_body(request=request)
data: dict = {**request_data, "adapter_id": "anthropic"}
try:
    data["model"] = (

View file

@@ -24,7 +24,7 @@ from litellm import RateLimitError, Timeout, completion, completion_cost, embedd
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler
from litellm.litellm_core_utils.prompt_templates.factory import anthropic_messages_pt
-# litellm.num_retries = 3
+# litellm.num_retries=3
litellm.cache = None
litellm.success_callback = []