fix(proxy_server.py): handle misformatted json body in chat completion request
commit 979575a2a6
parent 1e526c7e06

2 changed files with 10 additions and 3 deletions
proxy_server.py

@@ -962,7 +962,12 @@ async def chat_completion(request: Request, model: Optional[str] = None, user_ap
     global general_settings, user_debug, proxy_logging_obj
     try:
         data = {}
-        data = await request.json() # type: ignore
+        body = await request.body()
+        body_str = body.decode()
+        try:
+            data = ast.literal_eval(body_str)
+        except:
+            data = json.loads(body_str)
 
         # Include original request and headers in the data
         data["proxy_server_request"] = {
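With this change, bodies sent as Python-style dicts (single quotes, True/False/None) are accepted via ast.literal_eval, while standard JSON still parses through the json.loads fallback. A minimal standalone sketch of the same two-step parse; the helper name is illustrative, not from the codebase:

import ast
import json

def parse_request_body(body_str: str) -> dict:
    # ast.literal_eval accepts Python-literal payloads (single quotes,
    # True/False/None) that strict JSON parsing rejects.
    try:
        return ast.literal_eval(body_str)
    except Exception:
        # Fall back to strict JSON for bodies literal_eval cannot read
        # (e.g. lowercase true/false/null).
        return json.loads(body_str)

# Python-dict style body that json.loads alone would reject:
print(parse_request_body("{'model': 'gpt-3.5-turbo', 'stream': True}"))
# Standard JSON body, handled by the fallback:
print(parse_request_body('{"model": "gpt-3.5-turbo", "stream": true}'))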
router.py

@@ -740,7 +740,7 @@ class Router:
         model_name = kwargs.get('model', None) # i.e. gpt35turbo
         custom_llm_provider = kwargs.get("litellm_params", {}).get('custom_llm_provider', None) # i.e. azure
         metadata = kwargs.get("litellm_params", {}).get('metadata', None)
-        deployment_id = kwargs.get("litellm_params", {}).get("model_info").get("id")
+        deployment_id = kwargs.get("litellm_params", {}).get("model_info", {}).get("id", None)
         self._set_cooldown_deployments(deployment_id) # setting deployment_id in cooldown deployments
         if metadata:
             deployment = metadata.get("deployment", None)
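The router change hardens the chained lookups: previously, a failure callback whose litellm_params lacked "model_info" crashed with AttributeError before the cooldown logic ran. A small sketch with made-up kwargs showing the difference:

kwargs = {"litellm_params": {}}  # hypothetical failure-callback kwargs

# Old form: .get("model_info") returns None when the key is absent,
# so the chained .get("id") raises:
#   kwargs.get("litellm_params", {}).get("model_info").get("id")
#   AttributeError: 'NoneType' object has no attribute 'get'

# New form supplies a default at each step and degrades to None:
deployment_id = kwargs.get("litellm_params", {}).get("model_info", {}).get("id", None)
print(deployment_id)  # None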
@@ -779,10 +779,12 @@ class Router:
             raise e
 
     def _set_cooldown_deployments(self,
-                                  deployment: str):
+                                  deployment: Optional[str]=None):
         """
         Add a model to the list of models being cooled down for that minute, if it exceeds the allowed fails / minute
         """
+        if deployment is None:
+            return
 
         current_minute = datetime.now().strftime("%H-%M")
         # get current fails for deployment
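With deployment now Optional, the early return keeps the None id produced by the defensive lookup above out of the cooldown bookkeeping. A hypothetical minimal tracker illustrating the guard; the bucket layout and fail threshold here are assumptions, not litellm's actual internals:

from datetime import datetime
from typing import Optional

failed_calls: dict = {}          # minute -> {deployment: fail count}
cooldown_deployments: dict = {}  # minute -> {deployments cooling down}
ALLOWED_FAILS_PER_MINUTE = 1     # illustrative threshold

def set_cooldown_deployment(deployment: Optional[str] = None) -> None:
    # Guard added by the fix: callbacks may now pass deployment=None
    # when "model_info" carried no id, so bail out instead of keying
    # the buckets on None.
    if deployment is None:
        return
    current_minute = datetime.now().strftime("%H-%M")
    fails = failed_calls.setdefault(current_minute, {})
    fails[deployment] = fails.get(deployment, 0) + 1
    # Cool the deployment down once it exceeds the allowed fails/minute.
    if fails[deployment] > ALLOWED_FAILS_PER_MINUTE:
        cooldown_deployments.setdefault(current_minute, set()).add(deployment)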