feat(proxy_server.py): refactor returning rejected message to work with error logging

Log the rejected request as a failed call to langfuse/slack alerting.
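
For illustration, a rough sketch of the flow this enables. The helper function below and the exact failure-hook call are assumptions for illustration, not part of this diff: when a pre-call hook rejects a request by returning a string, the proxy now raises RejectedRequestError instead of building the mock response inline, so the caller can log the rejection as a failed call before returning the rejection text to the client.

# Illustrative sketch only: call_with_guardrails is a hypothetical helper, and the
# post_call_failure_hook call is an assumption about how the rejection gets logged.
from litellm import ModelResponse
from litellm.exceptions import RejectedRequestError


async def call_with_guardrails(proxy_logging_obj, user_api_key_dict, data: dict):
    try:
        # pre_call_hook now always returns a dict (or raises) for chat/text completions
        data = await proxy_logging_obj.pre_call_hook(
            user_api_key_dict=user_api_key_dict, data=data, call_type="completion"
        )
    except RejectedRequestError as e:
        # Log the rejection as a failed call so failure callbacks
        # (e.g. langfuse / slack alerting) are triggered.
        await proxy_logging_obj.post_call_failure_hook(
            original_exception=e, user_api_key_dict=user_api_key_dict
        )
        # Return the rejection message to the client as a mock chat response.
        _response = ModelResponse()
        _response.choices[0].message.content = getattr(e, "message", str(e))
        return _response
    # ... otherwise continue with the normal litellm.acompletion(**data) call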
Krrish Dholakia 2024-05-20 11:14:36 -07:00
parent 372323c38a
commit f11f207ae6
7 changed files with 118 additions and 100 deletions


@@ -18,6 +18,7 @@ from litellm.llms.custom_httpx.httpx_handler import HTTPHandler
 from litellm.proxy.hooks.parallel_request_limiter import (
     _PROXY_MaxParallelRequestsHandler,
 )
+from litellm.exceptions import RejectedRequestError
 from litellm._service_logger import ServiceLogging, ServiceTypes
 from litellm import (
     ModelResponse,
@@ -186,40 +187,6 @@ class ProxyLogging:
         )
         litellm.utils.set_callbacks(callback_list=callback_list)
-    # fmt: off
-    @overload
-    async def pre_call_hook(
-        self,
-        user_api_key_dict: UserAPIKeyAuth,
-        data: dict,
-        call_type: Literal["completion"]
-    ) -> Union[dict, ModelResponse, CustomStreamWrapper]:
-        ...
-    @overload
-    async def pre_call_hook(
-        self,
-        user_api_key_dict: UserAPIKeyAuth,
-        data: dict,
-        call_type: Literal["text_completion"]
-    ) -> Union[dict, TextCompletionResponse, TextCompletionStreamWrapper]:
-        ...
-    @overload
-    async def pre_call_hook(
-        self,
-        user_api_key_dict: UserAPIKeyAuth,
-        data: dict,
-        call_type: Literal["embeddings",
-                           "image_generation",
-                           "moderation",
-                           "audio_transcription",]
-    ) -> dict:
-        ...
-    # fmt: on
-    # The actual implementation of the function
     async def pre_call_hook(
         self,
@@ -233,13 +200,7 @@ class ProxyLogging:
             "moderation",
             "audio_transcription",
         ],
-    ) -> Union[
-        dict,
-        ModelResponse,
-        TextCompletionResponse,
-        CustomStreamWrapper,
-        TextCompletionStreamWrapper,
-    ]:
+    ) -> dict:
         """
         Allows users to modify/reject the incoming request to the proxy, without having to deal with parsing Request body.
@@ -271,54 +232,20 @@
                         elif isinstance(response, dict):
                             data = response
                         elif isinstance(response, str):
-                            if call_type == "completion":
-                                _chat_response = ModelResponse()
-                                _chat_response.choices[0].message.content = response
-                                if (
-                                    data.get("stream", None) is not None
-                                    and data["stream"] == True
-                                ):
-                                    _iterator = ModelResponseIterator(
-                                        model_response=_chat_response
-                                    )
-                                    return CustomStreamWrapper(
-                                        completion_stream=_iterator,
-                                        model=data.get("model", ""),
-                                        custom_llm_provider="cached_response",
-                                        logging_obj=data.get(
-                                            "litellm_logging_obj", None
-                                        ),
-                                    )
-                                return _response
-                            elif call_type == "text_completion":
-                                if (
-                                    data.get("stream", None) is not None
-                                    and data["stream"] == True
-                                ):
-                                    _chat_response = ModelResponse()
-                                    _chat_response.choices[0].message.content = response
-                                    if (
-                                        data.get("stream", None) is not None
-                                        and data["stream"] == True
-                                    ):
-                                        _iterator = ModelResponseIterator(
-                                            model_response=_chat_response
-                                        )
-                                        return TextCompletionStreamWrapper(
-                                            completion_stream=_iterator,
-                                            model=data.get("model", ""),
-                                        )
-                                else:
-                                    _response = TextCompletionResponse()
-                                    _response.choices[0].text = response
-                                    return _response
+                            if (
+                                call_type == "completion"
+                                or call_type == "text_completion"
+                            ):
+                                raise RejectedRequestError(
+                                    message=response,
+                                    model=data.get("model", ""),
+                                    llm_provider="",
+                                    request_data=data,
+                                )
+                            else:
+                                raise HTTPException(
+                                    status_code=400, detail={"error": response}
+                                )
             print_verbose(f"final data being sent to {call_type} call: {data}")
             return data
         except Exception as e:
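
The pre_call_hook docstring above describes the hook contract: custom handlers can modify or reject the incoming request without parsing the raw request body. A minimal sketch of such a hook follows; the class name and blocked-word check are hypothetical, and the async_pre_call_hook signature is assumed from litellm's CustomLogger interface. Returning a dict modifies the request, while returning a string rejects it, which after this change is raised as RejectedRequestError for completion/text_completion calls and logged as a failed call.

# Hypothetical guardrail hook; assumes the CustomLogger.async_pre_call_hook interface.
from litellm.integrations.custom_logger import CustomLogger


class BlockedWordGuardrail(CustomLogger):
    async def async_pre_call_hook(self, user_api_key_dict, cache, data, call_type):
        last_message = str(data.get("messages", [{}])[-1].get("content", ""))
        if "forbidden" in last_message.lower():
            # Returning a string rejects the request; it now surfaces as
            # RejectedRequestError and is logged as a failed call.
            return "This request was blocked by the proxy guardrail."
        # Returning a dict lets the hook modify the request before the LLM call.
        data.setdefault("metadata", {})["guardrail"] = "passed"
        return data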