mirror of
https://github.com/BerriAI/litellm.git
synced 2025-04-25 18:54:30 +00:00
feat(proxy_server.py): refactor returning rejected message, to work with error logging
log the rejected request as a failed call to langfuse/slack alerting
This commit is contained in:
parent
372323c38a
commit
f11f207ae6
7 changed files with 118 additions and 100 deletions
|
@ -18,6 +18,7 @@ from litellm.llms.custom_httpx.httpx_handler import HTTPHandler
|
|||
from litellm.proxy.hooks.parallel_request_limiter import (
|
||||
_PROXY_MaxParallelRequestsHandler,
|
||||
)
|
||||
from litellm.exceptions import RejectedRequestError
|
||||
from litellm._service_logger import ServiceLogging, ServiceTypes
|
||||
from litellm import (
|
||||
ModelResponse,
|
||||
|
@ -186,40 +187,6 @@ class ProxyLogging:
|
|||
)
|
||||
litellm.utils.set_callbacks(callback_list=callback_list)
|
||||
|
||||
# fmt: off
|
||||
|
||||
@overload
|
||||
async def pre_call_hook(
|
||||
self,
|
||||
user_api_key_dict: UserAPIKeyAuth,
|
||||
data: dict,
|
||||
call_type: Literal["completion"]
|
||||
) -> Union[dict, ModelResponse, CustomStreamWrapper]:
|
||||
...
|
||||
|
||||
@overload
|
||||
async def pre_call_hook(
|
||||
self,
|
||||
user_api_key_dict: UserAPIKeyAuth,
|
||||
data: dict,
|
||||
call_type: Literal["text_completion"]
|
||||
) -> Union[dict, TextCompletionResponse, TextCompletionStreamWrapper]:
|
||||
...
|
||||
|
||||
@overload
|
||||
async def pre_call_hook(
|
||||
self,
|
||||
user_api_key_dict: UserAPIKeyAuth,
|
||||
data: dict,
|
||||
call_type: Literal["embeddings",
|
||||
"image_generation",
|
||||
"moderation",
|
||||
"audio_transcription",]
|
||||
) -> dict:
|
||||
...
|
||||
|
||||
# fmt: on
|
||||
|
||||
# The actual implementation of the function
|
||||
async def pre_call_hook(
|
||||
self,
|
||||
|
@ -233,13 +200,7 @@ class ProxyLogging:
|
|||
"moderation",
|
||||
"audio_transcription",
|
||||
],
|
||||
) -> Union[
|
||||
dict,
|
||||
ModelResponse,
|
||||
TextCompletionResponse,
|
||||
CustomStreamWrapper,
|
||||
TextCompletionStreamWrapper,
|
||||
]:
|
||||
) -> dict:
|
||||
"""
|
||||
Allows users to modify/reject the incoming request to the proxy, without having to deal with parsing Request body.
|
||||
|
||||
|
@ -271,54 +232,20 @@ class ProxyLogging:
|
|||
elif isinstance(response, dict):
|
||||
data = response
|
||||
elif isinstance(response, str):
|
||||
if call_type == "completion":
|
||||
_chat_response = ModelResponse()
|
||||
_chat_response.choices[0].message.content = response
|
||||
|
||||
if (
|
||||
data.get("stream", None) is not None
|
||||
and data["stream"] == True
|
||||
):
|
||||
_iterator = ModelResponseIterator(
|
||||
model_response=_chat_response
|
||||
)
|
||||
return CustomStreamWrapper(
|
||||
completion_stream=_iterator,
|
||||
model=data.get("model", ""),
|
||||
custom_llm_provider="cached_response",
|
||||
logging_obj=data.get(
|
||||
"litellm_logging_obj", None
|
||||
),
|
||||
)
|
||||
return _response
|
||||
elif call_type == "text_completion":
|
||||
if (
|
||||
data.get("stream", None) is not None
|
||||
and data["stream"] == True
|
||||
):
|
||||
_chat_response = ModelResponse()
|
||||
_chat_response.choices[0].message.content = response
|
||||
|
||||
if (
|
||||
data.get("stream", None) is not None
|
||||
and data["stream"] == True
|
||||
):
|
||||
_iterator = ModelResponseIterator(
|
||||
model_response=_chat_response
|
||||
)
|
||||
return TextCompletionStreamWrapper(
|
||||
completion_stream=_iterator,
|
||||
model=data.get("model", ""),
|
||||
)
|
||||
else:
|
||||
_response = TextCompletionResponse()
|
||||
_response.choices[0].text = response
|
||||
return _response
|
||||
if (
|
||||
call_type == "completion"
|
||||
or call_type == "text_completion"
|
||||
):
|
||||
raise RejectedRequestError(
|
||||
message=response,
|
||||
model=data.get("model", ""),
|
||||
llm_provider="",
|
||||
request_data=data,
|
||||
)
|
||||
else:
|
||||
raise HTTPException(
|
||||
status_code=400, detail={"error": response}
|
||||
)
|
||||
|
||||
print_verbose(f"final data being sent to {call_type} call: {data}")
|
||||
return data
|
||||
except Exception as e:
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue