Merge branch 'main' into litellm_webhook_support

commit 707cf24472
Krish Dholakia, 2024-05-20 18:41:58 -07:00 (committed by GitHub)
19 changed files with 832 additions and 90 deletions

litellm/proxy/utils.py

@@ -19,8 +19,18 @@ from litellm.llms.custom_httpx.httpx_handler import HTTPHandler
 from litellm.proxy.hooks.parallel_request_limiter import (
     _PROXY_MaxParallelRequestsHandler,
 )
+from litellm.exceptions import RejectedRequestError
 from litellm._service_logger import ServiceLogging, ServiceTypes
-from litellm import ModelResponse, EmbeddingResponse, ImageResponse
+from litellm import (
+    ModelResponse,
+    EmbeddingResponse,
+    ImageResponse,
+    TranscriptionResponse,
+    TextCompletionResponse,
+    CustomStreamWrapper,
+    TextCompletionStreamWrapper,
+)
+from litellm.utils import ModelResponseIterator
 from litellm.proxy.hooks.max_budget_limiter import _PROXY_MaxBudgetLimiter
 from litellm.proxy.hooks.tpm_rpm_limiter import _PROXY_MaxTPMRPMLimiter
 from litellm.proxy.hooks.cache_control_check import _PROXY_CacheControlCheck
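Note: RejectedRequestError imported here is raised by the pre_call_hook changes further down; the additional response and stream-wrapper imports presumably support returning rejection messages in the response shape each endpoint expects.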
@@ -33,6 +43,7 @@ from email.mime.text import MIMEText
 from email.mime.multipart import MIMEMultipart
 from datetime import datetime, timedelta
 from litellm.integrations.slack_alerting import SlackAlerting
+from typing_extensions import overload
 
 
 def print_verbose(print_statement):
@ -132,7 +143,13 @@ class ProxyLogging:
alerting_args=alerting_args,
)
if "daily_reports" in self.alert_types:
if (
self.alerting is not None
and "slack" in self.alerting
and "daily_reports" in self.alert_types
):
# NOTE: ENSURE we only add callbacks when alerting is on
# We should NOT add callbacks when alerting is off
litellm.callbacks.append(self.slack_alerting_instance) # type: ignore
if redis_cache is not None:
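A minimal sketch of what the new guard changes, with plain values standing in for the ProxyLogging attributes (assumed setup, not the real class):

alerting = None                  # alerting disabled on this proxy
alert_types = ["daily_reports"]  # daily reports still listed

# Old check: registered the Slack callback whenever "daily_reports" was listed
old_registers = "daily_reports" in alert_types  # True, even with alerting off

# New check: alerting must also be enabled and include "slack"
new_registers = (
    alerting is not None
    and "slack" in alerting
    and "daily_reports" in alert_types
)  # False, so no callback is added

print(old_registers, new_registers)  # True False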
@ -177,18 +194,20 @@ class ProxyLogging:
)
litellm.utils.set_callbacks(callback_list=callback_list)
# The actual implementation of the function
async def pre_call_hook(
self,
user_api_key_dict: UserAPIKeyAuth,
data: dict,
call_type: Literal[
"completion",
"text_completion",
"embeddings",
"image_generation",
"moderation",
"audio_transcription",
],
):
) -> dict:
"""
Allows users to modify/reject the incoming request to the proxy, without having to deal with parsing Request body.
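The typing_extensions.overload import added above and the "# The actual implementation of the function" comment point at Python's @overload pattern: typed stub signatures for the type checker, followed by one real implementation. A generic sketch of that pattern with a hypothetical function (the actual overloaded stubs sit outside this hunk):

from typing_extensions import overload

@overload
def coerce_id(value: str) -> str: ...
@overload
def coerce_id(value: int) -> str: ...

# The actual implementation of the function
def coerce_id(value):
    return str(value)

print(coerce_id(42))  # "42"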
@ -215,8 +234,25 @@ class ProxyLogging:
call_type=call_type,
)
if response is not None:
data = response
if isinstance(response, Exception):
raise response
elif isinstance(response, dict):
data = response
elif isinstance(response, str):
if (
call_type == "completion"
or call_type == "text_completion"
):
raise RejectedRequestError(
message=response,
model=data.get("model", ""),
llm_provider="",
request_data=data,
)
else:
raise HTTPException(
status_code=400, detail={"error": response}
)
print_verbose(f"final data being sent to {call_type} call: {data}")
return data
except Exception as e:
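With this change, a custom pre-call hook can reject a request by returning a string instead of mutating data. A hypothetical hook exercising the new paths; CustomLogger and the async_pre_call_hook signature follow litellm's custom-logger interface, but BannedWordGuard and its policy check are illustrative only:

# A hypothetical hook exercising the new rejection paths.
from litellm.integrations.custom_logger import CustomLogger


class BannedWordGuard(CustomLogger):
    async def async_pre_call_hook(self, user_api_key_dict, cache, data, call_type):
        if "forbidden" in str(data.get("messages", "")):
            # Returning a string now rejects the request:
            # - "completion"/"text_completion" -> RejectedRequestError,
            #   carrying the model and the original request data
            # - every other call_type -> HTTPException(status_code=400)
            return "This request was blocked by policy."
        # Returning a dict forwards the (possibly modified) request;
        # raising an Exception here is re-raised by the proxy as-is.
        return data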
@ -441,7 +477,7 @@ class ProxyLogging:
asyncio.create_task(
self.alerting_handler(
message=f"LLM API call failed: {exception_str}",
message=f"LLM API call failed: `{exception_str}`",
level="High",
alert_type="llm_exceptions",
request_data=request_data,
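Wrapping exception_str in backticks should render the raw error as inline code in the Slack alert rather than letting it be parsed as message markup.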