Merge branch 'main' into litellm_webhook_support

commit 707cf24472
Krish Dholakia, 2024-05-20 18:41:58 -07:00 (committed by GitHub)
19 changed files with 832 additions and 90 deletions

litellm/proxy/utils.py

@@ -19,8 +19,18 @@ from litellm.llms.custom_httpx.httpx_handler import HTTPHandler
 from litellm.proxy.hooks.parallel_request_limiter import (
     _PROXY_MaxParallelRequestsHandler,
 )
+from litellm.exceptions import RejectedRequestError
 from litellm._service_logger import ServiceLogging, ServiceTypes
-from litellm import ModelResponse, EmbeddingResponse, ImageResponse
+from litellm import (
+    ModelResponse,
+    EmbeddingResponse,
+    ImageResponse,
+    TranscriptionResponse,
+    TextCompletionResponse,
+    CustomStreamWrapper,
+    TextCompletionStreamWrapper,
+)
+from litellm.utils import ModelResponseIterator
 from litellm.proxy.hooks.max_budget_limiter import _PROXY_MaxBudgetLimiter
 from litellm.proxy.hooks.tpm_rpm_limiter import _PROXY_MaxTPMRPMLimiter
 from litellm.proxy.hooks.cache_control_check import _PROXY_CacheControlCheck
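Note: RejectedRequestError imported here is raised by the pre_call_hook changes further down; the additional response and stream-wrapper imports presumably support returning rejection messages in the response shape each endpoint expects.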
@@ -33,6 +43,7 @@ from email.mime.text import MIMEText
 from email.mime.multipart import MIMEMultipart
 from datetime import datetime, timedelta
 from litellm.integrations.slack_alerting import SlackAlerting
+from typing_extensions import overload
 
 
 def print_verbose(print_statement):
@ -132,7 +143,13 @@ class ProxyLogging:
alerting_args=alerting_args,
)
if "daily_reports" in self.alert_types:
if (
self.alerting is not None
and "slack" in self.alerting
and "daily_reports" in self.alert_types
):
# NOTE: ENSURE we only add callbacks when alerting is on
# We should NOT add callbacks when alerting is off
litellm.callbacks.append(self.slack_alerting_instance) # type: ignore
if redis_cache is not None:
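A minimal sketch of what the new guard changes, with plain values standing in for the ProxyLogging attributes (assumed setup, not the real class):

alerting = None                  # alerting disabled on this proxy
alert_types = ["daily_reports"]  # daily reports still listed

# Old check: registered the Slack callback whenever "daily_reports" was listed
old_registers = "daily_reports" in alert_types  # True, even with alerting off

# New check: alerting must also be enabled and include "slack"
new_registers = (
    alerting is not None
    and "slack" in alerting
    and "daily_reports" in alert_types
)  # False, so no callback is added

print(old_registers, new_registers)  # True False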
@ -177,18 +194,20 @@ class ProxyLogging:
)
litellm.utils.set_callbacks(callback_list=callback_list)
# The actual implementation of the function
async def pre_call_hook(
self,
user_api_key_dict: UserAPIKeyAuth,
data: dict,
call_type: Literal[
"completion",
"text_completion",
"embeddings",
"image_generation",
"moderation",
"audio_transcription",
],
):
) -> dict:
"""
Allows users to modify/reject the incoming request to the proxy, without having to deal with parsing Request body.
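The typing_extensions.overload import added above and the "# The actual implementation of the function" comment point at Python's @overload pattern: typed stub signatures for the type checker, followed by one real implementation. A generic sketch of that pattern with a hypothetical function (the actual overloaded stubs sit outside this hunk):

from typing_extensions import overload

@overload
def coerce_id(value: str) -> str: ...
@overload
def coerce_id(value: int) -> str: ...

# The actual implementation of the function
def coerce_id(value):
    return str(value)

print(coerce_id(42))  # "42"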
@ -215,8 +234,25 @@ class ProxyLogging:
call_type=call_type,
)
if response is not None:
data = response
if isinstance(response, Exception):
raise response
elif isinstance(response, dict):
data = response
elif isinstance(response, str):
if (
call_type == "completion"
or call_type == "text_completion"
):
raise RejectedRequestError(
message=response,
model=data.get("model", ""),
llm_provider="",
request_data=data,
)
else:
raise HTTPException(
status_code=400, detail={"error": response}
)
print_verbose(f"final data being sent to {call_type} call: {data}")
return data
except Exception as e:
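With this change, a custom pre-call hook can reject a request by returning a string instead of mutating data. A hypothetical hook exercising the new paths; CustomLogger and the async_pre_call_hook signature follow litellm's custom-logger interface, but BannedWordGuard and its policy check are illustrative only:

# A hypothetical hook exercising the new rejection paths.
from litellm.integrations.custom_logger import CustomLogger


class BannedWordGuard(CustomLogger):
    async def async_pre_call_hook(self, user_api_key_dict, cache, data, call_type):
        if "forbidden" in str(data.get("messages", "")):
            # Returning a string now rejects the request:
            # - "completion"/"text_completion" -> RejectedRequestError,
            #   carrying the model and the original request data
            # - every other call_type -> HTTPException(status_code=400)
            return "This request was blocked by policy."
        # Returning a dict forwards the (possibly modified) request;
        # raising an Exception here is re-raised by the proxy as-is.
        return data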
@ -441,7 +477,7 @@ class ProxyLogging:
asyncio.create_task(
self.alerting_handler(
message=f"LLM API call failed: {exception_str}",
message=f"LLM API call failed: `{exception_str}`",
level="High",
alert_type="llm_exceptions",
request_data=request_data,
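Wrapping exception_str in backticks should render the raw error as inline code in the Slack alert rather than letting it be parsed as message markup.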