LiteLLM Minor Fixes & Improvements (09/19/2024) (#5793)

* fix(model_prices_and_context_window.json): add cost tracking for more vertex llama3.1 models
  (8b and 70b models)
* fix(proxy/utils.py): handle data being None on pre-call hooks
* fix(proxy/): create views on initial proxy startup
  Fixes the base case, where a user starts the proxy for the first time.
  Fixes https://github.com/BerriAI/litellm/issues/5756
* build(config.yml): fix vertex version for test
* feat(ui/): support enabling/disabling slack alerting
  Allows an admin to turn slack alerting on/off through the UI.
* feat(rerank/main.py): support langfuse logging
* fix(proxy/utils.py): fix linting errors
* fix(langfuse.py): log clean metadata
* test(tests): replace deprecated openai model
2025-04-26 11:14:04 +00:00 · 2024-09-20 08:19:52 -07:00 · 2024-09-20 08:19:52 -07:00 · 4445bfb9d7
commit 4445bfb9d7
parent 7c241ddfcb
22 changed files with 645 additions and 94 deletions
--- a/litellm/proxy/utils.py
+++ b/litellm/proxy/utils.py
@ -14,7 +14,7 @@ from datetime import datetime, timedelta
 from email.mime.multipart import MIMEMultipart
 from email.mime.text import MIMEText
 from functools import wraps
-from typing import TYPE_CHECKING, Any, List, Literal, Optional, Tuple, Union
+from typing import TYPE_CHECKING, Any, List, Literal, Optional, Tuple, Union, overload

 import backoff
 import httpx
@ -51,6 +51,10 @@ from litellm.proxy._types import (
    SpendLogsPayload,
    UserAPIKeyAuth,
 )
+from litellm.proxy.db.create_views import (
+    create_missing_views,
+    should_create_missing_views,
+)
 from litellm.proxy.hooks.cache_control_check import _PROXY_CacheControlCheck
 from litellm.proxy.hooks.max_budget_limiter import _PROXY_MaxBudgetLimiter
 from litellm.proxy.hooks.parallel_request_limiter import (
@ -365,6 +369,25 @@ class ProxyLogging:
        return data

    # The actual implementation of the function
+    @overload
+    async def pre_call_hook(
+        self,
+        user_api_key_dict: UserAPIKeyAuth,
+        data: None,
+        call_type: Literal[
+            "completion",
+            "text_completion",
+            "embeddings",
+            "image_generation",
+            "moderation",
+            "audio_transcription",
+            "pass_through_endpoint",
+            "rerank",
+        ],
+    ) -> None:
+        pass
+
+    @overload
    async def pre_call_hook(
        self,
        user_api_key_dict: UserAPIKeyAuth,
@ -380,6 +403,23 @@ class ProxyLogging:
            "rerank",
        ],
    ) -> dict:
+        pass
+
+    async def pre_call_hook(
+        self,
+        user_api_key_dict: UserAPIKeyAuth,
+        data: Optional[dict],
+        call_type: Literal[
+            "completion",
+            "text_completion",
+            "embeddings",
+            "image_generation",
+            "moderation",
+            "audio_transcription",
+            "pass_through_endpoint",
+            "rerank",
+        ],
+    ) -> Optional[dict]:
        """
        Allows users to modify/reject the incoming request to the proxy, without having to deal with parsing Request body.

@ -394,6 +434,9 @@ class ProxyLogging:
            self.slack_alerting_instance.response_taking_too_long(request_data=data)
        )

+        if data is None:
+            return None
+
        try:
            for callback in litellm.callbacks:
                _callback = None
@ -418,7 +461,7 @@ class ProxyLogging:
                    response = await _callback.async_pre_call_hook(
                        user_api_key_dict=user_api_key_dict,
                        cache=self.call_details["user_api_key_cache"],
-                        data=data,
+                        data=data,  # type: ignore
                        call_type=call_type,
                    )
                    if response is not None:
@ -434,7 +477,7 @@ class ProxyLogging:
                    response = await _callback.async_pre_call_hook(
                        user_api_key_dict=user_api_key_dict,
                        cache=self.call_details["user_api_key_cache"],
-                        data=data,
+                        data=data,  # type: ignore
                        call_type=call_type,
                    )
                    if response is not None:
@ -1021,20 +1064,24 @@ class PrismaClient:
                        "LiteLLM_VerificationTokenView Created in DB!"
                    )
                else:
-                    # don't block execution if these views are missing
-                    # Convert lists to sets for efficient difference calculation
-                    ret_view_names_set = (
-                        set(ret[0]["view_names"]) if ret[0]["view_names"] else set()
-                    )
-                    expected_views_set = set(expected_views)
-                    # Find missing views
-                    missing_views = expected_views_set - ret_view_names_set
-
-                    verbose_proxy_logger.warning(
-                        "\n\n\033[93mNot all views exist in db, needed for UI 'Usage' tab. Missing={}.\nRun 'create_views.py' from https://github.com/BerriAI/litellm/tree/main/db_scripts to create missing views.\033[0m\n".format(
-                            missing_views
+                    should_create_views = await should_create_missing_views(db=self.db)
+                    if should_create_views:
+                        await create_missing_views(db=self.db)
+                    else:
+                        # don't block execution if these views are missing
+                        # Convert lists to sets for efficient difference calculation
+                        ret_view_names_set = (
+                            set(ret[0]["view_names"]) if ret[0]["view_names"] else set()
+                        )
+                        expected_views_set = set(expected_views)
+                        # Find missing views
+                        missing_views = expected_views_set - ret_view_names_set
+
+                        verbose_proxy_logger.warning(
+                            "\n\n\033[93mNot all views exist in db, needed for UI 'Usage' tab. Missing={}.\nRun 'create_views.py' from https://github.com/BerriAI/litellm/tree/main/db_scripts to create missing views.\033[0m\n".format(
+                                missing_views
+                            )
                        )
-                    )

        except Exception as e:
            raise