LiteLLM Minor fixes + improvements (08/04/2024) (#5505)

* Minor AWS IAM OIDC improvements (#5246)

* AWS IAM: Temporary tokens are valid across all regions after being issued, so it is wasteful to request one for each region.

* AWS IAM: Include an inline policy to help reduce misuse of overly permissive IAM roles.
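
Taken together, the two IAM changes above mean one scoped-down STS call can serve every region. A minimal boto3 sketch of the idea (the role ARN, inline policy, and token source are placeholders, not litellm's actual code):

```python
import json

import boto3

# Placeholder OIDC token; in practice this comes from your identity provider.
oidc_token = "<jwt-from-your-oidc-provider>"

# Inline session policy: caps what the temporary credentials can do, even if
# the underlying IAM role is overly permissive.
session_policy = json.dumps(
    {
        "Version": "2012-10-17",
        "Statement": [
            {
                "Effect": "Allow",
                "Action": [
                    "bedrock:InvokeModel",
                    "bedrock:InvokeModelWithResponseStream",
                ],
                "Resource": "*",
            }
        ],
    }
)

# One STS call; assume_role_with_web_identity needs no AWS credentials itself.
creds = boto3.client("sts").assume_role_with_web_identity(
    RoleArn="arn:aws:iam::123456789012:role/litellm-bedrock",  # placeholder
    RoleSessionName="litellm",
    WebIdentityToken=oidc_token,
    Policy=session_policy,  # inline policy rides along with the session
)["Credentials"]

# The same temporary credentials are valid in every region, so there is no
# need to repeat the STS round-trip per region.
for region in ("us-east-1", "eu-central-1"):
    client = boto3.client(
        "bedrock-runtime",
        region_name=region,
        aws_access_key_id=creds["AccessKeyId"],
        aws_secret_access_key=creds["SecretAccessKey"],
        aws_session_token=creds["SessionToken"],
    )
```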

* (test_bedrock_completion.py): Ensure we are testing the cross-region AWS OIDC flow.

* fix(router.py): log rejected requests

Fixes https://github.com/BerriAI/litellm/issues/5498
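
As a rough illustration of what the router change further below enables (the custom logger shown is a hypothetical example, not code from the PR), requests rejected by pre-call checks now reach failure callbacks instead of failing silently:

```python
import litellm
from litellm.integrations.custom_logger import CustomLogger


class RejectionLogger(CustomLogger):  # hypothetical example logger
    async def async_log_failure_event(self, kwargs, response_obj, start_time, end_time):
        # With this fix, requests rejected by pre-call rate-limit checks
        # fire this hook too, so they show up in logging/alerting.
        print("request failed/rejected:", kwargs.get("exception"))


litellm.callbacks = [RejectionLogger()]
```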

* refactor: don't use verbose_logger.exception if the exception is re-raised

The user might already have handling for this, but alerting systems in prod will surface it as an unhandled error.
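
In other words, the pattern being removed looks roughly like this (the call site is a generic stand-in, not one of the actual changed lines):

```python
from litellm._logging import verbose_logger


def fetch(make_call):
    # Anti-pattern: logging at exception level *and* re-raising means prod
    # alerting fires on the log line even when the caller handles the error.
    try:
        return make_call()
    except Exception as e:
        verbose_logger.exception("call failed")  # noisy: looks unhandled
        raise e


def fetch_quietly(make_call):
    # Preferred: re-raise and let the caller decide; log at debug if needed.
    try:
        return make_call()
    except Exception as e:
        verbose_logger.debug("call failed: %s", str(e))
        raise e
```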

* fix(datadog.py): support setting datadog source as an env var

Fixes https://github.com/BerriAI/litellm/issues/5508
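
Going by the docs change below, the source tag is now configurable via an environment variable. A hedged sketch of the intended usage (the exact variable name is an assumption based on the `dd_source` docs entry):

```python
import os

# Assumption: litellm's datadog logger reads the source tag from DD_SOURCE,
# falling back to its previous hard-coded value when unset.
os.environ["DD_API_KEY"] = "<your-datadog-api-key>"
os.environ["DD_SOURCE"] = "litellm-staging"

import litellm

litellm.success_callback = ["datadog"]  # datadog log events carry the source
```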

* docs(logging.md): add dd_source to datadog docs

* fix(proxy_server.py): expose `/customer/list` endpoint for showing all customers
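
A quick smoke test against a locally running proxy (the base URL and master key below are placeholders):

```python
import requests

# Placeholder URL/key; the endpoint requires a proxy admin key in practice.
resp = requests.get(
    "http://0.0.0.0:4000/customer/list",
    headers={"Authorization": "Bearer sk-1234"},
)
resp.raise_for_status()
print(resp.json())  # one entry per customer/end-user known to the proxy
```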

* (bedrock): Fix usage with Cloudflare AI Gateway, and proxies in general. (#5509)

* feat(anthropic.py): support 'cache_control' param for content when it is a string
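
Sketch of the shape this enables (model and messages are illustrative, not a test from the PR): `cache_control` can now accompany a plain-string `content` instead of requiring the list-of-content-blocks form.

```python
import litellm

response = litellm.completion(
    model="anthropic/claude-3-5-sonnet-20240620",
    messages=[
        {
            "role": "system",
            # Plain string content; previously cache_control required
            # list-style content blocks.
            "content": "Long, reusable system context goes here...",
            "cache_control": {"type": "ephemeral"},
        },
        {"role": "user", "content": "Answer using the cached context."},
    ],
)
```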

* Revert "(bedrock): Fix usage with Cloudflare AI Gateway, and proxies in gener…" (#5519)

This reverts commit 3fac0349c2.

* refactor: ci/cd run again

---------

Co-authored-by: David Manouchehri <david.manouchehri@ai.moda>
Krish Dholakia · 2024-09-04 22:16:55 -07:00 · committed by GitHub
parent cdc312d51d
commit 1e7e538261
24 changed files with 383 additions and 247 deletions

litellm/router.py:

@@ -47,6 +47,7 @@ from litellm._logging import verbose_router_logger
 from litellm.assistants.main import AssistantDeleted
 from litellm.caching import DualCache, InMemoryCache, RedisCache
 from litellm.integrations.custom_logger import CustomLogger
+from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLogging
 from litellm.llms.azure import get_azure_ad_token_from_oidc
 from litellm.router_strategy.least_busy import LeastBusyLoggingHandler
 from litellm.router_strategy.lowest_cost import LowestCostLoggingHandler
@@ -783,6 +784,10 @@ class Router:
                 }
             )
+            logging_obj: Optional[LiteLLMLogging] = kwargs.get(
+                "litellm_logging_obj", None
+            )
             rpm_semaphore = self._get_client(
                 deployment=deployment,
                 kwargs=kwargs,
@@ -797,11 +802,13 @@ class Router:
                     - If allowed, increment the rpm limit (allows global value to be updated, concurrency-safe)
                     """
                     await self.async_routing_strategy_pre_call_checks(
-                        deployment=deployment
+                        deployment=deployment, logging_obj=logging_obj
                     )
                     response = await _response
             else:
-                await self.async_routing_strategy_pre_call_checks(deployment=deployment)
+                await self.async_routing_strategy_pre_call_checks(
+                    deployment=deployment, logging_obj=logging_obj
+                )
                 response = await _response
 
             ## CHECK CONTENT FILTER ERROR ##
@@ -3860,7 +3867,9 @@ class Router:
             if isinstance(_callback, CustomLogger):
                 response = _callback.pre_call_check(deployment)
 
-    async def async_routing_strategy_pre_call_checks(self, deployment: dict):
+    async def async_routing_strategy_pre_call_checks(
+        self, deployment: dict, logging_obj: Optional[LiteLLMLogging] = None
+    ):
         """
         For usage-based-routing-v2, enables running rpm checks before the call is made, inside the semaphore.
 
@@ -3875,8 +3884,22 @@ class Router:
         for _callback in litellm.callbacks:
             if isinstance(_callback, CustomLogger):
                 try:
-                    response = await _callback.async_pre_call_check(deployment)
+                    _ = await _callback.async_pre_call_check(deployment)
                 except litellm.RateLimitError as e:
+                    ## LOG FAILURE EVENT
+                    if logging_obj is not None:
+                        asyncio.create_task(
+                            logging_obj.async_failure_handler(
+                                exception=e,
+                                traceback_exception=traceback.format_exc(),
+                                end_time=time.time(),
+                            )
+                        )
+                        ## LOGGING
+                        threading.Thread(
+                            target=logging_obj.failure_handler,
+                            args=(e, traceback.format_exc()),
+                        ).start()  # log response
                     self._set_cooldown_deployments(
                         exception_status=e.status_code,
                         original_exception=e,
@@ -3885,6 +3908,20 @@ class Router:
                     )
                     raise e
                 except Exception as e:
+                    ## LOG FAILURE EVENT
+                    if logging_obj is not None:
+                        asyncio.create_task(
+                            logging_obj.async_failure_handler(
+                                exception=e,
+                                traceback_exception=traceback.format_exc(),
+                                end_time=time.time(),
+                            )
+                        )
+                        ## LOGGING
+                        threading.Thread(
+                            target=logging_obj.failure_handler,
+                            args=(e, traceback.format_exc()),
+                        ).start()  # log response
                     raise e
 
     def _generate_model_id(self, model_group: str, litellm_params: dict):