diff --git a/litellm/proxy/hooks/cache_control_check.py b/litellm/proxy/hooks/cache_control_check.py
index 670e7554d6..c50c4ec1fc 100644
--- a/litellm/proxy/hooks/cache_control_check.py
+++ b/litellm/proxy/hooks/cache_control_check.py
@@ -10,7 +10,7 @@ from fastapi import HTTPException
 import json, traceback
 
 
-class CacheControlCheck(CustomLogger):
+class _PROXY_CacheControlCheck(CustomLogger):
     # Class variables or attributes
     def __init__(self):
         pass
diff --git a/litellm/proxy/hooks/max_budget_limiter.py b/litellm/proxy/hooks/max_budget_limiter.py
index fa24c9f0f7..442cc53e37 100644
--- a/litellm/proxy/hooks/max_budget_limiter.py
+++ b/litellm/proxy/hooks/max_budget_limiter.py
@@ -7,7 +7,7 @@ from fastapi import HTTPException
 import json, traceback
 
 
-class MaxBudgetLimiter(CustomLogger):
+class _PROXY_MaxBudgetLimiter(CustomLogger):
     # Class variables or attributes
     def __init__(self):
         pass
diff --git a/litellm/proxy/hooks/parallel_request_limiter.py b/litellm/proxy/hooks/parallel_request_limiter.py
index 5c1893ea51..ca60421a50 100644
--- a/litellm/proxy/hooks/parallel_request_limiter.py
+++ b/litellm/proxy/hooks/parallel_request_limiter.py
@@ -9,7 +9,7 @@ from litellm import ModelResponse
 from datetime import datetime
 
 
-class MaxParallelRequestsHandler(CustomLogger):
+class _PROXY_MaxParallelRequestsHandler(CustomLogger):
     user_api_key_cache = None
 
     # Class variables or attributes
diff --git a/litellm/proxy/utils.py b/litellm/proxy/utils.py
index aff50b44d1..1a6e40319e 100644
--- a/litellm/proxy/utils.py
+++ b/litellm/proxy/utils.py
@@ -8,9 +8,11 @@ from litellm.proxy._types import (
     LiteLLM_SpendLogs,
 )
 from litellm.caching import DualCache
-from litellm.proxy.hooks.parallel_request_limiter import MaxParallelRequestsHandler
-from litellm.proxy.hooks.max_budget_limiter import MaxBudgetLimiter
-from litellm.proxy.hooks.cache_control_check import CacheControlCheck
+from litellm.proxy.hooks.parallel_request_limiter import (
+    _PROXY_MaxParallelRequestsHandler,
+)
+from litellm.proxy.hooks.max_budget_limiter import _PROXY_MaxBudgetLimiter
+from litellm.proxy.hooks.cache_control_check import _PROXY_CacheControlCheck
 from litellm.integrations.custom_logger import CustomLogger
 from litellm.proxy.db.base_client import CustomDB
 from litellm._logging import verbose_proxy_logger
@@ -41,9 +43,9 @@ class ProxyLogging:
         ## INITIALIZE LITELLM CALLBACKS ##
         self.call_details: dict = {}
         self.call_details["user_api_key_cache"] = user_api_key_cache
-        self.max_parallel_request_limiter = MaxParallelRequestsHandler()
-        self.max_budget_limiter = MaxBudgetLimiter()
-        self.cache_control_check = CacheControlCheck()
+        self.max_parallel_request_limiter = _PROXY_MaxParallelRequestsHandler()
+        self.max_budget_limiter = _PROXY_MaxBudgetLimiter()
+        self.cache_control_check = _PROXY_CacheControlCheck()
         self.alerting: Optional[List] = None
         self.alerting_threshold: float = 300  # default to 5 min. threshold
         pass
diff --git a/litellm/utils.py b/litellm/utils.py
index 2d428c26ee..96b6bc0ac1 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -774,14 +774,14 @@ class Logging:
         self.streaming_chunks = []  # for generating complete stream response
         self.sync_streaming_chunks = []  # for generating complete stream response
         self.model_call_details = {}
-        self.dynamic_input_callbacks = []  # callbacks set for just that call
-        self.dynamic_failure_callbacks = []  # callbacks set for just that call
+        self.dynamic_input_callbacks = []  # [TODO] callbacks set for just that call
+        self.dynamic_failure_callbacks = []  # [TODO] callbacks set for just that call
         self.dynamic_success_callbacks = (
-            dynamic_success_callbacks or []
-        )  # callbacks set for just that call
+            dynamic_success_callbacks  # callbacks set for just that call
+        )
         self.dynamic_async_success_callbacks = (
-            dynamic_async_success_callbacks or []
-        )  # callbacks set for just that call
+            dynamic_async_success_callbacks  # callbacks set for just that call
+        )
         ## DYNAMIC LANGFUSE KEYS ##
         self.langfuse_public_key = langfuse_public_key
         self.langfuse_secret = langfuse_secret
@@ -1145,7 +1145,19 @@
                     f"Model={self.model} not found in completion cost map."
                 )
                 self.model_call_details["response_cost"] = None
-        callbacks = litellm.success_callback + self.dynamic_success_callbacks
+        if self.dynamic_success_callbacks is not None and isinstance(
+            self.dynamic_success_callbacks, list
+        ):
+            callbacks = self.dynamic_success_callbacks
+            ## keep the internal functions ##
+            for callback in litellm.success_callback:
+                if (
+                    isinstance(callback, CustomLogger)
+                    and "_PROXY_" in callback.__class__.__name__
+                ):
+                    callbacks.append(callback)
+        else:
+            callbacks = litellm.success_callback
         for callback in callbacks:
             try:
                 if callback == "lite_debugger":
@@ -1452,9 +1464,19 @@
                )
                self.model_call_details["response_cost"] = None
 
-        callbacks = (
-            litellm._async_success_callback + self.dynamic_async_success_callbacks
-        )
+        if self.dynamic_async_success_callbacks is not None and isinstance(
+            self.dynamic_async_success_callbacks, list
+        ):
+            callbacks = self.dynamic_async_success_callbacks
+            ## keep the internal functions ##
+            for callback in litellm._async_success_callback:
+                if (
+                    isinstance(callback, CustomLogger)
+                    and "_PROXY_" in callback.__class__.__name__
+                ):
+                    callbacks.append(callback)
+        else:
+            callbacks = litellm._async_success_callback
         for callback in callbacks:
             try:
                 if callback == "cache" and litellm.cache is not None:
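
For context, here is a minimal, self-contained sketch of the callback-selection rule the two litellm/utils.py hunks introduce. The function resolve_success_callbacks and the bare stand-in classes are illustrative only, not part of the patch: per-call (dynamic) callbacks replace the global list, except that internal proxy hooks (CustomLogger subclasses whose class name contains "_PROXY_") are carried over.

from typing import List, Optional, Union

class CustomLogger:  # stand-in for litellm.integrations.custom_logger.CustomLogger
    pass

class _PROXY_MaxBudgetLimiter(CustomLogger):  # internal proxy hook, identified by name
    pass

def resolve_success_callbacks(
    global_callbacks: List[Union[str, CustomLogger]],
    dynamic_callbacks: Optional[List[Union[str, CustomLogger]]],
) -> List[Union[str, CustomLogger]]:
    # Hypothetical helper mirroring the hunks above: dynamic callbacks win,
    # but "_PROXY_" hooks from the global list are appended so the proxy
    # keeps enforcing budgets, rate limits, and cache-control checks.
    if dynamic_callbacks is not None and isinstance(dynamic_callbacks, list):
        callbacks = dynamic_callbacks
        for callback in global_callbacks:
            if (
                isinstance(callback, CustomLogger)
                and "_PROXY_" in callback.__class__.__name__
            ):
                callbacks.append(callback)
        return callbacks
    return global_callbacks

# A per-call callback list drops the globally configured "langfuse" logger,
# but the proxy's budget limiter survives the swap:
limiter = _PROXY_MaxBudgetLimiter()
print(resolve_success_callbacks(["langfuse", limiter], ["s3"]))  # ["s3", limiter]

This is why the hook classes are renamed with the _PROXY_ prefix in the first three files: the prefix is what the new filtering logic keys on to distinguish internal proxy hooks from user-configured callbacks.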