diff --git a/litellm/_service_logger.py b/litellm/_service_logger.py index 814ec011a..b2bd26c26 100644 --- a/litellm/_service_logger.py +++ b/litellm/_service_logger.py @@ -1,9 +1,12 @@ -import litellm +import litellm, traceback +from litellm.proxy._types import UserAPIKeyAuth from .types.services import ServiceTypes, ServiceLoggerPayload from .integrations.prometheus_services import PrometheusServicesLogger +from .integrations.custom_logger import CustomLogger +from datetime import timedelta -class ServiceLogging: +class ServiceLogging(CustomLogger): """ Separate class used for monitoring health of litellm-adjacent services (redis/postgres). """ @@ -14,7 +17,6 @@ class ServiceLogging: self.mock_testing_async_success_hook = 0 self.mock_testing_sync_failure_hook = 0 self.mock_testing_async_failure_hook = 0 - if "prometheus_system" in litellm.service_callback: self.prometheusServicesLogger = PrometheusServicesLogger() @@ -34,7 +36,9 @@ class ServiceLogging: if self.mock_testing: self.mock_testing_sync_failure_hook += 1 - async def async_service_success_hook(self, service: ServiceTypes, duration: float): + async def async_service_success_hook( + self, service: ServiceTypes, duration: float, call_type: str + ): """ - For counting if the redis, postgres call is successful """ @@ -42,7 +46,11 @@ class ServiceLogging: self.mock_testing_async_success_hook += 1 payload = ServiceLoggerPayload( - is_error=False, error=None, service=service, duration=duration + is_error=False, + error=None, + service=service, + duration=duration, + call_type=call_type, ) for callback in litellm.service_callback: if callback == "prometheus_system": @@ -51,7 +59,7 @@ class ServiceLogging: ) async def async_service_failure_hook( - self, service: ServiceTypes, duration: float, error: Exception + self, service: ServiceTypes, duration: float, error: Exception, call_type: str ): """ - For counting if the redis, postgres call is unsuccessful @@ -60,7 +68,11 @@ class ServiceLogging: 
self.mock_testing_async_failure_hook += 1 payload = ServiceLoggerPayload( - is_error=True, error=str(error), service=service, duration=duration + is_error=True, + error=str(error), + service=service, + duration=duration, + call_type=call_type, ) for callback in litellm.service_callback: if callback == "prometheus_system": @@ -69,3 +81,37 @@ class ServiceLogging: await self.prometheusServicesLogger.async_service_failure_hook( payload=payload ) + + async def async_post_call_failure_hook( + self, original_exception: Exception, user_api_key_dict: UserAPIKeyAuth + ): + """ + Hook to track failed litellm-service calls + """ + return await super().async_post_call_failure_hook( + original_exception, user_api_key_dict + ) + + async def async_log_success_event(self, kwargs, response_obj, start_time, end_time): + """ + Hook to track latency for litellm proxy llm api calls + """ + try: + _duration = end_time - start_time + if isinstance(_duration, timedelta): + _duration = _duration.total_seconds() + elif isinstance(_duration, float): + pass + else: + raise Exception( + "Duration={} is not a float or timedelta object. 
type={}".format( + _duration, type(_duration) + ) + ) # invalid _duration value + await self.async_service_success_hook( + service=ServiceTypes.LITELLM, + duration=_duration, + call_type=kwargs["call_type"], + ) + except Exception as e: + raise e diff --git a/litellm/caching.py b/litellm/caching.py index c15813710..79c816ff7 100644 --- a/litellm/caching.py +++ b/litellm/caching.py @@ -13,7 +13,6 @@ import json, traceback, ast, hashlib from typing import Optional, Literal, List, Union, Any, BinaryIO from openai._models import BaseModel as OpenAIObject from litellm._logging import verbose_logger -from litellm._service_logger import ServiceLogging from litellm.types.services import ServiceLoggerPayload, ServiceTypes import traceback @@ -132,6 +131,7 @@ class RedisCache(BaseCache): **kwargs, ): from ._redis import get_redis_client, get_redis_connection_pool + from litellm._service_logger import ServiceLogging import redis redis_kwargs = {} @@ -216,7 +216,9 @@ class RedisCache(BaseCache): _duration = end_time - start_time asyncio.create_task( self.service_logger_obj.async_service_success_hook( - service=ServiceTypes.REDIS, duration=_duration + service=ServiceTypes.REDIS, + duration=_duration, + call_type="async_scan_iter", ) ) # DO NOT SLOW DOWN CALL B/C OF THIS return keys @@ -227,7 +229,10 @@ class RedisCache(BaseCache): _duration = end_time - start_time asyncio.create_task( self.service_logger_obj.async_service_failure_hook( - service=ServiceTypes.REDIS, duration=_duration, error=e + service=ServiceTypes.REDIS, + duration=_duration, + error=e, + call_type="async_scan_iter", ) ) raise e @@ -359,6 +364,7 @@ class RedisCache(BaseCache): self.service_logger_obj.async_service_success_hook( service=ServiceTypes.REDIS, duration=_duration, + call_type="async_increment", ) ) return result @@ -368,7 +374,10 @@ class RedisCache(BaseCache): _duration = end_time - start_time asyncio.create_task( self.service_logger_obj.async_service_failure_hook( - service=ServiceTypes.REDIS, 
duration=_duration, error=e + service=ServiceTypes.REDIS, + duration=_duration, + error=e, + call_type="async_increment", ) ) verbose_logger.error( @@ -497,7 +506,9 @@ class RedisCache(BaseCache): _duration = end_time - start_time asyncio.create_task( self.service_logger_obj.async_service_success_hook( - service=ServiceTypes.REDIS, duration=_duration + service=ServiceTypes.REDIS, + duration=_duration, + call_type="async_batch_get_cache", ) ) @@ -519,7 +530,10 @@ class RedisCache(BaseCache): _duration = end_time - start_time asyncio.create_task( self.service_logger_obj.async_service_failure_hook( - service=ServiceTypes.REDIS, duration=_duration, error=e + service=ServiceTypes.REDIS, + duration=_duration, + error=e, + call_type="async_batch_get_cache", ) ) print_verbose(f"Error occurred in pipeline read - {str(e)}") diff --git a/litellm/integrations/prometheus_services.py b/litellm/integrations/prometheus_services.py index 548d0a2a3..4171593ba 100644 --- a/litellm/integrations/prometheus_services.py +++ b/litellm/integrations/prometheus_services.py @@ -30,6 +30,7 @@ class PrometheusServicesLogger: raise Exception( "Missing prometheus_client. 
Run `pip install prometheus-client`" ) + print("INITIALIZES PROMETHEUS SERVICE LOGGER!") self.Histogram = Histogram self.Counter = Counter @@ -151,6 +152,7 @@ class PrometheusServicesLogger: if self.mock_testing: self.mock_testing_success_calls += 1 + print(f"LOGS SUCCESSFUL CALL TO PROMETHEUS - payload={payload}") if payload.service.value in self.payload_to_prometheus_map: prom_objects = self.payload_to_prometheus_map[payload.service.value] for obj in prom_objects: diff --git a/litellm/proxy/_new_secret_config.yaml b/litellm/proxy/_new_secret_config.yaml index 0f7c24576..ca8b4c539 100644 --- a/litellm/proxy/_new_secret_config.yaml +++ b/litellm/proxy/_new_secret_config.yaml @@ -31,12 +31,12 @@ litellm_settings: upperbound_key_generate_params: max_budget: os.environ/LITELLM_UPPERBOUND_KEYS_MAX_BUDGET -router_settings: - routing_strategy: usage-based-routing-v2 - redis_host: os.environ/REDIS_HOST - redis_password: os.environ/REDIS_PASSWORD - redis_port: os.environ/REDIS_PORT - enable_pre_call_checks: True +# router_settings: +# routing_strategy: usage-based-routing-v2 +# redis_host: os.environ/REDIS_HOST +# redis_password: os.environ/REDIS_PASSWORD +# redis_port: os.environ/REDIS_PORT +# enable_pre_call_checks: True general_settings: master_key: sk-1234 diff --git a/litellm/proxy/utils.py b/litellm/proxy/utils.py index 2fccbc74e..6a34b54e1 100644 --- a/litellm/proxy/utils.py +++ b/litellm/proxy/utils.py @@ -18,6 +18,7 @@ from litellm.llms.custom_httpx.httpx_handler import HTTPHandler from litellm.proxy.hooks.parallel_request_limiter import ( _PROXY_MaxParallelRequestsHandler, ) +from litellm._service_logger import ServiceLogging from litellm import ModelResponse, EmbeddingResponse, ImageResponse from litellm.proxy.hooks.max_budget_limiter import _PROXY_MaxBudgetLimiter from litellm.proxy.hooks.tpm_rpm_limiter import _PROXY_MaxTPMRPMLimiter @@ -80,10 +81,12 @@ class ProxyLogging: def _init_litellm_callbacks(self): print_verbose(f"INITIALIZING LITELLM CALLBACKS!") + 
self.service_logging_obj = ServiceLogging() litellm.callbacks.append(self.max_parallel_request_limiter) litellm.callbacks.append(self.max_tpm_rpm_limiter) litellm.callbacks.append(self.max_budget_limiter) litellm.callbacks.append(self.cache_control_check) + litellm.callbacks.append(self.service_logging_obj) litellm.success_callback.append(self.response_taking_too_long_callback) for callback in litellm.callbacks: if callback not in litellm.input_callback: diff --git a/litellm/types/services.py b/litellm/types/services.py index ea5172ebc..b694ca807 100644 --- a/litellm/types/services.py +++ b/litellm/types/services.py @@ -5,11 +5,12 @@ from typing import Optional class ServiceTypes(enum.Enum): """ - Enum for litellm-adjacent services (redis/postgres/etc.) + Enum for litellm + litellm-adjacent services (redis/postgres/etc.) """ REDIS = "redis" DB = "postgres" + LITELLM = "self" class ServiceLoggerPayload(BaseModel): """ error: Optional[str] = Field(None, description="what was the error") service: ServiceTypes = Field(description="who is this for? - postgres/redis") duration: float = Field(description="How long did the request take?") + call_type: str = Field(description="The type of call made to the service") def to_json(self, **kwargs): try: