feat(prometheus_services.py): emit proxy latency for successful LLM API requests

Uses a Prometheus histogram to record the proxy latency.
This commit is contained in:
Krrish Dholakia 2024-04-18 16:04:35 -07:00
parent df70e75ee1
commit 0f95a824c4
6 changed files with 87 additions and 20 deletions

View file

@ -13,7 +13,6 @@ import json, traceback, ast, hashlib
from typing import Optional, Literal, List, Union, Any, BinaryIO
from openai._models import BaseModel as OpenAIObject
from litellm._logging import verbose_logger
from litellm._service_logger import ServiceLogging
from litellm.types.services import ServiceLoggerPayload, ServiceTypes
import traceback
@ -132,6 +131,7 @@ class RedisCache(BaseCache):
**kwargs,
):
from ._redis import get_redis_client, get_redis_connection_pool
from litellm._service_logger import ServiceLogging
import redis
redis_kwargs = {}
@ -216,7 +216,9 @@ class RedisCache(BaseCache):
_duration = end_time - start_time
asyncio.create_task(
self.service_logger_obj.async_service_success_hook(
service=ServiceTypes.REDIS, duration=_duration
service=ServiceTypes.REDIS,
duration=_duration,
call_type="async_scan_iter",
)
) # DO NOT SLOW DOWN CALL B/C OF THIS
return keys
@ -227,7 +229,10 @@ class RedisCache(BaseCache):
_duration = end_time - start_time
asyncio.create_task(
self.service_logger_obj.async_service_failure_hook(
service=ServiceTypes.REDIS, duration=_duration, error=e
service=ServiceTypes.REDIS,
duration=_duration,
error=e,
call_type="async_scan_iter",
)
)
raise e
@ -359,6 +364,7 @@ class RedisCache(BaseCache):
self.service_logger_obj.async_service_success_hook(
service=ServiceTypes.REDIS,
duration=_duration,
call_type="async_increment",
)
)
return result
@ -368,7 +374,10 @@ class RedisCache(BaseCache):
_duration = end_time - start_time
asyncio.create_task(
self.service_logger_obj.async_service_failure_hook(
service=ServiceTypes.REDIS, duration=_duration, error=e
service=ServiceTypes.REDIS,
duration=_duration,
error=e,
call_type="async_increment",
)
)
verbose_logger.error(
@ -497,7 +506,9 @@ class RedisCache(BaseCache):
_duration = end_time - start_time
asyncio.create_task(
self.service_logger_obj.async_service_success_hook(
service=ServiceTypes.REDIS, duration=_duration
service=ServiceTypes.REDIS,
duration=_duration,
call_type="async_batch_get_cache",
)
)
@ -519,7 +530,10 @@ class RedisCache(BaseCache):
_duration = end_time - start_time
asyncio.create_task(
self.service_logger_obj.async_service_failure_hook(
service=ServiceTypes.REDIS, duration=_duration, error=e
service=ServiceTypes.REDIS,
duration=_duration,
error=e,
call_type="async_batch_get_cache",
)
)
print_verbose(f"Error occurred in pipeline read - {str(e)}")