mirror of
https://github.com/BerriAI/litellm.git
synced 2025-04-25 02:34:29 +00:00
Merge pull request #9719 from BerriAI/litellm_metrics_pod_lock_manager
[Reliability] Emit operational metrics for new DB Transaction architecture
This commit is contained in:
commit
8c3670e192
11 changed files with 351 additions and 40 deletions
|
@ -156,7 +156,7 @@ PROXY_LOGOUT_URL="https://www.google.com"
|
||||||
|
|
||||||
Set this in your .env (so the proxy can set the correct redirect url)
|
Set this in your .env (so the proxy can set the correct redirect url)
|
||||||
```shell
|
```shell
|
||||||
PROXY_BASE_URL=https://litellm-api.up.railway.app/
|
PROXY_BASE_URL=https://litellm-api.up.railway.app
|
||||||
```
|
```
|
||||||
|
|
||||||
#### Step 4. Test flow
|
#### Step 4. Test flow
|
||||||
|
|
|
@ -124,6 +124,7 @@ class ServiceLogging(CustomLogger):
|
||||||
service=service,
|
service=service,
|
||||||
duration=duration,
|
duration=duration,
|
||||||
call_type=call_type,
|
call_type=call_type,
|
||||||
|
event_metadata=event_metadata,
|
||||||
)
|
)
|
||||||
|
|
||||||
for callback in litellm.service_callback:
|
for callback in litellm.service_callback:
|
||||||
|
@ -229,6 +230,7 @@ class ServiceLogging(CustomLogger):
|
||||||
service=service,
|
service=service,
|
||||||
duration=duration,
|
duration=duration,
|
||||||
call_type=call_type,
|
call_type=call_type,
|
||||||
|
event_metadata=event_metadata,
|
||||||
)
|
)
|
||||||
|
|
||||||
for callback in litellm.service_callback:
|
for callback in litellm.service_callback:
|
||||||
|
|
|
@ -3,11 +3,16 @@
|
||||||
# On success + failure, log events to Prometheus for litellm / adjacent services (litellm, redis, postgres, llm api providers)
|
# On success + failure, log events to Prometheus for litellm / adjacent services (litellm, redis, postgres, llm api providers)
|
||||||
|
|
||||||
|
|
||||||
from typing import List, Optional, Union
|
from typing import Dict, List, Optional, Union
|
||||||
|
|
||||||
from litellm._logging import print_verbose, verbose_logger
|
from litellm._logging import print_verbose, verbose_logger
|
||||||
from litellm.types.integrations.prometheus import LATENCY_BUCKETS
|
from litellm.types.integrations.prometheus import LATENCY_BUCKETS
|
||||||
from litellm.types.services import ServiceLoggerPayload, ServiceTypes
|
from litellm.types.services import (
|
||||||
|
DEFAULT_SERVICE_CONFIGS,
|
||||||
|
ServiceLoggerPayload,
|
||||||
|
ServiceMetrics,
|
||||||
|
ServiceTypes,
|
||||||
|
)
|
||||||
|
|
||||||
FAILED_REQUESTS_LABELS = ["error_class", "function_name"]
|
FAILED_REQUESTS_LABELS = ["error_class", "function_name"]
|
||||||
|
|
||||||
|
@ -23,7 +28,8 @@ class PrometheusServicesLogger:
|
||||||
):
|
):
|
||||||
try:
|
try:
|
||||||
try:
|
try:
|
||||||
from prometheus_client import REGISTRY, Counter, Histogram
|
from prometheus_client import REGISTRY, Counter, Gauge, Histogram
|
||||||
|
from prometheus_client.gc_collector import Collector
|
||||||
except ImportError:
|
except ImportError:
|
||||||
raise Exception(
|
raise Exception(
|
||||||
"Missing prometheus_client. Run `pip install prometheus-client`"
|
"Missing prometheus_client. Run `pip install prometheus-client`"
|
||||||
|
@ -31,36 +37,51 @@ class PrometheusServicesLogger:
|
||||||
|
|
||||||
self.Histogram = Histogram
|
self.Histogram = Histogram
|
||||||
self.Counter = Counter
|
self.Counter = Counter
|
||||||
|
self.Gauge = Gauge
|
||||||
self.REGISTRY = REGISTRY
|
self.REGISTRY = REGISTRY
|
||||||
|
|
||||||
verbose_logger.debug("in init prometheus services metrics")
|
verbose_logger.debug("in init prometheus services metrics")
|
||||||
|
|
||||||
self.services = [item.value for item in ServiceTypes]
|
self.payload_to_prometheus_map: Dict[
|
||||||
|
str, List[Union[Histogram, Counter, Gauge, Collector]]
|
||||||
|
] = {}
|
||||||
|
|
||||||
self.payload_to_prometheus_map = (
|
for service in ServiceTypes:
|
||||||
{}
|
service_metrics: List[Union[Histogram, Counter, Gauge, Collector]] = []
|
||||||
) # store the prometheus histogram/counter we need to call for each field in payload
|
|
||||||
|
|
||||||
for service in self.services:
|
metrics_to_initialize = self._get_service_metrics_initialize(service)
|
||||||
histogram = self.create_histogram(service, type_of_request="latency")
|
|
||||||
counter_failed_request = self.create_counter(
|
|
||||||
service,
|
|
||||||
type_of_request="failed_requests",
|
|
||||||
additional_labels=FAILED_REQUESTS_LABELS,
|
|
||||||
)
|
|
||||||
counter_total_requests = self.create_counter(
|
|
||||||
service, type_of_request="total_requests"
|
|
||||||
)
|
|
||||||
self.payload_to_prometheus_map[service] = [
|
|
||||||
histogram,
|
|
||||||
counter_failed_request,
|
|
||||||
counter_total_requests,
|
|
||||||
]
|
|
||||||
|
|
||||||
self.prometheus_to_amount_map: dict = (
|
# Initialize only the configured metrics for each service
|
||||||
{}
|
if ServiceMetrics.HISTOGRAM in metrics_to_initialize:
|
||||||
) # the field / value in ServiceLoggerPayload the object needs to be incremented by
|
histogram = self.create_histogram(
|
||||||
|
service.value, type_of_request="latency"
|
||||||
|
)
|
||||||
|
if histogram:
|
||||||
|
service_metrics.append(histogram)
|
||||||
|
|
||||||
|
if ServiceMetrics.COUNTER in metrics_to_initialize:
|
||||||
|
counter_failed_request = self.create_counter(
|
||||||
|
service.value,
|
||||||
|
type_of_request="failed_requests",
|
||||||
|
additional_labels=FAILED_REQUESTS_LABELS,
|
||||||
|
)
|
||||||
|
if counter_failed_request:
|
||||||
|
service_metrics.append(counter_failed_request)
|
||||||
|
counter_total_requests = self.create_counter(
|
||||||
|
service.value, type_of_request="total_requests"
|
||||||
|
)
|
||||||
|
if counter_total_requests:
|
||||||
|
service_metrics.append(counter_total_requests)
|
||||||
|
|
||||||
|
if ServiceMetrics.GAUGE in metrics_to_initialize:
|
||||||
|
gauge = self.create_gauge(service.value, type_of_request="size")
|
||||||
|
if gauge:
|
||||||
|
service_metrics.append(gauge)
|
||||||
|
|
||||||
|
if service_metrics:
|
||||||
|
self.payload_to_prometheus_map[service.value] = service_metrics
|
||||||
|
|
||||||
|
self.prometheus_to_amount_map: dict = {}
|
||||||
### MOCK TESTING ###
|
### MOCK TESTING ###
|
||||||
self.mock_testing = mock_testing
|
self.mock_testing = mock_testing
|
||||||
self.mock_testing_success_calls = 0
|
self.mock_testing_success_calls = 0
|
||||||
|
@ -70,6 +91,19 @@ class PrometheusServicesLogger:
|
||||||
print_verbose(f"Got exception on init prometheus client {str(e)}")
|
print_verbose(f"Got exception on init prometheus client {str(e)}")
|
||||||
raise e
|
raise e
|
||||||
|
|
||||||
|
def _get_service_metrics_initialize(
    self, service: ServiceTypes
) -> List[ServiceMetrics]:
    """
    Return the metric types that should be created for `service`.

    `service` is looked up in DEFAULT_SERVICE_CONFIGS. A service with no
    entry, or whose entry has a missing/empty "metrics" list, falls back to
    a counter plus a histogram.

    Args:
        service: the service whose metrics are being initialized.

    Returns:
        A new list of ServiceMetrics members; mutating it does not modify
        DEFAULT_SERVICE_CONFIGS.
    """
    default_metrics = [ServiceMetrics.COUNTER, ServiceMetrics.HISTOGRAM]

    # One lookup instead of a membership test followed by a second `.get`.
    service_config = DEFAULT_SERVICE_CONFIGS.get(service)
    if service_config is None:
        return default_metrics

    metrics = service_config.get("metrics")
    if not metrics:
        verbose_logger.debug(f"No metrics found for service {service}")
        return default_metrics

    # Copy so callers cannot mutate the shared module-level configuration.
    return list(metrics)
|
||||||
|
|
||||||
def is_metric_registered(self, metric_name) -> bool:
|
def is_metric_registered(self, metric_name) -> bool:
|
||||||
for metric in self.REGISTRY.collect():
|
for metric in self.REGISTRY.collect():
|
||||||
if metric_name == metric.name:
|
if metric_name == metric.name:
|
||||||
|
@ -94,6 +128,15 @@ class PrometheusServicesLogger:
|
||||||
buckets=LATENCY_BUCKETS,
|
buckets=LATENCY_BUCKETS,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def create_gauge(self, service: str, type_of_request: str):
    """
    Get or create the gauge named "litellm_<service>_<type_of_request>".

    When a metric with that name is already registered it is fetched with
    `_get_metric` and returned; otherwise a new gauge is created whose only
    label name is the `service` string itself.

    Args:
        service: service name, used in the metric name, help text and label name.
        type_of_request: suffix of the metric name.

    Returns:
        The already-registered metric or the newly created gauge.
    """
    metric_name = f"litellm_{service}_{type_of_request}"

    if not self.is_metric_registered(metric_name):
        return self.Gauge(
            metric_name,
            f"Gauge for {service} service",
            labelnames=[service],
        )

    return self._get_metric(metric_name)
|
||||||
|
|
||||||
def create_counter(
|
def create_counter(
|
||||||
self,
|
self,
|
||||||
service: str,
|
service: str,
|
||||||
|
@ -120,6 +163,15 @@ class PrometheusServicesLogger:
|
||||||
|
|
||||||
histogram.labels(labels).observe(amount)
|
histogram.labels(labels).observe(amount)
|
||||||
|
|
||||||
|
def update_gauge(
    self,
    gauge,
    labels: str,
    amount: float,
):
    """
    Set the labelled child of `gauge` to `amount`.

    Args:
        gauge: must be an instance of `self.Gauge` (checked with `assert`).
        labels: the single label value, passed positionally to `gauge.labels`.
        amount: value the gauge is set to.
    """
    assert isinstance(gauge, self.Gauge)
    labelled_gauge = gauge.labels(labels)
    labelled_gauge.set(amount)
|
||||||
|
|
||||||
def increment_counter(
|
def increment_counter(
|
||||||
self,
|
self,
|
||||||
counter,
|
counter,
|
||||||
|
@ -190,6 +242,13 @@ class PrometheusServicesLogger:
|
||||||
labels=payload.service.value,
|
labels=payload.service.value,
|
||||||
amount=1, # LOG TOTAL REQUESTS TO PROMETHEUS
|
amount=1, # LOG TOTAL REQUESTS TO PROMETHEUS
|
||||||
)
|
)
|
||||||
|
elif isinstance(obj, self.Gauge):
|
||||||
|
if payload.event_metadata:
|
||||||
|
self.update_gauge(
|
||||||
|
gauge=obj,
|
||||||
|
labels=payload.event_metadata.get("gauge_labels") or "",
|
||||||
|
amount=payload.event_metadata.get("gauge_value") or 0,
|
||||||
|
)
|
||||||
|
|
||||||
async def async_service_failure_hook(
|
async def async_service_failure_hook(
|
||||||
self,
|
self,
|
||||||
|
|
|
@ -2,8 +2,14 @@
|
||||||
Base class for in memory buffer for database transactions
|
Base class for in memory buffer for database transactions
|
||||||
"""
|
"""
|
||||||
import asyncio
|
import asyncio
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
from litellm._logging import verbose_proxy_logger
|
from litellm._logging import verbose_proxy_logger
|
||||||
|
from litellm._service_logger import ServiceLogging
|
||||||
|
|
||||||
|
service_logger_obj = (
|
||||||
|
ServiceLogging()
|
||||||
|
) # used for tracking metrics for In memory buffer, redis buffer, pod lock manager
|
||||||
from litellm.constants import MAX_IN_MEMORY_QUEUE_FLUSH_COUNT, MAX_SIZE_IN_MEMORY_QUEUE
|
from litellm.constants import MAX_IN_MEMORY_QUEUE_FLUSH_COUNT, MAX_SIZE_IN_MEMORY_QUEUE
|
||||||
|
|
||||||
|
|
||||||
|
@ -18,6 +24,9 @@ class BaseUpdateQueue:
|
||||||
"""Enqueue an update."""
|
"""Enqueue an update."""
|
||||||
verbose_proxy_logger.debug("Adding update to queue: %s", update)
|
verbose_proxy_logger.debug("Adding update to queue: %s", update)
|
||||||
await self.update_queue.put(update)
|
await self.update_queue.put(update)
|
||||||
|
await self._emit_new_item_added_to_queue_event(
|
||||||
|
queue_size=self.update_queue.qsize()
|
||||||
|
)
|
||||||
|
|
||||||
async def flush_all_updates_from_in_memory_queue(self):
|
async def flush_all_updates_from_in_memory_queue(self):
|
||||||
"""Get all updates from the queue."""
|
"""Get all updates from the queue."""
|
||||||
|
@ -31,3 +40,10 @@ class BaseUpdateQueue:
|
||||||
break
|
break
|
||||||
updates.append(await self.update_queue.get())
|
updates.append(await self.update_queue.get())
|
||||||
return updates
|
return updates
|
||||||
|
|
||||||
|
async def _emit_new_item_added_to_queue_event(
|
||||||
|
self,
|
||||||
|
queue_size: Optional[int] = None,
|
||||||
|
):
|
||||||
|
"""placeholder, emit event when a new item is added to the queue"""
|
||||||
|
pass
|
||||||
|
|
|
@ -1,10 +1,14 @@
|
||||||
import asyncio
|
import asyncio
|
||||||
from copy import deepcopy
|
from copy import deepcopy
|
||||||
from typing import Dict, List
|
from typing import Dict, List, Optional
|
||||||
|
|
||||||
from litellm._logging import verbose_proxy_logger
|
from litellm._logging import verbose_proxy_logger
|
||||||
from litellm.proxy._types import DailyUserSpendTransaction
|
from litellm.proxy._types import DailyUserSpendTransaction
|
||||||
from litellm.proxy.db.db_transaction_queue.base_update_queue import BaseUpdateQueue
|
from litellm.proxy.db.db_transaction_queue.base_update_queue import (
|
||||||
|
BaseUpdateQueue,
|
||||||
|
service_logger_obj,
|
||||||
|
)
|
||||||
|
from litellm.types.services import ServiceTypes
|
||||||
|
|
||||||
|
|
||||||
class DailySpendUpdateQueue(BaseUpdateQueue):
|
class DailySpendUpdateQueue(BaseUpdateQueue):
|
||||||
|
@ -117,3 +121,19 @@ class DailySpendUpdateQueue(BaseUpdateQueue):
|
||||||
else:
|
else:
|
||||||
aggregated_daily_spend_update_transactions[_key] = deepcopy(payload)
|
aggregated_daily_spend_update_transactions[_key] = deepcopy(payload)
|
||||||
return aggregated_daily_spend_update_transactions
|
return aggregated_daily_spend_update_transactions
|
||||||
|
|
||||||
|
async def _emit_new_item_added_to_queue_event(
    self,
    queue_size: Optional[int] = None,
):
    """
    Report the size of the in-memory daily spend update queue.

    Schedules `service_logger_obj.async_service_success_hook` as a
    background task (it is not awaited here) for the
    IN_MEMORY_DAILY_SPEND_UPDATE_QUEUE service.

    Args:
        queue_size: size to publish as the gauge value.
    """
    service = ServiceTypes.IN_MEMORY_DAILY_SPEND_UPDATE_QUEUE
    asyncio.create_task(
        service_logger_obj.async_service_success_hook(
            service=service,
            duration=0,
            call_type="_emit_new_item_added_to_queue_event",
            event_metadata={
                # Use the plain string value: str() of a `(str, Enum)` member
                # is "ServiceTypes.<NAME>", which is not the intended label.
                "gauge_labels": service.value,
                "gauge_value": queue_size,
            },
        )
    )
|
||||||
|
|
|
@ -1,9 +1,12 @@
|
||||||
|
import asyncio
|
||||||
import uuid
|
import uuid
|
||||||
from typing import TYPE_CHECKING, Any, Optional
|
from typing import TYPE_CHECKING, Any, Optional
|
||||||
|
|
||||||
from litellm._logging import verbose_proxy_logger
|
from litellm._logging import verbose_proxy_logger
|
||||||
from litellm.caching.redis_cache import RedisCache
|
from litellm.caching.redis_cache import RedisCache
|
||||||
from litellm.constants import DEFAULT_CRON_JOB_LOCK_TTL_SECONDS
|
from litellm.constants import DEFAULT_CRON_JOB_LOCK_TTL_SECONDS
|
||||||
|
from litellm.proxy.db.db_transaction_queue.base_update_queue import service_logger_obj
|
||||||
|
from litellm.types.services import ServiceTypes
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
ProxyLogging = Any
|
ProxyLogging = Any
|
||||||
|
@ -57,6 +60,7 @@ class PodLockManager:
|
||||||
self.pod_id,
|
self.pod_id,
|
||||||
self.cronjob_id,
|
self.cronjob_id,
|
||||||
)
|
)
|
||||||
|
|
||||||
return True
|
return True
|
||||||
else:
|
else:
|
||||||
# Check if the current pod already holds the lock
|
# Check if the current pod already holds the lock
|
||||||
|
@ -70,6 +74,7 @@ class PodLockManager:
|
||||||
self.pod_id,
|
self.pod_id,
|
||||||
self.cronjob_id,
|
self.cronjob_id,
|
||||||
)
|
)
|
||||||
|
self._emit_acquired_lock_event(self.cronjob_id, self.pod_id)
|
||||||
return True
|
return True
|
||||||
return False
|
return False
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
@ -104,6 +109,7 @@ class PodLockManager:
|
||||||
self.pod_id,
|
self.pod_id,
|
||||||
self.cronjob_id,
|
self.cronjob_id,
|
||||||
)
|
)
|
||||||
|
self._emit_released_lock_event(self.cronjob_id, self.pod_id)
|
||||||
else:
|
else:
|
||||||
verbose_proxy_logger.debug(
|
verbose_proxy_logger.debug(
|
||||||
"Pod %s failed to release Redis lock for cronjob_id=%s",
|
"Pod %s failed to release Redis lock for cronjob_id=%s",
|
||||||
|
@ -127,3 +133,31 @@ class PodLockManager:
|
||||||
verbose_proxy_logger.error(
|
verbose_proxy_logger.error(
|
||||||
f"Error releasing Redis lock for {self.cronjob_id}: {e}"
|
f"Error releasing Redis lock for {self.cronjob_id}: {e}"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@staticmethod
def _emit_acquired_lock_event(cronjob_id: str, pod_id: str):
    """
    Schedule a POD_LOCK_MANAGER service event marking the lock as acquired.

    The event carries the gauge label "<cronjob_id>:<pod_id>" and a gauge
    value of 1. The hook is scheduled with `asyncio.create_task` and is not
    awaited here.
    """
    lock_metadata = {
        "gauge_labels": f"{cronjob_id}:{pod_id}",
        "gauge_value": 1,
    }
    hook_coroutine = service_logger_obj.async_service_success_hook(
        service=ServiceTypes.POD_LOCK_MANAGER,
        duration=DEFAULT_CRON_JOB_LOCK_TTL_SECONDS,
        call_type="_emit_acquired_lock_event",
        event_metadata=lock_metadata,
    )
    asyncio.create_task(hook_coroutine)
|
||||||
|
|
||||||
|
@staticmethod
def _emit_released_lock_event(cronjob_id: str, pod_id: str):
    """
    Schedule a POD_LOCK_MANAGER service event marking the lock as released.

    The event carries the gauge label "<cronjob_id>:<pod_id>" and a gauge
    value of 0. The hook is scheduled with `asyncio.create_task` and is not
    awaited here.
    """
    lock_metadata = {
        "gauge_labels": f"{cronjob_id}:{pod_id}",
        "gauge_value": 0,
    }
    hook_coroutine = service_logger_obj.async_service_success_hook(
        service=ServiceTypes.POD_LOCK_MANAGER,
        duration=DEFAULT_CRON_JOB_LOCK_TTL_SECONDS,
        call_type="_emit_released_lock_event",
        event_metadata=lock_metadata,
    )
    asyncio.create_task(hook_coroutine)
|
||||||
|
|
|
@ -4,6 +4,7 @@ Handles buffering database `UPDATE` transactions in Redis before committing them
|
||||||
This is to prevent deadlocks and improve reliability
|
This is to prevent deadlocks and improve reliability
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
import asyncio
|
||||||
import json
|
import json
|
||||||
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union
|
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union
|
||||||
|
|
||||||
|
@ -16,11 +17,13 @@ from litellm.constants import (
|
||||||
)
|
)
|
||||||
from litellm.litellm_core_utils.safe_json_dumps import safe_dumps
|
from litellm.litellm_core_utils.safe_json_dumps import safe_dumps
|
||||||
from litellm.proxy._types import DailyUserSpendTransaction, DBSpendUpdateTransactions
|
from litellm.proxy._types import DailyUserSpendTransaction, DBSpendUpdateTransactions
|
||||||
|
from litellm.proxy.db.db_transaction_queue.base_update_queue import service_logger_obj
|
||||||
from litellm.proxy.db.db_transaction_queue.daily_spend_update_queue import (
|
from litellm.proxy.db.db_transaction_queue.daily_spend_update_queue import (
|
||||||
DailySpendUpdateQueue,
|
DailySpendUpdateQueue,
|
||||||
)
|
)
|
||||||
from litellm.proxy.db.db_transaction_queue.spend_update_queue import SpendUpdateQueue
|
from litellm.proxy.db.db_transaction_queue.spend_update_queue import SpendUpdateQueue
|
||||||
from litellm.secret_managers.main import str_to_bool
|
from litellm.secret_managers.main import str_to_bool
|
||||||
|
from litellm.types.services import ServiceTypes
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
from litellm.proxy.utils import PrismaClient
|
from litellm.proxy.utils import PrismaClient
|
||||||
|
@ -136,18 +139,27 @@ class RedisUpdateBuffer:
|
||||||
return
|
return
|
||||||
|
|
||||||
list_of_transactions = [safe_dumps(db_spend_update_transactions)]
|
list_of_transactions = [safe_dumps(db_spend_update_transactions)]
|
||||||
await self.redis_cache.async_rpush(
|
current_redis_buffer_size = await self.redis_cache.async_rpush(
|
||||||
key=REDIS_UPDATE_BUFFER_KEY,
|
key=REDIS_UPDATE_BUFFER_KEY,
|
||||||
values=list_of_transactions,
|
values=list_of_transactions,
|
||||||
)
|
)
|
||||||
|
await self._emit_new_item_added_to_redis_buffer_event(
|
||||||
|
queue_size=current_redis_buffer_size,
|
||||||
|
service=ServiceTypes.REDIS_SPEND_UPDATE_QUEUE,
|
||||||
|
)
|
||||||
|
|
||||||
list_of_daily_spend_update_transactions = [
|
list_of_daily_spend_update_transactions = [
|
||||||
safe_dumps(daily_spend_update_transactions)
|
safe_dumps(daily_spend_update_transactions)
|
||||||
]
|
]
|
||||||
await self.redis_cache.async_rpush(
|
|
||||||
|
current_redis_buffer_size = await self.redis_cache.async_rpush(
|
||||||
key=REDIS_DAILY_SPEND_UPDATE_BUFFER_KEY,
|
key=REDIS_DAILY_SPEND_UPDATE_BUFFER_KEY,
|
||||||
values=list_of_daily_spend_update_transactions,
|
values=list_of_daily_spend_update_transactions,
|
||||||
)
|
)
|
||||||
|
await self._emit_new_item_added_to_redis_buffer_event(
|
||||||
|
queue_size=current_redis_buffer_size,
|
||||||
|
service=ServiceTypes.REDIS_DAILY_SPEND_UPDATE_QUEUE,
|
||||||
|
)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _number_of_transactions_to_store_in_redis(
|
def _number_of_transactions_to_store_in_redis(
|
||||||
|
@ -300,3 +312,20 @@ class RedisUpdateBuffer:
|
||||||
)
|
)
|
||||||
|
|
||||||
return combined_transaction
|
return combined_transaction
|
||||||
|
|
||||||
|
async def _emit_new_item_added_to_redis_buffer_event(
    self,
    service: ServiceTypes,
    queue_size: int,
):
    """
    Report the size of a Redis update buffer for `service`.

    Schedules `service_logger_obj.async_service_success_hook` as a
    background task (it is not awaited here).

    Args:
        service: the Redis queue service the size belongs to.
        queue_size: size to publish as the gauge value.
    """
    asyncio.create_task(
        service_logger_obj.async_service_success_hook(
            service=service,
            duration=0,
            call_type="_emit_new_item_added_to_queue_event",
            event_metadata={
                # Use the plain string value: str() of a `(str, Enum)` member
                # is "ServiceTypes.<NAME>", which is not the intended label.
                "gauge_labels": service.value,
                "gauge_value": queue_size,
            },
        )
    )
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
import asyncio
|
import asyncio
|
||||||
from typing import Dict, List
|
from typing import Dict, List, Optional
|
||||||
|
|
||||||
from litellm._logging import verbose_proxy_logger
|
from litellm._logging import verbose_proxy_logger
|
||||||
from litellm.proxy._types import (
|
from litellm.proxy._types import (
|
||||||
|
@ -7,7 +7,11 @@ from litellm.proxy._types import (
|
||||||
Litellm_EntityType,
|
Litellm_EntityType,
|
||||||
SpendUpdateQueueItem,
|
SpendUpdateQueueItem,
|
||||||
)
|
)
|
||||||
from litellm.proxy.db.db_transaction_queue.base_update_queue import BaseUpdateQueue
|
from litellm.proxy.db.db_transaction_queue.base_update_queue import (
|
||||||
|
BaseUpdateQueue,
|
||||||
|
service_logger_obj,
|
||||||
|
)
|
||||||
|
from litellm.types.services import ServiceTypes
|
||||||
|
|
||||||
|
|
||||||
class SpendUpdateQueue(BaseUpdateQueue):
|
class SpendUpdateQueue(BaseUpdateQueue):
|
||||||
|
@ -203,3 +207,19 @@ class SpendUpdateQueue(BaseUpdateQueue):
|
||||||
transactions_dict[entity_id] += response_cost or 0
|
transactions_dict[entity_id] += response_cost or 0
|
||||||
|
|
||||||
return db_spend_update_transactions
|
return db_spend_update_transactions
|
||||||
|
|
||||||
|
async def _emit_new_item_added_to_queue_event(
    self,
    queue_size: Optional[int] = None,
):
    """
    Report the size of the in-memory spend update queue.

    Schedules `service_logger_obj.async_service_success_hook` as a
    background task (it is not awaited here) for the
    IN_MEMORY_SPEND_UPDATE_QUEUE service.

    Args:
        queue_size: size to publish as the gauge value.
    """
    service = ServiceTypes.IN_MEMORY_SPEND_UPDATE_QUEUE
    asyncio.create_task(
        service_logger_obj.async_service_success_hook(
            service=service,
            duration=0,
            call_type="_emit_new_item_added_to_queue_event",
            event_metadata={
                # Use the plain string value: str() of a `(str, Enum)` member
                # is "ServiceTypes.<NAME>", which is not the intended label.
                "gauge_labels": service.value,
                "gauge_value": queue_size,
            },
        )
    )
|
||||||
|
|
|
@ -5,11 +5,6 @@ model_list:
|
||||||
api_key: fake-key
|
api_key: fake-key
|
||||||
api_base: https://exampleopenaiendpoint-production.up.railway.app/
|
api_base: https://exampleopenaiendpoint-production.up.railway.app/
|
||||||
|
|
||||||
general_settings:
|
|
||||||
use_redis_transaction_buffer: true
|
|
||||||
|
|
||||||
litellm_settings:
|
litellm_settings:
|
||||||
cache: True
|
callbacks: ["prometheus"]
|
||||||
cache_params:
|
service_callback: ["prometheus_system"]
|
||||||
type: redis
|
|
||||||
supported_call_types: []
|
|
|
@ -1,8 +1,15 @@
|
||||||
import enum
|
import enum
|
||||||
import uuid
|
import uuid
|
||||||
from typing import Optional
|
from typing import List, Optional
|
||||||
|
|
||||||
from pydantic import BaseModel, Field
|
from pydantic import BaseModel, Field
|
||||||
|
from typing_extensions import TypedDict
|
||||||
|
|
||||||
|
|
||||||
|
class ServiceMetrics(enum.Enum):
    """Kinds of metric a service can be configured to report."""

    COUNTER = "counter"
    HISTOGRAM = "histogram"
    GAUGE = "gauge"
|
||||||
|
|
||||||
|
|
||||||
class ServiceTypes(str, enum.Enum):
|
class ServiceTypes(str, enum.Enum):
|
||||||
|
@ -18,6 +25,84 @@ class ServiceTypes(str, enum.Enum):
|
||||||
ROUTER = "router"
|
ROUTER = "router"
|
||||||
AUTH = "auth"
|
AUTH = "auth"
|
||||||
PROXY_PRE_CALL = "proxy_pre_call"
|
PROXY_PRE_CALL = "proxy_pre_call"
|
||||||
|
POD_LOCK_MANAGER = "pod_lock_manager"
|
||||||
|
|
||||||
|
"""
|
||||||
|
Operational metrics for DB Transaction Queues
|
||||||
|
"""
|
||||||
|
# daily spend update queue - actual transaction events
|
||||||
|
IN_MEMORY_DAILY_SPEND_UPDATE_QUEUE = "in_memory_daily_spend_update_queue"
|
||||||
|
REDIS_DAILY_SPEND_UPDATE_QUEUE = "redis_daily_spend_update_queue"
|
||||||
|
|
||||||
|
# spend update queue - current spend of key, user, team
|
||||||
|
IN_MEMORY_SPEND_UPDATE_QUEUE = "in_memory_spend_update_queue"
|
||||||
|
REDIS_SPEND_UPDATE_QUEUE = "redis_spend_update_queue"
|
||||||
|
|
||||||
|
|
||||||
|
class ServiceConfig(TypedDict):
    """
    Per-service configuration: which metrics the service reports
    """

    # What metrics this service should support
    metrics: List[ServiceMetrics]
|
||||||
|
|
||||||
|
|
||||||
|
"""
|
||||||
|
Metric types to use for each service
|
||||||
|
|
||||||
|
- REDIS only needs Counter, Histogram
|
||||||
|
- Pod Lock Manager only needs a gauge metric
|
||||||
|
"""
|
||||||
|
DEFAULT_SERVICE_CONFIGS = {
    # Services that report a counter and a histogram
    **{
        service_type.value: {
            "metrics": [ServiceMetrics.COUNTER, ServiceMetrics.HISTOGRAM]
        }
        for service_type in (
            ServiceTypes.REDIS,
            ServiceTypes.DB,
            ServiceTypes.BATCH_WRITE_TO_DB,
            ServiceTypes.RESET_BUDGET_JOB,
            ServiceTypes.LITELLM,
            ServiceTypes.ROUTER,
            ServiceTypes.AUTH,
            ServiceTypes.PROXY_PRE_CALL,
        )
    },
    # Operational metrics for DB Transaction Queues: gauge only
    **{
        service_type.value: {"metrics": [ServiceMetrics.GAUGE]}
        for service_type in (
            ServiceTypes.POD_LOCK_MANAGER,
            ServiceTypes.IN_MEMORY_DAILY_SPEND_UPDATE_QUEUE,
            ServiceTypes.REDIS_DAILY_SPEND_UPDATE_QUEUE,
            ServiceTypes.IN_MEMORY_SPEND_UPDATE_QUEUE,
            ServiceTypes.REDIS_SPEND_UPDATE_QUEUE,
        )
    },
}
|
||||||
|
|
||||||
|
|
||||||
|
class ServiceEventMetadata(TypedDict, total=False):
    """
    Metadata attached to a service success/failure event.

    Every key is optional (`total=False`). Add any extra fields you expect
    to read in the service_success_hook/service_failure_hook.
    """

    # Dynamically control gauge labels and values
    gauge_labels: Optional[str]
    gauge_value: Optional[float]
|
||||||
|
|
||||||
|
|
||||||
class ServiceLoggerPayload(BaseModel):
|
class ServiceLoggerPayload(BaseModel):
|
||||||
|
@ -30,6 +115,9 @@ class ServiceLoggerPayload(BaseModel):
|
||||||
service: ServiceTypes = Field(description="who is this for? - postgres/redis")
|
service: ServiceTypes = Field(description="who is this for? - postgres/redis")
|
||||||
duration: float = Field(description="How long did the request take?")
|
duration: float = Field(description="How long did the request take?")
|
||||||
call_type: str = Field(description="The call of the service, being made")
|
call_type: str = Field(description="The call of the service, being made")
|
||||||
|
event_metadata: Optional[dict] = Field(
|
||||||
|
description="The metadata logged during service success/failure"
|
||||||
|
)
|
||||||
|
|
||||||
def to_json(self, **kwargs):
|
def to_json(self, **kwargs):
|
||||||
try:
|
try:
|
||||||
|
|
48
tests/litellm/integrations/test_prometheus_services.py
Normal file
48
tests/litellm/integrations/test_prometheus_services.py
Normal file
|
@ -0,0 +1,48 @@
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
from unittest.mock import AsyncMock, patch
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
from fastapi.testclient import TestClient
|
||||||
|
|
||||||
|
from litellm.integrations.prometheus_services import (
|
||||||
|
PrometheusServicesLogger,
|
||||||
|
ServiceMetrics,
|
||||||
|
ServiceTypes,
|
||||||
|
)
|
||||||
|
|
||||||
|
sys.path.insert(
|
||||||
|
0, os.path.abspath("../../..")
|
||||||
|
) # Adds the parent directory to the system path
|
||||||
|
|
||||||
|
|
||||||
|
def test_create_gauge_new():
    """A created gauge is returned and retrievable by its full metric name"""
    services_logger = PrometheusServicesLogger()

    created_gauge = services_logger.create_gauge(
        service="test_service", type_of_request="size"
    )
    assert created_gauge is not None

    # The metric name is "litellm_<service>_<type_of_request>"
    registered_metric = services_logger._get_metric("litellm_test_service_size")
    assert registered_metric is created_gauge
|
||||||
|
|
||||||
|
|
||||||
|
def test_update_gauge():
    """Test updating a gauge's value"""
    # `labels(...)` and `.set(...)` are plain synchronous calls, so the
    # labelled child is a MagicMock; an AsyncMock child would hand back a
    # never-awaited coroutine from `.set(...)`.
    from unittest.mock import MagicMock

    pl = PrometheusServicesLogger()

    # Create a gauge to test with
    gauge = pl.create_gauge(service="test_service", type_of_request="size")

    # Mock the labels method to verify it's called correctly
    with patch.object(gauge, "labels") as mock_labels:
        mock_gauge = MagicMock()
        mock_labels.return_value = mock_gauge

        # Call update_gauge
        pl.update_gauge(gauge=gauge, labels="test_label", amount=42.5)

        # Verify correct methods were called
        mock_labels.assert_called_once_with("test_label")
        mock_gauge.set.assert_called_once_with(42.5)
|
Loading…
Add table
Add a link
Reference in a new issue