forked from phoenix/litellm-mirror

feat - emit llm provider spend on prometheus

parent 5ba79e986b · commit c3e34ff540
4 changed files with 61 additions and 8 deletions
@@ -228,6 +228,13 @@ class PrometheusLogger(CustomLogger):
                "api_key_alias",
            ],
        )

        # llm api provider budget metrics
        self.litellm_provider_remaining_budget_metric = Gauge(
            "litellm_provider_remaining_budget_metric",
            "Remaining budget for provider - used when you set provider budget limits",
            labelnames=["api_provider"],
        )

        # Get all keys
        _logged_llm_labels = [
            "litellm_model_name",
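For readers unfamiliar with the prometheus_client Gauge API used above: each api_provider label value becomes its own time series, and set() overwrites the previously reported value on every update. A standalone sketch of the same gauge outside LiteLLM (the variable name and the sample value here are illustrative only):

from prometheus_client import Gauge, generate_latest

provider_remaining_budget = Gauge(
    "litellm_provider_remaining_budget_metric",
    "Remaining budget for provider - used when you set provider budget limits",
    labelnames=["api_provider"],
)

# One time series per provider; set() replaces the last reported value.
provider_remaining_budget.labels("openai").set(42.5)

print(generate_latest().decode())  # exposition text that a Prometheus scrape returns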
@@ -1130,6 +1137,19 @@ class PrometheusLogger(CustomLogger):
            litellm_model_name, model_id, api_base, api_provider, exception_status
        ).inc()

    def track_provider_remaining_budget(
        self, provider: str, spend: float, budget_limit: float
    ):
        """
        Track provider remaining budget in Prometheus
        """
        self.litellm_provider_remaining_budget_metric.labels(provider).set(
            self._safe_get_remaining_budget(
                max_budget=budget_limit,
                spend=spend,
            )
        )

    def _safe_get_remaining_budget(
        self, max_budget: Optional[float], spend: Optional[float]
    ) -> float:
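The hunk ends at the signature of _safe_get_remaining_budget; its body is not part of this diff. A minimal sketch of what such a helper plausibly does (the exact behavior below is an assumption, not the commit's implementation):

from typing import Optional

def _safe_get_remaining_budget(
    max_budget: Optional[float], spend: Optional[float]
) -> float:
    # Assumed: no configured budget means unlimited remaining budget
    if max_budget is None:
        return float("inf")
    # Assumed: no recorded spend means the full budget remains
    if spend is None:
        return max_budget
    return max_budget - spend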
@@ -1,14 +1,18 @@
model_list:
  - model_name: fake-openai-endpoint
  - model_name: gpt-4o
    litellm_params:
      model: openai/fake
      model: openai/gpt-4o
      api_key: os.environ/OPENAI_API_KEY
      api_base: https://exampleopenaiendpoint-production.up.railway.app/

router_settings:
  provider_budget_config:
    openai:
      budget_limit: 0.000000000001  # float of $ value budget for time period
      time_period: 1d  # can be 1d, 2d, 30d
    azure:
      budget_limit: 100
      time_period: 1d

general_settings:
  key_management_system: "aws_secret_manager"
  key_management_settings:
    store_virtual_keys: true
    access_mode: "write_only"
litellm_settings:
  callbacks: ["prometheus"]
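With callbacks: ["prometheus"] enabled together with the provider_budget_config above, the new gauge should be exposed on the proxy's Prometheus metrics endpoint. An illustrative scrape (the numeric values are made up for the example) would look roughly like:

# HELP litellm_provider_remaining_budget_metric Remaining budget for provider - used when you set provider budget limits
# TYPE litellm_provider_remaining_budget_metric gauge
litellm_provider_remaining_budget_metric{api_provider="openai"} 0.0
litellm_provider_remaining_budget_metric{api_provider="azure"} 99.7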
@@ -25,6 +25,9 @@ from litellm._logging import verbose_router_logger
from litellm.caching.caching import DualCache
from litellm.integrations.custom_logger import CustomLogger
from litellm.litellm_core_utils.core_helpers import _get_parent_otel_span_from_kwargs
from litellm.router_utils.cooldown_callbacks import (
    _get_prometheus_logger_from_callbacks,
)
from litellm.types.router import (
    LiteLLM_Params,
    ProviderBudgetConfigType,
@@ -148,6 +151,11 @@ class ProviderBudgetLimiting(CustomLogger):
        verbose_router_logger.debug(
            f"Current spend for {provider}: {current_spend}, budget limit: {budget_limit}"
        )
        self._track_provider_remaining_budget_prometheus(
            provider=provider,
            spend=current_spend,
            budget_limit=budget_limit,
        )

        if current_spend >= budget_limit:
            debug_msg = f"Exceeded budget for provider {provider}: {current_spend} >= {budget_limit}"
@@ -242,3 +250,21 @@ class ProviderBudgetLimiting(CustomLogger):
            days = int(time_period[:-1])
            return days * 24 * 60 * 60
        raise ValueError(f"Unsupported time period format: {time_period}")

    def _track_provider_remaining_budget_prometheus(
        self, provider: str, spend: float, budget_limit: float
    ):
        """
        Optional helper - emit provider remaining budget metric to Prometheus

        This is helpful for debugging and monitoring provider budget limits.
        """
        from litellm.integrations.prometheus import PrometheusLogger

        prometheus_logger = _get_prometheus_logger_from_callbacks()
        if prometheus_logger:
            prometheus_logger.track_provider_remaining_budget(
                provider=provider,
                spend=spend,
                budget_limit=budget_limit,
            )
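The first three lines of that hunk are the tail of the parser that turns a time_period value such as 1d into seconds (the enclosing function name is not visible in the diff, so get_ttl_seconds below is a hypothetical stand-in). A quick worked example of the arithmetic shown:

def get_ttl_seconds(time_period: str) -> int:
    # Hypothetical stand-in for the parser whose tail appears above;
    # only the "<N>d" branch and the ValueError fallback are visible in the hunk.
    if time_period.endswith("d"):
        days = int(time_period[:-1])
        return days * 24 * 60 * 60
    raise ValueError(f"Unsupported time period format: {time_period}")

assert get_ttl_seconds("1d") == 86_400       # 1 day
assert get_ttl_seconds("30d") == 2_592_000   # 30 days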
@@ -88,6 +88,9 @@ def _get_prometheus_logger_from_callbacks() -> Optional[PrometheusLogger]:
    """
    from litellm.integrations.prometheus import PrometheusLogger

    for _callback in litellm._async_success_callback:
        if isinstance(_callback, PrometheusLogger):
            return _callback
    for _callback in litellm.callbacks:
        if isinstance(_callback, PrometheusLogger):
            return _callback
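The added loop over litellm.callbacks mirrors the existing loop over litellm._async_success_callback; presumably the function returns None when neither list contains a PrometheusLogger, which is what the if prometheus_logger: guard in ProviderBudgetLimiting relies on. A tiny self-contained illustration of that lookup-or-None pattern (the class below is a stand-in, not LiteLLM's):

from typing import List, Optional

class StandInPrometheusLogger:
    pass

def find_prometheus_logger(callbacks: List[object]) -> Optional[StandInPrometheusLogger]:
    for cb in callbacks:
        if isinstance(cb, StandInPrometheusLogger):
            return cb
    return None  # nothing registered -> the caller skips metric emission

assert find_prometheus_logger([]) is None
assert find_prometheus_logger([StandInPrometheusLogger()]) is not None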