feat - add remaining team budget gauge

This commit is contained in:
Ishaan Jaff 2024-06-13 14:28:25 -07:00
parent 0ebbad9fa6
commit 8d3c9aeea3
3 changed files with 104 additions and 11 deletions

View file

@ -8,6 +8,7 @@ import traceback
import datetime, subprocess, sys import datetime, subprocess, sys
import litellm, uuid import litellm, uuid
from litellm._logging import print_verbose, verbose_logger from litellm._logging import print_verbose, verbose_logger
from typing import Optional, Union
class PrometheusLogger: class PrometheusLogger:
@ -17,33 +18,69 @@ class PrometheusLogger:
**kwargs, **kwargs,
): ):
try: try:
from prometheus_client import Counter from prometheus_client import Counter, Gauge
self.litellm_llm_api_failed_requests_metric = Counter( self.litellm_llm_api_failed_requests_metric = Counter(
name="litellm_llm_api_failed_requests_metric", name="litellm_llm_api_failed_requests_metric",
documentation="Total number of failed LLM API calls via litellm", documentation="Total number of failed LLM API calls via litellm",
labelnames=["end_user", "hashed_api_key", "model", "team", "team_alias", "user"], labelnames=[
"end_user",
"hashed_api_key",
"model",
"team",
"team_alias",
"user",
],
) )
self.litellm_requests_metric = Counter( self.litellm_requests_metric = Counter(
name="litellm_requests_metric", name="litellm_requests_metric",
documentation="Total number of LLM calls to litellm", documentation="Total number of LLM calls to litellm",
labelnames=["end_user", "hashed_api_key", "model", "team", "team_alias", "user"], labelnames=[
"end_user",
"hashed_api_key",
"model",
"team",
"team_alias",
"user",
],
) )
# Counter for spend # Counter for spend
self.litellm_spend_metric = Counter( self.litellm_spend_metric = Counter(
"litellm_spend_metric", "litellm_spend_metric",
"Total spend on LLM requests", "Total spend on LLM requests",
labelnames=["end_user", "hashed_api_key", "model", "team", "team_alias", "user"], labelnames=[
"end_user",
"hashed_api_key",
"model",
"team",
"team_alias",
"user",
],
) )
# Counter for total_output_tokens # Counter for total_output_tokens
self.litellm_tokens_metric = Counter( self.litellm_tokens_metric = Counter(
"litellm_total_tokens", "litellm_total_tokens",
"Total number of input + output tokens from LLM requests", "Total number of input + output tokens from LLM requests",
labelnames=["end_user", "hashed_api_key", "model", "team", "team_alias", "user"], labelnames=[
"end_user",
"hashed_api_key",
"model",
"team",
"team_alias",
"user",
],
) )
# Remaining budget gauge for a team (labelled by team_id and team_alias)
self.litellm_remaining_team_budget_metric = Gauge(
"litellm_remaining_team_budget_metric",
"Remaining budget for team",
labelnames=["team_id", "team_alias"],
)
except Exception as e: except Exception as e:
print_verbose(f"Got exception on init prometheus client {str(e)}") print_verbose(f"Got exception on init prometheus client {str(e)}")
raise e raise e
@ -51,7 +88,9 @@ class PrometheusLogger:
async def _async_log_event( async def _async_log_event(
self, kwargs, response_obj, start_time, end_time, print_verbose, user_id self, kwargs, response_obj, start_time, end_time, print_verbose, user_id
): ):
self.log_event(kwargs, response_obj, start_time, end_time, print_verbose) self.log_event(
kwargs, response_obj, start_time, end_time, user_id, print_verbose
)
def log_event( def log_event(
self, kwargs, response_obj, start_time, end_time, user_id, print_verbose self, kwargs, response_obj, start_time, end_time, user_id, print_verbose
@ -78,6 +117,18 @@ class PrometheusLogger:
user_api_team_alias = litellm_params.get("metadata", {}).get( user_api_team_alias = litellm_params.get("metadata", {}).get(
"user_api_key_team_alias", None "user_api_key_team_alias", None
) )
_team_spend = litellm_params.get("metadata", {}).get(
"user_api_key_team_spend", None
)
_team_max_budget = litellm_params.get("metadata", {}).get(
"user_api_key_team_max_budget", None
)
_remaining_team_budget = safe_get_remaining_budget(
max_budget=_team_max_budget, spend=_team_spend
)
if response_obj is not None: if response_obj is not None:
tokens_used = response_obj.get("usage", {}).get("total_tokens", 0) tokens_used = response_obj.get("usage", {}).get("total_tokens", 0)
else: else:
@ -97,19 +148,43 @@ class PrometheusLogger:
user_api_key = hash_token(user_api_key) user_api_key = hash_token(user_api_key)
self.litellm_requests_metric.labels( self.litellm_requests_metric.labels(
end_user_id, user_api_key, model, user_api_team, user_api_team_alias, user_id end_user_id,
user_api_key,
model,
user_api_team,
user_api_team_alias,
user_id,
).inc() ).inc()
self.litellm_spend_metric.labels( self.litellm_spend_metric.labels(
end_user_id, user_api_key, model, user_api_team, user_api_team_alias, user_id end_user_id,
user_api_key,
model,
user_api_team,
user_api_team_alias,
user_id,
).inc(response_cost) ).inc(response_cost)
self.litellm_tokens_metric.labels( self.litellm_tokens_metric.labels(
end_user_id, user_api_key, model, user_api_team, user_api_team_alias, user_id end_user_id,
user_api_key,
model,
user_api_team,
user_api_team_alias,
user_id,
).inc(tokens_used) ).inc(tokens_used)
self.litellm_remaining_team_budget_metric.labels(
user_api_team, user_api_team_alias
).set(_remaining_team_budget)
### FAILURE INCREMENT ### ### FAILURE INCREMENT ###
if "exception" in kwargs: if "exception" in kwargs:
self.litellm_llm_api_failed_requests_metric.labels( self.litellm_llm_api_failed_requests_metric.labels(
end_user_id, user_api_key, model, user_api_team, user_api_team_alias, user_id end_user_id,
user_api_key,
model,
user_api_team,
user_api_team_alias,
user_id,
).inc() ).inc()
except Exception as e: except Exception as e:
verbose_logger.error( verbose_logger.error(
@ -117,3 +192,15 @@ class PrometheusLogger:
) )
verbose_logger.debug(traceback.format_exc()) verbose_logger.debug(traceback.format_exc())
pass pass
def safe_get_remaining_budget(
    max_budget: Optional[float], spend: Optional[float]
) -> float:
    """Return how much budget is left once ``spend`` is taken off ``max_budget``.

    Args:
        max_budget: The budget ceiling, or ``None`` when no ceiling is set.
        spend: The amount already spent, or ``None`` when no spend is recorded.

    Returns:
        ``float("inf")`` when ``max_budget`` is ``None``; ``max_budget``
        unchanged when ``spend`` is ``None``; otherwise
        ``max_budget - spend``. The result is negative when ``spend``
        exceeds ``max_budget``.
    """
    if max_budget is None:
        # No ceiling configured: report an unbounded remainder.
        unlimited = float("inf")
        return unlimited

    # With no recorded spend the whole budget is still available.
    return max_budget if spend is None else max_budget - spend

View file

@ -104,6 +104,11 @@ async def add_litellm_data_to_request(
data["metadata"]["user_api_key_team_alias"] = getattr( data["metadata"]["user_api_key_team_alias"] = getattr(
user_api_key_dict, "team_alias", None user_api_key_dict, "team_alias", None
) )
# Team spend, budget - used by prometheus.py
data["metadata"]["user_api_key_team_max_budget"] = user_api_key_dict.team_max_budget
data["metadata"]["user_api_key_team_spend"] = user_api_key_dict.team_spend
data["metadata"]["user_api_key_metadata"] = user_api_key_dict.metadata data["metadata"]["user_api_key_metadata"] = user_api_key_dict.metadata
_headers = dict(request.headers) _headers = dict(request.headers)
_headers.pop( _headers.pop(

View file

@ -22,7 +22,8 @@ general_settings:
master_key: sk-1234 master_key: sk-1234
litellm_settings: litellm_settings:
callbacks: ["otel"] success_callback: ["prometheus"]
failure_callback: ["prometheus"]
store_audit_logs: true store_audit_logs: true
turn_off_message_logging: true turn_off_message_logging: true
redact_messages_in_exceptions: True redact_messages_in_exceptions: True