mirror of
https://github.com/BerriAI/litellm.git
synced 2025-04-27 03:34:10 +00:00
feat - add remaining team budget gauge
This commit is contained in:
parent
0ebbad9fa6
commit
8d3c9aeea3
3 changed files with 104 additions and 11 deletions
|
@ -8,6 +8,7 @@ import traceback
|
|||
import datetime, subprocess, sys
|
||||
import litellm, uuid
|
||||
from litellm._logging import print_verbose, verbose_logger
|
||||
from typing import Optional, Union
|
||||
|
||||
|
||||
class PrometheusLogger:
|
||||
|
@ -17,33 +18,69 @@ class PrometheusLogger:
|
|||
**kwargs,
|
||||
):
|
||||
try:
|
||||
from prometheus_client import Counter
|
||||
from prometheus_client import Counter, Gauge
|
||||
|
||||
self.litellm_llm_api_failed_requests_metric = Counter(
|
||||
name="litellm_llm_api_failed_requests_metric",
|
||||
documentation="Total number of failed LLM API calls via litellm",
|
||||
labelnames=["end_user", "hashed_api_key", "model", "team", "team_alias", "user"],
|
||||
labelnames=[
|
||||
"end_user",
|
||||
"hashed_api_key",
|
||||
"model",
|
||||
"team",
|
||||
"team_alias",
|
||||
"user",
|
||||
],
|
||||
)
|
||||
|
||||
self.litellm_requests_metric = Counter(
|
||||
name="litellm_requests_metric",
|
||||
documentation="Total number of LLM calls to litellm",
|
||||
labelnames=["end_user", "hashed_api_key", "model", "team", "team_alias", "user"],
|
||||
labelnames=[
|
||||
"end_user",
|
||||
"hashed_api_key",
|
||||
"model",
|
||||
"team",
|
||||
"team_alias",
|
||||
"user",
|
||||
],
|
||||
)
|
||||
|
||||
# Counter for spend
|
||||
self.litellm_spend_metric = Counter(
|
||||
"litellm_spend_metric",
|
||||
"Total spend on LLM requests",
|
||||
labelnames=["end_user", "hashed_api_key", "model", "team", "team_alias", "user"],
|
||||
labelnames=[
|
||||
"end_user",
|
||||
"hashed_api_key",
|
||||
"model",
|
||||
"team",
|
||||
"team_alias",
|
||||
"user",
|
||||
],
|
||||
)
|
||||
|
||||
# Counter for total tokens (input + output)
|
||||
self.litellm_tokens_metric = Counter(
|
||||
"litellm_total_tokens",
|
||||
"Total number of input + output tokens from LLM requests",
|
||||
labelnames=["end_user", "hashed_api_key", "model", "team", "team_alias", "user"],
|
||||
labelnames=[
|
||||
"end_user",
|
||||
"hashed_api_key",
|
||||
"model",
|
||||
"team",
|
||||
"team_alias",
|
||||
"user",
|
||||
],
|
||||
)
|
||||
|
||||
# Remaining Budget for Team, Key
|
||||
self.litellm_remaining_team_budget_metric = Gauge(
|
||||
"litellm_remaining_team_budget_metric",
|
||||
"Remaining budget for team",
|
||||
labelnames=["team_id", "team_alias"],
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
print_verbose(f"Got exception on init prometheus client {str(e)}")
|
||||
raise e
|
||||
|
@ -51,7 +88,9 @@ class PrometheusLogger:
|
|||
async def _async_log_event(
|
||||
self, kwargs, response_obj, start_time, end_time, print_verbose, user_id
|
||||
):
|
||||
self.log_event(kwargs, response_obj, start_time, end_time, print_verbose)
|
||||
self.log_event(
|
||||
kwargs, response_obj, start_time, end_time, user_id, print_verbose
|
||||
)
|
||||
|
||||
def log_event(
|
||||
self, kwargs, response_obj, start_time, end_time, user_id, print_verbose
|
||||
|
@ -78,6 +117,18 @@ class PrometheusLogger:
|
|||
user_api_team_alias = litellm_params.get("metadata", {}).get(
|
||||
"user_api_key_team_alias", None
|
||||
)
|
||||
|
||||
_team_spend = litellm_params.get("metadata", {}).get(
|
||||
"user_api_key_team_spend", None
|
||||
)
|
||||
|
||||
_team_max_budget = litellm_params.get("metadata", {}).get(
|
||||
"user_api_key_team_max_budget", None
|
||||
)
|
||||
_remaining_team_budget = safe_get_remaining_budget(
|
||||
max_budget=_team_max_budget, spend=_team_spend
|
||||
)
|
||||
|
||||
if response_obj is not None:
|
||||
tokens_used = response_obj.get("usage", {}).get("total_tokens", 0)
|
||||
else:
|
||||
|
@ -97,19 +148,43 @@ class PrometheusLogger:
|
|||
user_api_key = hash_token(user_api_key)
|
||||
|
||||
self.litellm_requests_metric.labels(
|
||||
end_user_id, user_api_key, model, user_api_team, user_api_team_alias, user_id
|
||||
end_user_id,
|
||||
user_api_key,
|
||||
model,
|
||||
user_api_team,
|
||||
user_api_team_alias,
|
||||
user_id,
|
||||
).inc()
|
||||
self.litellm_spend_metric.labels(
|
||||
end_user_id, user_api_key, model, user_api_team, user_api_team_alias, user_id
|
||||
end_user_id,
|
||||
user_api_key,
|
||||
model,
|
||||
user_api_team,
|
||||
user_api_team_alias,
|
||||
user_id,
|
||||
).inc(response_cost)
|
||||
self.litellm_tokens_metric.labels(
|
||||
end_user_id, user_api_key, model, user_api_team, user_api_team_alias, user_id
|
||||
end_user_id,
|
||||
user_api_key,
|
||||
model,
|
||||
user_api_team,
|
||||
user_api_team_alias,
|
||||
user_id,
|
||||
).inc(tokens_used)
|
||||
|
||||
self.litellm_remaining_team_budget_metric.labels(
|
||||
user_api_team, user_api_team_alias
|
||||
).set(_remaining_team_budget)
|
||||
|
||||
### FAILURE INCREMENT ###
|
||||
if "exception" in kwargs:
|
||||
self.litellm_llm_api_failed_requests_metric.labels(
|
||||
end_user_id, user_api_key, model, user_api_team, user_api_team_alias, user_id
|
||||
end_user_id,
|
||||
user_api_key,
|
||||
model,
|
||||
user_api_team,
|
||||
user_api_team_alias,
|
||||
user_id,
|
||||
).inc()
|
||||
except Exception as e:
|
||||
verbose_logger.error(
|
||||
|
@ -117,3 +192,15 @@ class PrometheusLogger:
|
|||
)
|
||||
verbose_logger.debug(traceback.format_exc())
|
||||
pass
|
||||
|
||||
|
||||
def safe_get_remaining_budget(
    max_budget: Optional[float], spend: Optional[float]
) -> float:
    """Return the budget still available given a cap and the spend so far.

    Args:
        max_budget: Budget cap, or ``None`` when no cap is set.
        spend: Amount already spent, or ``None`` when no spend is known.

    Returns:
        ``float("inf")`` when there is no cap, ``max_budget`` unchanged when
        no spend is known, otherwise ``max_budget - spend``. The result is
        not clamped, so it is negative when spend exceeds the cap.
    """
    if max_budget is not None:
        # A missing spend leaves the whole budget available.
        return max_budget if spend is None else max_budget - spend

    # No cap: report an unbounded remaining budget.
    return float("inf")
|
||||
|
|
|
@ -104,6 +104,11 @@ async def add_litellm_data_to_request(
|
|||
data["metadata"]["user_api_key_team_alias"] = getattr(
|
||||
user_api_key_dict, "team_alias", None
|
||||
)
|
||||
|
||||
# Team spend, budget - used by prometheus.py
|
||||
data["metadata"]["user_api_key_team_max_budget"] = user_api_key_dict.team_max_budget
|
||||
data["metadata"]["user_api_key_team_spend"] = user_api_key_dict.team_spend
|
||||
|
||||
data["metadata"]["user_api_key_metadata"] = user_api_key_dict.metadata
|
||||
_headers = dict(request.headers)
|
||||
_headers.pop(
|
||||
|
|
|
@ -22,7 +22,8 @@ general_settings:
|
|||
master_key: sk-1234
|
||||
|
||||
litellm_settings:
|
||||
callbacks: ["otel"]
|
||||
success_callback: ["prometheus"]
|
||||
failure_callback: ["prometheus"]
|
||||
store_audit_logs: true
|
||||
turn_off_message_logging: true
|
||||
redact_messages_in_exceptions: True
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue