mirror of
https://github.com/BerriAI/litellm.git
synced 2025-04-25 18:54:30 +00:00
feat - refactor prometheus metrics
This commit is contained in:
parent
f4355d3e88
commit
05fba48cec
1 changed file with 132 additions and 118 deletions
|
@ -15,9 +15,10 @@ import requests # type: ignore
|
|||
|
||||
import litellm
|
||||
from litellm._logging import print_verbose, verbose_logger
|
||||
from litellm.integrations.custom_logger import CustomLogger
|
||||
|
||||
|
||||
class PrometheusLogger:
|
||||
class PrometheusLogger(CustomLogger):
|
||||
# Class variables or attributes
|
||||
def __init__(
|
||||
self,
|
||||
|
@ -147,83 +148,137 @@ class PrometheusLogger:
|
|||
print_verbose(f"Got exception on init prometheus client {str(e)}")
|
||||
raise e
|
||||
|
||||
async def _async_log_event(
|
||||
self, kwargs, response_obj, start_time, end_time, print_verbose, user_id
|
||||
):
|
||||
self.log_event(
|
||||
kwargs, response_obj, start_time, end_time, user_id, print_verbose
|
||||
async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
    """Record Prometheus metrics for a successful LLM call.

    Increments the request, spend and token counters, sets the remaining
    team / API-key budget gauges and, when ``premium_user`` is true, records
    the deployment success metrics.

    Args:
        kwargs: Call details. ``model``, ``response_cost`` and
            ``litellm_params`` (with its ``metadata`` and
            ``proxy_server_request`` entries) are read from it.
        response_obj: Response of the call, or ``None``. When present, its
            ``usage.total_tokens`` value feeds the token counter.
        start_time: Not read by this method; part of the callback signature.
        end_time: Not read by this method; part of the callback signature.
    """
    # Function-scope import kept from the original
    # (presumably avoids a circular import with the proxy server - TODO confirm).
    from litellm.proxy.proxy_server import premium_user

    verbose_logger.debug(
        f"prometheus Logging - Enters success logging function for kwargs {kwargs}"
    )

    # unpack kwargs
    model = kwargs.get("model", "")
    response_cost = kwargs.get("response_cost", 0.0) or 0
    litellm_params = kwargs.get("litellm_params", {}) or {}
    proxy_server_request = litellm_params.get("proxy_server_request") or {}
    end_user_id = (proxy_server_request.get("body") or {}).get("user", None)

    # Look the metadata dict up once. `or {}` also covers the case where the
    # key is present but explicitly None, which `.get("metadata", {})` does not.
    metadata = litellm_params.get("metadata") or {}
    user_id = metadata.get("user_api_key_user_id", None)
    user_api_key = metadata.get("user_api_key", None)
    user_api_key_alias = metadata.get("user_api_key_alias", None)
    user_api_team = metadata.get("user_api_key_team_id", None)
    user_api_team_alias = metadata.get("user_api_key_team_alias", None)

    _remaining_team_budget = safe_get_remaining_budget(
        max_budget=metadata.get("user_api_key_team_max_budget", None),
        spend=metadata.get("user_api_key_team_spend", None),
    )
    _remaining_api_key_budget = safe_get_remaining_budget(
        max_budget=metadata.get("user_api_key_max_budget", None),
        spend=metadata.get("user_api_key_spend", None),
    )

    tokens_used = 0
    if response_obj is not None:
        # Guard against a missing/None usage block or token count so the
        # counter is never incremented with None.
        usage = response_obj.get("usage", {}) or {}
        tokens_used = usage.get("total_tokens", 0) or 0

    # Hash a raw "sk-" key BEFORE it is logged or used as a label, so the
    # plain-text key never reaches the verbose log.
    if isinstance(user_api_key, str) and user_api_key.startswith("sk-"):
        from litellm.proxy.utils import hash_token

        user_api_key = hash_token(user_api_key)

    print_verbose(
        f"inside track_prometheus_metrics, model {model}, response_cost {response_cost}, tokens_used {tokens_used}, end_user_id {end_user_id}, user_api_key {user_api_key}"
    )

    # The request, spend and token counters share the same label values,
    # in this exact order.
    request_labels = (
        end_user_id,
        user_api_key,
        user_api_key_alias,
        model,
        user_api_team,
        user_api_team_alias,
        user_id,
    )
    self.litellm_requests_metric.labels(*request_labels).inc()
    self.litellm_spend_metric.labels(*request_labels).inc(response_cost)
    self.litellm_tokens_metric.labels(*request_labels).inc(tokens_used)

    self.litellm_remaining_team_budget_metric.labels(
        user_api_team, user_api_team_alias
    ).set(_remaining_team_budget)

    self.litellm_remaining_api_key_budget_metric.labels(
        user_api_key, user_api_key_alias
    ).set(_remaining_api_key_budget)

    # Deployment-level success metrics are only recorded for premium users.
    if premium_user is True:
        self.set_llm_deployment_success_metrics(kwargs)
|
||||
|
||||
async def async_log_failure_event(self, kwargs, response_obj, start_time, end_time):
|
||||
from litellm.proxy.proxy_server import premium_user
|
||||
|
||||
verbose_logger.debug(
|
||||
f"prometheus Logging - Enters success logging function for kwargs {kwargs}"
|
||||
)
|
||||
|
||||
# unpack kwargs
|
||||
model = kwargs.get("model", "")
|
||||
litellm_params = kwargs.get("litellm_params", {}) or {}
|
||||
proxy_server_request = litellm_params.get("proxy_server_request") or {}
|
||||
end_user_id = proxy_server_request.get("body", {}).get("user", None)
|
||||
user_id = litellm_params.get("metadata", {}).get("user_api_key_user_id", None)
|
||||
user_api_key = litellm_params.get("metadata", {}).get("user_api_key", None)
|
||||
user_api_key_alias = litellm_params.get("metadata", {}).get(
|
||||
"user_api_key_alias", None
|
||||
)
|
||||
user_api_team = litellm_params.get("metadata", {}).get(
|
||||
"user_api_key_team_id", None
|
||||
)
|
||||
user_api_team_alias = litellm_params.get("metadata", {}).get(
|
||||
"user_api_key_team_alias", None
|
||||
)
|
||||
|
||||
def log_event(
|
||||
self, kwargs, response_obj, start_time, end_time, user_id, print_verbose
|
||||
):
|
||||
try:
|
||||
# Define prometheus client
|
||||
from litellm.proxy.proxy_server import premium_user
|
||||
|
||||
verbose_logger.debug(
|
||||
f"prometheus Logging - Enters logging function for model {kwargs}"
|
||||
)
|
||||
|
||||
# unpack kwargs
|
||||
model = kwargs.get("model", "")
|
||||
response_cost = kwargs.get("response_cost", 0.0) or 0
|
||||
litellm_params = kwargs.get("litellm_params", {}) or {}
|
||||
proxy_server_request = litellm_params.get("proxy_server_request") or {}
|
||||
end_user_id = proxy_server_request.get("body", {}).get("user", None)
|
||||
user_id = litellm_params.get("metadata", {}).get(
|
||||
"user_api_key_user_id", None
|
||||
)
|
||||
user_api_key = litellm_params.get("metadata", {}).get("user_api_key", None)
|
||||
user_api_key_alias = litellm_params.get("metadata", {}).get(
|
||||
"user_api_key_alias", None
|
||||
)
|
||||
user_api_team = litellm_params.get("metadata", {}).get(
|
||||
"user_api_key_team_id", None
|
||||
)
|
||||
user_api_team_alias = litellm_params.get("metadata", {}).get(
|
||||
"user_api_key_team_alias", None
|
||||
)
|
||||
|
||||
_team_spend = litellm_params.get("metadata", {}).get(
|
||||
"user_api_key_team_spend", None
|
||||
)
|
||||
_team_max_budget = litellm_params.get("metadata", {}).get(
|
||||
"user_api_key_team_max_budget", None
|
||||
)
|
||||
_remaining_team_budget = safe_get_remaining_budget(
|
||||
max_budget=_team_max_budget, spend=_team_spend
|
||||
)
|
||||
|
||||
_api_key_spend = litellm_params.get("metadata", {}).get(
|
||||
"user_api_key_spend", None
|
||||
)
|
||||
_api_key_max_budget = litellm_params.get("metadata", {}).get(
|
||||
"user_api_key_max_budget", None
|
||||
)
|
||||
_remaining_api_key_budget = safe_get_remaining_budget(
|
||||
max_budget=_api_key_max_budget, spend=_api_key_spend
|
||||
)
|
||||
|
||||
if response_obj is not None:
|
||||
tokens_used = response_obj.get("usage", {}).get("total_tokens", 0)
|
||||
else:
|
||||
tokens_used = 0
|
||||
|
||||
print_verbose(
|
||||
f"inside track_prometheus_metrics, model {model}, response_cost {response_cost}, tokens_used {tokens_used}, end_user_id {end_user_id}, user_api_key {user_api_key}"
|
||||
)
|
||||
|
||||
if (
|
||||
user_api_key is not None
|
||||
and isinstance(user_api_key, str)
|
||||
and user_api_key.startswith("sk-")
|
||||
):
|
||||
from litellm.proxy.utils import hash_token
|
||||
|
||||
user_api_key = hash_token(user_api_key)
|
||||
|
||||
self.litellm_requests_metric.labels(
|
||||
self.litellm_llm_api_failed_requests_metric.labels(
|
||||
end_user_id,
|
||||
user_api_key,
|
||||
user_api_key_alias,
|
||||
|
@ -232,56 +287,15 @@ class PrometheusLogger:
|
|||
user_api_team_alias,
|
||||
user_id,
|
||||
).inc()
|
||||
self.litellm_spend_metric.labels(
|
||||
end_user_id,
|
||||
user_api_key,
|
||||
user_api_key_alias,
|
||||
model,
|
||||
user_api_team,
|
||||
user_api_team_alias,
|
||||
user_id,
|
||||
).inc(response_cost)
|
||||
self.litellm_tokens_metric.labels(
|
||||
end_user_id,
|
||||
user_api_key,
|
||||
user_api_key_alias,
|
||||
model,
|
||||
user_api_team,
|
||||
user_api_team_alias,
|
||||
user_id,
|
||||
).inc(tokens_used)
|
||||
|
||||
self.litellm_remaining_team_budget_metric.labels(
|
||||
user_api_team, user_api_team_alias
|
||||
).set(_remaining_team_budget)
|
||||
|
||||
self.litellm_remaining_api_key_budget_metric.labels(
|
||||
user_api_key, user_api_key_alias
|
||||
).set(_remaining_api_key_budget)
|
||||
|
||||
# set x-ratelimit headers
|
||||
if premium_user is True:
|
||||
self.set_llm_deployment_success_metrics(kwargs)
|
||||
|
||||
### FAILURE INCREMENT ###
|
||||
if "exception" in kwargs:
|
||||
self.litellm_llm_api_failed_requests_metric.labels(
|
||||
end_user_id,
|
||||
user_api_key,
|
||||
user_api_key_alias,
|
||||
model,
|
||||
user_api_team,
|
||||
user_api_team_alias,
|
||||
user_id,
|
||||
).inc()
|
||||
|
||||
self.set_llm_deployment_failure_metrics(kwargs)
|
||||
self.set_llm_deployment_failure_metrics(kwargs)
|
||||
except Exception as e:
|
||||
verbose_logger.error(
|
||||
"prometheus Layer Error(): Exception occured - {}".format(str(e))
|
||||
)
|
||||
verbose_logger.debug(traceback.format_exc())
|
||||
pass
|
||||
pass
|
||||
|
||||
def set_llm_deployment_failure_metrics(self, request_kwargs: dict):
|
||||
try:
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue