mirror of https://github.com/BerriAI/litellm.git
synced 2025-04-27 03:34:10 +00:00
add litellm-key-remaining-tokens on prometheus
parent 785482f023
commit 412d30d362
3 changed files with 56 additions and 8 deletions
PrometheusLogger (Prometheus callback integration), metric definitions:

@@ -103,13 +103,30 @@ class PrometheusLogger(CustomLogger):
             "Remaining budget for api key",
             labelnames=["hashed_api_key", "api_key_alias"],
         )
 
-        ########################################
-        # LLM API Deployment Metrics / analytics
-        ########################################
-
         # Litellm-Enterprise Metrics
         if premium_user is True:
+
+            ########################################
+            # LiteLLM Virtual API KEY metrics
+            ########################################
+            # Remaining MODEL RPM limit for API Key
+            self.litellm_remaining_api_key_requests_for_model = Gauge(
+                "litellm_remaining_api_key_requests_for_model",
+                "Remaining Requests API Key can make for model (model based rpm limit on key)",
+                labelnames=["hashed_api_key", "api_key_alias", "model"],
+            )
+
+            # Remaining MODEL TPM limit for API Key
+            self.litellm_remaining_api_key_tokens_for_model = Gauge(
+                "litellm_remaining_api_key_tokens_for_model",
+                "Remaining Tokens API Key can make for model (model based tpm limit on key)",
+                labelnames=["hashed_api_key", "api_key_alias", "model"],
+            )
+
+            ########################################
+            # LLM API Deployment Metrics / analytics
+            ########################################
 
             # Remaining Rate Limit for model
             self.litellm_remaining_requests_metric = Gauge(
                 "litellm_remaining_requests",
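Both new metrics use the standard prometheus_client Gauge: each distinct (hashed_api_key, api_key_alias, model) label tuple becomes its own time series, and .set() overwrites the last reported value for that series. A minimal standalone sketch of the pattern (the label values are made up for illustration):

import sys

from prometheus_client import Gauge, generate_latest

remaining_tokens_gauge = Gauge(
    "litellm_remaining_api_key_tokens_for_model",
    "Remaining Tokens API Key can make for model (model based tpm limit on key)",
    labelnames=["hashed_api_key", "api_key_alias", "model"],
)

# One child series per unique label combination; repeated .set() calls
# overwrite the previous reading for that series.
remaining_tokens_gauge.labels("1a2b3c", "my-key-alias", "gpt-4o").set(9500)

# Dump the exposition format to verify the series exists.
print(generate_latest().decode())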
@@ -197,6 +214,7 @@ class PrometheusLogger(CustomLogger):
         model = kwargs.get("model", "")
         response_cost = kwargs.get("response_cost", 0.0) or 0
         litellm_params = kwargs.get("litellm_params", {}) or {}
+        _metadata = litellm_params.get("metadata", {})
         proxy_server_request = litellm_params.get("proxy_server_request") or {}
         end_user_id = proxy_server_request.get("body", {}).get("user", None)
         user_id = litellm_params.get("metadata", {}).get("user_api_key_user_id", None)
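_metadata is captured once here so the success handler (next hunk) can read the remaining-limit keys that the parallel request limiter stashes in request metadata. Roughly the shape it carries by that point; the keys come from this diff, the values are illustrative:

litellm_params = {
    "metadata": {
        "model_group": "gpt-4o",  # the model group the request was routed under
        "litellm-key-remaining-requests-gpt-4o": 98,   # written by parallel_request_limiter.py
        "litellm-key-remaining-tokens-gpt-4o": 9500,
    }
}
_metadata = litellm_params.get("metadata", {})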
@@ -286,6 +304,27 @@ class PrometheusLogger(CustomLogger):
                 user_api_key, user_api_key_alias
             ).set(_remaining_api_key_budget)
 
+            # Set remaining rpm/tpm for API Key + model
+            # see parallel_request_limiter.py - variables are set there
+            model_group = _metadata.get("model_group", None)
+            remaining_requests_variable_name = (
+                f"litellm-key-remaining-requests-{model_group}"
+            )
+            remaining_tokens_variable_name = f"litellm-key-remaining-tokens-{model_group}"
+
+            remaining_requests = _metadata.get(
+                remaining_requests_variable_name, sys.maxsize
+            )
+            remaining_tokens = _metadata.get(remaining_tokens_variable_name, sys.maxsize)
+
+            self.litellm_remaining_api_key_requests_for_model.labels(
+                user_api_key, user_api_key_alias, model
+            ).set(remaining_requests)
+
+            self.litellm_remaining_api_key_tokens_for_model.labels(
+                user_api_key, user_api_key_alias, model
+            ).set(remaining_tokens)
+
         # set x-ratelimit headers
         if premium_user is True:
             self.set_llm_deployment_success_metrics(
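The sys.maxsize default is deliberate: if the limiter has not stashed these keys for the request (e.g. no per-model rpm/tpm limit is set on the key), the gauges fall back to an effectively infinite value rather than erroring or reporting 0. The lookup, worked through with illustrative values:

import sys

_metadata = {"model_group": "gpt-4o", "litellm-key-remaining-requests-gpt-4o": 98}

model_group = _metadata.get("model_group", None)
remaining_requests = _metadata.get(
    f"litellm-key-remaining-requests-{model_group}", sys.maxsize
)
remaining_tokens = _metadata.get(
    f"litellm-key-remaining-tokens-{model_group}", sys.maxsize
)

print(remaining_requests)  # 98 -> an rpm limit exists and is partly consumed
print(remaining_tokens)    # 9223372036854775807 -> no tpm limit configured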
_PROXY_MaxParallelRequestsHandler (parallel_request_limiter.py), where the variables are set:

@@ -263,6 +263,17 @@ class _PROXY_MaxParallelRequestsHandler(CustomLogger):
                 request_count_api_key, new_val
             )
 
+            tpm_limit_for_model = tpm_limit_for_model or sys.maxsize
+            rpm_limit_for_model = rpm_limit_for_model or sys.maxsize
+            # Add remaining tokens, requests to metadata
+            _remaining_tokens = tpm_limit_for_model - new_val["current_tpm"]
+            _remaining_requests = rpm_limit_for_model - new_val["current_rpm"]
+            _remaining_limits_data = {
+                f"litellm-key-remaining-tokens-{_model}": _remaining_tokens,
+                f"litellm-key-remaining-requests-{_model}": _remaining_requests,
+            }
+            data["metadata"].update(_remaining_limits_data)
+
         # check if REQUEST ALLOWED for user_id
         user_id = user_api_key_dict.user_id
         if user_id is not None:
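The remaining-limit math is limit minus usage in the current window, with missing limits coalesced to sys.maxsize so the subtraction stays well-defined. Worked through with made-up numbers:

import sys

_model = "gpt-4o"
tpm_limit_for_model = 10000  # illustrative per-model tpm limit on the key
rpm_limit_for_model = None   # no rpm limit configured for this key/model
new_val = {"current_tpm": 500, "current_rpm": 2}  # usage so far this window

tpm_limit_for_model = tpm_limit_for_model or sys.maxsize
rpm_limit_for_model = rpm_limit_for_model or sys.maxsize

data = {"metadata": {}}
data["metadata"].update({
    f"litellm-key-remaining-tokens-{_model}": tpm_limit_for_model - new_val["current_tpm"],
    f"litellm-key-remaining-requests-{_model}": rpm_limit_for_model - new_val["current_rpm"],
})

print(data["metadata"]["litellm-key-remaining-tokens-gpt-4o"])    # 9500
print(data["metadata"]["litellm-key-remaining-requests-gpt-4o"])  # 9223372036854775805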
|
@ -42,7 +42,5 @@ general_settings:
|
||||||
|
|
||||||
litellm_settings:
|
litellm_settings:
|
||||||
fallbacks: [{"gemini-1.5-pro-001": ["gpt-4o"]}]
|
fallbacks: [{"gemini-1.5-pro-001": ["gpt-4o"]}]
|
||||||
callbacks: ["gcs_bucket"]
|
success_callback: ["langfuse", "prometheus"]
|
||||||
success_callback: ["langfuse"]
|
|
||||||
langfuse_default_tags: ["cache_hit", "cache_key", "user_api_key_alias", "user_api_key_team_alias"]
|
langfuse_default_tags: ["cache_hit", "cache_key", "user_api_key_alias", "user_api_key_team_alias"]
|
||||||
cache: True
|
|
||||||
|
|
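With "prometheus" added to success_callback, the new gauges should show up on the proxy's metrics endpoint once a request has been served. A quick check, assuming the proxy runs locally on port 4000 and exposes /metrics (both are assumptions, adjust to your deployment):

import urllib.request

# Fetch the Prometheus exposition text and print the new gauge lines.
body = urllib.request.urlopen("http://localhost:4000/metrics").read().decode()
for line in body.splitlines():
    if "litellm_remaining_api_key" in line:
        print(line)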