[Feat-Prometheus] Track exception status on litellm_deployment_failure_responses (#5706)

* add litellm_deployment_cooled_down
* track num cooldowns on prometheus
* track exception status
* fix linting
* docs prom metrics
* cleanup premium user checks
* prom track deployment failure state
* docs prometheus
commit c8eff2dc65 (parent b878a67a7c)
6 changed files with 171 additions and 130 deletions
Docs: Prometheus metrics page

@@ -58,17 +58,9 @@ http://localhost:4000/metrics
 
 ## 📈 Metrics Tracked
 
-### Error Metrics
+### Virtual Keys, Teams, Internal Users Metrics
 
-| Metric Name | Description |
-|----------------------|--------------------------------------|
-| `litellm_error_code_metric_total` | Total number of errors by error code and model |
-
-This metric provides a count of errors encountered, categorized by error code and model. For example:
-
-
-### Proxy Requests / Spend Metrics
+Use this for tracking per [user, key, team, etc.](virtual_keys)
 
 | Metric Name | Description |
 |----------------------|--------------------------------------|
@@ -76,11 +68,32 @@
 | `litellm_spend_metric` | Total Spend, per `"user", "key", "model", "team", "end-user"` |
 | `litellm_total_tokens` | input + output tokens per `"user", "key", "model", "team", "end-user"` |
 
-### Error Monitoring Metrics
+### LLM API / Provider Metrics
+
+Use this for LLM API error monitoring and tracking remaining rate limits and token limits
 
 | Metric Name | Description |
-| `litellm_llm_api_failed_requests_metric` | Number of failed LLM API requests per `"user", "key", "model", "team", "end-user"` |
-| `litellm_error_code_metric_total` | Total number of errors by error code and model |
+|----------------------|--------------------------------------|
+| `litellm_deployment_success_responses` | Total number of successful LLM API calls for deployment |
+| `litellm_deployment_failure_responses` | Total number of failed LLM API calls for a specific LLM deployment. exception_status is the status of the exception from the llm api |
+| `litellm_deployment_total_requests` | Total number of LLM API calls for deployment - success + failure |
+| `litellm_remaining_requests_metric` | Track `x-ratelimit-remaining-requests` returned from LLM API Deployment |
+| `litellm_remaining_tokens` | Track `x-ratelimit-remaining-tokens` returned from LLM API Deployment |
+| `litellm_deployment_state` | The state of the deployment: 0 = healthy, 1 = partial outage, 2 = complete outage. |
+| `litellm_deployment_latency_per_output_token` | Latency per output token for deployment |
+
+## Load Balancing, Fallback, Cooldown Metrics
+
+Use this for tracking [litellm router](../routing) load balancing metrics
+
+| Metric Name | Description |
+|----------------------|--------------------------------------|
+| `litellm_deployment_cooled_down` | Number of times a deployment has been cooled down by LiteLLM load balancing logic. exception_status is the status of the exception that caused the deployment to be cooled down |
+| `litellm_deployment_successful_fallbacks` | Number of successful fallback requests from primary model -> fallback model |
+| `litellm_deployment_failed_fallbacks` | Number of failed fallback requests from primary model -> fallback model |
 
 ### Request Latency Metrics
 
@@ -90,24 +103,6 @@
 | `litellm_llm_api_latency_metric` | latency (seconds) for just the LLM API call - tracked for labels `litellm_call_id`, `model` |
 
-### LLM API / Provider Metrics
-
-| Metric Name | Description |
-|----------------------|--------------------------------------|
-| `litellm_deployment_state` | The state of the deployment: 0 = healthy, 1 = partial outage, 2 = complete outage. |
-| `litellm_remaining_requests_metric` | Track `x-ratelimit-remaining-requests` returned from LLM API Deployment |
-| `litellm_remaining_tokens` | Track `x-ratelimit-remaining-tokens` return from LLM API Deployment |
-| `litellm_deployment_success_responses` | Total number of successful LLM API calls for deployment |
-| `litellm_deployment_failure_responses` | Total number of failed LLM API calls for deployment |
-| `litellm_deployment_total_requests` | Total number of LLM API calls for deployment - success + failure |
-| `litellm_deployment_latency_per_output_token` | Latency per output token for deployment |
-| `litellm_deployment_successful_fallbacks` | Number of successful fallback requests from primary model -> fallback model |
-| `litellm_deployment_failed_fallbacks` | Number of failed fallback requests from primary model -> fallback model |
-
 ### Budget Metrics
 
 | Metric Name | Description |
 |----------------------|--------------------------------------|
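A quick way to eyeball the new metrics outside of Grafana is to hit the proxy's `/metrics` endpoint directly (the URL shown in the hunk header above). A minimal sketch, assuming a proxy running locally on port 4000 with the `prometheus` callback enabled:

```python
# Scrape the LiteLLM proxy's Prometheus endpoint and print the deployment
# failure series, which now carry an `exception_status` label.
import requests

resp = requests.get("http://localhost:4000/metrics", timeout=10)
resp.raise_for_status()

for line in resp.text.splitlines():
    # Counter lines typically look like:
    # litellm_deployment_failure_responses_total{api_base="...",exception_status="429",...} 3.0
    if line.startswith("litellm_deployment_failure_responses"):
        print(line)
```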
litellm/integrations/prometheus.py

@@ -26,8 +26,6 @@ class PrometheusLogger(CustomLogger):
         try:
             from prometheus_client import Counter, Gauge, Histogram
 
-            from litellm.proxy.proxy_server import premium_user
-
             verbose_logger.warning(
                 "🚨🚨🚨 Prometheus Metrics will be moving to LiteLLM Enterprise on September 15th, 2024.\n🚨 Contact us here to get a license https://calendly.com/d/4mp-gd3-k5k/litellm-1-1-onboarding-chat \n🚨 Enterprise Pricing: https://www.litellm.ai/#pricing"
             )
@@ -145,83 +143,86 @@ class PrometheusLogger(CustomLogger):
                 labelnames=["error_code", "model"],
             )
 
-            # Litellm-Enterprise Metrics
-            if premium_user is True:
-
-                ########################################
-                # LLM API Deployment Metrics / analytics
-                ########################################
-
-                # Remaining Rate Limit for model
-                self.litellm_remaining_requests_metric = Gauge(
-                    "litellm_remaining_requests",
-                    "LLM Deployment Analytics - remaining requests for model, returned from LLM API Provider",
-                    labelnames=[
-                        "model_group",
-                        "api_provider",
-                        "api_base",
-                        "litellm_model_name",
-                    ],
-                )
-
-                self.litellm_remaining_tokens_metric = Gauge(
-                    "litellm_remaining_tokens",
-                    "remaining tokens for model, returned from LLM API Provider",
-                    labelnames=[
-                        "model_group",
-                        "api_provider",
-                        "api_base",
-                        "litellm_model_name",
-                    ],
-                )
-                # Get all keys
-                _logged_llm_labels = [
-                    "litellm_model_name",
-                    "model_id",
-                    "api_base",
-                    "api_provider",
-                ]
-
-                # Metric for deployment state
-                self.litellm_deployment_state = Gauge(
-                    "litellm_deployment_state",
-                    "LLM Deployment Analytics - The state of the deployment: 0 = healthy, 1 = partial outage, 2 = complete outage",
-                    labelnames=_logged_llm_labels,
-                )
-
-                self.litellm_deployment_success_responses = Counter(
-                    name="litellm_deployment_success_responses",
-                    documentation="LLM Deployment Analytics - Total number of successful LLM API calls via litellm",
-                    labelnames=_logged_llm_labels,
-                )
-                self.litellm_deployment_failure_responses = Counter(
-                    name="litellm_deployment_failure_responses",
-                    documentation="LLM Deployment Analytics - Total number of failed LLM API calls via litellm",
-                    labelnames=_logged_llm_labels,
-                )
-                self.litellm_deployment_total_requests = Counter(
-                    name="litellm_deployment_total_requests",
-                    documentation="LLM Deployment Analytics - Total number of LLM API calls via litellm - success + failure",
-                    labelnames=_logged_llm_labels,
-                )
-
-                # Deployment Latency tracking
-                self.litellm_deployment_latency_per_output_token = Histogram(
-                    name="litellm_deployment_latency_per_output_token",
-                    documentation="LLM Deployment Analytics - Latency per output token",
-                    labelnames=_logged_llm_labels,
-                )
-
-                self.litellm_deployment_successful_fallbacks = Counter(
-                    "litellm_deployment_successful_fallbacks",
-                    "LLM Deployment Analytics - Number of successful fallback requests from primary model -> fallback model",
-                    ["primary_model", "fallback_model"],
-                )
-                self.litellm_deployment_failed_fallbacks = Counter(
-                    "litellm_deployment_failed_fallbacks",
-                    "LLM Deployment Analytics - Number of failed fallback requests from primary model -> fallback model",
-                    ["primary_model", "fallback_model"],
-                )
+            ########################################
+            # LLM API Deployment Metrics / analytics
+            ########################################
+
+            # Remaining Rate Limit for model
+            self.litellm_remaining_requests_metric = Gauge(
+                "litellm_remaining_requests",
+                "LLM Deployment Analytics - remaining requests for model, returned from LLM API Provider",
+                labelnames=[
+                    "model_group",
+                    "api_provider",
+                    "api_base",
+                    "litellm_model_name",
+                ],
+            )
+
+            self.litellm_remaining_tokens_metric = Gauge(
+                "litellm_remaining_tokens",
+                "remaining tokens for model, returned from LLM API Provider",
+                labelnames=[
+                    "model_group",
+                    "api_provider",
+                    "api_base",
+                    "litellm_model_name",
+                ],
+            )
+            # Get all keys
+            _logged_llm_labels = [
+                "litellm_model_name",
+                "model_id",
+                "api_base",
+                "api_provider",
+            ]
+
+            # Metric for deployment state
+            self.litellm_deployment_state = Gauge(
+                "litellm_deployment_state",
+                "LLM Deployment Analytics - The state of the deployment: 0 = healthy, 1 = partial outage, 2 = complete outage",
+                labelnames=_logged_llm_labels,
+            )
+
+            self.litellm_deployment_cooled_down = Counter(
+                "litellm_deployment_cooled_down",
+                "LLM Deployment Analytics - Number of times a deployment has been cooled down by LiteLLM load balancing logic. exception_status is the status of the exception that caused the deployment to be cooled down",
+                labelnames=_logged_llm_labels + ["exception_status"],
+            )
+
+            self.litellm_deployment_success_responses = Counter(
+                name="litellm_deployment_success_responses",
+                documentation="LLM Deployment Analytics - Total number of successful LLM API calls via litellm",
+                labelnames=_logged_llm_labels,
+            )
+            self.litellm_deployment_failure_responses = Counter(
+                name="litellm_deployment_failure_responses",
+                documentation="LLM Deployment Analytics - Total number of failed LLM API calls for a specific LLM deployment. exception_status is the status of the exception from the llm api",
+                labelnames=_logged_llm_labels + ["exception_status"],
+            )
+            self.litellm_deployment_total_requests = Counter(
+                name="litellm_deployment_total_requests",
+                documentation="LLM Deployment Analytics - Total number of LLM API calls via litellm - success + failure",
+                labelnames=_logged_llm_labels,
+            )
+
+            # Deployment Latency tracking
+            self.litellm_deployment_latency_per_output_token = Histogram(
+                name="litellm_deployment_latency_per_output_token",
+                documentation="LLM Deployment Analytics - Latency per output token",
+                labelnames=_logged_llm_labels,
+            )
+
+            self.litellm_deployment_successful_fallbacks = Counter(
+                "litellm_deployment_successful_fallbacks",
+                "LLM Deployment Analytics - Number of successful fallback requests from primary model -> fallback model",
+                ["primary_model", "fallback_model"],
+            )
+            self.litellm_deployment_failed_fallbacks = Counter(
+                "litellm_deployment_failed_fallbacks",
+                "LLM Deployment Analytics - Number of failed fallback requests from primary model -> fallback model",
+                ["primary_model", "fallback_model"],
+            )
 
         except Exception as e:
             print_verbose(f"Got exception on init prometheus client {str(e)}")
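For readers unfamiliar with `prometheus_client`, here is a standalone sketch (not litellm code) of the labelling pattern used above: a Counter declared with the shared deployment labels plus `exception_status`, which yields one time series per unique label combination. The metric name is suffixed `_demo` so it cannot clash with a live litellm registry:

```python
from prometheus_client import Counter

_logged_llm_labels = ["litellm_model_name", "model_id", "api_base", "api_provider"]

# Same shape as litellm_deployment_failure_responses above; values are invented.
failure_counter = Counter(
    name="litellm_deployment_failure_responses_demo",
    documentation="Failed LLM API calls per deployment, by exception status",
    labelnames=_logged_llm_labels + ["exception_status"],
)

# Each distinct label tuple is its own series; .inc() bumps just that series.
failure_counter.labels(
    "azure/chatgpt-v-2", "deployment-id-1", "https://example.azure.com", "azure", "429"
).inc()
```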
@@ -232,7 +233,6 @@ class PrometheusLogger(CustomLogger):
         from litellm.proxy.common_utils.callback_utils import (
             get_model_group_from_litellm_kwargs,
         )
-        from litellm.proxy.proxy_server import premium_user
 
         verbose_logger.debug(
             f"prometheus Logging - Enters success logging function for kwargs {kwargs}"
@@ -375,14 +375,12 @@ class PrometheusLogger(CustomLogger):
             )
 
         # set x-ratelimit headers
-        if premium_user is True:
-            self.set_llm_deployment_success_metrics(
-                kwargs, start_time, end_time, output_tokens
-            )
+        self.set_llm_deployment_success_metrics(
+            kwargs, start_time, end_time, output_tokens
+        )
         pass
 
     async def async_log_failure_event(self, kwargs, response_obj, start_time, end_time):
-        from litellm.proxy.proxy_server import premium_user
-
         verbose_logger.debug(
             f"prometheus Logging - Enters failure logging function for kwargs {kwargs}"
@@ -404,6 +402,7 @@ class PrometheusLogger(CustomLogger):
         user_api_team_alias = litellm_params.get("metadata", {}).get(
             "user_api_key_team_alias", None
         )
+        exception = kwargs.get("exception", None)
 
         try:
             self.litellm_llm_api_failed_requests_metric.labels(
@@ -441,8 +440,13 @@ class PrometheusLogger(CustomLogger):
         _metadata = _litellm_params.get("metadata", {})
         litellm_model_name = request_kwargs.get("model", None)
         api_base = _metadata.get("api_base", None)
+        if api_base is None:
+            api_base = _litellm_params.get("api_base", None)
         llm_provider = _litellm_params.get("custom_llm_provider", None)
-        model_id = _metadata.get("model_id")
+        _model_info = _metadata.get("model_info") or {}
+        model_id = _model_info.get("id", None)
+        exception = request_kwargs.get("exception", None)
+        exception_status_code: str = str(getattr(exception, "status_code", None))
 
         """
         log these labels
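Note how the new `exception_status` label is derived: `getattr` falls back to `None` for exceptions that carry no `status_code` attribute, which then stringifies to `"None"`. A small illustration (the exception class below is invented for the demo):

```python
class FakeRateLimitError(Exception):
    status_code = 429  # mimics provider exceptions that expose an HTTP status

print(str(getattr(FakeRateLimitError(), "status_code", None)))    # -> "429"
print(str(getattr(ValueError("no status"), "status_code", None)))  # -> "None"
```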
@@ -460,6 +464,7 @@ class PrometheusLogger(CustomLogger):
             model_id=model_id,
             api_base=api_base,
             api_provider=llm_provider,
+            exception_status=exception_status_code,
         ).inc()
 
         self.litellm_deployment_total_requests.labels(
@@ -488,8 +493,11 @@ class PrometheusLogger(CustomLogger):
         litellm_model_name = request_kwargs.get("model", None)
         model_group = _metadata.get("model_group", None)
         api_base = _metadata.get("api_base", None)
+        if api_base is None:
+            api_base = _litellm_params.get("api_base", None)
         llm_provider = _litellm_params.get("custom_llm_provider", None)
-        model_id = _metadata.get("model_id")
+        _model_info = _metadata.get("model_info") or {}
+        model_id = _model_info.get("id", None)
 
         remaining_requests = None
         remaining_tokens = None
@@ -654,6 +662,21 @@ class PrometheusLogger(CustomLogger):
             2, litellm_model_name, model_id, api_base, api_provider
         )
 
+    def increment_deployment_cooled_down(
+        self,
+        litellm_model_name: str,
+        model_id: str,
+        api_base: str,
+        api_provider: str,
+        exception_status: str,
+    ):
+        """
+        increment metric when litellm.Router / load balancing logic places a deployment in cool down
+        """
+        self.litellm_deployment_cooled_down.labels(
+            litellm_model_name, model_id, api_base, api_provider, exception_status
+        ).inc()
+
 
 def safe_get_remaining_budget(
     max_budget: Optional[float], spend: Optional[float]
Proxy config

@@ -24,5 +24,5 @@ general_settings:
   master_key: sk-1234
 
 litellm_settings:
-  success_callback: ["datadog"]
+  success_callback: ["prometheus"]
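The config change above swaps the proxy's success callback from `datadog` to `prometheus`. For completeness, a sketch of the equivalent when using the litellm Python SDK directly rather than the proxy config file (assuming the standard string-callback registration):

```python
import litellm

# Mirror litellm_settings from the proxy config: emit Prometheus metrics
# for both successful and failed calls.
litellm.success_callback = ["prometheus"]
litellm.failure_callback = ["prometheus"]
```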
litellm/router.py

@@ -53,7 +53,7 @@ from litellm.router_utils.client_initalization_utils import (
     should_initialize_sync_client,
 )
 from litellm.router_utils.cooldown_cache import CooldownCache
-from litellm.router_utils.cooldown_callbacks import router_cooldown_handler
+from litellm.router_utils.cooldown_callbacks import router_cooldown_event_callback
 from litellm.router_utils.cooldown_handlers import (
     DEFAULT_COOLDOWN_TIME_SECONDS,
     _async_get_cooldown_deployments,
litellm/router_utils/cooldown_callbacks.py

@@ -16,32 +16,39 @@ else:
     LitellmRouter = Any
 
 
-async def router_cooldown_handler(
+async def router_cooldown_event_callback(
     litellm_router_instance: LitellmRouter,
     deployment_id: str,
     exception_status: Union[str, int],
     cooldown_time: float,
 ):
+    """
+    Callback triggered when a deployment is put into cooldown by litellm
+
+    - Updates deployment state on Prometheus
+    - Increments cooldown metric for deployment on Prometheus
+    """
+    verbose_logger.debug("In router_cooldown_event_callback - updating prometheus")
     _deployment = litellm_router_instance.get_deployment(model_id=deployment_id)
     if _deployment is None:
         verbose_logger.warning(
-            f"in router_cooldown_handler but _deployment is None for deployment_id={deployment_id}. Doing nothing"
+            f"in router_cooldown_event_callback but _deployment is None for deployment_id={deployment_id}. Doing nothing"
         )
         return
     _litellm_params = _deployment["litellm_params"]
     temp_litellm_params = copy.deepcopy(_litellm_params)
     temp_litellm_params = dict(temp_litellm_params)
-    _model_name = _deployment.get("model_name", None)
-    _api_base = litellm.get_api_base(
-        model=_model_name, optional_params=temp_litellm_params
+    _model_name = _deployment.get("model_name", None) or ""
+    _api_base = (
+        litellm.get_api_base(model=_model_name, optional_params=temp_litellm_params)
+        or ""
     )
     model_info = _deployment["model_info"]
     model_id = model_info.id
 
-    litellm_model_name = temp_litellm_params.get("model")
+    litellm_model_name = temp_litellm_params.get("model") or ""
     llm_provider = ""
     try:
         _, llm_provider, _, _ = litellm.get_llm_provider(
             model=litellm_model_name,
             custom_llm_provider=temp_litellm_params.get("custom_llm_provider"),
@@ -50,13 +57,29 @@ async def router_cooldown_handler(
         pass
 
     # Trigger cooldown on Prometheus
-    from litellm.litellm_core_utils.litellm_logging import prometheusLogger
+    from litellm.integrations.prometheus import PrometheusLogger
+
+    prometheusLogger = None
+    for callback in litellm.callbacks:
+        if isinstance(callback, PrometheusLogger):
+            prometheusLogger = callback
 
     if prometheusLogger is not None:
-        prometheusLogger.set_deployment_complete_outage(
-            litellm_model_name=_model_name,
-            model_id=model_id,
-            api_base=_api_base,
-            api_provider=llm_provider,
-        )
+
+        if isinstance(prometheusLogger, PrometheusLogger):
+            prometheusLogger.set_deployment_complete_outage(
+                litellm_model_name=_model_name,
+                model_id=model_id,
+                api_base=_api_base,
+                api_provider=llm_provider,
+            )
+
+            prometheusLogger.increment_deployment_cooled_down(
+                litellm_model_name=_model_name,
+                model_id=model_id,
+                api_base=_api_base,
+                api_provider=llm_provider,
+                exception_status=str(exception_status),
+            )
 
     return
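Instead of importing a module-level `prometheusLogger` singleton, the callback now scans `litellm.callbacks` for a `PrometheusLogger` instance. A self-contained sketch of that discovery pattern (class names here are illustrative, not the litellm API):

```python
class BaseCallback:
    """Stand-in for litellm's callback base class."""

class MetricsCallback(BaseCallback):
    def increment_cooled_down(self, deployment_id: str, exception_status: str) -> None:
        print(f"cooldown recorded for {deployment_id} (status {exception_status})")

callbacks: list = [BaseCallback(), MetricsCallback()]

# Find the registered metrics callback, if any.
metrics_cb = None
for cb in callbacks:
    if isinstance(cb, MetricsCallback):
        metrics_cb = cb

if metrics_cb is not None:
    metrics_cb.increment_cooled_down("deployment-id-1", "429")
```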
litellm/router_utils/cooldown_handlers.py

@@ -11,7 +11,7 @@ from typing import TYPE_CHECKING, Any, List, Optional, Union
 
 import litellm
 from litellm._logging import verbose_router_logger
-from litellm.router_utils.cooldown_callbacks import router_cooldown_handler
+from litellm.router_utils.cooldown_callbacks import router_cooldown_event_callback
 from litellm.utils import get_utc_datetime
 
 from .router_callbacks.track_deployment_metrics import (
@@ -184,7 +184,7 @@ def _set_cooldown_deployments(
 
     # Trigger cooldown callback handler
     asyncio.create_task(
-        router_cooldown_handler(
+        router_cooldown_event_callback(
             litellm_router_instance=litellm_router_instance,
             deployment_id=deployment,
             exception_status=exception_status,
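`_set_cooldown_deployments` fires the callback with `asyncio.create_task`, i.e. fire-and-forget, so cooldown bookkeeping never blocks the request path. A minimal sketch of that pattern (names invented for the demo):

```python
import asyncio

async def cooldown_event_callback(deployment_id: str, exception_status: int) -> None:
    print(f"cooling down {deployment_id} (status {exception_status})")

async def handle_failure() -> None:
    # Schedule the callback on the running loop without awaiting it.
    asyncio.create_task(cooldown_event_callback("deployment-id-1", 429))
    await asyncio.sleep(0)  # yield once so the task can run in this demo

asyncio.run(handle_failure())
```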