Mirror of https://github.com/BerriAI/litellm.git (synced 2025-04-25 18:54:30 +00:00)
File history (all commits authored by Krrish Dholakia <krrishdholakia@gmail.com>):

b12a9892b7  Wed Apr 2 08:09:56 2025 -0700   fix(utils.py): don't modify openai_token_counter
294de31803  Mon Mar 24 21:22:40 2025 -0700  fix: fix linting error
cb6e9fbe40  Mon Mar 24 19:52:45 2025 -0700  refactor: complete migration
bfc159172d  Mon Mar 24 19:09:59 2025 -0700  refactor: refactor more constants
43ffb6a558  Mon Mar 24 18:45:24 2025 -0700  fix: test
04dbe4310c  Mon Mar 24 18:28:58 2025 -0700  refactor: refactor: move more constants into constants.py
3c26284aff  Mon Mar 24 18:14:46 2025 -0700  refactor: migrate hardcoded constants out of __init__.py
c11e0de69d  Mon Mar 24 18:11:21 2025 -0700  build: migrate all constants into constants.py
7882bdc787  Mon Mar 24 18:07:37 2025 -0700  build: initial test banning hardcoded numbers in repo
90 lines
3 KiB
Python
from typing import TYPE_CHECKING, Any, Optional, Union

from litellm._logging import verbose_router_logger
from litellm.constants import MAX_EXCEPTION_MESSAGE_LENGTH
from litellm.router_utils.cooldown_handlers import (
    _async_get_cooldown_deployments_with_debug_info,
)
from litellm.types.integrations.slack_alerting import AlertType
from litellm.types.router import RouterRateLimitError

if TYPE_CHECKING:
    from opentelemetry.trace import Span as _Span

    from litellm.router import Router as _Router

    LitellmRouter = _Router
    Span = Union[_Span, Any]
else:
    LitellmRouter = Any
    Span = Any
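
# The concrete Router and opentelemetry Span types are imported only under
# TYPE_CHECKING, so at runtime both aliases resolve to Any. This keeps the
# module importable when opentelemetry is not installed and avoids importing
# litellm.router at load time (which could otherwise create a circular
# import); the rationale is inferred from the pattern, not stated in the
# source.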
async def send_llm_exception_alert(
    litellm_router_instance: LitellmRouter,
    request_kwargs: dict,
    error_traceback_str: str,
    original_exception,
):
    """
    Sends a Slack / MS Teams alert for an LLM API call failure.
    Only runs if router.slack_alerting_logger is set.

    Parameters:
        litellm_router_instance (_Router): The LitellmRouter instance.
        request_kwargs (dict): The kwargs passed with the original request.
        error_traceback_str (str): Formatted traceback of the failure.
        original_exception (Any): The original exception that occurred.

    Returns:
        None
    """
    if litellm_router_instance is None:
        return

    if not hasattr(litellm_router_instance, "slack_alerting_logger"):
        return

    if litellm_router_instance.slack_alerting_logger is None:
        return

    if "proxy_server_request" in request_kwargs:
        # Do not alert on requests that came through the litellm proxy server;
        # the proxy is already instrumented to report LLM API call failures.
        return

    litellm_debug_info = getattr(original_exception, "litellm_debug_info", None)
    exception_str = str(original_exception)
    if litellm_debug_info is not None:
        exception_str += litellm_debug_info
    exception_str += f"\n\n{error_traceback_str[:MAX_EXCEPTION_MESSAGE_LENGTH]}"

    await litellm_router_instance.slack_alerting_logger.send_alert(
        message=f"LLM API call failed: `{exception_str}`",
        level="High",
        alert_type=AlertType.llm_exceptions,
        alerting_metadata={},
    )
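
# Illustrative usage sketch: a minimal example of how a caller's failure path
# might invoke send_llm_exception_alert. The surrounding names (`router`,
# `model`, `messages`, `kwargs`) are hypothetical and not part of this module;
# traceback.format_exc() and Router.acompletion are real APIs.
#
#     import traceback
#
#     try:
#         response = await router.acompletion(model=model, messages=messages)
#     except Exception as e:
#         await send_llm_exception_alert(
#             litellm_router_instance=router,
#             request_kwargs=kwargs,
#             error_traceback_str=traceback.format_exc(),
#             original_exception=e,
#         )
#         raise
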
async def async_raise_no_deployment_exception(
    litellm_router_instance: LitellmRouter, model: str, parent_otel_span: Optional[Span]
):
    """
    Builds and returns a RouterRateLimitError for the caller to raise when no
    deployment is available for the given model.
    """
    verbose_router_logger.info(
        f"get_available_deployment for model: {model}, No deployment available"
    )
    model_ids = litellm_router_instance.get_model_ids(model_name=model)
    _cooldown_time = litellm_router_instance.cooldown_cache.get_min_cooldown(
        model_ids=model_ids, parent_otel_span=parent_otel_span
    )
    _cooldown_list = await _async_get_cooldown_deployments_with_debug_info(
        litellm_router_instance=litellm_router_instance,
        parent_otel_span=parent_otel_span,
    )
    return RouterRateLimitError(
        model=model,
        cooldown_time=_cooldown_time,
        enable_pre_call_checks=litellm_router_instance.enable_pre_call_checks,
        cooldown_list=_cooldown_list,
    )
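
# Illustrative call site (hypothetical; litellm's actual routing code may
# differ). Despite the function's name, it returns the RouterRateLimitError
# rather than raising it, so the caller is expected to raise the result:
#
#     raise await async_raise_no_deployment_exception(
#         litellm_router_instance=self,
#         model=model,
#         parent_otel_span=parent_otel_span,
#     )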