forked from phoenix/litellm-mirror
(feat proxy slack alerting) - allow opting in to getting key / internal user alerts (#5990)
* define all slack alert types * use correct type hints for alert type * use correct defaults on slack alerting * add readme for slack alerting * fix linting error * update readme * docs all alert types * update slack alerting docs * fix slack alerting docs * handle new testing dir structure * fix config for testing * fix testing folder related imports * fix /tests import errors * fix import stream_chunk_testdata * docs alert types * fix test test_langfuse_trace_id * fix type checks for slack alerting * fix outage alerting test slack
This commit is contained in:
parent
8225880af0
commit
045ecf3ffb
21 changed files with 283 additions and 210 deletions
|
@ -16,15 +16,6 @@ jobs:
|
||||||
command: |
|
command: |
|
||||||
echo "Git commit hash: $CIRCLE_SHA1"
|
echo "Git commit hash: $CIRCLE_SHA1"
|
||||||
|
|
||||||
- run:
|
|
||||||
name: Check if litellm dir was updated or if pyproject.toml was modified
|
|
||||||
command: |
|
|
||||||
if [ -n "$(git diff --name-only $CIRCLE_SHA1^..$CIRCLE_SHA1 | grep -E 'pyproject\.toml|litellm/')" ]; then
|
|
||||||
echo "litellm updated"
|
|
||||||
else
|
|
||||||
echo "No changes to litellm or pyproject.toml. Skipping tests."
|
|
||||||
circleci step halt
|
|
||||||
fi
|
|
||||||
- restore_cache:
|
- restore_cache:
|
||||||
keys:
|
keys:
|
||||||
- v1-dependencies-{{ checksum ".circleci/requirements.txt" }}
|
- v1-dependencies-{{ checksum ".circleci/requirements.txt" }}
|
||||||
|
|
|
@ -112,31 +112,24 @@ response = client.chat.completions.create(
|
||||||
|
|
||||||
### Opting into specific alert types
|
### Opting into specific alert types
|
||||||
|
|
||||||
Set `alert_types` if you want to Opt into only specific alert types
|
Set `alert_types` if you want to opt into only specific alert types. When `alert_types` is not set, all Default Alert Types are enabled.
|
||||||
|
|
||||||
|
👉 [**See all alert types here**](#all-possible-alert-types)
|
||||||
|
|
||||||
```shell
|
```shell
|
||||||
general_settings:
|
general_settings:
|
||||||
alerting: ["slack"]
|
alerting: ["slack"]
|
||||||
alert_types: ["spend_reports"]
|
alert_types: [
|
||||||
```
|
"llm_exceptions",
|
||||||
|
"llm_too_slow",
|
||||||
All Possible Alert Types
|
|
||||||
|
|
||||||
```python
|
|
||||||
AlertType = Literal[
|
|
||||||
"llm_exceptions", # LLM API Exceptions
|
|
||||||
"llm_too_slow", # LLM Responses slower than alerting_threshold
|
|
||||||
"llm_requests_hanging",
|
"llm_requests_hanging",
|
||||||
"budget_alerts",
|
"budget_alerts",
|
||||||
|
"spend_reports",
|
||||||
"db_exceptions",
|
"db_exceptions",
|
||||||
"daily_reports",
|
"daily_reports",
|
||||||
"spend_reports",
|
|
||||||
"fallback_reports",
|
|
||||||
"cooldown_deployment",
|
"cooldown_deployment",
|
||||||
"new_model_added",
|
"new_model_added",
|
||||||
"outage_alerts",
|
]
|
||||||
]
|
|
||||||
|
|
||||||
```
|
```
|
||||||
|
|
||||||
### Set specific slack channels per alert type
|
### Set specific slack channels per alert type
|
||||||
|
@ -365,7 +358,7 @@ curl -X GET --location 'http://0.0.0.0:4000/health/services?service=webhook' \
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
## **API Spec for Webhook Event**
|
### API Spec for Webhook Event
|
||||||
|
|
||||||
- `spend` *float*: The current spend amount for the 'event_group'.
|
- `spend` *float*: The current spend amount for the 'event_group'.
|
||||||
- `max_budget` *float or null*: The maximum allowed budget for the 'event_group'. null if not set.
|
- `max_budget` *float or null*: The maximum allowed budget for the 'event_group'. null if not set.
|
||||||
|
@ -417,4 +410,50 @@ general_settings:
|
||||||
region_outage_alert_ttl: 60 # time-window in seconds
|
region_outage_alert_ttl: 60 # time-window in seconds
|
||||||
minor_outage_alert_threshold: 5 # number of errors to trigger a minor alert
|
minor_outage_alert_threshold: 5 # number of errors to trigger a minor alert
|
||||||
major_outage_alert_threshold: 10 # number of errors to trigger a major alert
|
major_outage_alert_threshold: 10 # number of errors to trigger a major alert
|
||||||
```
|
```
|
||||||
|
|
||||||
|
## **All Possible Alert Types**
|
||||||
|
|
||||||
|
👉 [**Here is how you can set specific alert types**](#opting-into-specific-alert-types)
|
||||||
|
|
||||||
|
LLM-related Alerts
|
||||||
|
|
||||||
|
| Alert Type | Description | Default On |
|
||||||
|
|------------|-------------|---------|
|
||||||
|
| `llm_exceptions` | Alerts for LLM API exceptions | ✅ |
|
||||||
|
| `llm_too_slow` | Notifications for LLM responses slower than the set threshold | ✅ |
|
||||||
|
| `llm_requests_hanging` | Alerts for LLM requests that are not completing | ✅ |
|
||||||
|
| `cooldown_deployment` | Alerts when a deployment is put into cooldown | ✅ |
|
||||||
|
| `new_model_added` | Notifications when a new model is added to litellm proxy through `/model/new` | ✅ |
|
||||||
|
| `outage_alerts` | Alerts when a specific LLM deployment is facing an outage | ✅ |
|
||||||
|
| `region_outage_alerts` | Alerts when a specific LLM region is facing an outage. Example: us-east-1 | ✅ |
|
||||||
|
|
||||||
|
Budget and Spend Alerts
|
||||||
|
|
||||||
|
| Alert Type | Description | Default On|
|
||||||
|
|------------|-------------|---------|
|
||||||
|
| `budget_alerts` | Notifications related to budget limits or thresholds | ✅ |
|
||||||
|
| `spend_reports` | Periodic reports on spending across teams or tags | ✅ |
|
||||||
|
| `failed_tracking_spend` | Alerts when spend tracking fails | ✅ |
|
||||||
|
| `daily_reports` | Daily Spend reports | ✅ |
|
||||||
|
| `fallback_reports` | Weekly Reports on LLM fallback occurrences | ✅ |
|
||||||
|
|
||||||
|
Database Alerts
|
||||||
|
|
||||||
|
| Alert Type | Description | Default On |
|
||||||
|
|------------|-------------|---------|
|
||||||
|
| `db_exceptions` | Notifications for database-related exceptions | ✅ |
|
||||||
|
|
||||||
|
Management Endpoint Alerts - Virtual Key, Team, Internal User
|
||||||
|
|
||||||
|
| Alert Type | Description | Default On |
|
||||||
|
|------------|-------------|---------|
|
||||||
|
| `new_virtual_key_created` | Notifications when a new virtual key is created | ❌ |
|
||||||
|
| `virtual_key_updated` | Alerts when a virtual key is modified | ❌ |
|
||||||
|
| `virtual_key_deleted` | Notifications when a virtual key is removed | ❌ |
|
||||||
|
| `new_team_created` | Alerts for the creation of a new team | ❌ |
|
||||||
|
| `team_updated` | Notifications when team details are modified | ❌ |
|
||||||
|
| `team_deleted` | Alerts when a team is deleted | ❌ |
|
||||||
|
| `new_internal_user_created` | Notifications for new internal user accounts | ❌ |
|
||||||
|
| `internal_user_updated` | Alerts when an internal user's details are changed | ❌ |
|
||||||
|
| `internal_user_deleted` | Notifications when an internal user account is removed | ❌ |
|
13
litellm/integrations/SlackAlerting/Readme.md
Normal file
13
litellm/integrations/SlackAlerting/Readme.md
Normal file
|
@ -0,0 +1,13 @@
|
||||||
|
# Slack Alerting on LiteLLM Gateway
|
||||||
|
|
||||||
|
This folder contains the Slack Alerting integration for LiteLLM Gateway.
|
||||||
|
|
||||||
|
## Folder Structure
|
||||||
|
|
||||||
|
- `slack_alerting.py`: This is the main file that handles sending different types of alerts
|
||||||
|
- `batching_handler.py`: Handles batching and sending httpx POST requests to Slack. Slack alerts are sent every 10s or when the number of events is greater than X. This is done to ensure litellm has good performance under high traffic.
|
||||||
|
- `types.py`: This file contains the AlertType enum which is used to define the different types of alerts that can be sent to Slack.
|
||||||
|
- `utils.py`: This file contains common utils used specifically for slack alerting
|
||||||
|
|
||||||
|
## Further Reading
|
||||||
|
- [Doc setting up Alerting on LiteLLM Proxy (Gateway)](https://docs.litellm.ai/docs/proxy/alerting)
|
|
@ -41,7 +41,7 @@ from litellm.types.router import LiteLLM_Params
|
||||||
from ..email_templates.templates import *
|
from ..email_templates.templates import *
|
||||||
from .batching_handler import send_to_webhook, squash_payloads
|
from .batching_handler import send_to_webhook, squash_payloads
|
||||||
from .types import *
|
from .types import *
|
||||||
from .utils import process_slack_alerting_variables
|
from .utils import _add_langfuse_trace_id_to_alert, process_slack_alerting_variables
|
||||||
|
|
||||||
|
|
||||||
class SlackAlerting(CustomBatchLogger):
|
class SlackAlerting(CustomBatchLogger):
|
||||||
|
@ -57,7 +57,7 @@ class SlackAlerting(CustomBatchLogger):
|
||||||
float
|
float
|
||||||
] = None, # threshold for slow / hanging llm responses (in seconds)
|
] = None, # threshold for slow / hanging llm responses (in seconds)
|
||||||
alerting: Optional[List] = [],
|
alerting: Optional[List] = [],
|
||||||
alert_types: List[AlertType] = list(get_args(AlertType)),
|
alert_types: List[AlertType] = DEFAULT_ALERT_TYPES,
|
||||||
alert_to_webhook_url: Optional[
|
alert_to_webhook_url: Optional[
|
||||||
Dict[AlertType, Union[List[str], str]]
|
Dict[AlertType, Union[List[str], str]]
|
||||||
] = None, # if user wants to separate alerts to diff channels
|
] = None, # if user wants to separate alerts to diff channels
|
||||||
|
@ -87,7 +87,7 @@ class SlackAlerting(CustomBatchLogger):
|
||||||
self,
|
self,
|
||||||
alerting: Optional[List] = None,
|
alerting: Optional[List] = None,
|
||||||
alerting_threshold: Optional[float] = None,
|
alerting_threshold: Optional[float] = None,
|
||||||
alert_types: Optional[List] = None,
|
alert_types: Optional[List[AlertType]] = None,
|
||||||
alert_to_webhook_url: Optional[Dict[AlertType, Union[List[str], str]]] = None,
|
alert_to_webhook_url: Optional[Dict[AlertType, Union[List[str], str]]] = None,
|
||||||
alerting_args: Optional[Dict] = None,
|
alerting_args: Optional[Dict] = None,
|
||||||
llm_router: Optional[litellm.Router] = None,
|
llm_router: Optional[litellm.Router] = None,
|
||||||
|
@ -127,47 +127,8 @@ class SlackAlerting(CustomBatchLogger):
|
||||||
def _all_possible_alert_types(self):
|
def _all_possible_alert_types(self):
|
||||||
# used by the UI to show all supported alert types
|
# used by the UI to show all supported alert types
|
||||||
# Note: This is not the alerts the user has configured, instead it's all possible alert types a user can select
|
# Note: This is not the alerts the user has configured, instead it's all possible alert types a user can select
|
||||||
return [
|
# return list of all values AlertType enum
|
||||||
"llm_exceptions",
|
return list(AlertType)
|
||||||
"llm_too_slow",
|
|
||||||
"llm_requests_hanging",
|
|
||||||
"budget_alerts",
|
|
||||||
"db_exceptions",
|
|
||||||
]
|
|
||||||
|
|
||||||
async def _add_langfuse_trace_id_to_alert(
|
|
||||||
self,
|
|
||||||
request_data: Optional[dict] = None,
|
|
||||||
) -> Optional[str]:
|
|
||||||
"""
|
|
||||||
Returns langfuse trace url
|
|
||||||
|
|
||||||
- check:
|
|
||||||
-> existing_trace_id
|
|
||||||
-> trace_id
|
|
||||||
-> litellm_call_id
|
|
||||||
"""
|
|
||||||
# do nothing for now
|
|
||||||
if (
|
|
||||||
request_data is not None
|
|
||||||
and request_data.get("litellm_logging_obj", None) is not None
|
|
||||||
):
|
|
||||||
trace_id: Optional[str] = None
|
|
||||||
litellm_logging_obj: Logging = request_data["litellm_logging_obj"]
|
|
||||||
|
|
||||||
for _ in range(3):
|
|
||||||
trace_id = litellm_logging_obj._get_trace_id(service_name="langfuse")
|
|
||||||
if trace_id is not None:
|
|
||||||
break
|
|
||||||
await asyncio.sleep(3) # wait 3s before retrying for trace id
|
|
||||||
|
|
||||||
_langfuse_object = litellm_logging_obj._get_callback_object(
|
|
||||||
service_name="langfuse"
|
|
||||||
)
|
|
||||||
if _langfuse_object is not None:
|
|
||||||
base_url = _langfuse_object.Langfuse.base_url
|
|
||||||
return f"{base_url}/trace/{trace_id}"
|
|
||||||
return None
|
|
||||||
|
|
||||||
def _response_taking_too_long_callback_helper(
|
def _response_taking_too_long_callback_helper(
|
||||||
self,
|
self,
|
||||||
|
@ -275,7 +236,7 @@ class SlackAlerting(CustomBatchLogger):
|
||||||
await self.send_alert(
|
await self.send_alert(
|
||||||
message=slow_message + request_info,
|
message=slow_message + request_info,
|
||||||
level="Low",
|
level="Low",
|
||||||
alert_type="llm_too_slow",
|
alert_type=AlertType.llm_too_slow,
|
||||||
alerting_metadata=alerting_metadata,
|
alerting_metadata=alerting_metadata,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -467,7 +428,7 @@ class SlackAlerting(CustomBatchLogger):
|
||||||
await self.send_alert(
|
await self.send_alert(
|
||||||
message=message,
|
message=message,
|
||||||
level="Low",
|
level="Low",
|
||||||
alert_type="daily_reports",
|
alert_type=AlertType.daily_reports,
|
||||||
alerting_metadata={},
|
alerting_metadata={},
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -563,7 +524,7 @@ class SlackAlerting(CustomBatchLogger):
|
||||||
)
|
)
|
||||||
|
|
||||||
if "langfuse" in litellm.success_callback:
|
if "langfuse" in litellm.success_callback:
|
||||||
langfuse_url = await self._add_langfuse_trace_id_to_alert(
|
langfuse_url = await _add_langfuse_trace_id_to_alert(
|
||||||
request_data=request_data,
|
request_data=request_data,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -580,7 +541,7 @@ class SlackAlerting(CustomBatchLogger):
|
||||||
await self.send_alert(
|
await self.send_alert(
|
||||||
message=alerting_message + request_info,
|
message=alerting_message + request_info,
|
||||||
level="Medium",
|
level="Medium",
|
||||||
alert_type="llm_requests_hanging",
|
alert_type=AlertType.llm_requests_hanging,
|
||||||
alerting_metadata=alerting_metadata,
|
alerting_metadata=alerting_metadata,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -600,7 +561,7 @@ class SlackAlerting(CustomBatchLogger):
|
||||||
await self.send_alert(
|
await self.send_alert(
|
||||||
message=message,
|
message=message,
|
||||||
level="High",
|
level="High",
|
||||||
alert_type="failed_tracking_spend",
|
alert_type=AlertType.failed_tracking_spend,
|
||||||
alerting_metadata={},
|
alerting_metadata={},
|
||||||
)
|
)
|
||||||
await _cache.async_set_cache(
|
await _cache.async_set_cache(
|
||||||
|
@ -701,7 +662,7 @@ class SlackAlerting(CustomBatchLogger):
|
||||||
await self.send_alert(
|
await self.send_alert(
|
||||||
message=event_message + "\n\n" + user_info_str,
|
message=event_message + "\n\n" + user_info_str,
|
||||||
level="High",
|
level="High",
|
||||||
alert_type="budget_alerts",
|
alert_type=AlertType.budget_alerts,
|
||||||
user_info=webhook_event,
|
user_info=webhook_event,
|
||||||
alerting_metadata={},
|
alerting_metadata={},
|
||||||
)
|
)
|
||||||
|
@ -902,7 +863,7 @@ class SlackAlerting(CustomBatchLogger):
|
||||||
await self.send_alert(
|
await self.send_alert(
|
||||||
message=msg,
|
message=msg,
|
||||||
level="Medium",
|
level="Medium",
|
||||||
alert_type="outage_alerts",
|
alert_type=AlertType.outage_alerts,
|
||||||
alerting_metadata={},
|
alerting_metadata={},
|
||||||
)
|
)
|
||||||
# set to true
|
# set to true
|
||||||
|
@ -928,7 +889,7 @@ class SlackAlerting(CustomBatchLogger):
|
||||||
await self.send_alert(
|
await self.send_alert(
|
||||||
message=msg,
|
message=msg,
|
||||||
level="High",
|
level="High",
|
||||||
alert_type="outage_alerts",
|
alert_type=AlertType.outage_alerts,
|
||||||
alerting_metadata={},
|
alerting_metadata={},
|
||||||
)
|
)
|
||||||
# set to true
|
# set to true
|
||||||
|
@ -1031,7 +992,7 @@ class SlackAlerting(CustomBatchLogger):
|
||||||
await self.send_alert(
|
await self.send_alert(
|
||||||
message=msg,
|
message=msg,
|
||||||
level="Medium",
|
level="Medium",
|
||||||
alert_type="outage_alerts",
|
alert_type=AlertType.outage_alerts,
|
||||||
alerting_metadata={},
|
alerting_metadata={},
|
||||||
)
|
)
|
||||||
# set to true
|
# set to true
|
||||||
|
@ -1053,7 +1014,7 @@ class SlackAlerting(CustomBatchLogger):
|
||||||
await self.send_alert(
|
await self.send_alert(
|
||||||
message=msg,
|
message=msg,
|
||||||
level="High",
|
level="High",
|
||||||
alert_type="outage_alerts",
|
alert_type=AlertType.outage_alerts,
|
||||||
alerting_metadata={},
|
alerting_metadata={},
|
||||||
)
|
)
|
||||||
# set to true
|
# set to true
|
||||||
|
@ -1118,7 +1079,7 @@ Model Info:
|
||||||
alert_val = self.send_alert(
|
alert_val = self.send_alert(
|
||||||
message=message,
|
message=message,
|
||||||
level="Low",
|
level="Low",
|
||||||
alert_type="new_model_added",
|
alert_type=AlertType.new_model_added,
|
||||||
alerting_metadata={},
|
alerting_metadata={},
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -1354,7 +1315,7 @@ Model Info:
|
||||||
self,
|
self,
|
||||||
message: str,
|
message: str,
|
||||||
level: Literal["Low", "Medium", "High"],
|
level: Literal["Low", "Medium", "High"],
|
||||||
alert_type: Literal[AlertType],
|
alert_type: AlertType,
|
||||||
alerting_metadata: dict,
|
alerting_metadata: dict,
|
||||||
user_info: Optional[WebhookEvent] = None,
|
user_info: Optional[WebhookEvent] = None,
|
||||||
**kwargs,
|
**kwargs,
|
||||||
|
@ -1395,7 +1356,6 @@ Model Info:
|
||||||
|
|
||||||
if "slack" not in self.alerting:
|
if "slack" not in self.alerting:
|
||||||
return
|
return
|
||||||
|
|
||||||
if alert_type not in self.alert_types:
|
if alert_type not in self.alert_types:
|
||||||
return
|
return
|
||||||
|
|
||||||
|
@ -1654,7 +1614,7 @@ Model Info:
|
||||||
await self.send_alert(
|
await self.send_alert(
|
||||||
message=_spend_message,
|
message=_spend_message,
|
||||||
level="Low",
|
level="Low",
|
||||||
alert_type="spend_reports",
|
alert_type=AlertType.spend_reports,
|
||||||
alerting_metadata={},
|
alerting_metadata={},
|
||||||
)
|
)
|
||||||
except ValueError as ve:
|
except ValueError as ve:
|
||||||
|
@ -1713,7 +1673,7 @@ Model Info:
|
||||||
await self.send_alert(
|
await self.send_alert(
|
||||||
message=_spend_message,
|
message=_spend_message,
|
||||||
level="Low",
|
level="Low",
|
||||||
alert_type="spend_reports",
|
alert_type=AlertType.spend_reports,
|
||||||
alerting_metadata={},
|
alerting_metadata={},
|
||||||
)
|
)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
@ -1742,7 +1702,7 @@ Model Info:
|
||||||
await self.send_alert(
|
await self.send_alert(
|
||||||
message=fallback_message,
|
message=fallback_message,
|
||||||
level="Low",
|
level="Low",
|
||||||
alert_type="fallback_reports",
|
alert_type=AlertType.fallback_reports,
|
||||||
alerting_metadata={},
|
alerting_metadata={},
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -1754,14 +1714,19 @@ Model Info:
|
||||||
async def send_virtual_key_event_slack(
|
async def send_virtual_key_event_slack(
|
||||||
self,
|
self,
|
||||||
key_event: VirtualKeyEvent,
|
key_event: VirtualKeyEvent,
|
||||||
|
alert_type: AlertType,
|
||||||
event_name: str,
|
event_name: str,
|
||||||
):
|
):
|
||||||
"""
|
"""
|
||||||
Helper to send fallback statistics from prometheus server -> to slack
|
Handles sending Virtual Key related alerts
|
||||||
|
|
||||||
This runs once per day and sends an overview of all the fallback statistics
|
Example:
|
||||||
|
- New Virtual Key Created
|
||||||
|
- Internal User Updated
|
||||||
|
- Team Created, Updated, Deleted
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
|
|
||||||
message = f"`{event_name}`\n"
|
message = f"`{event_name}`\n"
|
||||||
|
|
||||||
key_event_dict = key_event.model_dump()
|
key_event_dict = key_event.model_dump()
|
||||||
|
@ -1783,7 +1748,7 @@ Model Info:
|
||||||
await self.send_alert(
|
await self.send_alert(
|
||||||
message=message,
|
message=message,
|
||||||
level="High",
|
level="High",
|
||||||
alert_type="fallback_reports",
|
alert_type=alert_type,
|
||||||
alerting_metadata={},
|
alerting_metadata={},
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
|
@ -32,7 +32,7 @@ class LiteLLMBase(BaseModel):
|
||||||
Implements default functions, all pydantic objects should have.
|
Implements default functions, all pydantic objects should have.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def json(self, **kwargs):
|
def json(self, **kwargs): # type: ignore
|
||||||
try:
|
try:
|
||||||
return self.model_dump() # noqa
|
return self.model_dump() # noqa
|
||||||
except:
|
except:
|
||||||
|
@ -41,14 +41,14 @@ class LiteLLMBase(BaseModel):
|
||||||
|
|
||||||
|
|
||||||
class SlackAlertingArgsEnum(Enum):
|
class SlackAlertingArgsEnum(Enum):
|
||||||
daily_report_frequency: int = 12 * 60 * 60
|
daily_report_frequency = 12 * 60 * 60
|
||||||
report_check_interval: int = 5 * 60
|
report_check_interval = 5 * 60
|
||||||
budget_alert_ttl: int = 24 * 60 * 60
|
budget_alert_ttl = 24 * 60 * 60
|
||||||
outage_alert_ttl: int = 1 * 60
|
outage_alert_ttl = 1 * 60
|
||||||
region_outage_alert_ttl: int = 1 * 60
|
region_outage_alert_ttl = 1 * 60
|
||||||
minor_outage_alert_threshold: int = 1 * 5
|
minor_outage_alert_threshold = 1 * 5
|
||||||
major_outage_alert_threshold: int = 1 * 10
|
major_outage_alert_threshold = 1 * 10
|
||||||
max_outage_alert_list_size: int = 1 * 10
|
max_outage_alert_list_size = 1 * 10
|
||||||
|
|
||||||
|
|
||||||
class SlackAlertingArgs(LiteLLMBase):
|
class SlackAlertingArgs(LiteLLMBase):
|
||||||
|
@ -56,7 +56,7 @@ class SlackAlertingArgs(LiteLLMBase):
|
||||||
default=int(
|
default=int(
|
||||||
os.getenv(
|
os.getenv(
|
||||||
"SLACK_DAILY_REPORT_FREQUENCY",
|
"SLACK_DAILY_REPORT_FREQUENCY",
|
||||||
SlackAlertingArgsEnum.daily_report_frequency.value,
|
int(SlackAlertingArgsEnum.daily_report_frequency.value),
|
||||||
)
|
)
|
||||||
),
|
),
|
||||||
description="Frequency of receiving deployment latency/failure reports. Default is 12hours. Value is in seconds.",
|
description="Frequency of receiving deployment latency/failure reports. Default is 12hours. Value is in seconds.",
|
||||||
|
@ -119,3 +119,75 @@ class SlackAlertingCacheKeys(Enum):
|
||||||
failed_requests_key = "failed_requests_daily_metrics"
|
failed_requests_key = "failed_requests_daily_metrics"
|
||||||
latency_key = "latency_daily_metrics"
|
latency_key = "latency_daily_metrics"
|
||||||
report_sent_key = "daily_metrics_report_sent"
|
report_sent_key = "daily_metrics_report_sent"
|
||||||
|
|
||||||
|
|
||||||
|
class AlertType(str, Enum):
|
||||||
|
"""
|
||||||
|
Enum for alert types and management event types
|
||||||
|
"""
|
||||||
|
|
||||||
|
# LLM-related alerts
|
||||||
|
llm_exceptions = "llm_exceptions"
|
||||||
|
llm_too_slow = "llm_too_slow"
|
||||||
|
llm_requests_hanging = "llm_requests_hanging"
|
||||||
|
|
||||||
|
# Budget and spend alerts
|
||||||
|
budget_alerts = "budget_alerts"
|
||||||
|
spend_reports = "spend_reports"
|
||||||
|
failed_tracking_spend = "failed_tracking_spend"
|
||||||
|
|
||||||
|
# Database alerts
|
||||||
|
db_exceptions = "db_exceptions"
|
||||||
|
|
||||||
|
# Report alerts
|
||||||
|
daily_reports = "daily_reports"
|
||||||
|
|
||||||
|
# Deployment alerts
|
||||||
|
cooldown_deployment = "cooldown_deployment"
|
||||||
|
new_model_added = "new_model_added"
|
||||||
|
|
||||||
|
# Outage alerts
|
||||||
|
outage_alerts = "outage_alerts"
|
||||||
|
region_outage_alerts = "region_outage_alerts"
|
||||||
|
|
||||||
|
# Fallback alerts
|
||||||
|
fallback_reports = "fallback_reports"
|
||||||
|
|
||||||
|
# Virtual Key Events
|
||||||
|
new_virtual_key_created = "new_virtual_key_created"
|
||||||
|
virtual_key_updated = "virtual_key_updated"
|
||||||
|
virtual_key_deleted = "virtual_key_deleted"
|
||||||
|
|
||||||
|
# Team Events
|
||||||
|
new_team_created = "new_team_created"
|
||||||
|
team_updated = "team_updated"
|
||||||
|
team_deleted = "team_deleted"
|
||||||
|
|
||||||
|
# Internal User Events
|
||||||
|
new_internal_user_created = "new_internal_user_created"
|
||||||
|
internal_user_updated = "internal_user_updated"
|
||||||
|
internal_user_deleted = "internal_user_deleted"
|
||||||
|
|
||||||
|
|
||||||
|
DEFAULT_ALERT_TYPES: List[AlertType] = [
|
||||||
|
# LLM related alerts
|
||||||
|
AlertType.llm_exceptions,
|
||||||
|
AlertType.llm_too_slow,
|
||||||
|
AlertType.llm_requests_hanging,
|
||||||
|
# Budget and spend alerts
|
||||||
|
AlertType.budget_alerts,
|
||||||
|
AlertType.spend_reports,
|
||||||
|
AlertType.failed_tracking_spend,
|
||||||
|
# Database alerts
|
||||||
|
AlertType.db_exceptions,
|
||||||
|
# Report alerts
|
||||||
|
AlertType.daily_reports,
|
||||||
|
# Deployment alerts
|
||||||
|
AlertType.cooldown_deployment,
|
||||||
|
AlertType.new_model_added,
|
||||||
|
# Outage alerts
|
||||||
|
AlertType.outage_alerts,
|
||||||
|
AlertType.region_outage_alerts,
|
||||||
|
# Fallback alerts
|
||||||
|
AlertType.fallback_reports,
|
||||||
|
]
|
||||||
|
|
|
@ -2,9 +2,11 @@
|
||||||
Utils used for slack alerting
|
Utils used for slack alerting
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
import asyncio
|
||||||
from typing import Dict, List, Optional, Union
|
from typing import Dict, List, Optional, Union
|
||||||
|
|
||||||
import litellm
|
import litellm
|
||||||
|
from litellm.litellm_core_utils.litellm_logging import Logging
|
||||||
from litellm.proxy._types import AlertType
|
from litellm.proxy._types import AlertType
|
||||||
from litellm.secret_managers.main import get_secret
|
from litellm.secret_managers.main import get_secret
|
||||||
|
|
||||||
|
@ -49,3 +51,37 @@ def process_slack_alerting_variables(
|
||||||
alert_to_webhook_url[alert_type] = _webhook_value_str
|
alert_to_webhook_url[alert_type] = _webhook_value_str
|
||||||
|
|
||||||
return alert_to_webhook_url
|
return alert_to_webhook_url
|
||||||
|
|
||||||
|
|
||||||
|
async def _add_langfuse_trace_id_to_alert(
|
||||||
|
request_data: Optional[dict] = None,
|
||||||
|
) -> Optional[str]:
|
||||||
|
"""
|
||||||
|
Returns langfuse trace url
|
||||||
|
|
||||||
|
- check:
|
||||||
|
-> existing_trace_id
|
||||||
|
-> trace_id
|
||||||
|
-> litellm_call_id
|
||||||
|
"""
|
||||||
|
# do nothing for now
|
||||||
|
if (
|
||||||
|
request_data is not None
|
||||||
|
and request_data.get("litellm_logging_obj", None) is not None
|
||||||
|
):
|
||||||
|
trace_id: Optional[str] = None
|
||||||
|
litellm_logging_obj: Logging = request_data["litellm_logging_obj"]
|
||||||
|
|
||||||
|
for _ in range(3):
|
||||||
|
trace_id = litellm_logging_obj._get_trace_id(service_name="langfuse")
|
||||||
|
if trace_id is not None:
|
||||||
|
break
|
||||||
|
await asyncio.sleep(3) # wait 3s before retrying for trace id
|
||||||
|
|
||||||
|
_langfuse_object = litellm_logging_obj._get_callback_object(
|
||||||
|
service_name="langfuse"
|
||||||
|
)
|
||||||
|
if _langfuse_object is not None:
|
||||||
|
base_url = _langfuse_object.Langfuse.base_url
|
||||||
|
return f"{base_url}/trace/{trace_id}"
|
||||||
|
return None
|
||||||
|
|
|
@ -10,6 +10,7 @@ from typing import TYPE_CHECKING, Any, Dict, List, Literal, Optional, Union
|
||||||
from pydantic import BaseModel, ConfigDict, Extra, Field, Json, model_validator
|
from pydantic import BaseModel, ConfigDict, Extra, Field, Json, model_validator
|
||||||
from typing_extensions import Annotated, TypedDict
|
from typing_extensions import Annotated, TypedDict
|
||||||
|
|
||||||
|
from litellm.integrations.SlackAlerting.types import AlertType
|
||||||
from litellm.types.router import RouterErrors, UpdateRouterConfig
|
from litellm.types.router import RouterErrors, UpdateRouterConfig
|
||||||
from litellm.types.utils import ProviderField
|
from litellm.types.utils import ProviderField
|
||||||
|
|
||||||
|
@ -110,23 +111,6 @@ class LitellmTableNames(enum.Enum):
|
||||||
PROXY_MODEL_TABLE_NAME = "LiteLLM_ModelTable"
|
PROXY_MODEL_TABLE_NAME = "LiteLLM_ModelTable"
|
||||||
|
|
||||||
|
|
||||||
AlertType = Literal[
|
|
||||||
"llm_exceptions",
|
|
||||||
"llm_too_slow",
|
|
||||||
"llm_requests_hanging",
|
|
||||||
"budget_alerts",
|
|
||||||
"db_exceptions",
|
|
||||||
"daily_reports",
|
|
||||||
"spend_reports",
|
|
||||||
"cooldown_deployment",
|
|
||||||
"new_model_added",
|
|
||||||
"outage_alerts",
|
|
||||||
"region_outage_alerts",
|
|
||||||
"fallback_reports",
|
|
||||||
"failed_tracking_spend",
|
|
||||||
]
|
|
||||||
|
|
||||||
|
|
||||||
def hash_token(token: str):
|
def hash_token(token: str):
|
||||||
import hashlib
|
import hashlib
|
||||||
|
|
||||||
|
|
|
@ -11,6 +11,7 @@ from fastapi import APIRouter, Depends, Header, HTTPException, Request, Response
|
||||||
import litellm
|
import litellm
|
||||||
from litellm._logging import verbose_proxy_logger
|
from litellm._logging import verbose_proxy_logger
|
||||||
from litellm.proxy._types import (
|
from litellm.proxy._types import (
|
||||||
|
AlertType,
|
||||||
CallInfo,
|
CallInfo,
|
||||||
ProxyErrorTypes,
|
ProxyErrorTypes,
|
||||||
ProxyException,
|
ProxyException,
|
||||||
|
@ -159,13 +160,6 @@ async def health_services_endpoint(
|
||||||
for (
|
for (
|
||||||
alert_type
|
alert_type
|
||||||
) in proxy_logging_obj.slack_alerting_instance.alert_to_webhook_url:
|
) in proxy_logging_obj.slack_alerting_instance.alert_to_webhook_url:
|
||||||
"""
|
|
||||||
"llm_exceptions",
|
|
||||||
"llm_too_slow",
|
|
||||||
"llm_requests_hanging",
|
|
||||||
"budget_alerts",
|
|
||||||
"db_exceptions",
|
|
||||||
"""
|
|
||||||
# only test alert if it's in active alert types
|
# only test alert if it's in active alert types
|
||||||
if (
|
if (
|
||||||
proxy_logging_obj.slack_alerting_instance.alert_types
|
proxy_logging_obj.slack_alerting_instance.alert_types
|
||||||
|
@ -176,19 +170,19 @@ async def health_services_endpoint(
|
||||||
continue
|
continue
|
||||||
|
|
||||||
test_message = "default test message"
|
test_message = "default test message"
|
||||||
if alert_type == "llm_exceptions":
|
if alert_type == AlertType.llm_exceptions:
|
||||||
test_message = f"LLM Exception test alert"
|
test_message = f"LLM Exception test alert"
|
||||||
elif alert_type == "llm_too_slow":
|
elif alert_type == AlertType.llm_too_slow:
|
||||||
test_message = f"LLM Too Slow test alert"
|
test_message = f"LLM Too Slow test alert"
|
||||||
elif alert_type == "llm_requests_hanging":
|
elif alert_type == AlertType.llm_requests_hanging:
|
||||||
test_message = f"LLM Requests Hanging test alert"
|
test_message = f"LLM Requests Hanging test alert"
|
||||||
elif alert_type == "budget_alerts":
|
elif alert_type == AlertType.budget_alerts:
|
||||||
test_message = f"Budget Alert test alert"
|
test_message = f"Budget Alert test alert"
|
||||||
elif alert_type == "db_exceptions":
|
elif alert_type == AlertType.db_exceptions:
|
||||||
test_message = f"DB Exception test alert"
|
test_message = f"DB Exception test alert"
|
||||||
elif alert_type == "outage_alerts":
|
elif alert_type == AlertType.outage_alerts:
|
||||||
test_message = f"Outage Alert Exception test alert"
|
test_message = f"Outage Alert Exception test alert"
|
||||||
elif alert_type == "daily_reports":
|
elif alert_type == AlertType.daily_reports:
|
||||||
test_message = f"Daily Reports test alert"
|
test_message = f"Daily Reports test alert"
|
||||||
else:
|
else:
|
||||||
test_message = f"Budget Alert test alert"
|
test_message = f"Budget Alert test alert"
|
||||||
|
@ -200,7 +194,7 @@ async def health_services_endpoint(
|
||||||
await proxy_logging_obj.alerting_handler(
|
await proxy_logging_obj.alerting_handler(
|
||||||
message="This is a test slack alert message",
|
message="This is a test slack alert message",
|
||||||
level="Low",
|
level="Low",
|
||||||
alert_type="budget_alerts",
|
alert_type=AlertType.budget_alerts,
|
||||||
)
|
)
|
||||||
|
|
||||||
if prisma_client is not None:
|
if prisma_client is not None:
|
||||||
|
|
|
@ -227,25 +227,27 @@ async def send_management_endpoint_alert(
|
||||||
- An internal user is created, updated, or deleted
|
- An internal user is created, updated, or deleted
|
||||||
- A team is created, updated, or deleted
|
- A team is created, updated, or deleted
|
||||||
"""
|
"""
|
||||||
|
from litellm.integrations.SlackAlerting.types import AlertType
|
||||||
from litellm.proxy.proxy_server import premium_user, proxy_logging_obj
|
from litellm.proxy.proxy_server import premium_user, proxy_logging_obj
|
||||||
|
|
||||||
if premium_user is not True:
|
if premium_user is not True:
|
||||||
return
|
return
|
||||||
|
|
||||||
management_function_to_event_name = {
|
management_function_to_event_name = {
|
||||||
"generate_key_fn": "New Virtual Key Created",
|
"generate_key_fn": AlertType.new_virtual_key_created,
|
||||||
"update_key_fn": "Virtual Key Updated",
|
"update_key_fn": AlertType.virtual_key_updated,
|
||||||
"delete_key_fn": "Virtual Key Deleted",
|
"delete_key_fn": AlertType.virtual_key_deleted,
|
||||||
# Team events
|
# Team events
|
||||||
"new_team": "New Team Created",
|
"new_team": AlertType.new_team_created,
|
||||||
"update_team": "Team Updated",
|
"update_team": AlertType.team_updated,
|
||||||
"delete_team": "Team Deleted",
|
"delete_team": AlertType.team_deleted,
|
||||||
# Internal User events
|
# Internal User events
|
||||||
"new_user": "New Internal User Created",
|
"new_user": AlertType.new_internal_user_created,
|
||||||
"user_update": "Internal User Updated",
|
"user_update": AlertType.internal_user_updated,
|
||||||
"delete_user": "Internal User Deleted",
|
"delete_user": AlertType.internal_user_deleted,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Check if alerting is enabled
|
||||||
if (
|
if (
|
||||||
proxy_logging_obj is not None
|
proxy_logging_obj is not None
|
||||||
and proxy_logging_obj.slack_alerting_instance is not None
|
and proxy_logging_obj.slack_alerting_instance is not None
|
||||||
|
@ -253,6 +255,8 @@ async def send_management_endpoint_alert(
|
||||||
|
|
||||||
# Virtual Key Events
|
# Virtual Key Events
|
||||||
if function_name in management_function_to_event_name:
|
if function_name in management_function_to_event_name:
|
||||||
|
_event_name: AlertType = management_function_to_event_name[function_name]
|
||||||
|
|
||||||
key_event = VirtualKeyEvent(
|
key_event = VirtualKeyEvent(
|
||||||
created_by_user_id=user_api_key_dict.user_id or "Unknown",
|
created_by_user_id=user_api_key_dict.user_id or "Unknown",
|
||||||
created_by_user_role=user_api_key_dict.user_role or "Unknown",
|
created_by_user_role=user_api_key_dict.user_role or "Unknown",
|
||||||
|
@ -260,9 +264,12 @@ async def send_management_endpoint_alert(
|
||||||
request_kwargs=request_kwargs,
|
request_kwargs=request_kwargs,
|
||||||
)
|
)
|
||||||
|
|
||||||
event_name = management_function_to_event_name[function_name]
|
# replace all "_" with " " and capitalize
|
||||||
|
event_name = _event_name.replace("_", " ").title()
|
||||||
await proxy_logging_obj.slack_alerting_instance.send_virtual_key_event_slack(
|
await proxy_logging_obj.slack_alerting_instance.send_virtual_key_event_slack(
|
||||||
key_event=key_event, event_name=event_name
|
key_event=key_event,
|
||||||
|
event_name=event_name,
|
||||||
|
alert_type=_event_name,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -1,42 +1,11 @@
|
||||||
model_list:
|
model_list:
|
||||||
- model_name: multimodalembedding@001
|
- model_name: db-openai-endpoint
|
||||||
litellm_params:
|
litellm_params:
|
||||||
model: vertex_ai/multimodalembedding@001
|
model: openai/gpt-5
|
||||||
vertex_project: "adroit-crow-413218"
|
api_key: fake-key
|
||||||
vertex_location: "us-central1"
|
api_base: https://exampleopenaiendpoint-production.up.railway.app/
|
||||||
vertex_credentials: "adroit-crow-413218-a956eef1a2a8.json"
|
|
||||||
- model_name: text-embedding-ada-002
|
|
||||||
litellm_params:
|
|
||||||
model: openai/text-embedding-ada-002 # The `openai/` prefix will call openai.chat.completions.create
|
|
||||||
api_key: os.environ/OPENAI_API_KEY
|
|
||||||
- model_name: db-openai-endpoint
|
|
||||||
litellm_params:
|
|
||||||
model: openai/gpt-3.5-turbo
|
|
||||||
api_key: fake-key
|
|
||||||
api_base: https://exampleopenaiendpoint-production.up.railway.app/
|
|
||||||
model_info:
|
|
||||||
supported_environments: ["development", "production", "staging"]
|
|
||||||
- model_name: rerank-english-v3.0
|
|
||||||
litellm_params:
|
|
||||||
model: cohere/rerank-english-v3.0
|
|
||||||
api_key: os.environ/COHERE_API_KEY
|
|
||||||
model_info:
|
|
||||||
supported_environments: ["production", "staging"]
|
|
||||||
- model_name: llava-hf
|
|
||||||
litellm_params:
|
|
||||||
model: openai/llava-hf/llava-v1.6-vicuna-7b-hf
|
|
||||||
api_key: fake-key
|
|
||||||
model_info:
|
|
||||||
supported_environments: ["production", "staging"]
|
|
||||||
|
|
||||||
general_settings:
|
general_settings:
|
||||||
service_account_settings:
|
alerting: ["slack"]
|
||||||
enforced_params: ["user"]
|
|
||||||
|
|
||||||
|
|
||||||
litellm_settings:
|
|
||||||
drop_params: True
|
|
||||||
callbacks: ["otel"]
|
|
||||||
success_callback: ["langfuse"]
|
|
||||||
failure_callback: ["langfuse"]
|
|
||||||
|
|
||||||
|
|
|
@ -49,6 +49,8 @@ from litellm.exceptions import RejectedRequestError
|
||||||
from litellm.integrations.custom_guardrail import CustomGuardrail
|
from litellm.integrations.custom_guardrail import CustomGuardrail
|
||||||
from litellm.integrations.custom_logger import CustomLogger
|
from litellm.integrations.custom_logger import CustomLogger
|
||||||
from litellm.integrations.SlackAlerting.slack_alerting import SlackAlerting
|
from litellm.integrations.SlackAlerting.slack_alerting import SlackAlerting
|
||||||
|
from litellm.integrations.SlackAlerting.types import DEFAULT_ALERT_TYPES
|
||||||
|
from litellm.integrations.SlackAlerting.utils import _add_langfuse_trace_id_to_alert
|
||||||
from litellm.litellm_core_utils.core_helpers import (
|
from litellm.litellm_core_utils.core_helpers import (
|
||||||
_get_parent_otel_span_from_kwargs,
|
_get_parent_otel_span_from_kwargs,
|
||||||
get_litellm_metadata_from_kwargs,
|
get_litellm_metadata_from_kwargs,
|
||||||
|
@ -333,12 +335,11 @@ class ProxyLogging:
|
||||||
self.cache_control_check = _PROXY_CacheControlCheck()
|
self.cache_control_check = _PROXY_CacheControlCheck()
|
||||||
self.alerting: Optional[List] = None
|
self.alerting: Optional[List] = None
|
||||||
self.alerting_threshold: float = 300 # default to 5 min. threshold
|
self.alerting_threshold: float = 300 # default to 5 min. threshold
|
||||||
self.alert_types: List[AlertType] = list(get_args(AlertType))
|
self.alert_types: List[AlertType] = DEFAULT_ALERT_TYPES
|
||||||
self.alert_to_webhook_url: Optional[dict] = None
|
self.alert_to_webhook_url: Optional[dict] = None
|
||||||
self.slack_alerting_instance: SlackAlerting = SlackAlerting(
|
self.slack_alerting_instance: SlackAlerting = SlackAlerting(
|
||||||
alerting_threshold=self.alerting_threshold,
|
alerting_threshold=self.alerting_threshold,
|
||||||
alerting=self.alerting,
|
alerting=self.alerting,
|
||||||
alert_types=self.alert_types,
|
|
||||||
internal_usage_cache=self.internal_usage_cache.dual_cache,
|
internal_usage_cache=self.internal_usage_cache.dual_cache,
|
||||||
)
|
)
|
||||||
self.premium_user = premium_user
|
self.premium_user = premium_user
|
||||||
|
@ -644,9 +645,11 @@ class ProxyLogging:
|
||||||
async def failed_tracking_alert(self, error_message: str):
|
async def failed_tracking_alert(self, error_message: str):
|
||||||
if self.alerting is None:
|
if self.alerting is None:
|
||||||
return
|
return
|
||||||
await self.slack_alerting_instance.failed_tracking_alert(
|
|
||||||
error_message=error_message
|
if self.slack_alerting_instance:
|
||||||
)
|
await self.slack_alerting_instance.failed_tracking_alert(
|
||||||
|
error_message=error_message
|
||||||
|
)
|
||||||
|
|
||||||
async def budget_alerts(
|
async def budget_alerts(
|
||||||
self,
|
self,
|
||||||
|
@ -705,10 +708,7 @@ class ProxyLogging:
|
||||||
extra_kwargs = {}
|
extra_kwargs = {}
|
||||||
alerting_metadata = {}
|
alerting_metadata = {}
|
||||||
if request_data is not None:
|
if request_data is not None:
|
||||||
|
_url = await _add_langfuse_trace_id_to_alert(request_data=request_data)
|
||||||
_url = await self.slack_alerting_instance._add_langfuse_trace_id_to_alert(
|
|
||||||
request_data=request_data
|
|
||||||
)
|
|
||||||
|
|
||||||
if _url is not None:
|
if _url is not None:
|
||||||
extra_kwargs["🪢 Langfuse Trace"] = _url
|
extra_kwargs["🪢 Langfuse Trace"] = _url
|
||||||
|
@ -744,7 +744,7 @@ class ProxyLogging:
|
||||||
Currently only logs exceptions to sentry
|
Currently only logs exceptions to sentry
|
||||||
"""
|
"""
|
||||||
### ALERTING ###
|
### ALERTING ###
|
||||||
if "db_exceptions" not in self.alert_types:
|
if AlertType.db_exceptions not in self.alert_types:
|
||||||
return
|
return
|
||||||
if isinstance(original_exception, HTTPException):
|
if isinstance(original_exception, HTTPException):
|
||||||
if isinstance(original_exception.detail, str):
|
if isinstance(original_exception.detail, str):
|
||||||
|
@ -761,7 +761,7 @@ class ProxyLogging:
|
||||||
self.alerting_handler(
|
self.alerting_handler(
|
||||||
message=f"DB read/write call failed: {error_message}",
|
message=f"DB read/write call failed: {error_message}",
|
||||||
level="High",
|
level="High",
|
||||||
alert_type="db_exceptions",
|
alert_type=AlertType.db_exceptions,
|
||||||
request_data={},
|
request_data={},
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
@ -796,7 +796,7 @@ class ProxyLogging:
|
||||||
await self.update_request_status(
|
await self.update_request_status(
|
||||||
litellm_call_id=request_data.get("litellm_call_id", ""), status="fail"
|
litellm_call_id=request_data.get("litellm_call_id", ""), status="fail"
|
||||||
)
|
)
|
||||||
if "llm_exceptions" in self.alert_types and not isinstance(
|
if AlertType.llm_exceptions in self.alert_types and not isinstance(
|
||||||
original_exception, HTTPException
|
original_exception, HTTPException
|
||||||
):
|
):
|
||||||
"""
|
"""
|
||||||
|
@ -813,7 +813,7 @@ class ProxyLogging:
|
||||||
self.alerting_handler(
|
self.alerting_handler(
|
||||||
message=f"LLM API call failed: `{exception_str}`",
|
message=f"LLM API call failed: `{exception_str}`",
|
||||||
level="High",
|
level="High",
|
||||||
alert_type="llm_exceptions",
|
alert_type=AlertType.llm_exceptions,
|
||||||
request_data=request_data,
|
request_data=request_data,
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
|
@ -14,6 +14,8 @@ from typing import Optional
|
||||||
|
|
||||||
import httpx
|
import httpx
|
||||||
|
|
||||||
|
from litellm.integrations.SlackAlerting.types import AlertType
|
||||||
|
|
||||||
# import logging
|
# import logging
|
||||||
# logging.basicConfig(level=logging.DEBUG)
|
# logging.basicConfig(level=logging.DEBUG)
|
||||||
sys.path.insert(0, os.path.abspath("../.."))
|
sys.path.insert(0, os.path.abspath("../.."))
|
||||||
|
@ -99,7 +101,7 @@ async def test_get_api_base():
|
||||||
await _pl.alerting_handler(
|
await _pl.alerting_handler(
|
||||||
message=slow_message + request_info,
|
message=slow_message + request_info,
|
||||||
level="Low",
|
level="Low",
|
||||||
alert_type="llm_too_slow",
|
alert_type=AlertType.llm_too_slow,
|
||||||
)
|
)
|
||||||
print("passed test_get_api_base")
|
print("passed test_get_api_base")
|
||||||
|
|
||||||
|
@ -117,7 +119,7 @@ def test_init():
|
||||||
slack_alerting = SlackAlerting(
|
slack_alerting = SlackAlerting(
|
||||||
alerting_threshold=32,
|
alerting_threshold=32,
|
||||||
alerting=["slack"],
|
alerting=["slack"],
|
||||||
alert_types=["llm_exceptions"],
|
alert_types=[AlertType.llm_exceptions],
|
||||||
internal_usage_cache=DualCache(),
|
internal_usage_cache=DualCache(),
|
||||||
)
|
)
|
||||||
assert slack_alerting.alerting_threshold == 32
|
assert slack_alerting.alerting_threshold == 32
|
||||||
|
@ -710,7 +712,7 @@ async def test_region_outage_alerting_called(
|
||||||
If multiple calls fail, outage alert is sent
|
If multiple calls fail, outage alert is sent
|
||||||
"""
|
"""
|
||||||
slack_alerting = SlackAlerting(
|
slack_alerting = SlackAlerting(
|
||||||
alerting=["webhook"], alert_types=["region_outage_alerts"]
|
alerting=["webhook"], alert_types=[AlertType.region_outage_alerts]
|
||||||
)
|
)
|
||||||
|
|
||||||
litellm.callbacks = [slack_alerting]
|
litellm.callbacks = [slack_alerting]
|
||||||
|
@ -829,6 +831,7 @@ async def test_langfuse_trace_id():
|
||||||
- Unit test for `_add_langfuse_trace_id_to_alert` function in slack_alerting.py
|
- Unit test for `_add_langfuse_trace_id_to_alert` function in slack_alerting.py
|
||||||
"""
|
"""
|
||||||
from litellm.litellm_core_utils.litellm_logging import Logging
|
from litellm.litellm_core_utils.litellm_logging import Logging
|
||||||
|
from litellm.integrations.SlackAlerting.utils import _add_langfuse_trace_id_to_alert
|
||||||
|
|
||||||
litellm.success_callback = ["langfuse"]
|
litellm.success_callback = ["langfuse"]
|
||||||
|
|
||||||
|
@ -856,11 +859,11 @@ async def test_langfuse_trace_id():
|
||||||
slack_alerting = SlackAlerting(
|
slack_alerting = SlackAlerting(
|
||||||
alerting_threshold=32,
|
alerting_threshold=32,
|
||||||
alerting=["slack"],
|
alerting=["slack"],
|
||||||
alert_types=["llm_exceptions"],
|
alert_types=[AlertType.llm_exceptions],
|
||||||
internal_usage_cache=DualCache(),
|
internal_usage_cache=DualCache(),
|
||||||
)
|
)
|
||||||
|
|
||||||
trace_url = await slack_alerting._add_langfuse_trace_id_to_alert(
|
trace_url = await _add_langfuse_trace_id_to_alert(
|
||||||
request_data={"litellm_logging_obj": litellm_logging_obj}
|
request_data={"litellm_logging_obj": litellm_logging_obj}
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
|
@ -8,7 +8,7 @@ load_dotenv()
|
||||||
import io
|
import io
|
||||||
import os
|
import os
|
||||||
|
|
||||||
from tests.local_testing.test_streaming import streaming_format_tests
|
from test_streaming import streaming_format_tests
|
||||||
|
|
||||||
sys.path.insert(
|
sys.path.insert(
|
||||||
0, os.path.abspath("../..")
|
0, os.path.abspath("../..")
|
||||||
|
@ -933,7 +933,7 @@ async def test_gemini_pro_function_calling_httpx(model, sync_mode):
|
||||||
pytest.fail("An unexpected exception occurred - {}".format(str(e)))
|
pytest.fail("An unexpected exception occurred - {}".format(str(e)))
|
||||||
|
|
||||||
|
|
||||||
from tests.local_testing.test_completion import response_format_tests
|
from test_completion import response_format_tests
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize(
|
@pytest.mark.parametrize(
|
||||||
|
|
|
@ -9,7 +9,7 @@ load_dotenv()
|
||||||
import io
|
import io
|
||||||
import os
|
import os
|
||||||
|
|
||||||
from tests.local_testing.test_streaming import streaming_format_tests
|
from test_streaming import streaming_format_tests
|
||||||
|
|
||||||
sys.path.insert(
|
sys.path.insert(
|
||||||
0, os.path.abspath("../..")
|
0, os.path.abspath("../..")
|
||||||
|
|
|
@ -829,7 +829,7 @@ def test_vertex_ai_embedding_completion_cost(caplog):
|
||||||
# """
|
# """
|
||||||
# Relevant issue - https://github.com/BerriAI/litellm/issues/4630
|
# Relevant issue - https://github.com/BerriAI/litellm/issues/4630
|
||||||
# """
|
# """
|
||||||
# from tests.local_testing.test_amazing_vertex_completion import load_vertex_ai_credentials
|
# from test_amazing_vertex_completion import load_vertex_ai_credentials
|
||||||
|
|
||||||
# load_vertex_ai_credentials()
|
# load_vertex_ai_credentials()
|
||||||
# os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
|
# os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
|
||||||
|
|
|
@ -235,7 +235,7 @@ async def test_team_disable_guardrails(mock_acompletion, client_no_auth):
|
||||||
assert e.code == str(403)
|
assert e.code == str(403)
|
||||||
|
|
||||||
|
|
||||||
from tests.local_testing.test_custom_callback_input import CompletionCustomHandler
|
from test_custom_callback_input import CompletionCustomHandler
|
||||||
|
|
||||||
|
|
||||||
@mock_patch_acompletion()
|
@mock_patch_acompletion()
|
||||||
|
@ -815,7 +815,7 @@ from litellm.proxy._types import (
|
||||||
)
|
)
|
||||||
from litellm.proxy.management_endpoints.internal_user_endpoints import new_user
|
from litellm.proxy.management_endpoints.internal_user_endpoints import new_user
|
||||||
from litellm.proxy.management_endpoints.team_endpoints import team_member_add
|
from litellm.proxy.management_endpoints.team_endpoints import team_member_add
|
||||||
from tests.local_testing.test_key_generate_prisma import prisma_client
|
from test_key_generate_prisma import prisma_client
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize(
|
@pytest.mark.parametrize(
|
||||||
|
|
|
@ -9,7 +9,7 @@ load_dotenv()
|
||||||
import io
|
import io
|
||||||
import os
|
import os
|
||||||
|
|
||||||
from tests.local_testing.test_streaming import streaming_format_tests
|
from test_streaming import streaming_format_tests
|
||||||
|
|
||||||
sys.path.insert(
|
sys.path.insert(
|
||||||
0, os.path.abspath("../..")
|
0, os.path.abspath("../..")
|
||||||
|
|
|
@ -16,7 +16,7 @@ import pytest
|
||||||
from openai import OpenAI
|
from openai import OpenAI
|
||||||
|
|
||||||
import litellm
|
import litellm
|
||||||
from tests.local_testing import stream_chunk_testdata
|
import stream_chunk_testdata
|
||||||
from litellm import completion, stream_chunk_builder
|
from litellm import completion, stream_chunk_builder
|
||||||
|
|
||||||
dotenv.load_dotenv()
|
dotenv.load_dotenv()
|
||||||
|
|
|
@ -1145,7 +1145,7 @@ def test_completion_claude_stream_bad_key():
|
||||||
|
|
||||||
@pytest.mark.parametrize("provider", ["vertex_ai_beta"]) # ""
|
@pytest.mark.parametrize("provider", ["vertex_ai_beta"]) # ""
|
||||||
def test_vertex_ai_stream(provider):
|
def test_vertex_ai_stream(provider):
|
||||||
from tests.local_testing.test_amazing_vertex_completion import (
|
from test_amazing_vertex_completion import (
|
||||||
load_vertex_ai_credentials,
|
load_vertex_ai_credentials,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -3951,7 +3951,7 @@ def test_unit_test_perplexity_citations_chunk():
|
||||||
@pytest.mark.flaky(retries=3, delay=1)
|
@pytest.mark.flaky(retries=3, delay=1)
|
||||||
def test_streaming_tool_calls_valid_json_str(model):
|
def test_streaming_tool_calls_valid_json_str(model):
|
||||||
if "vertex_ai" in model:
|
if "vertex_ai" in model:
|
||||||
from tests.local_testing.test_amazing_vertex_completion import (
|
from test_amazing_vertex_completion import (
|
||||||
load_vertex_ai_credentials,
|
load_vertex_ai_credentials,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
|
@ -4114,7 +4114,7 @@ async def test_async_text_completion_chat_model_stream():
|
||||||
async def test_completion_codestral_fim_api(model):
|
async def test_completion_codestral_fim_api(model):
|
||||||
try:
|
try:
|
||||||
if model == "vertex_ai/codestral@2405":
|
if model == "vertex_ai/codestral@2405":
|
||||||
from tests.local_testing.test_amazing_vertex_completion import (
|
from test_amazing_vertex_completion import (
|
||||||
load_vertex_ai_credentials,
|
load_vertex_ai_credentials,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -4158,7 +4158,7 @@ async def test_completion_codestral_fim_api(model):
|
||||||
async def test_completion_codestral_fim_api_stream(model):
|
async def test_completion_codestral_fim_api_stream(model):
|
||||||
try:
|
try:
|
||||||
if model == "vertex_ai/codestral@2405":
|
if model == "vertex_ai/codestral@2405":
|
||||||
from tests.local_testing.test_amazing_vertex_completion import (
|
from test_amazing_vertex_completion import (
|
||||||
load_vertex_ai_credentials,
|
load_vertex_ai_credentials,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
|
@ -21,8 +21,8 @@ from litellm import (
|
||||||
get_modified_max_tokens,
|
get_modified_max_tokens,
|
||||||
token_counter,
|
token_counter,
|
||||||
)
|
)
|
||||||
from tests.local_testing.large_text import text
|
from large_text import text
|
||||||
from tests.local_testing.messages_with_counts import (
|
from messages_with_counts import (
|
||||||
MESSAGES_TEXT,
|
MESSAGES_TEXT,
|
||||||
MESSAGES_WITH_IMAGES,
|
MESSAGES_WITH_IMAGES,
|
||||||
MESSAGES_WITH_TOOLS,
|
MESSAGES_WITH_TOOLS,
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue