diff --git a/.circleci/config.yml b/.circleci/config.yml index b6ed9f3b7..26aed145b 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -16,15 +16,6 @@ jobs: command: | echo "Git commit hash: $CIRCLE_SHA1" - - run: - name: Check if litellm dir was updated or if pyproject.toml was modified - command: | - if [ -n "$(git diff --name-only $CIRCLE_SHA1^..$CIRCLE_SHA1 | grep -E 'pyproject\.toml|litellm/')" ]; then - echo "litellm updated" - else - echo "No changes to litellm or pyproject.toml. Skipping tests." - circleci step halt - fi - restore_cache: keys: - v1-dependencies-{{ checksum ".circleci/requirements.txt" }} diff --git a/docs/my-website/docs/proxy/alerting.md b/docs/my-website/docs/proxy/alerting.md index 76b900f8d..c4fd22d1e 100644 --- a/docs/my-website/docs/proxy/alerting.md +++ b/docs/my-website/docs/proxy/alerting.md @@ -112,31 +112,24 @@ response = client.chat.completions.create( ### Opting into specific alert types -Set `alert_types` if you want to Opt into only specific alert types +Set `alert_types` if you want to Opt into only specific alert types. When alert_types is not set, all Default Alert Types are enabled. 
+ +👉 [**See all alert types here**](#all-possible-alert-types) ```shell general_settings: alerting: ["slack"] - alert_types: ["spend_reports"] -``` - -All Possible Alert Types - -```python -AlertType = Literal[ - "llm_exceptions", # LLM API Exceptions - "llm_too_slow", # LLM Responses slower than alerting_threshold + alert_types: [ + "llm_exceptions", + "llm_too_slow", "llm_requests_hanging", "budget_alerts", + "spend_reports", "db_exceptions", "daily_reports", - "spend_reports", - "fallback_reports", "cooldown_deployment", "new_model_added", - "outage_alerts", -] - + ] ``` ### Set specific slack channels per alert type @@ -365,7 +358,7 @@ curl -X GET --location 'http://0.0.0.0:4000/health/services?service=webhook' \ } ``` -## **API Spec for Webhook Event** +### API Spec for Webhook Event - `spend` *float*: The current spend amount for the 'event_group'. - `max_budget` *float or null*: The maximum allowed budget for the 'event_group'. null if not set. @@ -417,4 +410,50 @@ general_settings: region_outage_alert_ttl: 60 # time-window in seconds minor_outage_alert_threshold: 5 # number of errors to trigger a minor alert major_outage_alert_threshold: 10 # number of errors to trigger a major alert -``` \ No newline at end of file +``` + +## **All Possible Alert Types** + +👉 [**Here is how you can set specific alert types**](#opting-into-specific-alert-types) + +LLM-related Alerts + +| Alert Type | Description | Default On | +|------------|-------------|---------| +| `llm_exceptions` | Alerts for LLM API exceptions | ✅ | +| `llm_too_slow` | Notifications for LLM responses slower than the set threshold | ✅ | +| `llm_requests_hanging` | Alerts for LLM requests that are not completing | ✅ | +| `cooldown_deployment` | Alerts when a deployment is put into cooldown | ✅ | +| `new_model_added` | Notifications when a new model is added to litellm proxy through /model/new| ✅ | +| `outage_alerts` | Alerts when a specific LLM deployment is facing an outage | ✅ | +| 
`region_outage_alerts` | Alerts when a specific LLM region is facing an outage. Example us-east-1 | ✅ | + +Budget and Spend Alerts + +| Alert Type | Description | Default On| +|------------|-------------|---------| +| `budget_alerts` | Notifications related to budget limits or thresholds | ✅ | +| `spend_reports` | Periodic reports on spending across teams or tags | ✅ | +| `failed_tracking_spend` | Alerts when spend tracking fails | ✅ | +| `daily_reports` | Daily Spend reports | ✅ | +| `fallback_reports` | Weekly Reports on LLM fallback occurrences | ✅ | + +Database Alerts + +| Alert Type | Description | Default On | +|------------|-------------|---------| +| `db_exceptions` | Notifications for database-related exceptions | ✅ | + +Management Endpoint Alerts - Virtual Key, Team, Internal User + +| Alert Type | Description | Default On | +|------------|-------------|---------| +| `new_virtual_key_created` | Notifications when a new virtual key is created | ❌ | +| `virtual_key_updated` | Alerts when a virtual key is modified | ❌ | +| `virtual_key_deleted` | Notifications when a virtual key is removed | ❌ | +| `new_team_created` | Alerts for the creation of a new team | ❌ | +| `team_updated` | Notifications when team details are modified | ❌ | +| `team_deleted` | Alerts when a team is deleted | ❌ | +| `new_internal_user_created` | Notifications for new internal user accounts | ❌ | +| `internal_user_updated` | Alerts when an internal user's details are changed | ❌ | +| `internal_user_deleted` | Notifications when an internal user account is removed | ❌ | \ No newline at end of file diff --git a/litellm/integrations/SlackAlerting/Readme.md b/litellm/integrations/SlackAlerting/Readme.md new file mode 100644 index 000000000..f28f71500 --- /dev/null +++ b/litellm/integrations/SlackAlerting/Readme.md @@ -0,0 +1,13 @@ +# Slack Alerting on LiteLLM Gateway + +This folder contains the Slack Alerting integration for LiteLLM Gateway. 
+ +## Folder Structure + +- `slack_alerting.py`: This is the main file that handles sending different types of alerts +- `batching_handler.py`: Handles Batching + sending Httpx Post requests to slack. Slack alerts are sent every 10s or when events are greater than X events. Done to ensure litellm has good performance under high traffic +- `types.py`: This file contains the AlertType enum which is used to define the different types of alerts that can be sent to Slack. +- `utils.py`: This file contains common utils used specifically for slack alerting + +## Further Reading +- [Doc setting up Alerting on LiteLLM Proxy (Gateway)](https://docs.litellm.ai/docs/proxy/alerting) \ No newline at end of file diff --git a/litellm/integrations/SlackAlerting/slack_alerting.py b/litellm/integrations/SlackAlerting/slack_alerting.py index a6126afa7..08e47c936 100644 --- a/litellm/integrations/SlackAlerting/slack_alerting.py +++ b/litellm/integrations/SlackAlerting/slack_alerting.py @@ -41,7 +41,7 @@ from litellm.types.router import LiteLLM_Params from ..email_templates.templates import * from .batching_handler import send_to_webhook, squash_payloads from .types import * -from .utils import process_slack_alerting_variables +from .utils import _add_langfuse_trace_id_to_alert, process_slack_alerting_variables class SlackAlerting(CustomBatchLogger): @@ -57,7 +57,7 @@ class SlackAlerting(CustomBatchLogger): float ] = None, # threshold for slow / hanging llm responses (in seconds) alerting: Optional[List] = [], - alert_types: List[AlertType] = list(get_args(AlertType)), + alert_types: List[AlertType] = DEFAULT_ALERT_TYPES, alert_to_webhook_url: Optional[ Dict[AlertType, Union[List[str], str]] ] = None, # if user wants to separate alerts to diff channels @@ -87,7 +87,7 @@ class SlackAlerting(CustomBatchLogger): self, alerting: Optional[List] = None, alerting_threshold: Optional[float] = None, - alert_types: Optional[List] = None, + alert_types: Optional[List[AlertType]] = None, 
alert_to_webhook_url: Optional[Dict[AlertType, Union[List[str], str]]] = None, alerting_args: Optional[Dict] = None, llm_router: Optional[litellm.Router] = None, @@ -127,47 +127,8 @@ class SlackAlerting(CustomBatchLogger): def _all_possible_alert_types(self): # used by the UI to show all supported alert types # Note: This is not the alerts the user has configured, instead it's all possible alert types a user can select - return [ - "llm_exceptions", - "llm_too_slow", - "llm_requests_hanging", - "budget_alerts", - "db_exceptions", - ] - - async def _add_langfuse_trace_id_to_alert( - self, - request_data: Optional[dict] = None, - ) -> Optional[str]: - """ - Returns langfuse trace url - - - check: - -> existing_trace_id - -> trace_id - -> litellm_call_id - """ - # do nothing for now - if ( - request_data is not None - and request_data.get("litellm_logging_obj", None) is not None - ): - trace_id: Optional[str] = None - litellm_logging_obj: Logging = request_data["litellm_logging_obj"] - - for _ in range(3): - trace_id = litellm_logging_obj._get_trace_id(service_name="langfuse") - if trace_id is not None: - break - await asyncio.sleep(3) # wait 3s before retrying for trace id - - _langfuse_object = litellm_logging_obj._get_callback_object( - service_name="langfuse" - ) - if _langfuse_object is not None: - base_url = _langfuse_object.Langfuse.base_url - return f"{base_url}/trace/{trace_id}" - return None + # return list of all values AlertType enum + return list(AlertType) def _response_taking_too_long_callback_helper( self, @@ -275,7 +236,7 @@ class SlackAlerting(CustomBatchLogger): await self.send_alert( message=slow_message + request_info, level="Low", - alert_type="llm_too_slow", + alert_type=AlertType.llm_too_slow, alerting_metadata=alerting_metadata, ) @@ -467,7 +428,7 @@ class SlackAlerting(CustomBatchLogger): await self.send_alert( message=message, level="Low", - alert_type="daily_reports", + alert_type=AlertType.daily_reports, alerting_metadata={}, ) @@ -563,7 
+524,7 @@ class SlackAlerting(CustomBatchLogger): ) if "langfuse" in litellm.success_callback: - langfuse_url = await self._add_langfuse_trace_id_to_alert( + langfuse_url = await _add_langfuse_trace_id_to_alert( request_data=request_data, ) @@ -580,7 +541,7 @@ class SlackAlerting(CustomBatchLogger): await self.send_alert( message=alerting_message + request_info, level="Medium", - alert_type="llm_requests_hanging", + alert_type=AlertType.llm_requests_hanging, alerting_metadata=alerting_metadata, ) @@ -600,7 +561,7 @@ class SlackAlerting(CustomBatchLogger): await self.send_alert( message=message, level="High", - alert_type="failed_tracking_spend", + alert_type=AlertType.failed_tracking_spend, alerting_metadata={}, ) await _cache.async_set_cache( @@ -701,7 +662,7 @@ class SlackAlerting(CustomBatchLogger): await self.send_alert( message=event_message + "\n\n" + user_info_str, level="High", - alert_type="budget_alerts", + alert_type=AlertType.budget_alerts, user_info=webhook_event, alerting_metadata={}, ) @@ -902,7 +863,7 @@ class SlackAlerting(CustomBatchLogger): await self.send_alert( message=msg, level="Medium", - alert_type="outage_alerts", + alert_type=AlertType.outage_alerts, alerting_metadata={}, ) # set to true @@ -928,7 +889,7 @@ class SlackAlerting(CustomBatchLogger): await self.send_alert( message=msg, level="High", - alert_type="outage_alerts", + alert_type=AlertType.outage_alerts, alerting_metadata={}, ) # set to true @@ -1031,7 +992,7 @@ class SlackAlerting(CustomBatchLogger): await self.send_alert( message=msg, level="Medium", - alert_type="outage_alerts", + alert_type=AlertType.outage_alerts, alerting_metadata={}, ) # set to true @@ -1053,7 +1014,7 @@ class SlackAlerting(CustomBatchLogger): await self.send_alert( message=msg, level="High", - alert_type="outage_alerts", + alert_type=AlertType.outage_alerts, alerting_metadata={}, ) # set to true @@ -1118,7 +1079,7 @@ Model Info: alert_val = self.send_alert( message=message, level="Low", - 
alert_type="new_model_added", + alert_type=AlertType.new_model_added, alerting_metadata={}, ) @@ -1354,7 +1315,7 @@ Model Info: self, message: str, level: Literal["Low", "Medium", "High"], - alert_type: Literal[AlertType], + alert_type: AlertType, alerting_metadata: dict, user_info: Optional[WebhookEvent] = None, **kwargs, @@ -1395,7 +1356,6 @@ Model Info: if "slack" not in self.alerting: return - if alert_type not in self.alert_types: return @@ -1654,7 +1614,7 @@ Model Info: await self.send_alert( message=_spend_message, level="Low", - alert_type="spend_reports", + alert_type=AlertType.spend_reports, alerting_metadata={}, ) except ValueError as ve: @@ -1713,7 +1673,7 @@ Model Info: await self.send_alert( message=_spend_message, level="Low", - alert_type="spend_reports", + alert_type=AlertType.spend_reports, alerting_metadata={}, ) except Exception as e: @@ -1742,7 +1702,7 @@ Model Info: await self.send_alert( message=fallback_message, level="Low", - alert_type="fallback_reports", + alert_type=AlertType.fallback_reports, alerting_metadata={}, ) @@ -1754,14 +1714,19 @@ Model Info: async def send_virtual_key_event_slack( self, key_event: VirtualKeyEvent, + alert_type: AlertType, event_name: str, ): """ - Helper to send fallback statistics from prometheus server -> to slack + Handles sending Virtual Key related alerts - This runs once per day and sends an overview of all the fallback statistics + Example: + - New Virtual Key Created + - Internal User Updated + - Team Created, Updated, Deleted """ try: + message = f"`{event_name}`\n" key_event_dict = key_event.model_dump() @@ -1783,7 +1748,7 @@ Model Info: await self.send_alert( message=message, level="High", - alert_type="fallback_reports", + alert_type=alert_type, alerting_metadata={}, ) diff --git a/litellm/integrations/SlackAlerting/types.py b/litellm/integrations/SlackAlerting/types.py index d5519068e..bc669d427 100644 --- a/litellm/integrations/SlackAlerting/types.py +++ 
b/litellm/integrations/SlackAlerting/types.py @@ -32,7 +32,7 @@ class LiteLLMBase(BaseModel): Implements default functions, all pydantic objects should have. """ - def json(self, **kwargs): + def json(self, **kwargs): # type: ignore try: return self.model_dump() # noqa except: @@ -41,14 +41,14 @@ class LiteLLMBase(BaseModel): class SlackAlertingArgsEnum(Enum): - daily_report_frequency: int = 12 * 60 * 60 - report_check_interval: int = 5 * 60 - budget_alert_ttl: int = 24 * 60 * 60 - outage_alert_ttl: int = 1 * 60 - region_outage_alert_ttl: int = 1 * 60 - minor_outage_alert_threshold: int = 1 * 5 - major_outage_alert_threshold: int = 1 * 10 - max_outage_alert_list_size: int = 1 * 10 + daily_report_frequency = 12 * 60 * 60 + report_check_interval = 5 * 60 + budget_alert_ttl = 24 * 60 * 60 + outage_alert_ttl = 1 * 60 + region_outage_alert_ttl = 1 * 60 + minor_outage_alert_threshold = 1 * 5 + major_outage_alert_threshold = 1 * 10 + max_outage_alert_list_size = 1 * 10 class SlackAlertingArgs(LiteLLMBase): @@ -56,7 +56,7 @@ class SlackAlertingArgs(LiteLLMBase): default=int( os.getenv( "SLACK_DAILY_REPORT_FREQUENCY", - SlackAlertingArgsEnum.daily_report_frequency.value, + int(SlackAlertingArgsEnum.daily_report_frequency.value), ) ), description="Frequency of receiving deployment latency/failure reports. Default is 12hours. 
Value is in seconds.", @@ -119,3 +119,75 @@ class SlackAlertingCacheKeys(Enum): failed_requests_key = "failed_requests_daily_metrics" latency_key = "latency_daily_metrics" report_sent_key = "daily_metrics_report_sent" + + +class AlertType(str, Enum): + """ + Enum for alert types and management event types + """ + + # LLM-related alerts + llm_exceptions = "llm_exceptions" + llm_too_slow = "llm_too_slow" + llm_requests_hanging = "llm_requests_hanging" + + # Budget and spend alerts + budget_alerts = "budget_alerts" + spend_reports = "spend_reports" + failed_tracking_spend = "failed_tracking_spend" + + # Database alerts + db_exceptions = "db_exceptions" + + # Report alerts + daily_reports = "daily_reports" + + # Deployment alerts + cooldown_deployment = "cooldown_deployment" + new_model_added = "new_model_added" + + # Outage alerts + outage_alerts = "outage_alerts" + region_outage_alerts = "region_outage_alerts" + + # Fallback alerts + fallback_reports = "fallback_reports" + + # Virtual Key Events + new_virtual_key_created = "new_virtual_key_created" + virtual_key_updated = "virtual_key_updated" + virtual_key_deleted = "virtual_key_deleted" + + # Team Events + new_team_created = "new_team_created" + team_updated = "team_updated" + team_deleted = "team_deleted" + + # Internal User Events + new_internal_user_created = "new_internal_user_created" + internal_user_updated = "internal_user_updated" + internal_user_deleted = "internal_user_deleted" + + +DEFAULT_ALERT_TYPES: List[AlertType] = [ + # LLM related alerts + AlertType.llm_exceptions, + AlertType.llm_too_slow, + AlertType.llm_requests_hanging, + # Budget and spend alerts + AlertType.budget_alerts, + AlertType.spend_reports, + AlertType.failed_tracking_spend, + # Database alerts + AlertType.db_exceptions, + # Report alerts + AlertType.daily_reports, + # Deployment alerts + AlertType.cooldown_deployment, + AlertType.new_model_added, + # Outage alerts + AlertType.outage_alerts, + AlertType.region_outage_alerts, + # 
Fallback alerts + AlertType.fallback_reports, +] diff --git a/litellm/integrations/SlackAlerting/utils.py b/litellm/integrations/SlackAlerting/utils.py index 638d0d955..d6c0a3168 100644 --- a/litellm/integrations/SlackAlerting/utils.py +++ b/litellm/integrations/SlackAlerting/utils.py @@ -2,9 +2,11 @@ Utils used for slack alerting """ +import asyncio from typing import Dict, List, Optional, Union import litellm +from litellm.litellm_core_utils.litellm_logging import Logging from litellm.proxy._types import AlertType from litellm.secret_managers.main import get_secret @@ -49,3 +51,37 @@ def process_slack_alerting_variables( alert_to_webhook_url[alert_type] = _webhook_value_str return alert_to_webhook_url + + +async def _add_langfuse_trace_id_to_alert( + request_data: Optional[dict] = None, +) -> Optional[str]: + """ + Returns langfuse trace url + + - check: + -> existing_trace_id + -> trace_id + -> litellm_call_id + """ + # do nothing for now + if ( + request_data is not None + and request_data.get("litellm_logging_obj", None) is not None + ): + trace_id: Optional[str] = None + litellm_logging_obj: Logging = request_data["litellm_logging_obj"] + + for _ in range(3): + trace_id = litellm_logging_obj._get_trace_id(service_name="langfuse") + if trace_id is not None: + break + await asyncio.sleep(3) # wait 3s before retrying for trace id + + _langfuse_object = litellm_logging_obj._get_callback_object( + service_name="langfuse" + ) + if _langfuse_object is not None: + base_url = _langfuse_object.Langfuse.base_url + return f"{base_url}/trace/{trace_id}" + return None diff --git a/litellm/proxy/_types.py b/litellm/proxy/_types.py index 4dc1075a4..b4d24854f 100644 --- a/litellm/proxy/_types.py +++ b/litellm/proxy/_types.py @@ -10,6 +10,7 @@ from typing import TYPE_CHECKING, Any, Dict, List, Literal, Optional, Union from pydantic import BaseModel, ConfigDict, Extra, Field, Json, model_validator from typing_extensions import Annotated, TypedDict +from 
litellm.integrations.SlackAlerting.types import AlertType from litellm.types.router import RouterErrors, UpdateRouterConfig from litellm.types.utils import ProviderField @@ -110,23 +111,6 @@ class LitellmTableNames(enum.Enum): PROXY_MODEL_TABLE_NAME = "LiteLLM_ModelTable" -AlertType = Literal[ - "llm_exceptions", - "llm_too_slow", - "llm_requests_hanging", - "budget_alerts", - "db_exceptions", - "daily_reports", - "spend_reports", - "cooldown_deployment", - "new_model_added", - "outage_alerts", - "region_outage_alerts", - "fallback_reports", - "failed_tracking_spend", -] - - def hash_token(token: str): import hashlib diff --git a/litellm/proxy/health_endpoints/_health_endpoints.py b/litellm/proxy/health_endpoints/_health_endpoints.py index 462034116..35cec9ceb 100644 --- a/litellm/proxy/health_endpoints/_health_endpoints.py +++ b/litellm/proxy/health_endpoints/_health_endpoints.py @@ -11,6 +11,7 @@ from fastapi import APIRouter, Depends, Header, HTTPException, Request, Response import litellm from litellm._logging import verbose_proxy_logger from litellm.proxy._types import ( + AlertType, CallInfo, ProxyErrorTypes, ProxyException, @@ -159,13 +160,6 @@ async def health_services_endpoint( for ( alert_type ) in proxy_logging_obj.slack_alerting_instance.alert_to_webhook_url: - """ - "llm_exceptions", - "llm_too_slow", - "llm_requests_hanging", - "budget_alerts", - "db_exceptions", - """ # only test alert if it's in active alert types if ( proxy_logging_obj.slack_alerting_instance.alert_types @@ -176,19 +170,19 @@ async def health_services_endpoint( continue test_message = "default test message" - if alert_type == "llm_exceptions": + if alert_type == AlertType.llm_exceptions: test_message = f"LLM Exception test alert" - elif alert_type == "llm_too_slow": + elif alert_type == AlertType.llm_too_slow: test_message = f"LLM Too Slow test alert" - elif alert_type == "llm_requests_hanging": + elif alert_type == AlertType.llm_requests_hanging: test_message = f"LLM Requests 
Hanging test alert" - elif alert_type == "budget_alerts": + elif alert_type == AlertType.budget_alerts: test_message = f"Budget Alert test alert" - elif alert_type == "db_exceptions": + elif alert_type == AlertType.db_exceptions: test_message = f"DB Exception test alert" - elif alert_type == "outage_alerts": + elif alert_type == AlertType.outage_alerts: test_message = f"Outage Alert Exception test alert" - elif alert_type == "daily_reports": + elif alert_type == AlertType.daily_reports: test_message = f"Daily Reports test alert" else: test_message = f"Budget Alert test alert" @@ -200,7 +194,7 @@ async def health_services_endpoint( await proxy_logging_obj.alerting_handler( message="This is a test slack alert message", level="Low", - alert_type="budget_alerts", + alert_type=AlertType.budget_alerts, ) if prisma_client is not None: diff --git a/litellm/proxy/management_helpers/utils.py b/litellm/proxy/management_helpers/utils.py index 14e38e666..8fa22ee90 100644 --- a/litellm/proxy/management_helpers/utils.py +++ b/litellm/proxy/management_helpers/utils.py @@ -227,25 +227,27 @@ async def send_management_endpoint_alert( - An internal user is created, updated, or deleted - A team is created, updated, or deleted """ + from litellm.integrations.SlackAlerting.types import AlertType from litellm.proxy.proxy_server import premium_user, proxy_logging_obj if premium_user is not True: return management_function_to_event_name = { - "generate_key_fn": "New Virtual Key Created", - "update_key_fn": "Virtual Key Updated", - "delete_key_fn": "Virtual Key Deleted", + "generate_key_fn": AlertType.new_virtual_key_created, + "update_key_fn": AlertType.virtual_key_updated, + "delete_key_fn": AlertType.virtual_key_deleted, # Team events - "new_team": "New Team Created", - "update_team": "Team Updated", - "delete_team": "Team Deleted", + "new_team": AlertType.new_team_created, + "update_team": AlertType.team_updated, + "delete_team": AlertType.team_deleted, # Internal User events - 
"new_user": "New Internal User Created", - "user_update": "Internal User Updated", - "delete_user": "Internal User Deleted", + "new_user": AlertType.new_internal_user_created, + "user_update": AlertType.internal_user_updated, + "delete_user": AlertType.internal_user_deleted, } + # Check if alerting is enabled if ( proxy_logging_obj is not None and proxy_logging_obj.slack_alerting_instance is not None @@ -253,6 +255,8 @@ async def send_management_endpoint_alert( # Virtual Key Events if function_name in management_function_to_event_name: + _event_name: AlertType = management_function_to_event_name[function_name] + key_event = VirtualKeyEvent( created_by_user_id=user_api_key_dict.user_id or "Unknown", created_by_user_role=user_api_key_dict.user_role or "Unknown", @@ -260,9 +264,12 @@ async def send_management_endpoint_alert( request_kwargs=request_kwargs, ) - event_name = management_function_to_event_name[function_name] + # replace all "_" with " " and capitalize + event_name = _event_name.replace("_", " ").title() await proxy_logging_obj.slack_alerting_instance.send_virtual_key_event_slack( - key_event=key_event, event_name=event_name + key_event=key_event, + event_name=event_name, + alert_type=_event_name, ) diff --git a/litellm/proxy/proxy_config.yaml b/litellm/proxy/proxy_config.yaml index a2a2d4ce1..c37e433de 100644 --- a/litellm/proxy/proxy_config.yaml +++ b/litellm/proxy/proxy_config.yaml @@ -1,42 +1,11 @@ model_list: - - model_name: multimodalembedding@001 + - model_name: db-openai-endpoint litellm_params: - model: vertex_ai/multimodalembedding@001 - vertex_project: "adroit-crow-413218" - vertex_location: "us-central1" - vertex_credentials: "adroit-crow-413218-a956eef1a2a8.json" - - model_name: text-embedding-ada-002 - litellm_params: - model: openai/text-embedding-ada-002 # The `openai/` prefix will call openai.chat.completions.create - api_key: os.environ/OPENAI_API_KEY - - model_name: db-openai-endpoint - litellm_params: - model: openai/gpt-3.5-turbo - 
api_key: fake-key - api_base: https://exampleopenaiendpoint-production.up.railway.app/ - model_info: - supported_environments: ["development", "production", "staging"] - - model_name: rerank-english-v3.0 - litellm_params: - model: cohere/rerank-english-v3.0 - api_key: os.environ/COHERE_API_KEY - model_info: - supported_environments: ["production", "staging"] - - model_name: llava-hf - litellm_params: - model: openai/llava-hf/llava-v1.6-vicuna-7b-hf - api_key: fake-key - model_info: - supported_environments: ["production", "staging"] + model: openai/gpt-5 + api_key: fake-key + api_base: https://exampleopenaiendpoint-production.up.railway.app/ general_settings: - service_account_settings: - enforced_params: ["user"] + alerting: ["slack"] -litellm_settings: - drop_params: True - callbacks: ["otel"] - success_callback: ["langfuse"] - failure_callback: ["langfuse"] - diff --git a/litellm/proxy/utils.py b/litellm/proxy/utils.py index e6c9bcad2..16935c2c7 100644 --- a/litellm/proxy/utils.py +++ b/litellm/proxy/utils.py @@ -49,6 +49,8 @@ from litellm.exceptions import RejectedRequestError from litellm.integrations.custom_guardrail import CustomGuardrail from litellm.integrations.custom_logger import CustomLogger from litellm.integrations.SlackAlerting.slack_alerting import SlackAlerting +from litellm.integrations.SlackAlerting.types import DEFAULT_ALERT_TYPES +from litellm.integrations.SlackAlerting.utils import _add_langfuse_trace_id_to_alert from litellm.litellm_core_utils.core_helpers import ( _get_parent_otel_span_from_kwargs, get_litellm_metadata_from_kwargs, @@ -333,12 +335,11 @@ class ProxyLogging: self.cache_control_check = _PROXY_CacheControlCheck() self.alerting: Optional[List] = None self.alerting_threshold: float = 300 # default to 5 min. 
threshold - self.alert_types: List[AlertType] = list(get_args(AlertType)) + self.alert_types: List[AlertType] = DEFAULT_ALERT_TYPES self.alert_to_webhook_url: Optional[dict] = None self.slack_alerting_instance: SlackAlerting = SlackAlerting( alerting_threshold=self.alerting_threshold, alerting=self.alerting, - alert_types=self.alert_types, internal_usage_cache=self.internal_usage_cache.dual_cache, ) self.premium_user = premium_user @@ -644,9 +645,11 @@ class ProxyLogging: async def failed_tracking_alert(self, error_message: str): if self.alerting is None: return - await self.slack_alerting_instance.failed_tracking_alert( - error_message=error_message - ) + + if self.slack_alerting_instance: + await self.slack_alerting_instance.failed_tracking_alert( + error_message=error_message + ) async def budget_alerts( self, @@ -705,10 +708,7 @@ class ProxyLogging: extra_kwargs = {} alerting_metadata = {} if request_data is not None: - - _url = await self.slack_alerting_instance._add_langfuse_trace_id_to_alert( - request_data=request_data - ) + _url = await _add_langfuse_trace_id_to_alert(request_data=request_data) if _url is not None: extra_kwargs["🪢 Langfuse Trace"] = _url @@ -744,7 +744,7 @@ class ProxyLogging: Currently only logs exceptions to sentry """ ### ALERTING ### - if "db_exceptions" not in self.alert_types: + if AlertType.db_exceptions not in self.alert_types: return if isinstance(original_exception, HTTPException): if isinstance(original_exception.detail, str): @@ -761,7 +761,7 @@ class ProxyLogging: self.alerting_handler( message=f"DB read/write call failed: {error_message}", level="High", - alert_type="db_exceptions", + alert_type=AlertType.db_exceptions, request_data={}, ) ) @@ -796,7 +796,7 @@ class ProxyLogging: await self.update_request_status( litellm_call_id=request_data.get("litellm_call_id", ""), status="fail" ) - if "llm_exceptions" in self.alert_types and not isinstance( + if AlertType.llm_exceptions in self.alert_types and not isinstance( 
original_exception, HTTPException ): """ @@ -813,7 +813,7 @@ class ProxyLogging: self.alerting_handler( message=f"LLM API call failed: `{exception_str}`", level="High", - alert_type="llm_exceptions", + alert_type=AlertType.llm_exceptions, request_data=request_data, ) ) diff --git a/tests/local_testing/test_alerting.py b/tests/local_testing/test_alerting.py index 4f1cd21ab..5bc18abf0 100644 --- a/tests/local_testing/test_alerting.py +++ b/tests/local_testing/test_alerting.py @@ -14,6 +14,8 @@ from typing import Optional import httpx +from litellm.integrations.SlackAlerting.types import AlertType + # import logging # logging.basicConfig(level=logging.DEBUG) sys.path.insert(0, os.path.abspath("../..")) @@ -99,7 +101,7 @@ async def test_get_api_base(): await _pl.alerting_handler( message=slow_message + request_info, level="Low", - alert_type="llm_too_slow", + alert_type=AlertType.llm_too_slow, ) print("passed test_get_api_base") @@ -117,7 +119,7 @@ def test_init(): slack_alerting = SlackAlerting( alerting_threshold=32, alerting=["slack"], - alert_types=["llm_exceptions"], + alert_types=[AlertType.llm_exceptions], internal_usage_cache=DualCache(), ) assert slack_alerting.alerting_threshold == 32 @@ -710,7 +712,7 @@ async def test_region_outage_alerting_called( If multiple calls fail, outage alert is sent """ slack_alerting = SlackAlerting( - alerting=["webhook"], alert_types=["region_outage_alerts"] + alerting=["webhook"], alert_types=[AlertType.region_outage_alerts] ) litellm.callbacks = [slack_alerting] @@ -829,6 +831,7 @@ async def test_langfuse_trace_id(): - Unit test for `_add_langfuse_trace_id_to_alert` function in slack_alerting.py """ from litellm.litellm_core_utils.litellm_logging import Logging + from litellm.integrations.SlackAlerting.utils import _add_langfuse_trace_id_to_alert litellm.success_callback = ["langfuse"] @@ -856,11 +859,11 @@ async def test_langfuse_trace_id(): slack_alerting = SlackAlerting( alerting_threshold=32, alerting=["slack"], - 
alert_types=["llm_exceptions"], + alert_types=[AlertType.llm_exceptions], internal_usage_cache=DualCache(), ) - trace_url = await slack_alerting._add_langfuse_trace_id_to_alert( + trace_url = await _add_langfuse_trace_id_to_alert( request_data={"litellm_logging_obj": litellm_logging_obj} ) diff --git a/tests/local_testing/test_amazing_vertex_completion.py b/tests/local_testing/test_amazing_vertex_completion.py index 1bc2ef8d1..0fed67d5f 100644 --- a/tests/local_testing/test_amazing_vertex_completion.py +++ b/tests/local_testing/test_amazing_vertex_completion.py @@ -8,7 +8,7 @@ load_dotenv() import io import os -from tests.local_testing.test_streaming import streaming_format_tests +from test_streaming import streaming_format_tests sys.path.insert( 0, os.path.abspath("../..") @@ -933,7 +933,7 @@ async def test_gemini_pro_function_calling_httpx(model, sync_mode): pytest.fail("An unexpected exception occurred - {}".format(str(e))) -from tests.local_testing.test_completion import response_format_tests +from test_completion import response_format_tests @pytest.mark.parametrize( diff --git a/tests/local_testing/test_anthropic_prompt_caching.py b/tests/local_testing/test_anthropic_prompt_caching.py index 75b7ceec9..829f5699b 100644 --- a/tests/local_testing/test_anthropic_prompt_caching.py +++ b/tests/local_testing/test_anthropic_prompt_caching.py @@ -9,7 +9,7 @@ load_dotenv() import io import os -from tests.local_testing.test_streaming import streaming_format_tests +from test_streaming import streaming_format_tests sys.path.insert( 0, os.path.abspath("../..") diff --git a/tests/local_testing/test_completion_cost.py b/tests/local_testing/test_completion_cost.py index 6d620e781..1ab1f41bb 100644 --- a/tests/local_testing/test_completion_cost.py +++ b/tests/local_testing/test_completion_cost.py @@ -829,7 +829,7 @@ def test_vertex_ai_embedding_completion_cost(caplog): # """ # Relevant issue - https://github.com/BerriAI/litellm/issues/4630 # """ -# from 
tests.local_testing.test_amazing_vertex_completion import load_vertex_ai_credentials +# from test_amazing_vertex_completion import load_vertex_ai_credentials # load_vertex_ai_credentials() # os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True" diff --git a/tests/local_testing/test_proxy_server.py b/tests/local_testing/test_proxy_server.py index c952e4fb4..5bca1136f 100644 --- a/tests/local_testing/test_proxy_server.py +++ b/tests/local_testing/test_proxy_server.py @@ -235,7 +235,7 @@ async def test_team_disable_guardrails(mock_acompletion, client_no_auth): assert e.code == str(403) -from tests.local_testing.test_custom_callback_input import CompletionCustomHandler +from test_custom_callback_input import CompletionCustomHandler @mock_patch_acompletion() @@ -815,7 +815,7 @@ from litellm.proxy._types import ( ) from litellm.proxy.management_endpoints.internal_user_endpoints import new_user from litellm.proxy.management_endpoints.team_endpoints import team_member_add -from tests.local_testing.test_key_generate_prisma import prisma_client +from test_key_generate_prisma import prisma_client @pytest.mark.parametrize( diff --git a/tests/local_testing/test_sagemaker.py b/tests/local_testing/test_sagemaker.py index 4986a5405..fcf5dd71e 100644 --- a/tests/local_testing/test_sagemaker.py +++ b/tests/local_testing/test_sagemaker.py @@ -9,7 +9,7 @@ load_dotenv() import io import os -from tests.local_testing.test_streaming import streaming_format_tests +from test_streaming import streaming_format_tests sys.path.insert( 0, os.path.abspath("../..") diff --git a/tests/local_testing/test_stream_chunk_builder.py b/tests/local_testing/test_stream_chunk_builder.py index f45175f39..35ce34c0c 100644 --- a/tests/local_testing/test_stream_chunk_builder.py +++ b/tests/local_testing/test_stream_chunk_builder.py @@ -16,7 +16,7 @@ import pytest from openai import OpenAI import litellm -from tests.local_testing import stream_chunk_testdata +import stream_chunk_testdata from litellm import 
completion, stream_chunk_builder dotenv.load_dotenv() diff --git a/tests/local_testing/test_streaming.py b/tests/local_testing/test_streaming.py index 988032e12..25518de32 100644 --- a/tests/local_testing/test_streaming.py +++ b/tests/local_testing/test_streaming.py @@ -1145,7 +1145,7 @@ def test_completion_claude_stream_bad_key(): @pytest.mark.parametrize("provider", ["vertex_ai_beta"]) # "" def test_vertex_ai_stream(provider): - from tests.local_testing.test_amazing_vertex_completion import ( + from test_amazing_vertex_completion import ( load_vertex_ai_credentials, ) @@ -3951,7 +3951,7 @@ def test_unit_test_perplexity_citations_chunk(): @pytest.mark.flaky(retries=3, delay=1) def test_streaming_tool_calls_valid_json_str(model): if "vertex_ai" in model: - from tests.local_testing.test_amazing_vertex_completion import ( + from test_amazing_vertex_completion import ( load_vertex_ai_credentials, ) diff --git a/tests/local_testing/test_text_completion.py b/tests/local_testing/test_text_completion.py index 682da0977..8e6f6939a 100644 --- a/tests/local_testing/test_text_completion.py +++ b/tests/local_testing/test_text_completion.py @@ -4114,7 +4114,7 @@ async def test_async_text_completion_chat_model_stream(): async def test_completion_codestral_fim_api(model): try: if model == "vertex_ai/codestral@2405": - from tests.local_testing.test_amazing_vertex_completion import ( + from test_amazing_vertex_completion import ( load_vertex_ai_credentials, ) @@ -4158,7 +4158,7 @@ async def test_completion_codestral_fim_api(model): async def test_completion_codestral_fim_api_stream(model): try: if model == "vertex_ai/codestral@2405": - from tests.local_testing.test_amazing_vertex_completion import ( + from test_amazing_vertex_completion import ( load_vertex_ai_credentials, ) diff --git a/tests/local_testing/test_token_counter.py b/tests/local_testing/test_token_counter.py index dce1eb011..6dbf286e4 100644 --- a/tests/local_testing/test_token_counter.py +++ 
b/tests/local_testing/test_token_counter.py @@ -21,8 +21,8 @@ from litellm import ( get_modified_max_tokens, token_counter, ) -from tests.local_testing.large_text import text -from tests.local_testing.messages_with_counts import ( +from large_text import text +from messages_with_counts import ( MESSAGES_TEXT, MESSAGES_WITH_IMAGES, MESSAGES_WITH_TOOLS,