diff --git a/docs/my-website/docs/proxy/alerting.md b/docs/my-website/docs/proxy/alerting.md
index 15bd518ed..76b900f8d 100644
--- a/docs/my-website/docs/proxy/alerting.md
+++ b/docs/my-website/docs/proxy/alerting.md
@@ -1,4 +1,6 @@
 import Image from '@theme/IdealImage';
+import Tabs from '@theme/Tabs';
+import TabItem from '@theme/TabItem';
 
 # 🚨 Alerting / Webhooks
 
@@ -149,6 +151,10 @@ spend_reports -> go to slack channel #llm-spend-reports
 
 Set `alert_to_webhook_url` on your config.yaml
 
+
+
+
+
 ```yaml
 model_list:
   - model_name: gpt-4
@@ -177,6 +183,44 @@ general_settings:
 
 litellm_settings:
   success_callback: ["langfuse"]
 ```
+
+
+
+
+Provide multiple slack channels for a given alert type
+
+```yaml
+model_list:
+  - model_name: gpt-4
+    litellm_params:
+      model: openai/fake
+      api_key: fake-key
+      api_base: https://exampleopenaiendpoint-production.up.railway.app/
+
+general_settings:
+  master_key: sk-1234
+  alerting: ["slack"]
+  alerting_threshold: 0.0001 # (Seconds) set an artifically low threshold for testing alerting
+  alert_to_webhook_url: {
+    "llm_exceptions": ["os.environ/SLACK_WEBHOOK_URL", "os.environ/SLACK_WEBHOOK_URL_2"],
+    "llm_too_slow": ["https://webhook.site/7843a980-a494-4967-80fb-d502dbc16886", "https://webhook.site/28cfb179-f4fb-4408-8129-729ff55cf213"],
+    "llm_requests_hanging": ["os.environ/SLACK_WEBHOOK_URL_5", "os.environ/SLACK_WEBHOOK_URL_6"],
+    "budget_alerts": ["os.environ/SLACK_WEBHOOK_URL_7", "os.environ/SLACK_WEBHOOK_URL_8"],
+    "db_exceptions": ["os.environ/SLACK_WEBHOOK_URL_9", "os.environ/SLACK_WEBHOOK_URL_10"],
+    "daily_reports": ["os.environ/SLACK_WEBHOOK_URL_11", "os.environ/SLACK_WEBHOOK_URL_12"],
+    "spend_reports": ["os.environ/SLACK_WEBHOOK_URL_13", "os.environ/SLACK_WEBHOOK_URL_14"],
+    "cooldown_deployment": ["os.environ/SLACK_WEBHOOK_URL_15", "os.environ/SLACK_WEBHOOK_URL_16"],
+    "new_model_added": ["os.environ/SLACK_WEBHOOK_URL_17", "os.environ/SLACK_WEBHOOK_URL_18"],
+    "outage_alerts": ["os.environ/SLACK_WEBHOOK_URL_19", "os.environ/SLACK_WEBHOOK_URL_20"],
+  }
+
+litellm_settings:
+  success_callback: ["langfuse"]
+```
+
+
+
+
 Test it - send a valid llm request - expect to see a `llm_too_slow` alert in it's own slack channel
 
@@ -193,36 +237,6 @@ curl -i http://localhost:4000/v1/chat/completions \
 ```
 
-### Provide multiple slack channels for a given alert type
-
-Just add it like this - `alert_type: [, ]`.
-
-1. Setup config.yaml
-
-```yaml
-general_settings:
-  master_key: sk-1234
-  alerting: ["slack"]
-  alert_to_webhook_url: {
-    "spend_reports": ["https://webhook.site/7843a980-a494-4967-80fb-d502dbc16886", "https://webhook.site/28cfb179-f4fb-4408-8129-729ff55cf213"]
-  }
-```
-
-2. Start proxy
-
-```bash
-litellm --config /path/to/config.yaml
-```
-
-3. Test it!
-
-```bash
-curl -X GET 'http://0.0.0.0:4000/health/services?service=slack' \
--H 'Authorization: Bearer sk-1234'
-```
-
-In case of error, check server logs for the error message!
-
 ### Using MS Teams Webhooks
 
 MS Teams provides a slack compatible webhook url that you can use for alerting
 
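> Reviewer note: the config above references webhook URLs as `os.environ/...` values. A minimal standalone sketch of how such references can be resolved at startup — this mirrors the `process_slack_alerting_variables` helper added later in this diff, but the function and env-var names below are illustrative only, not the proxy's actual loader:

```python
import os
from typing import Dict, List, Union


def resolve_webhook_urls(
    alert_to_webhook_url: Dict[str, Union[str, List[str]]]
) -> Dict[str, Union[str, List[str]]]:
    """Replace 'os.environ/<VAR>' entries with the value of <VAR>."""

    def _resolve(value: str) -> str:
        if value.startswith("os.environ/"):
            env_var = value.split("os.environ/", 1)[1]
            resolved = os.environ.get(env_var)
            if resolved is None:
                raise ValueError(f"Missing env var for webhook: {env_var}")
            return resolved
        return value

    return {
        alert_type: [_resolve(v) for v in urls] if isinstance(urls, list) else _resolve(urls)
        for alert_type, urls in alert_to_webhook_url.items()
    }


# Example (assumes SLACK_WEBHOOK_URL and SLACK_WEBHOOK_URL_2 are set in the environment):
# resolve_webhook_urls({"llm_exceptions": ["os.environ/SLACK_WEBHOOK_URL", "os.environ/SLACK_WEBHOOK_URL_2"]})
```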
diff --git a/docs/my-website/docs/proxy/call_hooks.md b/docs/my-website/docs/proxy/call_hooks.md
index 25a46609d..0afcb2158 100644
--- a/docs/my-website/docs/proxy/call_hooks.md
+++ b/docs/my-website/docs/proxy/call_hooks.md
@@ -57,7 +57,7 @@ class MyCustomHandler(CustomLogger): # https://docs.litellm.ai/docs/observabilit
         self,
         data: dict,
         user_api_key_dict: UserAPIKeyAuth,
-        call_type: Literal["completion", "embeddings", "image_generation"],
+        call_type: Literal["completion", "embeddings", "image_generation", "moderation", "audio_transcription"],
     ):
         pass
 
diff --git a/docs/my-website/docs/proxy/guardrails/custom_guardrail.md b/docs/my-website/docs/proxy/guardrails/custom_guardrail.md
index ce4d44414..5277d46d4 100644
--- a/docs/my-website/docs/proxy/guardrails/custom_guardrail.md
+++ b/docs/my-website/docs/proxy/guardrails/custom_guardrail.md
@@ -84,7 +84,7 @@ class myCustomGuardrail(CustomGuardrail):
         self,
         data: dict,
         user_api_key_dict: UserAPIKeyAuth,
-        call_type: Literal["completion", "embeddings", "image_generation"],
+        call_type: Literal["completion", "embeddings", "image_generation", "moderation", "audio_transcription"],
     ):
         """
         Runs in parallel to LLM API call
diff --git a/enterprise/enterprise_hooks/aporia_ai.py b/enterprise/enterprise_hooks/aporia_ai.py
index 7f5339f30..2121f105d 100644
--- a/enterprise/enterprise_hooks/aporia_ai.py
+++ b/enterprise/enterprise_hooks/aporia_ai.py
@@ -174,7 +174,13 @@ class AporiaGuardrail(CustomGuardrail):
         self,
         data: dict,
         user_api_key_dict: UserAPIKeyAuth,
-        call_type: Literal["completion", "embeddings", "image_generation"],
+        call_type: Literal[
+            "completion",
+            "embeddings",
+            "image_generation",
+            "moderation",
+            "audio_transcription",
+        ],
     ):
         from litellm.proxy.common_utils.callback_utils import (
             add_guardrail_to_applied_guardrails_header,
diff --git a/enterprise/enterprise_hooks/google_text_moderation.py b/enterprise/enterprise_hooks/google_text_moderation.py
index b548006cf..62908e026 100644
--- a/enterprise/enterprise_hooks/google_text_moderation.py
+++ b/enterprise/enterprise_hooks/google_text_moderation.py
@@ -97,7 +97,13 @@ class _ENTERPRISE_GoogleTextModeration(CustomLogger):
         self,
         data: dict,
         user_api_key_dict: UserAPIKeyAuth,
-        call_type: Literal["completion", "embeddings", "image_generation"],
+        call_type: Literal[
+            "completion",
+            "embeddings",
+            "image_generation",
+            "moderation",
+            "audio_transcription",
+        ],
     ):
         """
         - Calls Google's Text Moderation API
diff --git a/enterprise/enterprise_hooks/llama_guard.py b/enterprise/enterprise_hooks/llama_guard.py
index c11a9d368..533d52d30 100644
--- a/enterprise/enterprise_hooks/llama_guard.py
+++ b/enterprise/enterprise_hooks/llama_guard.py
@@ -100,7 +100,13 @@ class _ENTERPRISE_LlamaGuard(CustomLogger):
         self,
         data: dict,
         user_api_key_dict: UserAPIKeyAuth,
-        call_type: Literal["completion", "embeddings", "image_generation"],
+        call_type: Literal[
+            "completion",
+            "embeddings",
+            "image_generation",
+            "moderation",
+            "audio_transcription",
+        ],
     ):
         """
         - Calls the Llama Guard Endpoint
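> Reviewer note: every hook in this diff widens the same `call_type` union. A minimal sketch (hypothetical handler, not part of this diff) of a custom `async_moderation_hook` that makes use of the two new call types:

```python
from typing import Literal, Optional

from litellm.integrations.custom_logger import CustomLogger
from litellm.proxy._types import UserAPIKeyAuth


class MyModerationHandler(CustomLogger):
    async def async_moderation_hook(
        self,
        data: dict,
        user_api_key_dict: UserAPIKeyAuth,
        call_type: Literal[
            "completion",
            "embeddings",
            "image_generation",
            "moderation",           # new in this diff
            "audio_transcription",  # new in this diff
        ],
    ) -> Optional[bool]:
        # Skip audio transcription payloads - there is no chat text to inspect.
        if call_type == "audio_transcription":
            return None
        if call_type == "moderation":
            # e.g. forward data["input"] to an internal policy service (illustrative)
            return None
        # completion / embeddings / image_generation handling goes here
        return None
```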
"image_generation"], + call_type: Literal[ + "completion", + "embeddings", + "image_generation", + "moderation", + "audio_transcription", + ], ): """ - Calls the LLM Guard Endpoint diff --git a/enterprise/enterprise_hooks/openai_moderation.py b/enterprise/enterprise_hooks/openai_moderation.py index 0fa375fb2..5fcd8dba3 100644 --- a/enterprise/enterprise_hooks/openai_moderation.py +++ b/enterprise/enterprise_hooks/openai_moderation.py @@ -43,7 +43,13 @@ class _ENTERPRISE_OpenAI_Moderation(CustomLogger): self, data: dict, user_api_key_dict: UserAPIKeyAuth, - call_type: Literal["completion", "embeddings", "image_generation"], + call_type: Literal[ + "completion", + "embeddings", + "image_generation", + "moderation", + "audio_transcription", + ], ): if "messages" in data and isinstance(data["messages"], list): text = "" diff --git a/litellm/integrations/SlackAlerting/slack_alerting.py b/litellm/integrations/SlackAlerting/slack_alerting.py index 38a9171a9..fa7d4bc90 100644 --- a/litellm/integrations/SlackAlerting/slack_alerting.py +++ b/litellm/integrations/SlackAlerting/slack_alerting.py @@ -41,6 +41,7 @@ from litellm.types.router import LiteLLM_Params from ..email_templates.templates import * from .batching_handler import send_to_webhook, squash_payloads from .types import * +from .utils import process_slack_alerting_variables class SlackAlerting(CustomBatchLogger): @@ -70,7 +71,7 @@ class SlackAlerting(CustomBatchLogger): "outage_alerts", ], alert_to_webhook_url: Optional[ - Dict + Dict[AlertType, Union[List[str], str]] ] = None, # if user wants to separate alerts to diff channels alerting_args={}, default_webhook_url: Optional[str] = None, @@ -85,7 +86,9 @@ class SlackAlerting(CustomBatchLogger): self.async_http_handler = get_async_httpx_client( llm_provider=httpxSpecialProvider.LoggingCallback ) - self.alert_to_webhook_url = alert_to_webhook_url + self.alert_to_webhook_url = process_slack_alerting_variables( + alert_to_webhook_url=alert_to_webhook_url + ) self.is_running = False self.alerting_args = SlackAlertingArgs(**alerting_args) self.default_webhook_url = default_webhook_url @@ -97,7 +100,7 @@ class SlackAlerting(CustomBatchLogger): alerting: Optional[List] = None, alerting_threshold: Optional[float] = None, alert_types: Optional[List] = None, - alert_to_webhook_url: Optional[Dict] = None, + alert_to_webhook_url: Optional[Dict[AlertType, Union[List[str], str]]] = None, alerting_args: Optional[Dict] = None, llm_router: Optional[litellm.Router] = None, ): @@ -113,9 +116,17 @@ class SlackAlerting(CustomBatchLogger): if alert_to_webhook_url is not None: # update the dict if self.alert_to_webhook_url is None: - self.alert_to_webhook_url = alert_to_webhook_url + self.alert_to_webhook_url = process_slack_alerting_variables( + alert_to_webhook_url=alert_to_webhook_url + ) else: - self.alert_to_webhook_url.update(alert_to_webhook_url) + _new_values = ( + process_slack_alerting_variables( + alert_to_webhook_url=alert_to_webhook_url + ) + or {} + ) + self.alert_to_webhook_url.update(_new_values) if llm_router is not None: self.llm_router = llm_router diff --git a/litellm/integrations/SlackAlerting/utils.py b/litellm/integrations/SlackAlerting/utils.py new file mode 100644 index 000000000..638d0d955 --- /dev/null +++ b/litellm/integrations/SlackAlerting/utils.py @@ -0,0 +1,51 @@ +""" +Utils used for slack alerting +""" + +from typing import Dict, List, Optional, Union + +import litellm +from litellm.proxy._types import AlertType +from litellm.secret_managers.main import get_secret + + +def 
diff --git a/litellm/integrations/SlackAlerting/utils.py b/litellm/integrations/SlackAlerting/utils.py
new file mode 100644
index 000000000..638d0d955
--- /dev/null
+++ b/litellm/integrations/SlackAlerting/utils.py
@@ -0,0 +1,51 @@
+"""
+Utils used for slack alerting
+"""
+
+from typing import Dict, List, Optional, Union
+
+import litellm
+from litellm.proxy._types import AlertType
+from litellm.secret_managers.main import get_secret
+
+
+def process_slack_alerting_variables(
+    alert_to_webhook_url: Optional[Dict[AlertType, Union[List[str], str]]]
+) -> Optional[Dict[AlertType, Union[List[str], str]]]:
+    """
+    process alert_to_webhook_url
+    - check if any urls are set as os.environ/SLACK_WEBHOOK_URL_1 read env var and set the correct value
+    """
+    if alert_to_webhook_url is None:
+        return None
+
+    for alert_type, webhook_urls in alert_to_webhook_url.items():
+        if isinstance(webhook_urls, list):
+            _webhook_values: List[str] = []
+            for webhook_url in webhook_urls:
+                if "os.environ/" in webhook_url:
+                    _env_value = get_secret(secret_name=webhook_url)
+                    if not isinstance(_env_value, str):
+                        raise ValueError(
+                            f"Invalid webhook url value for: {webhook_url}. Got type={type(_env_value)}"
+                        )
+                    _webhook_values.append(_env_value)
+                else:
+                    _webhook_values.append(webhook_url)
+
+            alert_to_webhook_url[alert_type] = _webhook_values
+        else:
+            _webhook_value_str: str = webhook_urls
+            if "os.environ/" in webhook_urls:
+                _env_value = get_secret(secret_name=webhook_urls)
+                if not isinstance(_env_value, str):
+                    raise ValueError(
+                        f"Invalid webhook url value for: {webhook_urls}. Got type={type(_env_value)}"
+                    )
+                _webhook_value_str = _env_value
+            else:
+                _webhook_value_str = webhook_urls
+
+            alert_to_webhook_url[alert_type] = _webhook_value_str
+
+    return alert_to_webhook_url
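> Reviewer note: a quick usage sketch for the new helper. The env-var value is illustrative; `get_secret` resolves the `os.environ/...` reference, and plain URLs pass through unchanged. Alert-type keys are written as plain strings for brevity:

```python
import os

from litellm.integrations.SlackAlerting.utils import process_slack_alerting_variables

# Illustrative environment setup
os.environ["SLACK_WEBHOOK_URL"] = "https://hooks.slack.com/services/T000/B000/XXXX"

resolved = process_slack_alerting_variables(
    alert_to_webhook_url={
        "llm_too_slow": ["os.environ/SLACK_WEBHOOK_URL", "https://webhook.site/plain-url"],
        "budget_alerts": "os.environ/SLACK_WEBHOOK_URL",
    }
)
# resolved == {
#     "llm_too_slow": ["https://hooks.slack.com/services/T000/B000/XXXX", "https://webhook.site/plain-url"],
#     "budget_alerts": "https://hooks.slack.com/services/T000/B000/XXXX",
# }
```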
+ "embeddings", + "image_generation", + "moderation", + "audio_transcription", + ], ): """ Runs in parallel to LLM API call diff --git a/litellm/proxy/example_config_yaml/custom_guardrail.py b/litellm/proxy/example_config_yaml/custom_guardrail.py index d8d63ab0a..0d834f24f 100644 --- a/litellm/proxy/example_config_yaml/custom_guardrail.py +++ b/litellm/proxy/example_config_yaml/custom_guardrail.py @@ -61,7 +61,13 @@ class myCustomGuardrail(CustomGuardrail): self, data: dict, user_api_key_dict: UserAPIKeyAuth, - call_type: Literal["completion", "embeddings", "image_generation"], + call_type: Literal[ + "completion", + "embeddings", + "image_generation", + "moderation", + "audio_transcription", + ], ): """ Runs in parallel to LLM API call diff --git a/litellm/proxy/guardrails/guardrail_hooks/aporia_ai.py b/litellm/proxy/guardrails/guardrail_hooks/aporia_ai.py index 23ac33927..ab6d4a705 100644 --- a/litellm/proxy/guardrails/guardrail_hooks/aporia_ai.py +++ b/litellm/proxy/guardrails/guardrail_hooks/aporia_ai.py @@ -177,7 +177,13 @@ class AporiaGuardrail(CustomGuardrail): self, data: dict, user_api_key_dict: UserAPIKeyAuth, - call_type: Literal["completion", "embeddings", "image_generation"], + call_type: Literal[ + "completion", + "embeddings", + "image_generation", + "moderation", + "audio_transcription", + ], ): from litellm.proxy.common_utils.callback_utils import ( add_guardrail_to_applied_guardrails_header, diff --git a/litellm/proxy/guardrails/guardrail_hooks/bedrock_guardrails.py b/litellm/proxy/guardrails/guardrail_hooks/bedrock_guardrails.py index 4c47aeb86..a18d8db0e 100644 --- a/litellm/proxy/guardrails/guardrail_hooks/bedrock_guardrails.py +++ b/litellm/proxy/guardrails/guardrail_hooks/bedrock_guardrails.py @@ -218,7 +218,7 @@ class BedrockGuardrail(CustomGuardrail, BaseAWSLLM): response = await self.async_handler.post( url=prepared_request.url, json=request_data, # type: ignore - headers=prepared_request.headers, + headers=dict(prepared_request.headers), ) verbose_proxy_logger.debug("Bedrock AI response: %s", response.text) if response.status_code == 200: @@ -243,7 +243,13 @@ class BedrockGuardrail(CustomGuardrail, BaseAWSLLM): self, data: dict, user_api_key_dict: UserAPIKeyAuth, - call_type: Literal["completion", "embeddings", "image_generation"], + call_type: Literal[ + "completion", + "embeddings", + "image_generation", + "moderation", + "audio_transcription", + ], ): from litellm.proxy.common_utils.callback_utils import ( add_guardrail_to_applied_guardrails_header, diff --git a/litellm/proxy/guardrails/guardrail_hooks/custom_guardrail.py b/litellm/proxy/guardrails/guardrail_hooks/custom_guardrail.py index d8d63ab0a..0d834f24f 100644 --- a/litellm/proxy/guardrails/guardrail_hooks/custom_guardrail.py +++ b/litellm/proxy/guardrails/guardrail_hooks/custom_guardrail.py @@ -61,7 +61,13 @@ class myCustomGuardrail(CustomGuardrail): self, data: dict, user_api_key_dict: UserAPIKeyAuth, - call_type: Literal["completion", "embeddings", "image_generation"], + call_type: Literal[ + "completion", + "embeddings", + "image_generation", + "moderation", + "audio_transcription", + ], ): """ Runs in parallel to LLM API call diff --git a/litellm/proxy/guardrails/guardrail_hooks/lakera_ai.py b/litellm/proxy/guardrails/guardrail_hooks/lakera_ai.py index 99b56daa8..d84966fd7 100644 --- a/litellm/proxy/guardrails/guardrail_hooks/lakera_ai.py +++ b/litellm/proxy/guardrails/guardrail_hooks/lakera_ai.py @@ -143,6 +143,7 @@ class lakeraAI_Moderation(CustomGuardrail): ): return text = "" + _json_data: str 
= "" if "messages" in data and isinstance(data["messages"], list): prompt_injection_obj: Optional[GuardrailItem] = ( litellm.guardrail_name_config_map.get("prompt_injection") @@ -320,7 +321,13 @@ class lakeraAI_Moderation(CustomGuardrail): self, data: dict, user_api_key_dict: UserAPIKeyAuth, - call_type: Literal["completion", "embeddings", "image_generation"], + call_type: Literal[ + "completion", + "embeddings", + "image_generation", + "moderation", + "audio_transcription", + ], ): if self.event_hook is None: if self.moderation_check == "pre_call": diff --git a/litellm/proxy/health_endpoints/_health_endpoints.py b/litellm/proxy/health_endpoints/_health_endpoints.py index ce80dc288..462034116 100644 --- a/litellm/proxy/health_endpoints/_health_endpoints.py +++ b/litellm/proxy/health_endpoints/_health_endpoints.py @@ -174,6 +174,7 @@ async def health_services_endpoint( not in proxy_logging_obj.slack_alerting_instance.alert_types ): continue + test_message = "default test message" if alert_type == "llm_exceptions": test_message = f"LLM Exception test alert" @@ -189,6 +190,8 @@ async def health_services_endpoint( test_message = f"Outage Alert Exception test alert" elif alert_type == "daily_reports": test_message = f"Daily Reports test alert" + else: + test_message = f"Budget Alert test alert" await proxy_logging_obj.alerting_handler( message=test_message, level="Low", alert_type=alert_type @@ -354,7 +357,7 @@ async def health_endpoint( db_health_cache = {"status": "unknown", "last_updated": datetime.now()} -def _db_health_readiness_check(): +async def _db_health_readiness_check(): from litellm.proxy.proxy_server import prisma_client global db_health_cache @@ -365,7 +368,12 @@ def _db_health_readiness_check(): time_diff = datetime.now() - db_health_cache["last_updated"] if db_health_cache["status"] != "unknown" and time_diff < timedelta(minutes=2): return db_health_cache - prisma_client.health_check() + + if prisma_client is None: + db_health_cache = {"status": "disconnected", "last_updated": datetime.now()} + return db_health_cache + + await prisma_client.health_check() db_health_cache = {"status": "connected", "last_updated": datetime.now()} return db_health_cache @@ -478,7 +486,7 @@ async def health_readiness(): # check DB if prisma_client is not None: # if db passed in, check if it's connected - db_health_status = _db_health_readiness_check() + db_health_status = await _db_health_readiness_check() return { "status": "healthy", "db": "connected", diff --git a/litellm/proxy/hooks/prompt_injection_detection.py b/litellm/proxy/hooks/prompt_injection_detection.py index ed33e3b51..9c1f1eb95 100644 --- a/litellm/proxy/hooks/prompt_injection_detection.py +++ b/litellm/proxy/hooks/prompt_injection_detection.py @@ -7,18 +7,22 @@ ## Reject a call if it contains a prompt injection attack. 
diff --git a/litellm/proxy/hooks/prompt_injection_detection.py b/litellm/proxy/hooks/prompt_injection_detection.py
index ed33e3b51..9c1f1eb95 100644
--- a/litellm/proxy/hooks/prompt_injection_detection.py
+++ b/litellm/proxy/hooks/prompt_injection_detection.py
@@ -7,18 +7,22 @@
 ## Reject a call if it contains a prompt injection attack.
 
-from typing import Optional, Literal
-import litellm
-from litellm.caching import DualCache
-from litellm.proxy._types import UserAPIKeyAuth, LiteLLMPromptInjectionParams
-from litellm.integrations.custom_logger import CustomLogger
-from litellm._logging import verbose_proxy_logger
-from litellm.utils import get_formatted_prompt
-from litellm.llms.prompt_templates.factory import prompt_injection_detection_default_pt
-from fastapi import HTTPException
-import json, traceback, re
-from difflib import SequenceMatcher
-from typing import List
+import json
+import re
+import traceback
+from difflib import SequenceMatcher
+from typing import List, Literal, Optional
+
+from fastapi import HTTPException
+from typing_extensions import overload
+
+import litellm
+from litellm._logging import verbose_proxy_logger
+from litellm.caching import DualCache
+from litellm.integrations.custom_logger import CustomLogger
+from litellm.llms.prompt_templates.factory import prompt_injection_detection_default_pt
+from litellm.proxy._types import LiteLLMPromptInjectionParams, UserAPIKeyAuth
+from litellm.utils import get_formatted_prompt
 
 
 class _OPTIONAL_PromptInjectionDetection(CustomLogger):
@@ -201,7 +205,7 @@ class _OPTIONAL_PromptInjectionDetection(CustomLogger):
                 and self.prompt_injection_params is not None
                 and self.prompt_injection_params.reject_as_response
             ):
-                return e.detail["error"]
+                return e.detail.get("error")
             raise e
         except Exception as e:
             verbose_proxy_logger.error(
@@ -211,18 +215,24 @@ class _OPTIONAL_PromptInjectionDetection(CustomLogger):
             )
             verbose_proxy_logger.debug(traceback.format_exc())
 
-    async def async_moderation_hook(
+    async def async_moderation_hook(  # type: ignore
         self,
         data: dict,
         user_api_key_dict: UserAPIKeyAuth,
-        call_type: Literal["completion", "embeddings", "image_generation"],
-    ):
+        call_type: Literal[
+            "completion",
+            "embeddings",
+            "image_generation",
+            "moderation",
+            "audio_transcription",
+        ],
+    ) -> Optional[bool]:
         self.print_verbose(
             f"IN ASYNC MODERATION HOOK - self.prompt_injection_params = {self.prompt_injection_params}"
         )
 
         if self.prompt_injection_params is None:
-            return
+            return None
 
         formatted_prompt = get_formatted_prompt(data=data, call_type=call_type)  # type: ignore
         is_prompt_attack = False
diff --git a/litellm/proxy/proxy_config.yaml b/litellm/proxy/proxy_config.yaml
index 51e67963f..07251b562 100644
--- a/litellm/proxy/proxy_config.yaml
+++ b/litellm/proxy/proxy_config.yaml
@@ -20,9 +20,20 @@ model_list:
       api_key: fake-key
       api_base: https://exampleopenaiendpoint-production.up.railway.app
 
-general_settings: 
+
+general_settings: 
+  master_key: sk-1234
+  alerting: ["slack"]
+  alerting_threshold: 0.0001 # (Seconds) set an artifically low threshold for testing alerting
+  alert_to_webhook_url: {
+    "llm_too_slow": [
+      "os.environ/SLACK_WEBHOOK_URL",
+      "os.environ/SLACK_WEBHOOK_URL_2",
+    ],
+  }
   key_management_system: "azure_key_vault"
 
+
 litellm_settings:
   success_callback: ["prometheus"]
"llm_requests_hanging", - "budget_alerts", - "db_exceptions", - ], + alert_type: AlertType, request_data: Optional[dict] = None, ): """ @@ -1302,6 +1297,7 @@ class PrismaClient: table_name is not None and table_name == "key" ): # check if plain text or hash + hashed_token = None if token is not None: if isinstance(token, str): hashed_token = token @@ -1712,7 +1708,7 @@ class PrismaClient: updated_table_row = self.db.litellm_config.upsert( where={"param_name": k}, data={ - "create": {"param_name": k, "param_value": updated_data}, + "create": {"param_name": k, "param_value": updated_data}, # type: ignore "update": {"param_value": updated_data}, }, ) @@ -2265,11 +2261,15 @@ class DBClient: """ For closing connection on server shutdown """ - return await self.db.disconnect() + if self.db is not None: + return await self.db.disconnect() # type: ignore + return asyncio.sleep(0) # Return a dummy coroutine if self.db is None ### CUSTOM FILE ### def get_instance_fn(value: str, config_file_path: Optional[str] = None) -> Any: + module_name = value + instance_name = None try: print_verbose(f"value: {value}") # Split the path by dots to separate module from instance @@ -2363,6 +2363,15 @@ async def send_email(receiver_email, subject, html): "sending email from %s to %s", sender_email, receiver_email ) + if smtp_host is None: + raise ValueError("Trying to use SMTP, but SMTP_HOST is not set") + + if smtp_username is None: + raise ValueError("Trying to use SMTP, but SMTP_USERNAME is not set") + + if smtp_password is None: + raise ValueError("Trying to use SMTP, but SMTP_PASSWORD is not set") + # Attach the body to the email email_message.attach(MIMEText(html, "html")) @@ -2555,6 +2564,7 @@ async def update_spend( spend_logs: list, """ n_retry_times = 3 + i = None ### UPDATE USER TABLE ### if len(prisma_client.user_list_transactons.keys()) > 0: for i in range(n_retry_times + 1): @@ -2930,6 +2940,8 @@ async def update_spend( ) break except httpx.ReadTimeout: + if i is None: + i = 0 if i >= n_retry_times: # If we've reached the maximum number of retries raise # Re-raise the last exception # Optionally, sleep for a bit before retrying @@ -3044,10 +3056,11 @@ def get_error_message_str(e: Exception) -> str: elif isinstance(e.detail, dict): error_message = json.dumps(e.detail) elif hasattr(e, "message"): - if isinstance(e.message, "str"): - error_message = e.message - elif isinstance(e.message, dict): - error_message = json.dumps(e.message) + _error = getattr(e, "message", None) + if isinstance(_error, str): + error_message = _error + elif isinstance(_error, dict): + error_message = json.dumps(_error) else: error_message = str(e) else: