[Feat-Proxy] Slack Alerting - allow using os.environ/ vars for alert to webhook url (#5726)
* allow using os.environ for slack urls
* use env vars for webhook urls
* fix types for get_secret
* fix linting
* fix linting
* fix linting
* linting fixes
* linting fix
* docs alerting slack
* fix get data
This commit is contained in: parent 8103e2b2da, commit b6ae2204a8.
23 changed files with 286 additions and 84 deletions
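In short: `alert_to_webhook_url` entries in the proxy config can now hold `os.environ/` references (resolved through `get_secret` when the Slack integration starts up) instead of only literal webhook URLs. A minimal config sketch — the env var name and URL here are illustrative, not from this commit:

```yaml
general_settings:
  alerting: ["slack"]
  alert_to_webhook_url: {
    # resolved from the SLACK_WEBHOOK_URL env var at startup
    "llm_exceptions": ["os.environ/SLACK_WEBHOOK_URL"],
    # plain URLs still work; a list fans one alert type out to several channels
    "llm_too_slow": ["https://hooks.example.com/T000/B000"],
  }
```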
@@ -1,4 +1,6 @@
 import Image from '@theme/IdealImage';
+import Tabs from '@theme/Tabs';
+import TabItem from '@theme/TabItem';
 
 # 🚨 Alerting / Webhooks
 
@@ -149,6 +151,10 @@ spend_reports -> go to slack channel #llm-spend-reports
 
 Set `alert_to_webhook_url` on your config.yaml
 
+<Tabs>
+
+<TabItem label="1 channel per alert" value="1">
+
 ```yaml
 model_list:
   - model_name: gpt-4
@@ -177,6 +183,44 @@ general_settings:
 litellm_settings:
   success_callback: ["langfuse"]
 ```
+</TabItem>
+
+<TabItem label="multiple channels per alert" value="2">
+
+Provide multiple slack channels for a given alert type
+
+```yaml
+model_list:
+  - model_name: gpt-4
+    litellm_params:
+      model: openai/fake
+      api_key: fake-key
+      api_base: https://exampleopenaiendpoint-production.up.railway.app/
+
+general_settings:
+  master_key: sk-1234
+  alerting: ["slack"]
+  alerting_threshold: 0.0001 # (Seconds) set an artificially low threshold for testing alerting
+  alert_to_webhook_url: {
+    "llm_exceptions": ["os.environ/SLACK_WEBHOOK_URL", "os.environ/SLACK_WEBHOOK_URL_2"],
+    "llm_too_slow": ["https://webhook.site/7843a980-a494-4967-80fb-d502dbc16886", "https://webhook.site/28cfb179-f4fb-4408-8129-729ff55cf213"],
+    "llm_requests_hanging": ["os.environ/SLACK_WEBHOOK_URL_5", "os.environ/SLACK_WEBHOOK_URL_6"],
+    "budget_alerts": ["os.environ/SLACK_WEBHOOK_URL_7", "os.environ/SLACK_WEBHOOK_URL_8"],
+    "db_exceptions": ["os.environ/SLACK_WEBHOOK_URL_9", "os.environ/SLACK_WEBHOOK_URL_10"],
+    "daily_reports": ["os.environ/SLACK_WEBHOOK_URL_11", "os.environ/SLACK_WEBHOOK_URL_12"],
+    "spend_reports": ["os.environ/SLACK_WEBHOOK_URL_13", "os.environ/SLACK_WEBHOOK_URL_14"],
+    "cooldown_deployment": ["os.environ/SLACK_WEBHOOK_URL_15", "os.environ/SLACK_WEBHOOK_URL_16"],
+    "new_model_added": ["os.environ/SLACK_WEBHOOK_URL_17", "os.environ/SLACK_WEBHOOK_URL_18"],
+    "outage_alerts": ["os.environ/SLACK_WEBHOOK_URL_19", "os.environ/SLACK_WEBHOOK_URL_20"],
+  }
+
+litellm_settings:
+  success_callback: ["langfuse"]
+```
+
+</TabItem>
+
+</Tabs>
 
 Test it - send a valid llm request - expect to see a `llm_too_slow` alert in its own slack channel
 
@@ -193,36 +237,6 @@ curl -i http://localhost:4000/v1/chat/completions \
 ```
 
 
-### Provide multiple slack channels for a given alert type
-
-Just add it like this - `alert_type: [<hook_url_channel_1>, <hook_url_channel_2>]`.
-
-1. Setup config.yaml
-
-```yaml
-general_settings:
-  master_key: sk-1234
-  alerting: ["slack"]
-  alert_to_webhook_url: {
-    "spend_reports": ["https://webhook.site/7843a980-a494-4967-80fb-d502dbc16886", "https://webhook.site/28cfb179-f4fb-4408-8129-729ff55cf213"]
-  }
-```
-
-2. Start proxy
-
-```bash
-litellm --config /path/to/config.yaml
-```
-
-3. Test it!
-
-```bash
-curl -X GET 'http://0.0.0.0:4000/health/services?service=slack' \
-  -H 'Authorization: Bearer sk-1234'
-```
-
-In case of error, check server logs for the error message!
-
 ### Using MS Teams Webhooks
 
 MS Teams provides a slack compatible webhook url that you can use for alerting
@@ -57,7 +57,7 @@ class MyCustomHandler(CustomLogger): # https://docs.litellm.ai/docs/observabilit
         self,
         data: dict,
         user_api_key_dict: UserAPIKeyAuth,
-        call_type: Literal["completion", "embeddings", "image_generation"],
+        call_type: Literal["completion", "embeddings", "image_generation", "moderation", "audio_transcription"],
     ):
         pass
 
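Why widen the `Literal` rather than fall back to `str`: the type checker can then verify that every moderation hook is only invoked with a call type it declares. A standalone sketch of the idea (toy function, not the LiteLLM API):

```python
from typing import Literal

CallType = Literal[
    "completion",
    "embeddings",
    "image_generation",
    "moderation",
    "audio_transcription",
]

def describe(call_type: CallType) -> str:
    # mypy/pyright narrow call_type to the five allowed strings here
    return f"moderation hook invoked for a {call_type} request"

describe("audio_transcription")  # ok
# describe("fine_tuning")        # rejected by the type checker
```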
@@ -84,7 +84,7 @@ class myCustomGuardrail(CustomGuardrail):
         self,
         data: dict,
         user_api_key_dict: UserAPIKeyAuth,
-        call_type: Literal["completion", "embeddings", "image_generation"],
+        call_type: Literal["completion", "embeddings", "image_generation", "moderation", "audio_transcription"],
     ):
         """
         Runs in parallel to LLM API call
@@ -174,7 +174,13 @@ class AporiaGuardrail(CustomGuardrail):
         self,
         data: dict,
         user_api_key_dict: UserAPIKeyAuth,
-        call_type: Literal["completion", "embeddings", "image_generation"],
+        call_type: Literal[
+            "completion",
+            "embeddings",
+            "image_generation",
+            "moderation",
+            "audio_transcription",
+        ],
     ):
         from litellm.proxy.common_utils.callback_utils import (
             add_guardrail_to_applied_guardrails_header,
@@ -97,7 +97,13 @@ class _ENTERPRISE_GoogleTextModeration(CustomLogger):
         self,
         data: dict,
         user_api_key_dict: UserAPIKeyAuth,
-        call_type: Literal["completion", "embeddings", "image_generation"],
+        call_type: Literal[
+            "completion",
+            "embeddings",
+            "image_generation",
+            "moderation",
+            "audio_transcription",
+        ],
     ):
         """
         - Calls Google's Text Moderation API
@@ -100,7 +100,13 @@ class _ENTERPRISE_LlamaGuard(CustomLogger):
         self,
         data: dict,
         user_api_key_dict: UserAPIKeyAuth,
-        call_type: Literal["completion", "embeddings", "image_generation"],
+        call_type: Literal[
+            "completion",
+            "embeddings",
+            "image_generation",
+            "moderation",
+            "audio_transcription",
+        ],
     ):
         """
         - Calls the Llama Guard Endpoint
@@ -127,7 +127,13 @@ class _ENTERPRISE_LLMGuard(CustomLogger):
         self,
         data: dict,
         user_api_key_dict: UserAPIKeyAuth,
-        call_type: Literal["completion", "embeddings", "image_generation"],
+        call_type: Literal[
+            "completion",
+            "embeddings",
+            "image_generation",
+            "moderation",
+            "audio_transcription",
+        ],
     ):
         """
         - Calls the LLM Guard Endpoint
@@ -43,7 +43,13 @@ class _ENTERPRISE_OpenAI_Moderation(CustomLogger):
         self,
         data: dict,
         user_api_key_dict: UserAPIKeyAuth,
-        call_type: Literal["completion", "embeddings", "image_generation"],
+        call_type: Literal[
+            "completion",
+            "embeddings",
+            "image_generation",
+            "moderation",
+            "audio_transcription",
+        ],
     ):
         if "messages" in data and isinstance(data["messages"], list):
             text = ""
@@ -41,6 +41,7 @@ from litellm.types.router import LiteLLM_Params
 from ..email_templates.templates import *
 from .batching_handler import send_to_webhook, squash_payloads
 from .types import *
+from .utils import process_slack_alerting_variables
 
 
 class SlackAlerting(CustomBatchLogger):
@@ -70,7 +71,7 @@ class SlackAlerting(CustomBatchLogger):
             "outage_alerts",
         ],
         alert_to_webhook_url: Optional[
-            Dict
+            Dict[AlertType, Union[List[str], str]]
         ] = None,  # if user wants to separate alerts to diff channels
         alerting_args={},
         default_webhook_url: Optional[str] = None,
@@ -85,7 +86,9 @@ class SlackAlerting(CustomBatchLogger):
         self.async_http_handler = get_async_httpx_client(
             llm_provider=httpxSpecialProvider.LoggingCallback
         )
-        self.alert_to_webhook_url = alert_to_webhook_url
+        self.alert_to_webhook_url = process_slack_alerting_variables(
+            alert_to_webhook_url=alert_to_webhook_url
+        )
        self.is_running = False
        self.alerting_args = SlackAlertingArgs(**alerting_args)
        self.default_webhook_url = default_webhook_url
@@ -97,7 +100,7 @@ class SlackAlerting(CustomBatchLogger):
         alerting: Optional[List] = None,
         alerting_threshold: Optional[float] = None,
         alert_types: Optional[List] = None,
-        alert_to_webhook_url: Optional[Dict] = None,
+        alert_to_webhook_url: Optional[Dict[AlertType, Union[List[str], str]]] = None,
         alerting_args: Optional[Dict] = None,
         llm_router: Optional[litellm.Router] = None,
     ):
@@ -113,9 +116,17 @@ class SlackAlerting(CustomBatchLogger):
         if alert_to_webhook_url is not None:
             # update the dict
             if self.alert_to_webhook_url is None:
-                self.alert_to_webhook_url = alert_to_webhook_url
+                self.alert_to_webhook_url = process_slack_alerting_variables(
+                    alert_to_webhook_url=alert_to_webhook_url
+                )
             else:
-                self.alert_to_webhook_url.update(alert_to_webhook_url)
+                _new_values = (
+                    process_slack_alerting_variables(
+                        alert_to_webhook_url=alert_to_webhook_url
+                    )
+                    or {}
+                )
+                self.alert_to_webhook_url.update(_new_values)
         if llm_router is not None:
             self.llm_router = llm_router
 
litellm/integrations/SlackAlerting/utils.py (new file, 51 lines)
@@ -0,0 +1,51 @@
+"""
+Utils used for slack alerting
+"""
+
+from typing import Dict, List, Optional, Union
+
+import litellm
+from litellm.proxy._types import AlertType
+from litellm.secret_managers.main import get_secret
+
+
+def process_slack_alerting_variables(
+    alert_to_webhook_url: Optional[Dict[AlertType, Union[List[str], str]]]
+) -> Optional[Dict[AlertType, Union[List[str], str]]]:
+    """
+    process alert_to_webhook_url
+    - check if any urls are set as os.environ/SLACK_WEBHOOK_URL_1 read env var and set the correct value
+    """
+    if alert_to_webhook_url is None:
+        return None
+
+    for alert_type, webhook_urls in alert_to_webhook_url.items():
+        if isinstance(webhook_urls, list):
+            _webhook_values: List[str] = []
+            for webhook_url in webhook_urls:
+                if "os.environ/" in webhook_url:
+                    _env_value = get_secret(secret_name=webhook_url)
+                    if not isinstance(_env_value, str):
+                        raise ValueError(
+                            f"Invalid webhook url value for: {webhook_url}. Got type={type(_env_value)}"
+                        )
+                    _webhook_values.append(_env_value)
+                else:
+                    _webhook_values.append(webhook_url)
+
+            alert_to_webhook_url[alert_type] = _webhook_values
+        else:
+            _webhook_value_str: str = webhook_urls
+            if "os.environ/" in webhook_urls:
+                _env_value = get_secret(secret_name=webhook_urls)
+                if not isinstance(_env_value, str):
+                    raise ValueError(
+                        f"Invalid webhook url value for: {webhook_urls}. Got type={type(_env_value)}"
+                    )
+                _webhook_value_str = _env_value
+            else:
+                _webhook_value_str = webhook_urls
+
+            alert_to_webhook_url[alert_type] = _webhook_value_str
+
+    return alert_to_webhook_url
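A quick usage sketch for the new helper (env var name and URLs illustrative): values prefixed with `os.environ/` are swapped for the secret's value via `get_secret`, plain strings pass through, and a non-string secret raises `ValueError`:

```python
import os

from litellm.integrations.SlackAlerting.utils import process_slack_alerting_variables

os.environ["SLACK_WEBHOOK_URL"] = "https://hooks.example.com/abc"

resolved = process_slack_alerting_variables(
    alert_to_webhook_url={
        "llm_exceptions": ["os.environ/SLACK_WEBHOOK_URL"],  # resolved from the env
        "llm_too_slow": "https://hooks.example.com/def",     # passed through as-is
    }
)
# {'llm_exceptions': ['https://hooks.example.com/abc'],
#  'llm_too_slow': 'https://hooks.example.com/def'}
```

Note the helper mutates and returns the same dict, so callers can use either style.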
@@ -17,6 +17,11 @@ class CustomGuardrail(CustomLogger):
         self.event_hook: Optional[GuardrailEventHooks] = event_hook
         super().__init__(**kwargs)
 
+        # older v1 implementation - not used, just kept for backward compatibility
+        self.moderation_check: Literal["pre_call", "in_parallel"] = kwargs.get(
+            "moderation_check", "pre_call"
+        )
+
     def should_run_guardrail(self, data, event_type: GuardrailEventHooks) -> bool:
         metadata = data.get("metadata") or {}
         requested_guardrails = metadata.get("guardrails") or []
@@ -151,7 +151,13 @@ class CustomLogger: # https://docs.litellm.ai/docs/observability/custom_callbac
         self,
         data: dict,
         user_api_key_dict: UserAPIKeyAuth,
-        call_type: Literal["completion", "embeddings", "image_generation"],
+        call_type: Literal[
+            "completion",
+            "embeddings",
+            "image_generation",
+            "moderation",
+            "audio_transcription",
+        ],
     ):
         pass
 
@@ -52,7 +52,13 @@ class MyCustomHandler(
         self,
         data: dict,
         user_api_key_dict: UserAPIKeyAuth,
-        call_type: Literal["completion", "embeddings", "image_generation"],
+        call_type: Literal[
+            "completion",
+            "embeddings",
+            "image_generation",
+            "moderation",
+            "audio_transcription",
+        ],
     ):
         pass
 
@@ -61,7 +61,13 @@ class myCustomGuardrail(CustomGuardrail):
         self,
         data: dict,
         user_api_key_dict: UserAPIKeyAuth,
-        call_type: Literal["completion", "embeddings", "image_generation"],
+        call_type: Literal[
+            "completion",
+            "embeddings",
+            "image_generation",
+            "moderation",
+            "audio_transcription",
+        ],
     ):
         """
         Runs in parallel to LLM API call
@@ -61,7 +61,13 @@ class myCustomGuardrail(CustomGuardrail):
         self,
         data: dict,
         user_api_key_dict: UserAPIKeyAuth,
-        call_type: Literal["completion", "embeddings", "image_generation"],
+        call_type: Literal[
+            "completion",
+            "embeddings",
+            "image_generation",
+            "moderation",
+            "audio_transcription",
+        ],
     ):
         """
         Runs in parallel to LLM API call
@@ -177,7 +177,13 @@ class AporiaGuardrail(CustomGuardrail):
         self,
         data: dict,
         user_api_key_dict: UserAPIKeyAuth,
-        call_type: Literal["completion", "embeddings", "image_generation"],
+        call_type: Literal[
+            "completion",
+            "embeddings",
+            "image_generation",
+            "moderation",
+            "audio_transcription",
+        ],
     ):
         from litellm.proxy.common_utils.callback_utils import (
             add_guardrail_to_applied_guardrails_header,
@@ -218,7 +218,7 @@ class BedrockGuardrail(CustomGuardrail, BaseAWSLLM):
         response = await self.async_handler.post(
             url=prepared_request.url,
             json=request_data,  # type: ignore
-            headers=prepared_request.headers,
+            headers=dict(prepared_request.headers),
         )
         verbose_proxy_logger.debug("Bedrock AI response: %s", response.text)
         if response.status_code == 200:
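The `dict(...)` coercion is a small type fix: the prepared request here comes out of botocore's signing path, whose `.headers` is (as far as I can tell) a case-insensitive headers object rather than a plain `dict`, and converting it gives httpx a mapping it reliably accepts. A self-contained illustration of why `dict()` works on such objects:

```python
# Any mapping-like object exposing keys() and __getitem__ coerces cleanly.
class HeaderMap:
    """Stand-in for a non-dict headers object (e.g. botocore's HTTPHeaders)."""

    def __init__(self, pairs):
        self._pairs = dict(pairs)

    def keys(self):
        return self._pairs.keys()

    def __getitem__(self, key):
        return self._pairs[key]

prepared_headers = HeaderMap({"Content-Type": "application/json"})
print(dict(prepared_headers))  # {'Content-Type': 'application/json'}
```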
@@ -243,7 +243,13 @@ class BedrockGuardrail(CustomGuardrail, BaseAWSLLM):
         self,
         data: dict,
         user_api_key_dict: UserAPIKeyAuth,
-        call_type: Literal["completion", "embeddings", "image_generation"],
+        call_type: Literal[
+            "completion",
+            "embeddings",
+            "image_generation",
+            "moderation",
+            "audio_transcription",
+        ],
     ):
         from litellm.proxy.common_utils.callback_utils import (
             add_guardrail_to_applied_guardrails_header,
@@ -61,7 +61,13 @@ class myCustomGuardrail(CustomGuardrail):
         self,
         data: dict,
         user_api_key_dict: UserAPIKeyAuth,
-        call_type: Literal["completion", "embeddings", "image_generation"],
+        call_type: Literal[
+            "completion",
+            "embeddings",
+            "image_generation",
+            "moderation",
+            "audio_transcription",
+        ],
     ):
         """
         Runs in parallel to LLM API call
@@ -143,6 +143,7 @@ class lakeraAI_Moderation(CustomGuardrail):
         ):
             return
         text = ""
+        _json_data: str = ""
         if "messages" in data and isinstance(data["messages"], list):
             prompt_injection_obj: Optional[GuardrailItem] = (
                 litellm.guardrail_name_config_map.get("prompt_injection")
@@ -320,7 +321,13 @@ class lakeraAI_Moderation(CustomGuardrail):
         self,
         data: dict,
         user_api_key_dict: UserAPIKeyAuth,
-        call_type: Literal["completion", "embeddings", "image_generation"],
+        call_type: Literal[
+            "completion",
+            "embeddings",
+            "image_generation",
+            "moderation",
+            "audio_transcription",
+        ],
     ):
         if self.event_hook is None:
             if self.moderation_check == "pre_call":
@@ -174,6 +174,7 @@ async def health_services_endpoint(
                 not in proxy_logging_obj.slack_alerting_instance.alert_types
             ):
                 continue
+
             test_message = "default test message"
             if alert_type == "llm_exceptions":
                 test_message = f"LLM Exception test alert"
@@ -189,6 +190,8 @@ async def health_services_endpoint(
                 test_message = f"Outage Alert Exception test alert"
             elif alert_type == "daily_reports":
                 test_message = f"Daily Reports test alert"
+            else:
+                test_message = f"Budget Alert test alert"
 
             await proxy_logging_obj.alerting_handler(
                 message=test_message, level="Low", alert_type=alert_type
@@ -354,7 +357,7 @@ async def health_endpoint(
 db_health_cache = {"status": "unknown", "last_updated": datetime.now()}
 
 
-def _db_health_readiness_check():
+async def _db_health_readiness_check():
     from litellm.proxy.proxy_server import prisma_client
 
     global db_health_cache
@@ -365,7 +368,12 @@ def _db_health_readiness_check():
     time_diff = datetime.now() - db_health_cache["last_updated"]
     if db_health_cache["status"] != "unknown" and time_diff < timedelta(minutes=2):
         return db_health_cache
-    prisma_client.health_check()
+
+    if prisma_client is None:
+        db_health_cache = {"status": "disconnected", "last_updated": datetime.now()}
+        return db_health_cache
+
+    await prisma_client.health_check()
     db_health_cache = {"status": "connected", "last_updated": datetime.now()}
     return db_health_cache
 
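The surrounding function is a small time-bounded cache: a result younger than two minutes is returned as-is, so readiness probes don't hit the database on every poll. The same pattern in a standalone sketch (names are illustrative, not from the codebase):

```python
import asyncio
from datetime import datetime, timedelta

_cache = {"status": "unknown", "last_updated": datetime.now()}

async def cached_health_check(check, ttl: timedelta = timedelta(minutes=2)):
    """Serve the cached status while it is fresh; otherwise re-run the check."""
    global _cache
    age = datetime.now() - _cache["last_updated"]
    if _cache["status"] != "unknown" and age < ttl:
        return _cache  # fresh enough; skip the real check
    ok = await check()
    _cache = {"status": "connected" if ok else "disconnected",
              "last_updated": datetime.now()}
    return _cache

async def main():
    # asyncio.sleep(0, result=True) stands in for prisma_client.health_check()
    print(await cached_health_check(check=lambda: asyncio.sleep(0, result=True)))

asyncio.run(main())
```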
@@ -478,7 +486,7 @@ async def health_readiness():
 
     # check DB
     if prisma_client is not None:  # if db passed in, check if it's connected
-        db_health_status = _db_health_readiness_check()
+        db_health_status = await _db_health_readiness_check()
         return {
             "status": "healthy",
             "db": "connected",
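This caller change is the required counterpart to making `_db_health_readiness_check` a coroutine: calling it without `await` would hand back an un-run coroutine object instead of the health dict. A tiny sketch of the failure mode:

```python
import asyncio

async def _check():
    return {"status": "connected"}

async def readiness():
    wrong = _check()         # a coroutine object, not a dict
    wrong.close()            # close it so Python doesn't warn about it
    right = await _check()   # {'status': 'connected'}
    return right

print(asyncio.run(readiness()))
```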
@@ -7,18 +7,22 @@
 ## Reject a call if it contains a prompt injection attack.
 
 
-from typing import Optional, Literal
-import litellm
-from litellm.caching import DualCache
-from litellm.proxy._types import UserAPIKeyAuth, LiteLLMPromptInjectionParams
-from litellm.integrations.custom_logger import CustomLogger
-from litellm._logging import verbose_proxy_logger
-from litellm.utils import get_formatted_prompt
-from litellm.llms.prompt_templates.factory import prompt_injection_detection_default_pt
-from fastapi import HTTPException
-import json, traceback, re
+import json
+import re
+import traceback
 from difflib import SequenceMatcher
-from typing import List
+from typing import List, Literal, Optional
+
+from fastapi import HTTPException
+from typing_extensions import overload
+
+import litellm
+from litellm._logging import verbose_proxy_logger
+from litellm.caching import DualCache
+from litellm.integrations.custom_logger import CustomLogger
+from litellm.llms.prompt_templates.factory import prompt_injection_detection_default_pt
+from litellm.proxy._types import LiteLLMPromptInjectionParams, UserAPIKeyAuth
+from litellm.utils import get_formatted_prompt
 
 
 class _OPTIONAL_PromptInjectionDetection(CustomLogger):
@@ -201,7 +205,7 @@ class _OPTIONAL_PromptInjectionDetection(CustomLogger):
                 and self.prompt_injection_params is not None
                 and self.prompt_injection_params.reject_as_response
             ):
-                return e.detail["error"]
+                return e.detail.get("error")
             raise e
         except Exception as e:
             verbose_proxy_logger.error(
@@ -211,18 +215,24 @@ class _OPTIONAL_PromptInjectionDetection(CustomLogger):
             )
             verbose_proxy_logger.debug(traceback.format_exc())
 
-    async def async_moderation_hook(
+    async def async_moderation_hook(  # type: ignore
         self,
         data: dict,
         user_api_key_dict: UserAPIKeyAuth,
-        call_type: Literal["completion", "embeddings", "image_generation"],
-    ):
+        call_type: Literal[
+            "completion",
+            "embeddings",
+            "image_generation",
+            "moderation",
+            "audio_transcription",
+        ],
+    ) -> Optional[bool]:
         self.print_verbose(
             f"IN ASYNC MODERATION HOOK - self.prompt_injection_params = {self.prompt_injection_params}"
         )
 
         if self.prompt_injection_params is None:
-            return
+            return None
 
         formatted_prompt = get_formatted_prompt(data=data, call_type=call_type)  # type: ignore
         is_prompt_attack = False
@@ -20,9 +20,20 @@ model_list:
       api_key: fake-key
       api_base: https://exampleopenaiendpoint-production.up.railway.app
 
 
 general_settings:
+  master_key: sk-1234
+  alerting: ["slack"]
+  alerting_threshold: 0.0001 # (Seconds) set an artificially low threshold for testing alerting
+  alert_to_webhook_url: {
+    "llm_too_slow": [
+      "os.environ/SLACK_WEBHOOK_URL",
+      "os.environ/SLACK_WEBHOOK_URL_2",
+    ],
+  }
   key_management_system: "azure_key_vault"
 
 
 litellm_settings:
   success_callback: ["prometheus"]
@@ -93,6 +93,7 @@ def safe_deep_copy(data):
         return data
 
     # Step 1: Remove the litellm_parent_otel_span
+    litellm_parent_otel_span = None
    if isinstance(data, dict):
        # remove litellm_parent_otel_span since this is not picklable
        if "metadata" in data and "litellm_parent_otel_span" in data["metadata"]:
|
||||||
self,
|
self,
|
||||||
message: str,
|
message: str,
|
||||||
level: Literal["Low", "Medium", "High"],
|
level: Literal["Low", "Medium", "High"],
|
||||||
alert_type: Literal[
|
alert_type: AlertType,
|
||||||
"llm_exceptions",
|
|
||||||
"llm_too_slow",
|
|
||||||
"llm_requests_hanging",
|
|
||||||
"budget_alerts",
|
|
||||||
"db_exceptions",
|
|
||||||
],
|
|
||||||
request_data: Optional[dict] = None,
|
request_data: Optional[dict] = None,
|
||||||
):
|
):
|
||||||
"""
|
"""
|
||||||
|
@ -1302,6 +1297,7 @@ class PrismaClient:
|
||||||
table_name is not None and table_name == "key"
|
table_name is not None and table_name == "key"
|
||||||
):
|
):
|
||||||
# check if plain text or hash
|
# check if plain text or hash
|
||||||
|
hashed_token = None
|
||||||
if token is not None:
|
if token is not None:
|
||||||
if isinstance(token, str):
|
if isinstance(token, str):
|
||||||
hashed_token = token
|
hashed_token = token
|
||||||
|
@@ -1712,7 +1708,7 @@ class PrismaClient:
             updated_table_row = self.db.litellm_config.upsert(
                 where={"param_name": k},
                 data={
-                    "create": {"param_name": k, "param_value": updated_data},
+                    "create": {"param_name": k, "param_value": updated_data},  # type: ignore
                     "update": {"param_value": updated_data},
                 },
             )
@@ -2265,11 +2261,15 @@ class DBClient:
         """
         For closing connection on server shutdown
         """
-        return await self.db.disconnect()
+        if self.db is not None:
+            return await self.db.disconnect()  # type: ignore
+        return asyncio.sleep(0)  # Return a dummy coroutine if self.db is None
 
 
 ### CUSTOM FILE ###
 def get_instance_fn(value: str, config_file_path: Optional[str] = None) -> Any:
+    module_name = value
+    instance_name = None
     try:
         print_verbose(f"value: {value}")
         # Split the path by dots to separate module from instance
@@ -2363,6 +2363,15 @@ async def send_email(receiver_email, subject, html):
         "sending email from %s to %s", sender_email, receiver_email
     )
 
+    if smtp_host is None:
+        raise ValueError("Trying to use SMTP, but SMTP_HOST is not set")
+
+    if smtp_username is None:
+        raise ValueError("Trying to use SMTP, but SMTP_USERNAME is not set")
+
+    if smtp_password is None:
+        raise ValueError("Trying to use SMTP, but SMTP_PASSWORD is not set")
+
     # Attach the body to the email
     email_message.attach(MIMEText(html, "html"))
 
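The three guards fail fast with a readable error instead of letting `smtplib` blow up later on a `None` host or credentials. Condensed into a reusable sketch (`_require` is a hypothetical helper, not in this diff):

```python
import os

def _require(env_name: str) -> str:
    """Return the env var's value, or fail the way send_email now does."""
    value = os.getenv(env_name)
    if value is None:
        raise ValueError(f"Trying to use SMTP, but {env_name} is not set")
    return value

smtp_host = _require("SMTP_HOST")
smtp_username = _require("SMTP_USERNAME")
smtp_password = _require("SMTP_PASSWORD")
```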
@@ -2555,6 +2564,7 @@ async def update_spend(
         spend_logs: list,
     """
     n_retry_times = 3
+    i = None
     ### UPDATE USER TABLE ###
     if len(prisma_client.user_list_transactons.keys()) > 0:
         for i in range(n_retry_times + 1):
@@ -2930,6 +2940,8 @@ async def update_spend(
             )
             break
         except httpx.ReadTimeout:
+            if i is None:
+                i = 0
             if i >= n_retry_times:  # If we've reached the maximum number of retries
                 raise  # Re-raise the last exception
             # Optionally, sleep for a bit before retrying
@@ -3044,10 +3056,11 @@ def get_error_message_str(e: Exception) -> str:
         elif isinstance(e.detail, dict):
             error_message = json.dumps(e.detail)
         elif hasattr(e, "message"):
-            if isinstance(e.message, "str"):
-                error_message = e.message
-            elif isinstance(e.message, dict):
-                error_message = json.dumps(e.message)
+            _error = getattr(e, "message", None)
+            if isinstance(_error, str):
+                error_message = _error
+            elif isinstance(_error, dict):
+                error_message = json.dumps(_error)
             else:
                 error_message = str(e)
         else:
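The replaced branch contained a genuine bug: `isinstance(e.message, "str")` passes the string `"str"` where a type is required, so the branch raised `TypeError` the moment it ran. The rewrite also reads `e.message` defensively with `getattr`. A demonstration:

```python
class WeirdError(Exception):
    message = "boom"

e = WeirdError()
try:
    isinstance(e.message, "str")  # the old code path
except TypeError as err:
    print(err)  # isinstance() arg 2 must be a type, a tuple of types, or a union

_error = getattr(e, "message", None)  # the fixed pattern
print(isinstance(_error, str))        # True
```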