Mirror of https://github.com/BerriAI/litellm.git (synced 2025-04-27 19:54:13 +00:00)

Merge pull request #1601 from BerriAI/litellm_improve_slack_alertign
[Feat] Proxy - Improve Slack Alerting

Commit 6dac4ab8aa: 4 changed files with 50 additions and 10 deletions
@@ -7,8 +7,11 @@ handler = logging.StreamHandler()
 handler.setLevel(logging.DEBUG)
 
 # Create a formatter and set it for the handler
-formatter = logging.Formatter("\033[92m%(name)s - %(levelname)s\033[0m: %(message)s")
+formatter = logging.Formatter(
+    "\033[92m%(asctime)s - %(name)s:%(levelname)s\033[0m: %(message)s",
+    datefmt="%H:%M:%S",
+)
 
 handler.setFormatter(formatter)
 
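For context, a minimal self-contained sketch of what the new formatter produces when attached to a standalone logger; the logger name and the debug message are illustrative, not part of the diff:

import logging

handler = logging.StreamHandler()
handler.setLevel(logging.DEBUG)

# Same format string as the diff: green "time - name:LEVEL" prefix, then the message
formatter = logging.Formatter(
    "\033[92m%(asctime)s - %(name)s:%(levelname)s\033[0m: %(message)s",
    datefmt="%H:%M:%S",
)
handler.setFormatter(formatter)

logger = logging.getLogger("litellm")  # illustrative logger name
logger.setLevel(logging.DEBUG)
logger.addHandler(handler)

logger.debug("request received")  # prints e.g. "14:32:05 - litellm:DEBUG: request received" with a green prefix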
@@ -67,6 +67,8 @@ litellm_settings:
 
 general_settings:
   master_key: sk-1234
+  alerting: ["slack"]
+  alerting_threshold: 10 # sends alerts if requests hang for 10+ seconds
   # database_type: "dynamo_db"
   # database_args: { # 👈 all args - https://github.com/BerriAI/litellm/blob/befbcbb7ac8f59835ce47415c128decf37aac328/litellm/proxy/_types.py#L190
   #   "billing_mode": "PAY_PER_REQUEST",
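Enabling alerting: ["slack"] only takes effect if the proxy can find a webhook URL; the alerting handler further down in this diff raises when SLACK_WEBHOOK_URL is unset. A small sketch of the expected environment, with a placeholder webhook URL:

import os

# Placeholder URL - generate a real one from a Slack app's "Incoming Webhooks" page
os.environ["SLACK_WEBHOOK_URL"] = "https://hooks.slack.com/services/T000/B000/XXXXXXXX"

With that set, any request still unfinished after alerting_threshold seconds (10 in this config) triggers a Medium-level Slack alert.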
@@ -1899,6 +1899,8 @@ async def chat_completion(
         else:  # router is not set
             response = await litellm.acompletion(**data)
 
+        # Post Call Processing
+        data["litellm_status"] = "success"  # used for alerting
         if hasattr(response, "_hidden_params"):
             model_id = response._hidden_params.get("model_id", None) or ""
         else:
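The reason data["litellm_status"] is set only after acompletion returns is that the alerting watchdog (see the ProxyLogging changes below) sleeps for the threshold and then checks this flag. A minimal standalone sketch of that interplay, with made-up names and a 2-second threshold for illustration:

import asyncio

ALERT_THRESHOLD_S = 2  # illustrative stand-in for alerting_threshold

async def watchdog(request_data: dict):
    # Sleep for the threshold, then alert only if the request never finished
    await asyncio.sleep(ALERT_THRESHOLD_S)
    if request_data.get("litellm_status", "") != "success":
        print("ALERT: request is hanging")

async def handle_request():
    request_data = {"model": "gpt-3.5-turbo", "messages": []}
    watchdog_task = asyncio.create_task(watchdog(request_data))
    await asyncio.sleep(0.5)                    # pretend the upstream call returned quickly
    request_data["litellm_status"] = "success"  # used for alerting, as in the diff
    await watchdog_task                         # watchdog wakes, sees "success", stays silent

asyncio.run(handle_request())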
@@ -2084,6 +2086,7 @@ async def embeddings(
             response = await litellm.aembedding(**data)
 
         ### ALERTING ###
+        data["litellm_status"] = "success"  # used for alerting
         end_time = time.time()
         asyncio.create_task(
             proxy_logging_obj.response_taking_too_long(
@@ -2199,6 +2202,7 @@ async def image_generation(
         response = await litellm.aimage_generation(**data)
 
         ### ALERTING ###
+        data["litellm_status"] = "success"  # used for alerting
        end_time = time.time()
        asyncio.create_task(
            proxy_logging_obj.response_taking_too_long(
@@ -97,7 +97,7 @@ class ProxyLogging:
         3. /image/generation
         """
         ### ALERTING ###
-        asyncio.create_task(self.response_taking_too_long())
+        asyncio.create_task(self.response_taking_too_long(request_data=data))
 
         try:
             for callback in litellm.callbacks:
@@ -137,24 +137,47 @@ class ProxyLogging:
         start_time: Optional[float] = None,
         end_time: Optional[float] = None,
         type: Literal["hanging_request", "slow_response"] = "hanging_request",
+        request_data: Optional[dict] = None,
     ):
+        if request_data is not None:
+            model = request_data.get("model", "")
+            messages = request_data.get("messages", "")
+            # try casting messages to str and keep the first 10,000 characters, else mark as None
+            try:
+                messages = str(messages)
+                messages = messages[:10000]
+            except:
+                messages = None
+
+            request_info = f"\nRequest Model: {model}\nMessages: {messages}"
+        else:
+            request_info = ""
+
         if type == "hanging_request":
             # Simulate a long-running operation that could take more than 5 minutes
             await asyncio.sleep(
                 self.alerting_threshold
             )  # Set it to 5 minutes - i'd imagine this might be different for streaming, non-streaming, non-completion (embedding + img) requests
-            await self.alerting_handler(
-                message=f"Requests are hanging - {self.alerting_threshold}s+ request time",
-                level="Medium",
-            )
+            if (
+                request_data is not None
+                and request_data.get("litellm_status", "") != "success"
+            ):
+                # only alert hanging responses if they have not been marked as success
+                alerting_message = (
+                    f"Requests are hanging - {self.alerting_threshold}s+ request time"
+                )
+                await self.alerting_handler(
+                    message=alerting_message + request_info,
+                    level="Medium",
+                )
 
         elif (
             type == "slow_response" and start_time is not None and end_time is not None
         ):
+            slow_message = f"Responses are slow - {round(end_time-start_time,2)}s response time > Alerting threshold: {self.alerting_threshold}s"
             if end_time - start_time > self.alerting_threshold:
                 await self.alerting_handler(
-                    message=f"Responses are slow - {round(end_time-start_time,2)}s response time",
+                    message=slow_message + request_info,
                     level="Low",
                 )
 
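To make the alert text concrete, here is a quick sketch of what the new request_info preview evaluates to for a made-up request body; the truncation keeps Slack messages bounded even for very large prompts:

request_data = {
    "model": "gpt-3.5-turbo",
    "messages": [{"role": "user", "content": "hello " * 5000}],  # deliberately oversized
}

model = request_data.get("model", "")
messages = request_data.get("messages", "")
try:
    messages = str(messages)[:10000]  # cap the preview at 10,000 characters
except Exception:
    messages = None

request_info = f"\nRequest Model: {model}\nMessages: {messages}"
print(len(request_info))  # bounded by the cap plus the short prefix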
@@ -173,7 +196,13 @@ class ProxyLogging:
         level: str - Low|Medium|High - if calls might fail (Medium) or are failing (High); Currently, no alerts would be 'Low'.
         message: str - what is the alert about
         """
-        formatted_message = f"Level: {level}\n\nMessage: {message}"
+        from datetime import datetime
+
+        # Get the current timestamp
+        current_time = datetime.now().strftime("%H:%M:%S")
+        formatted_message = (
+            f"Level: {level}\nTimestamp: {current_time}\n\nMessage: {message}"
+        )
         if self.alerting is None:
             return
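As a rough illustration (with made-up level and message values), the formatted_message that now reaches Slack looks like this:

from datetime import datetime

current_time = datetime.now().strftime("%H:%M:%S")
formatted_message = (
    f"Level: Medium\nTimestamp: {current_time}\n\n"
    f"Message: Requests are hanging - 10s+ request time"
)
print(formatted_message)
# Level: Medium
# Timestamp: 14:32:05   (example timestamp)
#
# Message: Requests are hanging - 10s+ request time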
@@ -184,7 +213,9 @@ class ProxyLogging:
             raise Exception("Missing SLACK_WEBHOOK_URL from environment")
         payload = {"text": formatted_message}
         headers = {"Content-type": "application/json"}
-        async with aiohttp.ClientSession() as session:
+        async with aiohttp.ClientSession(
+            connector=aiohttp.TCPConnector(ssl=False)
+        ) as session:
             async with session.post(
                 slack_webhook_url, json=payload, headers=headers
             ) as response:
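For reference, the same webhook call as a self-contained coroutine that can be run outside the proxy; the payload text is a placeholder. Note that ssl=False disables certificate verification for the Slack request, matching the diff:

import asyncio
import os

import aiohttp


async def send_slack_alert(text: str):
    slack_webhook_url = os.getenv("SLACK_WEBHOOK_URL")
    if slack_webhook_url is None:
        raise Exception("Missing SLACK_WEBHOOK_URL from environment")
    payload = {"text": text}
    headers = {"Content-type": "application/json"}
    async with aiohttp.ClientSession(
        connector=aiohttp.TCPConnector(ssl=False)  # skips certificate verification, as in the diff
    ) as session:
        async with session.post(
            slack_webhook_url, json=payload, headers=headers
        ) as response:
            print(response.status)  # Slack replies 200 with body "ok" on success


asyncio.run(send_slack_alert("Level: Medium\n\nMessage: test alert"))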