fix(slack_alerting.py): use in-memory cache for checking request status

This commit is contained in:
Krrish Dholakia 2024-07-02 13:01:59 -07:00
parent ffc173a94c
commit d09a78d7fd
4 changed files with 106 additions and 36 deletions

View file

@@ -606,6 +606,13 @@ class SlackAlerting(CustomLogger):
and request_data.get("litellm_status", "") != "success" and request_data.get("litellm_status", "") != "success"
and request_data.get("litellm_status", "") != "fail" and request_data.get("litellm_status", "") != "fail"
): ):
## CHECK IF CACHE IS UPDATED
litellm_call_id = request_data.get("litellm_call_id", "")
status: Optional[str] = await self.internal_usage_cache.async_get_cache(
key="request_status:{}".format(litellm_call_id), local_only=True
)
if status is not None and (status == "success" or status == "fail"):
return
if request_data.get("deployment", None) is not None and isinstance( if request_data.get("deployment", None) is not None and isinstance(
request_data["deployment"], dict request_data["deployment"], dict
): ):

View file

@@ -2,4 +2,8 @@ model_list:
- model_name: claude-3-5-sonnet # all requests where model not in your config go to this deployment - model_name: claude-3-5-sonnet # all requests where model not in your config go to this deployment
litellm_params: litellm_params:
model: "openai/*" model: "openai/*"
mock_response: "litellm.RateLimitError" mock_response: "Hello world!"
general_settings:
alerting: ["slack"]
alerting_threshold: 10

View file

@@ -3066,8 +3066,11 @@ async def chat_completion(
# Post Call Processing # Post Call Processing
if llm_router is not None: if llm_router is not None:
data["deployment"] = llm_router.get_deployment(model_id=model_id) data["deployment"] = llm_router.get_deployment(model_id=model_id)
data["litellm_status"] = "success" # used for alerting asyncio.create_task(
proxy_logging_obj.update_request_status(
litellm_call_id=data.get("litellm_call_id", ""), status="success"
)
)
if ( if (
"stream" in data and data["stream"] == True "stream" in data and data["stream"] == True
): # use generate_responses to stream responses ): # use generate_responses to stream responses
@@ -3117,7 +3120,6 @@ async def chat_completion(
return response return response
except RejectedRequestError as e: except RejectedRequestError as e:
_data = e.request_data _data = e.request_data
_data["litellm_status"] = "fail" # used for alerting
await proxy_logging_obj.post_call_failure_hook( await proxy_logging_obj.post_call_failure_hook(
user_api_key_dict=user_api_key_dict, user_api_key_dict=user_api_key_dict,
original_exception=e, original_exception=e,
@@ -3150,7 +3152,6 @@ async def chat_completion(
_chat_response.usage = _usage # type: ignore _chat_response.usage = _usage # type: ignore
return _chat_response return _chat_response
except Exception as e: except Exception as e:
data["litellm_status"] = "fail" # used for alerting
verbose_proxy_logger.error( verbose_proxy_logger.error(
"litellm.proxy.proxy_server.chat_completion(): Exception occured - {}\n{}".format( "litellm.proxy.proxy_server.chat_completion(): Exception occured - {}\n{}".format(
get_error_message_str(e=e), traceback.format_exc() get_error_message_str(e=e), traceback.format_exc()
@@ -3306,7 +3307,11 @@ async def completion(
response_cost = hidden_params.get("response_cost", None) or "" response_cost = hidden_params.get("response_cost", None) or ""
### ALERTING ### ### ALERTING ###
data["litellm_status"] = "success" # used for alerting asyncio.create_task(
proxy_logging_obj.update_request_status(
litellm_call_id=data.get("litellm_call_id", ""), status="success"
)
)
verbose_proxy_logger.debug("final response: %s", response) verbose_proxy_logger.debug("final response: %s", response)
if ( if (
@@ -3345,7 +3350,6 @@ async def completion(
return response return response
except RejectedRequestError as e: except RejectedRequestError as e:
_data = e.request_data _data = e.request_data
_data["litellm_status"] = "fail" # used for alerting
await proxy_logging_obj.post_call_failure_hook( await proxy_logging_obj.post_call_failure_hook(
user_api_key_dict=user_api_key_dict, user_api_key_dict=user_api_key_dict,
original_exception=e, original_exception=e,
@@ -3384,7 +3388,6 @@ async def completion(
_response.choices[0].text = e.message _response.choices[0].text = e.message
return _response return _response
except Exception as e: except Exception as e:
data["litellm_status"] = "fail" # used for alerting
await proxy_logging_obj.post_call_failure_hook( await proxy_logging_obj.post_call_failure_hook(
user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data
) )
@@ -3536,7 +3539,11 @@ async def embeddings(
) )
### ALERTING ### ### ALERTING ###
data["litellm_status"] = "success" # used for alerting asyncio.create_task(
proxy_logging_obj.update_request_status(
litellm_call_id=data.get("litellm_call_id", ""), status="success"
)
)
### RESPONSE HEADERS ### ### RESPONSE HEADERS ###
hidden_params = getattr(response, "_hidden_params", {}) or {} hidden_params = getattr(response, "_hidden_params", {}) or {}
@@ -3559,7 +3566,6 @@ async def embeddings(
return response return response
except Exception as e: except Exception as e:
data["litellm_status"] = "fail" # used for alerting
await proxy_logging_obj.post_call_failure_hook( await proxy_logging_obj.post_call_failure_hook(
user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data
) )
@@ -3687,8 +3693,11 @@ async def image_generation(
) )
### ALERTING ### ### ALERTING ###
data["litellm_status"] = "success" # used for alerting asyncio.create_task(
proxy_logging_obj.update_request_status(
litellm_call_id=data.get("litellm_call_id", ""), status="success"
)
)
### RESPONSE HEADERS ### ### RESPONSE HEADERS ###
hidden_params = getattr(response, "_hidden_params", {}) or {} hidden_params = getattr(response, "_hidden_params", {}) or {}
model_id = hidden_params.get("model_id", None) or "" model_id = hidden_params.get("model_id", None) or ""
@@ -3710,7 +3719,6 @@ async def image_generation(
return response return response
except Exception as e: except Exception as e:
data["litellm_status"] = "fail" # used for alerting
await proxy_logging_obj.post_call_failure_hook( await proxy_logging_obj.post_call_failure_hook(
user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data
) )
@@ -3825,7 +3833,11 @@ async def audio_speech(
) )
### ALERTING ### ### ALERTING ###
data["litellm_status"] = "success" # used for alerting asyncio.create_task(
proxy_logging_obj.update_request_status(
litellm_call_id=data.get("litellm_call_id", ""), status="success"
)
)
### RESPONSE HEADERS ### ### RESPONSE HEADERS ###
hidden_params = getattr(response, "_hidden_params", {}) or {} hidden_params = getattr(response, "_hidden_params", {}) or {}
@@ -3991,7 +4003,11 @@ async def audio_transcriptions(
os.remove(file.filename) # Delete the saved file os.remove(file.filename) # Delete the saved file
### ALERTING ### ### ALERTING ###
data["litellm_status"] = "success" # used for alerting asyncio.create_task(
proxy_logging_obj.update_request_status(
litellm_call_id=data.get("litellm_call_id", ""), status="success"
)
)
### RESPONSE HEADERS ### ### RESPONSE HEADERS ###
hidden_params = getattr(response, "_hidden_params", {}) or {} hidden_params = getattr(response, "_hidden_params", {}) or {}
@@ -4014,7 +4030,6 @@ async def audio_transcriptions(
return response return response
except Exception as e: except Exception as e:
data["litellm_status"] = "fail" # used for alerting
await proxy_logging_obj.post_call_failure_hook( await proxy_logging_obj.post_call_failure_hook(
user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data
) )
@@ -4093,7 +4108,11 @@ async def get_assistants(
response = await llm_router.aget_assistants(**data) response = await llm_router.aget_assistants(**data)
### ALERTING ### ### ALERTING ###
data["litellm_status"] = "success" # used for alerting asyncio.create_task(
proxy_logging_obj.update_request_status(
litellm_call_id=data.get("litellm_call_id", ""), status="success"
)
)
### RESPONSE HEADERS ### ### RESPONSE HEADERS ###
hidden_params = getattr(response, "_hidden_params", {}) or {} hidden_params = getattr(response, "_hidden_params", {}) or {}
@@ -4114,7 +4133,6 @@ async def get_assistants(
return response return response
except Exception as e: except Exception as e:
data["litellm_status"] = "fail" # used for alerting
await proxy_logging_obj.post_call_failure_hook( await proxy_logging_obj.post_call_failure_hook(
user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data
) )
@@ -4185,7 +4203,11 @@ async def create_threads(
response = await llm_router.acreate_thread(**data) response = await llm_router.acreate_thread(**data)
### ALERTING ### ### ALERTING ###
data["litellm_status"] = "success" # used for alerting asyncio.create_task(
proxy_logging_obj.update_request_status(
litellm_call_id=data.get("litellm_call_id", ""), status="success"
)
)
### RESPONSE HEADERS ### ### RESPONSE HEADERS ###
hidden_params = getattr(response, "_hidden_params", {}) or {} hidden_params = getattr(response, "_hidden_params", {}) or {}
@@ -4206,7 +4228,6 @@ async def create_threads(
return response return response
except Exception as e: except Exception as e:
data["litellm_status"] = "fail" # used for alerting
await proxy_logging_obj.post_call_failure_hook( await proxy_logging_obj.post_call_failure_hook(
user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data
) )
@@ -4276,7 +4297,11 @@ async def get_thread(
response = await llm_router.aget_thread(thread_id=thread_id, **data) response = await llm_router.aget_thread(thread_id=thread_id, **data)
### ALERTING ### ### ALERTING ###
data["litellm_status"] = "success" # used for alerting asyncio.create_task(
proxy_logging_obj.update_request_status(
litellm_call_id=data.get("litellm_call_id", ""), status="success"
)
)
### RESPONSE HEADERS ### ### RESPONSE HEADERS ###
hidden_params = getattr(response, "_hidden_params", {}) or {} hidden_params = getattr(response, "_hidden_params", {}) or {}
@@ -4297,7 +4322,6 @@ async def get_thread(
return response return response
except Exception as e: except Exception as e:
data["litellm_status"] = "fail" # used for alerting
await proxy_logging_obj.post_call_failure_hook( await proxy_logging_obj.post_call_failure_hook(
user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data
) )
@@ -4370,7 +4394,11 @@ async def add_messages(
response = await llm_router.a_add_message(thread_id=thread_id, **data) response = await llm_router.a_add_message(thread_id=thread_id, **data)
### ALERTING ### ### ALERTING ###
data["litellm_status"] = "success" # used for alerting asyncio.create_task(
proxy_logging_obj.update_request_status(
litellm_call_id=data.get("litellm_call_id", ""), status="success"
)
)
### RESPONSE HEADERS ### ### RESPONSE HEADERS ###
hidden_params = getattr(response, "_hidden_params", {}) or {} hidden_params = getattr(response, "_hidden_params", {}) or {}
@@ -4391,7 +4419,6 @@ async def add_messages(
return response return response
except Exception as e: except Exception as e:
data["litellm_status"] = "fail" # used for alerting
await proxy_logging_obj.post_call_failure_hook( await proxy_logging_obj.post_call_failure_hook(
user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data
) )
@@ -4460,7 +4487,11 @@ async def get_messages(
response = await llm_router.aget_messages(thread_id=thread_id, **data) response = await llm_router.aget_messages(thread_id=thread_id, **data)
### ALERTING ### ### ALERTING ###
data["litellm_status"] = "success" # used for alerting asyncio.create_task(
proxy_logging_obj.update_request_status(
litellm_call_id=data.get("litellm_call_id", ""), status="success"
)
)
### RESPONSE HEADERS ### ### RESPONSE HEADERS ###
hidden_params = getattr(response, "_hidden_params", {}) or {} hidden_params = getattr(response, "_hidden_params", {}) or {}
@@ -4481,7 +4512,6 @@ async def get_messages(
return response return response
except Exception as e: except Exception as e:
data["litellm_status"] = "fail" # used for alerting
await proxy_logging_obj.post_call_failure_hook( await proxy_logging_obj.post_call_failure_hook(
user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data
) )
@@ -4564,7 +4594,11 @@ async def run_thread(
) )
### ALERTING ### ### ALERTING ###
data["litellm_status"] = "success" # used for alerting asyncio.create_task(
proxy_logging_obj.update_request_status(
litellm_call_id=data.get("litellm_call_id", ""), status="success"
)
)
### RESPONSE HEADERS ### ### RESPONSE HEADERS ###
hidden_params = getattr(response, "_hidden_params", {}) or {} hidden_params = getattr(response, "_hidden_params", {}) or {}
@@ -4585,7 +4619,6 @@ async def run_thread(
return response return response
except Exception as e: except Exception as e:
data["litellm_status"] = "fail" # used for alerting
await proxy_logging_obj.post_call_failure_hook( await proxy_logging_obj.post_call_failure_hook(
user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data
) )
@@ -4675,7 +4708,11 @@ async def create_batch(
) )
### ALERTING ### ### ALERTING ###
data["litellm_status"] = "success" # used for alerting asyncio.create_task(
proxy_logging_obj.update_request_status(
litellm_call_id=data.get("litellm_call_id", ""), status="success"
)
)
### RESPONSE HEADERS ### ### RESPONSE HEADERS ###
hidden_params = getattr(response, "_hidden_params", {}) or {} hidden_params = getattr(response, "_hidden_params", {}) or {}
@@ -4696,7 +4733,6 @@ async def create_batch(
return response return response
except Exception as e: except Exception as e:
data["litellm_status"] = "fail" # used for alerting
await proxy_logging_obj.post_call_failure_hook( await proxy_logging_obj.post_call_failure_hook(
user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data
) )
@@ -4781,7 +4817,11 @@ async def retrieve_batch(
) )
### ALERTING ### ### ALERTING ###
data["litellm_status"] = "success" # used for alerting asyncio.create_task(
proxy_logging_obj.update_request_status(
litellm_call_id=data.get("litellm_call_id", ""), status="success"
)
)
### RESPONSE HEADERS ### ### RESPONSE HEADERS ###
hidden_params = getattr(response, "_hidden_params", {}) or {} hidden_params = getattr(response, "_hidden_params", {}) or {}
@@ -4802,7 +4842,6 @@ async def retrieve_batch(
return response return response
except Exception as e: except Exception as e:
data["litellm_status"] = "fail" # used for alerting
await proxy_logging_obj.post_call_failure_hook( await proxy_logging_obj.post_call_failure_hook(
user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data
) )
@@ -4897,7 +4936,11 @@ async def create_file(
) )
### ALERTING ### ### ALERTING ###
data["litellm_status"] = "success" # used for alerting asyncio.create_task(
proxy_logging_obj.update_request_status(
litellm_call_id=data.get("litellm_call_id", ""), status="success"
)
)
### RESPONSE HEADERS ### ### RESPONSE HEADERS ###
hidden_params = getattr(response, "_hidden_params", {}) or {} hidden_params = getattr(response, "_hidden_params", {}) or {}
@@ -4918,7 +4961,6 @@ async def create_file(
return response return response
except Exception as e: except Exception as e:
data["litellm_status"] = "fail" # used for alerting
await proxy_logging_obj.post_call_failure_hook( await proxy_logging_obj.post_call_failure_hook(
user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data
) )
@@ -5041,7 +5083,11 @@ async def moderations(
response = await litellm.amoderation(**data) response = await litellm.amoderation(**data)
### ALERTING ### ### ALERTING ###
data["litellm_status"] = "success" # used for alerting asyncio.create_task(
proxy_logging_obj.update_request_status(
litellm_call_id=data.get("litellm_call_id", ""), status="success"
)
)
### RESPONSE HEADERS ### ### RESPONSE HEADERS ###
hidden_params = getattr(response, "_hidden_params", {}) or {} hidden_params = getattr(response, "_hidden_params", {}) or {}
@@ -5062,7 +5108,6 @@ async def moderations(
return response return response
except Exception as e: except Exception as e:
data["litellm_status"] = "fail" # used for alerting
await proxy_logging_obj.post_call_failure_hook( await proxy_logging_obj.post_call_failure_hook(
user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data
) )

View file

@@ -272,6 +272,16 @@ class ProxyLogging:
callback_list=callback_list callback_list=callback_list
) )
async def update_request_status(
    self, litellm_call_id: str, status: Literal["success", "fail"]
):
    """Record a request's terminal status in the internal usage cache.

    Stored under key ``request_status:<litellm_call_id>`` with a 1-hour TTL.
    ``local_only=True`` restricts the write to the in-memory cache layer —
    per the commit message, this lets the slack alerting hanging-request
    check look up completion status without a remote cache round-trip
    (the reader uses the same key with ``local_only=True``).

    Args:
        litellm_call_id: Unique id of the proxied LLM call.
        status: Terminal outcome of the request, ``"success"`` or ``"fail"``.
    """
    await self.internal_usage_cache.async_set_cache(
        key="request_status:{}".format(litellm_call_id),
        value=status,
        local_only=True,
        ttl=3600,
    )
# The actual implementation of the function # The actual implementation of the function
async def pre_call_hook( async def pre_call_hook(
self, self,
@@ -560,6 +570,9 @@ class ProxyLogging:
""" """
### ALERTING ### ### ALERTING ###
await self.update_request_status(
litellm_call_id=request_data.get("litellm_call_id", ""), status="fail"
)
if "llm_exceptions" in self.alert_types and not isinstance( if "llm_exceptions" in self.alert_types and not isinstance(
original_exception, HTTPException original_exception, HTTPException
): ):
@@ -611,6 +624,7 @@ class ProxyLogging:
Covers: Covers:
1. /chat/completions 1. /chat/completions
""" """
for callback in litellm.callbacks: for callback in litellm.callbacks:
try: try:
_callback: Optional[CustomLogger] = None _callback: Optional[CustomLogger] = None