diff --git a/litellm/integrations/slack_alerting.py b/litellm/integrations/slack_alerting.py
index bce0fef8cd..04195705a0 100644
--- a/litellm/integrations/slack_alerting.py
+++ b/litellm/integrations/slack_alerting.py
@@ -606,6 +606,13 @@ class SlackAlerting(CustomLogger):
             and request_data.get("litellm_status", "") != "success"
             and request_data.get("litellm_status", "") != "fail"
         ):
+            ## CHECK IF CACHE IS UPDATED
+            litellm_call_id = request_data.get("litellm_call_id", "")
+            status: Optional[str] = await self.internal_usage_cache.async_get_cache(
+                key="request_status:{}".format(litellm_call_id), local_only=True
+            )
+            if status is not None and (status == "success" or status == "fail"):
+                return
             if request_data.get("deployment", None) is not None and isinstance(
                 request_data["deployment"], dict
             ):
diff --git a/litellm/proxy/_new_secret_config.yaml b/litellm/proxy/_new_secret_config.yaml
index c62fc9944f..31c96a0661 100644
--- a/litellm/proxy/_new_secret_config.yaml
+++ b/litellm/proxy/_new_secret_config.yaml
@@ -2,4 +2,8 @@ model_list:
   - model_name: claude-3-5-sonnet # all requests where model not in your config go to this deployment
     litellm_params:
       model: "openai/*"
-      mock_response: "litellm.RateLimitError"
\ No newline at end of file
+      mock_response: "Hello world!"
+
+general_settings:
+  alerting: ["slack"]
+  alerting_threshold: 10
\ No newline at end of file
diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py
index 0577ec0a04..cfef527577 100644
--- a/litellm/proxy/proxy_server.py
+++ b/litellm/proxy/proxy_server.py
@@ -3066,8 +3066,11 @@ async def chat_completion(
         # Post Call Processing
         if llm_router is not None:
             data["deployment"] = llm_router.get_deployment(model_id=model_id)
-        data["litellm_status"] = "success"  # used for alerting
-
+        asyncio.create_task(
+            proxy_logging_obj.update_request_status(
+                litellm_call_id=data.get("litellm_call_id", ""), status="success"
+            )
+        )
         if (
             "stream" in data and data["stream"] == True
         ):  # use generate_responses to stream responses
@@ -3117,7 +3120,6 @@ async def chat_completion(
         return response
     except RejectedRequestError as e:
         _data = e.request_data
-        _data["litellm_status"] = "fail"  # used for alerting
         await proxy_logging_obj.post_call_failure_hook(
             user_api_key_dict=user_api_key_dict,
             original_exception=e,
@@ -3150,7 +3152,6 @@ async def chat_completion(
             _chat_response.usage = _usage  # type: ignore
             return _chat_response
     except Exception as e:
-        data["litellm_status"] = "fail"  # used for alerting
         verbose_proxy_logger.error(
             "litellm.proxy.proxy_server.chat_completion(): Exception occured - {}\n{}".format(
                 get_error_message_str(e=e), traceback.format_exc()
@@ -3306,7 +3307,11 @@ async def completion(
         response_cost = hidden_params.get("response_cost", None) or ""

         ### ALERTING ###
-        data["litellm_status"] = "success"  # used for alerting
+        asyncio.create_task(
+            proxy_logging_obj.update_request_status(
+                litellm_call_id=data.get("litellm_call_id", ""), status="success"
+            )
+        )

         verbose_proxy_logger.debug("final response: %s", response)
         if (
@@ -3345,7 +3350,6 @@ async def completion(
         return response
     except RejectedRequestError as e:
         _data = e.request_data
-        _data["litellm_status"] = "fail"  # used for alerting
         await proxy_logging_obj.post_call_failure_hook(
             user_api_key_dict=user_api_key_dict,
             original_exception=e,
@@ -3384,7 +3388,6 @@ async def completion(
             _response.choices[0].text = e.message
         return _response
     except Exception as e:
-        data["litellm_status"] = "fail"  # used for alerting
         await proxy_logging_obj.post_call_failure_hook(
             user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data
         )
@@ -3536,7 +3539,11 @@ async def embeddings(
         )

         ### ALERTING ###
-        data["litellm_status"] = "success"  # used for alerting
+        asyncio.create_task(
+            proxy_logging_obj.update_request_status(
+                litellm_call_id=data.get("litellm_call_id", ""), status="success"
+            )
+        )

         ### RESPONSE HEADERS ###
         hidden_params = getattr(response, "_hidden_params", {}) or {}
@@ -3559,7 +3566,6 @@ async def embeddings(

         return response
     except Exception as e:
-        data["litellm_status"] = "fail"  # used for alerting
         await proxy_logging_obj.post_call_failure_hook(
             user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data
         )
@@ -3687,7 +3693,11 @@ async def image_generation(
         )

         ### ALERTING ###
-        data["litellm_status"] = "success"  # used for alerting
-
+        asyncio.create_task(
+            proxy_logging_obj.update_request_status(
+                litellm_call_id=data.get("litellm_call_id", ""), status="success"
+            )
+        )
         ### RESPONSE HEADERS ###
         hidden_params = getattr(response, "_hidden_params", {}) or {}
         model_id = hidden_params.get("model_id", None) or ""
@@ -3710,7 +3719,6 @@ async def image_generation(

         return response
     except Exception as e:
-        data["litellm_status"] = "fail"  # used for alerting
         await proxy_logging_obj.post_call_failure_hook(
             user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data
         )
@@ -3825,7 +3833,11 @@ async def audio_speech(
         )

         ### ALERTING ###
-        data["litellm_status"] = "success"  # used for alerting
+        asyncio.create_task(
+            proxy_logging_obj.update_request_status(
+                litellm_call_id=data.get("litellm_call_id", ""), status="success"
+            )
+        )

         ### RESPONSE HEADERS ###
         hidden_params = getattr(response, "_hidden_params", {}) or {}
@@ -3991,7 +4003,11 @@ async def audio_transcriptions(
             os.remove(file.filename)  # Delete the saved file

         ### ALERTING ###
-        data["litellm_status"] = "success"  # used for alerting
+        asyncio.create_task(
+            proxy_logging_obj.update_request_status(
+                litellm_call_id=data.get("litellm_call_id", ""), status="success"
+            )
+        )

         ### RESPONSE HEADERS ###
         hidden_params = getattr(response, "_hidden_params", {}) or {}
@@ -4014,7 +4030,6 @@ async def audio_transcriptions(

         return response
     except Exception as e:
-        data["litellm_status"] = "fail"  # used for alerting
         await proxy_logging_obj.post_call_failure_hook(
             user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data
         )
@@ -4093,7 +4108,11 @@ async def get_assistants(
         response = await llm_router.aget_assistants(**data)

         ### ALERTING ###
-        data["litellm_status"] = "success"  # used for alerting
+        asyncio.create_task(
+            proxy_logging_obj.update_request_status(
+                litellm_call_id=data.get("litellm_call_id", ""), status="success"
+            )
+        )

         ### RESPONSE HEADERS ###
         hidden_params = getattr(response, "_hidden_params", {}) or {}
@@ -4114,7 +4133,6 @@ async def get_assistants(

         return response
     except Exception as e:
-        data["litellm_status"] = "fail"  # used for alerting
         await proxy_logging_obj.post_call_failure_hook(
             user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data
         )
@@ -4185,7 +4203,11 @@ async def create_threads(
         response = await llm_router.acreate_thread(**data)

         ### ALERTING ###
-        data["litellm_status"] = "success"  # used for alerting
+        asyncio.create_task(
+            proxy_logging_obj.update_request_status(
+                litellm_call_id=data.get("litellm_call_id", ""), status="success"
+            )
+        )

         ### RESPONSE HEADERS ###
         hidden_params = getattr(response, "_hidden_params", {}) or {}
@@ -4206,7 +4228,6 @@ async def create_threads(

         return response
     except Exception as e:
-        data["litellm_status"] = "fail"  # used for alerting
         await proxy_logging_obj.post_call_failure_hook(
             user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data
         )
@@ -4276,7 +4297,11 @@ async def get_thread(
         response = await llm_router.aget_thread(thread_id=thread_id, **data)

         ### ALERTING ###
-        data["litellm_status"] = "success"  # used for alerting
+        asyncio.create_task(
+            proxy_logging_obj.update_request_status(
+                litellm_call_id=data.get("litellm_call_id", ""), status="success"
+            )
+        )

         ### RESPONSE HEADERS ###
         hidden_params = getattr(response, "_hidden_params", {}) or {}
@@ -4297,7 +4322,6 @@ async def get_thread(

         return response
     except Exception as e:
-        data["litellm_status"] = "fail"  # used for alerting
         await proxy_logging_obj.post_call_failure_hook(
             user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data
         )
@@ -4370,7 +4394,11 @@ async def add_messages(
         response = await llm_router.a_add_message(thread_id=thread_id, **data)

         ### ALERTING ###
-        data["litellm_status"] = "success"  # used for alerting
+        asyncio.create_task(
+            proxy_logging_obj.update_request_status(
+                litellm_call_id=data.get("litellm_call_id", ""), status="success"
+            )
+        )

         ### RESPONSE HEADERS ###
         hidden_params = getattr(response, "_hidden_params", {}) or {}
@@ -4391,7 +4419,6 @@ async def add_messages(

         return response
     except Exception as e:
-        data["litellm_status"] = "fail"  # used for alerting
         await proxy_logging_obj.post_call_failure_hook(
             user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data
         )
@@ -4460,7 +4487,11 @@ async def get_messages(
         response = await llm_router.aget_messages(thread_id=thread_id, **data)

         ### ALERTING ###
-        data["litellm_status"] = "success"  # used for alerting
+        asyncio.create_task(
+            proxy_logging_obj.update_request_status(
+                litellm_call_id=data.get("litellm_call_id", ""), status="success"
+            )
+        )

         ### RESPONSE HEADERS ###
         hidden_params = getattr(response, "_hidden_params", {}) or {}
@@ -4481,7 +4512,6 @@ async def get_messages(

         return response
     except Exception as e:
-        data["litellm_status"] = "fail"  # used for alerting
         await proxy_logging_obj.post_call_failure_hook(
             user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data
         )
@@ -4564,7 +4594,11 @@ async def run_thread(
         )

         ### ALERTING ###
-        data["litellm_status"] = "success"  # used for alerting
+        asyncio.create_task(
+            proxy_logging_obj.update_request_status(
+                litellm_call_id=data.get("litellm_call_id", ""), status="success"
+            )
+        )

         ### RESPONSE HEADERS ###
         hidden_params = getattr(response, "_hidden_params", {}) or {}
@@ -4585,7 +4619,6 @@ async def run_thread(

         return response
     except Exception as e:
-        data["litellm_status"] = "fail"  # used for alerting
         await proxy_logging_obj.post_call_failure_hook(
             user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data
         )
@@ -4675,7 +4708,11 @@ async def create_batch(
         )

         ### ALERTING ###
-        data["litellm_status"] = "success"  # used for alerting
+        asyncio.create_task(
+            proxy_logging_obj.update_request_status(
+                litellm_call_id=data.get("litellm_call_id", ""), status="success"
+            )
+        )

         ### RESPONSE HEADERS ###
         hidden_params = getattr(response, "_hidden_params", {}) or {}
@@ -4696,7 +4733,6 @@ async def create_batch(

         return response
     except Exception as e:
-        data["litellm_status"] = "fail"  # used for alerting
         await proxy_logging_obj.post_call_failure_hook(
             user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data
         )
@@ -4781,7 +4817,11 @@ async def retrieve_batch(
         )

         ### ALERTING ###
-        data["litellm_status"] = "success"  # used for alerting
+        asyncio.create_task(
+            proxy_logging_obj.update_request_status(
+                litellm_call_id=data.get("litellm_call_id", ""), status="success"
+            )
+        )

         ### RESPONSE HEADERS ###
         hidden_params = getattr(response, "_hidden_params", {}) or {}
@@ -4802,7 +4842,6 @@ async def retrieve_batch(

         return response
     except Exception as e:
-        data["litellm_status"] = "fail"  # used for alerting
         await proxy_logging_obj.post_call_failure_hook(
             user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data
         )
@@ -4897,7 +4936,11 @@ async def create_file(
         )

         ### ALERTING ###
-        data["litellm_status"] = "success"  # used for alerting
+        asyncio.create_task(
+            proxy_logging_obj.update_request_status(
+                litellm_call_id=data.get("litellm_call_id", ""), status="success"
+            )
+        )

         ### RESPONSE HEADERS ###
         hidden_params = getattr(response, "_hidden_params", {}) or {}
@@ -4918,7 +4961,6 @@ async def create_file(

         return response
     except Exception as e:
-        data["litellm_status"] = "fail"  # used for alerting
         await proxy_logging_obj.post_call_failure_hook(
             user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data
         )
@@ -5041,7 +5083,11 @@ async def moderations(
         response = await litellm.amoderation(**data)

         ### ALERTING ###
-        data["litellm_status"] = "success"  # used for alerting
+        asyncio.create_task(
+            proxy_logging_obj.update_request_status(
+                litellm_call_id=data.get("litellm_call_id", ""), status="success"
+            )
+        )

         ### RESPONSE HEADERS ###
         hidden_params = getattr(response, "_hidden_params", {}) or {}
@@ -5062,7 +5108,6 @@ async def moderations(

         return response
     except Exception as e:
-        data["litellm_status"] = "fail"  # used for alerting
         await proxy_logging_obj.post_call_failure_hook(
             user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data
         )
diff --git a/litellm/proxy/utils.py b/litellm/proxy/utils.py
index 96aeb4a816..179d094667 100644
--- a/litellm/proxy/utils.py
+++ b/litellm/proxy/utils.py
@@ -272,6 +272,16 @@ class ProxyLogging:
             callback_list=callback_list
         )

+    async def update_request_status(
+        self, litellm_call_id: str, status: Literal["success", "fail"]
+    ):
+        await self.internal_usage_cache.async_set_cache(
+            key="request_status:{}".format(litellm_call_id),
+            value=status,
+            local_only=True,
+            ttl=3600,
+        )
+
     # The actual implementation of the function
     async def pre_call_hook(
         self,
@@ -560,6 +570,9 @@ class ProxyLogging:
         """

         ### ALERTING ###
+        await self.update_request_status(
+            litellm_call_id=request_data.get("litellm_call_id", ""), status="fail"
+        )
         if "llm_exceptions" in self.alert_types and not isinstance(
             original_exception, HTTPException
         ):
@@ -611,6 +624,7 @@ class ProxyLogging:
         Covers:
         1. /chat/completions
         """
+
         for callback in litellm.callbacks:
             try:
                 _callback: Optional[CustomLogger] = None
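
For readers outside the LiteLLM codebase, the pattern this patch introduces can be sketched in isolation: endpoints write a per-call terminal status into a TTL cache, and the hanging-request alert reads it back and skips calls that already finished. The sketch below is a minimal, hypothetical reconstruction, assuming a simple in-memory TTL cache in place of ProxyLogging.internal_usage_cache; InMemoryCache, update_request_status, and should_alert_hanging_request are illustrative stand-ins, not LiteLLM's actual classes.

import asyncio
import time
from typing import Literal, Optional


class InMemoryCache:
    """Hypothetical stand-in for LiteLLM's internal usage cache (illustrative only)."""

    def __init__(self) -> None:
        self._store: dict = {}

    async def async_set_cache(self, key: str, value: str, ttl: int) -> None:
        # Store the value alongside an absolute expiry timestamp.
        self._store[key] = (value, time.time() + ttl)

    async def async_get_cache(self, key: str) -> Optional[str]:
        entry = self._store.get(key)
        if entry is None:
            return None
        value, expires_at = entry
        if time.time() > expires_at:
            del self._store[key]  # lazily evict expired entries
            return None
        return value


cache = InMemoryCache()


async def update_request_status(
    litellm_call_id: str, status: Literal["success", "fail"]
) -> None:
    # Mirrors the new ProxyLogging.update_request_status: record the call's
    # terminal status under a per-call key, expiring after one hour.
    await cache.async_set_cache(
        key="request_status:{}".format(litellm_call_id), value=status, ttl=3600
    )


async def should_alert_hanging_request(litellm_call_id: str) -> bool:
    # Mirrors the new SlackAlerting check: a call that already reached a
    # terminal status (success or fail) is not hanging, so skip the alert.
    status = await cache.async_get_cache(
        key="request_status:{}".format(litellm_call_id)
    )
    return status not in ("success", "fail")


async def main() -> None:
    call_id = "abc-123"
    assert await should_alert_hanging_request(call_id)  # no status yet -> hanging
    await update_request_status(call_id, "success")
    assert not await should_alert_hanging_request(call_id)  # finished -> no alert


asyncio.run(main())

In the patch itself the writes pass local_only=True, presumably so status checks stay in the proxy's local cache rather than hitting a shared backend, and the 3600-second TTL bounds cache growth; a missing key is treated as a still-in-flight request.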