mirror of
https://github.com/BerriAI/litellm.git
synced 2025-04-25 18:54:30 +00:00
(fix) only alert users when requests are hanging
This commit is contained in:
parent
6c13776701
commit
b993c62144
2 changed files with 15 additions and 8 deletions
|
@@ -1863,6 +1863,8 @@ async def chat_completion(
|
||||||
else: # router is not set
|
else: # router is not set
|
||||||
response = await litellm.acompletion(**data)
|
response = await litellm.acompletion(**data)
|
||||||
|
|
||||||
|
# Post Call Processing
|
||||||
|
data["litellm_status"] = "success" # used for alerting
|
||||||
if hasattr(response, "_hidden_params"):
|
if hasattr(response, "_hidden_params"):
|
||||||
model_id = response._hidden_params.get("model_id", None) or ""
|
model_id = response._hidden_params.get("model_id", None) or ""
|
||||||
else:
|
else:
|
||||||
|
@@ -2048,6 +2050,7 @@ async def embeddings(
|
||||||
response = await litellm.aembedding(**data)
|
response = await litellm.aembedding(**data)
|
||||||
|
|
||||||
### ALERTING ###
|
### ALERTING ###
|
||||||
|
data["litellm_status"] = "success" # used for alerting
|
||||||
end_time = time.time()
|
end_time = time.time()
|
||||||
asyncio.create_task(
|
asyncio.create_task(
|
||||||
proxy_logging_obj.response_taking_too_long(
|
proxy_logging_obj.response_taking_too_long(
|
||||||
|
@@ -2163,6 +2166,7 @@ async def image_generation(
|
||||||
response = await litellm.aimage_generation(**data)
|
response = await litellm.aimage_generation(**data)
|
||||||
|
|
||||||
### ALERTING ###
|
### ALERTING ###
|
||||||
|
data["litellm_status"] = "success" # used for alerting
|
||||||
end_time = time.time()
|
end_time = time.time()
|
||||||
asyncio.create_task(
|
asyncio.create_task(
|
||||||
proxy_logging_obj.response_taking_too_long(
|
proxy_logging_obj.response_taking_too_long(
|
||||||
|
|
|
@@ -158,14 +158,17 @@ class ProxyLogging:
|
||||||
await asyncio.sleep(
|
await asyncio.sleep(
|
||||||
self.alerting_threshold
|
self.alerting_threshold
|
||||||
) # Set it to 5 minutes - i'd imagine this might be different for streaming, non-streaming, non-completion (embedding + img) requests
|
) # Set it to 5 minutes - i'd imagine this might be different for streaming, non-streaming, non-completion (embedding + img) requests
|
||||||
|
if (
|
||||||
alerting_message = (
|
request_data is not None
|
||||||
f"Requests are hanging - {self.alerting_threshold}s+ request time"
|
and request_data.get("litellm_status", "") != "success"
|
||||||
)
|
):
|
||||||
await self.alerting_handler(
|
alerting_message = (
|
||||||
message=alerting_message + request_info,
|
f"Requests are hanging - {self.alerting_threshold}s+ request time"
|
||||||
level="Medium",
|
)
|
||||||
)
|
await self.alerting_handler(
|
||||||
|
message=alerting_message + request_info,
|
||||||
|
level="Medium",
|
||||||
|
)
|
||||||
|
|
||||||
elif (
|
elif (
|
||||||
type == "slow_response" and start_time is not None and end_time is not None
|
type == "slow_response" and start_time is not None and end_time is not None
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue