forked from phoenix/litellm-mirror
Merge pull request #2298 from BerriAI/litellm_show_api_base_in_response_taking_too_long
[FEAT] Slack Alerts show api base in response taking too long
This commit is contained in:
commit
0c8258c522
2 changed files with 25 additions and 52 deletions
|
@ -2138,14 +2138,6 @@ async def async_data_generator(response, user_api_key_dict):
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
yield f"data: {str(e)}\n\n"
|
yield f"data: {str(e)}\n\n"
|
||||||
|
|
||||||
### ALERTING ###
|
|
||||||
end_time = time.time()
|
|
||||||
asyncio.create_task(
|
|
||||||
proxy_logging_obj.response_taking_too_long(
|
|
||||||
start_time=start_time, end_time=end_time, type="slow_response"
|
|
||||||
)
|
|
||||||
)
|
|
||||||
|
|
||||||
# Streaming is done, yield the [DONE] chunk
|
# Streaming is done, yield the [DONE] chunk
|
||||||
done_message = "[DONE]"
|
done_message = "[DONE]"
|
||||||
yield f"data: {done_message}\n\n"
|
yield f"data: {done_message}\n\n"
|
||||||
|
@ -2494,14 +2486,6 @@ async def completion(
|
||||||
headers=custom_headers,
|
headers=custom_headers,
|
||||||
)
|
)
|
||||||
|
|
||||||
### ALERTING ###
|
|
||||||
end_time = time.time()
|
|
||||||
asyncio.create_task(
|
|
||||||
proxy_logging_obj.response_taking_too_long(
|
|
||||||
start_time=start_time, end_time=end_time, type="slow_response"
|
|
||||||
)
|
|
||||||
)
|
|
||||||
|
|
||||||
fastapi_response.headers["x-litellm-model-id"] = model_id
|
fastapi_response.headers["x-litellm-model-id"] = model_id
|
||||||
return response
|
return response
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
@ -2700,14 +2684,6 @@ async def chat_completion(
|
||||||
headers=custom_headers,
|
headers=custom_headers,
|
||||||
)
|
)
|
||||||
|
|
||||||
### ALERTING ###
|
|
||||||
end_time = time.time()
|
|
||||||
asyncio.create_task(
|
|
||||||
proxy_logging_obj.response_taking_too_long(
|
|
||||||
start_time=start_time, end_time=end_time, type="slow_response"
|
|
||||||
)
|
|
||||||
)
|
|
||||||
|
|
||||||
fastapi_response.headers["x-litellm-model-id"] = model_id
|
fastapi_response.headers["x-litellm-model-id"] = model_id
|
||||||
|
|
||||||
### CALL HOOKS ### - modify outgoing data
|
### CALL HOOKS ### - modify outgoing data
|
||||||
|
@ -2915,12 +2891,6 @@ async def embeddings(
|
||||||
|
|
||||||
### ALERTING ###
|
### ALERTING ###
|
||||||
data["litellm_status"] = "success" # used for alerting
|
data["litellm_status"] = "success" # used for alerting
|
||||||
end_time = time.time()
|
|
||||||
asyncio.create_task(
|
|
||||||
proxy_logging_obj.response_taking_too_long(
|
|
||||||
start_time=start_time, end_time=end_time, type="slow_response"
|
|
||||||
)
|
|
||||||
)
|
|
||||||
|
|
||||||
return response
|
return response
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
@ -3066,12 +3036,6 @@ async def image_generation(
|
||||||
|
|
||||||
### ALERTING ###
|
### ALERTING ###
|
||||||
data["litellm_status"] = "success" # used for alerting
|
data["litellm_status"] = "success" # used for alerting
|
||||||
end_time = time.time()
|
|
||||||
asyncio.create_task(
|
|
||||||
proxy_logging_obj.response_taking_too_long(
|
|
||||||
start_time=start_time, end_time=end_time, type="slow_response"
|
|
||||||
)
|
|
||||||
)
|
|
||||||
|
|
||||||
return response
|
return response
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
@ -3225,12 +3189,6 @@ async def moderations(
|
||||||
|
|
||||||
### ALERTING ###
|
### ALERTING ###
|
||||||
data["litellm_status"] = "success" # used for alerting
|
data["litellm_status"] = "success" # used for alerting
|
||||||
end_time = time.time()
|
|
||||||
asyncio.create_task(
|
|
||||||
proxy_logging_obj.response_taking_too_long(
|
|
||||||
start_time=start_time, end_time=end_time, type="slow_response"
|
|
||||||
)
|
|
||||||
)
|
|
||||||
|
|
||||||
return response
|
return response
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
|
|
@ -64,6 +64,7 @@ class ProxyLogging:
|
||||||
litellm.callbacks.append(self.max_parallel_request_limiter)
|
litellm.callbacks.append(self.max_parallel_request_limiter)
|
||||||
litellm.callbacks.append(self.max_budget_limiter)
|
litellm.callbacks.append(self.max_budget_limiter)
|
||||||
litellm.callbacks.append(self.cache_control_check)
|
litellm.callbacks.append(self.cache_control_check)
|
||||||
|
litellm.callbacks.append(self.response_taking_too_long_callback)
|
||||||
for callback in litellm.callbacks:
|
for callback in litellm.callbacks:
|
||||||
if callback not in litellm.input_callback:
|
if callback not in litellm.input_callback:
|
||||||
litellm.input_callback.append(callback)
|
litellm.input_callback.append(callback)
|
||||||
|
@ -142,6 +143,30 @@ class ProxyLogging:
|
||||||
raise e
|
raise e
|
||||||
return data
|
return data
|
||||||
|
|
||||||
|
async def response_taking_too_long_callback(
    self,
    kwargs,  # kwargs to completion
    completion_response,  # response from completion
    start_time,
    end_time,  # start/end time
):
    """Send a low-level alert when a call took longer than the alerting threshold.

    Args:
        kwargs: The kwargs passed to completion. ``model``, ``messages`` and
            ``litellm_params["api_base"]`` are read from it for the alert text.
        completion_response: Response from completion (not used here).
        start_time: Start of the call.
        end_time: End of the call. ``end_time - start_time`` may be either an
            object exposing ``total_seconds()`` (e.g. a ``timedelta``) or a
            plain number of seconds.

    Returns:
        None. When ``self.alerting`` is set and the elapsed time exceeds
        ``self.alerting_threshold``, ``self.alerting_handler`` is awaited.
    """
    if self.alerting is None:
        return

    elapsed = end_time - start_time
    # Accept both datetime-style inputs (difference is a timedelta) and
    # float timestamps (difference is already a number of seconds).
    to_seconds = getattr(elapsed, "total_seconds", None)
    time_difference_float = to_seconds() if callable(to_seconds) else float(elapsed)

    # Bail out before formatting anything: the request info embeds the full
    # message list, which is wasted work for every call under the threshold.
    if not time_difference_float > self.alerting_threshold:
        return

    # `litellm_params` may be present but None; fall back to an empty dict so
    # the api_base lookup cannot raise AttributeError.
    litellm_params = kwargs.get("litellm_params") or {}
    api_base = litellm_params.get("api_base", "")
    model = kwargs.get("model", "")
    messages = kwargs.get("messages", "")

    request_info = f"\nRequest Model: `{model}`\nAPI Base: `{api_base}`\nMessages: `{messages}`"
    slow_message = f"`Responses are slow - {round(time_difference_float,2)}s response time > Alerting threshold: {self.alerting_threshold}s`"
    await self.alerting_handler(
        message=slow_message + request_info,
        level="Low",
    )
|
||||||
|
|
||||||
async def response_taking_too_long(
|
async def response_taking_too_long(
|
||||||
self,
|
self,
|
||||||
start_time: Optional[float] = None,
|
start_time: Optional[float] = None,
|
||||||
|
@ -189,16 +214,6 @@ class ProxyLogging:
|
||||||
level="Medium",
|
level="Medium",
|
||||||
)
|
)
|
||||||
|
|
||||||
elif (
|
|
||||||
type == "slow_response" and start_time is not None and end_time is not None
|
|
||||||
):
|
|
||||||
slow_message = f"`Responses are slow - {round(end_time-start_time,2)}s response time > Alerting threshold: {self.alerting_threshold}s`"
|
|
||||||
if end_time - start_time > self.alerting_threshold:
|
|
||||||
await self.alerting_handler(
|
|
||||||
message=slow_message + request_info,
|
|
||||||
level="Low",
|
|
||||||
)
|
|
||||||
|
|
||||||
async def budget_alerts(
|
async def budget_alerts(
|
||||||
self,
|
self,
|
||||||
type: Literal[
|
type: Literal[
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue