mirror of
https://github.com/BerriAI/litellm.git
synced 2025-04-25 10:44:24 +00:00
feat(proxy/utils.py): return api base for request hanging alerts
This commit is contained in:
parent
b49e47b634
commit
6110d32b1c
7 changed files with 180 additions and 15 deletions
|
@ -182,6 +182,25 @@ class ProxyLogging:
|
|||
raise e
|
||||
return data
|
||||
|
||||
def _response_taking_too_long_callback(
|
||||
self,
|
||||
kwargs, # kwargs to completion
|
||||
start_time,
|
||||
end_time, # start/end time
|
||||
):
|
||||
try:
|
||||
time_difference = end_time - start_time
|
||||
# Convert the timedelta to float (in seconds)
|
||||
time_difference_float = time_difference.total_seconds()
|
||||
litellm_params = kwargs.get("litellm_params", {})
|
||||
api_base = litellm_params.get("api_base", "")
|
||||
model = kwargs.get("model", "")
|
||||
messages = kwargs.get("messages", "")
|
||||
|
||||
return time_difference_float, model, api_base, messages
|
||||
except Exception as e:
|
||||
raise e
|
||||
|
||||
async def response_taking_too_long_callback(
|
||||
self,
|
||||
kwargs, # kwargs to completion
|
||||
|
@ -191,13 +210,13 @@ class ProxyLogging:
|
|||
):
|
||||
if self.alerting is None:
|
||||
return
|
||||
time_difference = end_time - start_time
|
||||
# Convert the timedelta to float (in seconds)
|
||||
time_difference_float = time_difference.total_seconds()
|
||||
litellm_params = kwargs.get("litellm_params", {})
|
||||
api_base = litellm_params.get("api_base", "")
|
||||
model = kwargs.get("model", "")
|
||||
messages = kwargs.get("messages", "")
|
||||
time_difference_float, model, api_base, messages = (
|
||||
self._response_taking_too_long_callback(
|
||||
kwargs=kwargs,
|
||||
start_time=start_time,
|
||||
end_time=end_time,
|
||||
)
|
||||
)
|
||||
request_info = f"\nRequest Model: `{model}`\nAPI Base: `{api_base}`\nMessages: `{messages}`"
|
||||
slow_message = f"`Responses are slow - {round(time_difference_float,2)}s response time > Alerting threshold: {self.alerting_threshold}s`"
|
||||
if time_difference_float > self.alerting_threshold:
|
||||
|
@ -244,6 +263,20 @@ class ProxyLogging:
|
|||
request_data is not None
|
||||
and request_data.get("litellm_status", "") != "success"
|
||||
):
|
||||
if request_data.get("deployment", None) is not None and isinstance(
|
||||
request_data["deployment"], dict
|
||||
):
|
||||
_api_base = litellm.get_api_base(
|
||||
model=model,
|
||||
optional_params=request_data["deployment"].get(
|
||||
"litellm_params", {}
|
||||
),
|
||||
)
|
||||
|
||||
if _api_base is None:
|
||||
_api_base = ""
|
||||
|
||||
request_info += f"\nAPI Base: {_api_base}"
|
||||
# only alert hanging responses if they have not been marked as success
|
||||
alerting_message = (
|
||||
f"`Requests are hanging - {self.alerting_threshold}s+ request time`"
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue