mirror of
https://github.com/BerriAI/litellm.git
synced 2025-04-27 11:43:54 +00:00
feat - penalize timeout errors
This commit is contained in:
parent
9ab1a3df0b
commit
654c736d29
1 changed file with 56 additions and 0 deletions
|
@ -11,6 +11,7 @@ from litellm.caching import DualCache
|
|||
from litellm.integrations.custom_logger import CustomLogger
|
||||
from litellm import ModelResponse
|
||||
from litellm import token_counter
|
||||
import litellm
|
||||
|
||||
|
||||
class LiteLLMBase(BaseModel):
|
||||
|
@ -126,6 +127,61 @@ class LowestLatencyLoggingHandler(CustomLogger):
|
|||
traceback.print_exc()
|
||||
pass
|
||||
|
||||
async def async_log_failure_event(self, kwargs, response_obj, start_time, end_time):
    """
    Penalize a deployment whose request failed with a timeout.

    If ``kwargs["exception"]`` is a ``litellm.Timeout``, a latency value of
    100.0 is appended to that deployment's "latency" list in the router
    cache (stored under the ``f"{model_group}_map"`` key). Any other kind
    of failure is ignored.

    Args:
        kwargs: Call details. Reads ``exception``,
            ``litellm_params["metadata"]["model_group"]`` and
            ``litellm_params["model_info"]["id"]``.
        response_obj: Unused by this handler.
        start_time: Unused by this handler.
        end_time: Unused by this handler.

    Returns:
        None. Nothing is recorded when the metadata, model group or
        deployment id is missing. Unexpected errors are printed with
        ``traceback.print_exc()`` and swallowed, so a logging failure never
        propagates to the caller.
    """
    try:
        if not isinstance(kwargs.get("exception", None), litellm.Timeout):
            # do nothing if it's not a timeout error
            return

        # `or {}` (rather than a `.get` default) also covers keys that are
        # present but explicitly set to None.
        litellm_params = kwargs.get("litellm_params") or {}
        metadata = litellm_params.get("metadata")
        if metadata is None:
            return

        model_group = metadata.get("model_group", None)
        deployment_id = (litellm_params.get("model_info") or {}).get("id", None)
        if model_group is None or deployment_id is None:
            return
        if isinstance(deployment_id, int):
            # integer ids are stored under their string form
            deployment_id = str(deployment_id)

        # ------------
        # Setup values
        # ------------
        # Shape of the cached value:
        # {
        #     {model_group}_map: {
        #         id: {
        #             "latency": [..]
        #             f"{date:hour:minute}" : {"tpm": 34, "rpm": 3}
        #         }
        #     }
        # }
        latency_key = f"{model_group}_map"
        request_count_dict = self.router_cache.get_cache(key=latency_key) or {}

        ## Latency
        # Penalty recorded for a timed-out request.
        # NOTE(review): presumably in the same unit as the other entries of
        # the "latency" list - confirm against the success handler.
        timeout_penalty = 100.0
        deployment_stats = request_count_dict.setdefault(deployment_id, {})
        deployment_stats.setdefault("latency", []).append(timeout_penalty)

        self.router_cache.set_cache(
            key=latency_key,
            value=request_count_dict,
            ttl=self.routing_args.ttl,
        )  # reset map within window
    except Exception:
        # Best-effort logging hook: report the problem, never raise.
        traceback.print_exc()
|
||||
|
||||
async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
|
||||
try:
|
||||
"""
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue