mirror of
https://github.com/BerriAI/litellm.git
synced 2025-04-27 11:43:54 +00:00
feat - penalize timeout errors
This commit is contained in:
parent
9ab1a3df0b
commit
654c736d29
1 changed file with 56 additions and 0 deletions
@@ -11,6 +11,7 @@ from litellm.caching import DualCache
 from litellm.integrations.custom_logger import CustomLogger
 from litellm import ModelResponse
 from litellm import token_counter
+import litellm


 class LiteLLMBase(BaseModel):
@@ -126,6 +127,61 @@ class LowestLatencyLoggingHandler(CustomLogger):
             traceback.print_exc()
             pass

+    async def async_log_failure_event(self, kwargs, response_obj, start_time, end_time):
+        """
+        Check if Timeout Error, if timeout set deployment latency -> 100
+        """
+        try:
+            _exception = kwargs.get("exception", None)
+            if isinstance(_exception, litellm.Timeout):
+                if kwargs["litellm_params"].get("metadata") is None:
+                    pass
+                else:
+                    model_group = kwargs["litellm_params"]["metadata"].get(
+                        "model_group", None
+                    )
+
+                    id = kwargs["litellm_params"].get("model_info", {}).get("id", None)
+                    if model_group is None or id is None:
+                        return
+                    elif isinstance(id, int):
+                        id = str(id)
+
+                    # ------------
+                    # Setup values
+                    # ------------
+                    """
+                    {
+                        {model_group}_map: {
+                            id: {
+                                "latency": [..]
+                                f"{date:hour:minute}" : {"tpm": 34, "rpm": 3}
+                            }
+                        }
+                    }
+                    """
+                    latency_key = f"{model_group}_map"
+                    request_count_dict = (
+                        self.router_cache.get_cache(key=latency_key) or {}
+                    )
+
+                    if id not in request_count_dict:
+                        request_count_dict[id] = {}
+
+                    ## Latency
+                    request_count_dict[id].setdefault("latency", []).append(100.0)
+                    self.router_cache.set_cache(
+                        key=latency_key,
+                        value=request_count_dict,
+                        ttl=self.routing_args.ttl,
+                    )  # reset map within window
+            else:
+                # do nothing if it's not a timeout error
+                return
+        except Exception as e:
+            traceback.print_exc()
+            pass
+
     async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
         try:
             """
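For context, a minimal, self-contained sketch (not part of the commit) of how this penalty is intended to affect lowest-latency routing: appending 100.0 to a deployment's recorded latencies inflates its average, so a strategy that picks the deployment with the lowest average latency will steer traffic away from it. The deployment IDs, numbers, and the pick_deployment helper below are hypothetical, chosen only to mirror the {model_group}_map shape documented in the diff above.

# Hypothetical illustration of the timeout penalty's effect (not from the commit).
# The dict shape mirrors the {model_group}_map structure in the docstring above.
cache_map = {
    "gpt-4_map": {
        "deployment-a": {"latency": [0.8, 1.1]},    # healthy deployment
        "deployment-b": {"latency": [0.9, 100.0]},  # penalized after a litellm.Timeout
    }
}


def pick_deployment(model_group_map: dict) -> str:
    """Pick the deployment with the lowest average recorded latency."""
    return min(
        model_group_map,
        key=lambda dep: sum(model_group_map[dep]["latency"])
        / len(model_group_map[dep]["latency"]),
    )


print(pick_deployment(cache_map["gpt-4_map"]))  # -> "deployment-a"

Because the map is written back with ttl=self.routing_args.ttl, the 100.0 penalty expires along with the rest of the window, so a deployment that timed out is only deprioritized temporarily.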