mirror of
https://github.com/BerriAI/litellm.git
synced 2025-04-26 11:14:04 +00:00
Merge pull request #3370 from BerriAI/litellm_latency_buffer
fix(lowest_latency.py): allow setting a buffer for getting values within a certain latency threshold
This commit is contained in:
commit
6e84e2080d
8 changed files with 266 additions and 31 deletions
|
@@ -326,9 +326,9 @@ class Router:
|
|||
litellm.failure_callback.append(self.deployment_callback_on_failure)
|
||||
else:
|
||||
litellm.failure_callback = [self.deployment_callback_on_failure]
|
||||
verbose_router_logger.info(
|
||||
print( # noqa
|
||||
f"Intialized router with Routing strategy: {self.routing_strategy}\n\nRouting fallbacks: {self.fallbacks}\n\nRouting context window fallbacks: {self.context_window_fallbacks}\n\nRouter Redis Caching={self.cache.redis_cache}"
|
||||
)
|
||||
) # noqa
|
||||
self.routing_strategy_args = routing_strategy_args
|
||||
|
||||
def print_deployment(self, deployment: dict):
|
||||
|
@@ -2616,6 +2616,11 @@ class Router:
|
|||
for var in vars_to_include:
|
||||
if var in _all_vars:
|
||||
_settings_to_return[var] = _all_vars[var]
|
||||
if (
|
||||
var == "routing_strategy_args"
|
||||
and self.routing_strategy == "latency-based-routing"
|
||||
):
|
||||
_settings_to_return[var] = self.lowestlatency_logger.routing_args.json()
|
||||
return _settings_to_return
|
||||
|
||||
def update_settings(self, **kwargs):
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue