mirror of
https://github.com/BerriAI/litellm.git
synced 2025-04-25 18:54:30 +00:00
fix(utils.py): fix cached responses - translate dict to objects
This commit is contained in:
parent
84460b8222
commit
a4c9e6bd46
4 changed files with 108 additions and 21 deletions
|
@ -1,5 +1,5 @@
|
|||
from datetime import datetime
|
||||
from typing import Dict, List, Optional, Union
|
||||
from typing import Dict, List, Optional, Union, Literal
|
||||
import random, threading, time
|
||||
import litellm
|
||||
import logging
|
||||
|
@ -29,12 +29,16 @@ class Router:
|
|||
redis_host: Optional[str] = None,
|
||||
redis_port: Optional[int] = None,
|
||||
redis_password: Optional[str] = None,
|
||||
cache_responses: bool = False) -> None:
|
||||
cache_responses: bool = False,
|
||||
routing_strategy: Literal["simple-shuffle", "least-busy"] = "simple-shuffle") -> None:
|
||||
if model_list:
|
||||
self.set_model_list(model_list)
|
||||
self.healthy_deployments: List = []
|
||||
### HEALTH CHECK THREAD ### - commenting out as further testing required
|
||||
self._start_health_check_thread()
|
||||
self.healthy_deployments: List = self.model_list
|
||||
|
||||
self.routing_strategy = routing_strategy
|
||||
### HEALTH CHECK THREAD ###
|
||||
if self.routing_strategy == "least-busy":
|
||||
self._start_health_check_thread()
|
||||
|
||||
### CACHING ###
|
||||
if redis_host is not None and redis_port is not None and redis_password is not None:
|
||||
|
@ -104,13 +108,15 @@ class Router:
|
|||
"""
|
||||
Returns the deployment with the shortest queue
|
||||
"""
|
||||
### COMMENTING OUT AS IT NEEDS FURTHER TESTING
|
||||
logging.debug(f"self.healthy_deployments: {self.healthy_deployments}")
|
||||
if len(self.healthy_deployments) > 0:
|
||||
for item in self.healthy_deployments:
|
||||
if item[0]["model_name"] == model: # first one in queue will be the one with the most availability
|
||||
return item[0]
|
||||
else:
|
||||
if self.routing_strategy == "least-busy":
|
||||
if len(self.healthy_deployments) > 0:
|
||||
for item in self.healthy_deployments:
|
||||
if item[0]["model_name"] == model: # first one in queue will be the one with the most availability
|
||||
return item[0]
|
||||
else:
|
||||
raise ValueError("No models available.")
|
||||
elif self.routing_strategy == "simple-shuffle":
|
||||
potential_deployments = []
|
||||
for item in self.model_list:
|
||||
if item["model_name"] == model:
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue