forked from phoenix/litellm-mirror

fix(vertex_ai.py): fix output parsing

commit f12dc5df21 (parent 3a9c828437)
7 changed files with 14 additions and 8 deletions
@@ -21,6 +21,7 @@ class Router:
        router = Router(model_list=model_list)
    """
    model_names: List = []
    cache_responses: bool = False
    def __init__(self,
                 model_list: Optional[list]=None,
                 redis_host: Optional[str] = None,
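For context, a minimal sketch of how a Router like this is typically constructed. The shape of each model_list entry (a "model_name" alias plus "litellm_params") is an assumption inferred from the deployment["litellm_params"] lookup later in this diff, not something this commit defines:

    # Hypothetical usage sketch; the entry keys are assumptions, see note above.
    from litellm import Router

    model_list = [{
        "model_name": "gpt-3.5-turbo",   # alias callers pass to router.completion()
        "litellm_params": {              # kwargs forwarded to litellm.completion()
            "model": "azure/chatgpt-v-2",
            "api_key": "sk-...",         # placeholder
        },
    }]

    router = Router(model_list=model_list)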
@@ -44,6 +45,7 @@ class Router:
            self.cache = litellm.Cache(cache_config) # use Redis for tracking load balancing
        if cache_responses:
            litellm.cache = litellm.Cache(**cache_config) # use Redis for caching completion requests
            self.cache_responses = cache_responses
        litellm.success_callback = [self.deployment_callback]

    def completion(self,
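This hunk wires one Redis-backed cache_config into two different caches: self.cache tracks per-deployment load for routing, while litellm.cache (only set when cache_responses is true) caches completion responses globally. Note the two constructions differ: one passes the config dict positionally, the other unpacks it as keyword arguments. A sketch of enabling both, assuming redis_port and redis_password parameters exist alongside the redis_host shown in the first hunk (only redis_host is visible in this diff):

    # Hypothetical wiring; redis_port/redis_password are assumed parameters.
    router = Router(
        model_list=model_list,
        redis_host="localhost",   # feeds cache_config for load tracking
        # redis_port=6379,        # assumed, not visible in this diff
        # redis_password="...",   # assumed, not visible in this diff
        cache_responses=True,     # additionally cache completions in Redis
    )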
@@ -60,10 +62,9 @@

        # pick the one that is available (lowest TPM/RPM)
        deployment = self.get_available_deployment(model=model, messages=messages)
        print(f"kwargs: {kwargs}")
        data = deployment["litellm_params"]
        data["messages"] = messages
        print(f"data: {data}")
        data["caching"] = self.cache_responses
        # call via litellm.completion()
        return litellm.completion(**{**data, **kwargs})
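End to end, completion() resolves the requested model alias to the least-loaded deployment and forwards that deployment's litellm_params, the messages, and the caller's kwargs to litellm.completion(); because kwargs is unpacked last in {**data, **kwargs}, caller-supplied values override the deployment defaults. A minimal call sketch, assuming the hypothetical router above:

    response = router.completion(
        model="gpt-3.5-turbo",   # alias resolved by get_available_deployment()
        messages=[{"role": "user", "content": "Hey, how's it going?"}],
    )
    print(response)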