forked from phoenix/litellm-mirror
fix(router.py): enable async completions with model fallbacks
This commit is contained in:
parent
9b53ea4b0f
commit
afac42e93a
2 changed files with 8 additions and 12 deletions
|
@@ -178,7 +178,7 @@ class Router:
|
||||||
try:
|
try:
|
||||||
kwargs["model"] = model
|
kwargs["model"] = model
|
||||||
kwargs["messages"] = messages
|
kwargs["messages"] = messages
|
||||||
kwargs["original_function"] = self._completion
|
kwargs["original_function"] = self._acompletion
|
||||||
kwargs["num_retries"] = self.num_retries
|
kwargs["num_retries"] = self.num_retries
|
||||||
|
|
||||||
# Use asyncio.timeout to enforce the timeout
|
# Use asyncio.timeout to enforce the timeout
|
||||||
|
@@ -205,13 +205,6 @@ class Router:
|
||||||
response = await litellm.acompletion(**{**data, "messages": messages, "caching": self.cache_responses, **kwargs})
|
response = await litellm.acompletion(**{**data, "messages": messages, "caching": self.cache_responses, **kwargs})
|
||||||
return response
|
return response
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
if self.num_retries > 0:
|
|
||||||
kwargs["model"] = model
|
|
||||||
kwargs["messages"] = messages
|
|
||||||
kwargs["original_exception"] = e
|
|
||||||
kwargs["original_function"] = self.acompletion
|
|
||||||
return await self.async_function_with_retries(**kwargs)
|
|
||||||
else:
|
|
||||||
raise e
|
raise e
|
||||||
|
|
||||||
def text_completion(self,
|
def text_completion(self,
|
||||||
|
@@ -279,7 +272,7 @@ class Router:
|
||||||
model_group = kwargs.get("model")
|
model_group = kwargs.get("model")
|
||||||
try:
|
try:
|
||||||
response = await self.async_function_with_retries(*args, **kwargs)
|
response = await self.async_function_with_retries(*args, **kwargs)
|
||||||
self.print_verbose(f'Response: {response}')
|
self.print_verbose(f'Async Response: {response}')
|
||||||
return response
|
return response
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self.print_verbose(f"An exception occurs")
|
self.print_verbose(f"An exception occurs")
|
||||||
|
@@ -358,6 +351,8 @@ class Router:
|
||||||
pass
|
pass
|
||||||
else:
|
else:
|
||||||
raise e
|
raise e
|
||||||
|
if self.num_retries == 0:
|
||||||
|
raise e
|
||||||
|
|
||||||
def function_with_fallbacks(self, *args, **kwargs):
|
def function_with_fallbacks(self, *args, **kwargs):
|
||||||
"""
|
"""
|
||||||
|
@@ -695,7 +690,7 @@ class Router:
|
||||||
return self.model_names
|
return self.model_names
|
||||||
|
|
||||||
def print_verbose(self, print_statement):
|
def print_verbose(self, print_statement):
|
||||||
if self.set_verbose:
|
if self.set_verbose or litellm.set_verbose:
|
||||||
print(f"LiteLLM.Router: {print_statement}") # noqa
|
print(f"LiteLLM.Router: {print_statement}") # noqa
|
||||||
|
|
||||||
def get_available_deployment(self,
|
def get_available_deployment(self,
|
||||||
|
|
|
@@ -59,6 +59,7 @@ def test_sync_fallbacks():
|
||||||
print(e)
|
print(e)
|
||||||
|
|
||||||
def test_async_fallbacks():
|
def test_async_fallbacks():
|
||||||
|
litellm.set_verbose = False
|
||||||
async def test_get_response():
|
async def test_get_response():
|
||||||
user_message = "Hello, how are you?"
|
user_message = "Hello, how are you?"
|
||||||
messages = [{"content": user_message, "role": "user"}]
|
messages = [{"content": user_message, "role": "user"}]
|
||||||
|
@@ -73,4 +74,4 @@ def test_async_fallbacks():
|
||||||
|
|
||||||
asyncio.run(test_get_response())
|
asyncio.run(test_get_response())
|
||||||
|
|
||||||
test_async_fallbacks()
|
# test_async_fallbacks()
|
Loading…
Add table
Add a link
Reference in a new issue