forked from phoenix/litellm-mirror
fix(router.py): adding support for async completion calls
https://github.com/BerriAI/litellm/issues/676
parent 30dd0b5c6b
commit 0f08335edd
2 changed files with 46 additions and 3 deletions
@@ -54,7 +54,6 @@ class Router:
                    messages: List[Dict[str, str]],
                    is_retry: Optional[bool] = False,
                    is_fallback: Optional[bool] = False,
-                   is_async: Optional[bool] = False,
                    **kwargs):
        """
        Example usage:
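The is_async flag is dropped from completion(): synchronous callers are unaffected, and async callers move to the dedicated acompletion() coroutine added in the next hunk. A minimal sketch of the synchronous path, assuming Router is importable from the litellm package; the model name, key, and limits below are placeholders:

import os
from litellm import Router

model_list = [{
    "model_name": "gpt-3.5-turbo",
    "litellm_params": {
        "model": "gpt-3.5-turbo-0613",
        "api_key": os.getenv("OPENAI_API_KEY"),
    },
    "tpm": 100000,
    "rpm": 10000,
}]

router = Router(model_list=model_list)
# routes to the deployment with available TPM/RPM, then calls litellm.completion()
response = router.completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "What is the weather like in Boston?"}],
)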
@@ -68,6 +67,19 @@ class Router:
        data["caching"] = self.cache_responses
        # call via litellm.completion()
        return litellm.completion(**{**data, **kwargs})
+
+    async def acompletion(self,
+                          model: str,
+                          messages: List[Dict[str, str]],
+                          is_retry: Optional[bool] = False,
+                          is_fallback: Optional[bool] = False,
+                          **kwargs):
+        # pick the one that is available (lowest TPM/RPM)
+        deployment = self.get_available_deployment(model=model, messages=messages)
+        data = deployment["litellm_params"]
+        data["messages"] = messages
+        data["caching"] = self.cache_responses
+        return await litellm.acompletion(**{**data, **kwargs})

    def text_completion(self,
                        model: str,
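A usage sketch for the new acompletion() coroutine (not part of the diff). It mirrors the test added further down and reuses model_list and the imports from the sketch above, plus asyncio:

import asyncio

async def main():
    router = Router(model_list=model_list)
    # same routing as completion(), but awaits litellm.acompletion() under the hood
    response = await router.acompletion(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": "What is the weather like in Boston?"}],
    )
    print(response["choices"][0]["message"]["content"])

asyncio.run(main())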
@@ -83,6 +95,7 @@ class Router:

        data = deployment["litellm_params"]
        data["prompt"] = prompt
+       data["caching"] = self.cache_responses
        # call via litellm.completion()
        return litellm.text_completion(**{**data, **kwargs})
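text_completion() routes a plain prompt string instead of a chat messages list and, with this hunk, also forwards the router's cache_responses setting. A short sketch reusing the router from the first example; the prompt text is a placeholder:

text_response = router.text_completion(
    model="gpt-3.5-turbo",
    prompt="Write one sentence about Boston.",
)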
@@ -96,6 +109,7 @@ class Router:

        data = deployment["litellm_params"]
        data["input"] = input
+       data["caching"] = self.cache_responses
        # call via litellm.embedding()
        return litellm.embedding(**{**data, **kwargs})
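embedding() follows the same pattern with an input list and now picks up cache_responses as well. A hedged sketch; the embedding deployment entry below is an assumption (any embedding model litellm supports uses the same model_list shape):

import os
from litellm import Router

embedding_router = Router(model_list=[{
    "model_name": "embeddings",
    "litellm_params": {
        "model": "text-embedding-ada-002",  # assumed embedding deployment
        "api_key": os.getenv("OPENAI_API_KEY"),
    },
    "tpm": 100000,
    "rpm": 10000,
}])
vectors = embedding_router.embedding(model="embeddings", input=["hello world"])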
@@ -2,7 +2,7 @@
# This tests calling batch_completions by running 100 messages together

import sys, os
-import traceback
+import traceback, asyncio
import pytest
sys.path.insert(
    0, os.path.abspath("../..")
@@ -151,4 +151,33 @@ def test_litellm_params_not_overwritten_by_function_calling():
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")

-test_litellm_params_not_overwritten_by_function_calling()
+# test_litellm_params_not_overwritten_by_function_calling()
+
+def test_acompletion_on_router():
+    try:
+        model_list = [
+            {
+                "model_name": "gpt-3.5-turbo",
+                "litellm_params": {
+                    "model": "gpt-3.5-turbo-0613",
+                    "api_key": os.getenv("OPENAI_API_KEY"),
+                },
+                "tpm": 100000,
+                "rpm": 10000,
+            },
+        ]
+
+        messages = [
+            {"role": "user", "content": "What is the weather like in Boston?"}
+        ]
+
+        async def get_response():
+            router = Router(model_list=model_list)
+            response = await router.acompletion(model="gpt-3.5-turbo", messages=messages)
+            return response
+        response = asyncio.run(get_response())
+
+        assert isinstance(response['choices'][0]['message']['content'], str)
+    except Exception as e:
+        traceback.print_exc()
+        pytest.fail(f"Error occurred: {e}")
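The new test drives the coroutine with asyncio.run from inside a plain synchronous test function, so no extra pytest plugin is required. An alternative sketch, assuming the pytest-asyncio plugin were installed (it is not part of this commit), with model_list and messages as defined in the test above:

import pytest

@pytest.mark.asyncio
async def test_acompletion_on_router_async_def():
    # pytest-asyncio manages the event loop, so no asyncio.run() wrapper is needed
    router = Router(model_list=model_list)
    response = await router.acompletion(model="gpt-3.5-turbo", messages=messages)
    assert isinstance(response["choices"][0]["message"]["content"], str)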