test(test_router.py): fixing router testing

Krrish Dholakia 2023-10-24 10:21:10 -07:00
parent 8a04620562
commit 653863f787
5 changed files with 11 additions and 5 deletions

dist/litellm-0.12.4.dev1.tar.gz (new vendored binary file, contents not shown)

litellm/router.py

@@ -25,7 +25,8 @@ class Router:
                 model_list: Optional[list]=None,
                 redis_host: Optional[str] = None,
                 redis_port: Optional[int] = None,
-                redis_password: Optional[str] = None) -> None:
+                redis_password: Optional[str] = None,
+                cache_responses: bool = False) -> None:
        if model_list:
            self.model_list = model_list
            self.model_names = [m["model_name"] for m in model_list]
@@ -41,7 +42,8 @@ class Router:
                "type": "local"
            }
        self.cache = litellm.Cache(cache_config) # use Redis for tracking load balancing
-        litellm.cache = litellm.Cache(**cache_config) # use Redis for caching completion requests
+        if cache_responses:
+            litellm.cache = litellm.Cache(**cache_config) # use Redis for caching completion requests
        litellm.success_callback = [self.deployment_callback]

    def completion(self,
@@ -58,9 +60,10 @@
        # pick the one that is available (lowest TPM/RPM)
        deployment = self.get_available_deployment(model=model, messages=messages)
        print(f"kwargs: {kwargs}")
        data = deployment["litellm_params"]
        data["messages"] = messages
        print(f"data: {data}")
        # call via litellm.completion()
        return litellm.completion(**{**data, **kwargs})
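
For context, a minimal usage sketch of the Router changes above (not part of this commit): the new cache_responses flag opts a Router instance into response caching via litellm.cache, while self.cache keeps tracking deployment load for routing. The deployment entry, API key handling, and Redis settings below are placeholder assumptions.

# Hypothetical usage sketch; deployment names, keys, and Redis settings are placeholders.
import os
from litellm import Router

model_list = [
    {
        "model_name": "gpt-3.5-turbo",   # alias callers pass to router.completion()
        "litellm_params": {              # forwarded to litellm.completion() as `data`
            "model": "gpt-3.5-turbo",
            "api_key": os.getenv("OPENAI_API_KEY"),
        },
    },
]

router = Router(
    model_list=model_list,
    redis_host=os.getenv("REDIS_HOST"),              # backend for the load-tracking cache
    redis_port=int(os.getenv("REDIS_PORT", "6379")),
    redis_password=os.getenv("REDIS_PASSWORD"),
    cache_responses=True,                            # opt in: sets litellm.cache for completion caching
)

# completion() picks an available deployment (lowest TPM/RPM), merges its
# litellm_params with the caller's kwargs, and calls litellm.completion().
response = router.completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "Hey, how's it going?"}],
)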

litellm/tests/test_router.py

@@ -65,7 +65,6 @@ def test_multiple_deployments():

### FUNCTION CALLING
def test_function_calling():
-    litellm.set_verbose =True
    model_list = [
        {
            "model_name": "gpt-3.5-turbo-0613",
@@ -151,3 +150,5 @@ def test_litellm_params_not_overwritten_by_function_calling():
        assert response.choices[0].finish_reason != "function_call"
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")
+
+test_litellm_params_not_overwritten_by_function_calling()

litellm/utils.py

@@ -771,13 +771,15 @@ def client(original_function):
            # [OPTIONAL] CHECK CACHE
            # remove this after deprecating litellm.caching
            print_verbose(f"litellm.caching: {litellm.caching}; litellm.caching_with_models: {litellm.caching_with_models}")
            if (litellm.caching or litellm.caching_with_models) and litellm.cache is None:
                litellm.cache = Cache()
+            print_verbose(f"kwargs[caching]: {kwargs.get('caching', False)}; litellm.cache: {litellm.cache}")
+            if kwargs.get("caching", False) or litellm.cache is not None: # allow users to control returning cached responses from the completion function
            # checking cache
-            if (litellm.cache != None or litellm.caching or litellm.caching_with_models):
-                print_verbose(f"LiteLLM: Checking Cache")
+                print_verbose(f"Checking Cache")
                cached_result = litellm.cache.get_cache(*args, **kwargs)
                if cached_result != None:
                    print_verbose(f"Cache Hit!")