diff --git a/dist/litellm-0.12.4.dev1-py3-none-any.whl b/dist/litellm-0.12.4.dev1-py3-none-any.whl
new file mode 100644
index 000000000..91f0678cf
Binary files /dev/null and b/dist/litellm-0.12.4.dev1-py3-none-any.whl differ
diff --git a/dist/litellm-0.12.4.dev1.tar.gz b/dist/litellm-0.12.4.dev1.tar.gz
new file mode 100644
index 000000000..36d5d61e6
Binary files /dev/null and b/dist/litellm-0.12.4.dev1.tar.gz differ
diff --git a/litellm/router.py b/litellm/router.py
index 796cbb985..7123154cf 100644
--- a/litellm/router.py
+++ b/litellm/router.py
@@ -25,7 +25,8 @@ class Router:
                  model_list: Optional[list]=None,
                  redis_host: Optional[str] = None,
                  redis_port: Optional[int] = None,
-                 redis_password: Optional[str] = None) -> None:
+                 redis_password: Optional[str] = None,
+                 cache_responses: bool = False) -> None:
         if model_list:
             self.model_list = model_list
             self.model_names = [m["model_name"] for m in model_list]
@@ -41,7 +42,8 @@ class Router:
                 "type": "local"
             }
         self.cache = litellm.Cache(cache_config) # use Redis for tracking load balancing
-        litellm.cache = litellm.Cache(**cache_config) # use Redis for caching completion requests
+        if cache_responses:
+            litellm.cache = litellm.Cache(**cache_config) # use Redis for caching completion requests
         litellm.success_callback = [self.deployment_callback]
 
     def completion(self,
@@ -58,9 +60,10 @@ class Router:
 
         # pick the one that is available (lowest TPM/RPM)
         deployment = self.get_available_deployment(model=model, messages=messages)
-
+        print(f"kwargs: {kwargs}")
         data = deployment["litellm_params"]
         data["messages"] = messages
+        print(f"data: {data}")
         # call via litellm.completion()
         return litellm.completion(**{**data, **kwargs})
 
diff --git a/litellm/tests/test_router.py b/litellm/tests/test_router.py
index a2a311c5a..e581c435d 100644
--- a/litellm/tests/test_router.py
+++ b/litellm/tests/test_router.py
@@ -65,7 +65,6 @@ def test_multiple_deployments():
 
 ### FUNCTION CALLING
 def test_function_calling():
-    litellm.set_verbose =True
     model_list = [
         {
             "model_name": "gpt-3.5-turbo-0613",
@@ -151,3 +150,5 @@ def test_litellm_params_not_overwritten_by_function_calling():
         assert response.choices[0].finish_reason != "function_call"
     except Exception as e:
         pytest.fail(f"Error occurred: {e}")
+
+test_litellm_params_not_overwritten_by_function_calling()
\ No newline at end of file
diff --git a/litellm/utils.py b/litellm/utils.py
index 9c2611012..e872c8922 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -771,13 +771,15 @@ def client(original_function):
 
             # [OPTIONAL] CHECK CACHE
             # remove this after deprecating litellm.caching
+            print_verbose(f"litellm.caching: {litellm.caching}; litellm.caching_with_models: {litellm.caching_with_models}")
             if (litellm.caching or litellm.caching_with_models) and litellm.cache is None:
                 litellm.cache = Cache()
 
+            print_verbose(f"kwargs[caching]: {kwargs.get('caching', False)}; litellm.cache: {litellm.cache}")
             if kwargs.get("caching", False) or litellm.cache is not None: # allow users to control returning cached responses from the completion function
                 # checking cache
                 if (litellm.cache != None or litellm.caching or litellm.caching_with_models):
-                    print_verbose(f"LiteLLM: Checking Cache")
+                    print_verbose(f"Checking Cache")
                     cached_result = litellm.cache.get_cache(*args, **kwargs)
                     if cached_result != None:
                         print_verbose(f"Cache Hit!")
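
Usage sketch for the `cache_responses` flag introduced in this diff (not part of the patch itself; the model name, API key, and Redis settings below are placeholders). With `cache_responses=True`, the Router also sets `litellm.cache`, so repeated identical completion calls can be served from the configured cache rather than re-hitting the provider.

```python
# Sketch only -- illustrates the new cache_responses flag; values are placeholders.
from litellm.router import Router

model_list = [
    {
        "model_name": "gpt-3.5-turbo",
        "litellm_params": {
            "model": "gpt-3.5-turbo",
            "api_key": "sk-...",  # placeholder
        },
    },
]

router = Router(
    model_list=model_list,
    redis_host="localhost",  # backs the Router's load-balancing cache
    redis_port=6379,
    redis_password="",
    cache_responses=True,    # new flag: also cache completion responses via litellm.cache
)

response = router.completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "Hello"}],
)
```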