test(test_router.py): fixing router testing

Krrish Dholakia 2023-10-24 10:21:10 -07:00
parent 8a04620562
commit 653863f787
5 changed files with 11 additions and 5 deletions

dist/litellm-0.12.4.dev1.tar.gz (new vendored binary file, contents not shown)

litellm/router.py

@@ -25,7 +25,8 @@ class Router:
                 model_list: Optional[list]=None,
                 redis_host: Optional[str] = None,
                 redis_port: Optional[int] = None,
-                redis_password: Optional[str] = None) -> None:
+                redis_password: Optional[str] = None,
+                cache_responses: bool = False) -> None:
        if model_list:
            self.model_list = model_list
            self.model_names = [m["model_name"] for m in model_list]
@@ -41,7 +42,8 @@ class Router:
                "type": "local"
            }
        self.cache = litellm.Cache(cache_config) # use Redis for tracking load balancing
-        litellm.cache = litellm.Cache(**cache_config) # use Redis for caching completion requests
+        if cache_responses:
+            litellm.cache = litellm.Cache(**cache_config) # use Redis for caching completion requests
        litellm.success_callback = [self.deployment_callback]

    def completion(self,
@@ -58,9 +60,10 @@
        # pick the one that is available (lowest TPM/RPM)
        deployment = self.get_available_deployment(model=model, messages=messages)
        print(f"kwargs: {kwargs}")
        data = deployment["litellm_params"]
        data["messages"] = messages
        print(f"data: {data}")
        # call via litellm.completion()
        return litellm.completion(**{**data, **kwargs})
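
For context, a minimal usage sketch of the Router changes above (not part of this commit): the new cache_responses flag opts a Router instance into response caching via litellm.cache, while self.cache keeps tracking deployment load for routing. The deployment entry, API key handling, and Redis settings below are placeholder assumptions.

# Hypothetical usage sketch; deployment names, keys, and Redis settings are placeholders.
import os
from litellm import Router

model_list = [
    {
        "model_name": "gpt-3.5-turbo",   # alias callers pass to router.completion()
        "litellm_params": {              # forwarded to litellm.completion() as `data`
            "model": "gpt-3.5-turbo",
            "api_key": os.getenv("OPENAI_API_KEY"),
        },
    },
]

router = Router(
    model_list=model_list,
    redis_host=os.getenv("REDIS_HOST"),              # backend for the load-tracking cache
    redis_port=int(os.getenv("REDIS_PORT", "6379")),
    redis_password=os.getenv("REDIS_PASSWORD"),
    cache_responses=True,                            # opt in: sets litellm.cache for completion caching
)

# completion() picks an available deployment (lowest TPM/RPM), merges its
# litellm_params with the caller's kwargs, and calls litellm.completion().
response = router.completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "Hey, how's it going?"}],
)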

litellm/tests/test_router.py

@@ -65,7 +65,6 @@ def test_multiple_deployments():

### FUNCTION CALLING
def test_function_calling():
-    litellm.set_verbose =True
    model_list = [
        {
            "model_name": "gpt-3.5-turbo-0613",
@@ -151,3 +150,5 @@ def test_litellm_params_not_overwritten_by_function_calling():
        assert response.choices[0].finish_reason != "function_call"
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")
+
+test_litellm_params_not_overwritten_by_function_calling()

litellm/utils.py

@@ -771,13 +771,15 @@ def client(original_function):
            # [OPTIONAL] CHECK CACHE
            # remove this after deprecating litellm.caching
            print_verbose(f"litellm.caching: {litellm.caching}; litellm.caching_with_models: {litellm.caching_with_models}")
            if (litellm.caching or litellm.caching_with_models) and litellm.cache is None:
                litellm.cache = Cache()
+            print_verbose(f"kwargs[caching]: {kwargs.get('caching', False)}; litellm.cache: {litellm.cache}")
+            if kwargs.get("caching", False) or litellm.cache is not None: # allow users to control returning cached responses from the completion function
            # checking cache
-            if (litellm.cache != None or litellm.caching or litellm.caching_with_models):
-                print_verbose(f"LiteLLM: Checking Cache")
+                print_verbose(f"Checking Cache")
                cached_result = litellm.cache.get_cache(*args, **kwargs)
                if cached_result != None:
                    print_verbose(f"Cache Hit!")