forked from phoenix/litellm-mirror
test(test_router.py): fixing router testing
parent 8a04620562
commit 653863f787
5 changed files with 11 additions and 5 deletions
BIN  dist/litellm-0.12.4.dev1-py3-none-any.whl (vendored, normal file; binary file not shown)
BIN  dist/litellm-0.12.4.dev1.tar.gz (vendored, normal file; binary file not shown)
litellm/router.py
@@ -25,7 +25,8 @@ class Router:
                  model_list: Optional[list]=None,
                  redis_host: Optional[str] = None,
                  redis_port: Optional[int] = None,
-                 redis_password: Optional[str] = None) -> None:
+                 redis_password: Optional[str] = None,
+                 cache_responses: bool = False) -> None:
         if model_list:
             self.model_list = model_list
             self.model_names = [m["model_name"] for m in model_list]
@@ -41,7 +42,8 @@ class Router:
                 "type": "local"
             }
         self.cache = litellm.Cache(cache_config) # use Redis for tracking load balancing
-        litellm.cache = litellm.Cache(**cache_config) # use Redis for caching completion requests
+        if cache_responses:
+            litellm.cache = litellm.Cache(**cache_config) # use Redis for caching completion requests
         litellm.success_callback = [self.deployment_callback]

     def completion(self,
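Taken together, the two __init__ hunks make response caching opt-in: the Router still builds a litellm.Cache for load-balancing state, but the global litellm.cache used for completion caching is only set when cache_responses=True. A minimal usage sketch of opting in; the model_list entry, model values, and environment-variable names below are illustrative assumptions, not part of this commit:

import os
from litellm import Router

# model_list entries follow the keys visible in this diff: "model_name" is the
# alias callers pass to completion(), "litellm_params" is forwarded to litellm.completion().
model_list = [
    {
        "model_name": "gpt-3.5-turbo",
        "litellm_params": {
            "model": "gpt-3.5-turbo",
            "api_key": os.environ.get("OPENAI_API_KEY"),  # illustrative; any supported provider works
        },
    },
]

router = Router(
    model_list=model_list,
    redis_host=os.environ.get("REDIS_HOST"),
    redis_port=int(os.environ.get("REDIS_PORT", "6379")),
    redis_password=os.environ.get("REDIS_PASSWORD"),
    cache_responses=True,  # new flag from this commit: cache completion responses in Redis
)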
@@ -58,9 +60,10 @@ class Router:

         # pick the one that is available (lowest TPM/RPM)
         deployment = self.get_available_deployment(model=model, messages=messages)
+        print(f"kwargs: {kwargs}")
         data = deployment["litellm_params"]
         data["messages"] = messages
+        print(f"data: {data}")
         # call via litellm.completion()
         return litellm.completion(**{**data, **kwargs})
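completion() then merges the chosen deployment's litellm_params with the caller's kwargs, unpacking kwargs last so per-call arguments win on key collisions, and the two new prints surface exactly what gets merged. A hedged example of calling the router built above (the message and max_tokens value are illustrative):

# The router picks the least-loaded deployment for the "gpt-3.5-turbo" alias,
# then forwards {**litellm_params, "messages": ..., **kwargs} to litellm.completion().
response = router.completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "Hey, how's it going?"}],
    max_tokens=50,  # travels via **kwargs and overrides any deployment-level default
)
print(response.choices[0].message.content)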
litellm/tests/test_router.py
@@ -65,7 +65,6 @@ def test_multiple_deployments():
 ### FUNCTION CALLING

 def test_function_calling():
-    litellm.set_verbose =True
     model_list = [
         {
             "model_name": "gpt-3.5-turbo-0613",
@@ -151,3 +150,5 @@ def test_litellm_params_not_overwritten_by_function_calling():
         assert response.choices[0].finish_reason != "function_call"
     except Exception as e:
         pytest.fail(f"Error occurred: {e}")
+
+test_litellm_params_not_overwritten_by_function_calling()
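The module-level call added at the end of test_router.py means the test runs under a plain python invocation as well as under pytest discovery. A stripped-down sketch of that pattern; the test body here is a placeholder, not the real router test:

import pytest

def test_example():
    try:
        result = 2 + 2  # placeholder for the Router / function-calling request under test
        assert result == 4
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")

# Calling the test at module level, as this commit does, lets `python <test file>` run it
# directly; pytest still collects it by its test_ prefix when run normally.
test_example()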
litellm/utils.py
@@ -771,13 +771,15 @@ def client(original_function):

             # [OPTIONAL] CHECK CACHE
             # remove this after deprecating litellm.caching
+            print_verbose(f"litellm.caching: {litellm.caching}; litellm.caching_with_models: {litellm.caching_with_models}")
             if (litellm.caching or litellm.caching_with_models) and litellm.cache is None:
                 litellm.cache = Cache()

+            print_verbose(f"kwargs[caching]: {kwargs.get('caching', False)}; litellm.cache: {litellm.cache}")
             if kwargs.get("caching", False) or litellm.cache is not None: # allow users to control returning cached responses from the completion function
                 # checking cache
                 if (litellm.cache != None or litellm.caching or litellm.caching_with_models):
-                    print_verbose(f"LiteLLM: Checking Cache")
+                    print_verbose(f"Checking Cache")
                     cached_result = litellm.cache.get_cache(*args, **kwargs)
                     if cached_result != None:
                         print_verbose(f"Cache Hit!")
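The client() wrapper's cache path now reads: log the global caching flags, log the per-call kwargs['caching'] value, and only consult litellm.cache.get_cache() when caching is requested for the call or a cache is configured globally. A self-contained sketch of that control flow; cached_call and the dict-backed cache below are illustrative stand-ins, not litellm's implementation:

from typing import Any, Callable, Dict, Optional

def cached_call(func: Callable[..., Any],
                cache: Optional[Dict[str, Any]],
                *args: Any,
                **kwargs: Any) -> Any:
    # Outer check mirrors the diff: per-call kwargs["caching"] OR a configured cache.
    if kwargs.pop("caching", False) or cache is not None:
        # Inner check mirrors "if litellm.cache != None": only look up when a cache exists.
        if cache is not None:
            key = repr((args, tuple(sorted(kwargs.items()))))  # stand-in for get_cache()'s key logic
            if key in cache:
                print("Cache Hit!")
                return cache[key]
            result = func(*args, **kwargs)
            cache[key] = result  # store for the next identical call (litellm handles storage separately)
            return result
    return func(*args, **kwargs)

# Usage: a plain dict as a toy cache; litellm wires up its own Cache object (in-memory or Redis).
toy_cache: Dict[str, Any] = {}
print(cached_call(lambda prompt: f"echo: {prompt}", toy_cache, "hello"))  # computed
print(cached_call(lambda prompt: f"echo: {prompt}", toy_cache, "hello"))  # served from the cache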