diff --git a/litellm/router.py b/litellm/router.py
index 64afae4f0..a1f9fcfc7 100644
--- a/litellm/router.py
+++ b/litellm/router.py
@@ -61,6 +61,8 @@ class Router:
 
         data = deployment["litellm_params"]
         data["messages"] = messages
+        for key, value in kwargs.items():
+            data[key] = value
         # call via litellm.completion()
         return litellm.completion(**data)
 
@@ -78,6 +80,8 @@ class Router:
 
         data = deployment["litellm_params"]
         data["prompt"] = prompt
+        for key, value in kwargs.items():
+            data[key] = value
         # call via litellm.completion()
         return litellm.text_completion(**data)
 
@@ -203,7 +207,10 @@ class Router:
 
             # get value
             cached_value = self.cache.get_cache(key)
             # update value
-            cached_value = cached_value + increment_value
+            try:
+                cached_value = cached_value + increment_value
+            except TypeError: # cache miss: get_cache() returned None, so seed the counter
+                cached_value = increment_value
             # save updated value
             self.cache.add_cache(result=cached_value, cache_key=key)
diff --git a/litellm/tests/test_router.py b/litellm/tests/test_router.py
index 1a309c39f..4206c6325 100644
--- a/litellm/tests/test_router.py
+++ b/litellm/tests/test_router.py
@@ -7,55 +7,103 @@ import pytest
 sys.path.insert(
     0, os.path.abspath("../..")
 ) # Adds the parent directory to the system path
+import litellm
 from litellm import Router
 from concurrent.futures import ThreadPoolExecutor
 from dotenv import load_dotenv
 
 load_dotenv()
 
-model_list = [{ # list of model deployments
-    "model_name": "gpt-3.5-turbo", # openai model name
-    "litellm_params": { # params for litellm completion/embedding call
-        "model": "azure/chatgpt-v-2",
-        "api_key": os.getenv("AZURE_API_KEY"),
-        "api_version": os.getenv("AZURE_API_VERSION"),
-        "api_base": os.getenv("AZURE_API_BASE")
-    },
-    "tpm": 240000,
-    "rpm": 1800
-}, {
-    "model_name": "gpt-3.5-turbo", # openai model name
-    "litellm_params": { # params for litellm completion/embedding call
-        "model": "azure/chatgpt-functioncalling",
-        "api_key": os.getenv("AZURE_API_KEY"),
-        "api_version": os.getenv("AZURE_API_VERSION"),
-        "api_base": os.getenv("AZURE_API_BASE")
-    },
-    "tpm": 240000,
-    "rpm": 1800
-}, {
-    "model_name": "gpt-3.5-turbo", # openai model name
-    "litellm_params": { # params for litellm completion/embedding call
-        "model": "gpt-3.5-turbo",
-        "api_key": os.getenv("OPENAI_API_KEY"),
-    },
-    "tpm": 1000000,
-    "rpm": 9000
-}]
+def test_multiple_deployments():
+    model_list = [{ # list of model deployments
+        "model_name": "gpt-3.5-turbo", # openai model name
+        "litellm_params": { # params for litellm completion/embedding call
+            "model": "azure/chatgpt-v-2",
+            "api_key": os.getenv("AZURE_API_KEY"),
+            "api_version": os.getenv("AZURE_API_VERSION"),
+            "api_base": os.getenv("AZURE_API_BASE")
+        },
+        "tpm": 240000,
+        "rpm": 1800
+    }, {
+        "model_name": "gpt-3.5-turbo", # openai model name
+        "litellm_params": { # params for litellm completion/embedding call
+            "model": "azure/chatgpt-functioncalling",
+            "api_key": os.getenv("AZURE_API_KEY"),
+            "api_version": os.getenv("AZURE_API_VERSION"),
+            "api_base": os.getenv("AZURE_API_BASE")
+        },
+        "tpm": 240000,
+        "rpm": 1800
+    }, {
+        "model_name": "gpt-3.5-turbo", # openai model name
+        "litellm_params": { # params for litellm completion/embedding call
+            "model": "gpt-3.5-turbo",
+            "api_key": os.getenv("OPENAI_API_KEY"),
+        },
+        "tpm": 1000000,
+        "rpm": 9000
+    }]
 
-router = Router(model_list=model_list, redis_host=os.getenv("REDIS_HOST"), redis_password=os.getenv("REDIS_PASSWORD"), redis_port=int(os.getenv("REDIS_PORT"))) # type: ignore
+    router = Router(model_list=model_list, redis_host=os.getenv("REDIS_HOST"),
+                    redis_password=os.getenv("REDIS_PASSWORD"), redis_port=int(os.getenv("REDIS_PORT"))) # type: ignore
 
-completions = []
-with ThreadPoolExecutor(max_workers=100) as executor:
-    kwargs = {
-        "model": "gpt-3.5-turbo",
-        "messages": [{"role": "user", "content": "Hey, how's it going?"}]
-    }
-    for _ in range(20):
-        future = executor.submit(router.completion, **kwargs) # type: ignore
-        completions.append(future)
+    completions = []
+    with ThreadPoolExecutor(max_workers=100) as executor:
+        kwargs = {
+            "model": "gpt-3.5-turbo",
+            "messages": [{"role": "user", "content": "Hey, how's it going?"}]
+        }
+        for _ in range(20):
+            future = executor.submit(router.completion, **kwargs) # type: ignore
+            completions.append(future)
 
-# Retrieve the results from the futures
-results = [future.result() for future in completions]
+    # Retrieve the results from the futures
+    results = [future.result() for future in completions]
 
-print(results)
\ No newline at end of file
+    print(results)
+
+### FUNCTION CALLING
+
+def test_function_calling():
+    litellm.set_verbose = True
+    model_list = [
+        {
+            "model_name": "gpt-3.5-turbo-0613",
+            "litellm_params": {
+                "model": "gpt-3.5-turbo-0613",
+                "api_key": os.getenv("OPENAI_API_KEY"), # never hard-code secrets in tests
+            },
+            "tpm": 100000,
+            "rpm": 10000,
+        },
+    ]
+
+    messages = [
+        {"role": "user", "content": "What is the weather like in Boston?"}
+    ]
+    functions = [
+        {
+            "name": "get_current_weather",
+            "description": "Get the current weather in a given location",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "location": {
+                        "type": "string",
+                        "description": "The city and state, e.g. San Francisco, CA"
+                    },
+                    "unit": {
+                        "type": "string",
+                        "enum": ["celsius", "fahrenheit"]
+                    }
+                },
+                "required": ["location"]
+            }
+        }
+    ]
+
+    router = Router(model_list=model_list)
+    response = router.completion(model="gpt-3.5-turbo-0613", messages=messages, functions=functions)
+    print(response)
+
+test_function_calling()
\ No newline at end of file
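
A minimal standalone sketch of the two behaviors this patch introduces, for reviewers
(the helper names below are illustrative stand-ins, not litellm APIs):

    # Kwargs passthrough: extra keyword arguments (functions, temperature, etc.)
    # are merged into the deployment's litellm_params before calling
    # litellm.completion(). Note: this sketch copies the params dict first; the
    # patch updates deployment["litellm_params"] in place, so forwarded kwargs
    # persist on the shared deployment entry across requests.
    def build_call_params(deployment: dict, messages: list, **kwargs) -> dict:
        data = dict(deployment["litellm_params"])
        data["messages"] = messages
        data.update(kwargs)  # equivalent to the per-key loop in the diff
        return data

    # Usage increment with a first-write fallback: on a cache miss the lookup
    # returns None, and None + increment_value raises TypeError, so the first
    # increment seeds the counter instead of raising.
    def increment_usage(cache: dict, key: str, increment_value: int) -> None:
        cached_value = cache.get(key)  # None on a miss, like cache.get_cache()
        try:
            cached_value = cached_value + increment_value
        except TypeError:
            cached_value = increment_value
        cache[key] = cached_value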