diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py
index 6125a3169..6a55375ea 100644
--- a/litellm/proxy/proxy_server.py
+++ b/litellm/proxy/proxy_server.py
@@ -8307,7 +8307,12 @@ async def get_config():
 
             _data_to_return.append({"name": "slack", "variables": _slack_env_vars})
 
-        return {"status": "success", "data": _data_to_return}
+        _router_settings = llm_router.get_settings()
+        return {
+            "status": "success",
+            "data": _data_to_return,
+            "router_settings": _router_settings,
+        }
     except Exception as e:
         traceback.print_exc()
         if isinstance(e, HTTPException):
diff --git a/litellm/router.py b/litellm/router.py
index 8c2e21bff..f4d014b31 100644
--- a/litellm/router.py
+++ b/litellm/router.py
@@ -299,6 +299,7 @@ class Router:
         verbose_router_logger.info(
             f"Intialized router with Routing strategy: {self.routing_strategy}\n\nRouting fallbacks: {self.fallbacks}\n\nRouting context window fallbacks: {self.context_window_fallbacks}"
         )
+        self.routing_strategy_args = routing_strategy_args
 
     def print_deployment(self, deployment: dict):
         """
@@ -2334,6 +2335,25 @@ class Router:
             return self.model_list
         return None
 
+    def get_settings(self):
+        _all_vars = vars(self)
+        _settings_to_return = {}
+        vars_to_include = [
+            "routing_strategy_args",
+            "routing_strategy",
+            "allowed_fails",
+            "cooldown_time",
+            "num_retries",
+            "timeout",
+            "max_retries",
+            "retry_after",
+        ]
+
+        for var in vars_to_include:
+            if var in _all_vars:
+                _settings_to_return[var] = _all_vars[var]
+        return _settings_to_return
+
     def _get_client(self, deployment, kwargs, client_type=None):
         """
         Returns the appropriate client based on the given deployment, kwargs, and client_type.
diff --git a/litellm/tests/test_router_utils.py b/litellm/tests/test_router_utils.py
new file mode 100644
index 000000000..c032783bd
--- /dev/null
+++ b/litellm/tests/test_router_utils.py
@@ -0,0 +1,87 @@
+#### What this tests ####
+# This tests utils used by llm router -> like llmrouter.get_settings()
+
+import sys, os, time
+import traceback, asyncio
+import pytest
+
+sys.path.insert(
+    0, os.path.abspath("../..")
+)  # Adds the parent directory to the system path
+import litellm
+from litellm import Router
+from litellm.router import Deployment, LiteLLM_Params, ModelInfo
+from concurrent.futures import ThreadPoolExecutor
+from collections import defaultdict
+from dotenv import load_dotenv
+
+load_dotenv()
+
+
+def test_returned_settings():
+    # this tests if the router raises an exception when invalid params are set
+    # in this test both deployments have bad keys - Keep this test. It validates if the router raises the most recent exception
+    litellm.set_verbose = True
+    import openai
+
+    try:
+        print("testing if router raises an exception")
+        old_api_key = os.environ["AZURE_API_KEY"]
+        os.environ["AZURE_API_KEY"] = ""
+        model_list = [
+            {
+                "model_name": "gpt-3.5-turbo",  # openai model name
+                "litellm_params": {  # params for litellm completion/embedding call
+                    "model": "azure/chatgpt-v-2",
+                    "api_key": "bad-key",
+                    "api_version": os.getenv("AZURE_API_VERSION"),
+                    "api_base": os.getenv("AZURE_API_BASE"),
+                },
+                "tpm": 240000,
+                "rpm": 1800,
+            },
+            {
+                "model_name": "gpt-3.5-turbo",  # openai model name
+                "litellm_params": {
+                    "model": "gpt-3.5-turbo",
+                    "api_key": "bad-key",
+                },
+                "tpm": 240000,
+                "rpm": 1800,
+            },
+        ]
+        router = Router(
+            model_list=model_list,
+            redis_host=os.getenv("REDIS_HOST"),
+            redis_password=os.getenv("REDIS_PASSWORD"),
+            redis_port=int(os.getenv("REDIS_PORT")),
+            routing_strategy="latency-based-routing",
+            routing_strategy_args={"ttl": 10},
+            set_verbose=False,
+            num_retries=3,
+            retry_after=5,
+            allowed_fails=1,
+            cooldown_time=30,
+        )  # type: ignore
+
+        settings = router.get_settings()
+        print(settings)
+
+        """
+        routing_strategy: "latency-based-routing"
+        routing_strategy_args: {"ttl": 10}  # Average the last 10 calls to compute avg latency per model
+        allowed_fails: 1
+        num_retries: 3
+        retry_after: 5  # seconds to wait before retrying a failed request
+        cooldown_time: 30  # seconds to cooldown a deployment after failure
+        """
+        assert settings["routing_strategy"] == "latency-based-routing"
+        assert settings["routing_strategy_args"]["ttl"] == 10
+        assert settings["allowed_fails"] == 1
+        assert settings["num_retries"] == 3
+        assert settings["retry_after"] == 5
+        assert settings["cooldown_time"] == 30
+
+    except:
+        print(traceback.format_exc())
+        pytest.fail("An error occurred - " + traceback.format_exc())
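
Usage sketch for the new Router.get_settings() helper (not part of the patch): it mirrors the constructor arguments exercised in the test above, minus the Redis settings, which get_settings() does not depend on. The model entry and key below are placeholders.

    from litellm import Router

    router = Router(
        model_list=[
            {
                "model_name": "gpt-3.5-turbo",
                "litellm_params": {
                    "model": "gpt-3.5-turbo",
                    "api_key": "placeholder-key",  # illustrative; no request is made here
                },
            }
        ],
        routing_strategy="latency-based-routing",
        routing_strategy_args={"ttl": 10},
        num_retries=3,
        retry_after=5,
        allowed_fails=1,
        cooldown_time=30,
    )

    # get_settings() copies only the allow-listed names out of vars(self) and
    # skips any that are not set on the instance, so it returns a plain dict:
    print(router.get_settings())
    # e.g. {'routing_strategy_args': {'ttl': 10}, 'routing_strategy': 'latency-based-routing',
    #       'allowed_fails': 1, 'cooldown_time': 30, 'num_retries': 3, 'retry_after': 5, ...}

Because the method reads from vars(self) against an explicit allow-list, exposing another setting later only requires appending its name to vars_to_include.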
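
On the proxy side, get_config() now attaches the same dict to its response under a router_settings key. A sketch of the resulting payload, with illustrative values and the pre-existing data entries unchanged:

    {
        "status": "success",
        "data": [
            {"name": "slack", "variables": {...}},  # alerting env vars, as before
        ],
        "router_settings": {  # new: output of llm_router.get_settings()
            "routing_strategy": "latency-based-routing",
            "routing_strategy_args": {"ttl": 10},
            "allowed_fails": 1,
            "num_retries": 3,
            "retry_after": 5,
            "cooldown_time": 30,
        },
    }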