router - get settings

2024-04-16 14:22:54 -07:00 · 2024-04-16 14:22:54 -07:00 · e271ce8030
commit e271ce8030
parent 70716b3373
3 changed files with 113 additions and 1 deletions
--- a/litellm/proxy/proxy_server.py
+++ b/litellm/proxy/proxy_server.py
@ -8307,7 +8307,12 @@ async def get_config():
            _data_to_return.append({"name": "slack", "variables": _slack_env_vars})
-        return {"status": "success", "data": _data_to_return}
+        _router_settings = llm_router.get_settings()
        return {
            "status": "success",
            "data": _data_to_return,
            "router_settings": _router_settings,
        }
    except Exception as e:
        traceback.print_exc()
        if isinstance(e, HTTPException):
--- a/litellm/router.py
+++ b/litellm/router.py
@ -299,6 +299,7 @@ class Router:
        verbose_router_logger.info(
            f"Intialized router with Routing strategy: {self.routing_strategy}\n\nRouting fallbacks: {self.fallbacks}\n\nRouting context window fallbacks: {self.context_window_fallbacks}"
        )
        self.routing_strategy_args = routing_strategy_args
    def print_deployment(self, deployment: dict):
        """
@ -2334,6 +2335,25 @@ class Router:
            return self.model_list
        return None
    def get_settings(self):
        _all_vars = vars(self)
        _settings_to_return = {}
        vars_to_include = [
            "routing_strategy_args",
            "routing_strategy",
            "allowed_fails",
            "cooldown_time",
            "num_retries",
            "timeout",
            "max_retries",
            "retry_after",
        ]
        for var in vars_to_include:
            if var in _all_vars:
                _settings_to_return[var] = _all_vars[var]
        return _settings_to_return
    def _get_client(self, deployment, kwargs, client_type=None):
        """
        Returns the appropriate client based on the given deployment, kwargs, and client_type.
--- a/litellm/tests/test_router_utils.py
+++ b/litellm/tests/test_router_utils.py
@ -0,0 +1,87 @@
 #### What this tests ####
 # This tests utils used by llm router -> like llmrouter.get_settings()
 import sys, os, time
 import traceback, asyncio
 import pytest
 sys.path.insert(
    0, os.path.abspath("../..")
 )  # Adds the parent directory to the system path
 import litellm
 from litellm import Router
 from litellm.router import Deployment, LiteLLM_Params, ModelInfo
 from concurrent.futures import ThreadPoolExecutor
 from collections import defaultdict
 from dotenv import load_dotenv
 load_dotenv()
 def test_returned_settings():
    """
    Check that `Router.get_settings()` reports the values the router was built with.

    A router is constructed with explicit routing / retry / cooldown settings and
    the dict returned by `get_settings()` is compared against those values.

    AZURE_API_KEY is blanked while the test runs and put back afterwards (or
    removed again if it was not set), so the override does not leak into other
    tests in the same process.
    """
    litellm.set_verbose = True

    # Remember the previous value (None when unset) so `finally` can restore it.
    old_api_key = os.environ.get("AZURE_API_KEY")
    os.environ["AZURE_API_KEY"] = ""
    try:
        print("testing router.get_settings()")
        model_list = [
            {
                "model_name": "gpt-3.5-turbo",  # openai model name
                "litellm_params": {  # params for litellm completion/embedding call
                    "model": "azure/chatgpt-v-2",
                    "api_key": "bad-key",
                    "api_version": os.getenv("AZURE_API_VERSION"),
                    "api_base": os.getenv("AZURE_API_BASE"),
                },
                "tpm": 240000,
                "rpm": 1800,
            },
            {
                "model_name": "gpt-3.5-turbo",  # openai model name
                "litellm_params": {  #
                    "model": "gpt-3.5-turbo",
                    "api_key": "bad-key",
                },
                "tpm": 240000,
                "rpm": 1800,
            },
        ]
        router = Router(
            model_list=model_list,
            redis_host=os.getenv("REDIS_HOST"),
            redis_password=os.getenv("REDIS_PASSWORD"),
            redis_port=int(os.getenv("REDIS_PORT")),
            routing_strategy="latency-based-routing",
            routing_strategy_args={"ttl": 10},
            set_verbose=False,
            num_retries=3,
            retry_after=5,
            allowed_fails=1,
            cooldown_time=30,
        )  # type: ignore
        settings = router.get_settings()
        print(settings)

        # Expected settings (mirrors the constructor arguments above):
        #   routing_strategy: "latency-based-routing"
        #   routing_strategy_args: {"ttl": 10}
        #   allowed_fails: 1
        #   num_retries: 3
        #   retry_after: 5
        #   cooldown_time: 30
        assert settings["routing_strategy"] == "latency-based-routing"
        assert settings["routing_strategy_args"]["ttl"] == 10
        assert settings["allowed_fails"] == 1
        assert settings["num_retries"] == 3
        assert settings["retry_after"] == 5
        assert settings["cooldown_time"] == 30
    except Exception:
        # `Exception` (not a bare except) so Ctrl-C / SystemExit still propagate.
        print(traceback.format_exc())
        pytest.fail("An error occurred - " + traceback.format_exc())
    finally:
        # Undo the AZURE_API_KEY override regardless of the test outcome.
        if old_api_key is None:
            os.environ.pop("AZURE_API_KEY", None)
        else:
            os.environ["AZURE_API_KEY"] = old_api_key