forked from phoenix/litellm-mirror
router - get settings
This commit is contained in:
parent
70716b3373
commit
e271ce8030
3 changed files with 113 additions and 1 deletions
|
@ -8307,7 +8307,12 @@ async def get_config():
|
|||
|
||||
_data_to_return.append({"name": "slack", "variables": _slack_env_vars})
|
||||
|
||||
return {"status": "success", "data": _data_to_return}
|
||||
_router_settings = llm_router.get_settings()
|
||||
return {
|
||||
"status": "success",
|
||||
"data": _data_to_return,
|
||||
"router_settings": _router_settings,
|
||||
}
|
||||
except Exception as e:
|
||||
traceback.print_exc()
|
||||
if isinstance(e, HTTPException):
|
||||
|
|
|
@ -299,6 +299,7 @@ class Router:
|
|||
verbose_router_logger.info(
|
||||
f"Intialized router with Routing strategy: {self.routing_strategy}\n\nRouting fallbacks: {self.fallbacks}\n\nRouting context window fallbacks: {self.context_window_fallbacks}"
|
||||
)
|
||||
self.routing_strategy_args = routing_strategy_args
|
||||
|
||||
def print_deployment(self, deployment: dict):
|
||||
"""
|
||||
|
@ -2334,6 +2335,25 @@ class Router:
|
|||
return self.model_list
|
||||
return None
|
||||
|
||||
def get_settings(self):
    """
    Return a dict of this instance's user-facing settings.

    Only the attribute names whitelisted below are considered, and a name is
    included only when it is currently present in the instance ``__dict__``.
    Keys appear in whitelist order; values are the stored objects themselves
    (no copy is made).
    """
    exposed_names = (
        "routing_strategy_args",
        "routing_strategy",
        "allowed_fails",
        "cooldown_time",
        "num_retries",
        "timeout",
        "max_retries",
        "retry_after",
    )
    instance_state = vars(self)
    return {
        name: instance_state[name]
        for name in exposed_names
        if name in instance_state
    }
|
||||
|
||||
def _get_client(self, deployment, kwargs, client_type=None):
|
||||
"""
|
||||
Returns the appropriate client based on the given deployment, kwargs, and client_type.
|
||||
|
|
87
litellm/tests/test_router_utils.py
Normal file
87
litellm/tests/test_router_utils.py
Normal file
|
@ -0,0 +1,87 @@
|
|||
#### What this tests ####
|
||||
# Tests utility methods of the LLM router, e.g. Router.get_settings()
|
||||
|
||||
import sys, os, time
|
||||
import traceback, asyncio
|
||||
import pytest
|
||||
|
||||
sys.path.insert(
|
||||
0, os.path.abspath("../..")
|
||||
) # Adds the parent directory to the system path
|
||||
import litellm
|
||||
from litellm import Router
|
||||
from litellm.router import Deployment, LiteLLM_Params, ModelInfo
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
from collections import defaultdict
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv()
|
||||
|
||||
|
||||
def test_returned_settings():
    """
    Check that Router.get_settings() reports the values passed to Router().

    The router is built with explicit routing/retry/cooldown arguments and the
    returned settings dict is compared against those same values.

    Reads REDIS_HOST, REDIS_PASSWORD and REDIS_PORT from the environment;
    REDIS_PORT must be set and parse as an int or the test fails.
    """
    litellm.set_verbose = True

    # Remember the current key (if any) so the override below can be undone in
    # `finally`; leaving it blank would leak into tests that run afterwards.
    old_api_key = os.environ.get("AZURE_API_KEY")
    try:
        print("testing if router.get_settings() returns the configured settings")
        os.environ["AZURE_API_KEY"] = ""
        model_list = [
            {
                "model_name": "gpt-3.5-turbo",  # openai model name
                "litellm_params": {  # params for litellm completion/embedding call
                    "model": "azure/chatgpt-v-2",
                    "api_key": "bad-key",
                    "api_version": os.getenv("AZURE_API_VERSION"),
                    "api_base": os.getenv("AZURE_API_BASE"),
                },
                "tpm": 240000,
                "rpm": 1800,
            },
            {
                "model_name": "gpt-3.5-turbo",  # openai model name
                "litellm_params": {
                    "model": "gpt-3.5-turbo",
                    "api_key": "bad-key",
                },
                "tpm": 240000,
                "rpm": 1800,
            },
        ]
        router = Router(
            model_list=model_list,
            redis_host=os.getenv("REDIS_HOST"),
            redis_password=os.getenv("REDIS_PASSWORD"),
            redis_port=int(os.getenv("REDIS_PORT")),
            routing_strategy="latency-based-routing",
            routing_strategy_args={"ttl": 10},
            set_verbose=False,
            num_retries=3,
            retry_after=5,
            allowed_fails=1,
            cooldown_time=30,
        )  # type: ignore

        settings = router.get_settings()
        print(settings)

        # Expected settings (mirrors the Router(...) arguments above):
        #   routing_strategy: "latency-based-routing"
        #   routing_strategy_args: {"ttl": 10}
        #   allowed_fails: 1
        #   num_retries: 3
        #   retry_after: 5      # seconds to wait before retrying a failed request
        #   cooldown_time: 30   # seconds to cooldown a deployment after failure
        assert settings["routing_strategy"] == "latency-based-routing"
        assert settings["routing_strategy_args"]["ttl"] == 10
        assert settings["allowed_fails"] == 1
        assert settings["num_retries"] == 3
        assert settings["retry_after"] == 5
        assert settings["cooldown_time"] == 30
    except Exception:
        # Exception (not a bare except) so KeyboardInterrupt/SystemExit still
        # propagate; assertion failures are reported with the full traceback.
        print(traceback.format_exc())
        pytest.fail("An error occurred - " + traceback.format_exc())
    finally:
        # Put AZURE_API_KEY back exactly as it was before the test ran.
        if old_api_key is None:
            os.environ.pop("AZURE_API_KEY", None)
        else:
            os.environ["AZURE_API_KEY"] = old_api_key
|
Loading…
Add table
Add a link
Reference in a new issue