forked from phoenix/litellm-mirror
router - get settings
This commit is contained in:
parent
70716b3373
commit
e271ce8030
3 changed files with 113 additions and 1 deletions
|
@ -8307,7 +8307,12 @@ async def get_config():
|
||||||
|
|
||||||
_data_to_return.append({"name": "slack", "variables": _slack_env_vars})
|
_data_to_return.append({"name": "slack", "variables": _slack_env_vars})
|
||||||
|
|
||||||
return {"status": "success", "data": _data_to_return}
|
_router_settings = llm_router.get_settings()
|
||||||
|
return {
|
||||||
|
"status": "success",
|
||||||
|
"data": _data_to_return,
|
||||||
|
"router_settings": _router_settings,
|
||||||
|
}
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
traceback.print_exc()
|
traceback.print_exc()
|
||||||
if isinstance(e, HTTPException):
|
if isinstance(e, HTTPException):
|
||||||
|
|
|
@ -299,6 +299,7 @@ class Router:
|
||||||
verbose_router_logger.info(
|
verbose_router_logger.info(
|
||||||
f"Intialized router with Routing strategy: {self.routing_strategy}\n\nRouting fallbacks: {self.fallbacks}\n\nRouting context window fallbacks: {self.context_window_fallbacks}"
|
f"Intialized router with Routing strategy: {self.routing_strategy}\n\nRouting fallbacks: {self.fallbacks}\n\nRouting context window fallbacks: {self.context_window_fallbacks}"
|
||||||
)
|
)
|
||||||
|
self.routing_strategy_args = routing_strategy_args
|
||||||
|
|
||||||
def print_deployment(self, deployment: dict):
|
def print_deployment(self, deployment: dict):
|
||||||
"""
|
"""
|
||||||
|
@ -2334,6 +2335,25 @@ class Router:
|
||||||
return self.model_list
|
return self.model_list
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
def get_settings(self) -> dict:
    """
    Return a subset of this router's instance attributes as a plain dict.

    Only the attribute names listed in ``vars_to_include`` are reported.
    A name that is not currently set on the instance is left out of the
    result entirely (it is not reported as ``None``).

    Returns:
        dict: attribute name -> current value, in the order the names are
        listed below. Values are the very objects stored on the instance;
        no copy is made, so mutating a returned container mutates the
        router's own state.
    """
    _all_vars = vars(self)
    vars_to_include = (
        "routing_strategy_args",
        "routing_strategy",
        "allowed_fails",
        "cooldown_time",
        "num_retries",
        "timeout",
        "max_retries",
        "retry_after",
    )
    # Membership is checked against the instance __dict__ so that attributes
    # which were never assigned are skipped instead of raising KeyError.
    return {var: _all_vars[var] for var in vars_to_include if var in _all_vars}
|
||||||
|
|
||||||
def _get_client(self, deployment, kwargs, client_type=None):
|
def _get_client(self, deployment, kwargs, client_type=None):
|
||||||
"""
|
"""
|
||||||
Returns the appropriate client based on the given deployment, kwargs, and client_type.
|
Returns the appropriate client based on the given deployment, kwargs, and client_type.
|
||||||
|
|
87
litellm/tests/test_router_utils.py
Normal file
87
litellm/tests/test_router_utils.py
Normal file
|
@ -0,0 +1,87 @@
|
||||||
|
#### What this tests ####
|
||||||
|
# Tests utility methods exposed by the LLM router, e.g. Router.get_settings()
|
||||||
|
|
||||||
|
import sys, os, time
|
||||||
|
import traceback, asyncio
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
sys.path.insert(
|
||||||
|
0, os.path.abspath("../..")
|
||||||
|
) # Adds the parent directory to the system path
|
||||||
|
import litellm
|
||||||
|
from litellm import Router
|
||||||
|
from litellm.router import Deployment, LiteLLM_Params, ModelInfo
|
||||||
|
from concurrent.futures import ThreadPoolExecutor
|
||||||
|
from collections import defaultdict
|
||||||
|
from dotenv import load_dotenv
|
||||||
|
|
||||||
|
load_dotenv()
|
||||||
|
|
||||||
|
|
||||||
|
def test_returned_settings():
    """
    Check that Router.get_settings() reports the values passed to Router().

    A router is built with explicit routing/retry/cooldown arguments and the
    dict returned by get_settings() is compared against those same values.
    No completion call is made, so the placeholder API keys in the model
    list are never sent anywhere.

    AZURE_API_KEY is blanked for the duration of the test and put back (or
    removed again, if it was not set) afterwards so the change cannot leak
    into other tests in the same process.
    """
    litellm.set_verbose = True

    # .get() instead of [] so a missing variable does not abort the test.
    old_api_key = os.environ.get("AZURE_API_KEY")
    try:
        print("testing router.get_settings()")
        os.environ["AZURE_API_KEY"] = ""
        model_list = [
            {
                "model_name": "gpt-3.5-turbo",  # openai model name
                "litellm_params": {  # params for litellm completion/embedding call
                    "model": "azure/chatgpt-v-2",
                    "api_key": "bad-key",
                    "api_version": os.getenv("AZURE_API_VERSION"),
                    "api_base": os.getenv("AZURE_API_BASE"),
                },
                "tpm": 240000,
                "rpm": 1800,
            },
            {
                "model_name": "gpt-3.5-turbo",  # openai model name
                "litellm_params": {
                    "model": "gpt-3.5-turbo",
                    "api_key": "bad-key",
                },
                "tpm": 240000,
                "rpm": 1800,
            },
        ]
        router = Router(
            model_list=model_list,
            redis_host=os.getenv("REDIS_HOST"),
            redis_password=os.getenv("REDIS_PASSWORD"),
            redis_port=int(os.getenv("REDIS_PORT")),
            routing_strategy="latency-based-routing",
            routing_strategy_args={"ttl": 10},
            set_verbose=False,
            num_retries=3,
            retry_after=5,
            allowed_fails=1,
            cooldown_time=30,
        )  # type: ignore

        settings = router.get_settings()
        print(settings)

        # Expected settings, mirroring the Router() arguments above:
        #   routing_strategy: "latency-based-routing"
        #   routing_strategy_args: {"ttl": 10}
        #   allowed_fails: 1
        #   num_retries: 3
        #   retry_after: 5
        #   cooldown_time: 30
        assert settings["routing_strategy"] == "latency-based-routing"
        assert settings["routing_strategy_args"]["ttl"] == 10
        assert settings["allowed_fails"] == 1
        assert settings["num_retries"] == 3
        assert settings["retry_after"] == 5
        assert settings["cooldown_time"] == 30

    except Exception:
        # Exception (not a bare except) so Ctrl-C / SystemExit still propagate.
        print(traceback.format_exc())
        pytest.fail("An error occurred - " + traceback.format_exc())
    finally:
        # Put the environment back exactly as it was found.
        if old_api_key is None:
            os.environ.pop("AZURE_API_KEY", None)
        else:
            os.environ["AZURE_API_KEY"] = old_api_key
|
Loading…
Add table
Add a link
Reference in a new issue