forked from phoenix/litellm-mirror
fix(router.py): make sure pre call rpm check runs even when model not in model cost map
parent 240aaf7af8 · commit 84d43484c6

3 changed files with 52 additions and 11 deletions
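Background for the change: when a deployment's underlying model is missing from litellm's model cost map, `litellm.get_model_info()` raises, and the old pre-call check treated that exception as "skip this deployment", which also skipped the rpm check that follows it. A minimal sketch of the failing lookup this commit guards against (the model name is the fake one used in the test below):

```python
import litellm

# "openai/my-fake-model" is not in litellm's model cost map, so the lookup
# raises instead of returning max_input_tokens etc. Before this fix, that
# exception caused the router to `continue` past the deployment entirely,
# so its rpm limit was never enforced.
try:
    litellm.get_model_info(model="openai/my-fake-model")
except Exception as e:
    print(f"get_model_info raised: {e}")
```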
@@ -5,6 +5,7 @@ model_list:
       api_key: my-fake-key
       api_base: https://openai-function-calling-workers.tasslexyz.workers.dev/
       stream_timeout: 0.001
+      rpm: 10
   - litellm_params:
       model: azure/chatgpt-v-2
       api_base: os.environ/AZURE_API_BASE
@@ -27,6 +28,12 @@ litellm_settings:
   upperbound_key_generate_params:
     max_budget: os.environ/LITELLM_UPPERBOUND_KEYS_MAX_BUDGET

+router_settings:
+  routing_strategy: usage-based-routing-v2
+  redis_host: os.environ/REDIS_HOST
+  redis_password: os.environ/REDIS_PASSWORD
+  redis_port: os.environ/REDIS_PORT
+  enable_pre_call_checks: True

 general_settings:
   master_key: sk-1234
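The same router settings can also be exercised straight from Python rather than through the proxy config. A minimal sketch, assuming the usual `Router` constructor arguments and carrying over the environment-variable names and the fake model entry from the YAML above:

```python
import os

from litellm import Router

# Sketch mirroring the router_settings block above; the single model entry is
# the fake endpoint used elsewhere in this diff, with its rpm limit of 10.
router = Router(
    model_list=[
        {
            "model_name": "fake-openai-endpoint",
            "litellm_params": {
                "model": "openai/my-fake-model",
                "api_key": "my-fake-key",
                "api_base": "https://openai-function-calling-workers.tasslexyz.workers.dev/",
                "rpm": 10,
            },
        }
    ],
    routing_strategy="usage-based-routing-v2",
    redis_host=os.environ["REDIS_HOST"],
    redis_password=os.environ["REDIS_PASSWORD"],
    redis_port=int(os.environ["REDIS_PORT"]),
    enable_pre_call_checks=True,
)
```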
@@ -2357,20 +2357,20 @@ class Router:
                         "model", None
                     )
                 model_info = litellm.get_model_info(model=model)
-            except:
-                continue

-            if (
-                isinstance(model_info, dict)
-                and model_info.get("max_input_tokens", None) is not None
-            ):
                 if (
-                    isinstance(model_info["max_input_tokens"], int)
-                    and input_tokens > model_info["max_input_tokens"]
+                    isinstance(model_info, dict)
+                    and model_info.get("max_input_tokens", None) is not None
                 ):
-                    invalid_model_indices.append(idx)
-                    _context_window_error = True
-                    continue
+                    if (
+                        isinstance(model_info["max_input_tokens"], int)
+                        and input_tokens > model_info["max_input_tokens"]
+                    ):
+                        invalid_model_indices.append(idx)
+                        _context_window_error = True
+                        continue
+            except Exception as e:
+                verbose_router_logger.debug("An error occurs - {}".format(str(e)))

             ## RPM CHECK ##
             _litellm_params = deployment.get("litellm_params", {})
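The shape of the fix, reduced to its control flow: the model-info lookup and the context-window check now share one try block, and a failed lookup is logged instead of short-circuiting the loop iteration, so the RPM check after the except still executes. A simplified stand-in (not the router's actual code; `lookup_model_info` and the prints are illustrative only):

```python
# Simplified stand-in for the loop body in Router._pre_call_checks.
def lookup_model_info(model: str) -> dict:
    # Stands in for litellm.get_model_info on a model missing from the cost map.
    raise ValueError(f"{model} isn't mapped in the model cost map")

for deployment in [{"litellm_params": {"model": "openai/my-fake-model", "rpm": 0}}]:
    try:
        model_info = lookup_model_info(deployment["litellm_params"]["model"])
        # context-window check would run here, using model_info
    except Exception as e:
        # old behaviour: `continue`  -> the rpm check below never ran
        # new behaviour: log and fall through
        print(f"An error occurs - {e}")

    ## RPM CHECK ## -- still reached for this deployment
    print("rpm limit for deployment:", deployment["litellm_params"].get("rpm"))
```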
@@ -398,6 +398,40 @@ async def test_async_router_context_window_fallback():
         pytest.fail(f"Got unexpected exception on router! - {str(e)}")


+def test_router_rpm_pre_call_check():
+    """
+    - for a given model not in model cost map
+    - with rpm set
+    - check if rpm check is run
+    """
+    try:
+        model_list = [
+            {
+                "model_name": "fake-openai-endpoint",  # openai model name
+                "litellm_params": {  # params for litellm completion/embedding call
+                    "model": "openai/my-fake-model",
+                    "api_key": "my-fake-key",
+                    "api_base": "https://openai-function-calling-workers.tasslexyz.workers.dev/",
+                    "rpm": 0,
+                },
+            },
+        ]
+
+        router = Router(model_list=model_list, set_verbose=True, enable_pre_call_checks=True, num_retries=0)  # type: ignore
+
+        try:
+            router._pre_call_checks(
+                model="fake-openai-endpoint",
+                healthy_deployments=model_list,
+                messages=[{"role": "user", "content": "Hey, how's it going?"}],
+            )
+            pytest.fail("Expected this to fail")
+        except:
+            pass
+    except Exception as e:
+        pytest.fail(f"Got unexpected exception on router! - {str(e)}")
+
+
 def test_router_context_window_check_pre_call_check_in_group():
     """
     - Give a gpt-3.5-turbo model group with different context windows (4k vs. 16k)
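Note on the test's expectation: `rpm` is deliberately set to 0 so that, if the RPM check actually runs, the only deployment is already over its limit and `_pre_call_checks` raises; the inner `except` treats that error as the pass condition, while the outer `except` catches anything unexpected. The model is intentionally one that is not in the model cost map, matching the scenario the commit message describes.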